From e67c9f4f96c200a2bd68085cb7623f9cc2d415ff Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Thu, 8 Feb 2024 22:24:14 +0100 Subject: [PATCH 001/214] Add module, enum and first few lexer functions --- nemo/src/io.rs | 1 + nemo/src/io/lexer.rs | 198 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 199 insertions(+) create mode 100644 nemo/src/io/lexer.rs diff --git a/nemo/src/io.rs b/nemo/src/io.rs index e2fdb18ef..46defcab6 100644 --- a/nemo/src/io.rs +++ b/nemo/src/io.rs @@ -6,6 +6,7 @@ pub mod compression_format; pub mod export_manager; pub mod formats; pub mod import_manager; +pub mod lexer; pub mod parser; pub mod resource_providers; diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs new file mode 100644 index 000000000..25e0119db --- /dev/null +++ b/nemo/src/io/lexer.rs @@ -0,0 +1,198 @@ +//! Lexical tokenization of rulewerk-style rules. + +use nom::{ + branch::alt, + bytes::complete::{is_a, is_not, tag}, + character::complete::multispace0, + combinator::{peek, recognize}, + error::ParseError, + multi::many0, + sequence::{delimited, tuple}, + IResult, Parser, +}; +use nom_locate::LocatedSpan; + +type Span<'a> = LocatedSpan<&'a str>; + +/// All the tokens the input gets parsed into. +#[derive(Debug, PartialEq)] +enum Token<'a> { + // Directives + Base(Span<'a>), + Prefix(Span<'a>), + Import(Span<'a>), + Export(Span<'a>), + // Syntactic symbols + QuestionMark(Span<'a>), + BracketOpen(Span<'a>), + BracketClose(Span<'a>), + SquaredBracketOpen(Span<'a>), + SquaredBracketClose(Span<'a>), + CurlyBracketOpen(Span<'a>), + CurlyBracketClose(Span<'a>), + Dot(Span<'a>), + Comma(Span<'a>), + Colon(Span<'a>), + ImplicationArrow(Span<'a>), + Greater(Span<'a>), + Equal(Span<'a>), + Less(Span<'a>), + Not(Span<'a>), + DoubleCaret(Span<'a>), + Hash(Span<'a>), + Underscore(Span<'a>), + AtSign(Span<'a>), + // Names or values + Identifier(Span<'a>), + IRI(Span<'a>), + Integer(Span<'a>), + Float(Span<'a>), + String(Span<'a>), + // miscellaneous + Comment(Span<'a>), + Illegal(Span<'a>), + EOF(Span<'a>), +} + +// FIXME: Figure out when erros occur +fn tokenize<'a>(input: Span<'a>) -> Vec> { + let (rest, vec) = many0(ignore_ws(alt((comment, base, prefix, import, export))))(input) + .expect("An error occured"); + vec +} + +fn ignore_ws<'a, F, O, E: ParseError>>( + inner: F, +) -> impl FnMut(Span<'a>) -> IResult, O, E> +where + F: Parser, O, E>, +{ + delimited(multispace0, inner, multispace0) +} + +fn comment<'a>(input: Span<'a>) -> IResult, Token<'a>> { + recognize(tuple(( + tag("%"), + is_not("\n\r"), + alt((tag("\n\r"), tag("\n"))), + )))(input) + .map(|(rest, span)| (rest, Token::Comment(span))) +} + +/// Recognize the `@base` directive +fn base<'a>(input: Span<'a>) -> IResult, Token<'a>> { + tag("@base")(input).map(|(rest, span)| (rest, Token::Base(span))) +} + +fn prefix<'a>(input: Span<'a>) -> IResult, Token<'a>> { + tag("@prefix")(input).map(|(rest, span)| (rest, Token::Prefix(span))) +} + +fn import<'a>(input: Span<'a>) -> IResult, Token<'a>> { + tag("@import")(input).map(|(rest, span)| (rest, Token::Import(span))) +} + +fn export<'a>(input: Span<'a>) -> IResult, Token<'a>> { + tag("@export")(input).map(|(rest, span)| (rest, Token::Export(span))) +} + +#[cfg(test)] +mod test { + use nom::multi::many0; + + use super::{Span, Token}; + // is `tag` the right denomination? + #[test] + fn base_tag() { + assert_eq!( + super::base(Span::new("@base")).unwrap().1, + Token::Base(unsafe { Span::new_from_raw_offset(0, 1, "@base", ()) }) + ); + } + + // is `tag` the right denomination? 
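+
+    // A sketch of how `tokenize` might be exercised end to end. The input
+    // and the expected token variants are illustrative assumptions; note
+    // that `tokenize` still panics on unparseable input (see the FIXME
+    // above) instead of reporting an error.
+    #[test]
+    fn tokenize_directives() {
+        let tokens = super::tokenize(Span::new("@base @prefix"));
+        assert_eq!(tokens.len(), 2);
+        assert!(matches!(tokens[0], Token::Base(_)));
+        assert!(matches!(tokens[1], Token::Prefix(_)));
+    }
+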
+ #[test] + fn prefix_tag() { + assert_eq!( + super::prefix(Span::new("@prefix")).unwrap().1, + Token::Prefix(unsafe { Span::new_from_raw_offset(0, 1, "@prefix", ()) }) + ); + } + + // is `tag` the right denomination? + #[test] + fn import_tag() { + assert_eq!( + super::import(Span::new("@import")).unwrap().1, + Token::Import(unsafe { Span::new_from_raw_offset(0, 1, "@import", ()) }) + ); + } + + // is `tag` the right denomination? + #[test] + fn export_tag() { + assert_eq!( + super::export(Span::new("@export")).unwrap().1, + Token::Export(unsafe { Span::new_from_raw_offset(0, 1, "@export", ()) }) + ); + } + + #[test] + fn comment() { + assert_eq!( + super::comment(Span::new( + "% Some meaningful comment with some other %'s in it\n" + )) + .unwrap() + .1, + Token::Comment(unsafe { + Span::new_from_raw_offset( + 0, + 1, + "% Some meaningful comment with some other %'s in it\n", + (), + ) + }) + ); + assert_eq!( + super::comment(Span::new( + "% Some meaningful comment with some other %'s in it\n\r" + )) + .unwrap() + .1, + Token::Comment(unsafe { + Span::new_from_raw_offset( + 0, + 1, + "% Some meaningful comment with some other %'s in it\n\r", + (), + ) + }) + ); + assert_eq!( + super::comment(Span::new( + "% Some meaningful comment\n%that is more than one line long\n" + )) + .unwrap() + .1, + Token::Comment(unsafe { + Span::new_from_raw_offset(0, 1, "% Some meaningful comment\n", ()) + }) + ); + assert_eq!( + many0(super::comment)(Span::new( + "% Some meaningful comment\n%that is more than one line long\n" + )) + .unwrap() + .1, + vec![ + Token::Comment(unsafe { + Span::new_from_raw_offset(0, 1, "% Some meaningful comment\n", ()) + }), + Token::Comment(unsafe { + Span::new_from_raw_offset(26, 2, "%that is more than one line long\n", ()) + }) + ] + ); + } +} From 0bb2c1da24caec721b0e9055b838f507c7bcfb0a Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Thu, 15 Feb 2024 19:27:32 +0100 Subject: [PATCH 002/214] Switch to character based tokenizer --- Cargo.lock | 1 + nemo/Cargo.toml | 1 + nemo/src/io/lexer.rs | 552 +++++++++++++++++++++++++++++++------------ 3 files changed, 401 insertions(+), 153 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3a2bf0955..46134e7f8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1186,6 +1186,7 @@ dependencies = [ "test-log", "thiserror", "tokio", + "unicode-ident", ] [[package]] diff --git a/nemo/Cargo.toml b/nemo/Cargo.toml index 5ccc66f02..173a3de22 100644 --- a/nemo/Cargo.toml +++ b/nemo/Cargo.toml @@ -45,6 +45,7 @@ ascii_tree = "0.1.1" serde_json = "1.0.108" serde = {version = "1.0.138", features = ["derive"] } dyn-clone = "1.0.16" +unicode-ident = "1.0.12" [dev-dependencies] env_logger = "*" diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs index 25e0119db..a8a4b8415 100644 --- a/nemo/src/io/lexer.rs +++ b/nemo/src/io/lexer.rs @@ -1,198 +1,444 @@ //! Lexical tokenization of rulewerk-style rules. -use nom::{ - branch::alt, - bytes::complete::{is_a, is_not, tag}, - character::complete::multispace0, - combinator::{peek, recognize}, - error::ParseError, - multi::many0, - sequence::{delimited, tuple}, - IResult, Parser, -}; -use nom_locate::LocatedSpan; - -type Span<'a> = LocatedSpan<&'a str>; +use std::str::Chars; -/// All the tokens the input gets parsed into. 
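+// The nom-based tokenizer below is dropped in favor of a hand-rolled,
+// cursor-style lexer over `Chars`, in the spirit of rustc_lexer. A minimal
+// sketch of the cursor pattern (using the `EOF_CHAR` sentinel that the new
+// code defines below):
+//
+//     let mut chars = "abc".chars();
+//     let peeked = chars.clone().next().unwrap_or(EOF_CHAR); // look ahead, no consume
+//     let bumped = chars.next();                             // consume one char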
-#[derive(Debug, PartialEq)] -enum Token<'a> { - // Directives - Base(Span<'a>), - Prefix(Span<'a>), - Import(Span<'a>), - Export(Span<'a>), - // Syntactic symbols - QuestionMark(Span<'a>), - BracketOpen(Span<'a>), - BracketClose(Span<'a>), - SquaredBracketOpen(Span<'a>), - SquaredBracketClose(Span<'a>), - CurlyBracketOpen(Span<'a>), - CurlyBracketClose(Span<'a>), - Dot(Span<'a>), - Comma(Span<'a>), - Colon(Span<'a>), - ImplicationArrow(Span<'a>), - Greater(Span<'a>), - Equal(Span<'a>), - Less(Span<'a>), - Not(Span<'a>), - DoubleCaret(Span<'a>), - Hash(Span<'a>), - Underscore(Span<'a>), - AtSign(Span<'a>), - // Names or values - Identifier(Span<'a>), - IRI(Span<'a>), - Integer(Span<'a>), - Float(Span<'a>), - String(Span<'a>), - // miscellaneous - Comment(Span<'a>), - Illegal(Span<'a>), - EOF(Span<'a>), -} +const EOF_CHAR: char = '\0'; -// FIXME: Figure out when erros occur -fn tokenize<'a>(input: Span<'a>) -> Vec> { - let (rest, vec) = many0(ignore_ws(alt((comment, base, prefix, import, export))))(input) - .expect("An error occured"); - vec +#[derive(Debug)] +struct Lexer<'a> { + chars: Chars<'a>, } -fn ignore_ws<'a, F, O, E: ParseError>>( - inner: F, -) -> impl FnMut(Span<'a>) -> IResult, O, E> -where - F: Parser, O, E>, -{ - delimited(multispace0, inner, multispace0) -} +impl Lexer<'_> { + fn new(input: &str) -> Lexer { + Lexer { + chars: input.chars(), + } + } + fn peek(&self, count: usize) -> char { + self.chars.clone().nth(count - 1).unwrap_or(EOF_CHAR) + } + fn bump(&mut self) -> Option { + self.chars.next() + } + fn is_eof(&self) -> bool { + self.chars.as_str().is_empty() + } + fn bump_while(&mut self, mut predicate: impl FnMut(char) -> bool) { + while predicate(self.peek(1)) && !self.is_eof() { + self.bump(); + } + } + fn advance_token(&mut self) -> TokenKind { + use TokenKind::*; + let first_char = match self.bump() { + Some(c) => c, + None => return Eof, + }; + match first_char { + '%' => match (self.peek(1), self.peek(2)) { + (n1, n2) if n1.is_digit(16) && n2.is_digit(16) => self.pct_encoded(), + _ => self.comment(), + }, + '\n' => Whitespace(true), + c if is_whitespace(c) => self.whitespace(), + c if unicode_ident::is_xid_start(c) => self.ident(), + c @ '0'..='9' => self.number(), + '?' => QuestionMark, + '!' => ExclamationMark, + '(' => OpenParen, + ')' => CloseParen, + '[' => OpenBracket, + ']' => CloseBracket, + '{' => OpenBrace, + '}' => CloseBrace, + '.' 
=> Dot, + ',' => Comma, + ':' => Colon, + ';' => Semicolon, + '>' => Greater, + '=' => Equal, + '<' => Less, + '~' => Tilde, + '^' => Caret, + '#' => Hash, + '_' => Underscore, + '@' => At, + '+' => Plus, + '-' => Minus, + '*' => Star, + '/' => Slash, + '$' => Dollar, + '&' => Ampersand, + '\'' => Apostrophe, + _ => todo!(), + } + } -fn comment<'a>(input: Span<'a>) -> IResult, Token<'a>> { - recognize(tuple(( - tag("%"), - is_not("\n\r"), - alt((tag("\n\r"), tag("\n"))), - )))(input) - .map(|(rest, span)| (rest, Token::Comment(span))) + fn number(&mut self) -> TokenKind { + self.bump_while(is_hex_digit); + TokenKind::Number + } + fn pct_encoded(&mut self) -> TokenKind { + self.bump(); + self.bump(); + TokenKind::PctEncoded + } + fn comment(&mut self) -> TokenKind { + self.bump_while(|c| c != '\n'); + self.bump(); + TokenKind::Comment + } + fn whitespace(&mut self) -> TokenKind { + self.bump_while(|c| is_whitespace(c) && c != '\n'); + if '\n' == self.peek(1) { + self.bump(); + return TokenKind::Whitespace(true); + } + TokenKind::Whitespace(false) + } + fn ident(&mut self) -> TokenKind { + self.bump_while(unicode_ident::is_xid_continue); + TokenKind::Ident + } } -/// Recognize the `@base` directive -fn base<'a>(input: Span<'a>) -> IResult, Token<'a>> { - tag("@base")(input).map(|(rest, span)| (rest, Token::Base(span))) +fn is_hex_digit(c: char) -> bool { + c.is_digit(16) } -fn prefix<'a>(input: Span<'a>) -> IResult, Token<'a>> { - tag("@prefix")(input).map(|(rest, span)| (rest, Token::Prefix(span))) +fn is_whitespace(c: char) -> bool { + // support also vertical tab, form feed, NEXT LINE (latin1), + // LEFT-TO-RIGHT MARK, RIGHT-TO-LEFT MARK, LINE SEPARATOR and PARAGRAPH SEPARATOR? + matches!(c, ' ' | '\n' | '\t' | '\r') } -fn import<'a>(input: Span<'a>) -> IResult, Token<'a>> { - tag("@import")(input).map(|(rest, span)| (rest, Token::Import(span))) +fn is_ident(s: &str) -> bool { + let mut chars = s.chars(); + if let Some(char) = chars.next() { + unicode_ident::is_xid_start(char) && chars.all(unicode_ident::is_xid_continue) + } else { + false + } } -fn export<'a>(input: Span<'a>) -> IResult, Token<'a>> { - tag("@export")(input).map(|(rest, span)| (rest, Token::Export(span))) +/// All the tokens the input gets parsed into. +#[derive(Debug, PartialEq, Copy, Clone)] +enum TokenKind { + // Syntactic symbols: + /// '?' + QuestionMark, + /// '!' + ExclamationMark, + /// '(' + OpenParen, + /// ')' + CloseParen, + /// '[' + OpenBracket, + /// ']' + CloseBracket, + /// '{' + OpenBrace, + /// '}' + CloseBrace, + /// '.' 
+ Dot, + /// ',' + Comma, + /// ':' + Colon, + /// ';' + Semicolon, + /// '>' + Greater, + /// '=' + Equal, + /// '<' + Less, + /// '~' + Tilde, + /// '^' + Caret, + /// '#' + Hash, + /// '_' + Underscore, + /// '@' + At, + /// '+' + Plus, + /// '-' + Minus, + /// '*' + Star, + /// '/' + Slash, + /// '$' + Dollar, + /// '&' + Ampersand, + /// "'" + Apostrophe, + // Multi-char tokens: + /// Identifier for keywords and predicate names + Ident, + /// All other Utf8 characters that can be used in an IRI + Utf8Chars, + /// Percent-encoded characters in IRIs + PctEncoded, + /// Base 10 digits + Number, + /// A string literal + String, + /// A comment, starting with `%` + Comment, + /// A comment, starting with `%%` + DocComment, + /// bool: ends_with_newline + Whitespace(bool), + /// catch all token + Illegal, + /// signals end of file + Eof, } #[cfg(test)] mod test { - use nom::multi::many0; + use super::TokenKind::*; + use crate::io::lexer::Lexer; - use super::{Span, Token}; - // is `tag` the right denomination? #[test] - fn base_tag() { + fn tokenize() { assert_eq!( - super::base(Span::new("@base")).unwrap().1, - Token::Base(unsafe { Span::new_from_raw_offset(0, 1, "@base", ()) }) - ); + { + let mut vec = vec![]; + let mut lexer = Lexer::new("P(?X) :- A(?X).\t\n A(Human)."); + loop { + let tok = lexer.advance_token(); + vec.push(tok.clone()); + if tok == Eof { + break; + } + } + vec + }, + vec![ + Ident, + OpenParen, + QuestionMark, + Ident, + CloseParen, + Whitespace(false), + Colon, + Minus, + Whitespace(false), + Ident, + OpenParen, + QuestionMark, + Ident, + CloseParen, + Dot, + Whitespace(true), + Whitespace(false), + Ident, + OpenParen, + Ident, + CloseParen, + Dot, + Eof + ] + ) } - // is `tag` the right denomination? #[test] - fn prefix_tag() { + fn comment() { assert_eq!( - super::prefix(Span::new("@prefix")).unwrap().1, - Token::Prefix(unsafe { Span::new_from_raw_offset(0, 1, "@prefix", ()) }) - ); + { + let mut vec = vec![]; + let mut lexer = Lexer::new("% Some Comment\n"); + loop { + let tok = lexer.advance_token(); + vec.push(tok.clone()); + if tok == Eof { + break; + } + } + vec + }, + vec![Comment, Eof] + ) } - // is `tag` the right denomination? #[test] - fn import_tag() { + fn pct_enc_with_comment() { assert_eq!( - super::import(Span::new("@import")).unwrap().1, - Token::Import(unsafe { Span::new_from_raw_offset(0, 1, "@import", ()) }) - ); + { + let mut vec = vec![]; + let mut lexer = Lexer::new("%38%a3% Some Comment\n"); + loop { + let tok = lexer.advance_token(); + vec.push(tok.clone()); + if tok == Eof { + break; + } + } + vec + }, + vec![PctEncoded, PctEncoded, Comment, Eof] + ) } - // is `tag` the right denomination? #[test] - fn export_tag() { + fn ident() { assert_eq!( - super::export(Span::new("@export")).unwrap().1, - Token::Export(unsafe { Span::new_from_raw_offset(0, 1, "@export", ()) }) - ); + { + let mut vec = vec![]; + let mut lexer = Lexer::new("some_Ident(Alice). 
%comment at the end of a line\n"); + loop { + let tok = lexer.advance_token(); + vec.push(tok.clone()); + if tok == Eof { + break; + } + } + vec + }, + vec![ + Ident, + OpenParen, + Ident, + CloseParen, + Dot, + Whitespace(false), + Comment, + Eof + ] + ) } #[test] - fn comment() { + #[should_panic] + fn forbidden_ident() { assert_eq!( - super::comment(Span::new( - "% Some meaningful comment with some other %'s in it\n" - )) - .unwrap() - .1, - Token::Comment(unsafe { - Span::new_from_raw_offset( - 0, - 1, - "% Some meaningful comment with some other %'s in it\n", - (), - ) - }) - ); + { + let mut vec = vec![]; + let mut lexer = Lexer::new("_someIdent(Alice). %comment at the end of a line\n"); + loop { + let tok = lexer.advance_token(); + vec.push(tok.clone()); + if tok == Eof { + break; + } + } + vec + }, + vec![ + Ident, + OpenParen, + Ident, + CloseParen, + Dot, + Whitespace(false), + Comment, + Eof + ] + ) + } + + #[test] + fn iri() { assert_eq!( - super::comment(Span::new( - "% Some meaningful comment with some other %'s in it\n\r" - )) - .unwrap() - .1, - Token::Comment(unsafe { - Span::new_from_raw_offset( - 0, - 1, - "% Some meaningful comment with some other %'s in it\n\r", - (), - ) - }) - ); + { + let mut vec = vec![]; + let mut lexer = Lexer::new(""); + loop { + let tok = lexer.advance_token(); + vec.push(tok.clone()); + if tok == Eof { + break; + } + } + vec + }, + vec![ + Less, Ident, Colon, Slash, Slash, Ident, Dot, Ident, Dot, Ident, Slash, Greater, + Eof + ] + ) + } + + #[test] + fn iri_pct_enc() { assert_eq!( - super::comment(Span::new( - "% Some meaningful comment\n%that is more than one line long\n" - )) - .unwrap() - .1, - Token::Comment(unsafe { - Span::new_from_raw_offset(0, 1, "% Some meaningful comment\n", ()) - }) - ); + { + let mut vec = vec![]; + let mut lexer = Lexer::new("\n"); + loop { + let tok = lexer.advance_token(); + vec.push(tok.clone()); + if tok == Eof { + break; + } + } + vec + }, + vec![ + Less, + Ident, + Colon, + Slash, + Slash, + Ident, + PctEncoded, + PctEncoded, + Ident, + PctEncoded, + PctEncoded, + Dot, + Ident, + Dot, + Ident, + Greater, + Whitespace(true), + Eof + ] + ) + } + + #[test] + fn pct_enc_comment() { assert_eq!( - many0(super::comment)(Span::new( - "% Some meaningful comment\n%that is more than one line long\n" - )) - .unwrap() - .1, + { + let mut vec = vec![]; + let mut lexer = Lexer::new("%d4 this should be a comment,\n% but the lexer can't distinguish a percent encoded value\n% in an iri from a comment :(\n"); + loop { + let tok = lexer.advance_token(); + vec.push(tok.clone()); + if tok == Eof { + break; + } + } + vec + }, vec![ - Token::Comment(unsafe { - Span::new_from_raw_offset(0, 1, "% Some meaningful comment\n", ()) - }), - Token::Comment(unsafe { - Span::new_from_raw_offset(26, 2, "%that is more than one line long\n", ()) - }) + PctEncoded, + Whitespace(false), + Ident, + Whitespace(false), + Ident, + Whitespace(false), + Ident, + Whitespace(false), + Ident, + Whitespace(false), + Ident, + Comma, + Whitespace(true), + Comment, + Comment, + Eof ] - ); + ) } } From 6c1aa6f410968264c02e215de6b7a23f932bd3f3 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Sat, 17 Feb 2024 08:07:29 +0100 Subject: [PATCH 003/214] Add ucschar and iprivate lexing for IRIs --- nemo/src/io/lexer.rs | 58 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs index a8a4b8415..806a786c4 100644 --- a/nemo/src/io/lexer.rs +++ b/nemo/src/io/lexer.rs @@ 
-71,6 +71,26 @@ impl Lexer<'_> { '$' => Dollar, '&' => Ampersand, '\'' => Apostrophe, + '\u{A0}'..='\u{D7FF}' + | '\u{F900}'..='\u{FDCF}' + | '\u{FDF0}'..='\u{FFEF}' + | '\u{10000}'..='\u{1FFFD}' + | '\u{20000}'..='\u{2FFFD}' + | '\u{30000}'..='\u{3FFFD}' + | '\u{40000}'..='\u{4FFFD}' + | '\u{50000}'..='\u{5FFFD}' + | '\u{60000}'..='\u{6FFFD}' + | '\u{70000}'..='\u{7FFFD}' + | '\u{80000}'..='\u{8FFFD}' + | '\u{90000}'..='\u{9FFFD}' + | '\u{A0000}'..='\u{AFFFD}' + | '\u{B0000}'..='\u{BFFFD}' + | '\u{C0000}'..='\u{CFFFD}' + | '\u{D0000}'..='\u{DFFFD}' + | '\u{E1000}'..='\u{EFFFD}' => self.ucschar(), + '\u{E000}'..='\u{F8FF}' | '\u{F0000}'..='\u{FFFFD}' | '\u{100000}'..='\u{10FFFD}' => { + self.iprivate() + } _ => todo!(), } } @@ -101,6 +121,16 @@ impl Lexer<'_> { self.bump_while(unicode_ident::is_xid_continue); TokenKind::Ident } + + fn ucschar(&mut self) -> TokenKind { + self.bump_while(is_ucschar); + TokenKind::UcsChars + } + + fn iprivate(&mut self) -> TokenKind { + self.bump_while(is_iprivate); + TokenKind::Iprivate + } } fn is_hex_digit(c: char) -> bool { @@ -122,6 +152,30 @@ fn is_ident(s: &str) -> bool { } } +fn is_ucschar(c: char) -> bool { + matches!(c, '\u{A0}'..='\u{D7FF}' + | '\u{F900}'..='\u{FDCF}' + | '\u{FDF0}'..='\u{FFEF}' + | '\u{10000}'..='\u{1FFFD}' + | '\u{20000}'..='\u{2FFFD}' + | '\u{30000}'..='\u{3FFFD}' + | '\u{40000}'..='\u{4FFFD}' + | '\u{50000}'..='\u{5FFFD}' + | '\u{60000}'..='\u{6FFFD}' + | '\u{70000}'..='\u{7FFFD}' + | '\u{80000}'..='\u{8FFFD}' + | '\u{90000}'..='\u{9FFFD}' + | '\u{A0000}'..='\u{AFFFD}' + | '\u{B0000}'..='\u{BFFFD}' + | '\u{C0000}'..='\u{CFFFD}' + | '\u{D0000}'..='\u{DFFFD}' + | '\u{E1000}'..='\u{EFFFD}') +} + +fn is_iprivate(c: char) -> bool { + matches!(c, '\u{E000}'..='\u{F8FF}' | '\u{F0000}'..='\u{FFFFD}' | '\u{100000}'..='\u{10FFFD}') +} + /// All the tokens the input gets parsed into. 
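+// `is_ucschar` and `is_iprivate` above mirror the `ucschar` and `iprivate`
+// productions of RFC 3987 (IRIs). A quick sanity check with hand-picked
+// sample characters:
+//
+//     assert!(is_ucschar('é'));          // U+00E9 lies in U+00A0..=U+D7FF
+//     assert!(!is_ucschar('a'));         // ASCII is covered by other rules
+//     assert!(is_iprivate('\u{E000}'));  // start of a private use area
+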
#[derive(Debug, PartialEq, Copy, Clone)] enum TokenKind { @@ -184,7 +238,9 @@ enum TokenKind { /// Identifier for keywords and predicate names Ident, /// All other Utf8 characters that can be used in an IRI - Utf8Chars, + UcsChars, + /// Characters in private use areas + Iprivate, /// Percent-encoded characters in IRIs PctEncoded, /// Base 10 digits From 114cbaf558a0eba80dcebf1094482dc25d2a4467 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Tue, 20 Feb 2024 15:49:40 +0100 Subject: [PATCH 004/214] Add loop and Span to tokenizer --- nemo/src/io/lexer.rs | 586 +++++++++++++++++++++++++------------------ 1 file changed, 343 insertions(+), 243 deletions(-) diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs index 806a786c4..697236b3c 100644 --- a/nemo/src/io/lexer.rs +++ b/nemo/src/io/lexer.rs @@ -4,17 +4,50 @@ use std::str::Chars; const EOF_CHAR: char = '\0'; -#[derive(Debug)] +#[derive(Debug, Copy, Clone, PartialEq)] +struct Span<'a> { + offset: usize, + line: usize, + // size: usize, + fragment: &'a str, +} +// impl<'a> Span<'a> { +impl<'a> Span<'a> { + fn new(offset: usize, line: usize, input: &'a str) -> Span<'a> { + // fn new(offset: usize, line: usize, size: usize) -> Span { + Span { + offset, + line, + fragment: input, + // size, + } + } +} + +#[derive(Debug, Clone)] struct Lexer<'a> { + input: &'a str, + len_remaining: usize, + offset: usize, + lines: usize, chars: Chars<'a>, } - -impl Lexer<'_> { - fn new(input: &str) -> Lexer { +impl<'a> Lexer<'a> { + fn new(input: &'a str) -> Lexer<'a> { Lexer { + input, + len_remaining: input.len(), + offset: 0, + lines: 1, chars: input.chars(), } } + fn consumed_char_length(&self) -> usize { + self.len_remaining - self.chars.as_str().len() + } + fn update_remaining_len(&mut self) { + self.len_remaining = self.chars.as_str().len(); + } fn peek(&self, count: usize) -> char { self.chars.clone().nth(count - 1).unwrap_or(EOF_CHAR) } @@ -29,69 +62,102 @@ impl Lexer<'_> { self.bump(); } } - fn advance_token(&mut self) -> TokenKind { + fn get_tokens(&mut self) -> Vec { use TokenKind::*; - let first_char = match self.bump() { - Some(c) => c, - None => return Eof, - }; - match first_char { - '%' => match (self.peek(1), self.peek(2)) { - (n1, n2) if n1.is_digit(16) && n2.is_digit(16) => self.pct_encoded(), - _ => self.comment(), - }, - '\n' => Whitespace(true), - c if is_whitespace(c) => self.whitespace(), - c if unicode_ident::is_xid_start(c) => self.ident(), - c @ '0'..='9' => self.number(), - '?' => QuestionMark, - '!' => ExclamationMark, - '(' => OpenParen, - ')' => CloseParen, - '[' => OpenBracket, - ']' => CloseBracket, - '{' => OpenBrace, - '}' => CloseBrace, - '.' 
=> Dot, - ',' => Comma, - ':' => Colon, - ';' => Semicolon, - '>' => Greater, - '=' => Equal, - '<' => Less, - '~' => Tilde, - '^' => Caret, - '#' => Hash, - '_' => Underscore, - '@' => At, - '+' => Plus, - '-' => Minus, - '*' => Star, - '/' => Slash, - '$' => Dollar, - '&' => Ampersand, - '\'' => Apostrophe, - '\u{A0}'..='\u{D7FF}' - | '\u{F900}'..='\u{FDCF}' - | '\u{FDF0}'..='\u{FFEF}' - | '\u{10000}'..='\u{1FFFD}' - | '\u{20000}'..='\u{2FFFD}' - | '\u{30000}'..='\u{3FFFD}' - | '\u{40000}'..='\u{4FFFD}' - | '\u{50000}'..='\u{5FFFD}' - | '\u{60000}'..='\u{6FFFD}' - | '\u{70000}'..='\u{7FFFD}' - | '\u{80000}'..='\u{8FFFD}' - | '\u{90000}'..='\u{9FFFD}' - | '\u{A0000}'..='\u{AFFFD}' - | '\u{B0000}'..='\u{BFFFD}' - | '\u{C0000}'..='\u{CFFFD}' - | '\u{D0000}'..='\u{DFFFD}' - | '\u{E1000}'..='\u{EFFFD}' => self.ucschar(), - '\u{E000}'..='\u{F8FF}' | '\u{F0000}'..='\u{FFFFD}' | '\u{100000}'..='\u{10FFFD}' => { - self.iprivate() - } - _ => todo!(), + let mut vec = Vec::new(); + loop { + let old_line_num = self.lines; + let first_char = match self.bump() { + Some(c) => c, + None => { + let eof_tok = Token::new( + Eof, + Span::new( + self.offset, + self.lines, + &self.input[self.offset..self.offset], + ), + ); + vec.push(eof_tok); + return vec; + } + }; + let token_kind = match first_char { + '%' => match (self.peek(1), self.peek(2)) { + (n1, n2) if n1.is_digit(16) && n2.is_digit(16) => self.pct_encoded(), + _ => self.comment(), + }, + '\n' => { + self.lines += 1; + Whitespace + } + c if is_whitespace(c) => self.whitespace(), + c if unicode_ident::is_xid_start(c) => self.ident(), + c @ '0'..='9' => self.number(), + '?' => QuestionMark, + '!' => ExclamationMark, + '(' => OpenParen, + ')' => CloseParen, + '[' => OpenBracket, + ']' => CloseBracket, + '{' => OpenBrace, + '}' => CloseBrace, + '.' 
=> Dot, + ',' => Comma, + ':' => Colon, + ';' => Semicolon, + '>' => Greater, + '=' => Equal, + '<' => Less, + '~' => Tilde, + '^' => Caret, + '#' => Hash, + '_' => Underscore, + '@' => At, + '+' => Plus, + '-' => Minus, + '*' => Star, + '/' => Slash, + '$' => Dollar, + '&' => Ampersand, + '\'' => Apostrophe, + '\u{A0}'..='\u{D7FF}' + | '\u{F900}'..='\u{FDCF}' + | '\u{FDF0}'..='\u{FFEF}' + | '\u{10000}'..='\u{1FFFD}' + | '\u{20000}'..='\u{2FFFD}' + | '\u{30000}'..='\u{3FFFD}' + | '\u{40000}'..='\u{4FFFD}' + | '\u{50000}'..='\u{5FFFD}' + | '\u{60000}'..='\u{6FFFD}' + | '\u{70000}'..='\u{7FFFD}' + | '\u{80000}'..='\u{8FFFD}' + | '\u{90000}'..='\u{9FFFD}' + | '\u{A0000}'..='\u{AFFFD}' + | '\u{B0000}'..='\u{BFFFD}' + | '\u{C0000}'..='\u{CFFFD}' + | '\u{D0000}'..='\u{DFFFD}' + | '\u{E1000}'..='\u{EFFFD}' => self.ucschar(), + '\u{E000}'..='\u{F8FF}' + | '\u{F0000}'..='\u{FFFFD}' + | '\u{100000}'..='\u{10FFFD}' => self.iprivate(), + _ => todo!(), + }; + let tok_len = self.consumed_char_length(); + + // let fragment = &*self.input; + let token = Token::new( + token_kind, + Span::new( + self.offset, + old_line_num, + &self.input[self.offset..(self.offset + tok_len)], + ), + // Span::new(self.offset, self.lines, tok_len), + ); + self.offset += tok_len; + self.update_remaining_len(); + vec.push(token); } } @@ -107,15 +173,17 @@ impl Lexer<'_> { fn comment(&mut self) -> TokenKind { self.bump_while(|c| c != '\n'); self.bump(); + self.lines += 1; TokenKind::Comment } fn whitespace(&mut self) -> TokenKind { self.bump_while(|c| is_whitespace(c) && c != '\n'); if '\n' == self.peek(1) { self.bump(); - return TokenKind::Whitespace(true); + self.lines += 1; + return TokenKind::Whitespace; } - TokenKind::Whitespace(false) + TokenKind::Whitespace } fn ident(&mut self) -> TokenKind { self.bump_while(unicode_ident::is_xid_continue); @@ -176,6 +244,18 @@ fn is_iprivate(c: char) -> bool { matches!(c, '\u{E000}'..='\u{F8FF}' | '\u{F0000}'..='\u{FFFFD}' | '\u{100000}'..='\u{10FFFD}') } +#[derive(Debug, Copy, Clone, PartialEq)] +pub(crate) struct Token<'a> { + kind: TokenKind, + span: Span<'a>, +} +// impl<'a> Token<'a> { +impl<'a> Token<'a> { + fn new(kind: TokenKind, span: Span<'a>) -> Token<'a> { + Token { kind, span } + } +} + /// All the tokens the input gets parsed into. 
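+// With `Token` now carrying a `Span`, diagnostics can point back into the
+// source. A minimal sketch of the intended use (direct field access is an
+// assumption; the fields are only visible within this module):
+//
+//     let tok = Token::new(TokenKind::Dot, Span::new(14, 1, "."));
+//     println!("unexpected `{}` at line {}, offset {}",
+//              tok.span.fragment, tok.span.line, tok.span.offset);
+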
#[derive(Debug, PartialEq, Copy, Clone)] enum TokenKind { @@ -252,7 +332,7 @@ enum TokenKind { /// A comment, starting with `%%` DocComment, /// bool: ends_with_newline - Whitespace(bool), + Whitespace, /// catch all token Illegal, /// signals end of file @@ -262,113 +342,155 @@ enum TokenKind { #[cfg(test)] mod test { use super::TokenKind::*; - use crate::io::lexer::Lexer; + use crate::io::lexer::{Lexer, Span, Token}; + + #[test] + fn empty_input() { + let mut lexer = Lexer::new(""); + assert_eq!( + lexer.get_tokens(), + vec![Token::new(Eof, Span::new(0, 1, ""))] + ) + } + + #[test] + fn base() { + let mut lexer = Lexer::new("@base"); + assert_eq!( + lexer.get_tokens(), + vec![ + Token::new(At, Span::new(0, 1, "@")), + Token::new(Ident, Span::new(1, 1, "base")), + Token::new(Eof, Span::new(5, 1, "")), + ] + ) + } + + #[test] + fn prefix() { + let mut lexer = Lexer::new("@prefix"); + assert_eq!( + lexer.get_tokens(), + vec![ + Token::new(At, Span::new(0, 1, "@")), + Token::new(Ident, Span::new(1, 1, "prefix")), + Token::new(Eof, Span::new(7, 1, "")), + ] + ) + } + + #[test] + fn output() { + let mut lexer = Lexer::new("@output"); + assert_eq!( + lexer.get_tokens(), + vec![ + Token::new(At, Span::new(0, 1, "@")), + Token::new(Ident, Span::new(1, 1, "output")), + Token::new(Eof, Span::new(7, 1, "")), + ] + ) + } + + #[test] + fn import() { + let mut lexer = Lexer::new("@import"); + assert_eq!( + lexer.get_tokens(), + vec![ + Token::new(At, Span::new(0, 1, "@")), + Token::new(Ident, Span::new(1, 1, "import")), + Token::new(Eof, Span::new(7, 1, "")), + ] + ) + } + + #[test] + fn export() { + let mut lexer = Lexer::new("@export"); + assert_eq!( + lexer.get_tokens(), + vec![ + Token::new(At, Span::new(0, 1, "@")), + Token::new(Ident, Span::new(1, 1, "export")), + Token::new(Eof, Span::new(7, 1, "")), + ] + ) + } #[test] fn tokenize() { + let mut lexer = Lexer::new("P(?X) :- A(?X).\t\n A(Human)."); assert_eq!( - { - let mut vec = vec![]; - let mut lexer = Lexer::new("P(?X) :- A(?X).\t\n A(Human)."); - loop { - let tok = lexer.advance_token(); - vec.push(tok.clone()); - if tok == Eof { - break; - } - } - vec - }, + lexer.get_tokens(), vec![ - Ident, - OpenParen, - QuestionMark, - Ident, - CloseParen, - Whitespace(false), - Colon, - Minus, - Whitespace(false), - Ident, - OpenParen, - QuestionMark, - Ident, - CloseParen, - Dot, - Whitespace(true), - Whitespace(false), - Ident, - OpenParen, - Ident, - CloseParen, - Dot, - Eof + Token::new(Ident, Span::new(0, 1, "P")), + Token::new(OpenParen, Span::new(1, 1, "(")), + Token::new(QuestionMark, Span::new(2, 1, "?")), + Token::new(Ident, Span::new(3, 1, "X")), + Token::new(CloseParen, Span::new(4, 1, ")")), + Token::new(Whitespace, Span::new(5, 1, " ")), + Token::new(Colon, Span::new(6, 1, ":")), + Token::new(Minus, Span::new(7, 1, "-")), + Token::new(Whitespace, Span::new(8, 1, " ")), + Token::new(Ident, Span::new(9, 1, "A")), + Token::new(OpenParen, Span::new(10, 1, "(")), + Token::new(QuestionMark, Span::new(11, 1, "?")), + Token::new(Ident, Span::new(12, 1, "X")), + Token::new(CloseParen, Span::new(13, 1, ")")), + Token::new(Dot, Span::new(14, 1, ".")), + Token::new(Whitespace, Span::new(15, 1, "\t\n")), + Token::new(Whitespace, Span::new(17, 2, " ")), + Token::new(Ident, Span::new(21, 2, "A")), + Token::new(OpenParen, Span::new(22, 2, "(")), + Token::new(Ident, Span::new(23, 2, "Human")), + Token::new(CloseParen, Span::new(28, 2, ")")), + Token::new(Dot, Span::new(29, 2, ".")), + Token::new(Eof, Span::new(30, 2, "")), ] ) } #[test] fn comment() { + 
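        // Note: `Span` offsets count bytes, not characters. In `tokenize`
        // above, the blank after "\t\n" starts at byte offset 17, and in
        // `iri` below, "résumé" occupies bytes 9..17 (8 bytes for 6 chars).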
let mut lexer = Lexer::new("% Some Comment\n"); assert_eq!( - { - let mut vec = vec![]; - let mut lexer = Lexer::new("% Some Comment\n"); - loop { - let tok = lexer.advance_token(); - vec.push(tok.clone()); - if tok == Eof { - break; - } - } - vec - }, - vec![Comment, Eof] + lexer.get_tokens(), + vec![ + Token::new(Comment, Span::new(0, 1, "% Some Comment\n")), + Token::new(Eof, Span::new(15, 2, "")) + ] ) } #[test] fn pct_enc_with_comment() { + let mut lexer = Lexer::new("%38%a3% Some Comment\n"); assert_eq!( - { - let mut vec = vec![]; - let mut lexer = Lexer::new("%38%a3% Some Comment\n"); - loop { - let tok = lexer.advance_token(); - vec.push(tok.clone()); - if tok == Eof { - break; - } - } - vec - }, - vec![PctEncoded, PctEncoded, Comment, Eof] + lexer.get_tokens(), + vec![ + Token::new(PctEncoded, Span::new(0, 1, "%38")), + Token::new(PctEncoded, Span::new(3, 1, "%a3")), + Token::new(Comment, Span::new(6, 1, "% Some Comment\n")), + Token::new(Eof, Span::new(21, 2, "")), + ] ) } #[test] fn ident() { + let mut lexer = Lexer::new("some_Ident(Alice). %comment at the end of a line\n"); assert_eq!( - { - let mut vec = vec![]; - let mut lexer = Lexer::new("some_Ident(Alice). %comment at the end of a line\n"); - loop { - let tok = lexer.advance_token(); - vec.push(tok.clone()); - if tok == Eof { - break; - } - } - vec - }, + lexer.get_tokens(), vec![ - Ident, - OpenParen, - Ident, - CloseParen, - Dot, - Whitespace(false), - Comment, - Eof + Token::new(Ident, Span::new(0, 1, "some_Ident")), + Token::new(OpenParen, Span::new(10, 1, "(")), + Token::new(Ident, Span::new(11, 1, "Alice")), + Token::new(CloseParen, Span::new(16, 1, ")")), + Token::new(Dot, Span::new(17, 1, ".")), + Token::new(Whitespace, Span::new(18, 1, " ")), + Token::new(Comment, Span::new(19, 1, "%comment at the end of a line\n")), + Token::new(Eof, Span::new(49, 2, "")), ] ) } @@ -376,124 +498,102 @@ mod test { #[test] #[should_panic] fn forbidden_ident() { + let mut lexer = Lexer::new("_someIdent(Alice). %comment at the end of a line\n"); assert_eq!( - { - let mut vec = vec![]; - let mut lexer = Lexer::new("_someIdent(Alice). 
%comment at the end of a line\n"); - loop { - let tok = lexer.advance_token(); - vec.push(tok.clone()); - if tok == Eof { - break; - } - } - vec - }, + lexer.get_tokens(), vec![ - Ident, - OpenParen, - Ident, - CloseParen, - Dot, - Whitespace(false), - Comment, - Eof + Token::new(Ident, Span::new(0, 1, "_someIdent")), + Token::new(OpenParen, Span::new(10, 1, "(")), + Token::new(Ident, Span::new(11, 1, "Alice")), + Token::new(CloseParen, Span::new(16, 1, ")")), + Token::new(Dot, Span::new(17, 1, ".")), + Token::new(Whitespace, Span::new(18, 1, " ")), + Token::new(Comment, Span::new(19, 1, "%comment at the end of a line\n")), + Token::new(Eof, Span::new(49, 2, "")), ] ) } #[test] fn iri() { + let mut lexer = Lexer::new(""); assert_eq!( - { - let mut vec = vec![]; - let mut lexer = Lexer::new(""); - loop { - let tok = lexer.advance_token(); - vec.push(tok.clone()); - if tok == Eof { - break; - } - } - vec - }, + lexer.get_tokens(), vec![ - Less, Ident, Colon, Slash, Slash, Ident, Dot, Ident, Dot, Ident, Slash, Greater, - Eof + Token::new(Less, Span::new(0, 1, "<")), + Token::new(Ident, Span::new(1, 1, "https")), + Token::new(Colon, Span::new(6, 1, ":")), + Token::new(Slash, Span::new(7, 1, "/")), + Token::new(Slash, Span::new(8, 1, "/")), + Token::new(Ident, Span::new(9, 1, "résumé")), + Token::new(Dot, Span::new(17, 1, ".")), + Token::new(Ident, Span::new(18, 1, "example")), + Token::new(Dot, Span::new(25, 1, ".")), + Token::new(Ident, Span::new(26, 1, "org")), + Token::new(Slash, Span::new(29, 1, "/")), + Token::new(Greater, Span::new(30, 1, ">")), + Token::new(Eof, Span::new(31, 1, "")), ] ) } #[test] fn iri_pct_enc() { + let mut lexer = Lexer::new("\n"); assert_eq!( - { - let mut vec = vec![]; - let mut lexer = Lexer::new("\n"); - loop { - let tok = lexer.advance_token(); - vec.push(tok.clone()); - if tok == Eof { - break; - } - } - vec - }, + lexer.get_tokens(), vec![ - Less, - Ident, - Colon, - Slash, - Slash, - Ident, - PctEncoded, - PctEncoded, - Ident, - PctEncoded, - PctEncoded, - Dot, - Ident, - Dot, - Ident, - Greater, - Whitespace(true), - Eof + Token::new(Less, Span::new(0, 1, "<")), + Token::new(Ident, Span::new(1, 1, "http")), + Token::new(Colon, Span::new(5, 1, ":")), + Token::new(Slash, Span::new(6, 1, "/")), + Token::new(Slash, Span::new(7, 1, "/")), + Token::new(Ident, Span::new(8, 1, "r")), + Token::new(PctEncoded, Span::new(9, 1, "%C3")), + Token::new(PctEncoded, Span::new(12, 1, "%A9")), + Token::new(Ident, Span::new(15, 1, "sum")), + Token::new(PctEncoded, Span::new(18, 1, "%C3")), + Token::new(PctEncoded, Span::new(21, 1, "%A9")), + Token::new(Dot, Span::new(24, 1, ".")), + Token::new(Ident, Span::new(25, 1, "example")), + Token::new(Dot, Span::new(32, 1, ".")), + Token::new(Ident, Span::new(33, 1, "org")), + Token::new(Greater, Span::new(36, 1, ">")), + Token::new(Whitespace, Span::new(37, 1, "\n")), + Token::new(Eof, Span::new(38, 2, "")), ] ) } #[test] fn pct_enc_comment() { + let mut lexer = Lexer::new("%d4 this should be a comment,\n% but the lexer can't distinguish a percent encoded value\n% in an iri from a comment :(\n"); assert_eq!( - { - let mut vec = vec![]; - let mut lexer = Lexer::new("%d4 this should be a comment,\n% but the lexer can't distinguish a percent encoded value\n% in an iri from a comment :(\n"); - loop { - let tok = lexer.advance_token(); - vec.push(tok.clone()); - if tok == Eof { - break; - } - } - vec - }, + lexer.get_tokens(), vec![ - PctEncoded, - Whitespace(false), - Ident, - Whitespace(false), - Ident, - Whitespace(false), - Ident, - 
Whitespace(false), - Ident, - Whitespace(false), - Ident, - Comma, - Whitespace(true), - Comment, - Comment, - Eof + Token::new(PctEncoded, Span::new(0, 1, "%d4")), + Token::new(Whitespace, Span::new(3, 1, " ")), + Token::new(Ident, Span::new(4, 1, "this")), + Token::new(Whitespace, Span::new(8, 1, " ")), + Token::new(Ident, Span::new(9, 1, "should")), + Token::new(Whitespace, Span::new(15, 1, " ")), + Token::new(Ident, Span::new(16, 1, "be")), + Token::new(Whitespace, Span::new(18, 1, " ")), + Token::new(Ident, Span::new(19, 1, "a")), + Token::new(Whitespace, Span::new(20, 1, " ")), + Token::new(Ident, Span::new(21, 1, "comment")), + Token::new(Comma, Span::new(28, 1, ",")), + Token::new(Whitespace, Span::new(29, 1, "\n")), + Token::new( + Comment, + Span::new( + 30, + 2, + "% but the lexer can't distinguish a percent encoded value\n" + ) + ), + Token::new(Comment, Span::new(88, 3, "% in an iri from a comment :(\n")), + Token::new(Eof, Span::new(118, 4, "")), ] ) } From c4da0ff192366d7a76994d978bc39a2010eeb220 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Wed, 13 Mar 2024 08:29:04 +0100 Subject: [PATCH 005/214] Add ast --- nemo/src/io/parser/ast.rs | 282 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 282 insertions(+) create mode 100644 nemo/src/io/parser/ast.rs diff --git a/nemo/src/io/parser/ast.rs b/nemo/src/io/parser/ast.rs new file mode 100644 index 000000000..02c9c2d93 --- /dev/null +++ b/nemo/src/io/parser/ast.rs @@ -0,0 +1,282 @@ +use std::collections::BTreeMap; + +use crate::io::lexer::Token; + +struct Position { + offset: usize, + line: u32, + column: u32, +} + +pub(crate) type Program<'a> = Vec>; + +#[derive(Debug, PartialEq)] +pub(crate) enum Statement<'a> { + Directive(Directive<'a>), + Fact { + atom: Atom<'a>, + }, + Rule { + head: Vec>, + body: Vec>, + }, +} + +#[derive(Debug, PartialEq)] +pub(crate) enum Directive<'a> { + Base { + kw: Token<'a>, + base_iri: Token<'a>, + }, + Prefix { + kw: Token<'a>, + prefix: Token<'a>, + prefix_iri: Token<'a>, + }, + Import { + kw: Token<'a>, + predicate: Token<'a>, + map: Map<'a>, + }, + Export { + kw: Token<'a>, + predicate: Token<'a>, + map: Map<'a>, + }, + Output { + kw: Token<'a>, + predicates: Vec>, + }, +} + +#[derive(Debug, PartialEq)] +pub(crate) enum Atom<'a> { + Atom { + predicate: Token<'a>, + terms: Vec>, + }, + InfixAtom { + operation: Token<'a>, + lhs: Term<'a>, + rhs: Term<'a>, + }, + Map(Map<'a>), +} + +#[derive(Debug, PartialEq)] +pub(crate) enum Literal<'a> { + Positive(Atom<'a>), + Negative(Atom<'a>), +} + +#[derive(Debug, PartialEq)] +pub(crate) enum Term<'a> { + Primitive(Token<'a>), + Binary { + operation: Token<'a>, + lhs: Box>, + rhs: Box>, + }, + Unary { + operation: Token<'a>, + term: Box>, + }, + Aggregation { + operation: Token<'a>, + terms: Vec>, + }, + Function { + identifier: Token<'a>, + terms: Vec>, + }, + Map(Map<'a>), +} + +#[derive(Debug, PartialEq)] +struct Map<'a> { + identifier: Option>, + pairs: BTreeMap, Term<'a>>, +} + +#[derive(Debug, PartialEq)] +pub(crate) enum Node<'a> { + Statement(&'a Statement<'a>), + Directive(&'a Directive<'a>), + RuleHead(&'a Vec>), + RuleBody(&'a Vec>), + Atom(&'a Atom<'a>), + Term(&'a Term<'a>), + Terms(&'a Vec>), + Map(&'a Map<'a>), + KeyWord(&'a Token<'a>), + BaseIri(&'a Token<'a>), + Prefix(&'a Token<'a>), + PrefixIri(&'a Token<'a>), + Predicate(&'a Token<'a>), + Predicates(&'a Vec>), + Operation(&'a Token<'a>), + Lhs(&'a Term<'a>), + Rhs(&'a Term<'a>), + Identifier(&'a Token<'a>), + Pairs(&'a BTreeMap, Term<'a>>), + MapIdentifier(&'a Option>), + 
Primitive(&'a Token<'a>), +} + +trait AstNode { + fn children(&self) -> Vec; + // fn position(&self) -> Position; +} + +impl<'a> AstNode for Program<'a> { + fn children(&self) -> Vec { + let mut vec = Vec::new(); + for statement in self { + vec.push(Node::Statement(statement)) + } + vec + } + + // fn position(&self) -> Position { + // let first = self.get(0); + // match first { + // Some(elem) => { + // let span; + // match elem { + // Statement::Directive(directive) => match directive { + // Directive::Base { kw, base_iri } => span = kw.span, + // Directive::Prefix { + // kw, + // prefix, + // prefix_iri, + // } => span = kw.span, + // Directive::Import { kw, predicate, map } => span = kw.span, + // Directive::Export { kw, predicate, map } => span = kw.span, + // Directive::Output { kw, predicates } => span = kw.span, + // }, + // Statement::Fact { atom } => match atom { + // Atom::Atom { predicate, terms } => todo!(), + // Atom::InfixAtom { operation, lhs, rhs } => todo!(), + // Atom::Map(_) => todo!(), + // }, + // Statement::Rule { head, body } => todo!(), + // }; + // } + // None => Position { + // offset: 0, + // line: 1, + // column: 0, + // }, + // } + // } +} + +impl<'a> AstNode for Statement<'a> { + fn children(&self) -> Vec { + match self { + Statement::Directive(directive) => directive.children(), + Statement::Fact { atom } => vec![Node::Atom(atom)], + Statement::Rule { head, body } => { + vec![Node::RuleHead(head), Node::RuleBody(body)] + } + } + } + + // fn position(&self) -> Position { + // todo!() + // } +} + +impl<'a> AstNode for Directive<'a> { + fn children(&self) -> Vec { + match self { + Directive::Base { kw, base_iri } => { + vec![Node::KeyWord(kw), Node::BaseIri(base_iri)] + } + Directive::Prefix { + kw, + prefix, + prefix_iri, + } => vec![ + Node::KeyWord(kw), + Node::Prefix(prefix), + Node::PrefixIri(prefix_iri), + ], + Directive::Import { kw, predicate, map } => vec![ + Node::KeyWord(kw), + Node::Predicate(predicate), + Node::Map(map), + ], + Directive::Export { kw, predicate, map } => vec![ + Node::KeyWord(kw), + Node::Predicate(predicate), + Node::Map(map), + ], + Directive::Output { kw, predicates } => { + vec![Node::KeyWord(kw), Node::Predicates(predicates)] + } + } + } + + // fn position(&self) -> Position { + // todo!() + // } +} + +impl<'a> AstNode for Atom<'a> { + fn children(&self) -> Vec { + match self { + Atom::Atom { predicate, terms } => { + vec![Node::KeyWord(predicate), Node::Terms(terms)] + } + Atom::InfixAtom { + operation, + lhs, + rhs, + } => vec![Node::Operation(operation), Node::Lhs(lhs), Node::Rhs(rhs)], + Atom::Map(map) => map.children(), + } + } + + // fn position(&self) -> Position { + // todo!() + // } +} + +impl<'a> AstNode for Term<'a> { + fn children(&self) -> Vec { + match self { + Term::Primitive(prim) => vec![Node::Primitive(prim)], + Term::Binary { + operation, + lhs, + rhs, + } => vec![Node::Operation(operation), Node::Lhs(lhs), Node::Rhs(rhs)], + Term::Unary { operation, term } => vec![Node::Operation(operation), Node::Term(term)], + Term::Aggregation { operation, terms } => { + vec![Node::Operation(operation), Node::Terms(terms)] + } + Term::Function { identifier, terms } => { + vec![Node::Identifier(identifier), Node::Terms(terms)] + } + Term::Map(map) => map.children(), + } + } + + // fn position(&self) -> Position { + // todo!() + // } +} + +impl<'a> AstNode for Map<'a> { + fn children(&self) -> Vec { + vec![ + Node::MapIdentifier(&self.identifier), + Node::Pairs(&self.pairs), + ] + } + + // fn position(&self) -> Position { + 
// todo!() + // } +} From 10a3815dbce8ef7f199b7fc09593d380340e34f7 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Wed, 13 Mar 2024 08:30:32 +0100 Subject: [PATCH 006/214] Refactor lexer to use nom --- nemo/src/io/lexer.rs | 787 ++++++++++++++++++++----------------------- 1 file changed, 366 insertions(+), 421 deletions(-) diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs index 697236b3c..c0b6ccb0d 100644 --- a/nemo/src/io/lexer.rs +++ b/nemo/src/io/lexer.rs @@ -1,264 +1,21 @@ //! Lexical tokenization of rulewerk-style rules. -use std::str::Chars; +use nom::{ + branch::alt, + bytes::complete::{is_not, tag, take}, + character::complete::{alpha1, alphanumeric1, digit1, line_ending, multispace1}, + combinator::{all_consuming, map, recognize}, + multi::many0, + sequence::{delimited, pair, tuple}, + IResult, +}; +use nom_locate::LocatedSpan; -const EOF_CHAR: char = '\0'; - -#[derive(Debug, Copy, Clone, PartialEq)] -struct Span<'a> { - offset: usize, - line: usize, - // size: usize, - fragment: &'a str, -} -// impl<'a> Span<'a> { -impl<'a> Span<'a> { - fn new(offset: usize, line: usize, input: &'a str) -> Span<'a> { - // fn new(offset: usize, line: usize, size: usize) -> Span { - Span { - offset, - line, - fragment: input, - // size, - } - } -} - -#[derive(Debug, Clone)] -struct Lexer<'a> { - input: &'a str, - len_remaining: usize, - offset: usize, - lines: usize, - chars: Chars<'a>, -} -impl<'a> Lexer<'a> { - fn new(input: &'a str) -> Lexer<'a> { - Lexer { - input, - len_remaining: input.len(), - offset: 0, - lines: 1, - chars: input.chars(), - } - } - fn consumed_char_length(&self) -> usize { - self.len_remaining - self.chars.as_str().len() - } - fn update_remaining_len(&mut self) { - self.len_remaining = self.chars.as_str().len(); - } - fn peek(&self, count: usize) -> char { - self.chars.clone().nth(count - 1).unwrap_or(EOF_CHAR) - } - fn bump(&mut self) -> Option { - self.chars.next() - } - fn is_eof(&self) -> bool { - self.chars.as_str().is_empty() - } - fn bump_while(&mut self, mut predicate: impl FnMut(char) -> bool) { - while predicate(self.peek(1)) && !self.is_eof() { - self.bump(); - } - } - fn get_tokens(&mut self) -> Vec { - use TokenKind::*; - let mut vec = Vec::new(); - loop { - let old_line_num = self.lines; - let first_char = match self.bump() { - Some(c) => c, - None => { - let eof_tok = Token::new( - Eof, - Span::new( - self.offset, - self.lines, - &self.input[self.offset..self.offset], - ), - ); - vec.push(eof_tok); - return vec; - } - }; - let token_kind = match first_char { - '%' => match (self.peek(1), self.peek(2)) { - (n1, n2) if n1.is_digit(16) && n2.is_digit(16) => self.pct_encoded(), - _ => self.comment(), - }, - '\n' => { - self.lines += 1; - Whitespace - } - c if is_whitespace(c) => self.whitespace(), - c if unicode_ident::is_xid_start(c) => self.ident(), - c @ '0'..='9' => self.number(), - '?' => QuestionMark, - '!' => ExclamationMark, - '(' => OpenParen, - ')' => CloseParen, - '[' => OpenBracket, - ']' => CloseBracket, - '{' => OpenBrace, - '}' => CloseBrace, - '.' 
=> Dot, - ',' => Comma, - ':' => Colon, - ';' => Semicolon, - '>' => Greater, - '=' => Equal, - '<' => Less, - '~' => Tilde, - '^' => Caret, - '#' => Hash, - '_' => Underscore, - '@' => At, - '+' => Plus, - '-' => Minus, - '*' => Star, - '/' => Slash, - '$' => Dollar, - '&' => Ampersand, - '\'' => Apostrophe, - '\u{A0}'..='\u{D7FF}' - | '\u{F900}'..='\u{FDCF}' - | '\u{FDF0}'..='\u{FFEF}' - | '\u{10000}'..='\u{1FFFD}' - | '\u{20000}'..='\u{2FFFD}' - | '\u{30000}'..='\u{3FFFD}' - | '\u{40000}'..='\u{4FFFD}' - | '\u{50000}'..='\u{5FFFD}' - | '\u{60000}'..='\u{6FFFD}' - | '\u{70000}'..='\u{7FFFD}' - | '\u{80000}'..='\u{8FFFD}' - | '\u{90000}'..='\u{9FFFD}' - | '\u{A0000}'..='\u{AFFFD}' - | '\u{B0000}'..='\u{BFFFD}' - | '\u{C0000}'..='\u{CFFFD}' - | '\u{D0000}'..='\u{DFFFD}' - | '\u{E1000}'..='\u{EFFFD}' => self.ucschar(), - '\u{E000}'..='\u{F8FF}' - | '\u{F0000}'..='\u{FFFFD}' - | '\u{100000}'..='\u{10FFFD}' => self.iprivate(), - _ => todo!(), - }; - let tok_len = self.consumed_char_length(); - - // let fragment = &*self.input; - let token = Token::new( - token_kind, - Span::new( - self.offset, - old_line_num, - &self.input[self.offset..(self.offset + tok_len)], - ), - // Span::new(self.offset, self.lines, tok_len), - ); - self.offset += tok_len; - self.update_remaining_len(); - vec.push(token); - } - } - - fn number(&mut self) -> TokenKind { - self.bump_while(is_hex_digit); - TokenKind::Number - } - fn pct_encoded(&mut self) -> TokenKind { - self.bump(); - self.bump(); - TokenKind::PctEncoded - } - fn comment(&mut self) -> TokenKind { - self.bump_while(|c| c != '\n'); - self.bump(); - self.lines += 1; - TokenKind::Comment - } - fn whitespace(&mut self) -> TokenKind { - self.bump_while(|c| is_whitespace(c) && c != '\n'); - if '\n' == self.peek(1) { - self.bump(); - self.lines += 1; - return TokenKind::Whitespace; - } - TokenKind::Whitespace - } - fn ident(&mut self) -> TokenKind { - self.bump_while(unicode_ident::is_xid_continue); - TokenKind::Ident - } - - fn ucschar(&mut self) -> TokenKind { - self.bump_while(is_ucschar); - TokenKind::UcsChars - } - - fn iprivate(&mut self) -> TokenKind { - self.bump_while(is_iprivate); - TokenKind::Iprivate - } -} - -fn is_hex_digit(c: char) -> bool { - c.is_digit(16) -} - -fn is_whitespace(c: char) -> bool { - // support also vertical tab, form feed, NEXT LINE (latin1), - // LEFT-TO-RIGHT MARK, RIGHT-TO-LEFT MARK, LINE SEPARATOR and PARAGRAPH SEPARATOR? 
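// (The nom rewrite below answers the question above conservatively:
// multispace1 matches exactly space, tab, carriage return and line feed.)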
- matches!(c, ' ' | '\n' | '\t' | '\r') -} - -fn is_ident(s: &str) -> bool { - let mut chars = s.chars(); - if let Some(char) = chars.next() { - unicode_ident::is_xid_start(char) && chars.all(unicode_ident::is_xid_continue) - } else { - false - } -} - -fn is_ucschar(c: char) -> bool { - matches!(c, '\u{A0}'..='\u{D7FF}' - | '\u{F900}'..='\u{FDCF}' - | '\u{FDF0}'..='\u{FFEF}' - | '\u{10000}'..='\u{1FFFD}' - | '\u{20000}'..='\u{2FFFD}' - | '\u{30000}'..='\u{3FFFD}' - | '\u{40000}'..='\u{4FFFD}' - | '\u{50000}'..='\u{5FFFD}' - | '\u{60000}'..='\u{6FFFD}' - | '\u{70000}'..='\u{7FFFD}' - | '\u{80000}'..='\u{8FFFD}' - | '\u{90000}'..='\u{9FFFD}' - | '\u{A0000}'..='\u{AFFFD}' - | '\u{B0000}'..='\u{BFFFD}' - | '\u{C0000}'..='\u{CFFFD}' - | '\u{D0000}'..='\u{DFFFD}' - | '\u{E1000}'..='\u{EFFFD}') -} - -fn is_iprivate(c: char) -> bool { - matches!(c, '\u{E000}'..='\u{F8FF}' | '\u{F0000}'..='\u{FFFFD}' | '\u{100000}'..='\u{10FFFD}') -} - -#[derive(Debug, Copy, Clone, PartialEq)] -pub(crate) struct Token<'a> { - kind: TokenKind, - span: Span<'a>, -} -// impl<'a> Token<'a> { -impl<'a> Token<'a> { - fn new(kind: TokenKind, span: Span<'a>) -> Token<'a> { - Token { kind, span } - } -} +pub(crate) type Span<'a> = LocatedSpan<&'a str>; /// All the tokens the input gets parsed into. #[derive(Debug, PartialEq, Copy, Clone)] -enum TokenKind { +pub(crate) enum TokenKind { // Syntactic symbols: /// '?' QuestionMark, @@ -282,14 +39,20 @@ enum TokenKind { Comma, /// ':' Colon, - /// ';' - Semicolon, + /// `:-` + Arrow, /// '>' Greater, + /// `>=` + GreaterEqual, /// '=' Equal, + /// `<=` + LessEqual, /// '<' Less, + /// `!=` + Unequal, /// '~' Tilde, /// '^' @@ -308,292 +71,474 @@ enum TokenKind { Star, /// '/' Slash, - /// '$' - Dollar, - /// '&' - Ampersand, - /// "'" - Apostrophe, // Multi-char tokens: /// Identifier for keywords and predicate names Ident, - /// All other Utf8 characters that can be used in an IRI - UcsChars, - /// Characters in private use areas - Iprivate, - /// Percent-encoded characters in IRIs - PctEncoded, + /// IRI, delimited with `<` and `>` + Iri, /// Base 10 digits Number, - /// A string literal + /// A string literal, delimited with `"` String, /// A comment, starting with `%` Comment, /// A comment, starting with `%%` DocComment, - /// bool: ends_with_newline + /// ` `, `\t`, `\r` or `\n` Whitespace, + /// base directive keyword + Base, + /// prefix directive keyword + Prefix, + /// import directive keyword + Import, + /// export directive keyword + Export, + /// output directive keyword + Output, /// catch all token Illegal, /// signals end of file Eof, } +#[derive(Debug, Copy, Clone, PartialEq)] +pub(crate) struct Token<'a> { + pub(crate) kind: TokenKind, + pub(crate) span: Span<'a>, +} +impl<'a> Token<'a> { + fn new(kind: TokenKind, span: Span<'a>) -> Token<'a> { + Token { kind, span } + } +} + +macro_rules! 
syntax { + ($func_name: ident, $tag_string: literal, $token: expr) => { + pub(crate) fn $func_name<'a>(input: Span) -> IResult { + map(tag($tag_string), |span| Token::new($token, span))(input) + } + }; +} + +syntax!(open_paren, "(", TokenKind::OpenParen); +syntax!(close_paren, ")", TokenKind::CloseParen); +syntax!(open_bracket, "[", TokenKind::OpenBracket); +syntax!(close_bracket, "]", TokenKind::CloseBracket); +syntax!(open_brace, "{", TokenKind::OpenBrace); +syntax!(close_brace, "}", TokenKind::CloseBrace); +syntax!(dot, ".", TokenKind::Dot); +syntax!(comma, ",", TokenKind::Comma); +syntax!(colon, ":", TokenKind::Colon); +syntax!(arrow, ":-", TokenKind::Arrow); +syntax!(question_mark, "?", TokenKind::QuestionMark); +syntax!(exclamation_mark, "!", TokenKind::ExclamationMark); +syntax!(tilde, "~", TokenKind::Tilde); +syntax!(caret, "^", TokenKind::Caret); +syntax!(hash, "#", TokenKind::Hash); +syntax!(underscore, "_", TokenKind::Underscore); +syntax!(at, "@", TokenKind::At); + +pub(crate) fn lex_punctuations(input: Span) -> IResult { + alt(( + arrow, + open_paren, + close_paren, + open_bracket, + close_bracket, + open_brace, + close_brace, + dot, + comma, + colon, + question_mark, + exclamation_mark, + tilde, + caret, + hash, + underscore, + at, + ))(input) +} + +syntax!(less, "<", TokenKind::Less); +syntax!(less_equal, "<=", TokenKind::LessEqual); +syntax!(equal, "=", TokenKind::Equal); +syntax!(greater_equal, ">=", TokenKind::GreaterEqual); +syntax!(greater, ">", TokenKind::Greater); +syntax!(unequals, "!=", TokenKind::Unequal); +syntax!(plus, "+", TokenKind::Plus); +syntax!(minus, "-", TokenKind::Minus); +syntax!(star, "*", TokenKind::Star); +syntax!(slash, "/", TokenKind::Slash); + +pub(crate) fn lex_operators(input: Span) -> IResult { + alt(( + less_equal, + greater_equal, + unequals, + less, + equal, + greater, + plus, + minus, + star, + slash, + ))(input) +} + +pub(crate) fn lex_ident(input: Span) -> IResult { + let (rest, result) = recognize(pair( + alpha1, + many0(alt((alphanumeric1, tag("_"), tag("-")))), + ))(input)?; + let token = match *result.fragment() { + "base" => Token::new(TokenKind::Base, result), + "prefix" => Token::new(TokenKind::Prefix, result), + "import" => Token::new(TokenKind::Import, result), + "export" => Token::new(TokenKind::Export, result), + "output" => Token::new(TokenKind::Output, result), + _ => Token::new(TokenKind::Ident, result), + }; + Ok((rest, token)) +} + +pub(crate) fn lex_iri(input: Span) -> IResult { + recognize(delimited(tag("<"), is_not("> \n"), tag(">")))(input) + .map(|(rest, result)| (rest, Token::new(TokenKind::Iri, result))) +} + +pub(crate) fn lex_number(input: Span) -> IResult { + digit1(input).map(|(rest, result)| (rest, Token::new(TokenKind::Number, result))) +} + +pub(crate) fn lex_string(input: Span) -> IResult { + recognize(delimited(tag("\""), is_not("\""), tag("\"")))(input) + .map(|(rest, result)| (rest, Token::new(TokenKind::String, result))) +} + +pub(crate) fn lex_comment(input: Span) -> IResult { + recognize(tuple((tag("%"), many0(is_not("\r\n")), line_ending)))(input) + .map(|(rest, result)| (rest, Token::new(TokenKind::Comment, result))) +} + +pub(crate) fn lex_doc_comment(input: Span) -> IResult { + recognize(tuple((tag("%%"), many0(is_not("\r\n")), line_ending)))(input) + .map(|(rest, result)| (rest, Token::new(TokenKind::DocComment, result))) +} + +pub(crate) fn lex_whitespace(input: Span) -> IResult { + multispace1(input).map(|(rest, result)| (rest, Token::new(TokenKind::Whitespace, result))) +} + +pub(crate) fn 
lex_illegal(input: Span) -> IResult { + take(1usize)(input).map(|(rest, result)| (rest, Token::new(TokenKind::Illegal, result))) +} + +pub(crate) fn lex_tokens(input: Span) -> IResult> { + all_consuming(many0(alt(( + lex_iri, + lex_operators, + lex_punctuations, + lex_ident, + lex_number, + lex_string, + lex_comment, + lex_whitespace, + lex_illegal, + ))))(input) + .map(|(span, mut vec)| { + vec.append(&mut vec![Token::new(TokenKind::Eof, span)]); + (span, vec) + }) +} + #[cfg(test)] mod test { use super::TokenKind::*; - use crate::io::lexer::{Lexer, Span, Token}; + use super::*; + + macro_rules! T { + ($tok_kind: expr, $offset: literal, $line: literal, $str: literal) => { + Token::new($tok_kind, unsafe { + Span::new_from_raw_offset($offset, $line, $str, ()) + }) + }; + } #[test] fn empty_input() { - let mut lexer = Lexer::new(""); - assert_eq!( - lexer.get_tokens(), - vec![Token::new(Eof, Span::new(0, 1, ""))] - ) + let input = Span::new(""); + assert_eq!(lex_tokens(input).unwrap().1, vec![T!(Eof, 0, 1, "")]) } #[test] fn base() { - let mut lexer = Lexer::new("@base"); + let input = Span::new("@base"); assert_eq!( - lexer.get_tokens(), - vec![ - Token::new(At, Span::new(0, 1, "@")), - Token::new(Ident, Span::new(1, 1, "base")), - Token::new(Eof, Span::new(5, 1, "")), - ] + lex_tokens(input).unwrap().1, + vec![T!(At, 0, 1, "@"), T!(Base, 1, 1, "base"), T!(Eof, 5, 1, ""),] ) } #[test] fn prefix() { - let mut lexer = Lexer::new("@prefix"); + let input = Span::new("@prefix"); assert_eq!( - lexer.get_tokens(), + lex_tokens(input).unwrap().1, vec![ - Token::new(At, Span::new(0, 1, "@")), - Token::new(Ident, Span::new(1, 1, "prefix")), - Token::new(Eof, Span::new(7, 1, "")), + T!(At, 0, 1, "@"), + T!(Prefix, 1, 1, "prefix"), + T!(Eof, 7, 1, ""), ] ) } #[test] fn output() { - let mut lexer = Lexer::new("@output"); + let input = Span::new("@output"); assert_eq!( - lexer.get_tokens(), + lex_tokens(input).unwrap().1, vec![ - Token::new(At, Span::new(0, 1, "@")), - Token::new(Ident, Span::new(1, 1, "output")), - Token::new(Eof, Span::new(7, 1, "")), + T!(At, 0, 1, "@"), + T!(Output, 1, 1, "output"), + T!(Eof, 7, 1, ""), ] ) } #[test] fn import() { - let mut lexer = Lexer::new("@import"); + let input = Span::new("@import"); assert_eq!( - lexer.get_tokens(), + lex_tokens(input).unwrap().1, vec![ - Token::new(At, Span::new(0, 1, "@")), - Token::new(Ident, Span::new(1, 1, "import")), - Token::new(Eof, Span::new(7, 1, "")), + T!(At, 0, 1, "@"), + T!(Import, 1, 1, "import"), + T!(Eof, 7, 1, ""), ] ) } #[test] fn export() { - let mut lexer = Lexer::new("@export"); + let input = Span::new("@export"); assert_eq!( - lexer.get_tokens(), + lex_tokens(input).unwrap().1, vec![ - Token::new(At, Span::new(0, 1, "@")), - Token::new(Ident, Span::new(1, 1, "export")), - Token::new(Eof, Span::new(7, 1, "")), + T!(At, 0, 1, "@"), + T!(Export, 1, 1, "export"), + T!(Eof, 7, 1, ""), ] ) } #[test] - fn tokenize() { - let mut lexer = Lexer::new("P(?X) :- A(?X).\t\n A(Human)."); + fn idents_with_keyword_prefix() { + let input = Span::new("@baseA, @prefixB, @importC, @exportD, @outputE."); assert_eq!( - lexer.get_tokens(), + lex_tokens(input).unwrap().1, vec![ - Token::new(Ident, Span::new(0, 1, "P")), - Token::new(OpenParen, Span::new(1, 1, "(")), - Token::new(QuestionMark, Span::new(2, 1, "?")), - Token::new(Ident, Span::new(3, 1, "X")), - Token::new(CloseParen, Span::new(4, 1, ")")), - Token::new(Whitespace, Span::new(5, 1, " ")), - Token::new(Colon, Span::new(6, 1, ":")), - Token::new(Minus, Span::new(7, 1, "-")), - 
Token::new(Whitespace, Span::new(8, 1, " ")), - Token::new(Ident, Span::new(9, 1, "A")), - Token::new(OpenParen, Span::new(10, 1, "(")), - Token::new(QuestionMark, Span::new(11, 1, "?")), - Token::new(Ident, Span::new(12, 1, "X")), - Token::new(CloseParen, Span::new(13, 1, ")")), - Token::new(Dot, Span::new(14, 1, ".")), - Token::new(Whitespace, Span::new(15, 1, "\t\n")), - Token::new(Whitespace, Span::new(17, 2, " ")), - Token::new(Ident, Span::new(21, 2, "A")), - Token::new(OpenParen, Span::new(22, 2, "(")), - Token::new(Ident, Span::new(23, 2, "Human")), - Token::new(CloseParen, Span::new(28, 2, ")")), - Token::new(Dot, Span::new(29, 2, ".")), - Token::new(Eof, Span::new(30, 2, "")), + T!(At, 0, 1, "@"), + T!(Ident, 1, 1, "baseA"), + T!(Comma, 6, 1, ","), + T!(Whitespace, 7, 1, " "), + T!(At, 8, 1, "@"), + T!(Ident, 9, 1, "prefixB"), + T!(Comma, 16, 1, ","), + T!(Whitespace, 17, 1, " "), + T!(At, 18, 1, "@"), + T!(Ident, 19, 1, "importC"), + T!(Comma, 26, 1, ","), + T!(Whitespace, 27, 1, " "), + T!(At, 28, 1, "@"), + T!(Ident, 29, 1, "exportD"), + T!(Comma, 36, 1, ","), + T!(Whitespace, 37, 1, " "), + T!(At, 38, 1, "@"), + T!(Ident, 39, 1, "outputE"), + T!(Dot, 46, 1, "."), + T!(Eof, 47, 1, ""), ] ) } #[test] - fn comment() { - let mut lexer = Lexer::new("% Some Comment\n"); + fn tokenize() { + let input = Span::new("P(?X) :- A(?X).\t\n A(Human)."); assert_eq!( - lexer.get_tokens(), + lex_tokens(input).unwrap().1, vec![ - Token::new(Comment, Span::new(0, 1, "% Some Comment\n")), - Token::new(Eof, Span::new(15, 2, "")) + T!(Ident, 0, 1, "P"), + T!(OpenParen, 1, 1, "("), + T!(QuestionMark, 2, 1, "?"), + T!(Ident, 3, 1, "X"), + T!(CloseParen, 4, 1, ")"), + T!(Whitespace, 5, 1, " "), + T!(Arrow, 6, 1, ":-"), + T!(Whitespace, 8, 1, " "), + T!(Ident, 9, 1, "A"), + T!(OpenParen, 10, 1, "("), + T!(QuestionMark, 11, 1, "?"), + T!(Ident, 12, 1, "X"), + T!(CloseParen, 13, 1, ")"), + T!(Dot, 14, 1, "."), + T!(Whitespace, 15, 1, "\t\n "), + T!(Ident, 21, 2, "A"), + T!(OpenParen, 22, 2, "("), + T!(Ident, 23, 2, "Human"), + T!(CloseParen, 28, 2, ")"), + T!(Dot, 29, 2, "."), + T!(Eof, 30, 2, ""), ] ) } #[test] - fn pct_enc_with_comment() { - let mut lexer = Lexer::new("%38%a3% Some Comment\n"); + fn comment() { + let input = Span::new("% Some Comment\n"); assert_eq!( - lexer.get_tokens(), + lex_tokens(input).unwrap().1, vec![ - Token::new(PctEncoded, Span::new(0, 1, "%38")), - Token::new(PctEncoded, Span::new(3, 1, "%a3")), - Token::new(Comment, Span::new(6, 1, "% Some Comment\n")), - Token::new(Eof, Span::new(21, 2, "")), + T!(Comment, 0, 1, "% Some Comment\n"), + T!(Eof, 15, 2, ""), + // T!(Comment, Span::new(0, 1, "% Some Comment\n")), + // T!(Eof, Span::new(15, 2, "")) ] ) } #[test] fn ident() { - let mut lexer = Lexer::new("some_Ident(Alice). %comment at the end of a line\n"); + let input = Span::new("some_Ident(Alice). 
%comment at the end of a line\n");
         assert_eq!(
-            lexer.get_tokens(),
+            lex_tokens(input).unwrap().1,
             vec![
-                Token::new(Ident, Span::new(0, 1, "some_Ident")),
-                Token::new(OpenParen, Span::new(10, 1, "(")),
-                Token::new(Ident, Span::new(11, 1, "Alice")),
-                Token::new(CloseParen, Span::new(16, 1, ")")),
-                Token::new(Dot, Span::new(17, 1, ".")),
-                Token::new(Whitespace, Span::new(18, 1, " ")),
-                Token::new(Comment, Span::new(19, 1, "%comment at the end of a line\n")),
-                Token::new(Eof, Span::new(49, 2, "")),
+                T!(Ident, 0, 1, "some_Ident"),
+                T!(OpenParen, 10, 1, "("),
+                T!(Ident, 11, 1, "Alice"),
+                T!(CloseParen, 16, 1, ")"),
+                T!(Dot, 17, 1, "."),
+                T!(Whitespace, 18, 1, " "),
+                T!(Comment, 19, 1, "%comment at the end of a line\n"),
+                T!(Eof, 49, 2, ""),
             ]
         )
     }

     #[test]
-    #[should_panic]
     fn forbidden_ident() {
-        let mut lexer = Lexer::new("_someIdent(Alice). %comment at the end of a line\n");
+        let input = Span::new("_someIdent(Alice). %comment at the end of a line\n");
         assert_eq!(
-            lexer.get_tokens(),
+            lex_tokens(input).unwrap().1,
             vec![
-                Token::new(Ident, Span::new(0, 1, "_someIdent")),
-                Token::new(OpenParen, Span::new(10, 1, "(")),
-                Token::new(Ident, Span::new(11, 1, "Alice")),
-                Token::new(CloseParen, Span::new(16, 1, ")")),
-                Token::new(Dot, Span::new(17, 1, ".")),
-                Token::new(Whitespace, Span::new(18, 1, " ")),
-                Token::new(Comment, Span::new(19, 1, "%comment at the end of a line\n")),
-                Token::new(Eof, Span::new(49, 2, "")),
+                T!(Underscore, 0, 1, "_"),
+                T!(Ident, 1, 1, "someIdent"),
+                T!(OpenParen, 10, 1, "("),
+                T!(Ident, 11, 1, "Alice"),
+                T!(CloseParen, 16, 1, ")"),
+                T!(Dot, 17, 1, "."),
+                T!(Whitespace, 18, 1, " "),
+                T!(Comment, 19, 1, "%comment at the end of a line\n"),
+                T!(Eof, 49, 2, ""),
             ]
         )
     }

     #[test]
     fn iri() {
-        let mut lexer = Lexer::new("<https://résumé.example.org/>");
+        let input = Span::new("<https://résumé.example.org/>");
         assert_eq!(
-            lexer.get_tokens(),
+            lex_tokens(input).unwrap().1,
             vec![
-                Token::new(Less, Span::new(0, 1, "<")),
-                Token::new(Ident, Span::new(1, 1, "https")),
-                Token::new(Colon, Span::new(6, 1, ":")),
-                Token::new(Slash, Span::new(7, 1, "/")),
-                Token::new(Slash, Span::new(8, 1, "/")),
-                Token::new(Ident, Span::new(9, 1, "résumé")),
-                Token::new(Dot, Span::new(17, 1, ".")),
-                Token::new(Ident, Span::new(18, 1, "example")),
-                Token::new(Dot, Span::new(25, 1, ".")),
-                Token::new(Ident, Span::new(26, 1, "org")),
-                Token::new(Slash, Span::new(29, 1, "/")),
-                Token::new(Greater, Span::new(30, 1, ">")),
-                Token::new(Eof, Span::new(31, 1, "")),
+                T!(Iri, 0, 1, "<https://résumé.example.org/>"),
+                T!(Eof, 31, 1, ""),
             ]
         )
     }

     #[test]
     fn iri_pct_enc() {
-        let mut lexer = Lexer::new("<http://r%C3%A9sum%C3%A9.example.org>\n");
+        let input = Span::new("<http://r%C3%A9sum%C3%A9.example.org>\n");
         assert_eq!(
-            lexer.get_tokens(),
+            lex_tokens(input).unwrap().1,
             vec![
-                Token::new(Less, Span::new(0, 1, "<")),
-                Token::new(Ident, Span::new(1, 1, "http")),
-                Token::new(Colon, Span::new(5, 1, ":")),
-                Token::new(Slash, Span::new(6, 1, "/")),
-                Token::new(Slash, Span::new(7, 1, "/")),
-                Token::new(Ident, Span::new(8, 1, "r")),
-                Token::new(PctEncoded, Span::new(9, 1, "%C3")),
-                Token::new(PctEncoded, Span::new(12, 1, "%A9")),
-                Token::new(Ident, Span::new(15, 1, "sum")),
-                Token::new(PctEncoded, Span::new(18, 1, "%C3")),
-                Token::new(PctEncoded, Span::new(21, 1, "%A9")),
-                Token::new(Dot, Span::new(24, 1, ".")),
-                Token::new(Ident, Span::new(25, 1, "example")),
-                Token::new(Dot, Span::new(32, 1, ".")),
-                Token::new(Ident, Span::new(33, 1, "org")),
-                Token::new(Greater, Span::new(36, 1, ">")),
-                Token::new(Whitespace, Span::new(37, 1, "\n")),
-                Token::new(Eof, Span::new(38, 2, "")),
+                T!(Iri, 0, 1, "<http://r%C3%A9sum%C3%A9.example.org>"),
T!(Whitespace, 37, 1, "\n"), + T!(Eof, 38, 2, ""), + ] + ) + } + + // FIXME: change the name of this test according to the correct name for `?X > 3` + // (Constraints are Rules with an empty Head) + #[test] + fn constraints() { + let input = Span::new("A(?X):-B(?X),?X<42,?X>3."); + assert_eq!( + lex_tokens(input).unwrap().1, + vec![ + T!(Ident, 0, 1, "A"), + T!(OpenParen, 1, 1, "("), + T!(QuestionMark, 2, 1, "?"), + T!(Ident, 3, 1, "X"), + T!(CloseParen, 4, 1, ")"), + T!(Arrow, 5, 1, ":-"), + T!(Ident, 7, 1, "B"), + T!(OpenParen, 8, 1, "("), + T!(QuestionMark, 9, 1, "?"), + T!(Ident, 10, 1, "X"), + T!(CloseParen, 11, 1, ")"), + T!(Comma, 12, 1, ","), + T!(QuestionMark, 13, 1, "?"), + T!(Ident, 14, 1, "X"), + T!(Less, 15, 1, "<"), + T!(Number, 16, 1, "42"), + T!(Comma, 18, 1, ","), + T!(QuestionMark, 19, 1, "?"), + T!(Ident, 20, 1, "X"), + T!(Greater, 21, 1, ">"), + T!(Number, 22, 1, "3"), + T!(Dot, 23, 1, "."), + T!(Eof, 24, 1, ""), ] ) } #[test] fn pct_enc_comment() { - let mut lexer = Lexer::new("%d4 this should be a comment,\n% but the lexer can't distinguish a percent encoded value\n% in an iri from a comment :(\n"); + let input = Span::new("%d4 this should be a comment,\n% but the lexer can't distinguish a percent encoded value\n% in an iri from a comment :(\n"); assert_eq!( - lexer.get_tokens(), + lex_tokens(input).unwrap().1, vec![ - Token::new(PctEncoded, Span::new(0, 1, "%d4")), - Token::new(Whitespace, Span::new(3, 1, " ")), - Token::new(Ident, Span::new(4, 1, "this")), - Token::new(Whitespace, Span::new(8, 1, " ")), - Token::new(Ident, Span::new(9, 1, "should")), - Token::new(Whitespace, Span::new(15, 1, " ")), - Token::new(Ident, Span::new(16, 1, "be")), - Token::new(Whitespace, Span::new(18, 1, " ")), - Token::new(Ident, Span::new(19, 1, "a")), - Token::new(Whitespace, Span::new(20, 1, " ")), - Token::new(Ident, Span::new(21, 1, "comment")), - Token::new(Comma, Span::new(28, 1, ",")), - Token::new(Whitespace, Span::new(29, 1, "\n")), - Token::new( + T!(Comment, 0, 1, "%d4 this should be a comment,\n"), + T!( Comment, - Span::new( - 30, - 2, - "% but the lexer can't distinguish a percent encoded value\n" - ) + 30, + 2, + "% but the lexer can't distinguish a percent encoded value\n" ), - Token::new(Comment, Span::new(88, 3, "% in an iri from a comment :(\n")), - Token::new(Eof, Span::new(118, 4, "")), + T!(Comment, 88, 3, "% in an iri from a comment :(\n"), + T!(Eof, 118, 4, ""), + ] + ) + } + + #[test] + fn fact() { + let input = Span::new("somePred(term1, term2)."); + assert_eq!( + lex_tokens(input).unwrap().1, + vec![ + T!(Ident, 0, 1, "somePred"), + T!(OpenParen, 8, 1, "("), + T!(Ident, 9, 1, "term1"), + T!(Comma, 14, 1, ","), + T!(Whitespace, 15, 1, " "), + T!(Ident, 16, 1, "term2"), + T!(CloseParen, 21, 1, ")"), + T!(Dot, 22, 1, "."), + T!(Eof, 23, 1, ""), ] ) } From 9888858712541b57c380ea0bc07492fa89d2905a Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Wed, 13 Mar 2024 08:31:26 +0100 Subject: [PATCH 007/214] Add ast module and start to add new parser functions --- nemo/src/io/parser.rs | 112 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index f90abdccb..cd90bb337 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -22,6 +22,7 @@ use nom::{ use macros::traced; +mod ast; mod types; use types::{ConstraintOperator, IntermediateResult, Span}; @@ -2426,3 +2427,114 @@ mod test { ); } } + +// NEW PARSER +mod new { + use super::ast::{self, Term}; + use 
super::types::Tokens; + use crate::io::lexer::{self, lex_ident, Span, TokenKind}; + use nom::{ + branch::alt, + bytes::complete::take, + combinator::verify, + multi::{many1, separated_list0}, + sequence::tuple, + IResult, + }; + + macro_rules! tag_token ( + ($func_name:ident, $tag: expr) => ( + fn $func_name(tokens: Tokens) -> IResult { + verify(take(1usize), |t: &Tokens| t.tok[0].kind == $tag)(tokens) + } + ) + ); + + // tag_token!(ident, TokenKind::Ident); + // tag_token!(open_paren, TokenKind::OpenParen); + // tag_token!(close_paren, TokenKind::CloseParen); + // tag_token!(comma, TokenKind::Comma); + // tag_token!(dot, TokenKind::Dot); + + fn parse_program<'a>(input: Span<'a>) -> ast::Program<'a> { + let (_, statements) = + // many1(alt((/*parse_rule, */parse_fact/*, parse_directive*/)))(input).unwrap(); + many1(parse_fact)(input).unwrap(); + dbg!(&statements); + let mut program = ast::Program::new(); + for statement in statements { + program.push(statement) + } + program + } + + // fn parse_rule<'a>(input: Tokens) -> IResult> {} + + fn parse_fact<'a>(input: Span<'a>) -> IResult> { + tuple(( + lex_ident, + lexer::open_paren, + separated_list0(lexer::comma, lex_ident), + lexer::close_paren, + lexer::dot, + ))(input) + .map(|(rest, result)| { + let mut terms = Vec::new(); + for x in result.2 { + terms.push(Term::Primitive(x)) + } + ( + rest, + ast::Statement::Fact { + atom: ast::Atom::Atom { + predicate: result.0, + terms, + }, + }, + ) + }) + } + + // fn parse_directive<'a>(input: Tokens) -> IResult> {} + + #[cfg(test)] + mod test { + use super::*; + use crate::io::{lexer::*, parser::ast::*}; + + macro_rules! S { + ($offset:literal,$line:literal,$str:literal) => { + unsafe { Span::new_from_raw_offset($offset, $line, $str, ()) } + }; + } + + #[test] + fn fact() { + // let input = Tokens { + // tok: &lex_tokens(Span::new("a(B,C).")).unwrap().1, + // }; + let input = Span::new("a(B,C)."); + assert_eq!( + parse_program(input), + vec![ast::Statement::Fact { + atom: ast::Atom::Atom { + predicate: Token { + kind: TokenKind::Ident, + span: S!(0, 1, "a"), + }, + terms: vec![ + Term::Primitive(Token { + kind: TokenKind::Ident, + span: S!(2, 1, "B"), + }), + Term::Primitive(Token { + kind: TokenKind::Ident, + span: S!(4, 1, "C"), + }), + ], + }, + }] + ) + } + } +} From 8467a73b4599aa17f8f12565ecac8d5bc209d99d Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Wed, 13 Mar 2024 08:32:47 +0100 Subject: [PATCH 008/214] Add Tokens type --- nemo/src/io/parser/types.rs | 162 ++++++++++++++++++++++++++++++++++++ 1 file changed, 162 insertions(+) diff --git a/nemo/src/io/parser/types.rs b/nemo/src/io/parser/types.rs index dc40559b2..a65730809 100644 --- a/nemo/src/io/parser/types.rs +++ b/nemo/src/io/parser/types.rs @@ -422,3 +422,165 @@ impl FromExternalError, DataValueCreationError> for LocatedParseError { ParseError::ExternalError(Box::new(e.into())).at(input) } } + +use crate::io::lexer::Token; + +#[derive(Debug, Copy, Clone, PartialEq)] +pub(crate) struct Tokens<'a> { + pub(crate) tok: &'a [Token<'a>], +} +impl<'a> Tokens<'a> { + fn new(vec: &'a [Token]) -> Tokens<'a> { + Tokens { tok: vec } + } +} +impl<'a> nom::AsBytes for Tokens<'a> { + fn as_bytes(&self) -> &[u8] { + todo!() + } +} +impl<'a, T> nom::Compare for Tokens<'a> { + fn compare(&self, t: T) -> nom::CompareResult { + todo!() + } + + fn compare_no_case(&self, t: T) -> nom::CompareResult { + todo!() + } +} +// impl<'a> nom::ExtendInto for Tokens<'a> { +// type Item; + +// type Extender; + +// fn new_builder(&self) -> Self::Extender { +// 
todo!()
// }

// fn extend_into(&self, acc: &mut Self::Extender) {
// todo!()
// }
// }
impl<'a, T> nom::FindSubstring<T> for Tokens<'a> {
    fn find_substring(&self, substr: T) -> Option<usize> {
        todo!()
    }
}
impl<'a, T> nom::FindToken<T> for Tokens<'a> {
    fn find_token(&self, token: T) -> bool {
        todo!()
    }
}
impl<'a> nom::InputIter for Tokens<'a> {
    type Item = &'a Token<'a>;

    type Iter = std::iter::Enumerate<::std::slice::Iter<'a, Token<'a>>>;

    type IterElem = ::std::slice::Iter<'a, Token<'a>>;

    fn iter_indices(&self) -> Self::Iter {
        self.tok.iter().enumerate()
    }

    fn iter_elements(&self) -> Self::IterElem {
        self.tok.iter()
    }

    fn position<P>(&self, predicate: P) -> Option<usize>
    where
        P: Fn(Self::Item) -> bool,
    {
        self.tok.iter().position(predicate)
    }

    fn slice_index(&self, count: usize) -> Result<usize, nom::Needed> {
        if self.tok.len() >= count {
            Ok(count)
        } else {
            Err(nom::Needed::Unknown)
        }
    }
}
impl<'a> nom::InputLength for Tokens<'a> {
    fn input_len(&self) -> usize {
        self.tok.len()
    }
}
impl<'a> nom::InputTake for Tokens<'a> {
    fn take(&self, count: usize) -> Self {
        Tokens {
            tok: &self.tok[0..count],
        }
    }

    fn take_split(&self, count: usize) -> (Self, Self) {
        (
            Tokens {
                tok: &self.tok[count..self.tok.len()],
            },
            Tokens {
                tok: &self.tok[0..count],
            },
        )
    }
}
impl<'a> nom::InputTakeAtPosition for Tokens<'a> {
    type Item = &'a Token<'a>;

    fn split_at_position<P, E: ParseError<Self>>(
        &self,
        predicate: P,
    ) -> IResult<Self, Self, E>
    where
        P: Fn(Self::Item) -> bool,
    {
        todo!()
    }

    fn split_at_position1<P, E: ParseError<Self>>(
        &self,
        predicate: P,
        e: ErrorKind,
    ) -> IResult<Self, Self, E>
    where
        P: Fn(Self::Item) -> bool,
    {
        todo!()
    }

    fn split_at_position_complete<P, E: ParseError<Self>>(
        &self,
        predicate: P,
    ) -> IResult<Self, Self, E>
    where
        P: Fn(Self::Item) -> bool,
    {
        todo!()
    }

    fn split_at_position1_complete<P, E: ParseError<Self>>(
        &self,
        predicate: P,
        e: ErrorKind,
    ) -> IResult<Self, Self, E>
    where
        P: Fn(Self::Item) -> bool,
    {
        todo!()
    }
}
impl<'a> nom::Offset for Tokens<'a> {
    fn offset(&self, second: &Self) -> usize {
        todo!()
    }
}
impl<'a, R> nom::ParseTo<R> for Tokens<'a> {
    fn parse_to(&self) -> Option<R> {
        todo!()
    }
}
impl<'a, R> nom::Slice<R> for Tokens<'a> {
    fn slice(&self, range: R) -> Self {
        todo!()
    }
}

From 51a82fb4f2ca699ab20c4fdfa06e6235176c55ea Mon Sep 17 00:00:00 2001
From: Jakob Steinberg
Date: Wed, 13 Mar 2024 08:34:02 +0100
Subject: [PATCH 009/214] Change mod visibility to pub(crate)

---
 nemo-physical/src/datavalues.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nemo-physical/src/datavalues.rs b/nemo-physical/src/datavalues.rs
index e4f307c2b..37486431f 100644
--- a/nemo-physical/src/datavalues.rs
+++ b/nemo-physical/src/datavalues.rs
@@ -2,7 +2,7 @@
 //! Data values are conceived on this level as canonical representations of unique (semantic)
 //! values across a number of domains (integers, strings, etc.).

-pub(crate) mod syntax;
+pub mod syntax;

 /// Module to define the general [DataValue] trait.
 pub mod datavalue;

From d132d62c993f3fc93ee9873052c0c73bd9e0737e Mon Sep 17 00:00:00 2001
From: Jakob Steinberg
Date: Wed, 13 Mar 2024 08:35:00 +0100
Subject: [PATCH 010/214] Add import to physical syntax module

---
 nemo/src/model/rule_model/syntax.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/nemo/src/model/rule_model/syntax.rs b/nemo/src/model/rule_model/syntax.rs
index e3b9cad78..7f2f38326 100644
--- a/nemo/src/model/rule_model/syntax.rs
+++ b/nemo/src/model/rule_model/syntax.rs
@@ -1,6 +1,7 @@
 //! Constants for strings that are relevant to the syntax of rules.
 //! These are kept in one location, since they are required in various
 //! places related to parsing and display.
+use nemo_physical::datavalues::syntax;

 /// The "predicate name" used for the CSV format in import/export directives.
pub(crate) const FILE_FORMAT_CSV: &str = "csv"; From e2c07497cb1390c0a743aaf03b430c810fbdee36 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Tue, 19 Mar 2024 13:31:56 +0100 Subject: [PATCH 011/214] Rename unequals to unequal --- nemo/src/io/lexer.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs index c0b6ccb0d..5bf30cd6e 100644 --- a/nemo/src/io/lexer.rs +++ b/nemo/src/io/lexer.rs @@ -166,7 +166,7 @@ syntax!(less_equal, "<=", TokenKind::LessEqual); syntax!(equal, "=", TokenKind::Equal); syntax!(greater_equal, ">=", TokenKind::GreaterEqual); syntax!(greater, ">", TokenKind::Greater); -syntax!(unequals, "!=", TokenKind::Unequal); +syntax!(unequal, "!=", TokenKind::Unequal); syntax!(plus, "+", TokenKind::Plus); syntax!(minus, "-", TokenKind::Minus); syntax!(star, "*", TokenKind::Star); @@ -176,7 +176,7 @@ pub(crate) fn lex_operators(input: Span) -> IResult { alt(( less_equal, greater_equal, - unequals, + unequal, less, equal, greater, From 21f1d43bc324b5a1d91d15be23ca00b69a505acd Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Tue, 19 Mar 2024 13:35:17 +0100 Subject: [PATCH 012/214] Unify functions and predicates with NamedTuple and refactor rule literals into NegativeAtom --- nemo/src/io/parser/ast.rs | 82 +++++++++++++++++++++++---------------- 1 file changed, 49 insertions(+), 33 deletions(-) diff --git a/nemo/src/io/parser/ast.rs b/nemo/src/io/parser/ast.rs index 02c9c2d93..48589ed38 100644 --- a/nemo/src/io/parser/ast.rs +++ b/nemo/src/io/parser/ast.rs @@ -10,7 +10,7 @@ struct Position { pub(crate) type Program<'a> = Vec>; -#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Clone)] pub(crate) enum Statement<'a> { Directive(Directive<'a>), Fact { @@ -18,11 +18,11 @@ pub(crate) enum Statement<'a> { }, Rule { head: Vec>, - body: Vec>, + body: Vec>, }, } -#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Clone)] pub(crate) enum Directive<'a> { Base { kw: Token<'a>, @@ -43,18 +43,17 @@ pub(crate) enum Directive<'a> { predicate: Token<'a>, map: Map<'a>, }, + // maybe will be deprecated Output { kw: Token<'a>, predicates: Vec>, }, } -#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Clone)] pub(crate) enum Atom<'a> { - Atom { - predicate: Token<'a>, - terms: Vec>, - }, + Atom(NamedTuple<'a>), + NegativeAtom(NamedTuple<'a>), InfixAtom { operation: Token<'a>, lhs: Term<'a>, @@ -63,47 +62,55 @@ pub(crate) enum Atom<'a> { Map(Map<'a>), } -#[derive(Debug, PartialEq)] -pub(crate) enum Literal<'a> { - Positive(Atom<'a>), - Negative(Atom<'a>), -} - -#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Clone)] pub(crate) enum Term<'a> { Primitive(Token<'a>), + Unary { + operation: Token<'a>, + term: Box>, + }, Binary { operation: Token<'a>, lhs: Box>, rhs: Box>, }, - Unary { - operation: Token<'a>, - term: Box>, - }, Aggregation { operation: Token<'a>, terms: Vec>, }, - Function { - identifier: Token<'a>, - terms: Vec>, - }, + Function(NamedTuple<'a>), Map(Map<'a>), } -#[derive(Debug, PartialEq)] -struct Map<'a> { - identifier: Option>, - pairs: BTreeMap, Term<'a>>, +#[derive(Debug, PartialEq, Clone)] +pub(crate) struct NamedTuple<'a> { + pub(crate) identifier: Token<'a>, + pub(crate) terms: Vec>, +} + +#[derive(Debug, PartialEq, Clone)] +pub(crate) struct Map<'a> { + pub(crate) identifier: Option>, + pub(crate) pairs: Vec, Term<'a>>>, } -#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Clone)] +pub(crate) struct Pair { + key: K, + value: V, +} +impl Pair { + pub fn new(key: K, value: V) -> Pair { + 
Pair { key, value } + } +} + +#[derive(Debug, PartialEq, Clone)] pub(crate) enum Node<'a> { Statement(&'a Statement<'a>), Directive(&'a Directive<'a>), RuleHead(&'a Vec>), - RuleBody(&'a Vec>), + RuleBody(&'a Vec>), Atom(&'a Atom<'a>), Term(&'a Term<'a>), Terms(&'a Vec>), @@ -118,7 +125,7 @@ pub(crate) enum Node<'a> { Lhs(&'a Term<'a>), Rhs(&'a Term<'a>), Identifier(&'a Token<'a>), - Pairs(&'a BTreeMap, Term<'a>>), + Pairs(&'a Vec, Term<'a>>>), MapIdentifier(&'a Option>), Primitive(&'a Token<'a>), } @@ -226,8 +233,17 @@ impl<'a> AstNode for Directive<'a> { impl<'a> AstNode for Atom<'a> { fn children(&self) -> Vec { match self { - Atom::Atom { predicate, terms } => { - vec![Node::KeyWord(predicate), Node::Terms(terms)] + Atom::Atom(named_tuple) => { + vec![ + Node::Identifier(&named_tuple.identifier), + Node::Terms(&named_tuple.terms), + ] + } + Atom::NegativeAtom(named_tuple) => { + vec![ + Node::Identifier(&named_tuple.identifier), + Node::Terms(&named_tuple.terms), + ] } Atom::InfixAtom { operation, @@ -256,7 +272,7 @@ impl<'a> AstNode for Term<'a> { Term::Aggregation { operation, terms } => { vec![Node::Operation(operation), Node::Terms(terms)] } - Term::Function { identifier, terms } => { + Term::Function(NamedTuple { identifier, terms }) => { vec![Node::Identifier(identifier), Node::Terms(terms)] } Term::Map(map) => map.children(), From ee1e4264598891c2642b0e44ea59dab723cf7d3f Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Tue, 19 Mar 2024 18:57:09 +0100 Subject: [PATCH 013/214] Add Variable TokenKind --- nemo/src/io/lexer.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs index 5bf30cd6e..216f732bf 100644 --- a/nemo/src/io/lexer.rs +++ b/nemo/src/io/lexer.rs @@ -72,8 +72,10 @@ pub(crate) enum TokenKind { /// '/' Slash, // Multi-char tokens: - /// Identifier for keywords and predicate names + /// Identifier for keywords and names Ident, + /// Variable, + Variable, /// IRI, delimited with `<` and `>` Iri, /// Base 10 digits From b862d453b0ea2c9bfa30db72ba41dda5b78ee2e6 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Tue, 19 Mar 2024 18:58:38 +0100 Subject: [PATCH 014/214] Add Variable Term and change Pair field visibility --- nemo/src/io/parser/ast.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/nemo/src/io/parser/ast.rs b/nemo/src/io/parser/ast.rs index 48589ed38..7a2a2eebd 100644 --- a/nemo/src/io/parser/ast.rs +++ b/nemo/src/io/parser/ast.rs @@ -65,6 +65,7 @@ pub(crate) enum Atom<'a> { #[derive(Debug, PartialEq, Clone)] pub(crate) enum Term<'a> { Primitive(Token<'a>), + Variable(Token<'a>), Unary { operation: Token<'a>, term: Box>, @@ -96,8 +97,8 @@ pub(crate) struct Map<'a> { #[derive(Debug, PartialEq, Clone)] pub(crate) struct Pair { - key: K, - value: V, + pub(crate) key: K, + pub(crate) value: V, } impl Pair { pub fn new(key: K, value: V) -> Pair { @@ -128,6 +129,7 @@ pub(crate) enum Node<'a> { Pairs(&'a Vec, Term<'a>>>), MapIdentifier(&'a Option>), Primitive(&'a Token<'a>), + Variable(&'a Token<'a>), } trait AstNode { @@ -262,7 +264,8 @@ impl<'a> AstNode for Atom<'a> { impl<'a> AstNode for Term<'a> { fn children(&self) -> Vec { match self { - Term::Primitive(prim) => vec![Node::Primitive(prim)], + Term::Primitive(primitive) => vec![Node::Primitive(primitive)], + Term::Variable(var) => vec![Node::Variable(var)], Term::Binary { operation, lhs, From 2aeabb58bb3c94323988121a1a226edea18a61d2 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Tue, 19 Mar 2024 18:59:21 +0100 
Subject: [PATCH 015/214] Add str_concat crate --- Cargo.lock | 7 +++++++ nemo/Cargo.toml | 1 + 2 files changed, 8 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 46134e7f8..50e220b21 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1183,6 +1183,7 @@ dependencies = [ "sanitise-file-name", "serde", "serde_json", + "str-concat", "test-log", "thiserror", "tokio", @@ -2071,6 +2072,12 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "str-concat" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3468939e48401c4fe3cdf5e5cef50951c2808ed549d1467fde249f1fcb602634" + [[package]] name = "streaming-iterator" version = "0.1.9" diff --git a/nemo/Cargo.toml b/nemo/Cargo.toml index 173a3de22..411883468 100644 --- a/nemo/Cargo.toml +++ b/nemo/Cargo.toml @@ -46,6 +46,7 @@ serde_json = "1.0.108" serde = {version = "1.0.138", features = ["derive"] } dyn-clone = "1.0.16" unicode-ident = "1.0.12" +str-concat = "0.2.0" [dev-dependencies] env_logger = "*" From 91e8288c845d28725b6a08e8a9d87076d7cc5a0c Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Tue, 19 Mar 2024 19:02:00 +0100 Subject: [PATCH 016/214] Add parser functions and tests --- nemo/src/io/parser.rs | 484 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 445 insertions(+), 39 deletions(-) diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index cd90bb337..67121271b 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -2428,11 +2428,23 @@ mod test { } } -// NEW PARSER +/// NEW PARSER mod new { - use super::ast::{self, Term}; + use std::collections::{BTreeMap, HashMap}; + use std::ffi::c_ushort; + + use super::ast::{self, NamedTuple, Pair, Term}; use super::types::Tokens; - use crate::io::lexer::{self, lex_ident, Span, TokenKind}; + use crate::io::lexer::{ + self, close_brace, comma, equal, greater, greater_equal, less, less_equal, lex_comment, + lex_doc_comment, lex_ident, lex_iri, lex_number, lex_string, lex_whitespace, open_brace, + unequal, Span, Token, TokenKind, + }; + use nom::combinator::{all_consuming, opt}; + use nom::error::ParseError; + use nom::multi::{many0, separated_list1}; + use nom::sequence::{delimited, pair}; + use nom::Parser; use nom::{ branch::alt, bytes::complete::take, @@ -2442,25 +2454,42 @@ mod new { IResult, }; - macro_rules! tag_token ( - ($func_name:ident, $tag: expr) => ( - fn $func_name(tokens: Tokens) -> IResult { - verify(take(1usize), |t: &Tokens| t.tok[0].kind == $tag)(tokens) - } - ) - ); + fn combine_spans<'a>(span1: Span<'a>, span2: Span<'a>) -> Result, str_concat::Error> { + // SAFETY: The concatenation of strings is okay, because they originated from the same source string. + // The raw offset is okay, because the offset of another span is used. 
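        // Note that `str_concat::concat` only succeeds when the two slices are
        // directly adjacent in memory; otherwise it returns
        // `Err(str_concat::Error::NotAdjacent)` (exercised by the
        // `combine_spans_error` test below), which `parse_variable` turns into
        // a panic via `expect`.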
+ unsafe { + let fragment = str_concat::concat(span1.fragment(), span2.fragment())?; + Ok(Span::new_from_raw_offset( + span1.location_offset(), + span1.location_line(), + fragment, + (), + )) + } + } - // tag_token!(ident, TokenKind::Ident); - // tag_token!(open_paren, TokenKind::OpenParen); - // tag_token!(close_paren, TokenKind::CloseParen); - // tag_token!(comma, TokenKind::Comma); - // tag_token!(dot, TokenKind::Dot); + fn ignore_ws_and_comments<'a, F, O>( + inner: F, + ) -> impl FnMut(Span<'a>) -> IResult, O, nom::error::Error>> + where + F: Parser, O, nom::error::Error>> + + std::ops::FnMut(Span<'a>) -> IResult, O, nom::error::Error>>, + { + delimited( + many0(alt((lex_whitespace, lex_comment, lex_doc_comment))), + inner, + many0(alt((lex_whitespace, lex_comment, lex_doc_comment))), + ) + } fn parse_program<'a>(input: Span<'a>) -> ast::Program<'a> { - let (_, statements) = - // many1(alt((/*parse_rule, */parse_fact/*, parse_directive*/)))(input).unwrap(); - many1(parse_fact)(input).unwrap(); - dbg!(&statements); + let (_, statements) = all_consuming(many1(ignore_ws_and_comments(alt(( + parse_fact, + parse_rule, + parse_directive, + )))))(input) + .unwrap(); + // many0(parse_fact)(input).unwrap(); let mut program = ast::Program::new(); for statement in statements { program.push(statement) @@ -2468,34 +2497,245 @@ mod new { program } - // fn parse_rule<'a>(input: Tokens) -> IResult> {} - fn parse_fact<'a>(input: Span<'a>) -> IResult> { tuple(( - lex_ident, - lexer::open_paren, - separated_list0(lexer::comma, lex_ident), - lexer::close_paren, - lexer::dot, + ignore_ws_and_comments(parse_named_tuple), + ignore_ws_and_comments(lexer::dot), ))(input) - .map(|(rest, result)| { - let mut terms = Vec::new(); - for x in result.2 { - terms.push(Term::Primitive(x)) - } + .map(|(rest, (atom, _))| { ( rest, ast::Statement::Fact { - atom: ast::Atom::Atom { - predicate: result.0, - terms, - }, + atom: ast::Atom::Atom(atom), }, ) }) } - // fn parse_directive<'a>(input: Tokens) -> IResult> {} + fn parse_rule<'a>(input: Span<'a>) -> IResult> { + tuple(( + ignore_ws_and_comments(separated_list1( + lexer::comma, + ignore_ws_and_comments(parse_named_tuple), + )), + ignore_ws_and_comments(lexer::arrow), + ignore_ws_and_comments(separated_list1( + lexer::comma, + ignore_ws_and_comments(pair(opt(lexer::tilde), parse_named_tuple)), + )), + ignore_ws_and_comments(lexer::dot), + ))(input) + .map(|(rest, (head, _, body, _))| { + ( + rest, + ast::Statement::Rule { + head: head.iter().map(|x| ast::Atom::Atom(x.clone())).collect(), + body: body + .iter() + .map(|(tilde, atom)| { + if let None = tilde { + ast::Atom::Atom(atom.clone()) + } else { + ast::Atom::NegativeAtom(atom.clone()) + } + }) + .collect(), + }, + ) + }) + } + + fn parse_directive<'a>(input: Span<'a>) -> IResult> { + alt(( + ignore_ws_and_comments(parse_base_directive), + ignore_ws_and_comments(parse_prefix_directive), + ignore_ws_and_comments(parse_import_directive), + ignore_ws_and_comments(parse_export_directive), + ignore_ws_and_comments(parse_output_directive), + ))(input) + .map(|(rest, directive)| (rest, ast::Statement::Directive(directive))) + } + + fn parse_base_directive<'a>(input: Span<'a>) -> IResult> { + tuple(( + lexer::at, + verify(lex_ident, |token| token.kind == TokenKind::Base), + ignore_ws_and_comments(lex_iri), + ignore_ws_and_comments(lexer::dot), + ))(input) + .map(|(rest, (_, kw, base_iri, _))| (rest, ast::Directive::Base { kw, base_iri })) + } + + fn parse_prefix_directive<'a>(input: Span<'a>) -> IResult> { + tuple(( + 
lexer::at, + verify(lex_ident, |token| token.kind == TokenKind::Prefix), + ignore_ws_and_comments(lex_ident), + ignore_ws_and_comments(lexer::colon), + ignore_ws_and_comments(lex_iri), + ignore_ws_and_comments(lexer::dot), + ))(input) + .map(|(rest, (_, kw, prefix, _, prefix_iri, _))| { + ( + rest, + ast::Directive::Prefix { + kw, + prefix, + prefix_iri, + }, + ) + }) + } + + fn parse_import_directive<'a>(input: Span<'a>) -> IResult> { + tuple(( + lexer::at, + verify(lex_ident, |token| token.kind == TokenKind::Import), + ignore_ws_and_comments(lex_ident), + ignore_ws_and_comments(lexer::arrow), + ignore_ws_and_comments(parse_map), + ignore_ws_and_comments(lexer::dot), + ))(input) + .map(|(rest, (_, kw, predicate, _, map, _))| { + (rest, ast::Directive::Import { kw, predicate, map }) + }) + } + + fn parse_export_directive<'a>(input: Span<'a>) -> IResult> { + tuple(( + lexer::at, + verify(lex_ident, |token| token.kind == TokenKind::Export), + ignore_ws_and_comments(lex_ident), + ignore_ws_and_comments(lexer::arrow), + ignore_ws_and_comments(parse_map), + ignore_ws_and_comments(lexer::dot), + ))(input) + .map(|(rest, (_, kw, predicate, _, map, _))| { + (rest, ast::Directive::Export { kw, predicate, map }) + }) + } + + fn parse_output_directive<'a>(input: Span<'a>) -> IResult> { + tuple(( + lexer::at, + verify(lex_ident, |token| token.kind == TokenKind::Output), + ignore_ws_and_comments(separated_list0( + lexer::comma, + ignore_ws_and_comments(lex_ident), + )), + ignore_ws_and_comments(lexer::dot), + ))(input) + .map(|(rest, (_, kw, predicates, _))| (rest, ast::Directive::Output { kw, predicates })) + } + + fn parse_atom<'a>(input: Span<'a>) -> IResult> { + todo!("`parse_atom`!") + } + + fn parse_negative_atom<'a>(input: Span<'a>) -> IResult> { + todo!("`parse_negative_atom`!") + } + + fn parse_infix_atom<'a>(input: Span<'a>) -> IResult> { + tuple((parse_term, parse_operation_token, parse_term))(input).map( + |(rest, (lhs, operation, rhs))| { + ( + rest, + ast::Atom::InfixAtom { + operation, + lhs, + rhs, + }, + ) + }, + ) + } + + fn parse_named_tuple<'a>(input: Span<'a>) -> IResult> { + tuple(( + lex_ident, + lexer::open_paren, + // ignore_ws_and_comments(separated_list0(lexer::comma, parse_term)), + ignore_ws_and_comments(separated_list0(comma, ignore_ws_and_comments(parse_term))), + ignore_ws_and_comments(lexer::close_paren), + ))(input) + .map(|(rest, (identifier, _, terms, _))| (rest, NamedTuple { identifier, terms })) + } + + fn parse_map<'a>(input: Span<'a>) -> IResult> { + tuple(( + opt(lex_ident), + ignore_ws_and_comments(open_brace), + separated_list0( + ignore_ws_and_comments(comma), + ignore_ws_and_comments(tuple((parse_term, equal, parse_term))), + ), + ignore_ws_and_comments(close_brace), + ))(input) + .map(|(rest, (identifier, _, vec_of_pairs, _))| { + let mut pairs = Vec::new(); + for (key, _, value) in vec_of_pairs { + pairs.push(Pair::new(key, value)); + } + (rest, ast::Map { identifier, pairs }) + }) + } + + fn parse_term<'a>(input: Span<'a>) -> IResult> { + // alt(( + // parse_primitive_term, + // parse_unary_term, + // parse_binary_term, + // parse_aggregation_term, + // parse_function_term, + // parse_map_term, + // ))(input) + ignore_ws_and_comments(alt((parse_primitive_term, parse_variable)))(input) + } + + fn parse_primitive_term<'a>(input: Span<'a>) -> IResult> { + alt((lex_ident, lex_iri, lex_number, lex_string))(input) + .map(|(rest, term)| (rest, ast::Term::Primitive(term))) + } + + fn parse_unary_term<'a>(input: Span<'a>) -> IResult> { + 
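        // Sketch only (not part of this commit): judging by the later revision
        // in this series, the intended shape is `pair(lex_operators, parse_term)`
        // mapped into `ast::Term::Unary { operation, term }`.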
todo!("`parse_unary_term`!") + } + + fn parse_binary_term<'a>(input: Span<'a>) -> IResult> { + todo!("`parse_binary_term`!") + } + + fn parse_aggregation_term<'a>(input: Span<'a>) -> IResult> { + todo!("`parse_aggregation_term`!") + } + + fn parse_function_term<'a>(input: Span<'a>) -> IResult> { + todo!("`parse_function_term`!") + } + + fn parse_map_term<'a>(input: Span<'a>) -> IResult> { + todo!("`parse_map_term`!") + } + + fn parse_variable<'a>(input: Span<'a>) -> IResult> { + ignore_ws_and_comments(pair(lexer::question_mark, lex_ident))(input).map( + |(rest, (question_mark, ident))| { + ( + rest, + ast::Term::Variable(Token { + kind: TokenKind::Variable, + span: combine_spans(question_mark.span, ident.span) + .expect("Spans were not adjacent in memory"), + }), + ) + }, + ) + } + + fn parse_operation_token<'a>(input: Span<'a>) -> IResult> { + alt((equal, unequal, less, less_equal, greater, greater_equal))(input) + } #[cfg(test)] mod test { @@ -2517,8 +2757,8 @@ mod new { assert_eq!( parse_program(input), vec![ast::Statement::Fact { - atom: ast::Atom::Atom { - predicate: Token { + atom: ast::Atom::Atom(NamedTuple { + identifier: Token { kind: TokenKind::Ident, span: S!(0, 1, "a"), }, @@ -2532,9 +2772,175 @@ mod new { span: S!(4, 1, "C"), }), ], - }, + }), }] ) } + + #[test] + fn syntax() { + let input = Span::new( + r#"@base .@prefix rdfs:.@import sourceA:-csv{resource="sources/dataA.csv"}.@export a:-csv{}.@output a."#, + ); + assert_eq!( + parse_program(input), + vec![ + ast::Statement::Directive(Directive::Base { + kw: Token { + kind: TokenKind::Base, + span: S!(1, 1, "base"), + }, + base_iri: Token { + kind: TokenKind::Iri, + span: S!(6, 1, "") + } + }), + ast::Statement::Directive(Directive::Prefix { + kw: Token { + kind: TokenKind::Prefix, + span: S!(33, 1, "prefix"), + }, + prefix: Token { + kind: TokenKind::Ident, + span: S!(40, 1, "rdfs"), + }, + prefix_iri: Token { + kind: TokenKind::Iri, + span: S!(45, 1, ""), + }, + }), + ast::Statement::Directive(Directive::Import { + kw: Token { + kind: TokenKind::Import, + span: S!(86, 1, "import"), + }, + predicate: Token { + kind: TokenKind::Ident, + span: S!(93, 1, "sourceA"), + }, + map: Map { + identifier: Some(Token { + kind: TokenKind::Ident, + span: S!(102, 1, "csv") + }), + pairs: vec![Pair { + key: Term::Primitive(Token { + kind: TokenKind::Ident, + span: S!(106, 1, "resource"), + }), + value: Term::Primitive(Token { + kind: TokenKind::String, + span: S!(115, 1, "\"sources/dataA.csv\""), + }) + }], + }, + }), + ast::Statement::Directive(Directive::Export { + kw: Token { + kind: TokenKind::Export, + span: S!(137, 1, "export"), + }, + predicate: Token { + kind: TokenKind::Ident, + span: S!(144, 1, "a"), + }, + map: Map { + identifier: Some(Token { + kind: TokenKind::Ident, + span: S!(147, 1, "csv"), + }), + pairs: vec![] + } + }), + ast::Statement::Directive(Directive::Output { + kw: Token { + kind: TokenKind::Output, + span: S!(154, 1, "output") + }, + predicates: vec![Token { + kind: TokenKind::Ident, + span: S!(161, 1, "a") + }] + }), + ] + ) + } + + #[test] + fn ignore_ws_and_comments() { + let input = Span::new(" Hi %cool comment\n"); + assert_eq!( + super::ignore_ws_and_comments(lex_ident)(input), + Ok(( + S!(22, 2, ""), + Token { + kind: TokenKind::Ident, + span: S!(3, 1, "Hi") + } + )) + ) + } + + #[test] + fn fact_with_ws() { + let input = Span::new("some(Fact, with, whitespace) . 
% and a super useful comment\n"); + assert_eq!( + parse_program(input), + vec![ast::Statement::Fact { + atom: Atom::Atom(NamedTuple { + identifier: Token { + kind: TokenKind::Ident, + span: S!(0, 1, "some"), + }, + terms: vec![ + Term::Primitive(Token { + kind: TokenKind::Ident, + span: S!(5, 1, "Fact") + }), + Term::Primitive(Token { + kind: TokenKind::Ident, + span: S!(11, 1, "with") + }), + Term::Primitive(Token { + kind: TokenKind::Ident, + span: S!(17, 1, "whitespace") + }), + ] + }) + }] + ) + } + + #[test] + fn combine_spans() { + use nom::bytes::complete::tag; + let source = "Some Input ;)"; + let input = Span::new(source); + let (input, first) = tag::<&str, Span, nom::error::Error<_>>("Some ")(input).unwrap(); + let (input, second) = tag::<&str, Span, nom::error::Error<_>>("Input")(input).unwrap(); + let span = super::combine_spans(first, second); + assert_eq!(span, Ok(Span::new("Some Input"))) + } + + #[test] + fn combine_spans_error() { + use nom::bytes::complete::tag; + let source = "Some Input ;)"; + let input = Span::new(source); + let (input, first) = tag::<&str, Span, nom::error::Error<_>>("Some")(input).unwrap(); + let (input, _) = tag::<&str, Span, nom::error::Error<_>>(" ")(input).unwrap(); + let (input, second) = tag::<&str, Span, nom::error::Error<_>>("Input")(input).unwrap(); + let span = super::combine_spans(first, second); + assert_eq!(span, Err(str_concat::Error::NotAdjacent)) + } + + #[test] + fn parser_test() { + let str = + std::fs::read_to_string("../testfile.rls").expect("`../testfile.rls` not found"); + let input = Span::new(str.as_str()); + dbg!(parse_program(input)); + // assert!(false); + } } } From 05998a52f09d648f241c6f673493be4e19f811a6 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Tue, 9 Apr 2024 09:05:34 +0200 Subject: [PATCH 017/214] Extend Ast nodes and implement AstNode trait --- Cargo.lock | 7 - nemo/Cargo.toml | 1 - nemo/src/io/lexer.rs | 127 ++- nemo/src/io/parser.rs | 1146 ++++++++++++++++++------- nemo/src/io/parser/ast.rs | 549 ++++++------ nemo/src/io/parser/ast/atom.rs | 62 ++ nemo/src/io/parser/ast/directive.rs | 212 +++++ nemo/src/io/parser/ast/map.rs | 72 ++ nemo/src/io/parser/ast/named_tuple.rs | 40 + nemo/src/io/parser/ast/program.rs | 29 + nemo/src/io/parser/ast/statement.rs | 96 +++ nemo/src/io/parser/ast/term.rs | 103 +++ 12 files changed, 1820 insertions(+), 624 deletions(-) create mode 100644 nemo/src/io/parser/ast/atom.rs create mode 100644 nemo/src/io/parser/ast/directive.rs create mode 100644 nemo/src/io/parser/ast/map.rs create mode 100644 nemo/src/io/parser/ast/named_tuple.rs create mode 100644 nemo/src/io/parser/ast/program.rs create mode 100644 nemo/src/io/parser/ast/statement.rs create mode 100644 nemo/src/io/parser/ast/term.rs diff --git a/Cargo.lock b/Cargo.lock index 50e220b21..46134e7f8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1183,7 +1183,6 @@ dependencies = [ "sanitise-file-name", "serde", "serde_json", - "str-concat", "test-log", "thiserror", "tokio", @@ -2072,12 +2071,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "str-concat" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3468939e48401c4fe3cdf5e5cef50951c2808ed549d1467fde249f1fcb602634" - [[package]] name = "streaming-iterator" version = "0.1.9" diff --git a/nemo/Cargo.toml b/nemo/Cargo.toml index 411883468..173a3de22 100644 --- a/nemo/Cargo.toml +++ b/nemo/Cargo.toml @@ -46,7 +46,6 @@ serde_json = "1.0.108" serde = {version = "1.0.138", features = ["derive"] } dyn-clone = "1.0.16" 
unicode-ident = "1.0.12" -str-concat = "0.2.0" [dev-dependencies] env_logger = "*" diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs index 216f732bf..521b6b82c 100644 --- a/nemo/src/io/lexer.rs +++ b/nemo/src/io/lexer.rs @@ -5,12 +5,14 @@ use nom::{ bytes::complete::{is_not, tag, take}, character::complete::{alpha1, alphanumeric1, digit1, line_ending, multispace1}, combinator::{all_consuming, map, recognize}, - multi::many0, + multi::{many0, many1}, sequence::{delimited, pair, tuple}, IResult, }; use nom_locate::LocatedSpan; +use super::parser::ast::Position; + pub(crate) type Span<'a> = LocatedSpan<&'a str>; /// All the tokens the input gets parsed into. @@ -86,6 +88,8 @@ pub(crate) enum TokenKind { Comment, /// A comment, starting with `%%` DocComment, + /// A comment, starting with `%!` + TlDocComment, /// ` `, `\t`, `\r` or `\n` Whitespace, /// base directive keyword @@ -98,11 +102,63 @@ pub(crate) enum TokenKind { Export, /// output directive keyword Output, + /// Ident for prefixes + PrefixIdent, /// catch all token Illegal, /// signals end of file Eof, } +impl std::fmt::Display for TokenKind { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + TokenKind::QuestionMark => write!(f, "QuestionMark"), + TokenKind::ExclamationMark => write!(f, "ExclamationMark"), + TokenKind::OpenParen => write!(f, "OpenParen"), + TokenKind::CloseParen => write!(f, "CloseParen"), + TokenKind::OpenBracket => write!(f, "OpenBracket"), + TokenKind::CloseBracket => write!(f, "CloseBracket"), + TokenKind::OpenBrace => write!(f, "OpenBrace"), + TokenKind::CloseBrace => write!(f, "CloseBrace"), + TokenKind::Dot => write!(f, "Dot"), + TokenKind::Comma => write!(f, "Comma"), + TokenKind::Colon => write!(f, "Colon"), + TokenKind::Arrow => write!(f, "Arrow"), + TokenKind::Greater => write!(f, "Greater"), + TokenKind::GreaterEqual => write!(f, "GreaterEqual"), + TokenKind::Equal => write!(f, "Equal"), + TokenKind::LessEqual => write!(f, "LessEqual"), + TokenKind::Less => write!(f, "Less"), + TokenKind::Unequal => write!(f, "Unequal"), + TokenKind::Tilde => write!(f, "Tilde"), + TokenKind::Caret => write!(f, "Caret"), + TokenKind::Hash => write!(f, "Hash"), + TokenKind::Underscore => write!(f, "Underscore"), + TokenKind::At => write!(f, "At"), + TokenKind::Plus => write!(f, "Plus"), + TokenKind::Minus => write!(f, "Minus"), + TokenKind::Star => write!(f, "Star"), + TokenKind::Slash => write!(f, "Slash"), + TokenKind::Ident => write!(f, "Ident"), + TokenKind::Variable => write!(f, "Variable"), + TokenKind::Iri => write!(f, "Iri"), + TokenKind::Number => write!(f, "Number"), + TokenKind::String => write!(f, "String"), + TokenKind::Comment => write!(f, "Comment"), + TokenKind::DocComment => write!(f, "DocComment"), + TokenKind::TlDocComment => write!(f, "TlDocComment"), + TokenKind::Whitespace => write!(f, "Whitespace"), + TokenKind::Base => write!(f, "Base"), + TokenKind::Prefix => write!(f, "Prefix"), + TokenKind::Import => write!(f, "Import"), + TokenKind::Export => write!(f, "Export"), + TokenKind::Output => write!(f, "Output"), + TokenKind::PrefixIdent => write!(f, "PrefixIdent"), + TokenKind::Illegal => write!(f, "Illegal"), + TokenKind::Eof => write!(f, "Eof"), + } + } +} #[derive(Debug, Copy, Clone, PartialEq)] pub(crate) struct Token<'a> { @@ -110,10 +166,43 @@ pub(crate) struct Token<'a> { pub(crate) span: Span<'a>, } impl<'a> Token<'a> { - fn new(kind: TokenKind, span: Span<'a>) -> Token<'a> { + pub(crate) fn new(kind: TokenKind, span: Span<'a>) -> Token<'a> { Token 
{ kind, span } } } +impl std::fmt::Display for Token<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let offset = self.span.location_offset(); + let line = self.span.location_line(); + let fragment = self.span.fragment(); + if self.span.extra == () { + write!(f, "T!{{{0}, S!({offset}, {line}, {fragment})}}", self.kind) + } else { + write!( + f, + "T!{{{0}, S!({offset}, {line}, {fragment}, {1:?})}}", + self.kind, self.span.extra + ) + } + } +} +impl<'a> crate::io::parser::ast::AstNode for Token<'a> { + fn children(&self) -> Option> { + None::> + } + + fn span(&self) -> Span { + self.span + } + + // fn position(&self) -> Position { + // Position { + // offset: self.span.location_offset(), + // line: self.span.location_line(), + // column: self.span.get_column() as u32, + // } + // } +} macro_rules! syntax { ($func_name: ident, $tag_string: literal, $token: expr) => { @@ -220,15 +309,24 @@ pub(crate) fn lex_string(input: Span) -> IResult { } pub(crate) fn lex_comment(input: Span) -> IResult { - recognize(tuple((tag("%"), many0(is_not("\r\n")), line_ending)))(input) + recognize(tuple((tag("%"), many0(is_not("\n")), line_ending)))(input) .map(|(rest, result)| (rest, Token::new(TokenKind::Comment, result))) } pub(crate) fn lex_doc_comment(input: Span) -> IResult { - recognize(tuple((tag("%%"), many0(is_not("\r\n")), line_ending)))(input) + recognize(many1(tuple((tag("%%"), many0(is_not("\n")), line_ending))))(input) .map(|(rest, result)| (rest, Token::new(TokenKind::DocComment, result))) } +pub(crate) fn lex_toplevel_doc_comment(input: Span) -> IResult { + recognize(many1(tuple((tag("%!"), many0(is_not("\n")), line_ending))))(input) + .map(|(rest, result)| (rest, Token::new(TokenKind::TlDocComment, result))) +} + +pub(crate) fn lex_comments(input: Span) -> IResult { + alt((lex_toplevel_doc_comment, lex_doc_comment, lex_comment))(input) +} + pub(crate) fn lex_whitespace(input: Span) -> IResult { multispace1(input).map(|(rest, result)| (rest, Token::new(TokenKind::Whitespace, result))) } @@ -245,7 +343,7 @@ pub(crate) fn lex_tokens(input: Span) -> IResult> { lex_ident, lex_number, lex_string, - lex_comment, + lex_comments, lex_whitespace, lex_illegal, ))))(input) @@ -398,12 +496,13 @@ mod test { #[test] fn comment() { - let input = Span::new("% Some Comment\n"); + let input = Span::new(" % Some Comment\n"); assert_eq!( lex_tokens(input).unwrap().1, vec![ - T!(Comment, 0, 1, "% Some Comment\n"), - T!(Eof, 15, 2, ""), + T!(Whitespace, 0, 1, " "), + T!(Comment, 4, 1, "% Some Comment\n"), + T!(Eof, 19, 2, ""), // T!(Comment, Span::new(0, 1, "% Some Comment\n")), // T!(Eof, Span::new(15, 2, "")) ] @@ -544,4 +643,16 @@ mod test { ] ) } + + #[test] + fn whitespace() { + let input = Span::new(" \t \n\n\t \n"); + assert_eq!( + lex_tokens(input).unwrap().1, + vec![ + T!(Whitespace, 0, 1, " \t \n\n\t \n"), + T!(Eof, 12, 4, ""), + ] + ) + } } diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index 67121271b..b29afe701 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -22,7 +22,7 @@ use nom::{ use macros::traced; -mod ast; +pub(crate) mod ast; mod types; use types::{ConstraintOperator, IntermediateResult, Span}; @@ -2430,41 +2430,36 @@ mod test { /// NEW PARSER mod new { - use std::collections::{BTreeMap, HashMap}; - use std::ffi::c_ushort; - - use super::ast::{self, NamedTuple, Pair, Term}; - use super::types::Tokens; + use super::ast::{ + atom::*, directive::*, map::*, named_tuple::*, program::*, statement::*, term::*, List, + }; use crate::io::lexer::{ 
- self, close_brace, comma, equal, greater, greater_equal, less, less_equal, lex_comment, - lex_doc_comment, lex_ident, lex_iri, lex_number, lex_string, lex_whitespace, open_brace, - unequal, Span, Token, TokenKind, + arrow, at, close_brace, close_paren, colon, comma, dot, equal, greater, greater_equal, + less, less_equal, lex_comment, lex_doc_comment, lex_ident, lex_iri, lex_number, + lex_operators, lex_string, lex_toplevel_doc_comment, lex_whitespace, open_brace, + open_paren, question_mark, tilde, unequal, Span, Token, TokenKind, }; - use nom::combinator::{all_consuming, opt}; - use nom::error::ParseError; - use nom::multi::{many0, separated_list1}; + use nom::combinator::{all_consuming, opt, recognize}; use nom::sequence::{delimited, pair}; use nom::Parser; use nom::{ branch::alt, - bytes::complete::take, combinator::verify, - multi::{many1, separated_list0}, + multi::{many0, many1, separated_list0}, sequence::tuple, IResult, }; - fn combine_spans<'a>(span1: Span<'a>, span2: Span<'a>) -> Result, str_concat::Error> { - // SAFETY: The concatenation of strings is okay, because they originated from the same source string. - // The raw offset is okay, because the offset of another span is used. + fn outer_span<'a>(input: Span<'a>, rest_input: Span<'a>) -> Span<'a> { unsafe { - let fragment = str_concat::concat(span1.fragment(), span2.fragment())?; - Ok(Span::new_from_raw_offset( - span1.location_offset(), - span1.location_line(), - fragment, + let span = Span::new_from_raw_offset( + input.location_offset(), + input.location_line(), + &input[..(rest_input.location_offset() - input.location_offset())], (), - )) + ); + // dbg!(&input, &span, &rest_input); + span } } @@ -2473,264 +2468,575 @@ mod new { ) -> impl FnMut(Span<'a>) -> IResult, O, nom::error::Error>> where F: Parser, O, nom::error::Error>> - + std::ops::FnMut(Span<'a>) -> IResult, O, nom::error::Error>>, + + FnMut(Span<'a>) -> IResult, O, nom::error::Error>>, { delimited( - many0(alt((lex_whitespace, lex_comment, lex_doc_comment))), + many0(alt((lex_whitespace, lex_comment))), inner, - many0(alt((lex_whitespace, lex_comment, lex_doc_comment))), + many0(alt((lex_whitespace, lex_comment))), ) } - fn parse_program<'a>(input: Span<'a>) -> ast::Program<'a> { - let (_, statements) = all_consuming(many1(ignore_ws_and_comments(alt(( - parse_fact, - parse_rule, - parse_directive, - )))))(input) - .unwrap(); - // many0(parse_fact)(input).unwrap(); - let mut program = ast::Program::new(); - for statement in statements { - program.push(statement) + fn parse_program<'a>(input: Span<'a>) -> Program<'a> { + let span = input.clone(); + let (_, (tl_doc_comment, statements)) = all_consuming(pair( + opt(lex_toplevel_doc_comment), + many1(alt(( + parse_fact, + parse_rule, + parse_whitespace, + parse_directive, + parse_comment, + ))), + ))(input) + .expect("Expect EOF"); + Program { + span, + tl_doc_comment, + statements, } - program } - fn parse_fact<'a>(input: Span<'a>) -> IResult> { + fn parse_whitespace<'a>(input: Span<'a>) -> IResult> { + lex_whitespace(input).map(|(rest, ws)| (rest, Statement::Whitespace(ws))) + } + + fn parse_comment<'a>(input: Span<'a>) -> IResult> { + lex_comment(input).map(|(rest, comment)| (rest, Statement::Comment(comment))) + } + + fn parse_fact<'a>(input: Span<'a>) -> IResult> { + let input_span = input; tuple(( - ignore_ws_and_comments(parse_named_tuple), - ignore_ws_and_comments(lexer::dot), + opt(lex_doc_comment), + parse_normal_atom, + opt(lex_whitespace), + dot, ))(input) - .map(|(rest, (atom, _))| { + 
.map(|(rest_input, (doc_comment, atom, ws, dot))| { ( - rest, - ast::Statement::Fact { - atom: ast::Atom::Atom(atom), + rest_input, + Statement::Fact { + span: outer_span(input_span, rest_input), + doc_comment, + atom, + ws, + dot, }, ) }) } - fn parse_rule<'a>(input: Span<'a>) -> IResult> { + fn parse_rule<'a>(input: Span<'a>) -> IResult> { + let input_span = input; tuple(( - ignore_ws_and_comments(separated_list1( - lexer::comma, - ignore_ws_and_comments(parse_named_tuple), - )), - ignore_ws_and_comments(lexer::arrow), - ignore_ws_and_comments(separated_list1( - lexer::comma, - ignore_ws_and_comments(pair(opt(lexer::tilde), parse_named_tuple)), + opt(lex_doc_comment), + parse_head, + opt(lex_whitespace), + arrow, + opt(lex_whitespace), + parse_body, + opt(lex_whitespace), + dot, + ))(input) + .map( + |(rest_input, (doc_comment, head, ws1, arrow, ws2, body, ws3, dot))| { + ( + rest_input, + Statement::Rule { + span: outer_span(input_span, rest_input), + doc_comment, + head, + ws1, + arrow, + ws2, + body, + ws3, + dot, + }, + ) + }, + ) + } + + fn parse_head<'a>(input: Span<'a>) -> IResult>> { + parse_atom_list(input, parse_head_atoms) + } + + fn parse_body<'a>(input: Span<'a>) -> IResult>> { + parse_atom_list(input, parse_body_atoms) + } + + fn parse_directive<'a>(input: Span<'a>) -> IResult> { + alt(( + parse_base_directive, + parse_prefix_directive, + parse_import_directive, + parse_export_directive, + parse_output_directive, + ))(input) + .map(|(rest, directive)| (rest, Statement::Directive(directive))) + } + + fn parse_base_directive<'a>(input: Span<'a>) -> IResult> { + let input_span = input.clone(); + tuple(( + opt(lex_doc_comment), + recognize(pair( + at, + verify(lex_ident, |token| token.kind == TokenKind::Base), )), - ignore_ws_and_comments(lexer::dot), + opt(lex_whitespace), + lex_iri, + opt(lex_whitespace), + dot, ))(input) - .map(|(rest, (head, _, body, _))| { + .map(|(rest_input, (doc_comment, kw, ws1, base_iri, ws2, dot))| { ( - rest, - ast::Statement::Rule { - head: head.iter().map(|x| ast::Atom::Atom(x.clone())).collect(), - body: body - .iter() - .map(|(tilde, atom)| { - if let None = tilde { - ast::Atom::Atom(atom.clone()) - } else { - ast::Atom::NegativeAtom(atom.clone()) - } - }) - .collect(), + rest_input, + Directive::Base { + span: outer_span(input_span, rest_input), + doc_comment, + kw: Token { + kind: TokenKind::Base, + span: kw, + }, + ws1, + base_iri, + ws2, + dot, }, ) }) } - fn parse_directive<'a>(input: Span<'a>) -> IResult> { - alt(( - ignore_ws_and_comments(parse_base_directive), - ignore_ws_and_comments(parse_prefix_directive), - ignore_ws_and_comments(parse_import_directive), - ignore_ws_and_comments(parse_export_directive), - ignore_ws_and_comments(parse_output_directive), + fn parse_prefix_directive<'a>(input: Span<'a>) -> IResult> { + let input_span = input.clone(); + tuple(( + opt(lex_doc_comment), + recognize(pair( + at, + verify(lex_ident, |token| token.kind == TokenKind::Prefix), + )), + opt(lex_whitespace), + recognize(pair(lex_ident, colon)), + opt(lex_whitespace), + lex_iri, + opt(lex_whitespace), + dot, + ))(input) + .map( + |(rest_input, (doc_comment, kw, ws1, prefix, ws2, prefix_iri, ws3, dot))| { + ( + rest_input, + Directive::Prefix { + span: outer_span(input_span, rest_input), + doc_comment, + kw: Token { + kind: TokenKind::Prefix, + span: kw, + }, + ws1, + prefix: Token { + kind: TokenKind::Ident, + span: prefix, + }, + ws2, + prefix_iri, + ws3, + dot, + }, + ) + }, + ) + } + + fn parse_import_directive<'a>(input: Span<'a>) -> IResult> { 
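+        // Like `parse_base_directive` and `parse_prefix_directive` above, this
+        // parser accepts an optional doc comment before the recognized
+        // `@`-keyword pair; this `@import` parser and the `@export` parser
+        // below are structurally identical and differ only in the keyword and
+        // the `TokenKind` they record.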
+ let input_span = input.clone(); + tuple(( + opt(lex_doc_comment), + recognize(pair( + at, + verify(lex_ident, |token| token.kind == TokenKind::Import), + )), + lex_whitespace, + lex_ident, + opt(lex_whitespace), + arrow, + opt(lex_whitespace), + parse_map, + opt(lex_whitespace), + dot, ))(input) - .map(|(rest, directive)| (rest, ast::Statement::Directive(directive))) + .map( + |(rest_input, (doc_comment, kw, ws1, predicate, ws2, arrow, ws3, map, ws4, dot))| { + ( + rest_input, + Directive::Import { + span: outer_span(input_span, rest_input), + doc_comment, + kw: Token { + kind: TokenKind::Import, + span: kw, + }, + ws1, + predicate, + ws2, + arrow, + ws3, + map, + ws4, + dot, + }, + ) + }, + ) } - fn parse_base_directive<'a>(input: Span<'a>) -> IResult> { + fn parse_export_directive<'a>(input: Span<'a>) -> IResult> { + let input_span = input.clone(); tuple(( - lexer::at, - verify(lex_ident, |token| token.kind == TokenKind::Base), - ignore_ws_and_comments(lex_iri), - ignore_ws_and_comments(lexer::dot), + opt(lex_doc_comment), + recognize(pair( + at, + verify(lex_ident, |token| token.kind == TokenKind::Export), + )), + lex_whitespace, + lex_ident, + opt(lex_whitespace), + arrow, + opt(lex_whitespace), + parse_map, + opt(lex_whitespace), + dot, ))(input) - .map(|(rest, (_, kw, base_iri, _))| (rest, ast::Directive::Base { kw, base_iri })) + .map( + |(rest_input, (doc_comment, kw, ws1, predicate, ws2, arrow, ws3, map, ws4, dot))| { + ( + rest_input, + Directive::Export { + span: outer_span(input_span, rest_input), + doc_comment, + kw: Token { + kind: TokenKind::Export, + span: kw, + }, + ws1, + predicate, + ws2, + arrow, + ws3, + map, + ws4, + dot, + }, + ) + }, + ) } - fn parse_prefix_directive<'a>(input: Span<'a>) -> IResult> { + fn parse_output_directive<'a>(input: Span<'a>) -> IResult> { + let input_span = input.clone(); tuple(( - lexer::at, - verify(lex_ident, |token| token.kind == TokenKind::Prefix), - ignore_ws_and_comments(lex_ident), - ignore_ws_and_comments(lexer::colon), - ignore_ws_and_comments(lex_iri), - ignore_ws_and_comments(lexer::dot), + opt(lex_doc_comment), + at, + verify(lex_ident, |token| token.kind == TokenKind::Output), + ignore_ws_and_comments(separated_list0(comma, ignore_ws_and_comments(lex_ident))), + ignore_ws_and_comments(dot), ))(input) - .map(|(rest, (_, kw, prefix, _, prefix_iri, _))| { + .map(|(rest_input, (doc_comment, _, kw, predicates, _))| { ( - rest, - ast::Directive::Prefix { + rest_input, + Directive::Output { + span: outer_span(input_span, rest_input), + doc_comment, kw, - prefix, - prefix_iri, + predicates, }, ) }) } - fn parse_import_directive<'a>(input: Span<'a>) -> IResult> { - tuple(( - lexer::at, - verify(lex_ident, |token| token.kind == TokenKind::Import), - ignore_ws_and_comments(lex_ident), - ignore_ws_and_comments(lexer::arrow), - ignore_ws_and_comments(parse_map), - ignore_ws_and_comments(lexer::dot), - ))(input) - .map(|(rest, (_, kw, predicate, _, map, _))| { - (rest, ast::Directive::Import { kw, predicate, map }) + fn parse_atom_list<'a>( + input: Span<'a>, + parse_atom: fn(Span<'a>) -> IResult>, + ) -> IResult>> { + let input_span = input.clone(); + pair( + parse_atom, + many0(tuple(( + opt(lex_whitespace), + comma, + opt(lex_whitespace), + parse_atom, + ))), + )(input) + .map(|(rest_input, (first, rest))| { + ( + rest_input, + List { + span: outer_span(input_span, rest_input), + first, + rest: if rest.is_empty() { None } else { Some(rest) }, + }, + ) }) } - fn parse_export_directive<'a>(input: Span<'a>) -> IResult> { - tuple(( - 
lexer::at, - verify(lex_ident, |token| token.kind == TokenKind::Export), - ignore_ws_and_comments(lex_ident), - ignore_ws_and_comments(lexer::arrow), - ignore_ws_and_comments(parse_map), - ignore_ws_and_comments(lexer::dot), - ))(input) - .map(|(rest, (_, kw, predicate, _, map, _))| { - (rest, ast::Directive::Export { kw, predicate, map }) - }) + fn parse_head_atoms<'a>(input: Span<'a>) -> IResult> { + alt((parse_normal_atom, parse_infix_atom, parse_map_atom))(input) } - fn parse_output_directive<'a>(input: Span<'a>) -> IResult> { - tuple(( - lexer::at, - verify(lex_ident, |token| token.kind == TokenKind::Output), - ignore_ws_and_comments(separated_list0( - lexer::comma, - ignore_ws_and_comments(lex_ident), - )), - ignore_ws_and_comments(lexer::dot), + fn parse_body_atoms<'a>(input: Span<'a>) -> IResult> { + alt(( + parse_normal_atom, + parse_negative_atom, + parse_infix_atom, + parse_map_atom, ))(input) - .map(|(rest, (_, kw, predicates, _))| (rest, ast::Directive::Output { kw, predicates })) } - fn parse_atom<'a>(input: Span<'a>) -> IResult> { - todo!("`parse_atom`!") + fn parse_normal_atom<'a>(input: Span<'a>) -> IResult> { + parse_named_tuple(input) + .map(|(rest_input, named_tuple)| (rest_input, Atom::Positive(named_tuple))) + } + + fn parse_negative_atom<'a>(input: Span<'a>) -> IResult> { + let input_span = input.clone(); + pair(tilde, parse_named_tuple)(input).map(|(rest_input, (tilde, named_tuple))| { + ( + rest_input, + Atom::Negative { + span: outer_span(input_span, rest_input), + neg: tilde, + atom: named_tuple, + }, + ) + }) } - fn parse_negative_atom<'a>(input: Span<'a>) -> IResult> { - todo!("`parse_negative_atom`!") + fn parse_infix_atom<'a>(input: Span<'a>) -> IResult> { + let input_span = input.clone(); + tuple(( + parse_term, + opt(lex_whitespace), + parse_operation_token, + opt(lex_whitespace), + parse_term, + ))(input) + .map(|(rest_input, (lhs, ws1, operation, ws2, rhs))| { + ( + rest_input, + Atom::InfixAtom { + span: outer_span(input_span, rest_input), + lhs, + ws1, + operation, + ws2, + rhs, + }, + ) + }) } - fn parse_infix_atom<'a>(input: Span<'a>) -> IResult> { - tuple((parse_term, parse_operation_token, parse_term))(input).map( - |(rest, (lhs, operation, rhs))| { + fn parse_named_tuple<'a>(input: Span<'a>) -> IResult> { + let input_span = input.clone(); + tuple(( + lex_ident, + opt(lex_whitespace), + open_paren, + opt(lex_whitespace), + opt(parse_term_list), + opt(lex_whitespace), + close_paren, + ))(input) + .map( + |(rest_input, (identifier, ws1, open_paren, ws2, terms, ws3, close_paren))| { ( - rest, - ast::Atom::InfixAtom { - operation, - lhs, - rhs, + rest_input, + NamedTuple { + span: outer_span(input_span, rest_input), + identifier, + ws1, + open_paren, + ws2, + terms, + ws3, + close_paren, }, ) }, ) } - fn parse_named_tuple<'a>(input: Span<'a>) -> IResult> { + fn parse_map<'a>(input: Span<'a>) -> IResult> { + let input_span = input.clone(); tuple(( - lex_ident, - lexer::open_paren, - // ignore_ws_and_comments(separated_list0(lexer::comma, parse_term)), - ignore_ws_and_comments(separated_list0(comma, ignore_ws_and_comments(parse_term))), - ignore_ws_and_comments(lexer::close_paren), + opt(lex_ident), + opt(lex_whitespace), + open_brace, + opt(lex_whitespace), + parse_pair_list, + opt(lex_whitespace), + close_brace, ))(input) - .map(|(rest, (identifier, _, terms, _))| (rest, NamedTuple { identifier, terms })) + .map( + |(rest_input, (identifier, ws1, open_brace, ws2, pairs, ws3, close_brace))| { + ( + rest_input, + Map { + span: outer_span(input_span, 
rest_input), + identifier, + ws1, + open_brace, + ws2, + pairs, + ws3, + close_brace, + }, + ) + }, + ) } - fn parse_map<'a>(input: Span<'a>) -> IResult> { - tuple(( - opt(lex_ident), - ignore_ws_and_comments(open_brace), - separated_list0( - ignore_ws_and_comments(comma), - ignore_ws_and_comments(tuple((parse_term, equal, parse_term))), - ), - ignore_ws_and_comments(close_brace), + fn parse_map_atom<'a>(input: Span<'a>) -> IResult> { + parse_map(input).map(|(rest_input, map)| (rest_input, Atom::Map(map))) + } + + fn parse_pair_list<'a>( + input: Span<'a>, + ) -> IResult, Term<'a>>>>> { + let input_span = input.clone(); + opt(pair( + parse_pair, + many0(tuple(( + opt(lex_whitespace), + comma, + opt(lex_whitespace), + parse_pair, + ))), ))(input) - .map(|(rest, (identifier, _, vec_of_pairs, _))| { - let mut pairs = Vec::new(); - for (key, _, value) in vec_of_pairs { - pairs.push(Pair::new(key, value)); + .map(|(rest_input, pair_list)| { + if let Some((first, rest)) = pair_list { + ( + rest_input, + Some(List { + span: outer_span(input_span, rest_input), + first, + rest: if rest.is_empty() { None } else { Some(rest) }, + }), + ) + } else { + (rest_input, None) } - (rest, ast::Map { identifier, pairs }) }) } - fn parse_term<'a>(input: Span<'a>) -> IResult> { - // alt(( - // parse_primitive_term, - // parse_unary_term, - // parse_binary_term, - // parse_aggregation_term, - // parse_function_term, - // parse_map_term, - // ))(input) - ignore_ws_and_comments(alt((parse_primitive_term, parse_variable)))(input) + fn parse_pair<'a>(input: Span<'a>) -> IResult, Term<'a>>> { + let input_span = input.clone(); + tuple(( + parse_term, + opt(lex_whitespace), + equal, + opt(lex_whitespace), + parse_term, + ))(input) + .map(|(rest_input, (key, ws1, equal, ws2, value))| { + ( + rest_input, + Pair { + span: outer_span(input_span, rest_input), + key, + ws1, + equal, + ws2, + value, + }, + ) + }) } - fn parse_primitive_term<'a>(input: Span<'a>) -> IResult> { + fn parse_term_list<'a>(input: Span<'a>) -> IResult>> { + let input_span = input.clone(); + pair( + parse_term, + many0(tuple(( + opt(lex_whitespace), + comma, + opt(lex_whitespace), + parse_term, + ))), + )(input) + .map(|(rest_input, (first, rest))| { + ( + rest_input, + List { + span: outer_span(input_span, rest_input), + first, + rest: if rest.is_empty() { None } else { Some(rest) }, + }, + ) + }) + } + + fn parse_term<'a>(input: Span<'a>) -> IResult> { + alt(( + parse_primitive_term, + parse_variable, + parse_unary_term, + // parse_binary_term, + // parse_aggregation_term, + parse_function_term, + parse_map_term, + ))(input) + } + + fn parse_primitive_term<'a>(input: Span<'a>) -> IResult> { alt((lex_ident, lex_iri, lex_number, lex_string))(input) - .map(|(rest, term)| (rest, ast::Term::Primitive(term))) + .map(|(rest_input, term)| (rest_input, Term::Primitive(term))) } - fn parse_unary_term<'a>(input: Span<'a>) -> IResult> { - todo!("`parse_unary_term`!") + fn parse_unary_term<'a>(input: Span<'a>) -> IResult> { + let input_span = input.clone(); + pair(lex_operators, parse_term)(input).map(|(rest_input, (operation, term))| { + ( + rest_input, + Term::Unary { + span: outer_span(input_span, rest_input), + operation, + term: Box::new(term), + }, + ) + }) } - fn parse_binary_term<'a>(input: Span<'a>) -> IResult> { + fn parse_binary_term<'a>(input: Span<'a>) -> IResult> { todo!("`parse_binary_term`!") } - fn parse_aggregation_term<'a>(input: Span<'a>) -> IResult> { + fn parse_aggregation_term<'a>(input: Span<'a>) -> IResult> { 
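+        // Not implemented yet. Going by the planned `Term::Aggregation`
+        // node (operation token, parentheses, term list), one plausible
+        // surface form -- an assumption here, not settled syntax -- would
+        // be `#sum(?X, ?Y)`.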
todo!("`parse_aggregation_term`!") } - fn parse_function_term<'a>(input: Span<'a>) -> IResult> { - todo!("`parse_function_term`!") + fn parse_function_term<'a>(input: Span<'a>) -> IResult> { + parse_named_tuple(input) + .map(|(rest_input, named_tuple)| (rest_input, Term::Function(Box::new(named_tuple)))) } - fn parse_map_term<'a>(input: Span<'a>) -> IResult> { - todo!("`parse_map_term`!") + fn parse_map_term<'a>(input: Span<'a>) -> IResult> { + parse_map(input).map(|(rest_input, map)| (rest_input, Term::Map(Box::new(map)))) } - fn parse_variable<'a>(input: Span<'a>) -> IResult> { - ignore_ws_and_comments(pair(lexer::question_mark, lex_ident))(input).map( - |(rest, (question_mark, ident))| { - ( - rest, - ast::Term::Variable(Token { - kind: TokenKind::Variable, - span: combine_spans(question_mark.span, ident.span) - .expect("Spans were not adjacent in memory"), - }), - ) - }, - ) + fn parse_variable<'a>(input: Span<'a>) -> IResult> { + recognize(pair(question_mark, lex_ident))(input).map(|(rest, var)| { + ( + rest, + Term::Variable(Token { + kind: TokenKind::Variable, + span: var, + }), + ) + }) } fn parse_operation_token<'a>(input: Span<'a>) -> IResult> { @@ -2740,7 +3046,13 @@ mod new { #[cfg(test)] mod test { use super::*; - use crate::io::{lexer::*, parser::ast::*}; + use crate::io::{ + lexer::*, + parser::ast::*, + // parser::ast::{ + // atom::*, directive::*, map::*, named_tuple::*, program::*, statement::*, term::*, + // }, + }; macro_rules! S { ($offset:literal,$line:literal,$str:literal) => { @@ -2756,24 +3068,56 @@ mod new { let input = Span::new("a(B,C)."); assert_eq!( parse_program(input), - vec![ast::Statement::Fact { - atom: ast::Atom::Atom(NamedTuple { - identifier: Token { - kind: TokenKind::Ident, - span: S!(0, 1, "a"), - }, - terms: vec![ - Term::Primitive(Token { + Program { + span: input, + tl_doc_comment: None, + statements: vec![Statement::Fact { + span: S!(0, 1, "a(B,C)."), + doc_comment: None, + atom: Atom::Positive(NamedTuple { + span: S!(0, 1, "a(B,C)"), + identifier: Token { kind: TokenKind::Ident, - span: S!(2, 1, "B"), - }), - Term::Primitive(Token { - kind: TokenKind::Ident, - span: S!(4, 1, "C"), + span: S!(0, 1, "a"), + }, + ws1: None, + open_paren: Token { + kind: TokenKind::OpenParen, + span: S!(1, 1, "("), + }, + ws2: None, + terms: Some(List { + span: S!(2, 1, "B,C"), + first: Term::Primitive(Token { + kind: TokenKind::Ident, + span: S!(2, 1, "B"), + }), + rest: Some(vec![( + None, + Token { + kind: TokenKind::Comma, + span: S!(3, 1, ",") + }, + None, + Term::Primitive(Token { + kind: TokenKind::Ident, + span: S!(4, 1, "C"), + }), + )]), }), - ], - }), - }] + ws3: None, + close_paren: Token { + kind: TokenKind::CloseParen, + span: S!(5, 1, ")"), + }, + }), + ws: None, + dot: Token { + kind: TokenKind::Dot, + span: S!(6, 1, ".") + } + }], + } ) } @@ -2784,85 +3128,191 @@ mod new { ); assert_eq!( parse_program(input), - vec![ - ast::Statement::Directive(Directive::Base { - kw: Token { - kind: TokenKind::Base, - span: S!(1, 1, "base"), - }, - base_iri: Token { - kind: TokenKind::Iri, - span: S!(6, 1, "") - } - }), - ast::Statement::Directive(Directive::Prefix { - kw: Token { - kind: TokenKind::Prefix, - span: S!(33, 1, "prefix"), - }, - prefix: Token { - kind: TokenKind::Ident, - span: S!(40, 1, "rdfs"), - }, - prefix_iri: Token { - kind: TokenKind::Iri, - span: S!(45, 1, ""), - }, - }), - ast::Statement::Directive(Directive::Import { - kw: Token { - kind: TokenKind::Import, - span: S!(86, 1, "import"), - }, - predicate: Token { - kind: TokenKind::Ident, - 
span: S!(93, 1, "sourceA"), - }, - map: Map { - identifier: Some(Token { - kind: TokenKind::Ident, - span: S!(102, 1, "csv") + Program { + tl_doc_comment: None, + span: input, + statements: vec![ + Statement::Directive(Directive::Base { + span: S!(0, 1, "@base ."), + doc_comment: None, + kw: Token { + kind: TokenKind::Base, + span: S!(0, 1, "@base"), + }, + ws1: Some(Token { + kind: TokenKind::Whitespace, + span: S!(5, 1, " ") + }), + base_iri: Token { + kind: TokenKind::Iri, + span: S!(6, 1, "") + }, + ws2: None, + dot: Token { + kind: TokenKind::Dot, + span: S!(31, 1, ".") + }, + }), + Statement::Directive(Directive::Prefix { + span: S!( + 32, + 1, + "@prefix rdfs:." + ), + doc_comment: None, + kw: Token { + kind: TokenKind::Prefix, + span: S!(32, 1, "@prefix"), + }, + ws1: Some(Token { + kind: TokenKind::Whitespace, + span: S!(39, 1, " ") }), - pairs: vec![Pair { - key: Term::Primitive(Token { + prefix: Token { + kind: TokenKind::Ident, + span: S!(40, 1, "rdfs:"), + }, + ws2: None, + prefix_iri: Token { + kind: TokenKind::Iri, + span: S!(45, 1, ""), + }, + ws3: None, + dot: Token { + kind: TokenKind::Dot, + span: S!(84, 1, ".") + } + }), + Statement::Directive(Directive::Import { + span: S!( + 85, + 1, + r#"@import sourceA:-csv{resource="sources/dataA.csv"}."# + ), + doc_comment: None, + kw: Token { + kind: TokenKind::Import, + span: S!(85, 1, "@import"), + }, + ws1: Token { + kind: TokenKind::Whitespace, + span: S!(92, 1, " "), + }, + predicate: Token { + kind: TokenKind::Ident, + span: S!(93, 1, "sourceA"), + }, + ws2: None, + arrow: Token { + kind: TokenKind::Arrow, + span: S!(100, 1, ":-"), + }, + ws3: None, + map: Map { + span: S!(102, 1, r#"csv{resource="sources/dataA.csv"}"#), + identifier: Some(Token { kind: TokenKind::Ident, - span: S!(106, 1, "resource"), + span: S!(102, 1, "csv") }), - value: Term::Primitive(Token { - kind: TokenKind::String, - span: S!(115, 1, "\"sources/dataA.csv\""), - }) - }], - }, - }), - ast::Statement::Directive(Directive::Export { - kw: Token { - kind: TokenKind::Export, - span: S!(137, 1, "export"), - }, - predicate: Token { - kind: TokenKind::Ident, - span: S!(144, 1, "a"), - }, - map: Map { - identifier: Some(Token { + ws1: None, + open_brace: Token { + kind: TokenKind::OpenBrace, + span: S!(105, 1, "{") + }, + ws2: None, + pairs: Some(List { + span: S!(106, 1, "resource=\"sources/dataA.csv\""), + first: Pair { + span: S!(106, 1, "resource=\"sources/dataA.csv\""), + key: Term::Primitive(Token { + kind: TokenKind::Ident, + span: S!(106, 1, "resource"), + }), + ws1: None, + equal: Token { + kind: TokenKind::Equal, + span: S!(114, 1, "="), + }, + ws2: None, + value: Term::Primitive(Token { + kind: TokenKind::String, + span: S!(115, 1, "\"sources/dataA.csv\""), + }) + }, + rest: None, + }), + ws3: None, + close_brace: Token { + kind: TokenKind::CloseBrace, + span: S!(134, 1, "}") + }, + }, + ws4: None, + dot: Token { + kind: TokenKind::Dot, + span: S!(135, 1, ".") + } + }), + Statement::Directive(Directive::Export { + span: S!(136, 1, "@export a:-csv{}."), + doc_comment: None, + kw: Token { + kind: TokenKind::Export, + span: S!(136, 1, "@export"), + }, + ws1: Token { + kind: TokenKind::Whitespace, + span: S!(143, 1, " "), + }, + predicate: Token { kind: TokenKind::Ident, - span: S!(147, 1, "csv"), - }), - pairs: vec![] - } - }), - ast::Statement::Directive(Directive::Output { - kw: Token { - kind: TokenKind::Output, - span: S!(154, 1, "output") - }, - predicates: vec![Token { - kind: TokenKind::Ident, - span: S!(161, 1, "a") - }] - }), - ] + span: 
S!(144, 1, "a"), + }, + ws2: None, + arrow: Token { + kind: TokenKind::Arrow, + span: S!(145, 1, ":-"), + }, + ws3: None, + map: Map { + span: S!(147, 1, "csv{}"), + identifier: Some(Token { + kind: TokenKind::Ident, + span: S!(147, 1, "csv"), + }), + ws1: None, + open_brace: Token { + kind: TokenKind::OpenBrace, + span: S!(150, 1, "{"), + }, + ws2: None, + pairs: None, + ws3: None, + close_brace: Token { + kind: TokenKind::CloseBrace, + span: S!(151, 1, "}"), + }, + }, + ws4: None, + dot: Token { + kind: TokenKind::Dot, + span: S!(152, 1, "."), + }, + }), + Statement::Directive(Directive::Output { + span: S!(153, 1, "@output a."), + doc_comment: None, + kw: Token { + kind: TokenKind::Output, + span: S!(154, 1, "output") + }, + predicates: vec![Token { + kind: TokenKind::Ident, + span: S!(161, 1, "a") + }], + }), + ], + } ) } @@ -2886,54 +3336,92 @@ mod new { let input = Span::new("some(Fact, with, whitespace) . % and a super useful comment\n"); assert_eq!( parse_program(input), - vec![ast::Statement::Fact { - atom: Atom::Atom(NamedTuple { - identifier: Token { - kind: TokenKind::Ident, - span: S!(0, 1, "some"), - }, - terms: vec![ - Term::Primitive(Token { - kind: TokenKind::Ident, - span: S!(5, 1, "Fact") - }), - Term::Primitive(Token { - kind: TokenKind::Ident, - span: S!(11, 1, "with") + Program { + span: input, + tl_doc_comment: None, + statements: vec![ + Statement::Fact { + span: S!(0, 1, "some(Fact, with, whitespace) ."), + doc_comment: None, + atom: Atom::Positive(NamedTuple { + span: S!(0, 1, "some(Fact, with, whitespace)"), + identifier: Token { + kind: TokenKind::Ident, + span: S!(0, 1, "some"), + }, + ws1: None, + open_paren: Token { + kind: TokenKind::OpenParen, + span: S!(4, 1, "(") + }, + ws2: None, + terms: Some(List { + span: S!(5, 1, "Fact, with, whitespace"), + first: Term::Primitive(Token { + kind: TokenKind::Ident, + span: S!(5, 1, "Fact"), + }), + rest: Some(vec![ + ( + None, + Token { + kind: TokenKind::Comma, + span: S!(9, 1, ","), + }, + Some(Token { + kind: TokenKind::Whitespace, + span: S!(10, 1, " "), + }), + Term::Primitive(Token { + kind: TokenKind::Ident, + span: S!(11, 1, "with") + }), + ), + ( + None, + Token { + kind: TokenKind::Comma, + span: S!(15, 1, ","), + }, + Some(Token { + kind: TokenKind::Whitespace, + span: S!(16, 1, " "), + }), + Term::Primitive(Token { + kind: TokenKind::Ident, + span: S!(17, 1, "whitespace") + }), + ), + ]), + }), + ws3: None, + close_paren: Token { + kind: TokenKind::CloseParen, + span: S!(27, 1, ")") + }, }), - Term::Primitive(Token { - kind: TokenKind::Ident, - span: S!(17, 1, "whitespace") + ws: Some(Token { + kind: TokenKind::Whitespace, + span: S!(28, 1, " "), }), - ] - }) - }] + dot: Token { + kind: TokenKind::Dot, + span: S!(29, 1, "."), + }, + }, + Statement::Whitespace(Token { + kind: TokenKind::Whitespace, + span: S!(30, 1, " ") + }), + Statement::Comment(Token { + kind: TokenKind::Comment, + span: S!(31, 1, "% and a super useful comment\n") + }) + ], + } ) } - #[test] - fn combine_spans() { - use nom::bytes::complete::tag; - let source = "Some Input ;)"; - let input = Span::new(source); - let (input, first) = tag::<&str, Span, nom::error::Error<_>>("Some ")(input).unwrap(); - let (input, second) = tag::<&str, Span, nom::error::Error<_>>("Input")(input).unwrap(); - let span = super::combine_spans(first, second); - assert_eq!(span, Ok(Span::new("Some Input"))) - } - - #[test] - fn combine_spans_error() { - use nom::bytes::complete::tag; - let source = "Some Input ;)"; - let input = Span::new(source); - let (input, 
first) = tag::<&str, Span, nom::error::Error<_>>("Some")(input).unwrap(); - let (input, _) = tag::<&str, Span, nom::error::Error<_>>(" ")(input).unwrap(); - let (input, second) = tag::<&str, Span, nom::error::Error<_>>("Input")(input).unwrap(); - let span = super::combine_spans(first, second); - assert_eq!(span, Err(str_concat::Error::NotAdjacent)) - } - #[test] fn parser_test() { let str = diff --git a/nemo/src/io/parser/ast.rs b/nemo/src/io/parser/ast.rs index 7a2a2eebd..03c5e6ece 100644 --- a/nemo/src/io/parser/ast.rs +++ b/nemo/src/io/parser/ast.rs @@ -1,301 +1,292 @@ -use std::collections::BTreeMap; +use crate::io::lexer::{Span, Token}; +use std::fmt::Display; -use crate::io::lexer::Token; +pub(crate) mod atom; +pub(crate) mod directive; +pub(crate) mod map; +pub(crate) mod named_tuple; +pub(crate) mod program; +pub(crate) mod statement; +pub(crate) mod term; -struct Position { - offset: usize, - line: u32, - column: u32, -} - -pub(crate) type Program<'a> = Vec>; - -#[derive(Debug, PartialEq, Clone)] -pub(crate) enum Statement<'a> { - Directive(Directive<'a>), - Fact { - atom: Atom<'a>, - }, - Rule { - head: Vec>, - body: Vec>, - }, -} - -#[derive(Debug, PartialEq, Clone)] -pub(crate) enum Directive<'a> { - Base { - kw: Token<'a>, - base_iri: Token<'a>, - }, - Prefix { - kw: Token<'a>, - prefix: Token<'a>, - prefix_iri: Token<'a>, - }, - Import { - kw: Token<'a>, - predicate: Token<'a>, - map: Map<'a>, - }, - Export { - kw: Token<'a>, - predicate: Token<'a>, - map: Map<'a>, - }, - // maybe will be deprecated - Output { - kw: Token<'a>, - predicates: Vec>, - }, -} - -#[derive(Debug, PartialEq, Clone)] -pub(crate) enum Atom<'a> { - Atom(NamedTuple<'a>), - NegativeAtom(NamedTuple<'a>), - InfixAtom { - operation: Token<'a>, - lhs: Term<'a>, - rhs: Term<'a>, - }, - Map(Map<'a>), -} - -#[derive(Debug, PartialEq, Clone)] -pub(crate) enum Term<'a> { - Primitive(Token<'a>), - Variable(Token<'a>), - Unary { - operation: Token<'a>, - term: Box>, - }, - Binary { - operation: Token<'a>, - lhs: Box>, - rhs: Box>, - }, - Aggregation { - operation: Token<'a>, - terms: Vec>, - }, - Function(NamedTuple<'a>), - Map(Map<'a>), -} - -#[derive(Debug, PartialEq, Clone)] -pub(crate) struct NamedTuple<'a> { - pub(crate) identifier: Token<'a>, - pub(crate) terms: Vec>, -} - -#[derive(Debug, PartialEq, Clone)] -pub(crate) struct Map<'a> { - pub(crate) identifier: Option>, - pub(crate) pairs: Vec, Term<'a>>>, -} - -#[derive(Debug, PartialEq, Clone)] -pub(crate) struct Pair { - pub(crate) key: K, - pub(crate) value: V, -} -impl Pair { - pub fn new(key: K, value: V) -> Pair { - Pair { key, value } - } +pub(crate) trait AstNode: std::fmt::Debug { + fn children(&self) -> Option>; + fn span(&self) -> Span; + // fn position(&self) -> Position; } -#[derive(Debug, PartialEq, Clone)] -pub(crate) enum Node<'a> { - Statement(&'a Statement<'a>), - Directive(&'a Directive<'a>), - RuleHead(&'a Vec>), - RuleBody(&'a Vec>), - Atom(&'a Atom<'a>), - Term(&'a Term<'a>), - Terms(&'a Vec>), - Map(&'a Map<'a>), - KeyWord(&'a Token<'a>), - BaseIri(&'a Token<'a>), - Prefix(&'a Token<'a>), - PrefixIri(&'a Token<'a>), - Predicate(&'a Token<'a>), - Predicates(&'a Vec>), - Operation(&'a Token<'a>), - Lhs(&'a Term<'a>), - Rhs(&'a Term<'a>), - Identifier(&'a Token<'a>), - Pairs(&'a Vec, Term<'a>>>), - MapIdentifier(&'a Option>), - Primitive(&'a Token<'a>), - Variable(&'a Token<'a>), +pub(crate) struct Position { + pub(crate) offset: usize, + pub(crate) line: u32, + pub(crate) column: u32, } -trait AstNode { - fn children(&self) -> Vec; - // 
fn position(&self) -> Position; +#[derive(Debug, Clone, PartialEq)] +pub(crate) struct List<'a, T> { + pub(crate) span: Span<'a>, + pub(crate) first: T, + // ([ws]?[,][ws]?[T])* + pub(crate) rest: Option>, Token<'a>, Option>, T)>>, } - -impl<'a> AstNode for Program<'a> { - fn children(&self) -> Vec { +impl AstNode for List<'_, T> { + fn children(&self) -> Option> { let mut vec = Vec::new(); - for statement in self { - vec.push(Node::Statement(statement)) - } - vec - } - - // fn position(&self) -> Position { - // let first = self.get(0); - // match first { - // Some(elem) => { - // let span; - // match elem { - // Statement::Directive(directive) => match directive { - // Directive::Base { kw, base_iri } => span = kw.span, - // Directive::Prefix { - // kw, - // prefix, - // prefix_iri, - // } => span = kw.span, - // Directive::Import { kw, predicate, map } => span = kw.span, - // Directive::Export { kw, predicate, map } => span = kw.span, - // Directive::Output { kw, predicates } => span = kw.span, - // }, - // Statement::Fact { atom } => match atom { - // Atom::Atom { predicate, terms } => todo!(), - // Atom::InfixAtom { operation, lhs, rhs } => todo!(), - // Atom::Map(_) => todo!(), - // }, - // Statement::Rule { head, body } => todo!(), - // }; - // } - // None => Position { - // offset: 0, - // line: 1, - // column: 0, - // }, - // } - // } -} - -impl<'a> AstNode for Statement<'a> { - fn children(&self) -> Vec { - match self { - Statement::Directive(directive) => directive.children(), - Statement::Fact { atom } => vec![Node::Atom(atom)], - Statement::Rule { head, body } => { - vec![Node::RuleHead(head), Node::RuleBody(body)] + vec.push(&self.first as &dyn AstNode); + if let Some(rest) = &self.rest { + for (ws1, delim, ws2, item) in rest { + if let Some(ws) = ws1 { + vec.push(ws); + }; + vec.push(delim); + if let Some(ws) = ws2 { + vec.push(ws); + }; + vec.push(item); } - } + }; + Some(vec) } - // fn position(&self) -> Position { - // todo!() - // } -} - -impl<'a> AstNode for Directive<'a> { - fn children(&self) -> Vec { - match self { - Directive::Base { kw, base_iri } => { - vec![Node::KeyWord(kw), Node::BaseIri(base_iri)] - } - Directive::Prefix { - kw, - prefix, - prefix_iri, - } => vec![ - Node::KeyWord(kw), - Node::Prefix(prefix), - Node::PrefixIri(prefix_iri), - ], - Directive::Import { kw, predicate, map } => vec![ - Node::KeyWord(kw), - Node::Predicate(predicate), - Node::Map(map), - ], - Directive::Export { kw, predicate, map } => vec![ - Node::KeyWord(kw), - Node::Predicate(predicate), - Node::Map(map), - ], - Directive::Output { kw, predicates } => { - vec![Node::KeyWord(kw), Node::Predicates(predicates)] - } - } + fn span(&self) -> Span { + self.span } - - // fn position(&self) -> Position { - // todo!() - // } } -impl<'a> AstNode for Atom<'a> { - fn children(&self) -> Vec { - match self { - Atom::Atom(named_tuple) => { - vec![ - Node::Identifier(&named_tuple.identifier), - Node::Terms(&named_tuple.terms), - ] - } - Atom::NegativeAtom(named_tuple) => { - vec![ - Node::Identifier(&named_tuple.identifier), - Node::Terms(&named_tuple.terms), - ] - } - Atom::InfixAtom { - operation, - lhs, - rhs, - } => vec![Node::Operation(operation), Node::Lhs(lhs), Node::Rhs(rhs)], - Atom::Map(map) => map.children(), +fn get_all_tokens(node: &dyn AstNode) -> Vec<&dyn AstNode> { + let mut vec = Vec::new(); + if let Some(children) = node.children() { + for child in children { + vec.append(&mut get_all_tokens(child)); } - } - - // fn position(&self) -> Position { - // todo!() - // } + } else { + 
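+        // A node without children is a leaf, i.e. a `Token`; collecting
+        // the leaves in traversal order yields the tokens in source order.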
vec.push(node); + }; + vec } -impl<'a> AstNode for Term<'a> { - fn children(&self) -> Vec { - match self { - Term::Primitive(primitive) => vec![Node::Primitive(primitive)], - Term::Variable(var) => vec![Node::Variable(var)], - Term::Binary { - operation, - lhs, - rhs, - } => vec![Node::Operation(operation), Node::Lhs(lhs), Node::Rhs(rhs)], - Term::Unary { operation, term } => vec![Node::Operation(operation), Node::Term(term)], - Term::Aggregation { operation, terms } => { - vec![Node::Operation(operation), Node::Terms(terms)] - } - Term::Function(NamedTuple { identifier, terms }) => { - vec![Node::Identifier(identifier), Node::Terms(terms)] - } - Term::Map(map) => map.children(), - } +mod test { + use super::*; + use super::{atom::Atom, directive::Directive, named_tuple::NamedTuple, program::Program, statement::Statement, term::Term}; + use crate::io::lexer::TokenKind; + + macro_rules! s { + ($offset:literal,$line:literal,$str:literal) => { + unsafe { Span::new_from_raw_offset($offset, $line, $str, ()) } + }; } - // fn position(&self) -> Position { - // todo!() - // } -} + #[test] + fn ast_traversal() { + let input = "\ + %! This is just a test file.\n\ + %! So the documentation of the rules is not important.\n\ + %% This is the prefix used for datatypes\n\ + @prefix xsd: .\n\ + \n\ + % Facts\n\ + %% This is just an example predicate.\n\ + somePredicate(ConstA, ConstB).\n\ + \n\ + % Rules\n\ + %% This is just an example rule.\n\ + someHead(?VarA) :- somePredicate(?VarA, ConstB). % all constants that are in relation with ConstB\n"; + let span = Span::new(input); + let ast = Program { + span, + tl_doc_comment: Some(Token { + kind: TokenKind::TlDocComment, + span: s!(0, 1, "%! This is just a test file.\n%! So the documentation of the rules is not important.\n") + }), + statements: vec![ + Statement::Directive(Directive::Prefix { + span:s!(125,4,"@prefix xsd: ."), + doc_comment:Some(Token { + kind:TokenKind::DocComment, + span:s!(84,3,"%% This is the prefix used for datatypes\n") + }), + kw: Token{ + kind:TokenKind::Prefix, + span:s!(125,4,"@prefix") + } , + ws1:Some(Token{ + kind:TokenKind::Whitespace, + span:s!(132,4," ") + }) , + prefix: Token { + kind: TokenKind::PrefixIdent, + span: s!(133, 4, "xsd:"), + }, + ws2: Some(Token{ + kind:TokenKind::Whitespace, + span:s!(137,4," ") + }), + prefix_iri: Token { + kind: TokenKind::Iri, + span: s!(138, 4, ""), + }, + ws3: None, + dot: Token{ + kind:TokenKind::Dot, + span:s!(173,4,".") + } + }), + Statement::Whitespace(Token { + kind: TokenKind::Whitespace, + span: s!(174, 4, "\n\n"), + }), + Statement::Comment(Token { + kind: TokenKind::Comment, + span: s!(176, 6, "% Facts\n"), + }), + Statement::Fact { + span:s!(222,8,"somePredicate(ConstA, ConstB)."), + doc_comment: Some(Token { + kind: TokenKind::DocComment, + span:s!(184,7,"%% This is just an example predicate.\n") + }), + atom: Atom::Positive(NamedTuple { + span: s!(222,8,"somePredicate(ConstA, ConstB)"), + identifier: Token { + kind: TokenKind::Ident, + span: s!(222, 8, "somePredicate"), + }, + ws1:None , + open_paren:Token{ + kind:TokenKind::OpenParen, + span:s!(235,8,"(") + } , + ws2:None , + terms: Some(List { + span: s!(236, 8, "ConstA, ConstB"), + first: Term::Primitive(Token { + kind: TokenKind::Ident, + span: s!(236, 8, "ConstA"), + }), + rest: Some(vec![( + None, + Token { + kind: TokenKind::Comma, + span: s!(242, 8, ","), + }, + Some(Token { + kind: TokenKind::Whitespace, + span: s!(243, 8, " "), + }), + Term::Primitive(Token { + kind: TokenKind::Ident, + span: s!(244, 8, "ConstB"), 
+ }), + )]), + }), + ws3: None , + close_paren:Token { + kind: TokenKind::CloseParen, + span:s!(250,8,")") + } + }), + ws: None, + dot: Token { + kind: TokenKind::Dot, + span: s!(251,8,".") + } + }, + Statement::Whitespace(Token { + kind: TokenKind::Whitespace, + span: s!(252, 8, "\n\n"), + }), + Statement::Comment(Token { + kind: TokenKind::Comment, + span: s!(254, 10, "% Rules\n"), + }), + Statement::Rule { + span: s!(295,12,"someHead(?VarA) :- somePredicate(?VarA, ConstB)."), + doc_comment: Some(Token { kind: TokenKind::DocComment, span: s!(262,11,"%% This is just an example rule.\n") }), + head: List { + span: s!(295, 12, "someHead(?VarA)"), + first: Atom::Positive(NamedTuple { + span: s!(295,12,"someHead(?VarA)"), + identifier: Token { + kind: TokenKind::Ident, + span: s!(295, 12, "someHead"), + }, + ws1: None, + open_paren: Token { kind: TokenKind::OpenParen, span: s!(303,12,"(") }, + ws2: None, + terms: Some(List { + span: s!(304, 12, "?VarA"), + first: Term::Variable(Token { + kind: TokenKind::Variable, + span: s!(304, 12, "?VarA"), + }), + rest: None, + }), + ws3: None, + close_paren: Token { kind: TokenKind::CloseParen, span: s!(309,12,")") }, + }), + rest: None, + }, + ws1: Some(Token{kind:TokenKind::Whitespace,span:s!(310,12," ")}), + arrow: Token{kind:TokenKind::Arrow, span:s!(311,12,":-")}, + ws2: Some(Token{kind:TokenKind::Whitespace,span:s!(313,12," ")}), + body: List { + span: s!(314, 12, "somePredicate(?VarA, ConstB)"), + first: Atom::Positive(NamedTuple { + span: s!(314, 12,"somePredicate(?VarA, ConstB)"), + identifier: Token { + kind: TokenKind::Ident, + span: s!(314, 12, "somePredicate"), + }, + ws1: None, + open_paren: Token { kind: TokenKind::OpenParen, span: s!(327,12,"(") }, + ws2: None, + terms: Some(List { + span: s!(328, 12, "?Var, ConstB"), + first: Term::Variable(Token { + kind: TokenKind::Variable, + span: s!(328, 12, "?VarA"), + }), + rest: Some(vec![( + None, + Token { + kind: TokenKind::Comma, + span: s!(333, 12, ","), + }, + Some(Token { + kind: TokenKind::Whitespace, + span: s!(334, 12, " "), + }), + Term::Primitive(Token { + kind: TokenKind::Ident, + span: s!(335, 12, "ConstB"), + }), + )]), + }), + ws3: None, + close_paren: Token { kind: TokenKind::CloseParen, span: s!(341, 12,")") }, + }), + rest: None, + }, + ws3: None, + dot: Token{kind:TokenKind::Dot,span:s!(342, 12,".")}, + }, + Statement::Whitespace(Token { + kind: TokenKind::Whitespace, + span: s!(343, 12, " "), + }), + Statement::Comment(Token { + kind: TokenKind::Comment, + span: s!(346, 12, "% all constants that are in relation with ConstB\n"), + }), + ], + }; -impl<'a> AstNode for Map<'a> { - fn children(&self) -> Vec { - vec![ - Node::MapIdentifier(&self.identifier), - Node::Pairs(&self.pairs), - ] + let tokens1 = get_all_tokens(&ast); + assert_eq!(input, { + let mut result = String::new(); + for token in tokens1 { + result.push_str(token.span().fragment()); + } + result + }); } - - // fn position(&self) -> Position { - // todo!() - // } } diff --git a/nemo/src/io/parser/ast/atom.rs b/nemo/src/io/parser/ast/atom.rs new file mode 100644 index 000000000..f78e9b8a5 --- /dev/null +++ b/nemo/src/io/parser/ast/atom.rs @@ -0,0 +1,62 @@ +use super::map::Map; +use super::named_tuple::NamedTuple; +use super::term::Term; +use super::AstNode; +use crate::io::lexer::{Span, Token}; + +#[derive(Debug, Clone, PartialEq)] +pub(crate) enum Atom<'a> { + Positive(NamedTuple<'a>), + Negative { + span: Span<'a>, + neg: Token<'a>, + atom: NamedTuple<'a>, + }, + InfixAtom { + span: Span<'a>, + lhs: Term<'a>, + 
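+        // e.g. for `?x < 42`: `lhs` is `?x`, `operation` is `<` and `rhs`
+        // is `42`; whitespace around the operator lands in `ws1`/`ws2`.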
ws1: Option>, + operation: Token<'a>, + ws2: Option>, + rhs: Term<'a>, + }, + Map(Map<'a>), +} +impl AstNode for Atom<'_> { + fn children(&self) -> Option> { + match self { + Atom::Positive(named_tuple) => named_tuple.children(), + Atom::Negative { neg, atom, .. } => Some(vec![neg, atom]), + Atom::InfixAtom { + lhs, + ws1, + operation, + ws2, + rhs, + .. + } => { + let mut vec = Vec::new(); + vec.push(lhs as &dyn AstNode); + if let Some(ws) = ws1 { + vec.push(ws); + }; + vec.push(operation); + if let Some(ws) = ws2 { + vec.push(ws); + }; + vec.push(rhs); + Some(vec) + } + Atom::Map(map) => map.children(), + } + } + + fn span(&self) -> Span { + match self { + Atom::Positive(named_tuple) => named_tuple.span(), + Atom::Negative { span, .. } => *span, + Atom::InfixAtom { span, .. } => *span, + Atom::Map(map) => map.span(), + } + } +} diff --git a/nemo/src/io/parser/ast/directive.rs b/nemo/src/io/parser/ast/directive.rs new file mode 100644 index 000000000..a716a91d3 --- /dev/null +++ b/nemo/src/io/parser/ast/directive.rs @@ -0,0 +1,212 @@ +use super::map::Map; +use super::AstNode; +use crate::io::lexer::{Span, Token}; + +#[derive(Debug, Clone, PartialEq)] +pub(crate) enum Directive<'a> { + // "@base ." + Base { + span: Span<'a>, + doc_comment: Option>, + kw: Token<'a>, + ws1: Option>, + base_iri: Token<'a>, + ws2: Option>, + dot: Token<'a>, + }, + // "@prefix wikidata: ." + Prefix { + span: Span<'a>, + doc_comment: Option>, + kw: Token<'a>, + ws1: Option>, + prefix: Token<'a>, + ws2: Option>, + prefix_iri: Token<'a>, + ws3: Option>, + dot: Token<'a>, + }, + // "@import table :- csv{resource="path/to/file.csv"} ." + Import { + span: Span<'a>, + doc_comment: Option>, + kw: Token<'a>, + ws1: Token<'a>, + predicate: Token<'a>, + ws2: Option>, + arrow: Token<'a>, + ws3: Option>, + map: Map<'a>, + ws4: Option>, + dot: Token<'a>, + }, + // "@export result :- turtle{resource="out.ttl"} ." 
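+    // Note on `ws1` in `Import`/`Export`: it is a plain `Token` rather
+    // than an `Option`, because the parser requires whitespace between
+    // the keyword and the predicate name (`lex_whitespace` instead of
+    // `opt(lex_whitespace)`).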
+ Export { + span: Span<'a>, + doc_comment: Option>, + kw: Token<'a>, + ws1: Token<'a>, + predicate: Token<'a>, + ws2: Option>, + arrow: Token<'a>, + ws3: Option>, + map: Map<'a>, + ws4: Option>, + dot: Token<'a>, + }, + // maybe will get deprecated + Output { + span: Span<'a>, + doc_comment: Option>, + kw: Token<'a>, + predicates: Vec>, + }, +} +impl AstNode for Directive<'_> { + fn children(&self) -> Option> { + match self { + Directive::Base { + span, + doc_comment, + kw, + ws1, + base_iri, + ws2, + dot, + } => { + let mut vec = Vec::new(); + if let Some(dc) = doc_comment { + vec.push(dc as &dyn AstNode); + }; + vec.push(kw); + if let Some(ws) = ws1 { + vec.push(ws); + }; + vec.push(base_iri); + if let Some(ws) = ws2 { + vec.push(ws); + }; + vec.push(dot); + Some(vec) + } + Directive::Prefix { + span, + doc_comment, + kw, + ws1, + prefix, + ws2, + prefix_iri, + ws3, + dot, + } => { + let mut vec = Vec::new(); + if let Some(dc) = doc_comment { + vec.push(dc as &dyn AstNode); + }; + vec.push(kw); + if let Some(ws) = ws1 { + vec.push(ws); + }; + vec.push(prefix); + if let Some(ws) = ws2 { + vec.push(ws); + }; + vec.push(prefix_iri); + if let Some(ws) = ws3 { + vec.push(ws); + }; + vec.push(dot); + Some(vec) + } + Directive::Import { + span, + doc_comment, + kw, + ws1, + predicate, + ws2, + arrow, + ws3, + map, + ws4, + dot, + } => { + let mut vec = Vec::new(); + if let Some(dc) = doc_comment { + vec.push(dc as &dyn AstNode); + }; + vec.push(kw); + vec.push(ws1); + vec.push(predicate); + if let Some(ws) = ws2 { + vec.push(ws); + }; + vec.push(arrow); + if let Some(ws) = ws3 { + vec.push(ws); + }; + vec.push(map); + if let Some(ws) = ws4 { + vec.push(ws); + }; + vec.push(dot); + Some(vec) + } + Directive::Export { + span, + doc_comment, + kw, + ws1, + predicate, + ws2, + arrow, + ws3, + map, + ws4, + dot, + } => { + let mut vec = Vec::new(); + if let Some(dc) = doc_comment { + vec.push(dc as &dyn AstNode); + }; + vec.push(kw); + vec.push(ws1); + vec.push(predicate); + if let Some(ws) = ws2 { + vec.push(ws); + }; + vec.push(arrow); + if let Some(ws) = ws3 { + vec.push(ws); + }; + vec.push(map); + if let Some(ws) = ws4 { + vec.push(ws); + }; + vec.push(dot); + Some(vec) + } + Directive::Output { .. } => todo!(), + } + } + + fn span(&self) -> Span { + match self { + Directive::Base { span, .. } => *span, + Directive::Prefix { span, .. } => *span, + Directive::Import { span, .. } => *span, + Directive::Export { span, .. } => *span, + Directive::Output { span, .. 
} => *span, + } + } + + // fn position(&self) -> Position { + // let span = self.span(); + // Position { + // offset: span.location_offset(), + // line: span.location_line(), + // column: span.get_column() as u32, + // } + // } +} diff --git a/nemo/src/io/parser/ast/map.rs b/nemo/src/io/parser/ast/map.rs new file mode 100644 index 000000000..34261a51a --- /dev/null +++ b/nemo/src/io/parser/ast/map.rs @@ -0,0 +1,72 @@ +use super::term::Term; +use super::{AstNode, List}; +use crate::io::lexer::{Span, Token}; +use std::fmt::Debug; + +#[derive(Debug, Clone, PartialEq)] +pub(crate) struct Map<'a> { + pub(crate) span: Span<'a>, + pub(crate) identifier: Option>, + pub(crate) ws1: Option>, + pub(crate) open_brace: Token<'a>, + pub(crate) ws2: Option>, + pub(crate) pairs: Option, Term<'a>>>>, + pub(crate) ws3: Option>, + pub(crate) close_brace: Token<'a>, +} +impl AstNode for Map<'_> { + fn children(&self) -> Option> { + let mut vec = Vec::new(); + if let Some(identifier) = &self.identifier { + vec.push(identifier as &dyn AstNode); + }; + if let Some(ws) = &self.ws1 { + vec.push(ws); + } + vec.push(&self.open_brace); + if let Some(ws) = &self.ws2 { + vec.push(ws); + } + if let Some(pairs) = &self.pairs { + vec.push(pairs); + }; + if let Some(ws) = &self.ws3 { + vec.push(ws); + } + vec.push(&self.close_brace); + Some(vec) + } + + fn span(&self) -> Span { + self.span + } +} + +#[derive(Debug, Clone, PartialEq)] +pub(crate) struct Pair<'a, K, V> { + pub(crate) span: Span<'a>, + pub(crate) key: K, + pub(crate) ws1: Option>, + pub(crate) equal: Token<'a>, + pub(crate) ws2: Option>, + pub(crate) value: V, +} +impl AstNode for Pair<'_, K, V> { + fn children(&self) -> Option> { + let mut vec = Vec::new(); + vec.push(&self.key as &dyn AstNode); + if let Some(ws) = &self.ws1 { + vec.push(ws); + } + vec.push(&self.equal); + if let Some(ws) = &self.ws2 { + vec.push(ws); + } + vec.push(&self.value); + Some(vec) + } + + fn span(&self) -> Span { + self.span + } +} diff --git a/nemo/src/io/parser/ast/named_tuple.rs b/nemo/src/io/parser/ast/named_tuple.rs new file mode 100644 index 000000000..1a338e9c5 --- /dev/null +++ b/nemo/src/io/parser/ast/named_tuple.rs @@ -0,0 +1,40 @@ +use super::term::Term; +use super::{AstNode, List}; +use crate::io::lexer::{Span, Token}; + +#[derive(Debug, Clone, PartialEq)] +pub(crate) struct NamedTuple<'a> { + pub(crate) span: Span<'a>, + pub(crate) identifier: Token<'a>, + pub(crate) ws1: Option>, + pub(crate) open_paren: Token<'a>, + pub(crate) ws2: Option>, + pub(crate) terms: Option>>, + pub(crate) ws3: Option>, + pub(crate) close_paren: Token<'a>, +} +impl AstNode for NamedTuple<'_> { + fn children(&self) -> Option> { + let mut vec = Vec::new(); + vec.push(&self.identifier as &dyn AstNode); + if let Some(ws) = &self.ws1 { + vec.push(ws); + } + vec.push(&self.open_paren); + if let Some(ws) = &self.ws2 { + vec.push(ws); + } + if let Some(terms) = &self.terms { + vec.push(terms); + } + if let Some(ws) = &self.ws3 { + vec.push(ws); + } + vec.push(&self.close_paren); + Some(vec) + } + + fn span(&self) -> Span { + self.span + } +} diff --git a/nemo/src/io/parser/ast/program.rs b/nemo/src/io/parser/ast/program.rs new file mode 100644 index 000000000..2ec5b5826 --- /dev/null +++ b/nemo/src/io/parser/ast/program.rs @@ -0,0 +1,29 @@ +use super::statement::Statement; +use super::AstNode; +use crate::io::lexer::{Span, Token}; + +#[derive(Debug, Clone, PartialEq)] +pub(crate) struct Program<'a> { + pub(crate) span: Span<'a>, + pub(crate) tl_doc_comment: Option>, + pub(crate) statements: Vec>, 
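+    // Invariant (exercised by the `ast_traversal` test): concatenating
+    // the spans of all leaf tokens reachable via `children()` reproduces
+    // the original input verbatim.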
+} +impl AstNode for Program<'_> { + fn children(&self) -> Option> { + let mut vec = Vec::new(); + if let Some(dc) = &self.tl_doc_comment { + vec.push(dc as &dyn AstNode); + }; + // NOTE: The current implementation puts the doc comment and all the + // statements in the same vec, so there is no need to implement AstNode + // for Vec, which would be hard for the fn span() implementation + for statement in &self.statements { + vec.push(statement); + } + Some(vec) + } + + fn span(&self) -> Span { + self.span + } +} diff --git a/nemo/src/io/parser/ast/statement.rs b/nemo/src/io/parser/ast/statement.rs new file mode 100644 index 000000000..57ab9c335 --- /dev/null +++ b/nemo/src/io/parser/ast/statement.rs @@ -0,0 +1,96 @@ +use super::atom::Atom; +use super::directive::Directive; +use super::{AstNode, List}; +use crate::io::lexer::{Span, Token}; + +#[derive(Debug, Clone, PartialEq)] +pub(crate) enum Statement<'a> { + Directive(Directive<'a>), + Fact { + span: Span<'a>, + doc_comment: Option>, + atom: Atom<'a>, + ws: Option>, + dot: Token<'a>, + }, + Rule { + span: Span<'a>, + doc_comment: Option>, + head: List<'a, Atom<'a>>, + ws1: Option>, + arrow: Token<'a>, + ws2: Option>, + body: List<'a, Atom<'a>>, + ws3: Option>, + dot: Token<'a>, + }, + Whitespace(Token<'a>), + Comment(Token<'a>), +} +impl AstNode for Statement<'_> { + fn children(&self) -> Option> { + match self { + Statement::Directive(directive) => directive.children(), + Statement::Fact { + doc_comment, + atom, + ws, + dot, + .. + } => { + let mut vec = Vec::new(); + if let Some(dc) = doc_comment { + vec.push(dc as &dyn AstNode); + }; + vec.push(atom); + if let Some(ws) = ws { + vec.push(ws); + } + vec.push(dot); + Some(vec) + } + Statement::Rule { + doc_comment, + head, + ws1, + arrow, + ws2, + body, + ws3, + dot, + .. + } => { + let mut vec = Vec::new(); + if let Some(dc) = doc_comment { + vec.push(dc as &dyn AstNode); + }; + vec.push(head as &dyn AstNode); + if let Some(ws) = ws1 { + vec.push(ws); + }; + vec.push(arrow); + if let Some(ws) = ws2 { + vec.push(ws); + }; + vec.push(body); + if let Some(ws) = ws3 { + vec.push(ws); + }; + vec.push(dot); + Some(vec) + } + Statement::Whitespace(ws) => Some(vec![ws]), + Statement::Comment(c) => Some(vec![c]), + } + } + + fn span(&self) -> Span { + match self { + Statement::Directive(directive) => directive.span(), + Statement::Fact { span, .. } => *span, + Statement::Rule { span, .. } => *span, + Statement::Whitespace(ws) => ws.span(), + Statement::Comment(c) => c.span(), + } + } +} diff --git a/nemo/src/io/parser/ast/term.rs b/nemo/src/io/parser/ast/term.rs new file mode 100644 index 000000000..8b35182e3 --- /dev/null +++ b/nemo/src/io/parser/ast/term.rs @@ -0,0 +1,103 @@ +use super::map::Map; +use super::named_tuple::NamedTuple; +use super::AstNode; +use super::List; +use crate::io::lexer::{Span, Token}; + +#[derive(Debug, Clone, PartialEq)] +pub(crate) enum Term<'a> { + Primitive(Token<'a>), + Variable(Token<'a>), + // TODO: Is whitespace needed? 
Figure out how unary terms look + Unary { + span: Span<'a>, + operation: Token<'a>, + term: Box>, + }, + Binary { + span: Span<'a>, + lhs: Box>, + ws1: Option>, + operation: Token<'a>, + ws2: Option>, + rhs: Box>, + }, + Aggregation { + span: Span<'a>, + operation: Token<'a>, + open_paren: Token<'a>, + ws1: Option>, + terms: Box>>, + ws2: Option>, + close_paren: Token<'a>, + }, + Function(Box>), + Map(Box>), +} +impl AstNode for Term<'_> { + fn children(&self) -> Option> { + match self { + Term::Primitive(token) => Some(vec![token]), + Term::Variable(token) => Some(vec![token]), + Term::Unary { + operation, term, .. + } => Some(vec![operation, &**term]), + Term::Binary { + lhs, + ws1, + operation, + ws2, + rhs, + .. + } => { + let mut vec = Vec::new(); + vec.push(&**lhs as &dyn AstNode); + if let Some(ws) = ws1 { + vec.push(ws); + }; + vec.push(operation); + if let Some(ws) = ws2 { + vec.push(ws); + }; + vec.push(&**rhs); + Some(vec) + } + Term::Aggregation { + operation, + open_paren, + ws1, + terms, + ws2, + close_paren, + .. + } => { + let mut vec = Vec::new(); + vec.push(operation as &dyn AstNode); + vec.push(open_paren); + if let Some(ws) = ws1 { + vec.push(ws); + } + vec.push(&**terms); + if let Some(ws) = ws2 { + vec.push(ws); + } + vec.push(close_paren); + Some(vec) + } + Term::Function(named_tuple) => named_tuple.children(), + Term::Map(map) => map.children(), + } + } + + fn span(&self) -> Span { + match self { + Term::Primitive(t) => t.span(), + Term::Variable(t) => t.span(), + Term::Unary { span, .. } => *span, + Term::Binary { span, .. } => *span, + Term::Aggregation { span, .. } => *span, + Term::Function(named_tuple) => named_tuple.span(), + Term::Map(map) => map.span(), + } + } +} From 42b996b07998f006663e4da5a7753165abe05256 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Tue, 9 Apr 2024 09:06:06 +0200 Subject: [PATCH 018/214] Change trivial conversion lint from 'deny' to 'warn' --- nemo/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo/src/lib.rs b/nemo/src/lib.rs index 246ab7f6c..8d5d47603 100644 --- a/nemo/src/lib.rs +++ b/nemo/src/lib.rs @@ -4,10 +4,10 @@ #![deny( missing_debug_implementations, missing_copy_implementations, - trivial_casts, trivial_numeric_casts )] #![warn( + trivial_casts, missing_docs, unused_import_braces, unused_qualifications, From 1db8842c637e7a9a94fa3b5f5b1a71a3cab14642 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Tue, 9 Apr 2024 09:12:37 +0200 Subject: [PATCH 019/214] Add Testfile --- nemo/src/io/parser.rs | 3 +-- testfile.rls | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) create mode 100644 testfile.rls diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index b29afe701..6f5ca55e8 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -3424,8 +3424,7 @@ mod new { #[test] fn parser_test() { - let str = - std::fs::read_to_string("../testfile.rls").expect("`../testfile.rls` not found"); + let str = std::fs::read_to_string("../testfile.rls").expect("testfile not found"); let input = Span::new(str.as_str()); dbg!(parse_program(input)); // assert!(false); diff --git a/testfile.rls b/testfile.rls new file mode 100644 index 000000000..b5f5d6db9 --- /dev/null +++ b/testfile.rls @@ -0,0 +1,18 @@ +@base . +@prefix rdfs: . +@import sourceA :- csv { resource = "sources/dataA.csv" } . +@export a :- csv {} . +@output a . + +% Facts: +father(alice, bob). +mother(bob, carla). +father(bob, darius). +mother(alice, carla). + +% Rules: +parent(?X, ?Y) :- mother(?X, ?Y). 
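+% A recursive rule would exercise the same grammar; kept as a comment, e.g.:
+% ancestor(?X, ?Z) :- parent(?X, ?Y), ancestor(?Y, ?Z) .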
+parent(?X, ?Y) :- father(?X, ?Y). +parent( ?X , ?Y ) :- ~sibling( ?X , ?Y ) . +a(?x) :- b(?x, B) . +s(4) :- s(3). From ae0f8f91875e516bc663d04f75db5019142853be Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Tue, 9 Apr 2024 10:04:54 +0200 Subject: [PATCH 020/214] Add and implement position method on AstNode trait --- nemo/src/io/parser/ast.rs | 12 +++++++++++- nemo/src/io/parser/ast/atom.rs | 9 +++++++++ nemo/src/io/parser/ast/directive.rs | 16 ++++++++-------- nemo/src/io/parser/ast/map.rs | 7 +++++++ nemo/src/io/parser/ast/named_tuple.rs | 8 ++++++++ nemo/src/io/parser/ast/program.rs | 8 ++++++++ nemo/src/io/parser/ast/statement.rs | 9 +++++++++ nemo/src/io/parser/ast/term.rs | 9 +++++++++ 8 files changed, 69 insertions(+), 9 deletions(-) diff --git a/nemo/src/io/parser/ast.rs b/nemo/src/io/parser/ast.rs index 03c5e6ece..de97d4d06 100644 --- a/nemo/src/io/parser/ast.rs +++ b/nemo/src/io/parser/ast.rs @@ -1,3 +1,5 @@ +use nom::Offset; + use crate::io::lexer::{Span, Token}; use std::fmt::Display; @@ -12,7 +14,7 @@ pub(crate) mod term; pub(crate) trait AstNode: std::fmt::Debug { fn children(&self) -> Option>; fn span(&self) -> Span; - // fn position(&self) -> Position; + fn position(&self) -> Position; } pub(crate) struct Position { @@ -50,6 +52,14 @@ impl AstNode for List<'_, T> { fn span(&self) -> Span { self.span } + + fn position(&self) -> Position { + Position { + offset: self.span.location_offset(), + line: self.span.location_line(), + column: self.span.get_column() as u32, + } + } } fn get_all_tokens(node: &dyn AstNode) -> Vec<&dyn AstNode> { diff --git a/nemo/src/io/parser/ast/atom.rs b/nemo/src/io/parser/ast/atom.rs index f78e9b8a5..bbe916a39 100644 --- a/nemo/src/io/parser/ast/atom.rs +++ b/nemo/src/io/parser/ast/atom.rs @@ -59,4 +59,13 @@ impl AstNode for Atom<'_> { Atom::Map(map) => map.span(), } } + + fn position(&self) -> super::Position { + let span = self.span(); + super::Position { + offset: span.location_offset(), + line: span.location_line(), + column: span.get_column() as u32, + } + } } diff --git a/nemo/src/io/parser/ast/directive.rs b/nemo/src/io/parser/ast/directive.rs index a716a91d3..b3d17c9c8 100644 --- a/nemo/src/io/parser/ast/directive.rs +++ b/nemo/src/io/parser/ast/directive.rs @@ -201,12 +201,12 @@ impl AstNode for Directive<'_> { } } - // fn position(&self) -> Position { - // let span = self.span(); - // Position { - // offset: span.location_offset(), - // line: span.location_line(), - // column: span.get_column() as u32, - // } - // } + fn position(&self) -> Position { + let span = self.span(); + Position { + offset: span.location_offset(), + line: span.location_line(), + column: span.get_column() as u32, + } + } } diff --git a/nemo/src/io/parser/ast/map.rs b/nemo/src/io/parser/ast/map.rs index 34261a51a..ea420a02a 100644 --- a/nemo/src/io/parser/ast/map.rs +++ b/nemo/src/io/parser/ast/map.rs @@ -69,4 +69,11 @@ impl AstNode for Pair<'_, K, V> { fn span(&self) -> Span { self.span } + fn position(&self) -> Position { + Position { + offset: self.span.location_offset(), + line: self.span.location_line(), + column: self.span.get_column() as u32, + } + } } diff --git a/nemo/src/io/parser/ast/named_tuple.rs b/nemo/src/io/parser/ast/named_tuple.rs index 1a338e9c5..f961dcb07 100644 --- a/nemo/src/io/parser/ast/named_tuple.rs +++ b/nemo/src/io/parser/ast/named_tuple.rs @@ -37,4 +37,12 @@ impl AstNode for NamedTuple<'_> { fn span(&self) -> Span { self.span } + + fn position(&self) -> Position { + Position { + offset: self.span.location_offset(), + line: 
self.span.location_line(), + column: self.span.get_column() as u32, + } + } } diff --git a/nemo/src/io/parser/ast/program.rs b/nemo/src/io/parser/ast/program.rs index 2ec5b5826..23dc43cb1 100644 --- a/nemo/src/io/parser/ast/program.rs +++ b/nemo/src/io/parser/ast/program.rs @@ -26,4 +26,12 @@ impl AstNode for Program<'_> { fn span(&self) -> Span { self.span } + + fn position(&self) -> Position { + Position { + offset: self.span.location_offset(), + line: self.span.location_line(), + column: self.span.get_column() as u32, + } + } } diff --git a/nemo/src/io/parser/ast/statement.rs b/nemo/src/io/parser/ast/statement.rs index 57ab9c335..c126ec480 100644 --- a/nemo/src/io/parser/ast/statement.rs +++ b/nemo/src/io/parser/ast/statement.rs @@ -93,4 +93,13 @@ impl AstNode for Statement<'_> { Statement::Comment(c) => c.span(), } } + + fn position(&self) -> Position { + let span = self.span(); + Position { + offset: span.location_offset(), + line: span.location_line(), + column: span.get_column() as u32, + } + } } diff --git a/nemo/src/io/parser/ast/term.rs b/nemo/src/io/parser/ast/term.rs index 8b35182e3..3fcb15190 100644 --- a/nemo/src/io/parser/ast/term.rs +++ b/nemo/src/io/parser/ast/term.rs @@ -100,4 +100,13 @@ impl AstNode for Term<'_> { Term::Map(map) => map.span(), } } + + fn position(&self) -> Position { + let span = self.span(); + Position { + offset: span.location_offset(), + line: span.location_line(), + column: span.get_column() as u32, + } + } } From f9ce3fbf14f2b6d942d7a9ee625e52053035a832 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Tue, 9 Apr 2024 10:24:46 +0200 Subject: [PATCH 021/214] Fix fn position() implementation --- nemo/src/io/lexer.rs | 14 +++++++------- nemo/src/io/parser/ast.rs | 7 ++++++- nemo/src/io/parser/ast/atom.rs | 8 ++++---- nemo/src/io/parser/ast/directive.rs | 4 ++-- nemo/src/io/parser/ast/map.rs | 13 +++++++++++-- nemo/src/io/parser/ast/named_tuple.rs | 4 ++-- nemo/src/io/parser/ast/program.rs | 4 ++-- nemo/src/io/parser/ast/statement.rs | 4 ++-- nemo/src/io/parser/ast/term.rs | 3 ++- 9 files changed, 38 insertions(+), 23 deletions(-) diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs index 521b6b82c..d55231291 100644 --- a/nemo/src/io/lexer.rs +++ b/nemo/src/io/lexer.rs @@ -195,13 +195,13 @@ impl<'a> crate::io::parser::ast::AstNode for Token<'a> { self.span } - // fn position(&self) -> Position { - // Position { - // offset: self.span.location_offset(), - // line: self.span.location_line(), - // column: self.span.get_column() as u32, - // } - // } + fn position(&self) -> Position { + Position { + offset: self.span.location_offset(), + line: self.span.location_line(), + column: self.span.get_utf8_column() as u32, + } + } } macro_rules! 
syntax { diff --git a/nemo/src/io/parser/ast.rs b/nemo/src/io/parser/ast.rs index de97d4d06..62e1632aa 100644 --- a/nemo/src/io/parser/ast.rs +++ b/nemo/src/io/parser/ast.rs @@ -15,6 +15,7 @@ pub(crate) trait AstNode: std::fmt::Debug { fn children(&self) -> Option>; fn span(&self) -> Span; fn position(&self) -> Position; + // fn is_token(&self) -> bool; } pub(crate) struct Position { @@ -57,9 +58,13 @@ impl AstNode for List<'_, T> { Position { offset: self.span.location_offset(), line: self.span.location_line(), - column: self.span.get_column() as u32, + column: self.span.get_utf8_column() as u32, } } + + // fn is_token(&self) -> bool { + // false + // } } fn get_all_tokens(node: &dyn AstNode) -> Vec<&dyn AstNode> { diff --git a/nemo/src/io/parser/ast/atom.rs b/nemo/src/io/parser/ast/atom.rs index bbe916a39..a554fe378 100644 --- a/nemo/src/io/parser/ast/atom.rs +++ b/nemo/src/io/parser/ast/atom.rs @@ -1,7 +1,7 @@ -use super::map::Map; use super::named_tuple::NamedTuple; use super::term::Term; use super::AstNode; +use super::{map::Map, Position}; use crate::io::lexer::{Span, Token}; #[derive(Debug, Clone, PartialEq)] @@ -60,12 +60,12 @@ impl AstNode for Atom<'_> { } } - fn position(&self) -> super::Position { + fn position(&self) -> Position { let span = self.span(); - super::Position { + Position { offset: span.location_offset(), line: span.location_line(), - column: span.get_column() as u32, + column: span.get_utf8_column() as u32, } } } diff --git a/nemo/src/io/parser/ast/directive.rs b/nemo/src/io/parser/ast/directive.rs index b3d17c9c8..9d5e1c28c 100644 --- a/nemo/src/io/parser/ast/directive.rs +++ b/nemo/src/io/parser/ast/directive.rs @@ -1,5 +1,5 @@ -use super::map::Map; use super::AstNode; +use super::{map::Map, Position}; use crate::io::lexer::{Span, Token}; #[derive(Debug, Clone, PartialEq)] @@ -206,7 +206,7 @@ impl AstNode for Directive<'_> { Position { offset: span.location_offset(), line: span.location_line(), - column: span.get_column() as u32, + column: span.get_utf8_column() as u32, } } } diff --git a/nemo/src/io/parser/ast/map.rs b/nemo/src/io/parser/ast/map.rs index ea420a02a..c6d352d14 100644 --- a/nemo/src/io/parser/ast/map.rs +++ b/nemo/src/io/parser/ast/map.rs @@ -1,5 +1,5 @@ use super::term::Term; -use super::{AstNode, List}; +use super::{AstNode, List, Position}; use crate::io::lexer::{Span, Token}; use std::fmt::Debug; @@ -40,6 +40,14 @@ impl AstNode for Map<'_> { fn span(&self) -> Span { self.span } + + fn position(&self) -> Position { + Position { + offset: self.span.location_offset(), + line: self.span.location_line(), + column: self.span.get_utf8_column() as u32, + } + } } #[derive(Debug, Clone, PartialEq)] @@ -69,11 +77,12 @@ impl AstNode for Pair<'_, K, V> { fn span(&self) -> Span { self.span } + fn position(&self) -> Position { Position { offset: self.span.location_offset(), line: self.span.location_line(), - column: self.span.get_column() as u32, + column: self.span.get_utf8_column() as u32, } } } diff --git a/nemo/src/io/parser/ast/named_tuple.rs b/nemo/src/io/parser/ast/named_tuple.rs index f961dcb07..36695d12e 100644 --- a/nemo/src/io/parser/ast/named_tuple.rs +++ b/nemo/src/io/parser/ast/named_tuple.rs @@ -1,5 +1,5 @@ use super::term::Term; -use super::{AstNode, List}; +use super::{AstNode, List, Position}; use crate::io::lexer::{Span, Token}; #[derive(Debug, Clone, PartialEq)] @@ -42,7 +42,7 @@ impl AstNode for NamedTuple<'_> { Position { offset: self.span.location_offset(), line: self.span.location_line(), - column: self.span.get_column() as u32, + 
column: self.span.get_utf8_column() as u32, } } } diff --git a/nemo/src/io/parser/ast/program.rs b/nemo/src/io/parser/ast/program.rs index 23dc43cb1..8f99c7f8b 100644 --- a/nemo/src/io/parser/ast/program.rs +++ b/nemo/src/io/parser/ast/program.rs @@ -1,5 +1,5 @@ -use super::statement::Statement; use super::AstNode; +use super::{statement::Statement, Position}; use crate::io::lexer::{Span, Token}; #[derive(Debug, Clone, PartialEq)] @@ -31,7 +31,7 @@ impl AstNode for Program<'_> { Position { offset: self.span.location_offset(), line: self.span.location_line(), - column: self.span.get_column() as u32, + column: self.span.get_utf8_column() as u32, } } } diff --git a/nemo/src/io/parser/ast/statement.rs b/nemo/src/io/parser/ast/statement.rs index c126ec480..284f28ccb 100644 --- a/nemo/src/io/parser/ast/statement.rs +++ b/nemo/src/io/parser/ast/statement.rs @@ -1,6 +1,6 @@ use super::atom::Atom; use super::directive::Directive; -use super::{AstNode, List}; +use super::{AstNode, List, Position}; use crate::io::lexer::{Span, Token}; #[derive(Debug, Clone, PartialEq)] @@ -99,7 +99,7 @@ impl AstNode for Statement<'_> { Position { offset: span.location_offset(), line: span.location_line(), - column: span.get_column() as u32, + column: span.get_utf8_column() as u32, } } } diff --git a/nemo/src/io/parser/ast/term.rs b/nemo/src/io/parser/ast/term.rs index 3fcb15190..bcf29961f 100644 --- a/nemo/src/io/parser/ast/term.rs +++ b/nemo/src/io/parser/ast/term.rs @@ -2,6 +2,7 @@ use super::map::Map; use super::named_tuple::NamedTuple; use super::AstNode; use super::List; +use super::Position; use crate::io::lexer::{Span, Token}; #[derive(Debug, Clone, PartialEq)] @@ -106,7 +107,7 @@ impl AstNode for Term<'_> { Position { offset: span.location_offset(), line: span.location_line(), - column: span.get_column() as u32, + column: span.get_utf8_column() as u32, } } } From 3f09a27bb16dcf79507fc9fc960774eae54dfcad Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Tue, 9 Apr 2024 10:29:09 +0200 Subject: [PATCH 022/214] Add method is_token for trait AstNode and implement it --- nemo/src/io/lexer.rs | 4 ++++ nemo/src/io/parser/ast.rs | 8 ++++---- nemo/src/io/parser/ast/atom.rs | 4 ++++ nemo/src/io/parser/ast/directive.rs | 4 ++++ nemo/src/io/parser/ast/map.rs | 8 ++++++++ nemo/src/io/parser/ast/named_tuple.rs | 4 ++++ nemo/src/io/parser/ast/program.rs | 4 ++++ nemo/src/io/parser/ast/statement.rs | 4 ++++ nemo/src/io/parser/ast/term.rs | 4 ++++ 9 files changed, 40 insertions(+), 4 deletions(-) diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs index d55231291..142abcafd 100644 --- a/nemo/src/io/lexer.rs +++ b/nemo/src/io/lexer.rs @@ -202,6 +202,10 @@ impl<'a> crate::io::parser::ast::AstNode for Token<'a> { column: self.span.get_utf8_column() as u32, } } + + fn is_token(&self) -> bool { + true + } } macro_rules! 
syntax { diff --git a/nemo/src/io/parser/ast.rs b/nemo/src/io/parser/ast.rs index 62e1632aa..34b8a4f05 100644 --- a/nemo/src/io/parser/ast.rs +++ b/nemo/src/io/parser/ast.rs @@ -15,7 +15,7 @@ pub(crate) trait AstNode: std::fmt::Debug { fn children(&self) -> Option>; fn span(&self) -> Span; fn position(&self) -> Position; - // fn is_token(&self) -> bool; + fn is_token(&self) -> bool; } pub(crate) struct Position { @@ -62,9 +62,9 @@ impl AstNode for List<'_, T> { } } - // fn is_token(&self) -> bool { - // false - // } + fn is_token(&self) -> bool { + false + } } fn get_all_tokens(node: &dyn AstNode) -> Vec<&dyn AstNode> { diff --git a/nemo/src/io/parser/ast/atom.rs b/nemo/src/io/parser/ast/atom.rs index a554fe378..f3949e56a 100644 --- a/nemo/src/io/parser/ast/atom.rs +++ b/nemo/src/io/parser/ast/atom.rs @@ -68,4 +68,8 @@ impl AstNode for Atom<'_> { column: span.get_utf8_column() as u32, } } + + fn is_token(&self) -> bool { + false + } } diff --git a/nemo/src/io/parser/ast/directive.rs b/nemo/src/io/parser/ast/directive.rs index 9d5e1c28c..9788d7cf0 100644 --- a/nemo/src/io/parser/ast/directive.rs +++ b/nemo/src/io/parser/ast/directive.rs @@ -209,4 +209,8 @@ impl AstNode for Directive<'_> { column: span.get_utf8_column() as u32, } } + + fn is_token(&self) -> bool { + false + } } diff --git a/nemo/src/io/parser/ast/map.rs b/nemo/src/io/parser/ast/map.rs index c6d352d14..61657df30 100644 --- a/nemo/src/io/parser/ast/map.rs +++ b/nemo/src/io/parser/ast/map.rs @@ -48,6 +48,10 @@ impl AstNode for Map<'_> { column: self.span.get_utf8_column() as u32, } } + + fn is_token(&self) -> bool { + false + } } #[derive(Debug, Clone, PartialEq)] @@ -85,4 +89,8 @@ impl AstNode for Pair<'_, K, V> { column: self.span.get_utf8_column() as u32, } } + + fn is_token(&self) -> bool { + false + } } diff --git a/nemo/src/io/parser/ast/named_tuple.rs b/nemo/src/io/parser/ast/named_tuple.rs index 36695d12e..f9379dc70 100644 --- a/nemo/src/io/parser/ast/named_tuple.rs +++ b/nemo/src/io/parser/ast/named_tuple.rs @@ -45,4 +45,8 @@ impl AstNode for NamedTuple<'_> { column: self.span.get_utf8_column() as u32, } } + + fn is_token(&self) -> bool { + false + } } diff --git a/nemo/src/io/parser/ast/program.rs b/nemo/src/io/parser/ast/program.rs index 8f99c7f8b..a868da4e7 100644 --- a/nemo/src/io/parser/ast/program.rs +++ b/nemo/src/io/parser/ast/program.rs @@ -34,4 +34,8 @@ impl AstNode for Program<'_> { column: self.span.get_utf8_column() as u32, } } + + fn is_token(&self) -> bool { + false + } } diff --git a/nemo/src/io/parser/ast/statement.rs b/nemo/src/io/parser/ast/statement.rs index 284f28ccb..8dcb990d8 100644 --- a/nemo/src/io/parser/ast/statement.rs +++ b/nemo/src/io/parser/ast/statement.rs @@ -102,4 +102,8 @@ impl AstNode for Statement<'_> { column: span.get_utf8_column() as u32, } } + + fn is_token(&self) -> bool { + false + } } diff --git a/nemo/src/io/parser/ast/term.rs b/nemo/src/io/parser/ast/term.rs index bcf29961f..f96b1969c 100644 --- a/nemo/src/io/parser/ast/term.rs +++ b/nemo/src/io/parser/ast/term.rs @@ -110,4 +110,8 @@ impl AstNode for Term<'_> { column: span.get_utf8_column() as u32, } } + + fn is_token(&self) -> bool { + false + } } From 2c8ad99dd3ee150757db30c4a96558a028dc54fd Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Wed, 10 Apr 2024 15:33:58 +0200 Subject: [PATCH 023/214] Change trivial cast lint back to deny and add local exceptions --- nemo/src/io/parser/ast.rs | 1 + nemo/src/io/parser/ast/atom.rs | 1 + nemo/src/io/parser/ast/directive.rs | 4 ++++ nemo/src/io/parser/ast/map.rs | 2 ++ 
nemo/src/io/parser/ast/named_tuple.rs | 1 + nemo/src/io/parser/ast/program.rs | 1 + nemo/src/io/parser/ast/statement.rs | 4 +++- nemo/src/io/parser/ast/term.rs | 2 ++ nemo/src/lib.rs | 2 +- 9 files changed, 16 insertions(+), 2 deletions(-) diff --git a/nemo/src/io/parser/ast.rs b/nemo/src/io/parser/ast.rs index 34b8a4f05..7c00c2eb3 100644 --- a/nemo/src/io/parser/ast.rs +++ b/nemo/src/io/parser/ast.rs @@ -34,6 +34,7 @@ pub(crate) struct List<'a, T> { impl AstNode for List<'_, T> { fn children(&self) -> Option> { let mut vec = Vec::new(); + #[allow(trivial_casts)] vec.push(&self.first as &dyn AstNode); if let Some(rest) = &self.rest { for (ws1, delim, ws2, item) in rest { diff --git a/nemo/src/io/parser/ast/atom.rs b/nemo/src/io/parser/ast/atom.rs index f3949e56a..3bd65b00f 100644 --- a/nemo/src/io/parser/ast/atom.rs +++ b/nemo/src/io/parser/ast/atom.rs @@ -36,6 +36,7 @@ impl AstNode for Atom<'_> { .. } => { let mut vec = Vec::new(); + #[allow(trivial_casts)] vec.push(lhs as &dyn AstNode); if let Some(ws) = ws1 { vec.push(ws); diff --git a/nemo/src/io/parser/ast/directive.rs b/nemo/src/io/parser/ast/directive.rs index 9788d7cf0..c0d103697 100644 --- a/nemo/src/io/parser/ast/directive.rs +++ b/nemo/src/io/parser/ast/directive.rs @@ -76,6 +76,7 @@ impl AstNode for Directive<'_> { } => { let mut vec = Vec::new(); if let Some(dc) = doc_comment { + #[allow(trivial_casts)] vec.push(dc as &dyn AstNode); }; vec.push(kw); @@ -102,6 +103,7 @@ impl AstNode for Directive<'_> { } => { let mut vec = Vec::new(); if let Some(dc) = doc_comment { + #[allow(trivial_casts)] vec.push(dc as &dyn AstNode); }; vec.push(kw); @@ -134,6 +136,7 @@ impl AstNode for Directive<'_> { } => { let mut vec = Vec::new(); if let Some(dc) = doc_comment { + #[allow(trivial_casts)] vec.push(dc as &dyn AstNode); }; vec.push(kw); @@ -168,6 +171,7 @@ impl AstNode for Directive<'_> { } => { let mut vec = Vec::new(); if let Some(dc) = doc_comment { + #[allow(trivial_casts)] vec.push(dc as &dyn AstNode); }; vec.push(kw); diff --git a/nemo/src/io/parser/ast/map.rs b/nemo/src/io/parser/ast/map.rs index 61657df30..5ac13ec03 100644 --- a/nemo/src/io/parser/ast/map.rs +++ b/nemo/src/io/parser/ast/map.rs @@ -18,6 +18,7 @@ impl AstNode for Map<'_> { fn children(&self) -> Option> { let mut vec = Vec::new(); if let Some(identifier) = &self.identifier { + #[allow(trivial_casts)] vec.push(identifier as &dyn AstNode); }; if let Some(ws) = &self.ws1 { @@ -66,6 +67,7 @@ pub(crate) struct Pair<'a, K, V> { impl AstNode for Pair<'_, K, V> { fn children(&self) -> Option> { let mut vec = Vec::new(); + #[allow(trivial_casts)] vec.push(&self.key as &dyn AstNode); if let Some(ws) = &self.ws1 { vec.push(ws); diff --git a/nemo/src/io/parser/ast/named_tuple.rs b/nemo/src/io/parser/ast/named_tuple.rs index f9379dc70..e8e05df9a 100644 --- a/nemo/src/io/parser/ast/named_tuple.rs +++ b/nemo/src/io/parser/ast/named_tuple.rs @@ -16,6 +16,7 @@ pub(crate) struct NamedTuple<'a> { impl AstNode for NamedTuple<'_> { fn children(&self) -> Option> { let mut vec = Vec::new(); + #[allow(trivial_casts)] vec.push(&self.identifier as &dyn AstNode); if let Some(ws) = &self.ws1 { vec.push(ws); diff --git a/nemo/src/io/parser/ast/program.rs b/nemo/src/io/parser/ast/program.rs index a868da4e7..8872c2e56 100644 --- a/nemo/src/io/parser/ast/program.rs +++ b/nemo/src/io/parser/ast/program.rs @@ -12,6 +12,7 @@ impl AstNode for Program<'_> { fn children(&self) -> Option> { let mut vec = Vec::new(); if let Some(dc) = &self.tl_doc_comment { + #[allow(trivial_casts)] vec.push(dc as &dyn 
AstNode); }; // NOTE: The current implementation puts the doc comment and all the diff --git a/nemo/src/io/parser/ast/statement.rs b/nemo/src/io/parser/ast/statement.rs index 8dcb990d8..2404f1298 100644 --- a/nemo/src/io/parser/ast/statement.rs +++ b/nemo/src/io/parser/ast/statement.rs @@ -40,6 +40,7 @@ impl AstNode for Statement<'_> { } => { let mut vec = Vec::new(); if let Some(dc) = doc_comment { + #[allow(trivial_casts)] vec.push(dc as &dyn AstNode); }; vec.push(atom); @@ -62,9 +63,10 @@ impl AstNode for Statement<'_> { } => { let mut vec = Vec::new(); if let Some(dc) = doc_comment { + #[allow(trivial_casts)] vec.push(dc as &dyn AstNode); }; - vec.push(head as &dyn AstNode); + vec.push(head); if let Some(ws) = ws1 { vec.push(ws); }; diff --git a/nemo/src/io/parser/ast/term.rs b/nemo/src/io/parser/ast/term.rs index f96b1969c..372e41853 100644 --- a/nemo/src/io/parser/ast/term.rs +++ b/nemo/src/io/parser/ast/term.rs @@ -52,6 +52,7 @@ impl AstNode for Term<'_> { .. } => { let mut vec = Vec::new(); + #[allow(trivial_casts)] vec.push(&**lhs as &dyn AstNode); if let Some(ws) = ws1 { vec.push(ws); @@ -73,6 +74,7 @@ impl AstNode for Term<'_> { .. } => { let mut vec = Vec::new(); + #[allow(trivial_casts)] vec.push(operation as &dyn AstNode); vec.push(open_paren); if let Some(ws) = ws1 { diff --git a/nemo/src/lib.rs b/nemo/src/lib.rs index 8d5d47603..246ab7f6c 100644 --- a/nemo/src/lib.rs +++ b/nemo/src/lib.rs @@ -4,10 +4,10 @@ #![deny( missing_debug_implementations, missing_copy_implementations, + trivial_casts, trivial_numeric_casts )] #![warn( - trivial_casts, missing_docs, unused_import_braces, unused_qualifications, From 7f7c83f1833ea87cc90eeab35f19081f9b4553c3 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Thu, 11 Apr 2024 10:49:29 +0200 Subject: [PATCH 024/214] Add method name to trait AstNode and implement Display for all Ast nodes --- nemo/src/io/lexer.rs | 12 +++++++-- nemo/src/io/parser/ast.rs | 39 ++++++++++++++++++++++++--- nemo/src/io/parser/ast/atom.rs | 23 +++++++++++++--- nemo/src/io/parser/ast/directive.rs | 22 +++++++++++++-- nemo/src/io/parser/ast/map.rs | 25 ++++++++++++++++- nemo/src/io/parser/ast/named_tuple.rs | 14 +++++++++- nemo/src/io/parser/ast/program.rs | 17 ++++++++++-- nemo/src/io/parser/ast/statement.rs | 22 +++++++++++++-- nemo/src/io/parser/ast/term.rs | 24 ++++++++++++++--- 9 files changed, 178 insertions(+), 20 deletions(-) diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs index 142abcafd..1cc390707 100644 --- a/nemo/src/io/lexer.rs +++ b/nemo/src/io/lexer.rs @@ -176,11 +176,15 @@ impl std::fmt::Display for Token<'_> { let line = self.span.location_line(); let fragment = self.span.fragment(); if self.span.extra == () { - write!(f, "T!{{{0}, S!({offset}, {line}, {fragment})}}", self.kind) + write!( + f, + "T!{{{0}, S!({offset}, {line}, {fragment:?})}}", + self.kind + ) } else { write!( f, - "T!{{{0}, S!({offset}, {line}, {fragment}, {1:?})}}", + "T!{{{0}, S!({offset}, {line}, {fragment:?}, {1:?})}}", self.kind, self.span.extra ) } @@ -206,6 +210,10 @@ impl<'a> crate::io::parser::ast::AstNode for Token<'a> { fn is_token(&self) -> bool { true } + + fn name(&self) -> String { + String::from("Token") + } } macro_rules! 
syntax { diff --git a/nemo/src/io/parser/ast.rs b/nemo/src/io/parser/ast.rs index 7c00c2eb3..7f1094e8d 100644 --- a/nemo/src/io/parser/ast.rs +++ b/nemo/src/io/parser/ast.rs @@ -2,6 +2,7 @@ use nom::Offset; use crate::io::lexer::{Span, Token}; use std::fmt::Display; +use ascii_tree::{Tree, write_tree}; pub(crate) mod atom; pub(crate) mod directive; @@ -11,11 +12,12 @@ pub(crate) mod program; pub(crate) mod statement; pub(crate) mod term; -pub(crate) trait AstNode: std::fmt::Debug { +pub(crate) trait AstNode: std::fmt::Debug + Display { fn children(&self) -> Option>; fn span(&self) -> Span; fn position(&self) -> Position; fn is_token(&self) -> bool; + fn name(&self) -> String; } pub(crate) struct Position { @@ -66,9 +68,20 @@ impl AstNode for List<'_, T> { fn is_token(&self) -> bool { false } + + fn name(&self) -> String { + String::from("List") + } +} +impl Display for List<'_, T> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut output = String::new(); + write_tree(&mut output, &ast_to_ascii_tree(self))?; + write!(f, "{output}") + } } -fn get_all_tokens(node: &dyn AstNode) -> Vec<&dyn AstNode> { +pub(crate) fn get_all_tokens(node: &dyn AstNode) -> Vec<&dyn AstNode> { let mut vec = Vec::new(); if let Some(children) = node.children() { for child in children { @@ -80,6 +93,20 @@ fn get_all_tokens(node: &dyn AstNode) -> Vec<&dyn AstNode> { vec } +pub(crate) fn ast_to_ascii_tree(node: &dyn AstNode) -> Tree { + let mut vec = Vec::new(); + if let Some(children) = node.children() { + for child in children { + if child.is_token() { + vec.push(Tree::Leaf(vec![format!("{}", child)])); + } else { + vec.push(ast_to_ascii_tree(child)); + } + } + } + Tree::Node(node.name(), vec) +} + mod test { use super::*; use super::{atom::Atom, directive::Directive, named_tuple::NamedTuple, program::Program, statement::Statement, term::Term}; @@ -295,11 +322,15 @@ mod test { }), ], }; - + println!("{}", ast); let tokens1 = get_all_tokens(&ast); + for token in &tokens1 { + println!("{}", token); + } + assert_eq!(input, { let mut result = String::new(); - for token in tokens1 { + for token in &tokens1 { result.push_str(token.span().fragment()); } result diff --git a/nemo/src/io/parser/ast/atom.rs b/nemo/src/io/parser/ast/atom.rs index 3bd65b00f..419ef048d 100644 --- a/nemo/src/io/parser/ast/atom.rs +++ b/nemo/src/io/parser/ast/atom.rs @@ -1,8 +1,9 @@ use super::named_tuple::NamedTuple; use super::term::Term; -use super::AstNode; +use super::{ast_to_ascii_tree, AstNode}; use super::{map::Map, Position}; use crate::io::lexer::{Span, Token}; +use ascii_tree::write_tree; #[derive(Debug, Clone, PartialEq)] pub(crate) enum Atom<'a> { @@ -25,7 +26,7 @@ pub(crate) enum Atom<'a> { impl AstNode for Atom<'_> { fn children(&self) -> Option> { match self { - Atom::Positive(named_tuple) => named_tuple.children(), + Atom::Positive(named_tuple) => Some(vec![named_tuple]), Atom::Negative { neg, atom, .. } => Some(vec![neg, atom]), Atom::InfixAtom { lhs, @@ -48,7 +49,7 @@ impl AstNode for Atom<'_> { vec.push(rhs); Some(vec) } - Atom::Map(map) => map.children(), + Atom::Map(map) => Some(vec![map]), } } @@ -73,4 +74,20 @@ impl AstNode for Atom<'_> { fn is_token(&self) -> bool { false } + + fn name(&self) -> String { + match self { + Atom::Positive(_) => "Positive Atom".into(), + Atom::Negative { .. } => "Negative Atom".into(), + Atom::InfixAtom { .. 
} => "Infix Atom".into(), + Atom::Map(_) => "Map Atom".into(), + } + } +} +impl std::fmt::Display for Atom<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut output = String::new(); + write_tree(&mut output, &ast_to_ascii_tree(self))?; + write!(f, "{output}") + } } diff --git a/nemo/src/io/parser/ast/directive.rs b/nemo/src/io/parser/ast/directive.rs index c0d103697..857b53f6c 100644 --- a/nemo/src/io/parser/ast/directive.rs +++ b/nemo/src/io/parser/ast/directive.rs @@ -1,6 +1,7 @@ -use super::AstNode; -use super::{map::Map, Position}; +use super::map::Map; +use super::{ast_to_ascii_tree, AstNode, Position}; use crate::io::lexer::{Span, Token}; +use ascii_tree::write_tree; #[derive(Debug, Clone, PartialEq)] pub(crate) enum Directive<'a> { @@ -217,4 +218,21 @@ impl AstNode for Directive<'_> { fn is_token(&self) -> bool { false } + + fn name(&self) -> String { + match self { + Directive::Base { .. } => "Base Directive".into(), + Directive::Prefix { .. } => "Prefix Directive".into(), + Directive::Import { .. } => "Import Directive".into(), + Directive::Export { .. } => "Export Directive".into(), + Directive::Output { .. } => "Output Directive".into(), + } + } +} +impl std::fmt::Display for Directive<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut output = String::new(); + write_tree(&mut output, &ast_to_ascii_tree(self))?; + write!(f, "{output}") + } } diff --git a/nemo/src/io/parser/ast/map.rs b/nemo/src/io/parser/ast/map.rs index 5ac13ec03..0e043471d 100644 --- a/nemo/src/io/parser/ast/map.rs +++ b/nemo/src/io/parser/ast/map.rs @@ -1,6 +1,7 @@ use super::term::Term; -use super::{AstNode, List, Position}; +use super::{ast_to_ascii_tree, AstNode, List, Position}; use crate::io::lexer::{Span, Token}; +use ascii_tree::write_tree; use std::fmt::Debug; #[derive(Debug, Clone, PartialEq)] @@ -53,6 +54,17 @@ impl AstNode for Map<'_> { fn is_token(&self) -> bool { false } + + fn name(&self) -> String { + String::from("Map") + } +} +impl std::fmt::Display for Map<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut output = String::new(); + write_tree(&mut output, &ast_to_ascii_tree(self))?; + write!(f, "{output}") + } } #[derive(Debug, Clone, PartialEq)] @@ -95,4 +107,15 @@ impl AstNode for Pair<'_, K, V> { fn is_token(&self) -> bool { false } + + fn name(&self) -> String { + String::from("Pair") + } +} +impl std::fmt::Display for Pair<'_, K, V> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut output = String::new(); + write_tree(&mut output, &ast_to_ascii_tree(self))?; + write!(f, "{output}") + } } diff --git a/nemo/src/io/parser/ast/named_tuple.rs b/nemo/src/io/parser/ast/named_tuple.rs index e8e05df9a..3aa5b7e82 100644 --- a/nemo/src/io/parser/ast/named_tuple.rs +++ b/nemo/src/io/parser/ast/named_tuple.rs @@ -1,6 +1,7 @@ use super::term::Term; -use super::{AstNode, List, Position}; +use super::{ast_to_ascii_tree, AstNode, List, Position}; use crate::io::lexer::{Span, Token}; +use ascii_tree::write_tree; #[derive(Debug, Clone, PartialEq)] pub(crate) struct NamedTuple<'a> { @@ -50,4 +51,15 @@ impl AstNode for NamedTuple<'_> { fn is_token(&self) -> bool { false } + + fn name(&self) -> String { + String::from("Named Tuple") + } +} +impl std::fmt::Display for NamedTuple<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut output = String::new(); + write_tree(&mut output, &ast_to_ascii_tree(self))?; + write!(f, "{output}") + } } 
diff --git a/nemo/src/io/parser/ast/program.rs b/nemo/src/io/parser/ast/program.rs index 8872c2e56..443697dd9 100644 --- a/nemo/src/io/parser/ast/program.rs +++ b/nemo/src/io/parser/ast/program.rs @@ -1,5 +1,7 @@ -use super::AstNode; -use super::{statement::Statement, Position}; +use ascii_tree::write_tree; + +use super::statement::Statement; +use super::{ast_to_ascii_tree, AstNode, Position}; use crate::io::lexer::{Span, Token}; #[derive(Debug, Clone, PartialEq)] @@ -39,4 +41,15 @@ impl AstNode for Program<'_> { fn is_token(&self) -> bool { false } + + fn name(&self) -> String { + String::from("Program") + } +} +impl std::fmt::Display for Program<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut output = String::new(); + write_tree(&mut output, &ast_to_ascii_tree(self))?; + write!(f, "{output}") + } } diff --git a/nemo/src/io/parser/ast/statement.rs b/nemo/src/io/parser/ast/statement.rs index 2404f1298..84cfa3e61 100644 --- a/nemo/src/io/parser/ast/statement.rs +++ b/nemo/src/io/parser/ast/statement.rs @@ -1,7 +1,8 @@ use super::atom::Atom; use super::directive::Directive; -use super::{AstNode, List, Position}; +use super::{ast_to_ascii_tree, AstNode, List, Position}; use crate::io::lexer::{Span, Token}; +use ascii_tree::write_tree; #[derive(Debug, Clone, PartialEq)] pub(crate) enum Statement<'a> { @@ -30,7 +31,7 @@ pub(crate) enum Statement<'a> { impl AstNode for Statement<'_> { fn children(&self) -> Option> { match self { - Statement::Directive(directive) => directive.children(), + Statement::Directive(directive) => Some(vec![directive]), Statement::Fact { doc_comment, atom, @@ -108,4 +109,21 @@ impl AstNode for Statement<'_> { fn is_token(&self) -> bool { false } + + fn name(&self) -> String { + match self { + Statement::Directive(_) => "Directive".into(), + Statement::Fact { .. } => "Fact".into(), + Statement::Rule { .. } => "Rule".into(), + Statement::Whitespace(_) => "Whitespace".into(), + Statement::Comment(_) => "Comment".into(), + } + } +} +impl std::fmt::Display for Statement<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut output = String::new(); + write_tree(&mut output, &ast_to_ascii_tree(self))?; + write!(f, "{output}") + } } diff --git a/nemo/src/io/parser/ast/term.rs b/nemo/src/io/parser/ast/term.rs index 372e41853..fe5febeda 100644 --- a/nemo/src/io/parser/ast/term.rs +++ b/nemo/src/io/parser/ast/term.rs @@ -1,9 +1,8 @@ use super::map::Map; use super::named_tuple::NamedTuple; -use super::AstNode; -use super::List; -use super::Position; +use super::{ast_to_ascii_tree, AstNode, List, Position}; use crate::io::lexer::{Span, Token}; +use ascii_tree::write_tree; #[derive(Debug, Clone, PartialEq)] pub(crate) enum Term<'a> { @@ -116,4 +115,23 @@ impl AstNode for Term<'_> { fn is_token(&self) -> bool { false } + + fn name(&self) -> String { + match self { + Term::Primitive(_) => "Primitive".into(), + Term::Variable(_) => "Variable".into(), + Term::Unary { .. } => "Unary Term".into(), + Term::Binary { .. } => "Binary Term".into(), + Term::Aggregation { .. 
} => "Aggregation".into(), + Term::Function(_) => "Function Symbol".into(), + Term::Map(_) => "Map".into(), + } + } +} +impl std::fmt::Display for Term<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut output = String::new(); + write_tree(&mut output, &ast_to_ascii_tree(self))?; + write!(f, "{output}") + } } From 40cc93373621f2a53610cf6ccda39e6afd5fbd77 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Thu, 11 Apr 2024 10:52:18 +0200 Subject: [PATCH 025/214] Change order of parser function --- nemo/src/io/parser.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index 6f5ca55e8..a1b540786 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -2981,13 +2981,13 @@ mod new { fn parse_term<'a>(input: Span<'a>) -> IResult> { alt(( + parse_map_term, + parse_function_term, parse_primitive_term, parse_variable, parse_unary_term, // parse_binary_term, // parse_aggregation_term, - parse_function_term, - parse_map_term, ))(input) } @@ -3044,7 +3044,7 @@ mod new { } #[cfg(test)] - mod test { + mod tests { use super::*; use crate::io::{ lexer::*, From 3557c7e7f19ee7164b8ee07267cafb607bb99f54 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Thu, 11 Apr 2024 11:11:08 +0200 Subject: [PATCH 026/214] Change NamedTuple to Tuple to support unnamed tuples --- nemo/src/io/parser.rs | 43 ++++++++++++++----- nemo/src/io/parser/ast.rs | 22 +++++----- nemo/src/io/parser/ast/atom.rs | 6 +-- nemo/src/io/parser/ast/term.rs | 4 +- .../parser/ast/{named_tuple.rs => tuple.rs} | 12 +++--- 5 files changed, 56 insertions(+), 31 deletions(-) rename nemo/src/io/parser/ast/{named_tuple.rs => tuple.rs} (86%) diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index a1b540786..a22898573 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -2431,7 +2431,7 @@ mod test { /// NEW PARSER mod new { use super::ast::{ - atom::*, directive::*, map::*, named_tuple::*, program::*, statement::*, term::*, List, + atom::*, directive::*, map::*, program::*, statement::*, term::*, tuple::*, List, }; use crate::io::lexer::{ arrow, at, close_brace, close_paren, colon, comma, dot, equal, greater, greater_equal, @@ -2839,10 +2839,10 @@ mod new { }) } - fn parse_named_tuple<'a>(input: Span<'a>) -> IResult> { + fn parse_named_tuple<'a>(input: Span<'a>) -> IResult> { let input_span = input.clone(); tuple(( - lex_ident, + opt(lex_ident), opt(lex_whitespace), open_paren, opt(lex_whitespace), @@ -2854,7 +2854,7 @@ mod new { |(rest_input, (identifier, ws1, open_paren, ws2, terms, ws3, close_paren))| { ( rest_input, - NamedTuple { + Tuple { span: outer_span(input_span, rest_input), identifier, ws1, @@ -3074,12 +3074,12 @@ mod new { statements: vec![Statement::Fact { span: S!(0, 1, "a(B,C)."), doc_comment: None, - atom: Atom::Positive(NamedTuple { + atom: Atom::Positive(Tuple { span: S!(0, 1, "a(B,C)"), - identifier: Token { + identifier: Some(Token { kind: TokenKind::Ident, span: S!(0, 1, "a"), - }, + }), ws1: None, open_paren: Token { kind: TokenKind::OpenParen, @@ -3343,12 +3343,12 @@ mod new { Statement::Fact { span: S!(0, 1, "some(Fact, with, whitespace) ."), doc_comment: None, - atom: Atom::Positive(NamedTuple { + atom: Atom::Positive(Tuple { span: S!(0, 1, "some(Fact, with, whitespace)"), - identifier: Token { + identifier: Some(Token { kind: TokenKind::Ident, span: S!(0, 1, "some"), - }, + }), ws1: None, open_paren: Token { kind: TokenKind::OpenParen, @@ -3422,6 +3422,29 @@ mod new { ) } + #[test] + fn 
display_program() { + let input = Span::new( + r#"% This example finds trees of (some species of lime/linden tree) in Dresden, +% which are more than 200 years old. +% +% It shows how to load (typed) data from (compressed) CSV files, how to +% perform a recursive reachability query, and how to use datatype built-in to +% find old trees. It can be modified to use a different species or genus of +% plant, and by changing the required age. + +@import tree :- csv{format=(string, string, int, int), resource="https://raw.githubusercontent.com/knowsys/nemo-examples/main/examples/lime-trees/dresden-trees-ages-heights.csv"} . % location URL, species, age, height in m +@import taxon :- csv{format=(string, string, string), resource="https://raw.githubusercontent.com/knowsys/nemo-examples/main/examples/lime-trees/wikidata-taxon-name-parent.csv.gz"} . % location URL, species, age, height in m + +limeSpecies(?X, "Tilia") :- taxon(?X, "Tilia", ?P). +limeSpecies(?X, ?Name) :- taxon(?X, ?Name, ?Y), limeSpecies(?Y, ?N). + +oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters), ?age > 200, limeSpecies(?id,?species) ."#, + ); + println!("{}", parse_program(input)); + // assert!(false); + } + #[test] fn parser_test() { let str = std::fs::read_to_string("../testfile.rls").expect("testfile not found"); diff --git a/nemo/src/io/parser/ast.rs b/nemo/src/io/parser/ast.rs index 7f1094e8d..8be761f36 100644 --- a/nemo/src/io/parser/ast.rs +++ b/nemo/src/io/parser/ast.rs @@ -7,7 +7,7 @@ use ascii_tree::{Tree, write_tree}; pub(crate) mod atom; pub(crate) mod directive; pub(crate) mod map; -pub(crate) mod named_tuple; +pub(crate) mod tuple; pub(crate) mod program; pub(crate) mod statement; pub(crate) mod term; @@ -109,7 +109,7 @@ pub(crate) fn ast_to_ascii_tree(node: &dyn AstNode) -> Tree { mod test { use super::*; - use super::{atom::Atom, directive::Directive, named_tuple::NamedTuple, program::Program, statement::Statement, term::Term}; + use super::{atom::Atom, directive::Directive, tuple::Tuple, program::Program, statement::Statement, term::Term}; use crate::io::lexer::TokenKind; macro_rules! 
s { @@ -187,12 +187,12 @@ mod test { kind: TokenKind::DocComment, span:s!(184,7,"%% This is just an example predicate.\n") }), - atom: Atom::Positive(NamedTuple { + atom: Atom::Positive(Tuple { span: s!(222,8,"somePredicate(ConstA, ConstB)"), - identifier: Token { + identifier: Some(Token { kind: TokenKind::Ident, span: s!(222, 8, "somePredicate"), - }, + }), ws1:None , open_paren:Token{ kind:TokenKind::OpenParen, @@ -246,12 +246,12 @@ mod test { doc_comment: Some(Token { kind: TokenKind::DocComment, span: s!(262,11,"%% This is just an example rule.\n") }), head: List { span: s!(295, 12, "someHead(?VarA)"), - first: Atom::Positive(NamedTuple { + first: Atom::Positive(Tuple { span: s!(295,12,"someHead(?VarA)"), - identifier: Token { + identifier: Some(Token { kind: TokenKind::Ident, span: s!(295, 12, "someHead"), - }, + }), ws1: None, open_paren: Token { kind: TokenKind::OpenParen, span: s!(303,12,"(") }, ws2: None, @@ -273,12 +273,12 @@ mod test { ws2: Some(Token{kind:TokenKind::Whitespace,span:s!(313,12," ")}), body: List { span: s!(314, 12, "somePredicate(?VarA, ConstB)"), - first: Atom::Positive(NamedTuple { + first: Atom::Positive(Tuple { span: s!(314, 12,"somePredicate(?VarA, ConstB)"), - identifier: Token { + identifier: Some(Token { kind: TokenKind::Ident, span: s!(314, 12, "somePredicate"), - }, + }), ws1: None, open_paren: Token { kind: TokenKind::OpenParen, span: s!(327,12,"(") }, ws2: None, diff --git a/nemo/src/io/parser/ast/atom.rs b/nemo/src/io/parser/ast/atom.rs index 419ef048d..e75c9fb4f 100644 --- a/nemo/src/io/parser/ast/atom.rs +++ b/nemo/src/io/parser/ast/atom.rs @@ -1,5 +1,5 @@ -use super::named_tuple::NamedTuple; use super::term::Term; +use super::tuple::Tuple; use super::{ast_to_ascii_tree, AstNode}; use super::{map::Map, Position}; use crate::io::lexer::{Span, Token}; @@ -7,11 +7,11 @@ use ascii_tree::write_tree; #[derive(Debug, Clone, PartialEq)] pub(crate) enum Atom<'a> { - Positive(NamedTuple<'a>), + Positive(Tuple<'a>), Negative { span: Span<'a>, neg: Token<'a>, - atom: NamedTuple<'a>, + atom: Tuple<'a>, }, InfixAtom { span: Span<'a>, diff --git a/nemo/src/io/parser/ast/term.rs b/nemo/src/io/parser/ast/term.rs index fe5febeda..964400d88 100644 --- a/nemo/src/io/parser/ast/term.rs +++ b/nemo/src/io/parser/ast/term.rs @@ -1,5 +1,5 @@ use super::map::Map; -use super::named_tuple::NamedTuple; +use super::tuple::Tuple; use super::{ast_to_ascii_tree, AstNode, List, Position}; use crate::io::lexer::{Span, Token}; use ascii_tree::write_tree; @@ -31,7 +31,7 @@ pub(crate) enum Term<'a> { ws2: Option>, close_paren: Token<'a>, }, - Function(Box>), + Function(Box>), Map(Box>), } impl AstNode for Term<'_> { diff --git a/nemo/src/io/parser/ast/named_tuple.rs b/nemo/src/io/parser/ast/tuple.rs similarity index 86% rename from nemo/src/io/parser/ast/named_tuple.rs rename to nemo/src/io/parser/ast/tuple.rs index 3aa5b7e82..cf1ec38f7 100644 --- a/nemo/src/io/parser/ast/named_tuple.rs +++ b/nemo/src/io/parser/ast/tuple.rs @@ -4,9 +4,9 @@ use crate::io::lexer::{Span, Token}; use ascii_tree::write_tree; #[derive(Debug, Clone, PartialEq)] -pub(crate) struct NamedTuple<'a> { +pub(crate) struct Tuple<'a> { pub(crate) span: Span<'a>, - pub(crate) identifier: Token<'a>, + pub(crate) identifier: Option>, pub(crate) ws1: Option>, pub(crate) open_paren: Token<'a>, pub(crate) ws2: Option>, @@ -14,11 +14,13 @@ pub(crate) struct NamedTuple<'a> { pub(crate) ws3: Option>, pub(crate) close_paren: Token<'a>, } -impl AstNode for NamedTuple<'_> { +impl AstNode for Tuple<'_> { fn children(&self) -> 
Option> { let mut vec = Vec::new(); #[allow(trivial_casts)] - vec.push(&self.identifier as &dyn AstNode); + if let Some(identifier) = &self.identifier { + vec.push(identifier as &dyn AstNode); + } if let Some(ws) = &self.ws1 { vec.push(ws); } @@ -56,7 +58,7 @@ impl AstNode for NamedTuple<'_> { String::from("Named Tuple") } } -impl std::fmt::Display for NamedTuple<'_> { +impl std::fmt::Display for Tuple<'_> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let mut output = String::new(); write_tree(&mut output, &ast_to_ascii_tree(self))?; From d11207b41bbf18a51fd8935eb16c05b5e9203e91 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Thu, 11 Apr 2024 15:16:38 +0200 Subject: [PATCH 027/214] Fix parsing of output directives --- nemo/src/io/parser.rs | 111 ++++++++++++++++++++++++---- nemo/src/io/parser/ast/directive.rs | 9 ++- 2 files changed, 101 insertions(+), 19 deletions(-) diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index a22898573..980c2e946 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -2739,19 +2739,54 @@ mod new { let input_span = input.clone(); tuple(( opt(lex_doc_comment), - at, - verify(lex_ident, |token| token.kind == TokenKind::Output), - ignore_ws_and_comments(separated_list0(comma, ignore_ws_and_comments(lex_ident))), - ignore_ws_and_comments(dot), + recognize(pair( + at, + verify(lex_ident, |token| token.kind == TokenKind::Output), + )), + lex_whitespace, + opt(parse_identifier_list), + opt(lex_whitespace), + dot, ))(input) - .map(|(rest_input, (doc_comment, _, kw, predicates, _))| { + .map( + |(rest_input, (doc_comment, kw, ws1, predicates, ws2, dot))| { + ( + rest_input, + Directive::Output { + span: outer_span(input_span, rest_input), + doc_comment, + kw: Token { + kind: TokenKind::Output, + span: kw, + }, + ws1, + predicates, + ws2, + dot, + }, + ) + }, + ) + } + + fn parse_identifier_list<'a>(input: Span<'a>) -> IResult>> { + let input_span = input.clone(); + pair( + lex_ident, + many0(tuple(( + opt(lex_whitespace), + comma, + opt(lex_whitespace), + lex_ident, + ))), + )(input) + .map(|(rest_input, (first, rest))| { ( rest_input, - Directive::Output { + List { span: outer_span(input_span, rest_input), - doc_comment, - kw, - predicates, + first, + rest: if rest.is_empty() { None } else { Some(rest) }, }, ) }) @@ -3124,7 +3159,7 @@ mod new { #[test] fn syntax() { let input = Span::new( - r#"@base .@prefix rdfs:.@import sourceA:-csv{resource="sources/dataA.csv"}.@export a:-csv{}.@output a."#, + r#"@base .@prefix rdfs:.@import sourceA:-csv{resource="sources/dataA.csv"}.@export a:-csv{}.@output a, b, c."#, ); assert_eq!( parse_program(input), @@ -3300,16 +3335,60 @@ mod new { }, }), Statement::Directive(Directive::Output { - span: S!(153, 1, "@output a."), + span: S!(153, 1, "@output a, b, c."), doc_comment: None, kw: Token { kind: TokenKind::Output, - span: S!(154, 1, "output") + span: S!(153, 1, "@output") }, - predicates: vec![Token { - kind: TokenKind::Ident, - span: S!(161, 1, "a") - }], + ws1: Token { + kind: TokenKind::Whitespace, + span: S!(160, 1, " "), + }, + predicates: Some(List { + span: S!(161, 1, "a, b, c"), + first: Token { + kind: TokenKind::Ident, + span: S!(161, 1, "a"), + }, + rest: Some(vec![ + ( + None, + Token { + kind: TokenKind::Comma, + span: S!(162, 1, ","), + }, + Some(Token { + kind: TokenKind::Whitespace, + span: S!(163, 1, " "), + }), + Token { + kind: TokenKind::Ident, + span: S!(164, 1, "b"), + }, + ), + ( + None, + Token { + kind: TokenKind::Comma, + span: S!(165, 1, ","), + }, + 
Some(Token { + kind: TokenKind::Whitespace, + span: S!(166, 1, " "), + }), + Token { + kind: TokenKind::Ident, + span: S!(167, 1, "c"), + }, + ), + ]), + }), + ws2: None, + dot: Token { + kind: TokenKind::Dot, + span: S!(168, 1, "."), + } }), ], } diff --git a/nemo/src/io/parser/ast/directive.rs b/nemo/src/io/parser/ast/directive.rs index 857b53f6c..cf024ed8c 100644 --- a/nemo/src/io/parser/ast/directive.rs +++ b/nemo/src/io/parser/ast/directive.rs @@ -1,5 +1,5 @@ use super::map::Map; -use super::{ast_to_ascii_tree, AstNode, Position}; +use super::{ast_to_ascii_tree, AstNode, List, Position}; use crate::io::lexer::{Span, Token}; use ascii_tree::write_tree; @@ -55,12 +55,15 @@ pub(crate) enum Directive<'a> { ws4: Option>, dot: Token<'a>, }, - // maybe will get deprecated + // "@output A, B, C." Output { span: Span<'a>, doc_comment: Option>, kw: Token<'a>, - predicates: Vec>, + ws1: Token<'a>, + predicates: Option>>, + ws2: Option>, + dot: Token<'a>, }, } impl AstNode for Directive<'_> { From 2ab0304dc77efa8506302c2e9a67f5793f53104e Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Fri, 12 Apr 2024 12:20:34 +0200 Subject: [PATCH 028/214] Change order of parser functions because of ordered choice --- nemo/src/io/parser.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index 980c2e946..936ef2618 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -3074,8 +3074,9 @@ mod new { }) } + // Order of functions is important, because of ordered choice and no backtracking fn parse_operation_token<'a>(input: Span<'a>) -> IResult> { - alt((equal, unequal, less, less_equal, greater, greater_equal))(input) + alt((less_equal, greater_equal, equal, unequal, less, greater))(input) } #[cfg(test)] From acf267e314da09039abdcfd671a23d696eee4377 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Fri, 12 Apr 2024 12:21:30 +0200 Subject: [PATCH 029/214] Fix 'name' method of tuple --- nemo/src/io/parser/ast/tuple.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo/src/io/parser/ast/tuple.rs b/nemo/src/io/parser/ast/tuple.rs index cf1ec38f7..9d771d289 100644 --- a/nemo/src/io/parser/ast/tuple.rs +++ b/nemo/src/io/parser/ast/tuple.rs @@ -55,7 +55,7 @@ impl AstNode for Tuple<'_> { } fn name(&self) -> String { - String::from("Named Tuple") + String::from("Tuple") } } impl std::fmt::Display for Tuple<'_> { From 3044d5c69adb0170fce5319ee5eb5c25ec0103bb Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Mon, 15 Apr 2024 21:05:53 +0200 Subject: [PATCH 030/214] Rename fn parse_named_tuple to parse_tuple --- nemo/src/io/parser.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index 936ef2618..41f969966 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -2832,13 +2832,13 @@ mod new { } fn parse_normal_atom<'a>(input: Span<'a>) -> IResult> { - parse_named_tuple(input) + parse_tuple(input) .map(|(rest_input, named_tuple)| (rest_input, Atom::Positive(named_tuple))) } fn parse_negative_atom<'a>(input: Span<'a>) -> IResult> { let input_span = input.clone(); - pair(tilde, parse_named_tuple)(input).map(|(rest_input, (tilde, named_tuple))| { + pair(tilde, parse_tuple)(input).map(|(rest_input, (tilde, named_tuple))| { ( rest_input, Atom::Negative { @@ -2874,7 +2874,7 @@ mod new { }) } - fn parse_named_tuple<'a>(input: Span<'a>) -> IResult> { + fn parse_tuple<'a>(input: Span<'a>) -> IResult> { let input_span = input.clone(); tuple(( 
opt(lex_ident), @@ -3054,7 +3054,7 @@ mod new { } fn parse_function_term<'a>(input: Span<'a>) -> IResult> { - parse_named_tuple(input) + parse_tuple(input) .map(|(rest_input, named_tuple)| (rest_input, Term::Function(Box::new(named_tuple)))) } From a527fd6c1b7207cd585ffec9ac5ba21a1f3fd21f Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Mon, 15 Apr 2024 21:25:10 +0200 Subject: [PATCH 031/214] Add parse_aggregation_term function --- nemo/src/io/lexer.rs | 7 +++++++ nemo/src/io/parser.rs | 42 ++++++++++++++++++++++++++++++++++-------- 2 files changed, 41 insertions(+), 8 deletions(-) diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs index 1cc390707..53a495e0d 100644 --- a/nemo/src/io/lexer.rs +++ b/nemo/src/io/lexer.rs @@ -78,6 +78,8 @@ pub(crate) enum TokenKind { Ident, /// Variable, Variable, + /// Aggregate identifier like `#sum` + Aggregate, /// IRI, delimited with `<` and `>` Iri, /// Base 10 digits @@ -141,6 +143,7 @@ impl std::fmt::Display for TokenKind { TokenKind::Slash => write!(f, "Slash"), TokenKind::Ident => write!(f, "Ident"), TokenKind::Variable => write!(f, "Variable"), + TokenKind::Aggregate => write!(f, "Aggregate"), TokenKind::Iri => write!(f, "Iri"), TokenKind::Number => write!(f, "Number"), TokenKind::String => write!(f, "String"), @@ -290,6 +293,10 @@ pub(crate) fn lex_operators(input: Span) -> IResult { ))(input) } +pub(crate) fn lex_unary_operators(input: Span) -> IResult { + alt((plus, minus))(input) +} + pub(crate) fn lex_ident(input: Span) -> IResult { let (rest, result) = recognize(pair( alpha1, diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index 41f969966..c5345d2e2 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -2435,9 +2435,9 @@ mod new { }; use crate::io::lexer::{ arrow, at, close_brace, close_paren, colon, comma, dot, equal, greater, greater_equal, - less, less_equal, lex_comment, lex_doc_comment, lex_ident, lex_iri, lex_number, - lex_operators, lex_string, lex_toplevel_doc_comment, lex_whitespace, open_brace, - open_paren, question_mark, tilde, unequal, Span, Token, TokenKind, + hash, less, less_equal, lex_comment, lex_doc_comment, lex_ident, lex_iri, lex_number, + lex_operators, lex_string, lex_toplevel_doc_comment, lex_unary_operators, lex_whitespace, + open_brace, open_paren, question_mark, tilde, unequal, Span, Token, TokenKind, }; use nom::combinator::{all_consuming, opt, recognize}; use nom::sequence::{delimited, pair}; @@ -2506,7 +2506,7 @@ mod new { } fn parse_fact<'a>(input: Span<'a>) -> IResult> { - let input_span = input; + // let input_span = input; tuple(( opt(lex_doc_comment), parse_normal_atom, @@ -2517,7 +2517,7 @@ mod new { ( rest_input, Statement::Fact { - span: outer_span(input_span, rest_input), + span: outer_span(input, rest_input), doc_comment, atom, ws, @@ -3022,7 +3022,7 @@ mod new { parse_variable, parse_unary_term, // parse_binary_term, - // parse_aggregation_term, + parse_aggregation_term, ))(input) } @@ -3033,7 +3033,7 @@ mod new { fn parse_unary_term<'a>(input: Span<'a>) -> IResult> { let input_span = input.clone(); - pair(lex_operators, parse_term)(input).map(|(rest_input, (operation, term))| { + pair(lex_unary_operators, parse_term)(input).map(|(rest_input, (operation, term))| { ( rest_input, Term::Unary { @@ -3050,7 +3050,33 @@ mod new { } fn parse_aggregation_term<'a>(input: Span<'a>) -> IResult> { - todo!("`parse_aggregation_term`!") + tuple(( + recognize(pair(hash, lex_ident)), + open_paren, + opt(lex_whitespace), + parse_term_list, + opt(lex_whitespace), + close_paren, + ))(input) 
+ .map( + |(rest_input, (operation, open_paren, ws1, terms, ws2, close_paren))| { + ( + rest_input, + Term::Aggregation { + span: outer_span(input, rest_input), + operation: Token { + kind: TokenKind::Aggregate, + span: operation, + }, + open_paren, + ws1, + terms: Box::new(terms), + ws2, + close_paren, + }, + ) + }, + ) } fn parse_function_term<'a>(input: Span<'a>) -> IResult> { From 86c3be75b93453cd088b1dfd6f09c9d3a7570b71 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Tue, 16 Apr 2024 08:03:19 +0200 Subject: [PATCH 032/214] Add parsing of existential variables --- nemo/src/io/lexer.rs | 4 +++- nemo/src/io/parser.rs | 26 ++++++++++++++++++++------ nemo/src/io/parser/ast/term.rs | 4 ++++ 3 files changed, 27 insertions(+), 7 deletions(-) diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs index 53a495e0d..bcecf1bee 100644 --- a/nemo/src/io/lexer.rs +++ b/nemo/src/io/lexer.rs @@ -76,8 +76,10 @@ pub(crate) enum TokenKind { // Multi-char tokens: /// Identifier for keywords and names Ident, - /// Variable, + /// Variable like `?var` Variable, + /// Existential Variable like `!var` + Existential, /// Aggregate identifier like `#sum` Aggregate, /// IRI, delimited with `<` and `>` diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index c5345d2e2..e1c48ef4c 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -2434,10 +2434,11 @@ mod new { atom::*, directive::*, map::*, program::*, statement::*, term::*, tuple::*, List, }; use crate::io::lexer::{ - arrow, at, close_brace, close_paren, colon, comma, dot, equal, greater, greater_equal, - hash, less, less_equal, lex_comment, lex_doc_comment, lex_ident, lex_iri, lex_number, - lex_operators, lex_string, lex_toplevel_doc_comment, lex_unary_operators, lex_whitespace, - open_brace, open_paren, question_mark, tilde, unequal, Span, Token, TokenKind, + arrow, at, close_brace, close_paren, colon, comma, dot, equal, exclamation_mark, greater, + greater_equal, hash, less, less_equal, lex_comment, lex_doc_comment, lex_ident, lex_iri, + lex_number, lex_operators, lex_string, lex_toplevel_doc_comment, lex_unary_operators, + lex_whitespace, open_brace, open_paren, question_mark, tilde, unequal, Span, Token, + TokenKind, }; use nom::combinator::{all_consuming, opt, recognize}; use nom::sequence::{delimited, pair}; @@ -3020,6 +3021,7 @@ mod new { parse_function_term, parse_primitive_term, parse_variable, + parse_existential, parse_unary_term, // parse_binary_term, parse_aggregation_term, @@ -3089,9 +3091,9 @@ mod new { } fn parse_variable<'a>(input: Span<'a>) -> IResult> { - recognize(pair(question_mark, lex_ident))(input).map(|(rest, var)| { + recognize(pair(question_mark, lex_ident))(input).map(|(rest_input, var)| { ( - rest, + rest_input, Term::Variable(Token { kind: TokenKind::Variable, span: var, @@ -3100,6 +3102,18 @@ mod new { }) } + fn parse_existential<'a>(input: Span<'a>) -> IResult> { + recognize(pair(exclamation_mark, lex_ident))(input).map(|(rest_input, existential)| { + ( + rest_input, + Term::Existential(Token { + kind: TokenKind::Existential, + span: existential, + }), + ) + }) + } + // Order of functions is important, because of ordered choice and no backtracking fn parse_operation_token<'a>(input: Span<'a>) -> IResult> { alt((less_equal, greater_equal, equal, unequal, less, greater))(input) diff --git a/nemo/src/io/parser/ast/term.rs b/nemo/src/io/parser/ast/term.rs index 964400d88..44ccc1ee3 100644 --- a/nemo/src/io/parser/ast/term.rs +++ b/nemo/src/io/parser/ast/term.rs @@ -8,6 +8,7 @@ use ascii_tree::write_tree; 
pub(crate) enum Term<'a> { Primitive(Token<'a>), Variable(Token<'a>), + Existential(Token<'a>), // TODO: Is whitespace needed? Figure out how unary terms look Unary { span: Span<'a>, @@ -39,6 +40,7 @@ impl AstNode for Term<'_> { match self { Term::Primitive(token) => Some(vec![token]), Term::Variable(token) => Some(vec![token]), + Term::Existential(token) => Some(vec![token]), Term::Unary { operation, term, .. } => Some(vec![operation, &**term]), @@ -95,6 +97,7 @@ impl AstNode for Term<'_> { match self { Term::Primitive(t) => t.span(), Term::Variable(t) => t.span(), + Term::Existential(t) => t.span(), Term::Unary { span, .. } => *span, Term::Binary { span, .. } => *span, Term::Aggregation { span, .. } => *span, @@ -120,6 +123,7 @@ impl AstNode for Term<'_> { match self { Term::Primitive(_) => "Primitive".into(), Term::Variable(_) => "Variable".into(), + Term::Existential(_) => "Existential Variable".into(), Term::Unary { .. } => "Unary Term".into(), Term::Binary { .. } => "Binary Term".into(), Term::Aggregation { .. } => "Aggregation".into(), From 32dc6596e062c0c0e348f57bc11c21b46167c939 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Sat, 20 Apr 2024 12:49:06 +0200 Subject: [PATCH 033/214] Add parsing of binary infix arithmetic terms --- nemo/src/io/lexer.rs | 10 +- nemo/src/io/parser.rs | 532 ++++++++++++++++++++++++++-- nemo/src/io/parser/ast.rs | 2 +- nemo/src/io/parser/ast/atom.rs | 19 +- nemo/src/io/parser/ast/directive.rs | 55 ++- nemo/src/io/parser/ast/map.rs | 7 +- nemo/src/io/parser/ast/program.rs | 7 +- nemo/src/io/parser/ast/statement.rs | 21 +- nemo/src/io/parser/ast/term.rs | 45 ++- nemo/src/io/parser/ast/tuple.rs | 7 +- 10 files changed, 640 insertions(+), 65 deletions(-) diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs index bcecf1bee..9082f98bf 100644 --- a/nemo/src/io/lexer.rs +++ b/nemo/src/io/lexer.rs @@ -145,6 +145,7 @@ impl std::fmt::Display for TokenKind { TokenKind::Slash => write!(f, "Slash"), TokenKind::Ident => write!(f, "Ident"), TokenKind::Variable => write!(f, "Variable"), + TokenKind::Existential => write!(f, "Existential"), TokenKind::Aggregate => write!(f, "Aggregate"), TokenKind::Iri => write!(f, "Iri"), TokenKind::Number => write!(f, "Number"), @@ -179,17 +180,20 @@ impl std::fmt::Display for Token<'_> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let offset = self.span.location_offset(); let line = self.span.location_line(); + let column = self.span.get_utf8_column(); let fragment = self.span.fragment(); if self.span.extra == () { write!( f, - "T!{{{0}, S!({offset}, {line}, {fragment:?})}}", + // "T!{{{0}, S!({offset}, {line}, {fragment:?})}}", + "\x1b[93mTOKEN {0} \x1b[34m@{line}:{column} ({offset}) \x1b[93m{fragment:?}\x1b[0m", self.kind ) } else { write!( f, - "T!{{{0}, S!({offset}, {line}, {fragment:?}, {1:?})}}", + // "T!{{{0}, S!({offset}, {line}, {fragment:?}, {1:?})}}", + "\x1b[93mTOKEN {0} \x1b[34m@{line}:{column} ({offset}) \x1b[93m{fragment:?}\x1b[0m, {1:?}\x1b[0m", self.kind, self.span.extra ) } @@ -295,7 +299,7 @@ pub(crate) fn lex_operators(input: Span) -> IResult { ))(input) } -pub(crate) fn lex_unary_operators(input: Span) -> IResult { +pub(crate) fn lex_unary_prefix_operators(input: Span) -> IResult { alt((plus, minus))(input) } diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index e1c48ef4c..235218c9b 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -2436,10 +2436,11 @@ mod new { use crate::io::lexer::{ arrow, at, close_brace, close_paren, colon, comma, dot, equal, 
exclamation_mark, greater, greater_equal, hash, less, less_equal, lex_comment, lex_doc_comment, lex_ident, lex_iri, - lex_number, lex_operators, lex_string, lex_toplevel_doc_comment, lex_unary_operators, - lex_whitespace, open_brace, open_paren, question_mark, tilde, unequal, Span, Token, - TokenKind, + lex_number, lex_operators, lex_string, lex_toplevel_doc_comment, + lex_unary_prefix_operators, lex_whitespace, minus, open_brace, open_paren, plus, + question_mark, slash, star, tilde, unequal, Span, Token, TokenKind, }; + use crate::io::parser::ast::AstNode; use nom::combinator::{all_consuming, opt, recognize}; use nom::sequence::{delimited, pair}; use nom::Parser; @@ -2478,8 +2479,9 @@ mod new { ) } + /// Parse a full program consisting of directives, facts, rules and comments. fn parse_program<'a>(input: Span<'a>) -> Program<'a> { - let span = input.clone(); + // let span = input.clone(); let (_, (tl_doc_comment, statements)) = all_consuming(pair( opt(lex_toplevel_doc_comment), many1(alt(( @@ -2492,20 +2494,23 @@ mod new { ))(input) .expect("Expect EOF"); Program { - span, + span: input, tl_doc_comment, statements, } } + /// Parse whitespace that is between directives, facts, rules and comments. fn parse_whitespace<'a>(input: Span<'a>) -> IResult> { lex_whitespace(input).map(|(rest, ws)| (rest, Statement::Whitespace(ws))) } + /// Parse normal comments that start with a `%` and ends at the line ending. fn parse_comment<'a>(input: Span<'a>) -> IResult> { lex_comment(input).map(|(rest, comment)| (rest, Statement::Comment(comment))) } + /// Parse a fact of the form `predicateName(term1, term2, …).` fn parse_fact<'a>(input: Span<'a>) -> IResult> { // let input_span = input; tuple(( @@ -2528,8 +2533,9 @@ mod new { }) } + /// Parse a rule of the form `headPredicate1(term1, term2, …), headPredicate2(term1, term2, …) :- bodyPredicate(term1, …), term1 >= (term2 + term3) * function(term1, …) .` fn parse_rule<'a>(input: Span<'a>) -> IResult> { - let input_span = input; + // let input_span = input; tuple(( opt(lex_doc_comment), parse_head, @@ -2545,7 +2551,7 @@ mod new { ( rest_input, Statement::Rule { - span: outer_span(input_span, rest_input), + span: outer_span(input, rest_input), doc_comment, head, ws1, @@ -2560,14 +2566,17 @@ mod new { ) } + /// Parse the head atoms of a rule. fn parse_head<'a>(input: Span<'a>) -> IResult>> { parse_atom_list(input, parse_head_atoms) } + /// Parse the body atoms of a rule. fn parse_body<'a>(input: Span<'a>) -> IResult>> { parse_atom_list(input, parse_body_atoms) } + /// Parse the directives (@base, @prefix, @import, @export, @output). fn parse_directive<'a>(input: Span<'a>) -> IResult> { alt(( parse_base_directive, @@ -2579,6 +2588,7 @@ mod new { .map(|(rest, directive)| (rest, Statement::Directive(directive))) } + /// Parse the base directive. fn parse_base_directive<'a>(input: Span<'a>) -> IResult> { let input_span = input.clone(); tuple(( @@ -2611,6 +2621,7 @@ mod new { }) } + /// Parse the prefix directive. fn parse_prefix_directive<'a>(input: Span<'a>) -> IResult> { let input_span = input.clone(); tuple(( @@ -2652,6 +2663,7 @@ mod new { ) } + /// Parse the import directive. fn parse_import_directive<'a>(input: Span<'a>) -> IResult> { let input_span = input.clone(); tuple(( @@ -2694,6 +2706,7 @@ mod new { ) } + /// Parse the export directive. fn parse_export_directive<'a>(input: Span<'a>) -> IResult> { let input_span = input.clone(); tuple(( @@ -2736,6 +2749,7 @@ mod new { ) } + /// Parse the output directive. 
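+ /// A usage sketch, mirroring the `@output a, b, c.` case exercised in the
+ /// tests below (the `matches!` check is illustrative, not from the patch):
+ /// ```ignore
+ /// let (_rest, directive) = parse_output_directive(Span::new("@output a, b, c.")).unwrap();
+ /// assert!(matches!(directive, Directive::Output { .. }));
+ /// ```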
fn parse_output_directive<'a>(input: Span<'a>) -> IResult<Span<'a>, Directive<'a>> { let input_span = input.clone(); tuple(( @@ -2770,6 +2784,7 @@ mod new { ) } + /// Parse a list of `ident1, ident2, …` fn parse_identifier_list<'a>(input: Span<'a>) -> IResult<Span<'a>, List<'a, Token<'a>>> { let input_span = input.clone(); pair( @@ -2793,6 +2808,7 @@ mod new { }) } + /// Parse a list of atoms, like `atom1(…), atom2(…), infix = atom, …` fn parse_atom_list<'a>( input: Span<'a>, parse_atom: fn(Span<'a>) -> IResult<Span<'a>, Atom<'a>>, @@ -2819,10 +2835,12 @@ mod new { }) } + /// Parse the head atoms. The same as the body atoms except for disallowing negated atoms. fn parse_head_atoms<'a>(input: Span<'a>) -> IResult<Span<'a>, Atom<'a>> { alt((parse_normal_atom, parse_infix_atom, parse_map_atom))(input) } + /// Parse the body atoms. The same as the head atoms except for allowing negated atoms. fn parse_body_atoms<'a>(input: Span<'a>) -> IResult<Span<'a>, Atom<'a>> { alt(( parse_normal_atom, @@ -2832,14 +2850,16 @@ mod new { ))(input) } + /// Parse an atom of the form `predicateName(term1, term2, …)`. fn parse_normal_atom<'a>(input: Span<'a>) -> IResult<Span<'a>, Atom<'a>> { - parse_tuple(input) + parse_named_tuple(input) .map(|(rest_input, named_tuple)| (rest_input, Atom::Positive(named_tuple))) } + /// Parse an atom of the form `~predicateName(term1, term2, …)`. fn parse_negative_atom<'a>(input: Span<'a>) -> IResult<Span<'a>, Atom<'a>> { let input_span = input.clone(); - pair(tilde, parse_tuple)(input).map(|(rest_input, (tilde, named_tuple))| { + pair(tilde, parse_named_tuple)(input).map(|(rest_input, (tilde, named_tuple))| { ( rest_input, Atom::Negative { @@ -2851,6 +2871,8 @@ mod new { }) } + /// Parse an "infix atom" of the form `term1 <operation> term2`. + /// The supported infix operations are `<`, `<=`, `=`, `>=`, `>` and `!=`. fn parse_infix_atom<'a>(input: Span<'a>) -> IResult<Span<'a>, Atom<'a>> { let input_span = input.clone(); tuple(( @@ -2875,6 +2897,8 @@ mod new { }) } + /// Parse a tuple with an optional name, like `ident(term1, term2)` + /// or just `(int, int, skip)`. fn parse_tuple<'a>(input: Span<'a>) -> IResult<Span<'a>, Tuple<'a>> { let input_span = input.clone(); tuple(( @@ -2905,6 +2929,40 @@ mod new { ) } + /// Parse a named tuple. This function is like `parse_tuple`, except + /// that it enforces the existence of an identifier for the tuple. + fn parse_named_tuple<'a>(input: Span<'a>) -> IResult<Span<'a>, Tuple<'a>> { + let input_span = input.clone(); + tuple(( + lex_ident, + opt(lex_whitespace), + open_paren, + opt(lex_whitespace), + opt(parse_term_list), + opt(lex_whitespace), + close_paren, + ))(input) + .map( + |(rest_input, (identifier, ws1, open_paren, ws2, terms, ws3, close_paren))| { + ( + rest_input, + Tuple { + span: outer_span(input_span, rest_input), + identifier: Some(identifier), + ws1, + open_paren, + ws2, + terms, + ws3, + close_paren, + }, + ) + }, + ) + } + + /// Parse a map. Maps are denoted with `{…}` and can have an optional name, e.g. `csv {…}`. + /// Inside the curly braces is a list of pairs. fn parse_map<'a>(input: Span<'a>) -> IResult<Span<'a>, Map<'a>> { let input_span = input.clone(); tuple(( @@ -2935,10 +2993,12 @@ mod new { ) } + /// Parse a map in an atom position. fn parse_map_atom<'a>(input: Span<'a>) -> IResult<Span<'a>, Atom<'a>> { parse_map(input).map(|(rest_input, map)| (rest_input, Atom::Map(map))) } + /// Parse a pair list of the form `key1 = value1, key2 = value2, …`. fn parse_pair_list<'a>( input: Span<'a>, ) -> IResult<Span<'a>, Option<List<'a, Pair<'a, Term<'a>, Term<'a>>>>> { @@ -2968,6 +3028,7 @@ mod new { }) } + /// Parse a pair of the form `key = value`.
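+ /// A sketch with a pair as it appears in the import maps above (hedged: the
+ /// exact `Pair` fields are those defined in `ast/map.rs`):
+ /// ```ignore
+ /// let (_rest, pair) = parse_pair(Span::new(r#"resource="sources/dataA.csv""#)).unwrap();
+ /// // pair.key is the term left of `=`, pair.value the term right of it.
+ /// ```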
fn parse_pair<'a>(input: Span<'a>) -> IResult<Span<'a>, Pair<'a, Term<'a>, Term<'a>>> { let input_span = input.clone(); tuple(( @@ -2992,6 +3053,7 @@ mod new { }) } + /// Parse a list of terms of the form `term1, term2, …`. fn parse_term_list<'a>(input: Span<'a>) -> IResult<Span<'a>, List<'a, Term<'a>>> { let input_span = input.clone(); pair( @@ -3015,42 +3077,138 @@ mod new { }) } + /// Parse a term. A term can be a primitive value (constant, number, string, …), + /// a variable (universal or existential), a map, a function (symbol), an arithmetic + /// operation, an aggregation, or a tuple of terms, e.g. `(term1, term2, …)`. fn parse_term<'a>(input: Span<'a>) -> IResult<Span<'a>, Term<'a>> { alt(( + parse_binary_term, + parse_tuple_term, + parse_unary_prefix_term, parse_map_term, parse_primitive_term, parse_variable, parse_existential, - parse_unary_term, - // parse_binary_term, parse_aggregation_term, ))(input) } + /// Parse a primitive term (simple constant, iri constant, number, string). fn parse_primitive_term<'a>(input: Span<'a>) -> IResult<Span<'a>, Term<'a>> { alt((lex_ident, lex_iri, lex_number, lex_string))(input) .map(|(rest_input, term)| (rest_input, Term::Primitive(term))) } + /// Parse a unary prefix term. fn parse_unary_prefix_term<'a>(input: Span<'a>) -> IResult<Span<'a>, Term<'a>> { let input_span = input.clone(); pair(lex_unary_prefix_operators, parse_term)(input).map( |(rest_input, (operation, term))| { ( rest_input, Term::UnaryPrefix { span: outer_span(input_span, rest_input), operation, term: Box::new(term), }, ) }, ) } + /// Parse a binary infix operation of the form `term1 <operation> term2`. fn parse_binary_term<'a>(input: Span<'a>) -> IResult<Span<'a>, Term<'a>> { pair( parse_arithmetic_product, opt(tuple(( opt(lex_whitespace), alt((plus, minus)), opt(lex_whitespace), parse_binary_term, ))), )(input) .map(|(rest_input, (lhs, opt))| { ( rest_input, if let Some((ws1, operation, ws2, rhs)) = opt { Term::Binary { span: outer_span(input, rest_input), lhs: Box::new(lhs), ws1, operation, ws2, rhs: Box::new(rhs), } } else { lhs }, ) }) } + /// Parse an arithmetic product, i.e. an expression involving + /// only `*` and `/` over subexpressions.
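+ /// Together with `parse_binary_term` this forms a recursive-descent precedence
+ /// ladder: `+`/`-` delegate to this function, so `*`/`/` bind more tightly and
+ /// `5*7+7` parses as `(5*7)+7` (see the `arithmetic_expressions` test below).
+ /// Because each level recurses on its right operand, the operators associate
+ /// to the right. Illustrative check (not part of the test suite):
+ /// ```ignore
+ /// let (_rest, term) = parse_arithmetic_product(Span::new("6*7")).unwrap();
+ /// assert!(matches!(term, Term::Binary { .. }));
+ /// ```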
+ fn parse_arithmetic_product<'a>(input: Span<'a>) -> IResult> { + pair( + parse_arithmetic_factor, + opt(tuple(( + opt(lex_whitespace), + alt((star, slash)), + opt(lex_whitespace), + parse_arithmetic_product, + ))), + )(input) + .map(|(rest_input, (lhs, opt))| { + ( + rest_input, + if let Some((ws1, operation, ws2, rhs)) = opt { + Term::Binary { + span: outer_span(input, rest_input), + lhs: Box::new(lhs), + ws1, + operation, + ws2, + rhs: Box::new(rhs), + } + } else { + lhs + }, + ) + }) + } + + fn parse_arithmetic_factor<'a>(input: Span<'a>) -> IResult> { + alt(( + parse_tuple_term, + parse_aggregation_term, + parse_primitive_term, + parse_variable, + parse_existential, + ))(input) + } + + fn fold_arithmetic_expression<'a>( + initial: Term<'a>, + sequence: Vec<(Option>, Token<'a>, Option>, Term<'a>)>, + span_vec: Vec>, + ) -> Term<'a> { + sequence + .into_iter() + .enumerate() + .fold(initial, |acc, (i, pair)| { + let (ws1, operation, ws2, expression) = pair; + Term::Binary { + span: span_vec[i], + lhs: Box::new(acc), + ws1, + operation, + ws2, + rhs: Box::new(expression), + } + }) } + /// Parse an aggregation term of the form `#sum(…)`. fn parse_aggregation_term<'a>(input: Span<'a>) -> IResult> { tuple(( recognize(pair(hash, lex_ident)), @@ -3081,15 +3239,19 @@ mod new { ) } - fn parse_function_term<'a>(input: Span<'a>) -> IResult> { + /// Parse a tuple term, either with a name (function symbol) or as a term (-list) with + /// parenthesis. + fn parse_tuple_term<'a>(input: Span<'a>) -> IResult> { parse_tuple(input) - .map(|(rest_input, named_tuple)| (rest_input, Term::Function(Box::new(named_tuple)))) + .map(|(rest_input, named_tuple)| (rest_input, Term::Tuple(Box::new(named_tuple)))) } + /// Parse a map as a term. fn parse_map_term<'a>(input: Span<'a>) -> IResult> { parse_map(input).map(|(rest_input, map)| (rest_input, Term::Map(Box::new(map)))) } + /// Parse a variable. fn parse_variable<'a>(input: Span<'a>) -> IResult> { recognize(pair(question_mark, lex_ident))(input).map(|(rest_input, var)| { ( @@ -3102,6 +3264,7 @@ mod new { }) } + /// Parse an existential variable. fn parse_existential<'a>(input: Span<'a>) -> IResult> { recognize(pair(exclamation_mark, lex_ident))(input).map(|(rest_input, existential)| { ( @@ -3115,6 +3278,7 @@ mod new { } // Order of functions is important, because of ordered choice and no backtracking + /// Parse the operator for an infix atom. fn parse_operation_token<'a>(input: Span<'a>) -> IResult> { alt((less_equal, greater_equal, equal, unequal, less, greater))(input) } @@ -3561,16 +3725,334 @@ limeSpecies(?X, ?Name) :- taxon(?X, ?Name, ?Y), limeSpecies(?Y, ?N). oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters), ?age > 200, limeSpecies(?id,?species) ."#, ); - println!("{}", parse_program(input)); - // assert!(false); + let ast = parse_program(input); + println!("{}", ast); + assert_eq!( + { + let mut result = String::new(); + for token in get_all_tokens(&ast) { + result.push_str(token.span().fragment()); + } + println!("{}", result); + result + }, + *input.fragment(), + ); } #[test] fn parser_test() { let str = std::fs::read_to_string("../testfile.rls").expect("testfile not found"); let input = Span::new(str.as_str()); - dbg!(parse_program(input)); + println!("{}", parse_program(input)); // assert!(false); } + + #[test] + fn arithmetic_expressions() { + use TokenKind::*; + macro_rules! 
T { + ($tok_kind: expr, $offset: literal, $line: literal, $str: literal) => { + Token::new($tok_kind, unsafe { + Span::new_from_raw_offset($offset, $line, $str, ()) + }) + }; + } + macro_rules! s { + ($offset:literal,$line:literal,$str:literal) => { + unsafe { Span::new_from_raw_offset($offset, $line, $str, ()) } + }; + } + + assert_eq!( + { + let result = parse_term(Span::new("42")); + result.unwrap().1 + }, + Term::Primitive(T! {Number, 0, 1, "42"}), + ); + + assert_eq!( + { + let result = parse_term(Span::new("35+7")); + result.unwrap().1 + }, + Term::Binary { + span: s!(0, 1, "35+7"), + lhs: Box::new(Term::Primitive(T! {Number, 0, 1, "35"})), + ws1: None, + operation: T! {Plus, 2, 1, "+"}, + ws2: None, + rhs: Box::new(Term::Primitive(T! {Number, 3, 1, "7"})) + } + ); + + assert_eq!( + { + let result = parse_term(Span::new("6*7")); + result.unwrap().1 + }, + Term::Binary { + span: s!(0, 1, "6*7"), + lhs: Box::new(Term::Primitive(T! {Number, 0,1,"6"})), + ws1: None, + operation: T! {Star, 1,1,"*"}, + ws2: None, + rhs: Box::new(Term::Primitive(T! {Number, 2,1,"7"})), + } + ); + + assert_eq!( + { + let result = parse_term(Span::new("49-7")); + result.unwrap().1 + }, + Term::Binary { + span: s!(0, 1, "49-7"), + lhs: Box::new(Term::Primitive(T! {Number, 0, 1, "49"})), + ws1: None, + operation: T! {Minus, 2, 1, "-"}, + ws2: None, + rhs: Box::new(Term::Primitive(T! {Number, 3, 1, "7"})) + } + ); + + assert_eq!( + { + let result = parse_term(Span::new("84/2")); + result.unwrap().1 + }, + Term::Binary { + span: s!(0, 1, "84/2"), + lhs: Box::new(Term::Primitive(T! {Number, 0, 1, "84"})), + ws1: None, + operation: T! {Slash, 2, 1, "/"}, + ws2: None, + rhs: Box::new(Term::Primitive(T! {Number, 3, 1, "2"})) + } + ); + + assert_eq!( + { + let result = parse_term(Span::new("5*7+7")); + result.unwrap().1 + }, + Term::Binary { + span: s!(0, 1, "5*7+7"), + lhs: Box::new(Term::Binary { + span: s!(0, 1, "5*7"), + lhs: Box::new(Term::Primitive(T! {Number, 0,1,"5"})), + ws1: None, + operation: T! {Star, 1,1,"*"}, + ws2: None, + rhs: Box::new(Term::Primitive(T! {Number, 2,1,"7"})) + }), + ws1: None, + operation: T! {Plus, 3,1,"+"}, + ws2: None, + rhs: Box::new(Term::Primitive(T! {Number, 4,1,"7"})), + } + ); + + assert_eq!( + { + let result = parse_term(Span::new("7+5*7")); + result.unwrap().1 + }, + Term::Binary { + span: s!(0, 1, "7+5*7"), + lhs: Box::new(Term::Primitive(T! {Number, 0,1,"7"})), + ws1: None, + operation: T! {Plus, 1,1,"+"}, + ws2: None, + rhs: Box::new(Term::Binary { + span: s!(2, 1, "5*7"), + lhs: Box::new(Term::Primitive(T! {Number, 2,1,"5"})), + ws1: None, + operation: T! {Star, 3,1,"*"}, + ws2: None, + rhs: Box::new(Term::Primitive(T! {Number, 4,1,"7"})) + }), + } + ); + + assert_eq!( + { + let result = parse_term(Span::new("(15+3*2-(7+35)*8)/3")); + result.unwrap().1 + }, + Term::Binary { + span: s!(0, 1, "(15+3*2-(7+35)*8)/3"), + lhs: Box::new(Term::Tuple(Box::new(Tuple { + span: s!(0, 1, "(15+3*2-(7+35)*8)"), + identifier: None, + ws1: None, + open_paren: T!(OpenParen, 0, 1, "("), + ws2: None, + terms: Some(List { + span: s!(1, 1, "15+3*2-(7+35)*8"), + first: Term::Binary { + span: s!(1, 1, "15+3*2-(7+35)*8"), + lhs: Box::new(Term::Primitive(T! {Number, 1,1,"15"})), + ws1: None, + operation: T! {Plus, 3,1,"+"}, + ws2: None, + rhs: Box::new(Term::Binary { + span: s!(4, 1, "3*2-(7+35)*8"), + lhs: Box::new(Term::Binary { + span: s!(4, 1, "3*2"), + lhs: Box::new(Term::Primitive(T! {Number, 4,1,"3"})), + ws1: None, + operation: T! 
{Star, 5,1,"*"}, + ws2: None, + rhs: Box::new(Term::Primitive(T! {Number, 6,1,"2"})), + }), + ws1: None, + operation: T! {Minus, 7,1,"-"}, + ws2: None, + rhs: Box::new(Term::Binary { + span: s!(8, 1, "(7+35)*8"), + lhs: Box::new(Term::Tuple(Box::new(Tuple { + span: s!(8, 1, "(7+35)"), + identifier: None, + ws1: None, + open_paren: T! {OpenParen, 8, 1, "("}, + ws2: None, + terms: Some(List { + span: s!(9, 1, "7+35"), + first: Term::Binary { + span: s!(9, 1, "7+35"), + lhs: Box::new(Term::Primitive( + T! {Number, 9,1,"7"} + )), + ws1: None, + operation: T! {Plus, 10,1,"+"}, + ws2: None, + rhs: Box::new(Term::Primitive( + T! {Number, 11,1,"35"} + )), + }, + rest: None + }), + ws3: None, + close_paren: T! {CloseParen, 13,1,")"}, + }))), + ws1: None, + operation: T! {Star, 14,1,"*"}, + ws2: None, + rhs: Box::new(Term::Primitive(T! {Number, 15,1,"8"})), + }), + }), + }, + rest: None + }), + ws3: None, + close_paren: T!(CloseParen, 16, 1, ")") + }))), + ws1: None, + operation: T! {Slash, 17,1,"/"}, + ws2: None, + rhs: Box::new(Term::Primitive(T! {Number, 18,1,"3"})), + } + ); + // Term::Binary { + // span: s!(), + // lhs: Box::new(), + // ws1: None, + // operation: , + // ws2: None, + // rhs: Box::new(), + // } + + assert_eq!( + { + let result = parse_term(Span::new("15+3*2-(7+35)*8/3")); + result.unwrap().1 + }, + Term::Binary { + span: s!(0, 1, "15+3*2-(7+35)*8/3"), + lhs: Box::new(Term::Primitive(T! {Number, 0,1,"15"})), + ws1: None, + operation: T! {Plus, 2,1,"+"}, + ws2: None, + rhs: Box::new(Term::Binary { + span: s!(3, 1, "3*2-(7+35)*8/3"), + lhs: Box::new(Term::Binary { + span: s!(3, 1, "3*2"), + lhs: Box::new(Term::Primitive(T! {Number, 3,1,"3"})), + ws1: None, + operation: T! {Star, 4,1,"*"}, + ws2: None, + rhs: Box::new(Term::Primitive(T! {Number, 5,1,"2"})), + }), + ws1: None, + operation: T! {Minus, 6,1,"-"}, + ws2: None, + rhs: Box::new(Term::Binary { + span: s!(7, 1, "(7+35)*8/3"), + lhs: Box::new(Term::Tuple(Box::new(Tuple { + span: s!(7, 1, "(7+35)"), + identifier: None, + ws1: None, + open_paren: T! {OpenParen, 7,1,"("}, + ws2: None, + terms: Some(List { + span: s!(8, 1, "7+35"), + first: Term::Binary { + span: s!(8, 1, "7+35"), + lhs: Box::new(Term::Primitive(T! {Number, 8,1,"7"})), + ws1: None, + operation: T! {Plus, 9,1,"+"}, + ws2: None, + rhs: Box::new(Term::Primitive(T! {Number, 10,1,"35"})), + }, + rest: None, + }), + ws3: None, + close_paren: T! {CloseParen, 12,1,")"}, + }))), + ws1: None, + operation: T! {Star, 13,1,"*"}, + ws2: None, + rhs: Box::new(Term::Binary { + span: s!(14, 1, "8/3"), + lhs: Box::new(Term::Primitive(T! {Number, 14,1,"8"})), + ws1: None, + operation: T! {Slash, 15, 1, "/"}, + ws2: None, + rhs: Box::new(Term::Primitive(T! 
{Number, 16,1,"3"})), + }), + }), + }), + } + ); + + // assert_eq!({ + // let result = parse_term(Span::new("1*2*3*4*5")); + // result.unwrap().1 + // },); + + // assert_eq!({ + // let result = parse_term(Span::new("(5+3)")); + // result.unwrap().1 + // },); + + // assert_eq!({ + // let result = parse_term(Span::new("( int , int , string , skip )")); + // result.unwrap().1 + // },); + + // assert_eq!({ + // let result = parse_term(Span::new("(14+4)+3")); + // result.unwrap().1 + // },); + + // assert_eq!({ + // let result = parse_term(Span::new( + // "(3 + #sum(?X, ?Y)) * (LENGTH(\"Hello, World!\") + 3)", + // )); + // result.unwrap().1 + // },); + } } } diff --git a/nemo/src/io/parser/ast.rs b/nemo/src/io/parser/ast.rs index 8be761f36..1c838a026 100644 --- a/nemo/src/io/parser/ast.rs +++ b/nemo/src/io/parser/ast.rs @@ -70,7 +70,7 @@ impl AstNode for List<'_, T> { } fn name(&self) -> String { - String::from("List") + format!("List \x1b[34m@{}:{} \x1b[92m{:?}\x1b[0m", self.span.location_line(), self.span.get_utf8_column(), self.span.fragment()) } } impl Display for List<'_, T> { diff --git a/nemo/src/io/parser/ast/atom.rs b/nemo/src/io/parser/ast/atom.rs index e75c9fb4f..52e8d5383 100644 --- a/nemo/src/io/parser/ast/atom.rs +++ b/nemo/src/io/parser/ast/atom.rs @@ -76,11 +76,22 @@ impl AstNode for Atom<'_> { } fn name(&self) -> String { + macro_rules! name { + ($name:literal) => { + format!( + "{} \x1b[34m@{}:{} \x1b[92m{:?}\x1b[0m", + $name, + self.span().location_line(), + self.span().get_utf8_column(), + self.span().fragment() + ) + }; + } match self { - Atom::Positive(_) => "Positive Atom".into(), - Atom::Negative { .. } => "Negative Atom".into(), - Atom::InfixAtom { .. } => "Infix Atom".into(), - Atom::Map(_) => "Map Atom".into(), + Atom::Positive(_) => name!("Positive Atom"), + Atom::Negative { .. } => name!("Negative Atom"), + Atom::InfixAtom { .. } => name!("Infix Atom"), + Atom::Map(_) => name!("Map Atom"), } } } diff --git a/nemo/src/io/parser/ast/directive.rs b/nemo/src/io/parser/ast/directive.rs index cf024ed8c..c75351256 100644 --- a/nemo/src/io/parser/ast/directive.rs +++ b/nemo/src/io/parser/ast/directive.rs @@ -70,13 +70,13 @@ impl AstNode for Directive<'_> { fn children(&self) -> Option> { match self { Directive::Base { - span, doc_comment, kw, ws1, base_iri, ws2, dot, + .. } => { let mut vec = Vec::new(); if let Some(dc) = doc_comment { @@ -95,7 +95,6 @@ impl AstNode for Directive<'_> { Some(vec) } Directive::Prefix { - span, doc_comment, kw, ws1, @@ -104,6 +103,7 @@ impl AstNode for Directive<'_> { prefix_iri, ws3, dot, + .. } => { let mut vec = Vec::new(); if let Some(dc) = doc_comment { @@ -126,7 +126,6 @@ impl AstNode for Directive<'_> { Some(vec) } Directive::Import { - span, doc_comment, kw, ws1, @@ -137,6 +136,7 @@ impl AstNode for Directive<'_> { map, ws4, dot, + .. } => { let mut vec = Vec::new(); if let Some(dc) = doc_comment { @@ -161,7 +161,6 @@ impl AstNode for Directive<'_> { Some(vec) } Directive::Export { - span, doc_comment, kw, ws1, @@ -172,6 +171,7 @@ impl AstNode for Directive<'_> { map, ws4, dot, + .. } => { let mut vec = Vec::new(); if let Some(dc) = doc_comment { @@ -195,7 +195,31 @@ impl AstNode for Directive<'_> { vec.push(dot); Some(vec) } - Directive::Output { .. 
} => todo!(), + Directive::Output { + span, + doc_comment, + kw, + ws1, + predicates, + ws2, + dot, + } => { + let mut vec = Vec::new(); + if let Some(dc) = doc_comment { + #[allow(trivial_casts)] + vec.push(dc as &dyn AstNode); + }; + vec.push(kw); + vec.push(ws1); + if let Some(p) = predicates { + vec.push(p); + }; + if let Some(ws) = ws2 { + vec.push(ws); + }; + vec.push(dot); + Some(vec) + } } } @@ -223,12 +247,23 @@ impl AstNode for Directive<'_> { } fn name(&self) -> String { + macro_rules! name { + ($name:literal) => { + format!( + "{} \x1b[34m@{}:{} \x1b[92m{:?}\x1b[0m", + $name, + self.span().location_line(), + self.span().get_utf8_column(), + self.span().fragment() + ) + }; + } match self { - Directive::Base { .. } => "Base Directive".into(), - Directive::Prefix { .. } => "Prefix Directive".into(), - Directive::Import { .. } => "Import Directive".into(), - Directive::Export { .. } => "Export Directive".into(), - Directive::Output { .. } => "Output Directive".into(), + Directive::Base { .. } => name!("Base Directive"), + Directive::Prefix { .. } => name!("Prefix Directive"), + Directive::Import { .. } => name!("Import Directive"), + Directive::Export { .. } => name!("Export Directive"), + Directive::Output { .. } => name!("Output Directive"), } } } diff --git a/nemo/src/io/parser/ast/map.rs b/nemo/src/io/parser/ast/map.rs index 0e043471d..669963ae8 100644 --- a/nemo/src/io/parser/ast/map.rs +++ b/nemo/src/io/parser/ast/map.rs @@ -109,7 +109,12 @@ impl AstNode for Pair<'_, K, V> { } fn name(&self) -> String { - String::from("Pair") + format!( + "Pair \x1b[34m@{}:{} \x1b[92m{:?}\x1b[0m", + self.span.location_line(), + self.span.get_utf8_column(), + self.span.fragment() + ) } } impl std::fmt::Display for Pair<'_, K, V> { diff --git a/nemo/src/io/parser/ast/program.rs b/nemo/src/io/parser/ast/program.rs index 443697dd9..a7397f7ec 100644 --- a/nemo/src/io/parser/ast/program.rs +++ b/nemo/src/io/parser/ast/program.rs @@ -43,7 +43,12 @@ impl AstNode for Program<'_> { } fn name(&self) -> String { - String::from("Program") + format!( + "Program \x1b[34m@{}:{} \x1b[92m\"{}…\"\x1b[0m", + self.span.location_line(), + self.span.get_utf8_column(), + &self.span.fragment()[..60], + ) } } impl std::fmt::Display for Program<'_> { diff --git a/nemo/src/io/parser/ast/statement.rs b/nemo/src/io/parser/ast/statement.rs index 84cfa3e61..e26ec39dd 100644 --- a/nemo/src/io/parser/ast/statement.rs +++ b/nemo/src/io/parser/ast/statement.rs @@ -111,12 +111,23 @@ impl AstNode for Statement<'_> { } fn name(&self) -> String { + macro_rules! name { + ($name:literal) => { + format!( + "{} \x1b[34m@{}:{} \x1b[92m{:?}\x1b[0m", + $name, + self.span().location_line(), + self.span().get_utf8_column(), + self.span().fragment() + ) + }; + } match self { - Statement::Directive(_) => "Directive".into(), - Statement::Fact { .. } => "Fact".into(), - Statement::Rule { .. } => "Rule".into(), - Statement::Whitespace(_) => "Whitespace".into(), - Statement::Comment(_) => "Comment".into(), + Statement::Directive(_) => name!("Directive"), + Statement::Fact { .. } => name!("Fact"), + Statement::Rule { .. } => name!("Rule"), + Statement::Whitespace(_) => name!("Whitespace"), + Statement::Comment(_) => name!("Comment"), } } } diff --git a/nemo/src/io/parser/ast/term.rs b/nemo/src/io/parser/ast/term.rs index 44ccc1ee3..a058e401b 100644 --- a/nemo/src/io/parser/ast/term.rs +++ b/nemo/src/io/parser/ast/term.rs @@ -10,7 +10,7 @@ pub(crate) enum Term<'a> { Variable(Token<'a>), Existential(Token<'a>), // TODO: Is whitespace needed? 
Figure out how unary terms look - Unary { + UnaryPrefix { span: Span<'a>, operation: Token<'a>, term: Box>, @@ -32,7 +32,7 @@ pub(crate) enum Term<'a> { ws2: Option>, close_paren: Token<'a>, }, - Function(Box>), + Tuple(Box>), Map(Box>), } impl AstNode for Term<'_> { @@ -41,7 +41,7 @@ impl AstNode for Term<'_> { Term::Primitive(token) => Some(vec![token]), Term::Variable(token) => Some(vec![token]), Term::Existential(token) => Some(vec![token]), - Term::Unary { + Term::UnaryPrefix { operation, term, .. } => Some(vec![operation, &**term]), Term::Binary { @@ -88,7 +88,7 @@ impl AstNode for Term<'_> { vec.push(close_paren); Some(vec) } - Term::Function(named_tuple) => named_tuple.children(), + Term::Tuple(named_tuple) => named_tuple.children(), Term::Map(map) => map.children(), } } @@ -98,10 +98,10 @@ impl AstNode for Term<'_> { Term::Primitive(t) => t.span(), Term::Variable(t) => t.span(), Term::Existential(t) => t.span(), - Term::Unary { span, .. } => *span, + Term::UnaryPrefix { span, .. } => *span, Term::Binary { span, .. } => *span, Term::Aggregation { span, .. } => *span, - Term::Function(named_tuple) => named_tuple.span(), + Term::Tuple(named_tuple) => named_tuple.span(), Term::Map(map) => map.span(), } } @@ -120,15 +120,32 @@ impl AstNode for Term<'_> { } fn name(&self) -> String { + macro_rules! name { + ($name:literal) => { + format!( + "{} \x1b[34m@{}:{} \x1b[92m{:?}\x1b[0m", + $name, + self.span().location_line(), + self.span().get_utf8_column(), + self.span().fragment() + ) + }; + } match self { - Term::Primitive(_) => "Primitive".into(), - Term::Variable(_) => "Variable".into(), - Term::Existential(_) => "Existential Variable".into(), - Term::Unary { .. } => "Unary Term".into(), - Term::Binary { .. } => "Binary Term".into(), - Term::Aggregation { .. } => "Aggregation".into(), - Term::Function(_) => "Function Symbol".into(), - Term::Map(_) => "Map".into(), + Term::Primitive(_) => name!("Primitive"), + Term::Variable(_) => name!("Variable"), + Term::Existential(_) => name!("Existential Variable"), + Term::UnaryPrefix { .. } => name!("Unary Term"), + Term::Binary { .. } => name!("Binary Term"), + Term::Aggregation { .. 
} => name!("Aggregation"), + Term::Tuple(f) => { + if let Some(_) = f.identifier { + name!("Function Symbol") + } else { + name!("Tuple") + } + } + Term::Map(_) => name!("Map"), } } } diff --git a/nemo/src/io/parser/ast/tuple.rs b/nemo/src/io/parser/ast/tuple.rs index 9d771d289..a49f67012 100644 --- a/nemo/src/io/parser/ast/tuple.rs +++ b/nemo/src/io/parser/ast/tuple.rs @@ -55,7 +55,12 @@ impl AstNode for Tuple<'_> { } fn name(&self) -> String { - String::from("Tuple") + format!( + "Tuple \x1b[34m@{}:{} \x1b[92m{:?}\x1b[0m", + self.span.location_line(), + self.span.get_utf8_column(), + self.span.fragment() + ) } } impl std::fmt::Display for Tuple<'_> { From cfe3555a8f6c8b636215cedac015d4118350055a Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Wed, 24 Apr 2024 17:37:53 +0200 Subject: [PATCH 034/214] Add error reporting with VerboseError and ContextError --- nemo/src/io/lexer.rs | 102 ++-- nemo/src/io/parser.rs | 1196 +++++++++++++++++++++++++---------------- 2 files changed, 808 insertions(+), 490 deletions(-) diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs index 9082f98bf..4dd5f0ded 100644 --- a/nemo/src/io/lexer.rs +++ b/nemo/src/io/lexer.rs @@ -4,7 +4,8 @@ use nom::{ branch::alt, bytes::complete::{is_not, tag, take}, character::complete::{alpha1, alphanumeric1, digit1, line_ending, multispace1}, - combinator::{all_consuming, map, recognize}, + combinator::{all_consuming, cut, map, recognize}, + error::{ContextError, ParseError}, multi::{many0, many1}, sequence::{delimited, pair, tuple}, IResult, @@ -227,7 +228,9 @@ impl<'a> crate::io::parser::ast::AstNode for Token<'a> { macro_rules! syntax { ($func_name: ident, $tag_string: literal, $token: expr) => { - pub(crate) fn $func_name<'a>(input: Span) -> IResult { + pub(crate) fn $func_name<'a, E: ParseError>>( + input: Span<'a>, + ) -> IResult, Token, E> { map(tag($tag_string), |span| Token::new($token, span))(input) } }; @@ -251,7 +254,9 @@ syntax!(hash, "#", TokenKind::Hash); syntax!(underscore, "_", TokenKind::Underscore); syntax!(at, "@", TokenKind::At); -pub(crate) fn lex_punctuations(input: Span) -> IResult { +pub(crate) fn lex_punctuations<'a, E: ParseError>>( + input: Span<'a>, +) -> IResult, Token, E> { alt(( arrow, open_paren, @@ -284,7 +289,9 @@ syntax!(minus, "-", TokenKind::Minus); syntax!(star, "*", TokenKind::Star); syntax!(slash, "/", TokenKind::Slash); -pub(crate) fn lex_operators(input: Span) -> IResult { +pub(crate) fn lex_operators<'a, E: ParseError>>( + input: Span<'a>, +) -> IResult, Token, E> { alt(( less_equal, greater_equal, @@ -299,11 +306,15 @@ pub(crate) fn lex_operators(input: Span) -> IResult { ))(input) } -pub(crate) fn lex_unary_prefix_operators(input: Span) -> IResult { +pub(crate) fn lex_unary_prefix_operators<'a, E: ParseError>>( + input: Span<'a>, +) -> IResult, Token<'a>, E> { alt((plus, minus))(input) } -pub(crate) fn lex_ident(input: Span) -> IResult { +pub(crate) fn lex_ident<'a, E: ParseError>>( + input: Span<'a>, +) -> IResult, Token, E> { let (rest, result) = recognize(pair( alpha1, many0(alt((alphanumeric1, tag("_"), tag("-")))), @@ -319,48 +330,66 @@ pub(crate) fn lex_ident(input: Span) -> IResult { Ok((rest, token)) } -pub(crate) fn lex_iri(input: Span) -> IResult { - recognize(delimited(tag("<"), is_not("> \n"), tag(">")))(input) +pub(crate) fn lex_iri<'a, E: ParseError>>(input: Span<'a>) -> IResult, Token, E> { + recognize(delimited(tag("<"), is_not("> \n"), cut(tag(">"))))(input) .map(|(rest, result)| (rest, Token::new(TokenKind::Iri, result))) } -pub(crate) fn lex_number(input: 
Span) -> IResult { +pub(crate) fn lex_number<'a, E: ParseError>>( + input: Span<'a>, +) -> IResult, Token, E> { digit1(input).map(|(rest, result)| (rest, Token::new(TokenKind::Number, result))) } -pub(crate) fn lex_string(input: Span) -> IResult { - recognize(delimited(tag("\""), is_not("\""), tag("\"")))(input) +pub(crate) fn lex_string<'a, E: ParseError>>( + input: Span<'a>, +) -> IResult, Token, E> { + recognize(delimited(tag("\""), is_not("\""), cut(tag("\""))))(input) .map(|(rest, result)| (rest, Token::new(TokenKind::String, result))) } -pub(crate) fn lex_comment(input: Span) -> IResult { +pub(crate) fn lex_comment<'a, E: ParseError> + ContextError>>( + input: Span<'a>, +) -> IResult, Token, E> { recognize(tuple((tag("%"), many0(is_not("\n")), line_ending)))(input) .map(|(rest, result)| (rest, Token::new(TokenKind::Comment, result))) } -pub(crate) fn lex_doc_comment(input: Span) -> IResult { +pub(crate) fn lex_doc_comment<'a, E: ParseError> + ContextError>>( + input: Span<'a>, +) -> IResult, Token, E> { recognize(many1(tuple((tag("%%"), many0(is_not("\n")), line_ending))))(input) .map(|(rest, result)| (rest, Token::new(TokenKind::DocComment, result))) } -pub(crate) fn lex_toplevel_doc_comment(input: Span) -> IResult { +pub(crate) fn lex_toplevel_doc_comment<'a, E: ParseError> + ContextError>>( + input: Span<'a>, +) -> IResult, Token, E> { recognize(many1(tuple((tag("%!"), many0(is_not("\n")), line_ending))))(input) .map(|(rest, result)| (rest, Token::new(TokenKind::TlDocComment, result))) } -pub(crate) fn lex_comments(input: Span) -> IResult { +pub(crate) fn lex_comments<'a, E: ParseError> + ContextError>>( + input: Span<'a>, +) -> IResult, Token, E> { alt((lex_toplevel_doc_comment, lex_doc_comment, lex_comment))(input) } -pub(crate) fn lex_whitespace(input: Span) -> IResult { +pub(crate) fn lex_whitespace<'a, E: ParseError>>( + input: Span<'a>, +) -> IResult, Token, E> { multispace1(input).map(|(rest, result)| (rest, Token::new(TokenKind::Whitespace, result))) } -pub(crate) fn lex_illegal(input: Span) -> IResult { +pub(crate) fn lex_illegal<'a, E: ParseError>>( + input: Span<'a>, +) -> IResult, Token, E> { take(1usize)(input).map(|(rest, result)| (rest, Token::new(TokenKind::Illegal, result))) } -pub(crate) fn lex_tokens(input: Span) -> IResult> { +pub(crate) fn lex_tokens<'a, E: ParseError> + ContextError>>( + input: Span<'a>, +) -> IResult, Vec, E> { all_consuming(many0(alt(( lex_iri, lex_operators, @@ -394,14 +423,17 @@ mod test { #[test] fn empty_input() { let input = Span::new(""); - assert_eq!(lex_tokens(input).unwrap().1, vec![T!(Eof, 0, 1, "")]) + assert_eq!( + lex_tokens::>(input).unwrap().1, + vec![T!(Eof, 0, 1, "")] + ) } #[test] fn base() { let input = Span::new("@base"); assert_eq!( - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![T!(At, 0, 1, "@"), T!(Base, 1, 1, "base"), T!(Eof, 5, 1, ""),] ) } @@ -410,7 +442,7 @@ mod test { fn prefix() { let input = Span::new("@prefix"); assert_eq!( - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(At, 0, 1, "@"), T!(Prefix, 1, 1, "prefix"), @@ -423,7 +455,7 @@ mod test { fn output() { let input = Span::new("@output"); assert_eq!( - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(At, 0, 1, "@"), T!(Output, 1, 1, "output"), @@ -436,7 +468,7 @@ mod test { fn import() { let input = Span::new("@import"); assert_eq!( - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(At, 0, 1, "@"), T!(Import, 1, 1, "import"), @@ -449,7 +481,7 @@ mod test { 
fn export() { let input = Span::new("@export"); assert_eq!( - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(At, 0, 1, "@"), T!(Export, 1, 1, "export"), @@ -462,7 +494,7 @@ mod test { fn idents_with_keyword_prefix() { let input = Span::new("@baseA, @prefixB, @importC, @exportD, @outputE."); assert_eq!( - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(At, 0, 1, "@"), T!(Ident, 1, 1, "baseA"), @@ -492,7 +524,7 @@ mod test { fn tokenize() { let input = Span::new("P(?X) :- A(?X).\t\n A(Human)."); assert_eq!( - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Ident, 0, 1, "P"), T!(OpenParen, 1, 1, "("), @@ -523,7 +555,7 @@ mod test { fn comment() { let input = Span::new(" % Some Comment\n"); assert_eq!( - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Whitespace, 0, 1, " "), T!(Comment, 4, 1, "% Some Comment\n"), @@ -538,7 +570,7 @@ mod test { fn ident() { let input = Span::new("some_Ident(Alice). %comment at the end of a line\n"); assert_eq!( - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Ident, 0, 1, "some_Ident"), T!(OpenParen, 10, 1, "("), @@ -556,7 +588,7 @@ mod test { fn forbidden_ident() { let input = Span::new("_someIdent(Alice). %comment at the end of a line\n"); assert_eq!( - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Underscore, 0, 1, "_"), T!(Ident, 1, 1, "someIdent"), @@ -575,7 +607,7 @@ mod test { fn iri() { let input = Span::new(""); assert_eq!( - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Iri, 0, 1, ""), T!(Eof, 31, 1, ""), @@ -587,7 +619,7 @@ mod test { fn iri_pct_enc() { let input = Span::new("\n"); assert_eq!( - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Iri, 0, 1, ""), T!(Whitespace, 37, 1, "\n"), @@ -602,7 +634,7 @@ mod test { fn constraints() { let input = Span::new("A(?X):-B(?X),?X<42,?X>3."); assert_eq!( - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Ident, 0, 1, "A"), T!(OpenParen, 1, 1, "("), @@ -635,7 +667,7 @@ mod test { fn pct_enc_comment() { let input = Span::new("%d4 this should be a comment,\n% but the lexer can't distinguish a percent encoded value\n% in an iri from a comment :(\n"); assert_eq!( - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Comment, 0, 1, "%d4 this should be a comment,\n"), T!( @@ -654,7 +686,7 @@ mod test { fn fact() { let input = Span::new("somePred(term1, term2)."); assert_eq!( - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Ident, 0, 1, "somePred"), T!(OpenParen, 8, 1, "("), @@ -673,7 +705,7 @@ mod test { fn whitespace() { let input = Span::new(" \t \n\n\t \n"); assert_eq!( - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Whitespace, 0, 1, " \t \n\n\t \n"), T!(Eof, 12, 4, ""), diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index 235218c9b..e027612ad 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -2434,14 +2434,15 @@ mod new { atom::*, directive::*, map::*, program::*, statement::*, term::*, tuple::*, List, }; use crate::io::lexer::{ - arrow, at, close_brace, close_paren, colon, comma, dot, equal, exclamation_mark, greater, - greater_equal, hash, less, less_equal, lex_comment, lex_doc_comment, lex_ident, lex_iri, - lex_number, lex_operators, lex_string, lex_toplevel_doc_comment, + arrow, at, caret, close_brace, close_paren, colon, comma, dot, equal, exclamation_mark, + 
greater, greater_equal, hash, less, less_equal, lex_comment, lex_doc_comment, lex_ident, + lex_iri, lex_number, lex_operators, lex_string, lex_toplevel_doc_comment, lex_unary_prefix_operators, lex_whitespace, minus, open_brace, open_paren, plus, question_mark, slash, star, tilde, unequal, Span, Token, TokenKind, }; use crate::io::parser::ast::AstNode; - use nom::combinator::{all_consuming, opt, recognize}; + use nom::combinator::{all_consuming, cut, map, opt, recognize}; + use nom::error::{context, ContextError, ParseError}; use nom::sequence::{delimited, pair}; use nom::Parser; use nom::{ @@ -2465,12 +2466,11 @@ mod new { } } - fn ignore_ws_and_comments<'a, F, O>( + fn ignore_ws_and_comments<'a, F, O, E: ParseError> + ContextError>>( inner: F, - ) -> impl FnMut(Span<'a>) -> IResult, O, nom::error::Error>> + ) -> impl FnMut(Span<'a>) -> IResult, O, E> where - F: Parser, O, nom::error::Error>> - + FnMut(Span<'a>) -> IResult, O, nom::error::Error>>, + F: Parser, O, E> + FnMut(Span<'a>) -> IResult, O, E>, { delimited( many0(alt((lex_whitespace, lex_comment))), @@ -2480,45 +2480,63 @@ mod new { } /// Parse a full program consisting of directives, facts, rules and comments. - fn parse_program<'a>(input: Span<'a>) -> Program<'a> { - // let span = input.clone(); - let (_, (tl_doc_comment, statements)) = all_consuming(pair( - opt(lex_toplevel_doc_comment), - many1(alt(( - parse_fact, - parse_rule, - parse_whitespace, - parse_directive, - parse_comment, - ))), - ))(input) - .expect("Expect EOF"); - Program { - span: input, - tl_doc_comment, - statements, - } + fn parse_program<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult, Program<'a>, E> { + context( + "parse program", + all_consuming(pair( + opt(lex_toplevel_doc_comment), + many1(alt(( + parse_rule, + parse_fact, + parse_whitespace, + parse_directive, + parse_comment, + ))), + )), + )(input) + .map(|(rest_input, (tl_doc_comment, statements))| { + ( + rest_input, + Program { + span: input, + tl_doc_comment, + statements, + }, + ) + }) } /// Parse whitespace that is between directives, facts, rules and comments. - fn parse_whitespace<'a>(input: Span<'a>) -> IResult> { - lex_whitespace(input).map(|(rest, ws)| (rest, Statement::Whitespace(ws))) + fn parse_whitespace<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult, E> { + context("parse whitespace", lex_whitespace)(input) + .map(|(rest, ws)| (rest, Statement::Whitespace(ws))) } /// Parse normal comments that start with a `%` and ends at the line ending. 
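Every parser now threads the error type `E` through as a generic parameter instead of committing to nom's default error type. The payoff is that the same grammar can run with the cheap default error in normal operation and with `VerboseError` when a readable failure trace is wanted. A minimal standalone sketch of the pattern (assuming nom 7; the function name is illustrative, not part of the patch):

```rust
use nom::{
    bytes::complete::tag,
    error::{Error, ParseError, VerboseError},
    IResult,
};

// Written once against the ParseError trait, the same parser runs with
// nom's cheap default error type or with VerboseError for diagnostics.
fn at_sign<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, &'a str, E> {
    tag("@")(input)
}

fn main() {
    // Cheap errors on the happy path…
    assert!(at_sign::<Error<&str>>("@base").is_ok());
    // …verbose errors when diagnosing a failure.
    assert!(at_sign::<VerboseError<&str>>("base").is_err());
}
```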
- fn parse_comment<'a>(input: Span<'a>) -> IResult> { - lex_comment(input).map(|(rest, comment)| (rest, Statement::Comment(comment))) + fn parse_comment<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult, E> { + context("parse comment", lex_comment)(input) + .map(|(rest, comment)| (rest, Statement::Comment(comment))) } /// Parse a fact of the form `predicateName(term1, term2, …).` - fn parse_fact<'a>(input: Span<'a>) -> IResult> { - // let input_span = input; - tuple(( - opt(lex_doc_comment), - parse_normal_atom, - opt(lex_whitespace), - dot, - ))(input) + fn parse_fact<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult, E> { + context( + "parse fact", + tuple(( + opt(lex_doc_comment), + parse_normal_atom, + opt(lex_whitespace), + cut(dot), + )), + )(input) .map(|(rest_input, (doc_comment, atom, ws, dot))| { ( rest_input, @@ -2534,18 +2552,22 @@ mod new { } /// Parse a rule of the form `headPredicate1(term1, term2, …), headPredicate2(term1, term2, …) :- bodyPredicate(term1, …), term1 >= (term2 + term3) * function(term1, …) .` - fn parse_rule<'a>(input: Span<'a>) -> IResult> { - // let input_span = input; - tuple(( - opt(lex_doc_comment), - parse_head, - opt(lex_whitespace), - arrow, - opt(lex_whitespace), - parse_body, - opt(lex_whitespace), - dot, - ))(input) + fn parse_rule<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult, E> { + context( + "parse rule", + tuple(( + opt(lex_doc_comment), + parse_head, + opt(lex_whitespace), + arrow, + opt(lex_whitespace), + parse_body, + opt(lex_whitespace), + cut(dot), + )), + )(input) .map( |(rest_input, (doc_comment, head, ws1, arrow, ws2, body, ws3, dot))| { ( @@ -2567,46 +2589,59 @@ mod new { } /// Parse the head atoms of a rule. - fn parse_head<'a>(input: Span<'a>) -> IResult>> { - parse_atom_list(input, parse_head_atoms) + fn parse_head<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult>, E> { + context("parse head", parse_atom_list(parse_head_atoms))(input) } /// Parse the body atoms of a rule. - fn parse_body<'a>(input: Span<'a>) -> IResult>> { - parse_atom_list(input, parse_body_atoms) + fn parse_body<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult>, E> { + context("parse body", parse_atom_list(parse_body_atoms))(input) } /// Parse the directives (@base, @prefix, @import, @export, @output). - fn parse_directive<'a>(input: Span<'a>) -> IResult> { - alt(( - parse_base_directive, - parse_prefix_directive, - parse_import_directive, - parse_export_directive, - parse_output_directive, - ))(input) + fn parse_directive<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult, E> { + context( + "parse directive", + alt(( + parse_base_directive, + parse_prefix_directive, + parse_import_directive, + parse_export_directive, + parse_output_directive, + )), + )(input) .map(|(rest, directive)| (rest, Statement::Directive(directive))) } /// Parse the base directive. 
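The `context(…)` wrappers and the `cut(dot)` calls above work together: `context` labels each grammar rule so a `VerboseError` can report a chain of rule names, while `cut` turns a recoverable `Error` into a hard `Failure` once enough input has matched that backtracking into another `alt` branch could only produce a misleading message. A standalone sketch of the interaction, before the directive parsers below apply the same pattern (assuming nom 7; the names and the simplified grammar are illustrative):

```rust
use nom::{
    bytes::complete::tag,
    combinator::cut,
    error::{context, convert_error, VerboseError},
    sequence::preceded,
    IResult,
};

// Once `@base` has matched, the directive is committed: `cut` makes a
// missing IRI an Err::Failure carrying this branch's context chain,
// instead of silently falling through to the next alternative.
fn base_directive(i: &str) -> IResult<&str, &str, VerboseError<&str>> {
    context("base directive", preceded(tag("@base"), cut(tag("<"))))(i)
}

fn main() {
    if let Err(nom::Err::Failure(e)) = base_directive("@base oops") {
        // convert_error renders the accumulated contexts with position info.
        println!("{}", convert_error("@base oops", e));
    }
}
```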
- fn parse_base_directive<'a>(input: Span<'a>) -> IResult> { - let input_span = input.clone(); - tuple(( - opt(lex_doc_comment), - recognize(pair( - at, - verify(lex_ident, |token| token.kind == TokenKind::Base), + fn parse_base_directive<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult, E> { + context( + "parse base directive", + tuple(( + opt(lex_doc_comment), + recognize(pair( + at, + verify(lex_ident, |token| token.kind == TokenKind::Base), + )), + opt(lex_whitespace), + lex_iri, + opt(lex_whitespace), + cut(dot), )), - opt(lex_whitespace), - lex_iri, - opt(lex_whitespace), - dot, - ))(input) + )(input) .map(|(rest_input, (doc_comment, kw, ws1, base_iri, ws2, dot))| { ( rest_input, Directive::Base { - span: outer_span(input_span, rest_input), + span: outer_span(input, rest_input), doc_comment, kw: Token { kind: TokenKind::Base, @@ -2622,27 +2657,31 @@ mod new { } /// Parse the prefix directive. - fn parse_prefix_directive<'a>(input: Span<'a>) -> IResult> { - let input_span = input.clone(); - tuple(( - opt(lex_doc_comment), - recognize(pair( - at, - verify(lex_ident, |token| token.kind == TokenKind::Prefix), + fn parse_prefix_directive<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult, E> { + context( + "parse prefix directive", + tuple(( + opt(lex_doc_comment), + recognize(pair( + at, + verify(lex_ident, |token| token.kind == TokenKind::Prefix), + )), + opt(lex_whitespace), + recognize(pair(lex_ident, colon)), + opt(lex_whitespace), + lex_iri, + opt(lex_whitespace), + cut(dot), )), - opt(lex_whitespace), - recognize(pair(lex_ident, colon)), - opt(lex_whitespace), - lex_iri, - opt(lex_whitespace), - dot, - ))(input) + )(input) .map( |(rest_input, (doc_comment, kw, ws1, prefix, ws2, prefix_iri, ws3, dot))| { ( rest_input, Directive::Prefix { - span: outer_span(input_span, rest_input), + span: outer_span(input, rest_input), doc_comment, kw: Token { kind: TokenKind::Prefix, @@ -2664,29 +2703,33 @@ mod new { } /// Parse the import directive. - fn parse_import_directive<'a>(input: Span<'a>) -> IResult> { - let input_span = input.clone(); - tuple(( - opt(lex_doc_comment), - recognize(pair( - at, - verify(lex_ident, |token| token.kind == TokenKind::Import), + fn parse_import_directive<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult, E> { + context( + "parse import directive", + tuple(( + opt(lex_doc_comment), + recognize(pair( + at, + verify(lex_ident, |token| token.kind == TokenKind::Import), + )), + lex_whitespace, + lex_ident, + opt(lex_whitespace), + arrow, + opt(lex_whitespace), + parse_map, + opt(lex_whitespace), + cut(dot), )), - lex_whitespace, - lex_ident, - opt(lex_whitespace), - arrow, - opt(lex_whitespace), - parse_map, - opt(lex_whitespace), - dot, - ))(input) + )(input) .map( |(rest_input, (doc_comment, kw, ws1, predicate, ws2, arrow, ws3, map, ws4, dot))| { ( rest_input, Directive::Import { - span: outer_span(input_span, rest_input), + span: outer_span(input, rest_input), doc_comment, kw: Token { kind: TokenKind::Import, @@ -2707,29 +2750,33 @@ mod new { } /// Parse the export directive. 
- fn parse_export_directive<'a>(input: Span<'a>) -> IResult> { - let input_span = input.clone(); - tuple(( - opt(lex_doc_comment), - recognize(pair( - at, - verify(lex_ident, |token| token.kind == TokenKind::Export), + fn parse_export_directive<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult, E> { + context( + "parse export directive", + tuple(( + opt(lex_doc_comment), + recognize(pair( + at, + verify(lex_ident, |token| token.kind == TokenKind::Export), + )), + lex_whitespace, + lex_ident, + opt(lex_whitespace), + arrow, + opt(lex_whitespace), + parse_map, + opt(lex_whitespace), + cut(dot), )), - lex_whitespace, - lex_ident, - opt(lex_whitespace), - arrow, - opt(lex_whitespace), - parse_map, - opt(lex_whitespace), - dot, - ))(input) + )(input) .map( |(rest_input, (doc_comment, kw, ws1, predicate, ws2, arrow, ws3, map, ws4, dot))| { ( rest_input, Directive::Export { - span: outer_span(input_span, rest_input), + span: outer_span(input, rest_input), doc_comment, kw: Token { kind: TokenKind::Export, @@ -2750,25 +2797,29 @@ mod new { } /// Parse the output directive. - fn parse_output_directive<'a>(input: Span<'a>) -> IResult> { - let input_span = input.clone(); - tuple(( - opt(lex_doc_comment), - recognize(pair( - at, - verify(lex_ident, |token| token.kind == TokenKind::Output), + fn parse_output_directive<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult, E> { + context( + "parse output directive", + tuple(( + opt(lex_doc_comment), + recognize(pair( + at, + verify(lex_ident, |token| token.kind == TokenKind::Output), + )), + lex_whitespace, + opt(parse_identifier_list), + opt(lex_whitespace), + cut(dot), )), - lex_whitespace, - opt(parse_identifier_list), - opt(lex_whitespace), - dot, - ))(input) + )(input) .map( |(rest_input, (doc_comment, kw, ws1, predicates, ws2, dot))| { ( rest_input, Directive::Output { - span: outer_span(input_span, rest_input), + span: outer_span(input, rest_input), doc_comment, kw: Token { kind: TokenKind::Output, @@ -2785,22 +2836,26 @@ mod new { } /// Parse a list of `ident1, ident2, …` - fn parse_identifier_list<'a>(input: Span<'a>) -> IResult>> { - let input_span = input.clone(); - pair( - lex_ident, - many0(tuple(( - opt(lex_whitespace), - comma, - opt(lex_whitespace), + fn parse_identifier_list<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult>, E> { + context( + "parse identifier list", + pair( lex_ident, - ))), + many0(tuple(( + opt(lex_whitespace), + comma, + opt(lex_whitespace), + lex_ident, + ))), + ), )(input) .map(|(rest_input, (first, rest))| { ( rest_input, List { - span: outer_span(input_span, rest_input), + span: outer_span(input, rest_input), first, rest: if rest.is_empty() { None } else { Some(rest) }, }, @@ -2808,85 +2863,106 @@ mod new { }) } - /// Parse a list of atoms, like `atom1(…), atom2(…), infix = atom, …` - fn parse_atom_list<'a>( - input: Span<'a>, - parse_atom: fn(Span<'a>) -> IResult>, - ) -> IResult>> { - let input_span = input.clone(); - pair( - parse_atom, - many0(tuple(( - opt(lex_whitespace), - comma, - opt(lex_whitespace), - parse_atom, - ))), - )(input) - .map(|(rest_input, (first, rest))| { - ( - rest_input, - List { - span: outer_span(input_span, rest_input), - first, - rest: if rest.is_empty() { None } else { Some(rest) }, - }, - ) - }) + fn parse_atom_list<'a, E: ParseError> + ContextError>>( + parse_atom: fn(Span<'a>) -> IResult, E>, + ) -> impl Fn(Span<'a>) -> IResult, List<'a, Atom<'a>>, E> { + move |input| { + context( + "parse atom list", + pair( + 
parse_atom,
+                    many0(tuple((
+                        opt(lex_whitespace),
+                        comma,
+                        opt(lex_whitespace),
+                        parse_atom,
+                    ))),
+                ),
+            )(input)
+            .map(|(rest_input, (first, rest))| {
+                (
+                    rest_input,
+                    List {
+                        span: outer_span(input, rest_input),
+                        first,
+                        rest: if rest.is_empty() { None } else { Some(rest) },
+                    },
+                )
+            })
+        }
     }
 
     /// Parse the head atoms. The same as the body atoms except for disallowing negated atoms.
-    fn parse_head_atoms<'a>(input: Span<'a>) -> IResult<Span<'a>, Atom<'a>> {
-        alt((parse_normal_atom, parse_infix_atom, parse_map_atom))(input)
+    fn parse_head_atoms<'a, E: ParseError<Span<'a>> + ContextError<Span<'a>>>(
+        input: Span<'a>,
+    ) -> IResult<Span<'a>, Atom<'a>, E> {
+        context(
+            "parse head atoms",
+            alt((parse_normal_atom, parse_infix_atom, parse_map_atom)),
+        )(input)
     }
 
     /// Parse the body atoms. The same as the head atoms except for allowing negated atoms.
-    fn parse_body_atoms<'a>(input: Span<'a>) -> IResult<Span<'a>, Atom<'a>> {
-        alt((
-            parse_normal_atom,
-            parse_negative_atom,
-            parse_infix_atom,
-            parse_map_atom,
-        ))(input)
+    fn parse_body_atoms<'a, E: ParseError<Span<'a>> + ContextError<Span<'a>>>(
+        input: Span<'a>,
+    ) -> IResult<Span<'a>, Atom<'a>, E> {
+        context(
+            "parse body atoms",
+            alt((
+                parse_normal_atom,
+                parse_negative_atom,
+                parse_infix_atom,
+                parse_map_atom,
+            )),
+        )(input)
     }
 
     /// Parse an atom of the form `predicateName(term1, term2, …)`.
-    fn parse_normal_atom<'a>(input: Span<'a>) -> IResult<Span<'a>, Atom<'a>> {
-        parse_named_tuple(input)
+    fn parse_normal_atom<'a, E: ParseError<Span<'a>> + ContextError<Span<'a>>>(
+        input: Span<'a>,
+    ) -> IResult<Span<'a>, Atom<'a>, E> {
+        context("parse normal atom", parse_named_tuple)(input)
             .map(|(rest_input, named_tuple)| (rest_input, Atom::Positive(named_tuple)))
     }
 
     /// Parse an atom of the form `~predicateName(term1, term2, …)`.
-    fn parse_negative_atom<'a>(input: Span<'a>) -> IResult<Span<'a>, Atom<'a>> {
-        let input_span = input.clone();
-        pair(tilde, parse_named_tuple)(input).map(|(rest_input, (tilde, named_tuple))| {
-            (
-                rest_input,
-                Atom::Negative {
-                    span: outer_span(input_span, rest_input),
-                    neg: tilde,
-                    atom: named_tuple,
-                },
-            )
-        })
+    fn parse_negative_atom<'a, E: ParseError<Span<'a>> + ContextError<Span<'a>>>(
+        input: Span<'a>,
+    ) -> IResult<Span<'a>, Atom<'a>, E> {
+        context("parse negative atom", pair(tilde, parse_named_tuple))(input).map(
+            |(rest_input, (tilde, named_tuple))| {
+                (
+                    rest_input,
+                    Atom::Negative {
+                        span: outer_span(input, rest_input),
+                        neg: tilde,
+                        atom: named_tuple,
+                    },
+                )
+            },
+        )
     }
 
     /// Parse an "infix atom" of the form `term1 <operation> term2`.
     /// The supported infix operations are `<`, `<=`, `=`, `>=`, `>` and `!=`.
-    fn parse_infix_atom<'a>(input: Span<'a>) -> IResult<Span<'a>, Atom<'a>> {
-        let input_span = input.clone();
-        tuple((
-            parse_term,
-            opt(lex_whitespace),
-            parse_operation_token,
-            opt(lex_whitespace),
-            parse_term,
-        ))(input)
+    fn parse_infix_atom<'a, E: ParseError<Span<'a>> + ContextError<Span<'a>>>(
+        input: Span<'a>,
+    ) -> IResult<Span<'a>, Atom<'a>, E> {
+        context(
+            "parse infix atom",
+            tuple((
+                parse_term,
+                opt(lex_whitespace),
+                parse_operation_token,
+                opt(lex_whitespace),
+                parse_term,
+            )),
+        )(input)
         .map(|(rest_input, (lhs, ws1, operation, ws2, rhs))| {
             (
                 rest_input,
                 Atom::InfixAtom {
-                    span: outer_span(input_span, rest_input),
+                    span: outer_span(input, rest_input),
                     lhs,
                     ws1,
                     operation,
                     ws2,
                     rhs,
                 },
             )
         })
     }
 
     /// Parse a tuple with an optional name, like `ident(term1, term2)`
     /// or just `(int, int, skip)`.
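Note the shape change above: `parse_atom_list` used to take the input plus the element parser as a function-pointer argument; it is now a factory that captures the element parser and returns a closure. The returned closure is itself a parser, so it composes with `context` and the other combinators like any built-in. A standalone sketch of that shape (assuming nom 7; all names are illustrative):

```rust
use nom::{
    bytes::complete::tag,
    character::complete::{alpha1, multispace0},
    multi::many0,
    sequence::{delimited, pair, preceded},
    IResult,
};

// A factory: captures the element parser, returns a comma-separated-list
// parser that yields the first element plus the remaining ones.
fn list_of<'a, O>(
    element: fn(&'a str) -> IResult<&'a str, O>,
) -> impl Fn(&'a str) -> IResult<&'a str, (O, Vec<O>)> {
    move |input| {
        pair(
            element,
            many0(preceded(
                delimited(multispace0, tag(","), multispace0),
                element,
            )),
        )(input)
    }
}

fn main() {
    let atoms = list_of(alpha1);
    assert!(atoms("a, b ,c").is_ok());
}
```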
-    fn parse_tuple<'a>(input: Span<'a>) -> IResult<Span<'a>, Tuple<'a>> {
-        let input_span = input.clone();
-        tuple((
-            opt(lex_ident),
-            opt(lex_whitespace),
-            open_paren,
-            opt(lex_whitespace),
-            opt(parse_term_list),
-            opt(lex_whitespace),
-            close_paren,
-        ))(input)
+    fn parse_tuple<'a, E: ParseError<Span<'a>> + ContextError<Span<'a>>>(
+        input: Span<'a>,
+    ) -> IResult<Span<'a>, Tuple<'a>, E> {
+        context(
+            "parse tuple",
+            tuple((
+                opt(lex_ident),
+                opt(lex_whitespace),
+                open_paren,
+                opt(lex_whitespace),
+                opt(parse_term_list),
+                opt(lex_whitespace),
+                cut(close_paren),
+            )),
+        )(input)
         .map(
             |(rest_input, (identifier, ws1, open_paren, ws2, terms, ws3, close_paren))| {
                 (
                     rest_input,
                     Tuple {
-                        span: outer_span(input_span, rest_input),
+                        span: outer_span(input, rest_input),
                         identifier,
                         ws1,
                         open_paren,
@@ -2931,23 +3011,27 @@ mod new {
 
     /// Parse a named tuple. This function is like `parse_tuple` with the difference
     /// that it enforces the existence of an identifier for the tuple.
-    fn parse_named_tuple<'a>(input: Span<'a>) -> IResult<Span<'a>, Tuple<'a>> {
-        let input_span = input.clone();
-        tuple((
-            lex_ident,
-            opt(lex_whitespace),
-            open_paren,
-            opt(lex_whitespace),
-            opt(parse_term_list),
-            opt(lex_whitespace),
-            close_paren,
-        ))(input)
+    fn parse_named_tuple<'a, E: ParseError<Span<'a>> + ContextError<Span<'a>>>(
+        input: Span<'a>,
+    ) -> IResult<Span<'a>, Tuple<'a>, E> {
+        context(
+            "parse named tuple",
+            tuple((
+                lex_ident,
+                opt(lex_whitespace),
+                open_paren,
+                opt(lex_whitespace),
+                opt(parse_term_list),
+                opt(lex_whitespace),
+                cut(close_paren),
+            )),
+        )(input)
         .map(
             |(rest_input, (identifier, ws1, open_paren, ws2, terms, ws3, close_paren))| {
                 (
                     rest_input,
                     Tuple {
-                        span: outer_span(input_span, rest_input),
+                        span: outer_span(input, rest_input),
                         identifier: Some(identifier),
                         ws1,
                         open_paren,
@@ -2963,23 +3047,27 @@ mod new {
 
     /// Parse a map. Maps are denoted with `{…}` and can have an optional name, e.g. `csv {…}`.
     /// Inside the curly braces is a list of pairs.
-    fn parse_map<'a>(input: Span<'a>) -> IResult<Span<'a>, Map<'a>> {
-        let input_span = input.clone();
-        tuple((
-            opt(lex_ident),
-            opt(lex_whitespace),
-            open_brace,
-            opt(lex_whitespace),
-            parse_pair_list,
-            opt(lex_whitespace),
-            close_brace,
-        ))(input)
+    fn parse_map<'a, E: ParseError<Span<'a>> + ContextError<Span<'a>>>(
+        input: Span<'a>,
+    ) -> IResult<Span<'a>, Map<'a>, E> {
+        context(
+            "parse map",
+            tuple((
+                opt(lex_ident),
+                opt(lex_whitespace),
+                open_brace,
+                opt(lex_whitespace),
+                parse_pair_list,
+                opt(lex_whitespace),
+                cut(close_brace),
+            )),
+        )(input)
         .map(
             |(rest_input, (identifier, ws1, open_brace, ws2, pairs, ws3, close_brace))| {
                 (
                     rest_input,
                     Map {
-                        span: outer_span(input_span, rest_input),
+                        span: outer_span(input, rest_input),
                         identifier,
                         ws1,
                         open_brace,
@@ -2994,30 +3082,35 @@ mod new {
     }
 
     /// Parse a map in an atom position.
-    fn parse_map_atom<'a>(input: Span<'a>) -> IResult<Span<'a>, Atom<'a>> {
-        parse_map(input).map(|(rest_input, map)| (rest_input, Atom::Map(map)))
+    fn parse_map_atom<'a, E: ParseError<Span<'a>> + ContextError<Span<'a>>>(
+        input: Span<'a>,
+    ) -> IResult<Span<'a>, Atom<'a>, E> {
+        context("parse map atom", parse_map)(input)
+            .map(|(rest_input, map)| (rest_input, Atom::Map(map)))
     }
 
     /// Parse a pair list of the form `key1 = value1, key2 = value2, …`.
- fn parse_pair_list<'a>( + fn parse_pair_list<'a, E: ParseError> + ContextError>>( input: Span<'a>, - ) -> IResult, Term<'a>>>>> { - let input_span = input.clone(); - opt(pair( - parse_pair, - many0(tuple(( - opt(lex_whitespace), - comma, - opt(lex_whitespace), + ) -> IResult, Term<'a>>>>, E> { + context( + "parse pair list", + opt(pair( parse_pair, - ))), - ))(input) + many0(tuple(( + opt(lex_whitespace), + comma, + opt(lex_whitespace), + parse_pair, + ))), + )), + )(input) .map(|(rest_input, pair_list)| { if let Some((first, rest)) = pair_list { ( rest_input, Some(List { - span: outer_span(input_span, rest_input), + span: outer_span(input, rest_input), first, rest: if rest.is_empty() { None } else { Some(rest) }, }), @@ -3029,20 +3122,24 @@ mod new { } /// Parse a pair of the form `key = value`. - fn parse_pair<'a>(input: Span<'a>) -> IResult, Term<'a>>> { - let input_span = input.clone(); - tuple(( - parse_term, - opt(lex_whitespace), - equal, - opt(lex_whitespace), - parse_term, - ))(input) + fn parse_pair<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult, Term<'a>>, E> { + context( + "parse pair", + tuple(( + parse_term, + opt(lex_whitespace), + equal, + opt(lex_whitespace), + parse_term, + )), + )(input) .map(|(rest_input, (key, ws1, equal, ws2, value))| { ( rest_input, Pair { - span: outer_span(input_span, rest_input), + span: outer_span(input, rest_input), key, ws1, equal, @@ -3054,22 +3151,26 @@ mod new { } /// Parse a list of terms of the form `term1, term2, …`. - fn parse_term_list<'a>(input: Span<'a>) -> IResult>> { - let input_span = input.clone(); - pair( - parse_term, - many0(tuple(( - opt(lex_whitespace), - comma, - opt(lex_whitespace), + fn parse_term_list<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult>, E> { + context( + "parse term list", + pair( parse_term, - ))), + many0(tuple(( + opt(lex_whitespace), + comma, + opt(lex_whitespace), + parse_term, + ))), + ), )(input) .map(|(rest_input, (first, rest))| { ( rest_input, List { - span: outer_span(input_span, rest_input), + span: outer_span(input, rest_input), first, rest: if rest.is_empty() { None } else { Some(rest) }, }, @@ -3080,52 +3181,128 @@ mod new { /// Parse a term. A term can be a primitive value (constant, number, string, …), /// a variable (universal or existential), a map, a function (-symbol), an arithmetic /// operation, an aggregation or an tuple of terms, e.g. `(term1, term2, …)`. - fn parse_term<'a>(input: Span<'a>) -> IResult> { - alt(( - parse_binary_term, - parse_tuple_term, - parse_unary_prefix_term, - parse_map_term, - parse_primitive_term, - parse_variable, - parse_existential, - parse_aggregation_term, - ))(input) + fn parse_term<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult, E> { + context( + "parse term", + alt(( + parse_binary_term, + parse_tuple_term, + parse_unary_prefix_term, + parse_map_term, + parse_primitive_term, + parse_variable, + parse_existential, + parse_aggregation_term, + )), + )(input) } /// Parse a primitive term (simple constant, iri constant, number, string). 
-    fn parse_primitive_term<'a>(input: Span<'a>) -> IResult<Span<'a>, Term<'a>> {
-        alt((lex_ident, lex_iri, lex_number, lex_string))(input)
-            .map(|(rest_input, term)| (rest_input, Term::Primitive(term)))
+    fn parse_primitive_term<'a, E: ParseError<Span<'a>> + ContextError<Span<'a>>>(
+        input: Span<'a>,
+    ) -> IResult<Span<'a>, Term<'a>, E> {
+        context(
+            "parse primitive term",
+            alt((
+                parse_rdf_literal,
+                parse_ident,
+                parse_iri,
+                parse_number,
+                parse_string,
+            )),
+        )(input)
+        .map(|(rest_input, term)| (rest_input, Term::Primitive(term)))
     }
 
+    /// Parse an RDF literal, e.g. "2023-06-19"^^<http://www.w3.org/2001/XMLSchema#date>
+    fn parse_rdf_literal<'a, E: ParseError<Span<'a>> + ContextError<Span<'a>>>(
+        input: Span<'a>,
+    ) -> IResult<Span<'a>, Primitive<'a>, E> {
+        context(
+            "parse rdf literal",
+            tuple((lex_string, recognize(pair(caret, caret)), lex_iri)),
+        )(input)
+        .map(|(rest_input, (string, carets, iri))| {
+            (
+                rest_input,
+                Primitive::RdfLiteral {
+                    span: outer_span(input, rest_input),
+                    string,
+                    carets: Token {
+                        kind: TokenKind::Caret,
+                        span: carets,
+                    },
+                    iri,
+                },
+            )
+        })
+    }
+
+    fn parse_ident<'a, E: ParseError<Span<'a>> + ContextError<Span<'a>>>(
+        input: Span<'a>,
+    ) -> IResult<Span<'a>, Primitive<'a>, E> {
+        context("parse identifier", lex_ident)(input)
+            .map(|(rest_input, ident)| (rest_input, Primitive::Constant(ident)))
+    }
+
+    fn parse_iri<'a, E: ParseError<Span<'a>> + ContextError<Span<'a>>>(
+        input: Span<'a>,
+    ) -> IResult<Span<'a>, Primitive<'a>, E> {
+        context("parse iri", lex_iri)(input)
+            .map(|(rest_input, iri)| (rest_input, Primitive::Iri(iri)))
+    }
+
+    fn parse_number<'a, E: ParseError<Span<'a>> + ContextError<Span<'a>>>(
+        input: Span<'a>,
+    ) -> IResult<Span<'a>, Primitive<'a>, E> {
+        context("parse number", lex_number)(input)
+            .map(|(rest_input, number)| (rest_input, Primitive::Number(number)))
+    }
+
+    fn parse_string<'a, E: ParseError<Span<'a>> + ContextError<Span<'a>>>(
+        input: Span<'a>,
+    ) -> IResult<Span<'a>, Primitive<'a>, E> {
+        context("parse string", lex_string)(input)
+            .map(|(rest_input, string)| (rest_input, Primitive::String(string)))
+    }
+
+    /// Parse a unary term.
-    fn parse_unary_prefix_term<'a>(input: Span<'a>) -> IResult<Span<'a>, Term<'a>> {
-        let input_span = input.clone();
-        pair(lex_unary_prefix_operators, parse_term)(input).map(
-            |(rest_input, (operation, term))| {
-                (
-                    rest_input,
-                    Term::UnaryPrefix {
-                        span: outer_span(input_span, rest_input),
-                        operation,
-                        term: Box::new(term),
+    fn parse_unary_prefix_term<'a, E: ParseError<Span<'a>> + ContextError<Span<'a>>>(
+        input: Span<'a>,
+    ) -> IResult<Span<'a>, Term<'a>, E> {
+        context(
+            "parse unary prefix term",
+            pair(lex_unary_prefix_operators, parse_term),
+        )(input)
+        .map(|(rest_input, (operation, term))| {
+            (
+                rest_input,
+                Term::UnaryPrefix {
+                    span: outer_span(input, rest_input),
+                    operation,
+                    term: Box::new(term),
+                },
+            )
+        })
     }
 
     /// Parse a binary infix operation of the form `term1 <operation> term2`.
-    fn parse_binary_term<'a>(input: Span<'a>) -> IResult<Span<'a>, Term<'a>> {
-        pair(
-            parse_arithmetic_product,
-            opt(tuple((
-                opt(lex_whitespace),
-                alt((plus, minus)),
-                opt(lex_whitespace),
-                parse_binary_term,
-            ))),
+    fn parse_binary_term<'a, E: ParseError<Span<'a>> + ContextError<Span<'a>>>(
+        input: Span<'a>,
+    ) -> IResult<Span<'a>, Term<'a>, E> {
+        context(
+            "parse binary term",
+            pair(
+                parse_arithmetic_product,
+                opt(tuple((
+                    opt(lex_whitespace),
+                    alt((plus, minus)),
+                    opt(lex_whitespace),
+                    parse_binary_term,
+                ))),
+            ),
         )(input)
         .map(|(rest_input, (lhs, opt))| {
             (
@@ -3148,15 +3325,20 @@ mod new {
 
     /// Parse an arithmetic product, i.e. an expression involving
     /// only `*` and `/` over subexpressions.
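The `parse_rdf_literal` added above recognizes the three-part shape of an RDF typed literal: a quoted lexical form, the `^^` separator, and a datatype IRI. To make that shape concrete, here is a standalone sketch over plain `&str` (assuming nom 7; the function name and the xsd:date IRI are just illustrative choices):

```rust
use nom::{
    bytes::complete::{is_not, tag},
    sequence::{delimited, separated_pair},
    IResult,
};

// Matches `"<lexical form>"^^<datatype IRI>` and returns both parts.
fn rdf_literal(i: &str) -> IResult<&str, (&str, &str)> {
    separated_pair(
        delimited(tag("\""), is_not("\""), tag("\"")),
        tag("^^"),
        delimited(tag("<"), is_not("> \n"), tag(">")),
    )(i)
}

fn main() {
    let (rest, (value, datatype)) =
        rdf_literal(r#""2023-06-19"^^<http://www.w3.org/2001/XMLSchema#date>"#).unwrap();
    assert_eq!(value, "2023-06-19");
    assert_eq!(datatype, "http://www.w3.org/2001/XMLSchema#date");
    assert!(rest.is_empty());
}
```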
- fn parse_arithmetic_product<'a>(input: Span<'a>) -> IResult> { - pair( - parse_arithmetic_factor, - opt(tuple(( - opt(lex_whitespace), - alt((star, slash)), - opt(lex_whitespace), - parse_arithmetic_product, - ))), + fn parse_arithmetic_product<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult, E> { + context( + "parse arithmetic product", + pair( + parse_arithmetic_factor, + opt(tuple(( + opt(lex_whitespace), + alt((star, slash)), + opt(lex_whitespace), + parse_arithmetic_product, + ))), + ), )(input) .map(|(rest_input, (lhs, opt))| { ( @@ -3177,47 +3359,57 @@ mod new { }) } - fn parse_arithmetic_factor<'a>(input: Span<'a>) -> IResult> { - alt(( - parse_tuple_term, - parse_aggregation_term, - parse_primitive_term, - parse_variable, - parse_existential, - ))(input) - } - - fn fold_arithmetic_expression<'a>( - initial: Term<'a>, - sequence: Vec<(Option>, Token<'a>, Option>, Term<'a>)>, - span_vec: Vec>, - ) -> Term<'a> { - sequence - .into_iter() - .enumerate() - .fold(initial, |acc, (i, pair)| { - let (ws1, operation, ws2, expression) = pair; - Term::Binary { - span: span_vec[i], - lhs: Box::new(acc), - ws1, - operation, - ws2, - rhs: Box::new(expression), - } - }) + fn parse_arithmetic_factor<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult, E> { + context( + "parse arithmetic factor", + alt(( + parse_tuple_term, + parse_aggregation_term, + parse_primitive_term, + parse_variable, + parse_existential, + )), + )(input) } + // fn fold_arithmetic_expression<'a>( + // initial: Term<'a>, + // sequence: Vec<(Option>, Token<'a>, Option>, Term<'a>)>, + // span_vec: Vec>, + // ) -> Term<'a> { + // sequence + // .into_iter() + // .enumerate() + // .fold(initial, |acc, (i, pair)| { + // let (ws1, operation, ws2, expression) = pair; + // Term::Binary { + // span: span_vec[i], + // lhs: Box::new(acc), + // ws1, + // operation, + // ws2, + // rhs: Box::new(expression), + // } + // }) + // } + /// Parse an aggregation term of the form `#sum(…)`. - fn parse_aggregation_term<'a>(input: Span<'a>) -> IResult> { - tuple(( - recognize(pair(hash, lex_ident)), - open_paren, - opt(lex_whitespace), - parse_term_list, - opt(lex_whitespace), - close_paren, - ))(input) + fn parse_aggregation_term<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult, E> { + context( + "parse aggregation term", + tuple(( + recognize(pair(hash, lex_ident)), + open_paren, + opt(lex_whitespace), + parse_term_list, + opt(lex_whitespace), + close_paren, + )), + )(input) .map( |(rest_input, (operation, open_paren, ws1, terms, ws2, close_paren))| { ( @@ -3241,32 +3433,47 @@ mod new { /// Parse a tuple term, either with a name (function symbol) or as a term (-list) with /// parenthesis. - fn parse_tuple_term<'a>(input: Span<'a>) -> IResult> { - parse_tuple(input) + fn parse_tuple_term<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult, E> { + context("parse tuple term", parse_tuple)(input) .map(|(rest_input, named_tuple)| (rest_input, Term::Tuple(Box::new(named_tuple)))) } /// Parse a map as a term. - fn parse_map_term<'a>(input: Span<'a>) -> IResult> { - parse_map(input).map(|(rest_input, map)| (rest_input, Term::Map(Box::new(map)))) + fn parse_map_term<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult, E> { + context("parse map term", parse_map)(input) + .map(|(rest_input, map)| (rest_input, Term::Map(Box::new(map)))) } /// Parse a variable. 
- fn parse_variable<'a>(input: Span<'a>) -> IResult> { - recognize(pair(question_mark, lex_ident))(input).map(|(rest_input, var)| { - ( - rest_input, - Term::Variable(Token { - kind: TokenKind::Variable, - span: var, - }), - ) - }) + fn parse_variable<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult, E> { + context("parse variable", recognize(pair(question_mark, lex_ident)))(input).map( + |(rest_input, var)| { + ( + rest_input, + Term::Variable(Token { + kind: TokenKind::Variable, + span: var, + }), + ) + }, + ) } /// Parse an existential variable. - fn parse_existential<'a>(input: Span<'a>) -> IResult> { - recognize(pair(exclamation_mark, lex_ident))(input).map(|(rest_input, existential)| { + fn parse_existential<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult, E> { + context( + "parse existential", + recognize(pair(exclamation_mark, lex_ident)), + )(input) + .map(|(rest_input, existential)| { ( rest_input, Term::Existential(Token { @@ -3279,12 +3486,19 @@ mod new { // Order of functions is important, because of ordered choice and no backtracking /// Parse the operator for an infix atom. - fn parse_operation_token<'a>(input: Span<'a>) -> IResult> { - alt((less_equal, greater_equal, equal, unequal, less, greater))(input) + fn parse_operation_token<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult, E> { + context( + "parse operation token", + alt((less_equal, greater_equal, equal, unequal, less, greater)), + )(input) } #[cfg(test)] mod tests { + use nom::error::{convert_error, VerboseError}; + use super::*; use crate::io::{ lexer::*, @@ -3300,6 +3514,19 @@ mod new { }; } + fn convert_located_span_error<'a>(input: Span<'a>, err: VerboseError>) -> String { + convert_error( + *(input.fragment()), + VerboseError { + errors: err + .errors + .into_iter() + .map(|(span, tag)| (*(span.fragment()), tag)) + .collect(), + }, + ) + } + #[test] fn fact() { // let input = Tokens { @@ -3307,7 +3534,7 @@ mod new { // }; let input = Span::new("a(B,C)."); assert_eq!( - parse_program(input), + parse_program::>(input).unwrap().1, Program { span: input, tl_doc_comment: None, @@ -3328,10 +3555,10 @@ mod new { ws2: None, terms: Some(List { span: S!(2, 1, "B,C"), - first: Term::Primitive(Token { + first: Term::Primitive(Primitive::Constant(Token { kind: TokenKind::Ident, span: S!(2, 1, "B"), - }), + })), rest: Some(vec![( None, Token { @@ -3339,10 +3566,10 @@ mod new { span: S!(3, 1, ",") }, None, - Term::Primitive(Token { + Term::Primitive(Primitive::Constant(Token { kind: TokenKind::Ident, span: S!(4, 1, "C"), - }), + })), )]), }), ws3: None, @@ -3367,7 +3594,7 @@ mod new { r#"@base .@prefix rdfs:.@import sourceA:-csv{resource="sources/dataA.csv"}.@export a:-csv{}.@output a, b, c."#, ); assert_eq!( - parse_program(input), + parse_program::>(input).unwrap().1, Program { tl_doc_comment: None, span: input, @@ -3464,20 +3691,20 @@ mod new { span: S!(106, 1, "resource=\"sources/dataA.csv\""), first: Pair { span: S!(106, 1, "resource=\"sources/dataA.csv\""), - key: Term::Primitive(Token { + key: Term::Primitive(Primitive::Constant(Token { kind: TokenKind::Ident, span: S!(106, 1, "resource"), - }), + })), ws1: None, equal: Token { kind: TokenKind::Equal, span: S!(114, 1, "="), }, ws2: None, - value: Term::Primitive(Token { + value: Term::Primitive(Primitive::String(Token { kind: TokenKind::String, span: S!(115, 1, "\"sources/dataA.csv\""), - }) + })), }, rest: None, }), @@ -3604,7 +3831,7 @@ mod new { fn ignore_ws_and_comments() { let input = Span::new(" 
Hi %cool comment\n"); assert_eq!( - super::ignore_ws_and_comments(lex_ident)(input), + super::ignore_ws_and_comments(lex_ident::>)(input), Ok(( S!(22, 2, ""), Token { @@ -3619,7 +3846,7 @@ mod new { fn fact_with_ws() { let input = Span::new("some(Fact, with, whitespace) . % and a super useful comment\n"); assert_eq!( - parse_program(input), + parse_program::>(input).unwrap().1, Program { span: input, tl_doc_comment: None, @@ -3641,10 +3868,10 @@ mod new { ws2: None, terms: Some(List { span: S!(5, 1, "Fact, with, whitespace"), - first: Term::Primitive(Token { + first: Term::Primitive(Primitive::Constant(Token { kind: TokenKind::Ident, span: S!(5, 1, "Fact"), - }), + })), rest: Some(vec![ ( None, @@ -3656,10 +3883,10 @@ mod new { kind: TokenKind::Whitespace, span: S!(10, 1, " "), }), - Term::Primitive(Token { + Term::Primitive(Primitive::Constant(Token { kind: TokenKind::Ident, span: S!(11, 1, "with") - }), + })), ), ( None, @@ -3671,10 +3898,10 @@ mod new { kind: TokenKind::Whitespace, span: S!(16, 1, " "), }), - Term::Primitive(Token { + Term::Primitive(Primitive::Constant(Token { kind: TokenKind::Ident, span: S!(17, 1, "whitespace") - }), + })), ), ]), }), @@ -3717,7 +3944,7 @@ mod new { % find old trees. It can be modified to use a different species or genus of % plant, and by changing the required age. -@import tree :- csv{format=(string, string, int, int), resource="https://raw.githubusercontent.com/knowsys/nemo-examples/main/examples/lime-trees/dresden-trees-ages-heights.csv"} . % location URL, species, age, height in m +@import tree :- csv{format=(string, string, string, int, int), resource="https://raw.githubusercontent.com/knowsys/nemo-examples/main/examples/lime-trees/dresden-trees-ages-heights.csv"} . % location URL, species, age, height in m @import taxon :- csv{format=(string, string, string), resource="https://raw.githubusercontent.com/knowsys/nemo-examples/main/examples/lime-trees/wikidata-taxon-name-parent.csv.gz"} . % location URL, species, age, height in m limeSpecies(?X, "Tilia") :- taxon(?X, "Tilia", ?P). @@ -3725,27 +3952,47 @@ limeSpecies(?X, ?Name) :- taxon(?X, ?Name, ?Y), limeSpecies(?Y, ?N). 
oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters), ?age > 200, limeSpecies(?id,?species) ."#, ); - let ast = parse_program(input); - println!("{}", ast); - assert_eq!( - { - let mut result = String::new(); - for token in get_all_tokens(&ast) { - result.push_str(token.span().fragment()); - } - println!("{}", result); - result - }, - *input.fragment(), - ); + let ast = parse_program::>(input); + match &ast { + Ok((rest_input, ast)) => { + println!("Rest Input:\n{:#?}\n\n{}", rest_input, ast); + assert_eq!( + { + let mut string_from_tokens = String::new(); + for token in get_all_tokens(ast) { + string_from_tokens.push_str(token.span().fragment()); + } + println!("String from Tokens:\n"); + println!("{}\n", string_from_tokens); + string_from_tokens + }, + *input.fragment(), + ); + } + Err(nom::Err::Error(err)) | Err(nom::Err::Failure(err)) => { + println!( + "PRINT ERROR:\n\n{}", + convert_located_span_error(input, err.clone()) + ); + } + Err(err) => panic!("{}", err), + } + assert!(ast.is_ok()); } #[test] fn parser_test() { let str = std::fs::read_to_string("../testfile.rls").expect("testfile not found"); let input = Span::new(str.as_str()); - println!("{}", parse_program(input)); - // assert!(false); + let result = parse_program::>(input); + match result { + Ok(ast) => println!("{}", ast.1), + Err(nom::Err::Error(err)) | Err(nom::Err::Failure(err)) => { + println!("{}", convert_located_span_error(input, err)) + } + Err(_) => (), + } + assert!(false); } #[test] @@ -3766,120 +4013,143 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters assert_eq!( { - let result = parse_term(Span::new("42")); + let result = parse_term::>(Span::new("42")); result.unwrap().1 }, - Term::Primitive(T! {Number, 0, 1, "42"}), + Term::Primitive(Primitive::Number(T! {Number, 0, 1, "42"})), ); assert_eq!( { - let result = parse_term(Span::new("35+7")); + let result = parse_term::>(Span::new("35+7")); result.unwrap().1 }, Term::Binary { span: s!(0, 1, "35+7"), - lhs: Box::new(Term::Primitive(T! {Number, 0, 1, "35"})), + lhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 0, 1, "35"}))), ws1: None, operation: T! {Plus, 2, 1, "+"}, ws2: None, - rhs: Box::new(Term::Primitive(T! {Number, 3, 1, "7"})) + rhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 3, 1, "7"}))), } ); assert_eq!( { - let result = parse_term(Span::new("6*7")); + let result = parse_term::>(Span::new("6*7")); result.unwrap().1 }, Term::Binary { span: s!(0, 1, "6*7"), - lhs: Box::new(Term::Primitive(T! {Number, 0,1,"6"})), + lhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 0,1,"6"}))), ws1: None, operation: T! {Star, 1,1,"*"}, ws2: None, - rhs: Box::new(Term::Primitive(T! {Number, 2,1,"7"})), + rhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 2,1,"7"}))), } ); assert_eq!( { - let result = parse_term(Span::new("49-7")); + let result = parse_term::>(Span::new("49-7")); result.unwrap().1 }, Term::Binary { span: s!(0, 1, "49-7"), - lhs: Box::new(Term::Primitive(T! {Number, 0, 1, "49"})), + lhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 0, 1, "49"}))), ws1: None, operation: T! {Minus, 2, 1, "-"}, ws2: None, - rhs: Box::new(Term::Primitive(T! {Number, 3, 1, "7"})) + rhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 3, 1, "7"}))), } ); assert_eq!( { - let result = parse_term(Span::new("84/2")); + let result = parse_term::>(Span::new("84/2")); result.unwrap().1 }, Term::Binary { span: s!(0, 1, "84/2"), - lhs: Box::new(Term::Primitive(T! 
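// The `T!` shorthand used in these assertions is the test-local macro that
// pairs a `TokenKind` with a span rebuilt from a raw offset; a minimal sketch
// of what it expands to (matching the definition in the test module):
//
//     macro_rules! T {
//         ($tok_kind:expr, $offset:literal, $line:literal, $str:literal) => {
//             Token::new($tok_kind, unsafe {
//                 Span::new_from_raw_offset($offset, $line, $str, ())
//             })
//         };
//     }
//
// The constructor is `unsafe` because nom_locate cannot check that offset,
// line and fragment are mutually consistent; the tests guarantee this by
// construction.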
{Number, 0, 1, "84"})), + lhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 0, 1, "84"}))), ws1: None, operation: T! {Slash, 2, 1, "/"}, ws2: None, - rhs: Box::new(Term::Primitive(T! {Number, 3, 1, "2"})) + rhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 3, 1, "2"}))), } ); assert_eq!( { - let result = parse_term(Span::new("5*7+7")); + let result = parse_term::>(Span::new("5*7+7")); result.unwrap().1 }, Term::Binary { span: s!(0, 1, "5*7+7"), lhs: Box::new(Term::Binary { span: s!(0, 1, "5*7"), - lhs: Box::new(Term::Primitive(T! {Number, 0,1,"5"})), + lhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 0,1,"5"}))), ws1: None, operation: T! {Star, 1,1,"*"}, ws2: None, - rhs: Box::new(Term::Primitive(T! {Number, 2,1,"7"})) + rhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 2,1,"7"}))), }), ws1: None, operation: T! {Plus, 3,1,"+"}, ws2: None, - rhs: Box::new(Term::Primitive(T! {Number, 4,1,"7"})), + rhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 4,1,"7"}))), } ); assert_eq!( { - let result = parse_term(Span::new("7+5*7")); + let result = parse_term::>(Span::new("7+5*7")); result.unwrap().1 }, Term::Binary { span: s!(0, 1, "7+5*7"), - lhs: Box::new(Term::Primitive(T! {Number, 0,1,"7"})), + lhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 0,1,"7"}))), ws1: None, operation: T! {Plus, 1,1,"+"}, ws2: None, rhs: Box::new(Term::Binary { span: s!(2, 1, "5*7"), - lhs: Box::new(Term::Primitive(T! {Number, 2,1,"5"})), + lhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 2,1,"5"}))), ws1: None, operation: T! {Star, 3,1,"*"}, ws2: None, - rhs: Box::new(Term::Primitive(T! {Number, 4,1,"7"})) + rhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 4,1,"7"}))), }), } ); assert_eq!( { - let result = parse_term(Span::new("(15+3*2-(7+35)*8)/3")); - result.unwrap().1 + let input = Span::new("(15+3*2-(7+35)*8)/3"); + let result = parse_term::>(input); + // let result = parse_term::>(Span::new("(15+3*2-(7+35)*8)/3")); + match result { + Ok(ast) => { + println!("{}", ast.1); + ast.1 + } + Err(nom::Err::Error(err)) | Err(nom::Err::Failure(err)) => { + panic!( + "{}", + convert_error( + *(input.fragment()), + VerboseError { + errors: err + .errors + .into_iter() + .map(|(span, tag)| { (*(span.fragment()), tag) }) + .collect() + } + ) + ) + } + Err(nom::Err::Incomplete(err)) => panic!("{:#?}", err), + } }, Term::Binary { span: s!(0, 1, "(15+3*2-(7+35)*8)/3"), @@ -3893,7 +4163,9 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters span: s!(1, 1, "15+3*2-(7+35)*8"), first: Term::Binary { span: s!(1, 1, "15+3*2-(7+35)*8"), - lhs: Box::new(Term::Primitive(T! {Number, 1,1,"15"})), + lhs: Box::new(Term::Primitive(Primitive::Number( + T! {Number, 1,1,"15"} + ))), ws1: None, operation: T! {Plus, 3,1,"+"}, ws2: None, @@ -3901,11 +4173,15 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters span: s!(4, 1, "3*2-(7+35)*8"), lhs: Box::new(Term::Binary { span: s!(4, 1, "3*2"), - lhs: Box::new(Term::Primitive(T! {Number, 4,1,"3"})), + lhs: Box::new(Term::Primitive(Primitive::Number( + T! {Number, 4,1,"3"} + ))), ws1: None, operation: T! {Star, 5,1,"*"}, ws2: None, - rhs: Box::new(Term::Primitive(T! {Number, 6,1,"2"})), + rhs: Box::new(Term::Primitive(Primitive::Number( + T! {Number, 6,1,"2"} + ))), }), ws1: None, operation: T! 
{Minus, 7,1,"-"}, @@ -3923,13 +4199,13 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters first: Term::Binary { span: s!(9, 1, "7+35"), lhs: Box::new(Term::Primitive( - T! {Number, 9,1,"7"} + Primitive::Number(T! {Number, 9,1,"7"}) )), ws1: None, operation: T! {Plus, 10,1,"+"}, ws2: None, rhs: Box::new(Term::Primitive( - T! {Number, 11,1,"35"} + Primitive::Number(T! {Number, 11,1,"35"}) )), }, rest: None @@ -3940,7 +4216,9 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters ws1: None, operation: T! {Star, 14,1,"*"}, ws2: None, - rhs: Box::new(Term::Primitive(T! {Number, 15,1,"8"})), + rhs: Box::new(Term::Primitive(Primitive::Number( + T! {Number, 15,1,"8"} + ))), }), }), }, @@ -3952,7 +4230,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters ws1: None, operation: T! {Slash, 17,1,"/"}, ws2: None, - rhs: Box::new(Term::Primitive(T! {Number, 18,1,"3"})), + rhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 18,1,"3"}))), } ); // Term::Binary { @@ -3966,12 +4244,12 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters assert_eq!( { - let result = parse_term(Span::new("15+3*2-(7+35)*8/3")); + let result = parse_term::>(Span::new("15+3*2-(7+35)*8/3")); result.unwrap().1 }, Term::Binary { span: s!(0, 1, "15+3*2-(7+35)*8/3"), - lhs: Box::new(Term::Primitive(T! {Number, 0,1,"15"})), + lhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 0,1,"15"}))), ws1: None, operation: T! {Plus, 2,1,"+"}, ws2: None, @@ -3979,11 +4257,11 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters span: s!(3, 1, "3*2-(7+35)*8/3"), lhs: Box::new(Term::Binary { span: s!(3, 1, "3*2"), - lhs: Box::new(Term::Primitive(T! {Number, 3,1,"3"})), + lhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 3,1,"3"}))), ws1: None, operation: T! {Star, 4,1,"*"}, ws2: None, - rhs: Box::new(Term::Primitive(T! {Number, 5,1,"2"})), + rhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 5,1,"2"}))), }), ws1: None, operation: T! {Minus, 6,1,"-"}, @@ -4000,11 +4278,15 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters span: s!(8, 1, "7+35"), first: Term::Binary { span: s!(8, 1, "7+35"), - lhs: Box::new(Term::Primitive(T! {Number, 8,1,"7"})), + lhs: Box::new(Term::Primitive(Primitive::Number( + T! {Number, 8,1,"7"} + ))), ws1: None, operation: T! {Plus, 9,1,"+"}, ws2: None, - rhs: Box::new(Term::Primitive(T! {Number, 10,1,"35"})), + rhs: Box::new(Term::Primitive(Primitive::Number( + T! {Number, 10,1,"35"} + ))), }, rest: None, }), @@ -4016,11 +4298,15 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters ws2: None, rhs: Box::new(Term::Binary { span: s!(14, 1, "8/3"), - lhs: Box::new(Term::Primitive(T! {Number, 14,1,"8"})), + lhs: Box::new(Term::Primitive(Primitive::Number( + T! {Number, 14,1,"8"} + ))), ws1: None, operation: T! {Slash, 15, 1, "/"}, ws2: None, - rhs: Box::new(Term::Primitive(T! {Number, 16,1,"3"})), + rhs: Box::new(Term::Primitive(Primitive::Number( + T! 
{Number, 16,1,"3"} + ))), }), }), }), @@ -4028,27 +4314,27 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters ); // assert_eq!({ - // let result = parse_term(Span::new("1*2*3*4*5")); + // let result = parse_term::>(Span::new("1*2*3*4*5")); // result.unwrap().1 // },); // assert_eq!({ - // let result = parse_term(Span::new("(5+3)")); + // let result = parse_term::>(Span::new("(5+3)")); // result.unwrap().1 // },); // assert_eq!({ - // let result = parse_term(Span::new("( int , int , string , skip )")); + // let result = parse_term::>(Span::new("( int , int , string , skip )")); // result.unwrap().1 // },); // assert_eq!({ - // let result = parse_term(Span::new("(14+4)+3")); + // let result = parse_term::>(Span::new("(14+4)+3")); // result.unwrap().1 // },); // assert_eq!({ - // let result = parse_term(Span::new( + // let result = parse_term::>(Span::new( // "(3 + #sum(?X, ?Y)) * (LENGTH(\"Hello, World!\") + 3)", // )); // result.unwrap().1 From c3b9f5974d65b71a8bde5b2809a4b730181ac33c Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Wed, 24 Apr 2024 17:38:24 +0200 Subject: [PATCH 035/214] Add enum Primitive for primitives composed of more than one token --- nemo/src/io/parser/ast.rs | 14 +++--- nemo/src/io/parser/ast/term.rs | 83 +++++++++++++++++++++++++++++++++- 2 files changed, 89 insertions(+), 8 deletions(-) diff --git a/nemo/src/io/parser/ast.rs b/nemo/src/io/parser/ast.rs index 1c838a026..722aff57f 100644 --- a/nemo/src/io/parser/ast.rs +++ b/nemo/src/io/parser/ast.rs @@ -109,7 +109,7 @@ pub(crate) fn ast_to_ascii_tree(node: &dyn AstNode) -> Tree { mod test { use super::*; - use super::{atom::Atom, directive::Directive, tuple::Tuple, program::Program, statement::Statement, term::Term}; + use super::{atom::Atom, directive::Directive, tuple::Tuple, program::Program, statement::Statement, term::Term, term::Primitive}; use crate::io::lexer::TokenKind; macro_rules! s { @@ -201,10 +201,10 @@ mod test { ws2:None , terms: Some(List { span: s!(236, 8, "ConstA, ConstB"), - first: Term::Primitive(Token { + first: Term::Primitive(Primitive::Constant(Token { kind: TokenKind::Ident, span: s!(236, 8, "ConstA"), - }), + })), rest: Some(vec![( None, Token { @@ -215,10 +215,10 @@ mod test { kind: TokenKind::Whitespace, span: s!(243, 8, " "), }), - Term::Primitive(Token { + Term::Primitive(Primitive::Constant(Token { kind: TokenKind::Ident, span: s!(244, 8, "ConstB"), - }), + })), )]), }), ws3: None , @@ -298,10 +298,10 @@ mod test { kind: TokenKind::Whitespace, span: s!(334, 12, " "), }), - Term::Primitive(Token { + Term::Primitive(Primitive::Constant(Token { kind: TokenKind::Ident, span: s!(335, 12, "ConstB"), - }), + })), )]), }), ws3: None, diff --git a/nemo/src/io/parser/ast/term.rs b/nemo/src/io/parser/ast/term.rs index a058e401b..e81ccf09e 100644 --- a/nemo/src/io/parser/ast/term.rs +++ b/nemo/src/io/parser/ast/term.rs @@ -6,7 +6,7 @@ use ascii_tree::write_tree; #[derive(Debug, Clone, PartialEq)] pub(crate) enum Term<'a> { - Primitive(Token<'a>), + Primitive(Primitive<'a>), Variable(Token<'a>), Existential(Token<'a>), // TODO: Is whitespace needed? 
Figure out how unary terms look @@ -156,3 +156,84 @@ impl std::fmt::Display for Term<'_> { write!(f, "{output}") } } + +#[derive(Debug, Clone, PartialEq)] +pub(crate) enum Primitive<'a> { + Constant(Token<'a>), + Number(Token<'a>), + String(Token<'a>), + Iri(Token<'a>), + RdfLiteral { + span: Span<'a>, + string: Token<'a>, + carets: Token<'a>, + iri: Token<'a>, + }, +} +impl AstNode for Primitive<'_> { + fn children(&self) -> Option> { + match self { + Primitive::Constant(token) => Some(vec![token]), + Primitive::Number(token) => Some(vec![token]), + Primitive::String(token) => Some(vec![token]), + Primitive::Iri(token) => Some(vec![token]), + Primitive::RdfLiteral { + string, + carets, + iri, + .. + } => Some(vec![string, carets, iri]), + } + } + + fn span(&self) -> Span { + match self { + Primitive::Constant(token) => token.span, + Primitive::Number(token) => token.span, + Primitive::String(token) => token.span, + Primitive::Iri(token) => token.span, + Primitive::RdfLiteral { span, .. } => *span, + } + } + + fn position(&self) -> Position { + let span = self.span(); + Position { + offset: span.location_offset(), + line: span.location_line(), + column: span.get_utf8_column() as u32, + } + } + + fn is_token(&self) -> bool { + false + } + + fn name(&self) -> String { + macro_rules! name { + ($name:literal) => { + format!( + "{} \x1b[34m@{}:{} \x1b[92m{:?}\x1b[0m", + $name, + self.span().location_line(), + self.span().get_utf8_column(), + self.span().fragment() + ) + }; + } + match self { + Primitive::Constant(_) => name!("Constant"), + Primitive::Number(_) => name!("Number"), + Primitive::String(_) => name!("String"), + Primitive::Iri(_) => name!("Iri"), + Primitive::RdfLiteral { .. } => name!("RDF Literal"), + } + } +} +impl std::fmt::Display for Primitive<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut output = String::new(); + write_tree(&mut output, &ast_to_ascii_tree(self))?; + write!(f, "{output}") + } +} From e34d500c99be9f12c5ec5d9e0e6367463717c54d Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Thu, 25 Apr 2024 16:57:31 +0200 Subject: [PATCH 036/214] Add decimal number parsing --- nemo/src/io/lexer.rs | 9 + nemo/src/io/parser.rs | 533 ++++++++++++++++++++++++--------- nemo/src/io/parser/ast/term.rs | 81 ++++- 3 files changed, 480 insertions(+), 143 deletions(-) diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs index 4dd5f0ded..ebb608d10 100644 --- a/nemo/src/io/lexer.rs +++ b/nemo/src/io/lexer.rs @@ -74,6 +74,8 @@ pub(crate) enum TokenKind { Star, /// '/' Slash, + /// 'e' or 'E' + Exponent, // Multi-char tokens: /// Identifier for keywords and names Ident, @@ -144,6 +146,7 @@ impl std::fmt::Display for TokenKind { TokenKind::Minus => write!(f, "Minus"), TokenKind::Star => write!(f, "Star"), TokenKind::Slash => write!(f, "Slash"), + TokenKind::Exponent => write!(f, "Exponent"), TokenKind::Ident => write!(f, "Ident"), TokenKind::Variable => write!(f, "Variable"), TokenKind::Existential => write!(f, "Existential"), @@ -253,6 +256,12 @@ syntax!(caret, "^", TokenKind::Caret); syntax!(hash, "#", TokenKind::Hash); syntax!(underscore, "_", TokenKind::Underscore); syntax!(at, "@", TokenKind::At); +syntax!(exp_lower, "e", TokenKind::Exponent); +syntax!(exp_upper, "E", TokenKind::Exponent); + +pub(crate) fn exp<'a, E: ParseError>>(input: Span<'a>) -> IResult, Token, E> { + alt((exp_lower, exp_upper))(input) +} pub(crate) fn lex_punctuations<'a, E: ParseError>>( input: Span<'a>, diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index 
e027612ad..5f87b2fcf 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -2435,8 +2435,8 @@ mod new { }; use crate::io::lexer::{ arrow, at, caret, close_brace, close_paren, colon, comma, dot, equal, exclamation_mark, - greater, greater_equal, hash, less, less_equal, lex_comment, lex_doc_comment, lex_ident, - lex_iri, lex_number, lex_operators, lex_string, lex_toplevel_doc_comment, + exp, greater, greater_equal, hash, less, less_equal, lex_comment, lex_doc_comment, + lex_ident, lex_iri, lex_number, lex_operators, lex_string, lex_toplevel_doc_comment, lex_unary_prefix_operators, lex_whitespace, minus, open_brace, open_paren, plus, question_mark, slash, star, tilde, unequal, Span, Token, TokenKind, }; @@ -3257,8 +3257,63 @@ mod new { fn parse_number<'a, E: ParseError> + ContextError>>( input: Span<'a>, ) -> IResult, Primitive<'a>, E> { - context("parse number", lex_number)(input) - .map(|(rest_input, number)| (rest_input, Primitive::Number(number))) + context("parse number", alt((parse_decimal, parse_integer)))(input) + } + + fn parse_decimal<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult, Primitive<'a>, E> { + context( + "parse decimal", + tuple(( + opt(alt((plus, minus))), + opt(lex_number), + dot, + lex_number, + opt(parse_exponent), + )), + )(input) + .map(|(rest_input, (sign, before, dot, after, exponent))| { + dbg!(&sign, &before, &dot, &after, &exponent); + ( + rest_input, + Primitive::Number { + span: outer_span(input, rest_input), + sign, + before, + dot: Some(dot), + after, + exponent, + }, + ) + }) + } + + fn parse_integer<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult, Primitive<'a>, E> { + context("parse integer", pair(opt(alt((plus, minus))), lex_number))(input).map( + |(rest_input, (sign, number))| { + ( + rest_input, + Primitive::Number { + span: outer_span(input, rest_input), + sign, + before: None, + dot: None, + after: number, + exponent: None, + }, + ) + }, + ) + } + + fn parse_exponent<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult, Exponent<'a>, E> { + tuple((exp, opt(alt((plus, minus))), lex_number))(input) + .map(|(rest_input, (e, sign, number))| (rest_input, Exponent { e, sign, number })) } fn parse_string<'a, E: ParseError> + ContextError>>( @@ -3508,7 +3563,14 @@ mod new { // }, }; - macro_rules! S { + macro_rules! T { + ($tok_kind: expr, $offset: literal, $line: literal, $str: literal) => { + Token::new($tok_kind, unsafe { + Span::new_from_raw_offset($offset, $line, $str, ()) + }) + }; + } + macro_rules! 
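// With `parse_decimal`, `parse_integer` and `parse_exponent` above, a number
// is recognized piecewise as `[+|-] [digits] '.' digits [(e|E) [+|-] digits]`,
// and every part is kept as its own token so the concrete syntax survives in
// the AST. As an illustration (not an input exercised by the tests below),
// `-1.5e+3` would decompose as:
//
//     sign     = Some("-")
//     before   = Some("1")
//     dot      = Some(".")
//     after    = "5"
//     exponent = Some(Exponent { e: "e", sign: Some("+"), number: "3" })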
s { ($offset:literal,$line:literal,$str:literal) => { unsafe { Span::new_from_raw_offset($offset, $line, $str, ()) } }; @@ -3539,49 +3601,49 @@ mod new { span: input, tl_doc_comment: None, statements: vec![Statement::Fact { - span: S!(0, 1, "a(B,C)."), + span: s!(0, 1, "a(B,C)."), doc_comment: None, atom: Atom::Positive(Tuple { - span: S!(0, 1, "a(B,C)"), + span: s!(0, 1, "a(B,C)"), identifier: Some(Token { kind: TokenKind::Ident, - span: S!(0, 1, "a"), + span: s!(0, 1, "a"), }), ws1: None, open_paren: Token { kind: TokenKind::OpenParen, - span: S!(1, 1, "("), + span: s!(1, 1, "("), }, ws2: None, terms: Some(List { - span: S!(2, 1, "B,C"), + span: s!(2, 1, "B,C"), first: Term::Primitive(Primitive::Constant(Token { kind: TokenKind::Ident, - span: S!(2, 1, "B"), + span: s!(2, 1, "B"), })), rest: Some(vec![( None, Token { kind: TokenKind::Comma, - span: S!(3, 1, ",") + span: s!(3, 1, ",") }, None, Term::Primitive(Primitive::Constant(Token { kind: TokenKind::Ident, - span: S!(4, 1, "C"), + span: s!(4, 1, "C"), })), )]), }), ws3: None, close_paren: Token { kind: TokenKind::CloseParen, - span: S!(5, 1, ")"), + span: s!(5, 1, ")"), }, }), ws: None, dot: Token { kind: TokenKind::Dot, - span: S!(6, 1, ".") + span: s!(6, 1, ".") } }], } @@ -3600,28 +3662,28 @@ mod new { span: input, statements: vec![ Statement::Directive(Directive::Base { - span: S!(0, 1, "@base ."), + span: s!(0, 1, "@base ."), doc_comment: None, kw: Token { kind: TokenKind::Base, - span: S!(0, 1, "@base"), + span: s!(0, 1, "@base"), }, ws1: Some(Token { kind: TokenKind::Whitespace, - span: S!(5, 1, " ") + span: s!(5, 1, " ") }), base_iri: Token { kind: TokenKind::Iri, - span: S!(6, 1, "") + span: s!(6, 1, "") }, ws2: None, dot: Token { kind: TokenKind::Dot, - span: S!(31, 1, ".") + span: s!(31, 1, ".") }, }), Statement::Directive(Directive::Prefix { - span: S!( + span: s!( 32, 1, "@prefix rdfs:." 
@@ -3629,29 +3691,29 @@ mod new { doc_comment: None, kw: Token { kind: TokenKind::Prefix, - span: S!(32, 1, "@prefix"), + span: s!(32, 1, "@prefix"), }, ws1: Some(Token { kind: TokenKind::Whitespace, - span: S!(39, 1, " ") + span: s!(39, 1, " ") }), prefix: Token { kind: TokenKind::Ident, - span: S!(40, 1, "rdfs:"), + span: s!(40, 1, "rdfs:"), }, ws2: None, prefix_iri: Token { kind: TokenKind::Iri, - span: S!(45, 1, ""), + span: s!(45, 1, ""), }, ws3: None, dot: Token { kind: TokenKind::Dot, - span: S!(84, 1, ".") + span: s!(84, 1, ".") } }), Statement::Directive(Directive::Import { - span: S!( + span: s!( 85, 1, r#"@import sourceA:-csv{resource="sources/dataA.csv"}."# @@ -3659,51 +3721,51 @@ mod new { doc_comment: None, kw: Token { kind: TokenKind::Import, - span: S!(85, 1, "@import"), + span: s!(85, 1, "@import"), }, ws1: Token { kind: TokenKind::Whitespace, - span: S!(92, 1, " "), + span: s!(92, 1, " "), }, predicate: Token { kind: TokenKind::Ident, - span: S!(93, 1, "sourceA"), + span: s!(93, 1, "sourceA"), }, ws2: None, arrow: Token { kind: TokenKind::Arrow, - span: S!(100, 1, ":-"), + span: s!(100, 1, ":-"), }, ws3: None, map: Map { - span: S!(102, 1, r#"csv{resource="sources/dataA.csv"}"#), + span: s!(102, 1, r#"csv{resource="sources/dataA.csv"}"#), identifier: Some(Token { kind: TokenKind::Ident, - span: S!(102, 1, "csv") + span: s!(102, 1, "csv") }), ws1: None, open_brace: Token { kind: TokenKind::OpenBrace, - span: S!(105, 1, "{") + span: s!(105, 1, "{") }, ws2: None, pairs: Some(List { - span: S!(106, 1, "resource=\"sources/dataA.csv\""), + span: s!(106, 1, "resource=\"sources/dataA.csv\""), first: Pair { - span: S!(106, 1, "resource=\"sources/dataA.csv\""), + span: s!(106, 1, "resource=\"sources/dataA.csv\""), key: Term::Primitive(Primitive::Constant(Token { kind: TokenKind::Ident, - span: S!(106, 1, "resource"), + span: s!(106, 1, "resource"), })), ws1: None, equal: Token { kind: TokenKind::Equal, - span: S!(114, 1, "="), + span: s!(114, 1, "="), }, ws2: None, value: Term::Primitive(Primitive::String(Token { kind: TokenKind::String, - span: S!(115, 1, "\"sources/dataA.csv\""), + span: s!(115, 1, "\"sources/dataA.csv\""), })), }, rest: None, @@ -3711,107 +3773,107 @@ mod new { ws3: None, close_brace: Token { kind: TokenKind::CloseBrace, - span: S!(134, 1, "}") + span: s!(134, 1, "}") }, }, ws4: None, dot: Token { kind: TokenKind::Dot, - span: S!(135, 1, ".") + span: s!(135, 1, ".") } }), Statement::Directive(Directive::Export { - span: S!(136, 1, "@export a:-csv{}."), + span: s!(136, 1, "@export a:-csv{}."), doc_comment: None, kw: Token { kind: TokenKind::Export, - span: S!(136, 1, "@export"), + span: s!(136, 1, "@export"), }, ws1: Token { kind: TokenKind::Whitespace, - span: S!(143, 1, " "), + span: s!(143, 1, " "), }, predicate: Token { kind: TokenKind::Ident, - span: S!(144, 1, "a"), + span: s!(144, 1, "a"), }, ws2: None, arrow: Token { kind: TokenKind::Arrow, - span: S!(145, 1, ":-"), + span: s!(145, 1, ":-"), }, ws3: None, map: Map { - span: S!(147, 1, "csv{}"), + span: s!(147, 1, "csv{}"), identifier: Some(Token { kind: TokenKind::Ident, - span: S!(147, 1, "csv"), + span: s!(147, 1, "csv"), }), ws1: None, open_brace: Token { kind: TokenKind::OpenBrace, - span: S!(150, 1, "{"), + span: s!(150, 1, "{"), }, ws2: None, pairs: None, ws3: None, close_brace: Token { kind: TokenKind::CloseBrace, - span: S!(151, 1, "}"), + span: s!(151, 1, "}"), }, }, ws4: None, dot: Token { kind: TokenKind::Dot, - span: S!(152, 1, "."), + span: s!(152, 1, "."), }, }), 
Statement::Directive(Directive::Output { - span: S!(153, 1, "@output a, b, c."), + span: s!(153, 1, "@output a, b, c."), doc_comment: None, kw: Token { kind: TokenKind::Output, - span: S!(153, 1, "@output") + span: s!(153, 1, "@output") }, ws1: Token { kind: TokenKind::Whitespace, - span: S!(160, 1, " "), + span: s!(160, 1, " "), }, predicates: Some(List { - span: S!(161, 1, "a, b, c"), + span: s!(161, 1, "a, b, c"), first: Token { kind: TokenKind::Ident, - span: S!(161, 1, "a"), + span: s!(161, 1, "a"), }, rest: Some(vec![ ( None, Token { kind: TokenKind::Comma, - span: S!(162, 1, ","), + span: s!(162, 1, ","), }, Some(Token { kind: TokenKind::Whitespace, - span: S!(163, 1, " "), + span: s!(163, 1, " "), }), Token { kind: TokenKind::Ident, - span: S!(164, 1, "b"), + span: s!(164, 1, "b"), }, ), ( None, Token { kind: TokenKind::Comma, - span: S!(165, 1, ","), + span: s!(165, 1, ","), }, Some(Token { kind: TokenKind::Whitespace, - span: S!(166, 1, " "), + span: s!(166, 1, " "), }), Token { kind: TokenKind::Ident, - span: S!(167, 1, "c"), + span: s!(167, 1, "c"), }, ), ]), @@ -3819,7 +3881,7 @@ mod new { ws2: None, dot: Token { kind: TokenKind::Dot, - span: S!(168, 1, "."), + span: s!(168, 1, "."), } }), ], @@ -3833,10 +3895,10 @@ mod new { assert_eq!( super::ignore_ws_and_comments(lex_ident::>)(input), Ok(( - S!(22, 2, ""), + s!(22, 2, ""), Token { kind: TokenKind::Ident, - span: S!(3, 1, "Hi") + span: s!(3, 1, "Hi") } )) ) @@ -3852,55 +3914,55 @@ mod new { tl_doc_comment: None, statements: vec![ Statement::Fact { - span: S!(0, 1, "some(Fact, with, whitespace) ."), + span: s!(0, 1, "some(Fact, with, whitespace) ."), doc_comment: None, atom: Atom::Positive(Tuple { - span: S!(0, 1, "some(Fact, with, whitespace)"), + span: s!(0, 1, "some(Fact, with, whitespace)"), identifier: Some(Token { kind: TokenKind::Ident, - span: S!(0, 1, "some"), + span: s!(0, 1, "some"), }), ws1: None, open_paren: Token { kind: TokenKind::OpenParen, - span: S!(4, 1, "(") + span: s!(4, 1, "(") }, ws2: None, terms: Some(List { - span: S!(5, 1, "Fact, with, whitespace"), + span: s!(5, 1, "Fact, with, whitespace"), first: Term::Primitive(Primitive::Constant(Token { kind: TokenKind::Ident, - span: S!(5, 1, "Fact"), + span: s!(5, 1, "Fact"), })), rest: Some(vec![ ( None, Token { kind: TokenKind::Comma, - span: S!(9, 1, ","), + span: s!(9, 1, ","), }, Some(Token { kind: TokenKind::Whitespace, - span: S!(10, 1, " "), + span: s!(10, 1, " "), }), Term::Primitive(Primitive::Constant(Token { kind: TokenKind::Ident, - span: S!(11, 1, "with") + span: s!(11, 1, "with") })), ), ( None, Token { kind: TokenKind::Comma, - span: S!(15, 1, ","), + span: s!(15, 1, ","), }, Some(Token { kind: TokenKind::Whitespace, - span: S!(16, 1, " "), + span: s!(16, 1, " "), }), Term::Primitive(Primitive::Constant(Token { kind: TokenKind::Ident, - span: S!(17, 1, "whitespace") + span: s!(17, 1, "whitespace") })), ), ]), @@ -3908,25 +3970,25 @@ mod new { ws3: None, close_paren: Token { kind: TokenKind::CloseParen, - span: S!(27, 1, ")") + span: s!(27, 1, ")") }, }), ws: Some(Token { kind: TokenKind::Whitespace, - span: S!(28, 1, " "), + span: s!(28, 1, " "), }), dot: Token { kind: TokenKind::Dot, - span: S!(29, 1, "."), + span: s!(29, 1, "."), }, }, Statement::Whitespace(Token { kind: TokenKind::Whitespace, - span: S!(30, 1, " ") + span: s!(30, 1, " ") }), Statement::Comment(Token { kind: TokenKind::Comment, - span: S!(31, 1, "% and a super useful comment\n") + span: s!(31, 1, "% and a super useful comment\n") }) ], } @@ -3998,25 +4060,20 @@ 
oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters #[test] fn arithmetic_expressions() { use TokenKind::*; - macro_rules! T { - ($tok_kind: expr, $offset: literal, $line: literal, $str: literal) => { - Token::new($tok_kind, unsafe { - Span::new_from_raw_offset($offset, $line, $str, ()) - }) - }; - } - macro_rules! s { - ($offset:literal,$line:literal,$str:literal) => { - unsafe { Span::new_from_raw_offset($offset, $line, $str, ()) } - }; - } assert_eq!( { let result = parse_term::>(Span::new("42")); result.unwrap().1 }, - Term::Primitive(Primitive::Number(T! {Number, 0, 1, "42"})), + Term::Primitive(Primitive::Number { + span: s!(0, 1, "42"), + sign: None, + before: None, + dot: None, + after: T! {Number, 0, 1, "42"}, + exponent: None, + }), ); assert_eq!( @@ -4026,11 +4083,25 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters }, Term::Binary { span: s!(0, 1, "35+7"), - lhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 0, 1, "35"}))), + lhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(0, 1, "35"), + sign: None, + before: None, + dot: None, + after: T! {Number, 0, 1, "35"}, + exponent: None, + })), ws1: None, operation: T! {Plus, 2, 1, "+"}, ws2: None, - rhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 3, 1, "7"}))), + rhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(3, 1, "7"), + sign: None, + before: None, + dot: None, + after: T! {Number, 3, 1, "7"}, + exponent: None, + })), } ); @@ -4041,11 +4112,25 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters }, Term::Binary { span: s!(0, 1, "6*7"), - lhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 0,1,"6"}))), + lhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(0, 1, "6"), + sign: None, + before: None, + dot: None, + after: T! {Number, 0,1,"6"}, + exponent: None, + })), ws1: None, operation: T! {Star, 1,1,"*"}, ws2: None, - rhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 2,1,"7"}))), + rhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(2, 1, "7"), + sign: None, + before: None, + dot: None, + after: T! {Number, 2,1,"7"}, + exponent: None, + })), } ); @@ -4056,11 +4141,25 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters }, Term::Binary { span: s!(0, 1, "49-7"), - lhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 0, 1, "49"}))), + lhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(0, 1, "49"), + sign: None, + before: None, + dot: None, + after: T! {Number, 0, 1, "49"}, + exponent: None, + })), ws1: None, operation: T! {Minus, 2, 1, "-"}, ws2: None, - rhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 3, 1, "7"}))), + rhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(3, 1, "7"), + sign: None, + before: None, + dot: None, + after: T! {Number, 3, 1, "7"}, + exponent: None, + })), } ); @@ -4071,11 +4170,25 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters }, Term::Binary { span: s!(0, 1, "84/2"), - lhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 0, 1, "84"}))), + lhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(0, 1, "84"), + sign: None, + before: None, + dot: None, + after: T! {Number, 0, 1, "84"}, + exponent: None, + })), ws1: None, operation: T! {Slash, 2, 1, "/"}, ws2: None, - rhs: Box::new(Term::Primitive(Primitive::Number(T! 
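// Note the convention inherited from `parse_integer`: for a plain integer the
// digits land in `after` while `sign`, `before`, `dot` and `exponent` stay
// `None`, so `42` occupies the same field as the fractional digits of a
// decimal like `1.5`.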
{Number, 3, 1, "2"}))), + rhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(3, 1, "2"), + sign: None, + before: None, + dot: None, + after: T! {Number, 3, 1, "2"}, + exponent: None, + })), } ); @@ -4088,16 +4201,37 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters span: s!(0, 1, "5*7+7"), lhs: Box::new(Term::Binary { span: s!(0, 1, "5*7"), - lhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 0,1,"5"}))), + lhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(0, 1, "5"), + sign: None, + before: None, + dot: None, + after: T! {Number, 0,1,"5"}, + exponent: None, + })), ws1: None, operation: T! {Star, 1,1,"*"}, ws2: None, - rhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 2,1,"7"}))), + rhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(2, 1, "7"), + sign: None, + before: None, + dot: None, + after: T! {Number, 2,1,"7"}, + exponent: None, + })), }), ws1: None, operation: T! {Plus, 3,1,"+"}, ws2: None, - rhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 4,1,"7"}))), + rhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(4, 1, "7"), + sign: None, + before: None, + dot: None, + after: T! {Number, 4,1,"7"}, + exponent: None, + })), } ); @@ -4108,17 +4242,38 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters }, Term::Binary { span: s!(0, 1, "7+5*7"), - lhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 0,1,"7"}))), + lhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(0, 1, "7"), + sign: None, + before: None, + dot: None, + after: T! {Number, 0,1,"7"}, + exponent: None + })), ws1: None, operation: T! {Plus, 1,1,"+"}, ws2: None, rhs: Box::new(Term::Binary { span: s!(2, 1, "5*7"), - lhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 2,1,"5"}))), + lhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(2, 1, "5"), + sign: None, + before: None, + dot: None, + after: T! {Number, 2,1,"5"}, + exponent: None + })), ws1: None, operation: T! {Star, 3,1,"*"}, ws2: None, - rhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 4,1,"7"}))), + rhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(4, 1, "7"), + sign: None, + before: None, + dot: None, + after: T! {Number, 4,1,"7"}, + exponent: None + })), }), } ); @@ -4163,9 +4318,14 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters span: s!(1, 1, "15+3*2-(7+35)*8"), first: Term::Binary { span: s!(1, 1, "15+3*2-(7+35)*8"), - lhs: Box::new(Term::Primitive(Primitive::Number( - T! {Number, 1,1,"15"} - ))), + lhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(1, 1, "15"), + sign: None, + before: None, + dot: None, + after: T! {Number, 1,1,"15"}, + exponent: None, + })), ws1: None, operation: T! {Plus, 3,1,"+"}, ws2: None, @@ -4173,15 +4333,25 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters span: s!(4, 1, "3*2-(7+35)*8"), lhs: Box::new(Term::Binary { span: s!(4, 1, "3*2"), - lhs: Box::new(Term::Primitive(Primitive::Number( - T! {Number, 4,1,"3"} - ))), + lhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(4, 1, "3"), + sign: None, + before: None, + dot: None, + after: T! {Number, 4,1,"3"}, + exponent: None, + })), ws1: None, operation: T! {Star, 5,1,"*"}, ws2: None, - rhs: Box::new(Term::Primitive(Primitive::Number( - T! {Number, 6,1,"2"} - ))), + rhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(6, 1, "2"), + sign: None, + before: None, + dot: None, + after: T! 
{Number, 6,1,"2"}, + exponent: None, + })), }), ws1: None, operation: T! {Minus, 7,1,"-"}, @@ -4199,13 +4369,27 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters first: Term::Binary { span: s!(9, 1, "7+35"), lhs: Box::new(Term::Primitive( - Primitive::Number(T! {Number, 9,1,"7"}) + Primitive::Number { + span: s!(9, 1, "7"), + sign: None, + before: None, + dot: None, + after: T! {Number, 9,1,"7"}, + exponent: None, + } )), ws1: None, operation: T! {Plus, 10,1,"+"}, ws2: None, rhs: Box::new(Term::Primitive( - Primitive::Number(T! {Number, 11,1,"35"}) + Primitive::Number { + span: s!(11, 1, "35"), + sign: None, + before: None, + dot: None, + after: T! {Number, 11,1,"35"}, + exponent: None, + } )), }, rest: None @@ -4216,9 +4400,14 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters ws1: None, operation: T! {Star, 14,1,"*"}, ws2: None, - rhs: Box::new(Term::Primitive(Primitive::Number( - T! {Number, 15,1,"8"} - ))), + rhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(15, 1, "8"), + sign: None, + before: None, + dot: None, + after: T! {Number, 15,1,"8"}, + exponent: None, + })), }), }), }, @@ -4230,7 +4419,14 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters ws1: None, operation: T! {Slash, 17,1,"/"}, ws2: None, - rhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 18,1,"3"}))), + rhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(18, 1, "3"), + sign: None, + before: None, + dot: None, + after: T! {Number, 18,1,"3"}, + exponent: None, + })), } ); // Term::Binary { @@ -4249,7 +4445,14 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters }, Term::Binary { span: s!(0, 1, "15+3*2-(7+35)*8/3"), - lhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 0,1,"15"}))), + lhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(0, 1, "15"), + sign: None, + before: None, + dot: None, + after: T! {Number, 0,1,"15"}, + exponent: None, + })), ws1: None, operation: T! {Plus, 2,1,"+"}, ws2: None, @@ -4257,11 +4460,25 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters span: s!(3, 1, "3*2-(7+35)*8/3"), lhs: Box::new(Term::Binary { span: s!(3, 1, "3*2"), - lhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 3,1,"3"}))), + lhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(3, 1, "3"), + sign: None, + before: None, + dot: None, + after: T! {Number, 3,1,"3"}, + exponent: None, + })), ws1: None, operation: T! {Star, 4,1,"*"}, ws2: None, - rhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 5,1,"2"}))), + rhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(5, 1, "2"), + sign: None, + before: None, + dot: None, + after: T! {Number, 5,1,"2"}, + exponent: None, + })), }), ws1: None, operation: T! {Minus, 6,1,"-"}, @@ -4278,15 +4495,25 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters span: s!(8, 1, "7+35"), first: Term::Binary { span: s!(8, 1, "7+35"), - lhs: Box::new(Term::Primitive(Primitive::Number( - T! {Number, 8,1,"7"} - ))), + lhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(8, 1, "7"), + sign: None, + before: None, + dot: None, + after: T! {Number, 8,1,"7"}, + exponent: None, + })), ws1: None, operation: T! {Plus, 9,1,"+"}, ws2: None, - rhs: Box::new(Term::Primitive(Primitive::Number( - T! 
{Number, 10,1,"35"} - ))), + rhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(10, 1, "35"), + sign: None, + before: None, + dot: None, + after: T! {Number, 10,1,"35"}, + exponent: None, + })), }, rest: None, }), @@ -4298,15 +4525,25 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters ws2: None, rhs: Box::new(Term::Binary { span: s!(14, 1, "8/3"), - lhs: Box::new(Term::Primitive(Primitive::Number( - T! {Number, 14,1,"8"} - ))), + lhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(14, 1, "8"), + sign: None, + before: None, + dot: None, + after: T! {Number, 14,1,"8"}, + exponent: None, + })), ws1: None, operation: T! {Slash, 15, 1, "/"}, ws2: None, - rhs: Box::new(Term::Primitive(Primitive::Number( - T! {Number, 16,1,"3"} - ))), + rhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(16, 1, "3"), + sign: None, + before: None, + dot: None, + after: T! {Number, 16,1,"3"}, + exponent: None, + })), }), }), }), @@ -4340,5 +4577,23 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters // result.unwrap().1 // },); } + + #[test] + fn number_exp() { + assert_eq!( + { + let input = Span::new("e42"); + parse_exponent::>(input) + }, + Ok(( + s!(3, 1, ""), + Exponent { + e: T! {TokenKind::Exponent, 0,1,"e"}, + sign: None, + number: T! {TokenKind::Number, 1,1,"42"} + } + )) + ) + } } } diff --git a/nemo/src/io/parser/ast/term.rs b/nemo/src/io/parser/ast/term.rs index e81ccf09e..c8f734481 100644 --- a/nemo/src/io/parser/ast/term.rs +++ b/nemo/src/io/parser/ast/term.rs @@ -160,7 +160,14 @@ impl std::fmt::Display for Term<'_> { #[derive(Debug, Clone, PartialEq)] pub(crate) enum Primitive<'a> { Constant(Token<'a>), - Number(Token<'a>), + Number { + span: Span<'a>, + sign: Option>, + before: Option>, + dot: Option>, + after: Token<'a>, + exponent: Option>, + }, String(Token<'a>), Iri(Token<'a>), RdfLiteral { @@ -174,7 +181,33 @@ impl AstNode for Primitive<'_> { fn children(&self) -> Option> { match self { Primitive::Constant(token) => Some(vec![token]), - Primitive::Number(token) => Some(vec![token]), + Primitive::Number { + sign, + before, + dot, + after, + exponent, + .. + } => { + let mut vec = Vec::new(); + #[allow(trivial_casts)] + if let Some(s) = sign { + vec.push(s as &dyn AstNode); + } + if let Some(b) = before { + vec.push(b); + } + if let Some(d) = dot { + vec.push(d); + } + vec.push(after); + if let Some(exp) = exponent { + if let Some(mut children) = exp.children() { + vec.append(&mut children); + } + } + Some(vec) + } Primitive::String(token) => Some(vec![token]), Primitive::Iri(token) => Some(vec![token]), Primitive::RdfLiteral { @@ -189,7 +222,7 @@ impl AstNode for Primitive<'_> { fn span(&self) -> Span { match self { Primitive::Constant(token) => token.span, - Primitive::Number(token) => token.span, + Primitive::Number { span, .. } => *span, Primitive::String(token) => token.span, Primitive::Iri(token) => token.span, Primitive::RdfLiteral { span, .. } => *span, @@ -223,7 +256,7 @@ impl AstNode for Primitive<'_> { } match self { Primitive::Constant(_) => name!("Constant"), - Primitive::Number(_) => name!("Number"), + Primitive::Number { .. } => name!("Number"), Primitive::String(_) => name!("String"), Primitive::Iri(_) => name!("Iri"), Primitive::RdfLiteral { .. 
} => name!("RDF Literal"), @@ -237,3 +270,43 @@ impl std::fmt::Display for Primitive<'_> { write!(f, "{output}") } } + +#[derive(Debug, Clone, PartialEq)] +pub(crate) struct Exponent<'a> { + pub(crate) e: Token<'a>, + pub(crate) sign: Option>, + pub(crate) number: Token<'a>, +} +impl AstNode for Exponent<'_> { + fn children(&self) -> Option> { + let mut vec = Vec::new(); + #[allow(trivial_casts)] + vec.push(&self.e as &dyn AstNode); + if let Some(s) = &self.sign { + vec.push(s); + }; + vec.push(&self.number); + Some(vec) + } + + fn span(&self) -> Span { + todo!() + } + + fn position(&self) -> Position { + todo!() + } + + fn is_token(&self) -> bool { + todo!() + } + + fn name(&self) -> String { + todo!() + } +} +impl std::fmt::Display for Exponent<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + todo!() + } +} From 4e040a6ffc62ea13cab84b27636a3399fbd38bb2 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Thu, 25 Apr 2024 17:13:19 +0200 Subject: [PATCH 037/214] Add parsing of blanks --- nemo/src/io/parser.rs | 11 ++++++++++- nemo/src/io/parser/ast/term.rs | 5 +++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index 5f87b2fcf..236c6e313 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -2438,7 +2438,7 @@ mod new { exp, greater, greater_equal, hash, less, less_equal, lex_comment, lex_doc_comment, lex_ident, lex_iri, lex_number, lex_operators, lex_string, lex_toplevel_doc_comment, lex_unary_prefix_operators, lex_whitespace, minus, open_brace, open_paren, plus, - question_mark, slash, star, tilde, unequal, Span, Token, TokenKind, + question_mark, slash, star, tilde, underscore, unequal, Span, Token, TokenKind, }; use crate::io::parser::ast::AstNode; use nom::combinator::{all_consuming, cut, map, opt, recognize}; @@ -3195,6 +3195,7 @@ mod new { parse_variable, parse_existential, parse_aggregation_term, + parse_blank, )), )(input) } @@ -3486,6 +3487,14 @@ mod new { ) } + /// Parse a `_` + fn parse_blank<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult, E> { + context("parse blank", underscore)(input) + .map(|(rest_input, underscore)| (rest_input, Term::Blank(underscore))) + } + /// Parse a tuple term, either with a name (function symbol) or as a term (-list) with /// parenthesis. fn parse_tuple_term<'a, E: ParseError> + ContextError>>( diff --git a/nemo/src/io/parser/ast/term.rs b/nemo/src/io/parser/ast/term.rs index c8f734481..8be1e79e7 100644 --- a/nemo/src/io/parser/ast/term.rs +++ b/nemo/src/io/parser/ast/term.rs @@ -34,6 +34,7 @@ pub(crate) enum Term<'a> { }, Tuple(Box>), Map(Box>), + Blank(Token<'a>), } impl AstNode for Term<'_> { fn children(&self) -> Option> { @@ -88,8 +89,10 @@ impl AstNode for Term<'_> { vec.push(close_paren); Some(vec) } + // TODO: check whether directly the children or Some(vec![named_tuple]) should get returned (for fidelity in ast) Term::Tuple(named_tuple) => named_tuple.children(), Term::Map(map) => map.children(), + Term::Blank(token) => Some(vec![token]), } } @@ -103,6 +106,7 @@ impl AstNode for Term<'_> { Term::Aggregation { span, .. 
} => *span, Term::Tuple(named_tuple) => named_tuple.span(), Term::Map(map) => map.span(), + Term::Blank(t) => t.span(), } } @@ -146,6 +150,7 @@ impl AstNode for Term<'_> { } } Term::Map(_) => name!("Map"), + Term::Blank(_) => name!("Blank"), } } } From 4f4f4cdfd9640b2f8399ac9246cf2dc92789864d Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Wed, 15 May 2024 07:40:07 +0200 Subject: [PATCH 038/214] Add error reporting and recovery --- nemo/src/io/lexer.rs | 451 +++++-- nemo/src/io/parser.rs | 1734 ++++++++++++++++----------- nemo/src/io/parser/ast.rs | 29 + nemo/src/io/parser/ast/program.rs | 21 +- nemo/src/io/parser/ast/statement.rs | 4 + nemo/src/io/parser/types.rs | 265 +++- testfile.rls | 17 + testfile2.rls | 11 + 8 files changed, 1704 insertions(+), 828 deletions(-) create mode 100644 testfile2.rls diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs index ebb608d10..555e9e74f 100644 --- a/nemo/src/io/lexer.rs +++ b/nemo/src/io/lexer.rs @@ -1,21 +1,75 @@ //! Lexical tokenization of rulewerk-style rules. +use std::{cell::RefCell, ops::Range}; + use nom::{ branch::alt, - bytes::complete::{is_not, tag, take}, + bytes::complete::{is_not, tag, take, take_till, take_while}, character::complete::{alpha1, alphanumeric1, digit1, line_ending, multispace1}, combinator::{all_consuming, cut, map, recognize}, error::{ContextError, ParseError}, multi::{many0, many1}, sequence::{delimited, pair, tuple}, - IResult, }; use nom_locate::LocatedSpan; -use super::parser::ast::Position; +#[derive(Debug)] +pub(crate) enum NewParseError { + MissingWhitespace, + Rule, + Fact, + Directive, + Comment, + SyntaxError(String), + MissingTlDocComment, +} +impl nom::error::ParseError> for NewParseError { + fn from_error_kind(input: Input, kind: nom::error::ErrorKind) -> Self { + NewParseError::SyntaxError(kind.description().to_string()) + } + + fn append(_: Input, _: nom::error::ErrorKind, other: Self) -> Self { + other + } +} + +pub(crate) type IResult = nom::IResult; + +use super::parser::{ + ast::Position, + types::{Input, Label, ToRange}, +}; + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub(crate) struct Error(pub(crate) Position, pub(crate) String); + +#[derive(Debug, Clone, Copy, PartialEq)] +pub(crate) struct ParserState<'a> { + pub(crate) errors: &'a RefCell>, + pub(crate) labels: &'a RefCell>, +} +impl ParserState<'_> { + pub fn report_error(&self, error: Error) { + self.errors.borrow_mut().push(error); + } +} pub(crate) type Span<'a> = LocatedSpan<&'a str>; +impl ToRange for Span<'_> { + fn to_range(&self) -> Range { + let start = self.location_offset(); + let end = start + self.fragment().len(); + start..end + } +} + +pub(crate) fn to_range<'a>(span: Span<'a>) -> Range { + let start = span.location_offset(); + let end = start + span.fragment().len(); + start..end +} + /// All the tokens the input gets parsed into. 
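// The `RefCell`s in `ParserState` give all parsers shared, mutable access to
// a single error (and label) sink without threading `&mut` through the nom
// combinators, so a recovering parser can record a diagnostic and continue.
// A minimal usage sketch (message and position are made up for illustration):
//
//     let errors = RefCell::new(Vec::new());
//     let labels = RefCell::new(Vec::new());
//     let state = ParserState { errors: &errors, labels: &labels };
//     state.report_error(Error(
//         Position { offset: 0, line: 1, column: 1 },
//         "expected a statement".to_string(),
//     ));
//     assert_eq!(state.errors.borrow().len(), 1);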
#[derive(Debug, PartialEq, Copy, Clone)] pub(crate) enum TokenKind { @@ -115,6 +169,8 @@ pub(crate) enum TokenKind { Illegal, /// signals end of file Eof, + /// signals an error + Error, } impl std::fmt::Display for TokenKind { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { @@ -166,6 +222,7 @@ impl std::fmt::Display for TokenKind { TokenKind::PrefixIdent => write!(f, "PrefixIdent"), TokenKind::Illegal => write!(f, "Illegal"), TokenKind::Eof => write!(f, "Eof"), + TokenKind::Error => write!(f, "\x1b[1;31mError\x1b[0m"), } } } @@ -229,12 +286,27 @@ impl<'a> crate::io::parser::ast::AstNode for Token<'a> { } } +pub(crate) fn map_err<'a, 'e, O, E: ParseError>>( + mut f: impl nom::Parser, O, E>, + mut op: impl FnMut(E) -> NewParseError, +) -> impl FnMut(Input<'a, 'e>) -> IResult, O> { + move |input| { + f.parse(input).map_err(|e| match e { + nom::Err::Incomplete(err) => nom::Err::Incomplete(err), + nom::Err::Error(err) => nom::Err::Error(op(err)), + nom::Err::Failure(err) => nom::Err::Error(op(err)), + }) + } +} + macro_rules! syntax { ($func_name: ident, $tag_string: literal, $token: expr) => { - pub(crate) fn $func_name<'a, E: ParseError>>( - input: Span<'a>, - ) -> IResult, Token, E> { - map(tag($tag_string), |span| Token::new($token, span))(input) + pub(crate) fn $func_name<'a, 'e>( + input: Input<'a, 'e>, + ) -> nom::IResult, Token<'a>> { + map(tag($tag_string), |span: Input| { + Token::new($token, span.input) + })(input) } }; } @@ -259,13 +331,13 @@ syntax!(at, "@", TokenKind::At); syntax!(exp_lower, "e", TokenKind::Exponent); syntax!(exp_upper, "E", TokenKind::Exponent); -pub(crate) fn exp<'a, E: ParseError>>(input: Span<'a>) -> IResult, Token, E> { +pub(crate) fn exp<'a, 'e>(input: Input<'a, 'e>) -> nom::IResult, Token<'a>> { alt((exp_lower, exp_upper))(input) } -pub(crate) fn lex_punctuations<'a, E: ParseError>>( - input: Span<'a>, -) -> IResult, Token, E> { +pub(crate) fn lex_punctuations<'a, 'e>( + input: Input<'a, 'e>, +) -> nom::IResult, Token<'a>> { alt(( arrow, open_paren, @@ -298,9 +370,9 @@ syntax!(minus, "-", TokenKind::Minus); syntax!(star, "*", TokenKind::Star); syntax!(slash, "/", TokenKind::Slash); -pub(crate) fn lex_operators<'a, E: ParseError>>( - input: Span<'a>, -) -> IResult, Token, E> { +pub(crate) fn lex_operators<'a, 'e>( + input: Input<'a, 'e>, +) -> nom::IResult, Token<'a>> { alt(( less_equal, greater_equal, @@ -315,90 +387,79 @@ pub(crate) fn lex_operators<'a, E: ParseError>>( ))(input) } -pub(crate) fn lex_unary_prefix_operators<'a, E: ParseError>>( - input: Span<'a>, -) -> IResult, Token<'a>, E> { - alt((plus, minus))(input) -} +// pub(crate) fn lex_unary_prefix_operators<'a, 'e>( +// input: Input<'a, 'e>, +// ) -> IResult, Token<'a>> { +// alt((plus, minus))(input) +// } -pub(crate) fn lex_ident<'a, E: ParseError>>( - input: Span<'a>, -) -> IResult, Token, E> { - let (rest, result) = recognize(pair( +pub(crate) fn lex_ident<'a, 'e>(input: Input<'a, 'e>) -> nom::IResult, Token<'a>> { + let (rest_input, ident) = recognize(pair( alpha1, many0(alt((alphanumeric1, tag("_"), tag("-")))), ))(input)?; - let token = match *result.fragment() { - "base" => Token::new(TokenKind::Base, result), - "prefix" => Token::new(TokenKind::Prefix, result), - "import" => Token::new(TokenKind::Import, result), - "export" => Token::new(TokenKind::Export, result), - "output" => Token::new(TokenKind::Output, result), - _ => Token::new(TokenKind::Ident, result), + let token = match *ident.input.fragment() { + "base" => Token::new(TokenKind::Base, ident.input), + 
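// `lex_ident` first recognizes the generic identifier shape (a letter, then
// letters, digits, `_` or `-`) and only afterwards promotes the reserved
// words to dedicated kinds, so `base` becomes `TokenKind::Base` while `baseA`
// stays a plain `Ident` — exactly the behaviour the
// `idents_with_keyword_prefix` test further down pins down.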
"prefix" => Token::new(TokenKind::Prefix, ident.input), + "import" => Token::new(TokenKind::Import, ident.input), + "export" => Token::new(TokenKind::Export, ident.input), + "output" => Token::new(TokenKind::Output, ident.input), + _ => Token::new(TokenKind::Ident, ident.input), }; - Ok((rest, token)) + Ok((rest_input, token)) } -pub(crate) fn lex_iri<'a, E: ParseError>>(input: Span<'a>) -> IResult, Token, E> { +pub(crate) fn lex_iri<'a, 'e>(input: Input<'a, 'e>) -> nom::IResult, Token<'a>> { recognize(delimited(tag("<"), is_not("> \n"), cut(tag(">"))))(input) - .map(|(rest, result)| (rest, Token::new(TokenKind::Iri, result))) + .map(|(rest, result)| (rest, Token::new(TokenKind::Iri, result.input))) } -pub(crate) fn lex_number<'a, E: ParseError>>( - input: Span<'a>, -) -> IResult, Token, E> { - digit1(input).map(|(rest, result)| (rest, Token::new(TokenKind::Number, result))) +pub(crate) fn lex_number<'a, 'e>(input: Input<'a, 'e>) -> nom::IResult, Token<'a>> { + digit1(input) + .map(|(rest_input, result)| (rest_input, Token::new(TokenKind::Number, result.input))) } -pub(crate) fn lex_string<'a, E: ParseError>>( - input: Span<'a>, -) -> IResult, Token, E> { +pub(crate) fn lex_string<'a, 'e>(input: Input<'a, 'e>) -> nom::IResult, Token<'a>> { recognize(delimited(tag("\""), is_not("\""), cut(tag("\""))))(input) - .map(|(rest, result)| (rest, Token::new(TokenKind::String, result))) + .map(|(rest, result)| (rest, Token::new(TokenKind::String, result.input))) } -pub(crate) fn lex_comment<'a, E: ParseError> + ContextError>>( - input: Span<'a>, -) -> IResult, Token, E> { +pub(crate) fn lex_comment<'a, 'e>(input: Input<'a, 'e>) -> nom::IResult, Token<'a>> { recognize(tuple((tag("%"), many0(is_not("\n")), line_ending)))(input) - .map(|(rest, result)| (rest, Token::new(TokenKind::Comment, result))) + .map(|(rest, result)| (rest, Token::new(TokenKind::Comment, result.input))) } -pub(crate) fn lex_doc_comment<'a, E: ParseError> + ContextError>>( - input: Span<'a>, -) -> IResult, Token, E> { +pub(crate) fn lex_doc_comment<'a, 'e>( + input: Input<'a, 'e>, +) -> nom::IResult, Token<'a>> { recognize(many1(tuple((tag("%%"), many0(is_not("\n")), line_ending))))(input) - .map(|(rest, result)| (rest, Token::new(TokenKind::DocComment, result))) + .map(|(rest, result)| (rest, Token::new(TokenKind::DocComment, result.input))) } -pub(crate) fn lex_toplevel_doc_comment<'a, E: ParseError> + ContextError>>( - input: Span<'a>, -) -> IResult, Token, E> { +pub(crate) fn lex_toplevel_doc_comment<'a, 'e>( + input: Input<'a, 'e>, +) -> nom::IResult, Token<'a>> { recognize(many1(tuple((tag("%!"), many0(is_not("\n")), line_ending))))(input) - .map(|(rest, result)| (rest, Token::new(TokenKind::TlDocComment, result))) + .map(|(rest, result)| (rest, Token::new(TokenKind::TlDocComment, result.input))) } -pub(crate) fn lex_comments<'a, E: ParseError> + ContextError>>( - input: Span<'a>, -) -> IResult, Token, E> { +pub(crate) fn lex_comments<'a, 'e>(input: Input<'a, 'e>) -> nom::IResult, Token<'a>> { alt((lex_toplevel_doc_comment, lex_doc_comment, lex_comment))(input) } -pub(crate) fn lex_whitespace<'a, E: ParseError>>( - input: Span<'a>, -) -> IResult, Token, E> { - multispace1(input).map(|(rest, result)| (rest, Token::new(TokenKind::Whitespace, result))) +pub(crate) fn lex_whitespace<'a, 'e>( + input: Input<'a, 'e>, +) -> nom::IResult, Token<'a>> { + multispace1(input).map(|(rest, result)| (rest, Token::new(TokenKind::Whitespace, result.input))) } -pub(crate) fn lex_illegal<'a, E: ParseError>>( - input: Span<'a>, -) -> IResult, 
Token, E> { - take(1usize)(input).map(|(rest, result)| (rest, Token::new(TokenKind::Illegal, result))) +pub(crate) fn lex_illegal<'a, 'e>(input: Input<'a, 'e>) -> nom::IResult, Token<'a>> { + take(1usize)(input).map(|(rest, result)| (rest, Token::new(TokenKind::Illegal, result.input))) } -pub(crate) fn lex_tokens<'a, E: ParseError> + ContextError>>( - input: Span<'a>, -) -> IResult, Vec, E> { +pub(crate) fn lex_tokens<'a, 'e>( + input: Input<'a, 'e>, +) -> nom::IResult, Vec>> { all_consuming(many0(alt(( lex_iri, lex_operators, @@ -411,13 +472,28 @@ pub(crate) fn lex_tokens<'a, E: ParseError> + ContextError>>( lex_illegal, ))))(input) .map(|(span, mut vec)| { - vec.append(&mut vec![Token::new(TokenKind::Eof, span)]); + vec.append(&mut vec![Token::new(TokenKind::Eof, span.input)]); (span, vec) }) } +pub(crate) fn skip_to_dot<'a, 'e>(input: Input<'a, 'e>) -> (Input<'a, 'e>, Token<'a>) { + let (rest_input, error_input) = recognize(pair( + take_till::<_, Input<'_, '_>, nom::error::Error<_>>(|c| c == '.'), + tag("."), + ))(input) + .expect("Skipping to the next dot should not fail!"); + ( + rest_input, + Token { + kind: TokenKind::Error, + span: error_input.input, + }, + ) +} + #[cfg(test)] -mod test { +mod tests { use super::TokenKind::*; use super::*; @@ -432,8 +508,19 @@ mod test { #[test] fn empty_input() { let input = Span::new(""); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let errors = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state: errors, + }; assert_eq!( - lex_tokens::>(input).unwrap().1, + // lex_tokens::>(input).unwrap().1, + lex_tokens(input).unwrap().1, vec![T!(Eof, 0, 1, "")] ) } @@ -441,8 +528,19 @@ mod test { #[test] fn base() { let input = Span::new("@base"); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let errors = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state: errors, + }; assert_eq!( - lex_tokens::>(input).unwrap().1, + // lex_tokens::>(input).unwrap().1, + lex_tokens(input).unwrap().1, vec![T!(At, 0, 1, "@"), T!(Base, 1, 1, "base"), T!(Eof, 5, 1, ""),] ) } @@ -450,8 +548,19 @@ mod test { #[test] fn prefix() { let input = Span::new("@prefix"); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let errors = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state: errors, + }; assert_eq!( - lex_tokens::>(input).unwrap().1, + // lex_tokens::>(input).unwrap().1, + lex_tokens(input).unwrap().1, vec![ T!(At, 0, 1, "@"), T!(Prefix, 1, 1, "prefix"), @@ -463,8 +572,19 @@ mod test { #[test] fn output() { let input = Span::new("@output"); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let errors = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state: errors, + }; assert_eq!( - lex_tokens::>(input).unwrap().1, + // lex_tokens::>(input).unwrap().1, + lex_tokens(input).unwrap().1, vec![ T!(At, 0, 1, "@"), T!(Output, 1, 1, "output"), @@ -476,8 +596,19 @@ mod test { #[test] fn import() { let input = Span::new("@import"); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let errors = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state: errors, + }; assert_eq!( - lex_tokens::>(input).unwrap().1, + // lex_tokens::>(input).unwrap().1, + 
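// The commented-out call above used an explicit nom error type; with the new
// `Input`, the span travels together with the shared `ParserState`, so no
// error type parameter is needed below.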
lex_tokens(input).unwrap().1, vec![ T!(At, 0, 1, "@"), T!(Import, 1, 1, "import"), @@ -489,8 +620,19 @@ mod test { #[test] fn export() { let input = Span::new("@export"); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let errors = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state: errors, + }; assert_eq!( - lex_tokens::>(input).unwrap().1, + // lex_tokens::>(input).unwrap().1, + lex_tokens(input).unwrap().1, vec![ T!(At, 0, 1, "@"), T!(Export, 1, 1, "export"), @@ -502,8 +644,19 @@ mod test { #[test] fn idents_with_keyword_prefix() { let input = Span::new("@baseA, @prefixB, @importC, @exportD, @outputE."); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let errors = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state: errors, + }; assert_eq!( - lex_tokens::>(input).unwrap().1, + // lex_tokens::>(input).unwrap().1, + lex_tokens(input).unwrap().1, vec![ T!(At, 0, 1, "@"), T!(Ident, 1, 1, "baseA"), @@ -532,8 +685,19 @@ mod test { #[test] fn tokenize() { let input = Span::new("P(?X) :- A(?X).\t\n A(Human)."); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let errors = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state: errors, + }; assert_eq!( - lex_tokens::>(input).unwrap().1, + // lex_tokens::>(input).unwrap().1, + lex_tokens(input).unwrap().1, vec![ T!(Ident, 0, 1, "P"), T!(OpenParen, 1, 1, "("), @@ -563,8 +727,19 @@ mod test { #[test] fn comment() { let input = Span::new(" % Some Comment\n"); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let errors = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state: errors, + }; assert_eq!( - lex_tokens::>(input).unwrap().1, + // lex_tokens::>(input).unwrap().1, + lex_tokens(input).unwrap().1, vec![ T!(Whitespace, 0, 1, " "), T!(Comment, 4, 1, "% Some Comment\n"), @@ -578,8 +753,19 @@ mod test { #[test] fn ident() { let input = Span::new("some_Ident(Alice). %comment at the end of a line\n"); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let errors = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state: errors, + }; assert_eq!( - lex_tokens::>(input).unwrap().1, + // lex_tokens::>(input).unwrap().1, + lex_tokens(input).unwrap().1, vec![ T!(Ident, 0, 1, "some_Ident"), T!(OpenParen, 10, 1, "("), @@ -596,8 +782,19 @@ mod test { #[test] fn forbidden_ident() { let input = Span::new("_someIdent(Alice). 
%comment at the end of a line\n"); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let errors = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state: errors, + }; assert_eq!( - lex_tokens::>(input).unwrap().1, + // lex_tokens::>(input).unwrap().1, + lex_tokens(input).unwrap().1, vec![ T!(Underscore, 0, 1, "_"), T!(Ident, 1, 1, "someIdent"), @@ -615,8 +812,19 @@ mod test { #[test] fn iri() { let input = Span::new(""); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let errors = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state: errors, + }; assert_eq!( - lex_tokens::>(input).unwrap().1, + // lex_tokens::>(input).unwrap().1, + lex_tokens(input).unwrap().1, vec![ T!(Iri, 0, 1, ""), T!(Eof, 31, 1, ""), @@ -627,8 +835,19 @@ mod test { #[test] fn iri_pct_enc() { let input = Span::new("\n"); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let errors = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state: errors, + }; assert_eq!( - lex_tokens::>(input).unwrap().1, + // lex_tokens::>(input).unwrap().1, + lex_tokens(input).unwrap().1, vec![ T!(Iri, 0, 1, ""), T!(Whitespace, 37, 1, "\n"), @@ -642,8 +861,19 @@ mod test { #[test] fn constraints() { let input = Span::new("A(?X):-B(?X),?X<42,?X>3."); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let errors = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state: errors, + }; assert_eq!( - lex_tokens::>(input).unwrap().1, + // lex_tokens::>(input).unwrap().1, + lex_tokens(input).unwrap().1, vec![ T!(Ident, 0, 1, "A"), T!(OpenParen, 1, 1, "("), @@ -675,8 +905,19 @@ mod test { #[test] fn pct_enc_comment() { let input = Span::new("%d4 this should be a comment,\n% but the lexer can't distinguish a percent encoded value\n% in an iri from a comment :(\n"); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let errors = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state: errors, + }; assert_eq!( - lex_tokens::>(input).unwrap().1, + // lex_tokens::>(input).unwrap().1, + lex_tokens(input).unwrap().1, vec![ T!(Comment, 0, 1, "%d4 this should be a comment,\n"), T!( @@ -694,8 +935,19 @@ mod test { #[test] fn fact() { let input = Span::new("somePred(term1, term2)."); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let errors = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state: errors, + }; assert_eq!( - lex_tokens::>(input).unwrap().1, + // lex_tokens::>(input).unwrap().1, + lex_tokens(input).unwrap().1, vec![ T!(Ident, 0, 1, "somePred"), T!(OpenParen, 8, 1, "("), @@ -713,12 +965,39 @@ mod test { #[test] fn whitespace() { let input = Span::new(" \t \n\n\t \n"); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let errors = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state: errors, + }; assert_eq!( - lex_tokens::>(input).unwrap().1, + // lex_tokens::>(input).unwrap().1, + lex_tokens(input).unwrap().1, vec![ T!(Whitespace, 0, 1, " \t \n\n\t \n"), T!(Eof, 12, 4, ""), ] ) } + + #[test] + fn skip_to_dot() { + let input = Span::new("some ?broken :- rule). 
A(Fact)."); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let errors = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state: errors, + }; + dbg!(super::skip_to_dot(input)); + } } diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index 236c6e313..758bb9061 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -23,7 +23,7 @@ use nom::{ use macros::traced; pub(crate) mod ast; -mod types; +pub(crate) mod types; use types::{ConstraintOperator, IntermediateResult, Span}; pub(crate) mod iri; @@ -2430,15 +2430,19 @@ mod test { /// NEW PARSER mod new { + use std::cell::RefCell; + use super::ast::{ - atom::*, directive::*, map::*, program::*, statement::*, term::*, tuple::*, List, + atom::*, directive::*, map::*, program::*, statement::*, term::*, tuple::*, List, Position, }; + use super::types::{Input, Label, ParserLabel, ToRange}; use crate::io::lexer::{ arrow, at, caret, close_brace, close_paren, colon, comma, dot, equal, exclamation_mark, exp, greater, greater_equal, hash, less, less_equal, lex_comment, lex_doc_comment, lex_ident, lex_iri, lex_number, lex_operators, lex_string, lex_toplevel_doc_comment, - lex_unary_prefix_operators, lex_whitespace, minus, open_brace, open_paren, plus, - question_mark, slash, star, tilde, underscore, unequal, Span, Token, TokenKind, + lex_whitespace, map_err, minus, open_brace, open_paren, plus, question_mark, skip_to_dot, + slash, star, tilde, underscore, unequal, Error, NewParseError, ParserState, Span, Token, + TokenKind, }; use crate::io::parser::ast::AstNode; use nom::combinator::{all_consuming, cut, map, opt, recognize}; @@ -2466,82 +2470,244 @@ mod new { } } - fn ignore_ws_and_comments<'a, F, O, E: ParseError> + ContextError>>( - inner: F, - ) -> impl FnMut(Span<'a>) -> IResult, O, E> - where - F: Parser, O, E> + FnMut(Span<'a>) -> IResult, O, E>, - { - delimited( - many0(alt((lex_whitespace, lex_comment))), - inner, - many0(alt((lex_whitespace, lex_comment))), - ) + // fn ignore_ws_and_comments<'a, F, O, E: ParseError> + ContextError>>( + // inner: F, + // ) -> impl FnMut(Span<'a>) -> IResult, O, E> + // where + // F: Parser, O, E> + FnMut(Span<'a>) -> IResult, O, E>, + // { + // delimited( + // many0(alt((lex_whitespace, lex_comment))), + // inner, + // many0(alt((lex_whitespace, lex_comment))), + // ) + // } + + // fn expect<'a, F, E, T>( + // parser: F, + // error_msg: E, + // state: Errors, + // ) -> impl Fn(Span<'a>) -> IResult, T> + // where + // F: Fn(Span<'a>) -> IResult, T>, + // E: ToString, + // { + // move |input| match parser(input) { + // Ok((rest_input, output)) => Ok((rest_input, output)), + // Err(nom::Err::Error(nom::error::Error { input, .. })) + // | Err(nom::Err::Failure(nom::error::Error { input, .. 
})) => { + // let err = crate::io::lexer::Error(to_range(input), error_msg.to_string()); + // state.report_error(err); + // Ok(( + // input, + // Token { + // kind: TokenKind::Error, + // span: outer_span(input, rest_input), + // }, + // )) + // } + // Err(err) => Err(err), + // } + // } + + fn expect< + 'a, + 'e, + O: Copy, + E: ParseError>, + F: nom::Parser, O, E>, + >( + mut parser: F, + error_msg: impl ToString, + error_output: O, + errors: ParserState<'e>, + ) -> impl FnMut(Input<'a, 'e>) -> IResult, O, E> { + move |input| match parser.parse(input.clone()) { + Ok(result) => Ok(result), + Err(nom::Err::Error(_)) | Err(nom::Err::Failure(_)) => { + let err = Error( + Position { + offset: input.input.location_offset(), + line: input.input.location_line(), + column: input.input.get_utf8_column() as u32, + }, + error_msg.to_string(), + ); + errors.report_error(err); + Ok((input, error_output)) + } + Err(err) => Err(err), + } + } + + fn recover<'a, 'e, E>( + mut parser: impl nom::Parser, Statement<'a>, E>, + error_msg: impl ToString, + errors: ParserState<'e>, + ) -> impl FnMut(Input<'a, 'e>) -> IResult, Statement<'a>, E> { + move |input: Input<'a, 'e>| match parser.parse(input) { + Ok(result) => Ok(result), + Err(err) if input.input.is_empty() => Err(err), + Err(nom::Err::Error(e)) | Err(nom::Err::Failure(e)) => { + let err = Error( + Position { + offset: input.input.location_offset(), + line: input.input.location_line(), + column: input.input.get_utf8_column() as u32, + }, + error_msg.to_string(), + ); + errors.report_error(err); + let (rest_input, token) = skip_to_dot(input); + Ok((rest_input, Statement::Error(token))) + } + Err(err) => Err(err), + } + } + + fn report_label<'a, 's, O, E>( + mut parser: impl nom::Parser, O, E>, + label: ParserLabel, + ) -> impl FnMut(Input<'a, 's>) -> IResult, O, E> { + move |input| match parser.parse(input) { + Ok(result) => Ok(result), + Err(err) => { + match err { + nom::Err::Incomplete(_) => (), + nom::Err::Error(_) | nom::Err::Failure(_) => { + if !input.input.is_empty() { + input.parser_state.labels.borrow_mut().push(Label { + label, + pos: Position { + offset: input.input.location_offset(), + line: input.input.location_line(), + column: input.input.get_utf8_column() as u32, + }, + }) + }; + } + }; + Err(err) + } + } + } + + fn report_error<'a, 's, O, E>( + mut parser: impl nom::Parser, O, E>, + ) -> impl FnMut(Input<'a, 's>) -> IResult, O, E> { + move |input| match parser.parse(input) { + Ok(result) => { + input.parser_state.labels.borrow_mut().clear(); + Ok(result) + } + Err(err) => { + match err { + nom::Err::Incomplete(_) => (), + nom::Err::Error(_) | nom::Err::Failure(_) => { + // println!("LABELS BEFORE REPORT!!!!: {:#?}", input.parser_state.labels); + let mut furthest_errors: Vec = Vec::new(); + let labels = + as Clone>::clone(&input.parser_state.labels.borrow()) + .into_iter(); + for label in labels { + if let Some(last) = furthest_errors.last() { + if label.pos.offset >= (*last).0.offset { + let err = + Error(label.pos, format!("expected {:?}", label.label)); + furthest_errors.push(err); + } + } else { + let err = Error(label.pos, format!("expected {:?}", label.label)); + furthest_errors.push(err); + }; + } + for err in furthest_errors { + input.parser_state.report_error(err) + } + // for label in furthest_errors { + // println!( + // "Syntax error: Parser got stuck at line {} column {}, expected {:?}", + // label.position.line, label.position.column, label.label + // ); + // println!( + // "\n{}", + // input + // .parser_state + // .source 
+ // .fragment() + // .lines() + // .collect::>() + // .get((label.position.line - 1) as usize) + // .unwrap() + // ); + // println!("{1:>0$}", label.position.column, "^"); + // } } }; Err(err) } } } /// Parse a full program consisting of directives, facts, rules and comments. - fn parse_program<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, Program<'a>, E> { - context( - "parse program", - all_consuming(pair( - opt(lex_toplevel_doc_comment), - many1(alt(( - parse_rule, - parse_fact, + fn parse_program<'a, 'e>(input: Input<'a, 'e>) -> (Program<'a>, Vec) { + let (rest_input, (tl_doc_comment, statements)) = all_consuming(pair( + opt(lex_toplevel_doc_comment), + many1(recover( + report_error(alt(( + // TODO: Discuss whether directives should only get parsed at the beginning of the source file + report_label(parse_rule, ParserLabel::Rule), + report_label(parse_fact, ParserLabel::Fact), parse_whitespace, - parse_directive, + report_label(parse_directive, ParserLabel::Directive), parse_comment, ))), + "failed to parse a statement", + input.parser_state, )), - )(input) - .map(|(rest_input, (tl_doc_comment, statements))| { - ( - rest_input, - Program { - span: input, - tl_doc_comment, - statements, - }, - ) - }) + ))(input) + .expect("Parser can't fail. If it fails it's a bug! Please report it. Got"); + ( + Program { + span: input.input, + tl_doc_comment, + statements, + }, + rest_input.parser_state.errors.take(), + ) } /// Parse whitespace that is between directives, facts, rules and comments. - fn parse_whitespace<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context("parse whitespace", lex_whitespace)(input) - .map(|(rest, ws)| (rest, Statement::Whitespace(ws))) + fn parse_whitespace<'a, 'e>(input: Input<'a, 'e>) -> IResult, Statement<'a>> { + lex_whitespace(input).map(|(rest_input, ws)| (rest_input, Statement::Whitespace(ws))) } /// Parse normal comments that start with a `%` and end at the line ending. - fn parse_comment<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context("parse comment", lex_comment)(input) - .map(|(rest, comment)| (rest, Statement::Comment(comment))) + fn parse_comment<'a, 'e>(input: Input<'a, 'e>) -> IResult, Statement<'a>> { + lex_comment(input).map(|(rest_input, comment)| (rest_input, Statement::Comment(comment))) } /// Parse a fact of the form `predicateName(term1, term2, …).` - fn parse_fact<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context( - "parse fact", - tuple(( - opt(lex_doc_comment), - parse_normal_atom, - opt(lex_whitespace), - cut(dot), - )), - )(input) + fn parse_fact<'a, 'e>(input: Input<'a, 'e>) -> IResult, Statement<'a>> { + tuple(( + opt(lex_doc_comment), + parse_normal_atom, + opt(lex_whitespace), + expect( + report_label(dot, ParserLabel::Dot), + "missing `.`", + Token { + kind: TokenKind::Error, + span: Span::new("ERROR! 
missing `.`"), + }, + input.parser_state, + ), + ))(input) .map(|(rest_input, (doc_comment, atom, ws, dot))| { ( rest_input, Statement::Fact { - span: outer_span(input, rest_input), + span: outer_span(input.input, rest_input.input), doc_comment, atom, ws, @@ -2552,28 +2718,31 @@ mod new { } /// Parse a rule of the form `headPredicate1(term1, term2, …), headPredicate2(term1, term2, …) :- bodyPredicate(term1, …), term1 >= (term2 + term3) * function(term1, …) .` - fn parse_rule<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context( - "parse rule", - tuple(( - opt(lex_doc_comment), - parse_head, - opt(lex_whitespace), - arrow, - opt(lex_whitespace), - parse_body, - opt(lex_whitespace), - cut(dot), - )), - )(input) + fn parse_rule<'a, 'e>(input: Input<'a, 'e>) -> IResult, Statement<'a>> { + tuple(( + opt(lex_doc_comment), + parse_head, + opt(lex_whitespace), + report_label(arrow, ParserLabel::Arrow), + opt(lex_whitespace), + parse_body, + opt(lex_whitespace), + expect( + report_label(dot, ParserLabel::Dot), + "missing `.`", + Token { + kind: TokenKind::Error, + span: Span::new("ERROR! missing `.`"), + }, + input.parser_state, + ), + ))(input) .map( |(rest_input, (doc_comment, head, ws1, arrow, ws2, body, ws3, dot))| { ( rest_input, Statement::Rule { - span: outer_span(input, rest_input), + span: outer_span(input.input, rest_input.input), doc_comment, head, ws1, @@ -2589,63 +2758,57 @@ mod new { } /// Parse the head atoms of a rule. - fn parse_head<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult>, E> { - context("parse head", parse_atom_list(parse_head_atoms))(input) + fn parse_head<'a, 'e>(input: Input<'a, 'e>) -> IResult, List<'a, Atom<'a>>> { + parse_list(parse_head_atoms)(input) } /// Parse the body atoms of a rule. - fn parse_body<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult>, E> { - context("parse body", parse_atom_list(parse_body_atoms))(input) + fn parse_body<'a, 'e>(input: Input<'a, 'e>) -> IResult, List<'a, Atom<'a>>> { + parse_list(parse_body_atoms)(input) } /// Parse the directives (@base, @prefix, @import, @export, @output). - fn parse_directive<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context( - "parse directive", - alt(( - parse_base_directive, - parse_prefix_directive, - parse_import_directive, - parse_export_directive, - parse_output_directive, - )), - )(input) + fn parse_directive<'a, 'e>(input: Input<'a, 'e>) -> IResult, Statement<'a>> { + alt(( + parse_base_directive, + parse_prefix_directive, + parse_import_directive, + parse_export_directive, + parse_output_directive, + ))(input) .map(|(rest, directive)| (rest, Statement::Directive(directive))) } /// Parse the base directive. - fn parse_base_directive<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context( - "parse base directive", - tuple(( - opt(lex_doc_comment), - recognize(pair( - at, - verify(lex_ident, |token| token.kind == TokenKind::Base), - )), - opt(lex_whitespace), - lex_iri, - opt(lex_whitespace), - cut(dot), + fn parse_base_directive<'a, 'e>(input: Input<'a, 'e>) -> IResult, Directive<'a>> { + tuple(( + opt(lex_doc_comment), + recognize(pair( + at, + verify(lex_ident, |token| token.kind == TokenKind::Base), )), - )(input) + opt(lex_whitespace), + report_label(lex_iri, ParserLabel::Iri), + opt(lex_whitespace), + expect( + report_label(dot, ParserLabel::Dot), + "missing `.`", + Token { + kind: TokenKind::Error, + span: Span::new("ERROR! 
missing `.`"), + }, + input.parser_state, + ), + ))(input) .map(|(rest_input, (doc_comment, kw, ws1, base_iri, ws2, dot))| { ( rest_input, Directive::Base { - span: outer_span(input, rest_input), + span: outer_span(input.input, rest_input.input), doc_comment, kw: Token { kind: TokenKind::Base, - span: kw, + span: kw.input, }, ws1, base_iri, @@ -2657,40 +2820,45 @@ mod new { } /// Parse the prefix directive. - fn parse_prefix_directive<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context( - "parse prefix directive", - tuple(( - opt(lex_doc_comment), - recognize(pair( - at, - verify(lex_ident, |token| token.kind == TokenKind::Prefix), - )), - opt(lex_whitespace), - recognize(pair(lex_ident, colon)), - opt(lex_whitespace), - lex_iri, - opt(lex_whitespace), - cut(dot), + fn parse_prefix_directive<'a, 'e>( + input: Input<'a, 'e>, + ) -> IResult, Directive<'a>> { + tuple(( + opt(lex_doc_comment), + recognize(pair( + at, + verify(lex_ident, |token| token.kind == TokenKind::Prefix), )), - )(input) + opt(lex_whitespace), + report_label(recognize(pair(lex_ident, colon)), ParserLabel::Prefix), + opt(lex_whitespace), + report_label(lex_iri, ParserLabel::Iri), + opt(lex_whitespace), + expect( + report_label(dot, ParserLabel::Dot), + "missing `.`", + Token { + kind: TokenKind::Error, + span: Span::new("ERROR! missing `.`"), + }, + input.parser_state, + ), + ))(input) .map( |(rest_input, (doc_comment, kw, ws1, prefix, ws2, prefix_iri, ws3, dot))| { ( rest_input, Directive::Prefix { - span: outer_span(input, rest_input), + span: outer_span(input.input, rest_input.input), doc_comment, kw: Token { kind: TokenKind::Prefix, - span: kw, + span: kw.input, }, ws1, prefix: Token { kind: TokenKind::Ident, - span: prefix, + span: prefix.input, }, ws2, prefix_iri, @@ -2703,37 +2871,42 @@ mod new { } /// Parse the import directive. - fn parse_import_directive<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context( - "parse import directive", - tuple(( - opt(lex_doc_comment), - recognize(pair( - at, - verify(lex_ident, |token| token.kind == TokenKind::Import), - )), - lex_whitespace, - lex_ident, - opt(lex_whitespace), - arrow, - opt(lex_whitespace), - parse_map, - opt(lex_whitespace), - cut(dot), + fn parse_import_directive<'a, 'e>( + input: Input<'a, 'e>, + ) -> IResult, Directive<'a>> { + tuple(( + opt(lex_doc_comment), + recognize(pair( + at, + verify(lex_ident, |token| token.kind == TokenKind::Import), )), - )(input) + lex_whitespace, + report_label(lex_ident, ParserLabel::Identifier), + opt(lex_whitespace), + report_label(arrow, ParserLabel::Arrow), + opt(lex_whitespace), + parse_map, + opt(lex_whitespace), + expect( + report_label(dot, ParserLabel::Dot), + "missing `.`", + Token { + kind: TokenKind::Error, + span: Span::new("ERROR! missing `.`"), + }, + input.parser_state, + ), + ))(input) .map( |(rest_input, (doc_comment, kw, ws1, predicate, ws2, arrow, ws3, map, ws4, dot))| { ( rest_input, Directive::Import { - span: outer_span(input, rest_input), + span: outer_span(input.input, rest_input.input), doc_comment, kw: Token { kind: TokenKind::Import, - span: kw, + span: kw.input, }, ws1, predicate, @@ -2750,37 +2923,42 @@ mod new { } /// Parse the export directive. 
- fn parse_export_directive<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context( - "parse export directive", - tuple(( - opt(lex_doc_comment), - recognize(pair( - at, - verify(lex_ident, |token| token.kind == TokenKind::Export), - )), - lex_whitespace, - lex_ident, - opt(lex_whitespace), - arrow, - opt(lex_whitespace), - parse_map, - opt(lex_whitespace), - cut(dot), + fn parse_export_directive<'a, 'e>( + input: Input<'a, 'e>, + ) -> IResult, Directive<'a>> { + tuple(( + opt(lex_doc_comment), + recognize(pair( + at, + verify(lex_ident, |token| token.kind == TokenKind::Export), )), - )(input) + lex_whitespace, + report_label(lex_ident, ParserLabel::Identifier), + opt(lex_whitespace), + report_label(arrow, ParserLabel::Arrow), + opt(lex_whitespace), + parse_map, + opt(lex_whitespace), + expect( + report_label(dot, ParserLabel::Dot), + "missing `.`", + Token { + kind: TokenKind::Error, + span: Span::new("ERROR! missing `.`"), + }, + input.parser_state, + ), + ))(input) .map( |(rest_input, (doc_comment, kw, ws1, predicate, ws2, arrow, ws3, map, ws4, dot))| { ( rest_input, Directive::Export { - span: outer_span(input, rest_input), + span: outer_span(input.input, rest_input.input), doc_comment, kw: Token { kind: TokenKind::Export, - span: kw, + span: kw.input, }, ws1, predicate, @@ -2797,33 +2975,38 @@ mod new { } /// Parse the output directive. - fn parse_output_directive<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context( - "parse output directive", - tuple(( - opt(lex_doc_comment), - recognize(pair( - at, - verify(lex_ident, |token| token.kind == TokenKind::Output), - )), - lex_whitespace, - opt(parse_identifier_list), - opt(lex_whitespace), - cut(dot), + fn parse_output_directive<'a, 'e>( + input: Input<'a, 'e>, + ) -> IResult, Directive<'a>> { + tuple(( + opt(lex_doc_comment), + recognize(pair( + at, + verify(lex_ident, |token| token.kind == TokenKind::Output), )), - )(input) + lex_whitespace, + opt(parse_list(lex_ident)), + opt(lex_whitespace), + expect( + report_label(dot, ParserLabel::Dot), + "missing `.`", + Token { + kind: TokenKind::Error, + span: Span::new("ERROR! 
missing `.`"), + }, + input.parser_state, + ), + ))(input) .map( |(rest_input, (doc_comment, kw, ws1, predicates, ws2, dot))| { ( rest_input, Directive::Output { - span: outer_span(input, rest_input), + span: outer_span(input.input, rest_input.input), doc_comment, kw: Token { kind: TokenKind::Output, - span: kw, + span: kw.input, }, ws1, predicates, @@ -2835,55 +3018,49 @@ mod new { ) } - /// Parse a list of `ident1, ident2, …` - fn parse_identifier_list<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult>, E> { - context( - "parse identifier list", + // /// Parse a list of `ident1, ident2, …` + // fn parse_identifier_list<'a, 'e>( + // input: Input<'a, 'e>, + // ) -> IResult, List<'a, Token<'a>>> { + // pair( + // lex_ident, + // many0(tuple(( + // opt(lex_whitespace), + // comma, + // opt(lex_whitespace), + // lex_ident, + // ))), + // )(input) + // .map(|(rest_input, (first, rest))| { + // ( + // rest_input, + // List { + // span: outer_span(input.input, rest_input.input), + // first, + // rest: if rest.is_empty() { None } else { Some(rest) }, + // }, + // ) + // }) + // } + + fn parse_list<'a, 'e, T>( + parse_t: fn(Input<'a, 'e>) -> IResult, T>, + ) -> impl Fn(Input<'a, 'e>) -> IResult, List<'a, T>> { + move |input: Input<'a, 'e>| { pair( - lex_ident, + parse_t, many0(tuple(( opt(lex_whitespace), comma, opt(lex_whitespace), - lex_ident, + parse_t, ))), - ), - )(input) - .map(|(rest_input, (first, rest))| { - ( - rest_input, - List { - span: outer_span(input, rest_input), - first, - rest: if rest.is_empty() { None } else { Some(rest) }, - }, - ) - }) - } - - fn parse_atom_list<'a, E: ParseError> + ContextError>>( - parse_atom: fn(Span<'a>) -> IResult, E>, - ) -> impl Fn(Span<'a>) -> IResult, List<'a, Atom<'a>>, E> { - move |input| { - context( - "parse atom list", - pair( - parse_atom, - many0(tuple(( - opt(lex_whitespace), - comma, - opt(lex_whitespace), - parse_atom, - ))), - ), )(input) .map(|(rest_input, (first, rest))| { ( rest_input, List { - span: outer_span(input, rest_input), + span: outer_span(input.input, rest_input.input), first, rest: if rest.is_empty() { None } else { Some(rest) }, }, @@ -2893,76 +3070,55 @@ mod new { } /// Parse the head atoms. The same as the body atoms except for disallowing negated atoms. - fn parse_head_atoms<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context( - "harse head atoms", - alt((parse_normal_atom, parse_infix_atom, parse_map_atom)), - )(input) + fn parse_head_atoms<'a, 'e>(input: Input<'a, 'e>) -> IResult, Atom<'a>> { + alt((parse_normal_atom, parse_infix_atom, parse_map_atom))(input) } /// Parse the body atoms. The same as the head atoms except for allowing negated atoms. - fn parse_body_atoms<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context( - "parse body atoms", - alt(( - parse_normal_atom, - parse_negative_atom, - parse_infix_atom, - parse_map_atom, - )), - )(input) + fn parse_body_atoms<'a, 'e>(input: Input<'a, 'e>) -> IResult, Atom<'a>> { + alt(( + parse_normal_atom, + parse_negative_atom, + parse_infix_atom, + parse_map_atom, + ))(input) } /// Parse an atom of the form `predicateName(term1, term2, …)`. 
- fn parse_normal_atom<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context("parse normal atom", parse_named_tuple)(input) + fn parse_normal_atom<'a, 'e>(input: Input<'a, 'e>) -> IResult, Atom<'a>> { + parse_named_tuple(input) .map(|(rest_input, named_tuple)| (rest_input, Atom::Positive(named_tuple))) } /// Parse an atom of the form `~predicateName(term1, term2, …)`. - fn parse_negative_atom<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context("parse negative atom", pair(tilde, parse_named_tuple))(input).map( - |(rest_input, (tilde, named_tuple))| { - ( - rest_input, - Atom::Negative { - span: outer_span(input, rest_input), - neg: tilde, - atom: named_tuple, - }, - ) - }, - ) + fn parse_negative_atom<'a, 'e>(input: Input<'a, 'e>) -> IResult, Atom<'a>> { + pair(tilde, parse_named_tuple)(input).map(|(rest_input, (tilde, named_tuple))| { + ( + rest_input, + Atom::Negative { + span: outer_span(input.input, rest_input.input), + neg: tilde, + atom: named_tuple, + }, + ) + }) } /// Parse an "infix atom" of the form `term1 term2`. /// The supported infix operations are `<`, `<=`, `=`, `>=`, `>` and `!=`. - fn parse_infix_atom<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context( - "parse infix atom", - tuple(( - parse_term, - opt(lex_whitespace), - parse_operation_token, - opt(lex_whitespace), - parse_term, - )), - )(input) + fn parse_infix_atom<'a, 'e>(input: Input<'a, 'e>) -> IResult, Atom<'a>> { + tuple(( + parse_term, + opt(lex_whitespace), + parse_operation_token, + opt(lex_whitespace), + parse_term, + ))(input) .map(|(rest_input, (lhs, ws1, operation, ws2, rhs))| { ( rest_input, Atom::InfixAtom { - span: outer_span(input, rest_input), + span: outer_span(input.input, rest_input.input), lhs, ws1, operation, ws2, rhs, }, ) }) } /// Parse a tuple with an optional name, like `ident(term1, term2)` /// or just `(int, int, skip)`. - fn parse_tuple<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context( - "parse tuple", - tuple(( - opt(lex_ident), - opt(lex_whitespace), - open_paren, - opt(lex_whitespace), - opt(parse_term_list), - opt(lex_whitespace), - cut(close_paren), - )), - )(input) + fn parse_tuple<'a, 'e>(input: Input<'a, 'e>) -> IResult, Tuple<'a>> { + tuple(( + opt(lex_ident), + opt(lex_whitespace), + open_paren, + opt(lex_whitespace), + opt(parse_list(parse_term)), + opt(lex_whitespace), + report_label(close_paren, ParserLabel::CloseParen), + ))(input) .map( |(rest_input, (identifier, ws1, open_paren, ws2, terms, ws3, close_paren))| { ( rest_input, Tuple { - span: outer_span(input, rest_input), + span: outer_span(input.input, rest_input.input), identifier, ws1, open_paren, ws2, terms, ws3, close_paren, }, ) }, ) } /// Parse a named tuple. This function is like `parse_tuple`, with the difference /// that it enforces the existence of an identifier for the tuple.
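// The only difference between `parse_tuple` above and `parse_named_tuple`
// below is whether the leading identifier is wrapped in `opt(...)`. Reduced
// to `&str` as an illustrative sketch (both function names are hypothetical):
use nom::{
    bytes::complete::tag,
    character::complete::alpha1,
    combinator::opt,
    sequence::{delimited, pair},
    IResult,
};

fn tuple_any(input: &str) -> IResult<&str, (Option<&str>, &str)> {
    pair(opt(alpha1), delimited(tag("("), alpha1, tag(")")))(input)
}

fn tuple_named(input: &str) -> IResult<&str, (&str, &str)> {
    pair(alpha1, delimited(tag("("), alpha1, tag(")")))(input)
}
// `tuple_any("(x)")` succeeds while `tuple_named("(x)")` fails, which is what
// lets the named variant reject anonymous tuples in atom position.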
- fn parse_named_tuple<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context( - "parse named tuple", - tuple(( - lex_ident, - opt(lex_whitespace), - open_paren, - opt(lex_whitespace), - opt(parse_term_list), - opt(lex_whitespace), - cut(close_paren), - )), - )(input) + fn parse_named_tuple<'a, 'e>(input: Input<'a, 'e>) -> IResult, Tuple<'a>> { + tuple(( + lex_ident, + opt(lex_whitespace), + report_label(open_paren, ParserLabel::OpenParen), + opt(lex_whitespace), + opt(parse_list(parse_term)), + opt(lex_whitespace), + report_label(close_paren, ParserLabel::CloseParen), + ))(input) .map( |(rest_input, (identifier, ws1, open_paren, ws2, terms, ws3, close_paren))| { ( rest_input, Tuple { - span: outer_span(input, rest_input), + span: outer_span(input.input, rest_input.input), identifier: Some(identifier), ws1, open_paren, ws2, terms, ws3, close_paren, }, ) }, ) } /// Parse a map. Maps are denoted with `{…}` and can have an optional name, e.g. `csv {…}`. /// Inside the curly braces is a list of pairs. - fn parse_map<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context( - "parse map", - tuple(( - opt(lex_ident), - opt(lex_whitespace), - open_brace, - opt(lex_whitespace), - parse_pair_list, - opt(lex_whitespace), - cut(close_brace), - )), - )(input) + fn parse_map<'a, 'e>(input: Input<'a, 'e>) -> IResult, Map<'a>> { + tuple(( + opt(lex_ident), + opt(lex_whitespace), + open_brace, + opt(lex_whitespace), + opt(parse_list(parse_pair)), + opt(lex_whitespace), + report_label(close_brace, ParserLabel::CloseBrace), + ))(input) .map( |(rest_input, (identifier, ws1, open_brace, ws2, pairs, ws3, close_brace))| { ( rest_input, Map { - span: outer_span(input, rest_input), + span: outer_span(input.input, rest_input.input), identifier, ws1, open_brace, ws2, pairs, ws3, close_brace, }, ) }, ) } /// Parse a map in an atom position. - fn parse_map_atom<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context("parse map atom", parse_map)(input) - .map(|(rest_input, map)| (rest_input, Atom::Map(map))) - } - - /// Parse a pair list of the form `key1 = value1, key2 = value2, …`. - fn parse_pair_list<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, Term<'a>>>>, E> { - context( - "parse pair list", - opt(pair( - parse_pair, - many0(tuple(( - opt(lex_whitespace), - comma, - opt(lex_whitespace), - parse_pair, - ))), - )), - )(input) - .map(|(rest_input, pair_list)| { - if let Some((first, rest)) = pair_list { - ( - rest_input, - Some(List { - span: outer_span(input, rest_input), - first, - rest: if rest.is_empty() { None } else { Some(rest) }, - }), - ) - } else { - (rest_input, None) - } - }) - } + fn parse_map_atom<'a, 'e>(input: Input<'a, 'e>) -> IResult, Atom<'a>> { + parse_map(input).map(|(rest_input, map)| (rest_input, Atom::Map(map))) + } + + // /// Parse a pair list of the form `key1 = value1, key2 = value2, …`.
+ // fn parse_pair_list<'a, 'e, E: ParseError> + ContextError>>( + // input: Input<'a, 'e>, + // state: Errors, + // ) -> IResult, Option, Term<'a>>>>, E> { + // context( + // "parse pair list", + // opt(pair( + // parse_pair, + // many0(tuple(( + // opt(lex_whitespace), + // comma, + // opt(lex_whitespace), + // parse_pair, + // ))), + // )), + // )(input) + // .map(|(rest_input, pair_list)| { + // if let Some((first, rest)) = pair_list { + // ( + // rest_input, + // Some(List { + // span: outer_span(input, rest_input), + // first, + // rest: if rest.is_empty() { None } else { Some(rest) }, + // }), + // ) + // } else { + // (rest_input, None) + // } + // }) + // } /// Parse a pair of the form `key = value`. - fn parse_pair<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, Term<'a>>, E> { - context( - "parse pair", - tuple(( - parse_term, - opt(lex_whitespace), - equal, - opt(lex_whitespace), - parse_term, - )), - )(input) + fn parse_pair<'a, 'e>( + input: Input<'a, 'e>, + ) -> IResult, Pair<'a, Term<'a>, Term<'a>>> { + tuple(( + parse_term, + opt(lex_whitespace), + report_label(equal, ParserLabel::Equal), + opt(lex_whitespace), + parse_term, + ))(input) .map(|(rest_input, (key, ws1, equal, ws2, value))| { ( rest_input, Pair { - span: outer_span(input, rest_input), + span: outer_span(input.input, rest_input.input), key, ws1, equal, ws2, value, }, ) }) } - /// Parse a list of terms of the form `term1, term2, …`. - fn parse_term_list<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult>, E> { - context( - "parse term list", - pair( - parse_term, - many0(tuple(( - opt(lex_whitespace), - comma, - opt(lex_whitespace), - parse_term, - ))), - ), - )(input) - .map(|(rest_input, (first, rest))| { - ( - rest_input, - List { - span: outer_span(input, rest_input), - first, - rest: if rest.is_empty() { None } else { Some(rest) }, - }, - ) - }) - } + // /// Parse a list of terms of the form `term1, term2, …`. + // fn parse_term_list<'a, 'e, E: ParseError> + ContextError>>( + // input: Input<'a, 'e>, + // state: Errors, + // ) -> IResult, List<'a, Term<'a>>, E> { + // context( + // "parse term list", + // pair( + // parse_term, + // many0(tuple(( + // opt(lex_whitespace), + // comma, + // opt(lex_whitespace), + // parse_term, + // ))), + // ), + // )(input) + // .map(|(rest_input, (first, rest))| { + // ( + // rest_input, + // List { + // span: outer_span(input, rest_input), + // first, + // rest: if rest.is_empty() { None } else { Some(rest) }, + // }, + // ) + // }) + // } /// Parse a term. A term can be a primitive value (constant, number, string, …), /// a variable (universal or existential), a map, a function (-symbol), an arithmetic /// operation, an aggregation or a tuple of terms, e.g. `(term1, term2, …)`. - fn parse_term<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context( - "parse term", - alt(( - parse_binary_term, - parse_tuple_term, - parse_unary_prefix_term, - parse_map_term, - parse_primitive_term, - parse_variable, - parse_existential, - parse_aggregation_term, - parse_blank, - )), - )(input) + fn parse_term<'a, 'e>(input: Input<'a, 'e>) -> IResult, Term<'a>> { + report_error(alt(( + parse_binary_term, + parse_tuple_term, + // parse_unary_prefix_term, + parse_map_term, + parse_primitive_term, + parse_variable, + parse_existential, + parse_aggregation_term, + parse_blank, + )))(input) } /// Parse a primitive term (simple constant, iri constant, number, string).
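// Among the primitives handled below, the RDF literal has the shape
// `"…"^^<iri>`. A self-contained sketch of just that shape over `&str`
// (illustrative; the real `parse_rdf_literal` returns tokens and spans):
use nom::{
    bytes::complete::{is_not, tag},
    sequence::{delimited, separated_pair},
    IResult,
};

fn rdf_literal(input: &str) -> IResult<&str, (&str, &str)> {
    separated_pair(
        delimited(tag("\""), is_not("\""), tag("\"")), // lexical value
        tag("^^"),                                     // datatype marker
        delimited(tag("<"), is_not("> \n"), tag(">")), // datatype IRI
    )(input)
}
// `rdf_literal(r#""2023-06-19"^^<http://www.w3.org/2001/XMLSchema#date>"#)`
// yields `("2023-06-19", "http://www.w3.org/2001/XMLSchema#date")`.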
- fn parse_primitive_term<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context( - "parse primitive term", - alt(( - parse_rdf_literal, - parse_ident, - parse_iri, - parse_number, - parse_string, - )), - )(input) + fn parse_primitive_term<'a, 'e>(input: Input<'a, 'e>) -> IResult, Term<'a>> { + report_error(alt(( + parse_rdf_literal, + parse_ident, + parse_iri, + parse_number, + parse_string, + )))(input) .map(|(rest_input, term)| (rest_input, Term::Primitive(term))) } /// Parse a rdf literal e.g. "2023-06-19"^^ - fn parse_rdf_literal<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, Primitive<'a>, E> { - context( - "parse rdf literal", - tuple((lex_string, recognize(pair(caret, caret)), lex_iri)), - )(input) + fn parse_rdf_literal<'a, 'e>(input: Input<'a, 'e>) -> IResult, Primitive<'a>> { + tuple(( + lex_string, + recognize(pair(caret, caret)), + report_label(lex_iri, ParserLabel::Iri), + ))(input) .map(|(rest_input, (string, carets, iri))| { ( rest_input, Primitive::RdfLiteral { - span: outer_span(input, rest_input), + span: outer_span(input.input, rest_input.input), string, carets: Token { kind: TokenKind::Caret, - span: carets, + span: carets.input, }, iri, }, @@ -3241,45 +3367,31 @@ mod new { }) } - fn parse_ident<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, Primitive<'a>, E> { - context("parse identifier", lex_ident)(input) - .map(|(rest_input, ident)| (rest_input, Primitive::Constant(ident))) + fn parse_ident<'a, 'e>(input: Input<'a, 'e>) -> IResult, Primitive<'a>> { + lex_ident(input).map(|(rest_input, ident)| (rest_input, Primitive::Constant(ident))) } - fn parse_iri<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, Primitive<'a>, E> { - context("parse iri", lex_iri)(input) - .map(|(rest_input, iri)| (rest_input, Primitive::Iri(iri))) + fn parse_iri<'a, 'e>(input: Input<'a, 'e>) -> IResult, Primitive<'a>> { + lex_iri(input).map(|(rest_input, iri)| (rest_input, Primitive::Iri(iri))) } - fn parse_number<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, Primitive<'a>, E> { - context("parse number", alt((parse_decimal, parse_integer)))(input) + fn parse_number<'a, 'e>(input: Input<'a, 'e>) -> IResult, Primitive<'a>> { + report_error(alt((parse_decimal, parse_integer)))(input) } - fn parse_decimal<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, Primitive<'a>, E> { - context( - "parse decimal", - tuple(( - opt(alt((plus, minus))), - opt(lex_number), - dot, - lex_number, - opt(parse_exponent), - )), - )(input) + fn parse_decimal<'a, 'e>(input: Input<'a, 'e>) -> IResult, Primitive<'a>> { + tuple(( + opt(alt((plus, minus))), + opt(lex_number), + dot, + lex_number, + opt(parse_exponent), + ))(input) .map(|(rest_input, (sign, before, dot, after, exponent))| { - dbg!(&sign, &before, &dot, &after, &exponent); ( rest_input, Primitive::Number { - span: outer_span(input, rest_input), + span: outer_span(input.input, rest_input.input), sign, before, dot: Some(dot), @@ -3290,82 +3402,68 @@ mod new { }) } - fn parse_integer<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, Primitive<'a>, E> { - context("parse integer", pair(opt(alt((plus, minus))), lex_number))(input).map( - |(rest_input, (sign, number))| { - ( - rest_input, - Primitive::Number { - span: outer_span(input, rest_input), - sign, - before: None, - dot: None, - after: number, - exponent: None, - }, - ) - }, - ) - } - - fn parse_exponent<'a, E: ParseError> + 
ContextError>>( - input: Span<'a>, - ) -> IResult, Exponent<'a>, E> { - tuple((exp, opt(alt((plus, minus))), lex_number))(input) - .map(|(rest_input, (e, sign, number))| (rest_input, Exponent { e, sign, number })) - } - - fn parse_string<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, Primitive<'a>, E> { - context("parse string", lex_string)(input) - .map(|(rest_input, string)| (rest_input, Primitive::String(string))) - } - - /// Parse an unary term. - fn parse_unary_prefix_term<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context( - "parse unary prefix term", - pair(lex_unary_prefix_operators, parse_term), - )(input) - .map(|(rest_input, (operation, term))| { + fn parse_integer<'a, 'e>(input: Input<'a, 'e>) -> IResult, Primitive<'a>> { + pair(opt(alt((plus, minus))), lex_number)(input).map(|(rest_input, (sign, number))| { ( rest_input, - Term::UnaryPrefix { - span: outer_span(input, rest_input), - operation, - term: Box::new(term), + Primitive::Number { + span: outer_span(input.input, rest_input.input), + sign, + before: None, + dot: None, + after: number, + exponent: None, }, ) }) } + fn parse_exponent<'a, 'e>(input: Input<'a, 'e>) -> IResult, Exponent<'a>> { + tuple(( + exp, + opt(alt((plus, minus))), + report_label(lex_number, ParserLabel::Number), + ))(input) + .map(|(rest_input, (e, sign, number))| (rest_input, Exponent { e, sign, number })) + } + + fn parse_string<'a, 'e>(input: Input<'a, 'e>) -> IResult, Primitive<'a>> { + lex_string(input).map(|(rest_input, string)| (rest_input, Primitive::String(string))) + } + + // /// Parse an unary term. + // fn parse_unary_prefix_term<'a, 'e>(input: Input<'a, 'e>) -> IResult, Term<'a>> { + // pair(lex_unary_prefix_operators, parse_term)(input).map( + // |(rest_input, (operation, term))| { + // ( + // rest_input, + // Term::UnaryPrefix { + // span: outer_span(input.input, rest_input.input), + // operation, + // term: Box::new(term), + // }, + // ) + // }, + // ) + // } + /// Parse a binary infix operation of the form `term1 term2`. - fn parse_binary_term<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context( - "parse binary term", - pair( - parse_arithmetic_product, - opt(tuple(( - opt(lex_whitespace), - alt((plus, minus)), - opt(lex_whitespace), - parse_binary_term, - ))), - ), + fn parse_binary_term<'a, 'e>(input: Input<'a, 'e>) -> IResult, Term<'a>> { + pair( + parse_arithmetic_product, + opt(tuple(( + opt(lex_whitespace), + alt((plus, minus)), + opt(lex_whitespace), + parse_binary_term, + ))), )(input) .map(|(rest_input, (lhs, opt))| { ( rest_input, if let Some((ws1, operation, ws2, rhs)) = opt { Term::Binary { - span: outer_span(input, rest_input), + span: outer_span(input.input, rest_input.input), lhs: Box::new(lhs), ws1, operation, @@ -3381,27 +3479,22 @@ mod new { /// Parse an arithmetic product, i.e. an expression involving /// only `*` and `/` over subexpressions. 
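// `parse_binary_term` above and `parse_arithmetic_product` below implement
// precedence by layering: `+`/`-` recurse into products, which recurse into
// factors, so `*` and `/` bind tighter. The same right-recursive shape as a
// tiny evaluating sketch over `u64` (illustrative, not part of this patch):
use nom::{
    bytes::complete::tag,
    character::complete::digit1,
    combinator::{map_res, opt},
    sequence::{pair, preceded},
    IResult,
};

fn num(input: &str) -> IResult<&str, u64> {
    map_res(digit1, str::parse::<u64>)(input)
}

fn product(input: &str) -> IResult<&str, u64> {
    let (rest, (lhs, rhs)) = pair(num, opt(preceded(tag("*"), product)))(input)?;
    Ok((rest, rhs.map_or(lhs, |r| lhs * r)))
}

fn sum(input: &str) -> IResult<&str, u64> {
    let (rest, (lhs, rhs)) = pair(product, opt(preceded(tag("+"), sum)))(input)?;
    Ok((rest, rhs.map_or(lhs, |r| lhs + r)))
}
// `sum("1+2*3")` evaluates to `Ok(("", 7))`: `+` only ever sees whole
// products on either side, exactly like the span-building parsers here.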
- fn parse_arithmetic_product<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context( - "parse arithmetic product", - pair( - parse_arithmetic_factor, - opt(tuple(( - opt(lex_whitespace), - alt((star, slash)), - opt(lex_whitespace), - parse_arithmetic_product, - ))), - ), + fn parse_arithmetic_product<'a, 'e>(input: Input<'a, 'e>) -> IResult, Term<'a>> { + pair( + parse_arithmetic_factor, + opt(tuple(( + opt(lex_whitespace), + alt((star, slash)), + opt(lex_whitespace), + parse_arithmetic_product, + ))), )(input) .map(|(rest_input, (lhs, opt))| { ( rest_input, if let Some((ws1, operation, ws2, rhs)) = opt { Term::Binary { - span: outer_span(input, rest_input), + span: outer_span(input.input, rest_input.input), lhs: Box::new(lhs), ws1, operation, @@ -3415,19 +3508,14 @@ mod new { }) } - fn parse_arithmetic_factor<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context( - "parse arithmetic factor", - alt(( - parse_tuple_term, - parse_aggregation_term, - parse_primitive_term, - parse_variable, - parse_existential, - )), - )(input) + fn parse_arithmetic_factor<'a, 'e>(input: Input<'a, 'e>) -> IResult, Term<'a>> { + report_error(alt(( + parse_tuple_term, + parse_aggregation_term, + parse_primitive_term, + parse_variable, + parse_existential, + )))(input) } // fn fold_arithmetic_expression<'a>( @@ -3452,29 +3540,24 @@ mod new { // } /// Parse an aggregation term of the form `#sum(…)`. - fn parse_aggregation_term<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context( - "parse aggregation term", - tuple(( - recognize(pair(hash, lex_ident)), - open_paren, - opt(lex_whitespace), - parse_term_list, - opt(lex_whitespace), - close_paren, - )), - )(input) + fn parse_aggregation_term<'a, 'e>(input: Input<'a, 'e>) -> IResult, Term<'a>> { + tuple(( + recognize(pair(hash, lex_ident)), + report_label(open_paren, ParserLabel::OpenParen), + opt(lex_whitespace), + parse_list(parse_term), + opt(lex_whitespace), + report_label(close_paren, ParserLabel::CloseParen), + ))(input) .map( |(rest_input, (operation, open_paren, ws1, terms, ws2, close_paren))| { ( rest_input, Term::Aggregation { - span: outer_span(input, rest_input), + span: outer_span(input.input, rest_input.input), operation: Token { kind: TokenKind::Aggregate, - span: operation, + span: operation.input, }, open_paren, ws1, @@ -3488,61 +3571,51 @@ mod new { } /// Parse a `_` - fn parse_blank<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context("parse blank", underscore)(input) - .map(|(rest_input, underscore)| (rest_input, Term::Blank(underscore))) + fn parse_blank<'a, 'e>(input: Input<'a, 'e>) -> IResult, Term<'a>> { + underscore(input).map(|(rest_input, underscore)| (rest_input, Term::Blank(underscore))) } /// Parse a tuple term, either with a name (function symbol) or as a term (-list) with /// parenthesis. - fn parse_tuple_term<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context("parse tuple term", parse_tuple)(input) + fn parse_tuple_term<'a, 'e>(input: Input<'a, 'e>) -> IResult, Term<'a>> { + parse_tuple(input) .map(|(rest_input, named_tuple)| (rest_input, Term::Tuple(Box::new(named_tuple)))) } /// Parse a map as a term. 
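// A little further down, variables are recognized by their prefix: `?name`
// is universal, `!name` existential, and the marker stays inside the token
// span thanks to `recognize`. A reduced sketch (the `Var` enum is
// illustrative, not part of this patch):
use nom::{
    branch::alt,
    bytes::complete::tag,
    character::complete::alpha1,
    combinator::{map, recognize},
    sequence::pair,
    IResult,
};

#[derive(Debug, PartialEq)]
enum Var<'a> {
    Universal(&'a str),   // ?X
    Existential(&'a str), // !X
}

fn variable(input: &str) -> IResult<&str, Var<'_>> {
    alt((
        map(recognize(pair(tag("?"), alpha1)), Var::Universal),
        map(recognize(pair(tag("!"), alpha1)), Var::Existential),
    ))(input)
}
// `variable("?X")` returns `Ok(("", Var::Universal("?X")))`; note the `?` is
// part of the captured slice, matching the `TokenKind::Variable` span below.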
- fn parse_map_term<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context("parse map term", parse_map)(input) - .map(|(rest_input, map)| (rest_input, Term::Map(Box::new(map)))) + fn parse_map_term<'a, 'e>(input: Input<'a, 'e>) -> IResult, Term<'a>> { + parse_map(input).map(|(rest_input, map)| (rest_input, Term::Map(Box::new(map)))) } /// Parse a variable. - fn parse_variable<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context("parse variable", recognize(pair(question_mark, lex_ident)))(input).map( - |(rest_input, var)| { - ( - rest_input, - Term::Variable(Token { - kind: TokenKind::Variable, - span: var, - }), - ) - }, - ) + fn parse_variable<'a, 'e>(input: Input<'a, 'e>) -> IResult, Term<'a>> { + recognize(pair( + question_mark, + report_label(lex_ident, ParserLabel::Identifier), + ))(input) + .map(|(rest_input, var)| { + ( + rest_input, + Term::Variable(Token { + kind: TokenKind::Variable, + span: var.input, + }), + ) + }) } /// Parse an existential variable. - fn parse_existential<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context( - "parse existential", - recognize(pair(exclamation_mark, lex_ident)), - )(input) + fn parse_existential<'a, 'e>(input: Input<'a, 'e>) -> IResult, Term<'a>> { + recognize(pair( + exclamation_mark, + report_label(lex_ident, ParserLabel::Identifier), + ))(input) .map(|(rest_input, existential)| { ( rest_input, Term::Existential(Token { kind: TokenKind::Existential, - span: existential, + span: existential.input, }), ) }) @@ -3550,17 +3623,24 @@ mod new { // Order of functions is important, because of ordered choice and no backtracking /// Parse the operator for an infix atom. - fn parse_operation_token<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context( - "parse operation token", - alt((less_equal, greater_equal, equal, unequal, less, greater)), - )(input) + fn parse_operation_token<'a, 'e>(input: Input<'a, 'e>) -> IResult, Token<'a>> { + report_error(alt(( + less_equal, + greater_equal, + equal, + unequal, + less, + greater, + )))(input) } #[cfg(test)] mod tests { + use std::{ + cell::RefCell, + collections::{HashMap, HashSet}, + }; + use nom::error::{convert_error, VerboseError}; use super::*; @@ -3585,14 +3665,17 @@ mod new { }; } - fn convert_located_span_error<'a>(input: Span<'a>, err: VerboseError>) -> String { + fn convert_located_span_error<'a, 'e>( + input: Span<'a>, + err: VerboseError>, + ) -> String { convert_error( *(input.fragment()), VerboseError { errors: err .errors .into_iter() - .map(|(span, tag)| (*(span.fragment()), tag)) + .map(|(span, tag)| (*(span.input.fragment()), tag)) .collect(), }, ) @@ -3604,10 +3687,21 @@ mod new { // tok: &lex_tokens(Span::new("a(B,C).")).unwrap().1, // }; let input = Span::new("a(B,C)."); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let errors = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state: errors, + }; assert_eq!( - parse_program::>(input).unwrap().1, + // parse_program::>(input).unwrap().1, + parse_program(input).0, Program { - span: input, + span: input.input, tl_doc_comment: None, statements: vec![Statement::Fact { span: s!(0, 1, "a(B,C)."), @@ -3664,11 +3758,22 @@ mod new { let input = Span::new( r#"@base .@prefix rdfs:.@import sourceA:-csv{resource="sources/dataA.csv"}.@export a:-csv{}.@output a, b, c."#, ); + let refcell = RefCell::new(Vec::new()); + let labels = 
RefCell::new(Vec::new()); + let errors = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state: errors, + }; assert_eq!( - parse_program::>(input).unwrap().1, + // parse_program::>(input).unwrap().1, + parse_program(input).0, Program { tl_doc_comment: None, - span: input, + span: input.input, statements: vec![ Statement::Directive(Directive::Base { span: s!(0, 1, "@base ."), @@ -3898,28 +4003,39 @@ mod new { ) } - #[test] - fn ignore_ws_and_comments() { - let input = Span::new(" Hi %cool comment\n"); - assert_eq!( - super::ignore_ws_and_comments(lex_ident::>)(input), - Ok(( - s!(22, 2, ""), - Token { - kind: TokenKind::Ident, - span: s!(3, 1, "Hi") - } - )) - ) - } + // #[test] + // fn ignore_ws_and_comments() { + // let input = Span::new(" Hi %cool comment\n"); + // assert_eq!( + // super::ignore_ws_and_comments(lex_ident::>)(input), + // Ok(( + // s!(22, 2, ""), + // Token { + // kind: TokenKind::Ident, + // span: s!(3, 1, "Hi") + // } + // )) + // ) + // } #[test] fn fact_with_ws() { let input = Span::new("some(Fact, with, whitespace) . % and a super useful comment\n"); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let errors = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state: errors, + }; assert_eq!( - parse_program::>(input).unwrap().1, + // parse_program::>(input).unwrap().1, + parse_program(input).0, Program { - span: input, + span: input.input, tl_doc_comment: None, statements: vec![ Statement::Fact { @@ -4023,47 +4139,71 @@ limeSpecies(?X, ?Name) :- taxon(?X, ?Name, ?Y), limeSpecies(?Y, ?N). oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters), ?age > 200, limeSpecies(?id,?species) ."#, ); - let ast = parse_program::>(input); - match &ast { - Ok((rest_input, ast)) => { - println!("Rest Input:\n{:#?}\n\n{}", rest_input, ast); - assert_eq!( - { - let mut string_from_tokens = String::new(); - for token in get_all_tokens(ast) { - string_from_tokens.push_str(token.span().fragment()); - } - println!("String from Tokens:\n"); - println!("{}\n", string_from_tokens); - string_from_tokens - }, - *input.fragment(), - ); - } - Err(nom::Err::Error(err)) | Err(nom::Err::Failure(err)) => { - println!( - "PRINT ERROR:\n\n{}", - convert_located_span_error(input, err.clone()) - ); - } - Err(err) => panic!("{}", err), - } - assert!(ast.is_ok()); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let errors = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state: errors, + }; + // let ast = parse_program::>(input); + let (ast, _) = parse_program(input); + println!("{}", ast); + assert_eq!( + { + let mut string_from_tokens = String::new(); + for token in get_all_tokens(&ast) { + string_from_tokens.push_str(token.span().fragment()); + } + println!("String from Tokens:\n"); + println!("{}\n", string_from_tokens); + string_from_tokens + }, + *input.input.fragment(), + ); } #[test] fn parser_test() { - let str = std::fs::read_to_string("../testfile.rls").expect("testfile not found"); + let file = "../testfile2.rls"; + let str = std::fs::read_to_string(file).expect("testfile not found"); let input = Span::new(str.as_str()); - let result = parse_program::>(input); - match result { - Ok(ast) => println!("{}", ast.1), - Err(nom::Err::Error(err)) | Err(nom::Err::Failure(err)) => { - println!("{}", convert_located_span_error(input, err)) - } - Err(_) => (), + 
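// Below, every reported error is keyed by its `Position` and deduplicated in
// a `HashSet`, so repeated failures at the same spot are printed once, each
// with a `^` caret under the offending column.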
let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let parser_state = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state, + }; + // let result = parse_program::>(input); + let (ast, errors) = parse_program(input); + println!("{}\n\n{:#?}", ast, errors); + let mut error_map: HashMap> = HashMap::new(); + for error in errors { + if let Some(set) = error_map.get_mut(&error.0) { + set.insert(error.1); + } else { + let mut set = HashSet::new(); + set.insert(error.1); + error_map.insert(error.0, set); + }; + } + dbg!(&error_map); + // assert!(false); + let lines: Vec<_> = str.lines().collect(); + for (pos, str) in error_map { + // println!("{pos:?}, {str:?}"); + println!("error: {str:?}"); + println!("--> {}:{}:{}", file, pos.line, pos.column); + println!("{}", lines.get((pos.line - 1) as usize).unwrap()); + println!("{0:>1$}\n", "^", pos.column as usize) } - assert!(false); } #[test] @@ -4072,7 +4212,19 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters assert_eq!( { - let result = parse_term::>(Span::new("42")); + let input = Span::new("42"); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let parser_state = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state, + }; + // let result = parse_term::>(input); + let result = parse_term(input); result.unwrap().1 }, Term::Primitive(Primitive::Number { @@ -4087,7 +4239,19 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters assert_eq!( { - let result = parse_term::>(Span::new("35+7")); + let input = Span::new("35+7"); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let parser_state = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state, + }; + // let result = parse_term::>(input); + let result = parse_term(input); result.unwrap().1 }, Term::Binary { @@ -4116,7 +4280,19 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters assert_eq!( { - let result = parse_term::>(Span::new("6*7")); + let input = Span::new("6*7"); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let parser_state = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state, + }; + // let result = parse_term::>(input); + let result = parse_term(input); result.unwrap().1 }, Term::Binary { @@ -4145,7 +4321,19 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters assert_eq!( { - let result = parse_term::>(Span::new("49-7")); + let input = Span::new("49-7"); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let parser_state = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state, + }; + // let result = parse_term::>(input); + let result = parse_term(input); result.unwrap().1 }, Term::Binary { @@ -4174,7 +4362,19 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters assert_eq!( { - let result = parse_term::>(Span::new("84/2")); + let input = Span::new("84/2"); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let parser_state = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state, + }; + // let result = parse_term::>(input); + let result 
= parse_term(input); result.unwrap().1 }, Term::Binary { @@ -4203,7 +4403,19 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters assert_eq!( { - let result = parse_term::>(Span::new("5*7+7")); + let input = Span::new("5*7+7"); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let parser_state = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state, + }; + // let result = parse_term::>(input); + let result = parse_term(input); result.unwrap().1 }, Term::Binary { @@ -4246,7 +4458,19 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters assert_eq!( { - let result = parse_term::>(Span::new("7+5*7")); + let input = Span::new("7+5*7"); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let parser_state = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state, + }; + // let result = parse_term::>(input); + let result = parse_term(input); result.unwrap().1 }, Term::Binary { @@ -4290,30 +4514,42 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters assert_eq!( { let input = Span::new("(15+3*2-(7+35)*8)/3"); - let result = parse_term::>(input); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let parser_state = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state, + }; + // let result = parse_term::>(input); + let result = parse_term(input); // let result = parse_term::>(Span::new("(15+3*2-(7+35)*8)/3")); - match result { - Ok(ast) => { - println!("{}", ast.1); - ast.1 - } - Err(nom::Err::Error(err)) | Err(nom::Err::Failure(err)) => { - panic!( - "{}", - convert_error( - *(input.fragment()), - VerboseError { - errors: err - .errors - .into_iter() - .map(|(span, tag)| { (*(span.fragment()), tag) }) - .collect() - } - ) - ) - } - Err(nom::Err::Incomplete(err)) => panic!("{:#?}", err), - } + // match result { + // Ok(ast) => { + // println!("{}", ast.1); + // ast.1 + // } + // Err(nom::Err::Error(err)) | Err(nom::Err::Failure(err)) => { + // panic!( + // "{}", + // convert_error( + // *(input.input.fragment()), + // VerboseError { + // errors: err + // .errors + // .into_iter() + // .map(|(span, tag)| { (*(span.fragment()), tag) }) + // .collect() + // } + // ) + // ) + // } + // Err(nom::Err::Incomplete(err)) => panic!("{:#?}", err), + // } + result.unwrap().1 }, Term::Binary { span: s!(0, 1, "(15+3*2-(7+35)*8)/3"), @@ -4449,7 +4685,19 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters assert_eq!( { - let result = parse_term::>(Span::new("15+3*2-(7+35)*8/3")); + let input = Span::new("15+3*2-(7+35)*8/3"); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let parser_state = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state, + }; + // let result = parse_term::>(input); + let result = parse_term(input); result.unwrap().1 }, Term::Binary { @@ -4592,17 +4840,43 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters assert_eq!( { let input = Span::new("e42"); - parse_exponent::>(input) + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let parser_state = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state, + }; + // 
parse_exponent::>(input) + parse_exponent(input).unwrap().1 }, - Ok(( - s!(3, 1, ""), - Exponent { - e: T! {TokenKind::Exponent, 0,1,"e"}, - sign: None, - number: T! {TokenKind::Number, 1,1,"42"} - } - )) + Exponent { + e: T! {TokenKind::Exponent, 0,1,"e"}, + sign: None, + number: T! {TokenKind::Number, 1,1,"42"} + } ) } + + #[test] + fn missing_dot() { + let input = Span::new("some(Fact\nSome other, Fact.\nthird(fact)."); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let parser_state = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state, + }; + let result = parse_program(input); + println!("{}\n\n{:#?}", result.0, result.1); + // assert!(false); + } } } diff --git a/nemo/src/io/parser/ast.rs b/nemo/src/io/parser/ast.rs index 722aff57f..b9bec1e4e 100644 --- a/nemo/src/io/parser/ast.rs +++ b/nemo/src/io/parser/ast.rs @@ -20,6 +20,7 @@ pub(crate) trait AstNode: std::fmt::Debug + Display { fn name(&self) -> String; } +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub(crate) struct Position { pub(crate) offset: usize, pub(crate) line: u32, @@ -33,6 +34,34 @@ pub(crate) struct List<'a, T> { // ([ws]?[,][ws]?[T])* pub(crate) rest: Option>, Token<'a>, Option>, T)>>, } +impl List<'_, T> { + pub fn to_vec(&self) -> Vec { + let mut vec = Vec::new(); + vec.push(self.first.clone()); + if let Some(rest) = &self.rest { + for (_, _, _, item) in rest { + vec.push(item.clone()); + } + } + vec + } +} +impl std::iter::IntoIterator for List<'_, T> { + type Item = T; + + type IntoIter = std::vec::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + let mut vec = Vec::new(); + vec.push(self.first); + if let Some(rest) = self.rest { + for (_, _, _, item) in rest { + vec.push(item); + } + } + vec.into_iter() + } +} impl AstNode for List<'_, T> { fn children(&self) -> Option> { let mut vec = Vec::new(); diff --git a/nemo/src/io/parser/ast/program.rs b/nemo/src/io/parser/ast/program.rs index a7397f7ec..334e1ee4a 100644 --- a/nemo/src/io/parser/ast/program.rs +++ b/nemo/src/io/parser/ast/program.rs @@ -43,12 +43,21 @@ impl AstNode for Program<'_> { } fn name(&self) -> String { - format!( - "Program \x1b[34m@{}:{} \x1b[92m\"{}…\"\x1b[0m", - self.span.location_line(), - self.span.get_utf8_column(), - &self.span.fragment()[..60], - ) + if self.span.fragment().len() < 60 { + format!( + "Program \x1b[34m@{}:{} \x1b[92m{:?}\x1b[0m", + self.span.location_line(), + self.span.get_utf8_column(), + &self.span.fragment(), + ) + } else { + format!( + "Program \x1b[34m@{}:{} \x1b[92m{:?}[…]\x1b[0m", + self.span.location_line(), + self.span.get_utf8_column(), + &self.span.fragment()[..60], + ) + } } } impl std::fmt::Display for Program<'_> { diff --git a/nemo/src/io/parser/ast/statement.rs b/nemo/src/io/parser/ast/statement.rs index e26ec39dd..a2b3a1c9d 100644 --- a/nemo/src/io/parser/ast/statement.rs +++ b/nemo/src/io/parser/ast/statement.rs @@ -27,6 +27,7 @@ pub(crate) enum Statement<'a> { }, Whitespace(Token<'a>), Comment(Token<'a>), + Error(Token<'a>), } impl AstNode for Statement<'_> { fn children(&self) -> Option> { @@ -84,6 +85,7 @@ impl AstNode for Statement<'_> { } Statement::Whitespace(ws) => Some(vec![ws]), Statement::Comment(c) => Some(vec![c]), + Statement::Error(t) => Some(vec![t]), } } @@ -94,6 +96,7 @@ impl AstNode for Statement<'_> { Statement::Rule { span, .. 
} => *span, Statement::Whitespace(ws) => ws.span(), Statement::Comment(c) => c.span(), + Statement::Error(t) => t.span, } } @@ -128,6 +131,7 @@ impl AstNode for Statement<'_> { Statement::Rule { .. } => name!("Rule"), Statement::Whitespace(_) => name!("Whitespace"), Statement::Comment(_) => name!("Comment"), + Statement::Error(_) => name!("ERROR"), } } } diff --git a/nemo/src/io/parser/types.rs b/nemo/src/io/parser/types.rs index a65730809..6850a69c2 100644 --- a/nemo/src/io/parser/types.rs +++ b/nemo/src/io/parser/types.rs @@ -1,19 +1,24 @@ -use std::num::{ParseFloatError, ParseIntError}; +use std::{ + num::{ParseFloatError, ParseIntError}, + ops::Range, + str::{CharIndices, Chars}, +}; use nemo_physical::datavalues::DataValueCreationError; use nom::{ error::{ErrorKind, FromExternalError}, - IResult, + AsBytes, IResult, InputIter, InputLength, InputTake, InputTakeAtPosition, }; use nom_locate::LocatedSpan; use thiserror::Error; use crate::{ io::formats::import_export::ImportExportError, + io::lexer::ParserState, model::rule_model::{Aggregate, Constraint, Literal, Term}, }; -use super::Variable; +use super::{ast::Position, Variable}; /// A [LocatedSpan] over the input. pub(super) type Span<'a> = LocatedSpan<&'a str>; @@ -434,7 +439,7 @@ impl<'a> Tokens<'a> { Tokens { tok: vec } } } -impl<'a> nom::AsBytes for Tokens<'a> { +impl<'a> AsBytes for Tokens<'a> { fn as_bytes(&self) -> &[u8] { todo!() } @@ -471,7 +476,7 @@ impl<'a, T> nom::FindToken for Tokens<'a> { todo!() } } -impl<'a> nom::InputIter for Tokens<'a> { +impl<'a> InputIter for Tokens<'a> { type Item = &'a Token<'a>; type Iter = std::iter::Enumerate<::std::slice::Iter<'a, Token<'a>>>; @@ -506,7 +511,7 @@ impl<'a> nom::InputLength for Tokens<'a> { self.tok.len() } } -impl<'a> nom::InputTake for Tokens<'a> { +impl<'a> InputTake for Tokens<'a> { fn take(&self, count: usize) -> Self { Tokens { tok: &self.tok[0..count], @@ -584,3 +589,251 @@ impl<'a, R> nom::Slice for Tokens<'a> { todo!() } } + +#[derive(Debug, Clone, Copy)] +pub(crate) struct Input<'a, 's> { + pub(crate) input: crate::io::lexer::Span<'a>, + pub(crate) parser_state: ParserState<'s>, +} +impl<'a, 's> Input<'a, 's> { + fn new(input: &'a str, errors: ParserState<'s>) -> Input<'a, 's> { + Input { + input: Span::new(input), + parser_state: errors, + } + } +} +impl ToRange for Input<'_, '_> { + fn to_range(&self) -> Range { + self.input.to_range() + } +} + +impl AsBytes for Input<'_, '_> { + fn as_bytes(&self) -> &[u8] { + self.input.fragment().as_bytes() + } +} + +impl<'a, 's> nom::Compare> for Input<'a, 's> { + fn compare(&self, t: Input) -> nom::CompareResult { + self.input.compare(t.as_bytes()) + } + + fn compare_no_case(&self, t: Input) -> nom::CompareResult { + self.input.compare_no_case(t.as_bytes()) + } +} +impl nom::Compare<&str> for Input<'_, '_> { + fn compare(&self, t: &str) -> nom::CompareResult { + self.input.compare(t) + } + + fn compare_no_case(&self, t: &str) -> nom::CompareResult { + self.input.compare_no_case(t) + } +} + +impl nom::ExtendInto for Input<'_, '_> { + type Item = char; + + type Extender = String; + + fn new_builder(&self) -> Self::Extender { + self.input.new_builder() + } + + fn extend_into(&self, acc: &mut Self::Extender) { + self.input.extend_into(acc) + } +} + +impl nom::FindSubstring<&str> for Input<'_, '_> { + fn find_substring(&self, substr: &str) -> Option { + self.input.find_substring(substr) + } +} + +impl<'a, 'e, T> nom::FindToken for Input<'a, 'e> +where + &'a str: nom::FindToken, +{ + fn find_token(&self, token: T) -> bool { + 
self.input.find_token(token) + } +} + +impl<'a, 's> InputIter for Input<'a, 's> { + type Item = char; + + type Iter = CharIndices<'a>; + + type IterElem = Chars<'a>; + + fn iter_indices(&self) -> Self::Iter { + todo!() + } + + fn iter_elements(&self) -> Self::IterElem { + todo!() + } + + fn position
<P>
(&self, predicate: P) -> Option + where + P: Fn(Self::Item) -> bool, + { + todo!() + } + + fn slice_index(&self, count: usize) -> Result { + self.input.slice_index(count) + } +} + +impl nom::InputLength for Input<'_, '_> { + fn input_len(&self) -> usize { + self.input.input_len() + } +} + +impl InputTake for Input<'_, '_> { + fn take(&self, count: usize) -> Self { + Input { + input: self.input.take(count), + parser_state: self.parser_state, + } + } + + fn take_split(&self, count: usize) -> (Self, Self) { + let (first, second) = self.input.take_split(count); + ( + Input { + input: first, + parser_state: self.parser_state, + }, + Input { + input: second, + parser_state: self.parser_state, + }, + ) + } +} + +impl nom::InputTakeAtPosition for Input<'_, '_> { + type Item = char; + + fn split_at_position>( + &self, + predicate: P, + ) -> nom::IResult + where + P: Fn(Self::Item) -> bool, + { + match self.input.position(predicate) { + Some(n) => Ok(self.take_split(n)), + None => Err(nom::Err::Incomplete(nom::Needed::new(1))), + } + } + + fn split_at_position1>( + &self, + predicate: P, + e: nom::error::ErrorKind, + ) -> nom::IResult + where + P: Fn(Self::Item) -> bool, + { + todo!() + } + + fn split_at_position_complete>( + &self, + predicate: P, + ) -> nom::IResult + where + P: Fn(Self::Item) -> bool, + { + match self.split_at_position(predicate) { + Err(nom::Err::Incomplete(_)) => Ok(self.take_split(self.input_len())), + res => res, + } + } + + fn split_at_position1_complete>( + &self, + predicate: P, + e: nom::error::ErrorKind, + ) -> nom::IResult + where + P: Fn(Self::Item) -> bool, + { + match self.input.fragment().position(predicate) { + Some(0) => Err(nom::Err::Error(E::from_error_kind(*self, e))), + Some(n) => Ok(self.take_split(n)), + None => { + if self.input.fragment().input_len() == 0 { + Err(nom::Err::Error(E::from_error_kind(*self, e))) + } else { + Ok(self.take_split(self.input_len())) + } + } + } + } +} + +impl nom::Offset for Input<'_, '_> { + fn offset(&self, second: &Self) -> usize { + self.input.offset(&second.input) + } +} + +impl nom::ParseTo for Input<'_, '_> { + fn parse_to(&self) -> Option { + todo!() + } +} + +impl<'a, 'e, R> nom::Slice for Input<'a, 'e> +where + &'a str: nom::Slice, +{ + fn slice(&self, range: R) -> Self { + Input { + input: self.input.slice(range), + parser_state: self.parser_state, + } + } +} + +pub(crate) trait ToRange { + fn to_range(&self) -> Range; +} + +#[derive(Debug, Clone, Copy, PartialEq)] +pub(crate) enum ParserLabel { + Rule, + Fact, + Directive, + Dot, + Arrow, + // Head, + // Body, + Comma, + Iri, + Prefix, + Identifier, + OpenParen, + CloseParen, + OpenBrace, + CloseBrace, + OpenBracket, + ClosePracket, + Equal, + Number, +} + +#[derive(Debug, Clone, Copy, PartialEq)] +pub(crate) struct Label { + pub(crate) label: ParserLabel, + pub(crate) pos: Position, +} diff --git a/testfile.rls b/testfile.rls index b5f5d6db9..07e551c67 100644 --- a/testfile.rls +++ b/testfile.rls @@ -9,6 +9,10 @@ father(alice, bob). mother(bob, carla). father(bob, darius). mother(alice, carla). +father(, ). +mother(, ). +father(, ). +mother(, ). % Rules: parent(?X, ?Y) :- mother(?X, ?Y). @@ -16,3 +20,16 @@ parent(?X, ?Y) :- father(?X, ?Y). parent( ?X , ?Y ) :- ~sibling( ?X , ?Y ) . a(?x) :- b(?x, B) . s(4) :- s(3). + +ancestor(?X,?Y) :- parent(?X, ?Y) . +ancestor(?X,?Z) :- ancestor(?X, ?Y), parent(?Y, ?Z) . +ancestorOfAlice(?X) :- ancestor(alice,?X). + +mydata(a,b) . +mydata("hello", 42) . +mydata(3.14, "2023-06-19"^^) . + +resulta(?N + 10) :- mydata(_, ?N) . 
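
Note: the long block of trait implementations above follows a single delegation recipe: forward the positional work to the wrapped span and copy `parser_state` into every derived value. The malformed `father(, ).` facts added to `testfile.rls` just above presumably exist to exercise the error-recovery path this state machinery feeds. A minimal sketch of the delegation recipe over plain `&str`, showing only `InputTake` (`State` stands in for `ParserState`):

```rust
use nom::InputTake;

#[derive(Clone, Copy, Debug)]
struct State;

#[derive(Clone, Copy, Debug)]
struct Wrapped<'a> {
    input: &'a str,
    state: State,
}

impl InputTake for Wrapped<'_> {
    fn take(&self, count: usize) -> Self {
        // Delegate the slicing, keep the state.
        Wrapped { input: &self.input[..count], state: self.state }
    }

    fn take_split(&self, count: usize) -> (Self, Self) {
        // nom returns (remainder, taken); both halves carry the state on.
        let (rest, taken) = self.input.take_split(count);
        (
            Wrapped { input: rest, state: self.state },
            Wrapped { input: taken, state: self.state },
        )
    }
}

fn main() {
    let w = Wrapped { input: "abcdef", state: State };
    let (rest, taken) = w.take_split(2);
    assert_eq!(taken.input, "ab");
    assert_eq!(rest.input, "cdef");
}
```
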
+resultB(?R) :- mydata(?X, ?Y), ?R = SQRT(?X) . +result(?D) :- mydata(?X, _), ?D = DATATYPE(?X) . + diff --git a/testfile2.rls b/testfile2.rls new file mode 100644 index 000000000..32ee5c5af --- /dev/null +++ b/testfile2.rls @@ -0,0 +1,11 @@ +% Facts: +father(alice, bob). +mother(bob, carla). +father(bob, darius). +mother(alice, carla). + +% Rules: +parent(?X, ?Y) :- mother(?X, ?Y). +parent(?X, ?Y) :- father(?X, ?Y). +parent( ?X , ?Y ) :- ~sibling( ?X , ?Y ) . + From e814e3e335185f09648b38079ca3b9b2dedc43f6 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Thu, 16 May 2024 09:07:00 +0200 Subject: [PATCH 039/214] Refactor to use type annotation on vecs instead of trivial cast on the first element --- nemo/src/io/parser/ast.rs | 5 ++--- nemo/src/io/parser/ast/atom.rs | 5 ++--- nemo/src/io/parser/ast/directive.rs | 25 ++++++++++--------------- nemo/src/io/parser/ast/map.rs | 10 ++++------ nemo/src/io/parser/ast/program.rs | 5 ++--- nemo/src/io/parser/ast/statement.rs | 10 ++++------ nemo/src/io/parser/ast/term.rs | 20 ++++++++------------ nemo/src/io/parser/ast/tuple.rs | 5 ++--- 8 files changed, 34 insertions(+), 51 deletions(-) diff --git a/nemo/src/io/parser/ast.rs b/nemo/src/io/parser/ast.rs index b9bec1e4e..539d991ce 100644 --- a/nemo/src/io/parser/ast.rs +++ b/nemo/src/io/parser/ast.rs @@ -64,9 +64,8 @@ impl std::iter::IntoIterator for List<'_, T> { } impl AstNode for List<'_, T> { fn children(&self) -> Option> { - let mut vec = Vec::new(); - #[allow(trivial_casts)] - vec.push(&self.first as &dyn AstNode); + let mut vec: Vec<&dyn AstNode> = Vec::new(); + vec.push(&self.first); if let Some(rest) = &self.rest { for (ws1, delim, ws2, item) in rest { if let Some(ws) = ws1 { diff --git a/nemo/src/io/parser/ast/atom.rs b/nemo/src/io/parser/ast/atom.rs index 52e8d5383..75a9d4d4d 100644 --- a/nemo/src/io/parser/ast/atom.rs +++ b/nemo/src/io/parser/ast/atom.rs @@ -36,9 +36,8 @@ impl AstNode for Atom<'_> { rhs, .. } => { - let mut vec = Vec::new(); - #[allow(trivial_casts)] - vec.push(lhs as &dyn AstNode); + let mut vec: Vec<&dyn AstNode> = Vec::new(); + vec.push(lhs); if let Some(ws) = ws1 { vec.push(ws); }; diff --git a/nemo/src/io/parser/ast/directive.rs b/nemo/src/io/parser/ast/directive.rs index c75351256..fd13de1d8 100644 --- a/nemo/src/io/parser/ast/directive.rs +++ b/nemo/src/io/parser/ast/directive.rs @@ -78,10 +78,9 @@ impl AstNode for Directive<'_> { dot, .. } => { - let mut vec = Vec::new(); + let mut vec: Vec<&dyn AstNode> = Vec::new(); if let Some(dc) = doc_comment { - #[allow(trivial_casts)] - vec.push(dc as &dyn AstNode); + vec.push(dc); }; vec.push(kw); if let Some(ws) = ws1 { @@ -105,10 +104,9 @@ impl AstNode for Directive<'_> { dot, .. } => { - let mut vec = Vec::new(); + let mut vec: Vec<&dyn AstNode> = Vec::new(); if let Some(dc) = doc_comment { - #[allow(trivial_casts)] - vec.push(dc as &dyn AstNode); + vec.push(dc); }; vec.push(kw); if let Some(ws) = ws1 { @@ -138,10 +136,9 @@ impl AstNode for Directive<'_> { dot, .. } => { - let mut vec = Vec::new(); + let mut vec: Vec<&dyn AstNode> = Vec::new(); if let Some(dc) = doc_comment { - #[allow(trivial_casts)] - vec.push(dc as &dyn AstNode); + vec.push(dc); }; vec.push(kw); vec.push(ws1); @@ -173,10 +170,9 @@ impl AstNode for Directive<'_> { dot, .. 
} => { - let mut vec = Vec::new(); + let mut vec: Vec<&dyn AstNode> = Vec::new(); if let Some(dc) = doc_comment { - #[allow(trivial_casts)] - vec.push(dc as &dyn AstNode); + vec.push(dc); }; vec.push(kw); vec.push(ws1); @@ -204,10 +200,9 @@ impl AstNode for Directive<'_> { ws2, dot, } => { - let mut vec = Vec::new(); + let mut vec: Vec<&dyn AstNode> = Vec::new(); if let Some(dc) = doc_comment { - #[allow(trivial_casts)] - vec.push(dc as &dyn AstNode); + vec.push(dc); }; vec.push(kw); vec.push(ws1); diff --git a/nemo/src/io/parser/ast/map.rs b/nemo/src/io/parser/ast/map.rs index 669963ae8..626d93aa6 100644 --- a/nemo/src/io/parser/ast/map.rs +++ b/nemo/src/io/parser/ast/map.rs @@ -17,10 +17,9 @@ pub(crate) struct Map<'a> { } impl AstNode for Map<'_> { fn children(&self) -> Option> { - let mut vec = Vec::new(); + let mut vec: Vec<&dyn AstNode> = Vec::new(); if let Some(identifier) = &self.identifier { - #[allow(trivial_casts)] - vec.push(identifier as &dyn AstNode); + vec.push(identifier); }; if let Some(ws) = &self.ws1 { vec.push(ws); @@ -78,9 +77,8 @@ pub(crate) struct Pair<'a, K, V> { } impl AstNode for Pair<'_, K, V> { fn children(&self) -> Option> { - let mut vec = Vec::new(); - #[allow(trivial_casts)] - vec.push(&self.key as &dyn AstNode); + let mut vec: Vec<&dyn AstNode> = Vec::new(); + vec.push(&self.key); if let Some(ws) = &self.ws1 { vec.push(ws); } diff --git a/nemo/src/io/parser/ast/program.rs b/nemo/src/io/parser/ast/program.rs index 334e1ee4a..466be7577 100644 --- a/nemo/src/io/parser/ast/program.rs +++ b/nemo/src/io/parser/ast/program.rs @@ -12,10 +12,9 @@ pub(crate) struct Program<'a> { } impl AstNode for Program<'_> { fn children(&self) -> Option> { - let mut vec = Vec::new(); + let mut vec: Vec<&dyn AstNode> = Vec::new(); if let Some(dc) = &self.tl_doc_comment { - #[allow(trivial_casts)] - vec.push(dc as &dyn AstNode); + vec.push(dc); }; // NOTE: The current implementation puts the doc comment and all the // statements in the same vec, so there is no need to implement AstNode diff --git a/nemo/src/io/parser/ast/statement.rs b/nemo/src/io/parser/ast/statement.rs index a2b3a1c9d..383b58272 100644 --- a/nemo/src/io/parser/ast/statement.rs +++ b/nemo/src/io/parser/ast/statement.rs @@ -40,10 +40,9 @@ impl AstNode for Statement<'_> { dot, .. } => { - let mut vec = Vec::new(); + let mut vec: Vec<&dyn AstNode> = Vec::new(); if let Some(dc) = doc_comment { - #[allow(trivial_casts)] - vec.push(dc as &dyn AstNode); + vec.push(dc); }; vec.push(atom); if let Some(ws) = ws { @@ -63,10 +62,9 @@ impl AstNode for Statement<'_> { dot, .. } => { - let mut vec = Vec::new(); + let mut vec: Vec<&dyn AstNode> = Vec::new(); if let Some(dc) = doc_comment { - #[allow(trivial_casts)] - vec.push(dc as &dyn AstNode); + vec.push(dc); }; vec.push(head); if let Some(ws) = ws1 { diff --git a/nemo/src/io/parser/ast/term.rs b/nemo/src/io/parser/ast/term.rs index 8be1e79e7..17eddee16 100644 --- a/nemo/src/io/parser/ast/term.rs +++ b/nemo/src/io/parser/ast/term.rs @@ -53,9 +53,8 @@ impl AstNode for Term<'_> { rhs, .. } => { - let mut vec = Vec::new(); - #[allow(trivial_casts)] - vec.push(&**lhs as &dyn AstNode); + let mut vec: Vec<&dyn AstNode> = Vec::new(); + vec.push(&**lhs); if let Some(ws) = ws1 { vec.push(ws); }; @@ -75,9 +74,8 @@ impl AstNode for Term<'_> { close_paren, .. 
} => { - let mut vec = Vec::new(); - #[allow(trivial_casts)] - vec.push(operation as &dyn AstNode); + let mut vec: Vec<&dyn AstNode> = Vec::new(); + vec.push(operation); vec.push(open_paren); if let Some(ws) = ws1 { vec.push(ws); @@ -194,10 +192,9 @@ impl AstNode for Primitive<'_> { exponent, .. } => { - let mut vec = Vec::new(); - #[allow(trivial_casts)] + let mut vec: Vec<&dyn AstNode> = Vec::new(); if let Some(s) = sign { - vec.push(s as &dyn AstNode); + vec.push(s); } if let Some(b) = before { vec.push(b); @@ -284,9 +281,8 @@ pub(crate) struct Exponent<'a> { } impl AstNode for Exponent<'_> { fn children(&self) -> Option> { - let mut vec = Vec::new(); - #[allow(trivial_casts)] - vec.push(&self.e as &dyn AstNode); + let mut vec: Vec<&dyn AstNode> = Vec::new(); + vec.push(&self.e); if let Some(s) = &self.sign { vec.push(s); }; diff --git a/nemo/src/io/parser/ast/tuple.rs b/nemo/src/io/parser/ast/tuple.rs index a49f67012..4b9e9da9f 100644 --- a/nemo/src/io/parser/ast/tuple.rs +++ b/nemo/src/io/parser/ast/tuple.rs @@ -16,10 +16,9 @@ pub(crate) struct Tuple<'a> { } impl AstNode for Tuple<'_> { fn children(&self) -> Option> { - let mut vec = Vec::new(); - #[allow(trivial_casts)] + let mut vec: Vec<&dyn AstNode> = Vec::new(); if let Some(identifier) = &self.identifier { - vec.push(identifier as &dyn AstNode); + vec.push(identifier); } if let Some(ws) = &self.ws1 { vec.push(ws); From 7730190321b004904deaaa7b77ec32887065f068 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Thu, 16 May 2024 11:31:25 +0200 Subject: [PATCH 040/214] Fix whitespace parsing within statements. Now also comments are allowed and do not produce an error --- nemo/src/io/parser.rs | 264 +++++++++++++++++----------- nemo/src/io/parser/ast.rs | 67 ++++--- nemo/src/io/parser/ast/atom.rs | 6 +- nemo/src/io/parser/ast/directive.rs | 32 ++-- nemo/src/io/parser/ast/map.rs | 12 +- nemo/src/io/parser/ast/statement.rs | 10 +- nemo/src/io/parser/ast/term.rs | 10 +- nemo/src/io/parser/ast/tuple.rs | 8 +- 8 files changed, 248 insertions(+), 161 deletions(-) diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index 758bb9061..3a1394708 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -2434,6 +2434,7 @@ mod new { use super::ast::{ atom::*, directive::*, map::*, program::*, statement::*, term::*, tuple::*, List, Position, + Wsoc, }; use super::types::{Input, Label, ParserLabel, ToRange}; use crate::io::lexer::{ @@ -2649,6 +2650,34 @@ mod new { } } + fn wsoc0<'a, 's>(input: Input<'a, 's>) -> IResult, Option>> { + many0(alt((lex_whitespace, lex_comment)))(input).map(|(rest_input, vec)| { + if vec.is_empty() { + (rest_input, None) + } else { + ( + rest_input, + Some(Wsoc { + span: outer_span(input.input, rest_input.input), + token: vec, + }), + ) + } + }) + } + + fn wsoc1<'a, 's>(input: Input<'a, 's>) -> IResult, Wsoc<'a>> { + many1(alt((lex_whitespace, lex_comment)))(input).map(|(rest_input, vec)| { + ( + rest_input, + Wsoc { + span: outer_span(input.input, rest_input.input), + token: vec, + }, + ) + }) + } + /// Parse a full program consisting of directives, facts, rules and comments. 
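
Note: `wsoc0`/`wsoc1` ("whitespace or comment, zero/one or more") are what allow the `opt(lex_whitespace)` calls below to be replaced wholesale: trivia between tokens is now any interleaving of whitespace runs and `%`-comments, collected into one optional `Wsoc` node. The combinator's shape, reduced to plain `&str` with the token and span bookkeeping left out (a sketch, not the crate code):

```rust
use nom::{
    branch::alt,
    bytes::complete::{is_not, tag},
    character::complete::multispace1,
    combinator::recognize,
    multi::many0,
    sequence::pair,
    IResult,
};

// Trivia is any sequence of whitespace runs and `%`-to-end-of-line comments.
fn trivia(input: &str) -> IResult<&str, Vec<&str>> {
    many0(alt((
        multispace1,
        recognize(pair(tag("%"), is_not("\n"))),
    )))(input)
}

fn main() {
    let (rest, skipped) = trivia("  % a comment\n\t x").unwrap();
    assert_eq!(rest, "x");
    assert_eq!(skipped, vec!["  ", "% a comment", "\n\t "]);
}
```
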
fn parse_program<'a, 'e>(input: Input<'a, 'e>) -> (Program<'a>, Vec) { let (rest_input, (tl_doc_comment, statements)) = all_consuming(pair( @@ -2689,10 +2718,11 @@ mod new { /// Parse a fact of the form `predicateName(term1, term2, …).` fn parse_fact<'a, 'e>(input: Input<'a, 'e>) -> IResult, Statement<'a>> { + dbg!(&input.parser_state.labels); tuple(( opt(lex_doc_comment), parse_normal_atom, - opt(lex_whitespace), + wsoc0, expect( report_label(dot, ParserLabel::Dot), "missing `.`", @@ -2722,11 +2752,11 @@ mod new { tuple(( opt(lex_doc_comment), parse_head, - opt(lex_whitespace), + wsoc0, report_label(arrow, ParserLabel::Arrow), - opt(lex_whitespace), + wsoc0, parse_body, - opt(lex_whitespace), + wsoc0, expect( report_label(dot, ParserLabel::Dot), "missing `.`", @@ -2787,9 +2817,9 @@ mod new { at, verify(lex_ident, |token| token.kind == TokenKind::Base), )), - opt(lex_whitespace), + wsoc0, report_label(lex_iri, ParserLabel::Iri), - opt(lex_whitespace), + wsoc0, expect( report_label(dot, ParserLabel::Dot), "missing `.`", @@ -2829,11 +2859,11 @@ mod new { at, verify(lex_ident, |token| token.kind == TokenKind::Prefix), )), - opt(lex_whitespace), + wsoc0, report_label(recognize(pair(lex_ident, colon)), ParserLabel::Prefix), - opt(lex_whitespace), + wsoc0, report_label(lex_iri, ParserLabel::Iri), - opt(lex_whitespace), + wsoc0, expect( report_label(dot, ParserLabel::Dot), "missing `.`", @@ -2880,13 +2910,13 @@ mod new { at, verify(lex_ident, |token| token.kind == TokenKind::Import), )), - lex_whitespace, + wsoc1, report_label(lex_ident, ParserLabel::Identifier), - opt(lex_whitespace), + wsoc0, report_label(arrow, ParserLabel::Arrow), - opt(lex_whitespace), + wsoc0, parse_map, - opt(lex_whitespace), + wsoc0, expect( report_label(dot, ParserLabel::Dot), "missing `.`", @@ -2932,13 +2962,13 @@ mod new { at, verify(lex_ident, |token| token.kind == TokenKind::Export), )), - lex_whitespace, + wsoc1, report_label(lex_ident, ParserLabel::Identifier), - opt(lex_whitespace), + wsoc0, report_label(arrow, ParserLabel::Arrow), - opt(lex_whitespace), + wsoc0, parse_map, - opt(lex_whitespace), + wsoc0, expect( report_label(dot, ParserLabel::Dot), "missing `.`", @@ -2984,9 +3014,9 @@ mod new { at, verify(lex_ident, |token| token.kind == TokenKind::Output), )), - lex_whitespace, + wsoc1, opt(parse_list(lex_ident)), - opt(lex_whitespace), + wsoc0, expect( report_label(dot, ParserLabel::Dot), "missing `.`", @@ -3047,25 +3077,18 @@ mod new { parse_t: fn(Input<'a, 'e>) -> IResult, T>, ) -> impl Fn(Input<'a, 'e>) -> IResult, List<'a, T>> { move |input: Input<'a, 'e>| { - pair( - parse_t, - many0(tuple(( - opt(lex_whitespace), - comma, - opt(lex_whitespace), - parse_t, - ))), - )(input) - .map(|(rest_input, (first, rest))| { - ( - rest_input, - List { - span: outer_span(input.input, rest_input.input), - first, - rest: if rest.is_empty() { None } else { Some(rest) }, - }, - ) - }) + pair(parse_t, many0(tuple((wsoc0, comma, wsoc0, parse_t))))(input).map( + |(rest_input, (first, rest))| { + ( + rest_input, + List { + span: outer_span(input.input, rest_input.input), + first, + rest: if rest.is_empty() { None } else { Some(rest) }, + }, + ) + }, + ) } } @@ -3107,26 +3130,21 @@ mod new { /// Parse an "infix atom" of the form `term1 term2`. /// The supported infix operations are `<`, `<=`, `=`, `>=`, `>` and `!=`. 
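
Note: `parse_list` above is a higher-order combinator: one leading element, then any number of `trivia , trivia element` groups, with the separators kept so the AST can reproduce the source text verbatim. The same shape over plain `&str`, with trivia reduced to whitespace (a sketch under those simplifications):

```rust
use nom::{
    bytes::complete::tag,
    character::complete::{alpha1, multispace0},
    multi::many0,
    sequence::{delimited, pair},
    IResult,
};

// list := item (ws ',' ws item)* ; the separator is returned, not dropped.
fn list<'a, T>(
    item: fn(&'a str) -> IResult<&'a str, T>,
) -> impl Fn(&'a str) -> IResult<&'a str, (T, Vec<(&'a str, T)>)> {
    move |input| {
        pair(
            item,
            many0(pair(delimited(multispace0, tag(","), multispace0), item)),
        )(input)
    }
}

fn main() {
    let idents = list(alpha1);
    let (rest, (first, others)) = idents("a, b ,c").unwrap();
    assert_eq!(rest, "");
    assert_eq!(first, "a");
    let tail: Vec<_> = others.into_iter().map(|(_, x)| x).collect();
    assert_eq!(tail, vec!["b", "c"]);
}
```
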
fn parse_infix_atom<'a, 'e>(input: Input<'a, 'e>) -> IResult, Atom<'a>> { - tuple(( - parse_term, - opt(lex_whitespace), - parse_operation_token, - opt(lex_whitespace), - parse_term, - ))(input) - .map(|(rest_input, (lhs, ws1, operation, ws2, rhs))| { - ( - rest_input, - Atom::InfixAtom { - span: outer_span(input.input, rest_input.input), - lhs, - ws1, - operation, - ws2, - rhs, - }, - ) - }) + tuple((parse_term, wsoc0, parse_operation_token, wsoc0, parse_term))(input).map( + |(rest_input, (lhs, ws1, operation, ws2, rhs))| { + ( + rest_input, + Atom::InfixAtom { + span: outer_span(input.input, rest_input.input), + lhs, + ws1, + operation, + ws2, + rhs, + }, + ) + }, + ) } /// Parse a tuple with an optional name, like `ident(term1, term2)` @@ -3134,11 +3152,11 @@ mod new { fn parse_tuple<'a, 'e>(input: Input<'a, 'e>) -> IResult, Tuple<'a>> { tuple(( opt(lex_ident), - opt(lex_whitespace), + wsoc0, open_paren, - opt(lex_whitespace), + wsoc0, opt(parse_list(parse_term)), - opt(lex_whitespace), + wsoc0, report_label(close_paren, ParserLabel::CloseParen), ))(input) .map( @@ -3165,11 +3183,11 @@ mod new { fn parse_named_tuple<'a, 'e>(input: Input<'a, 'e>) -> IResult, Tuple<'a>> { tuple(( lex_ident, - opt(lex_whitespace), + wsoc0, report_label(open_paren, ParserLabel::OpenParen), - opt(lex_whitespace), + wsoc0, opt(parse_list(parse_term)), - opt(lex_whitespace), + wsoc0, report_label(close_paren, ParserLabel::CloseParen), ))(input) .map( @@ -3196,11 +3214,11 @@ mod new { fn parse_map<'a, 'e>(input: Input<'a, 'e>) -> IResult, Map<'a>> { tuple(( opt(lex_ident), - opt(lex_whitespace), + wsoc0, open_brace, - opt(lex_whitespace), + wsoc0, opt(parse_list(parse_pair)), - opt(lex_whitespace), + wsoc0, report_label(close_brace, ParserLabel::CloseBrace), ))(input) .map( @@ -3266,9 +3284,9 @@ mod new { ) -> IResult, Pair<'a, Term<'a>, Term<'a>>> { tuple(( parse_term, - opt(lex_whitespace), + wsoc0, report_label(equal, ParserLabel::Equal), - opt(lex_whitespace), + wsoc0, parse_term, ))(input) .map(|(rest_input, (key, ws1, equal, ws2, value))| { @@ -3451,12 +3469,7 @@ mod new { fn parse_binary_term<'a, 'e>(input: Input<'a, 'e>) -> IResult, Term<'a>> { pair( parse_arithmetic_product, - opt(tuple(( - opt(lex_whitespace), - alt((plus, minus)), - opt(lex_whitespace), - parse_binary_term, - ))), + opt(tuple((wsoc0, alt((plus, minus)), wsoc0, parse_binary_term))), )(input) .map(|(rest_input, (lhs, opt))| { ( @@ -3483,9 +3496,9 @@ mod new { pair( parse_arithmetic_factor, opt(tuple(( - opt(lex_whitespace), + wsoc0, alt((star, slash)), - opt(lex_whitespace), + wsoc0, parse_arithmetic_product, ))), )(input) @@ -3544,9 +3557,9 @@ mod new { tuple(( recognize(pair(hash, lex_ident)), report_label(open_paren, ParserLabel::OpenParen), - opt(lex_whitespace), + wsoc0, parse_list(parse_term), - opt(lex_whitespace), + wsoc0, report_label(close_paren, ParserLabel::CloseParen), ))(input) .map( @@ -3782,9 +3795,12 @@ mod new { kind: TokenKind::Base, span: s!(0, 1, "@base"), }, - ws1: Some(Token { - kind: TokenKind::Whitespace, - span: s!(5, 1, " ") + ws1: Some(Wsoc { + span: s!(5, 1, " "), + token: vec![Token { + kind: TokenKind::Whitespace, + span: s!(5, 1, " ") + }] }), base_iri: Token { kind: TokenKind::Iri, @@ -3807,9 +3823,12 @@ mod new { kind: TokenKind::Prefix, span: s!(32, 1, "@prefix"), }, - ws1: Some(Token { - kind: TokenKind::Whitespace, - span: s!(39, 1, " ") + ws1: Some(Wsoc { + span: s!(39, 1, " "), + token: vec![Token { + kind: TokenKind::Whitespace, + span: s!(39, 1, " ") + }] }), prefix: Token { kind: 
TokenKind::Ident, @@ -3837,9 +3856,12 @@ mod new { kind: TokenKind::Import, span: s!(85, 1, "@import"), }, - ws1: Token { - kind: TokenKind::Whitespace, - span: s!(92, 1, " "), + ws1: Wsoc { + span: s!(91, 1, " "), + token: vec![Token { + kind: TokenKind::Whitespace, + span: s!(92, 1, " "), + }] }, predicate: Token { kind: TokenKind::Ident, @@ -3903,9 +3925,12 @@ mod new { kind: TokenKind::Export, span: s!(136, 1, "@export"), }, - ws1: Token { - kind: TokenKind::Whitespace, + ws1: Wsoc { span: s!(143, 1, " "), + token: vec![Token { + kind: TokenKind::Whitespace, + span: s!(143, 1, " "), + }] }, predicate: Token { kind: TokenKind::Ident, @@ -3949,9 +3974,12 @@ mod new { kind: TokenKind::Output, span: s!(153, 1, "@output") }, - ws1: Token { - kind: TokenKind::Whitespace, + ws1: Wsoc { span: s!(160, 1, " "), + token: vec![Token { + kind: TokenKind::Whitespace, + span: s!(160, 1, " "), + }] }, predicates: Some(List { span: s!(161, 1, "a, b, c"), @@ -3966,9 +3994,12 @@ mod new { kind: TokenKind::Comma, span: s!(162, 1, ","), }, - Some(Token { - kind: TokenKind::Whitespace, + Some(Wsoc { span: s!(163, 1, " "), + token: vec![Token { + kind: TokenKind::Whitespace, + span: s!(163, 1, " "), + }] }), Token { kind: TokenKind::Ident, @@ -3981,9 +4012,12 @@ mod new { kind: TokenKind::Comma, span: s!(165, 1, ","), }, - Some(Token { - kind: TokenKind::Whitespace, + Some(Wsoc { span: s!(166, 1, " "), + token: vec![Token { + kind: TokenKind::Whitespace, + span: s!(166, 1, " "), + }] }), Token { kind: TokenKind::Ident, @@ -4066,9 +4100,12 @@ mod new { kind: TokenKind::Comma, span: s!(9, 1, ","), }, - Some(Token { - kind: TokenKind::Whitespace, + Some(Wsoc { span: s!(10, 1, " "), + token: vec![Token { + kind: TokenKind::Whitespace, + span: s!(10, 1, " "), + }] }), Term::Primitive(Primitive::Constant(Token { kind: TokenKind::Ident, @@ -4081,9 +4118,12 @@ mod new { kind: TokenKind::Comma, span: s!(15, 1, ","), }, - Some(Token { - kind: TokenKind::Whitespace, + Some(Wsoc { span: s!(16, 1, " "), + token: vec![Token { + kind: TokenKind::Whitespace, + span: s!(16, 1, " "), + }] }), Term::Primitive(Primitive::Constant(Token { kind: TokenKind::Ident, @@ -4098,9 +4138,12 @@ mod new { span: s!(27, 1, ")") }, }), - ws: Some(Token { - kind: TokenKind::Whitespace, + ws: Some(Wsoc { span: s!(28, 1, " "), + token: vec![Token { + kind: TokenKind::Whitespace, + span: s!(28, 1, " "), + }] }), dot: Token { kind: TokenKind::Dot, @@ -4878,5 +4921,22 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters println!("{}\n\n{:#?}", result.0, result.1); // assert!(false); } + + #[test] + fn wsoc() { + let input = Span::new(" \t\n % first comment\n % second comment\n"); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let parser_state = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state, + }; + dbg!(wsoc0(input)); + dbg!(wsoc1(input)); + } } } diff --git a/nemo/src/io/parser/ast.rs b/nemo/src/io/parser/ast.rs index 539d991ce..3ed70e89d 100644 --- a/nemo/src/io/parser/ast.rs +++ b/nemo/src/io/parser/ast.rs @@ -27,12 +27,51 @@ pub(crate) struct Position { pub(crate) column: u32, } +/// Whitespace or Comment token +#[derive(Debug, Clone, PartialEq)] +pub(crate) struct Wsoc<'a> { + pub(crate) span: Span<'a>, + pub(crate) token: Vec> +} +impl AstNode for Wsoc<'_> { + fn children(&self) -> Option> { + if self.token.is_empty() { + None + } else { + #[allow(trivial_casts)] + Some(self.token.iter().map(|t| t as &dyn 
AstNode).collect()) + } + } + + fn span(&self) -> Span { + self.span + } + + fn position(&self) -> Position { + Position { offset: self.span.location_offset(), line: self.span.location_line(), column: self.span.get_utf8_column() as u32 } + } + + fn is_token(&self) -> bool { + false + } + + fn name(&self) -> String { + format!("Wsoc \x1b[34m@{}:{} \x1b[92m{:?}\x1b[0m", self.span.location_line(), self.span.get_utf8_column(), self.span.fragment()) + } +} +impl Display for Wsoc<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + todo!() + } +} + + #[derive(Debug, Clone, PartialEq)] pub(crate) struct List<'a, T> { pub(crate) span: Span<'a>, pub(crate) first: T, // ([ws]?[,][ws]?[T])* - pub(crate) rest: Option>, Token<'a>, Option>, T)>>, + pub(crate) rest: Option>, Token<'a>, Option>, T)>>, } impl List<'_, T> { pub fn to_vec(&self) -> Vec { @@ -178,19 +217,13 @@ mod test { kw: Token{ kind:TokenKind::Prefix, span:s!(125,4,"@prefix") - } , - ws1:Some(Token{ - kind:TokenKind::Whitespace, - span:s!(132,4," ") - }) , + }, + ws1:Some(Wsoc {span: s!(132, 4, " "), token: vec![Token{kind:TokenKind::Whitespace,span:s!(132,4," ")}] }), prefix: Token { kind: TokenKind::PrefixIdent, span: s!(133, 4, "xsd:"), }, - ws2: Some(Token{ - kind:TokenKind::Whitespace, - span:s!(137,4," ") - }), + ws2: Some(Wsoc {span: s!(137, 4, " "), token: vec![Token{kind:TokenKind::Whitespace,span:s!(137,4," ")}] }), prefix_iri: Token { kind: TokenKind::Iri, span: s!(138, 4, ""), @@ -239,10 +272,7 @@ mod test { kind: TokenKind::Comma, span: s!(242, 8, ","), }, - Some(Token { - kind: TokenKind::Whitespace, - span: s!(243, 8, " "), - }), + Some(Wsoc {span: s!(243, 8, " "), token: vec![Token{kind:TokenKind::Whitespace,span:s!(243,8," "),}] }), Term::Primitive(Primitive::Constant(Token { kind: TokenKind::Ident, span: s!(244, 8, "ConstB"), @@ -296,9 +326,9 @@ mod test { }), rest: None, }, - ws1: Some(Token{kind:TokenKind::Whitespace,span:s!(310,12," ")}), + ws1: Some(Wsoc {span: s!(310, 12, " "), token: vec![Token{kind:TokenKind::Whitespace,span:s!(310,12," ")}] }), arrow: Token{kind:TokenKind::Arrow, span:s!(311,12,":-")}, - ws2: Some(Token{kind:TokenKind::Whitespace,span:s!(313,12," ")}), + ws2: Some(Wsoc {span: s!(313, 12, " "), token: vec![Token{kind:TokenKind::Whitespace,span:s!(313,12," ")}] }), body: List { span: s!(314, 12, "somePredicate(?VarA, ConstB)"), first: Atom::Positive(Tuple { @@ -322,10 +352,7 @@ mod test { kind: TokenKind::Comma, span: s!(333, 12, ","), }, - Some(Token { - kind: TokenKind::Whitespace, - span: s!(334, 12, " "), - }), + Some(Wsoc {span: s!(334, 12, " "), token: vec![Token{kind:TokenKind::Whitespace,span:s!(334,12," "),}] }), Term::Primitive(Primitive::Constant(Token { kind: TokenKind::Ident, span: s!(335, 12, "ConstB"), diff --git a/nemo/src/io/parser/ast/atom.rs b/nemo/src/io/parser/ast/atom.rs index 75a9d4d4d..46dbf0ac4 100644 --- a/nemo/src/io/parser/ast/atom.rs +++ b/nemo/src/io/parser/ast/atom.rs @@ -1,6 +1,6 @@ use super::term::Term; use super::tuple::Tuple; -use super::{ast_to_ascii_tree, AstNode}; +use super::{ast_to_ascii_tree, AstNode, Wsoc}; use super::{map::Map, Position}; use crate::io::lexer::{Span, Token}; use ascii_tree::write_tree; @@ -16,9 +16,9 @@ pub(crate) enum Atom<'a> { InfixAtom { span: Span<'a>, lhs: Term<'a>, - ws1: Option>, + ws1: Option>, operation: Token<'a>, - ws2: Option>, + ws2: Option>, rhs: Term<'a>, }, Map(Map<'a>), diff --git a/nemo/src/io/parser/ast/directive.rs b/nemo/src/io/parser/ast/directive.rs index fd13de1d8..8b04c3c91 100644 --- 
a/nemo/src/io/parser/ast/directive.rs +++ b/nemo/src/io/parser/ast/directive.rs @@ -1,5 +1,5 @@ use super::map::Map; -use super::{ast_to_ascii_tree, AstNode, List, Position}; +use super::{ast_to_ascii_tree, AstNode, List, Position, Wsoc}; use crate::io::lexer::{Span, Token}; use ascii_tree::write_tree; @@ -10,9 +10,9 @@ pub(crate) enum Directive<'a> { span: Span<'a>, doc_comment: Option>, kw: Token<'a>, - ws1: Option>, + ws1: Option>, base_iri: Token<'a>, - ws2: Option>, + ws2: Option>, dot: Token<'a>, }, // "@prefix wikidata: ." @@ -20,11 +20,11 @@ pub(crate) enum Directive<'a> { span: Span<'a>, doc_comment: Option>, kw: Token<'a>, - ws1: Option>, + ws1: Option>, prefix: Token<'a>, - ws2: Option>, + ws2: Option>, prefix_iri: Token<'a>, - ws3: Option>, + ws3: Option>, dot: Token<'a>, }, // "@import table :- csv{resource="path/to/file.csv"} ." @@ -32,13 +32,13 @@ pub(crate) enum Directive<'a> { span: Span<'a>, doc_comment: Option>, kw: Token<'a>, - ws1: Token<'a>, + ws1: Wsoc<'a>, predicate: Token<'a>, - ws2: Option>, + ws2: Option>, arrow: Token<'a>, - ws3: Option>, + ws3: Option>, map: Map<'a>, - ws4: Option>, + ws4: Option>, dot: Token<'a>, }, // "@export result :- turtle{resource="out.ttl"} ." @@ -46,13 +46,13 @@ pub(crate) enum Directive<'a> { span: Span<'a>, doc_comment: Option>, kw: Token<'a>, - ws1: Token<'a>, + ws1: Wsoc<'a>, predicate: Token<'a>, - ws2: Option>, + ws2: Option>, arrow: Token<'a>, - ws3: Option>, + ws3: Option>, map: Map<'a>, - ws4: Option>, + ws4: Option>, dot: Token<'a>, }, // "@output A, B, C." @@ -60,9 +60,9 @@ pub(crate) enum Directive<'a> { span: Span<'a>, doc_comment: Option>, kw: Token<'a>, - ws1: Token<'a>, + ws1: Wsoc<'a>, predicates: Option>>, - ws2: Option>, + ws2: Option>, dot: Token<'a>, }, } diff --git a/nemo/src/io/parser/ast/map.rs b/nemo/src/io/parser/ast/map.rs index 626d93aa6..473d01e92 100644 --- a/nemo/src/io/parser/ast/map.rs +++ b/nemo/src/io/parser/ast/map.rs @@ -1,5 +1,5 @@ use super::term::Term; -use super::{ast_to_ascii_tree, AstNode, List, Position}; +use super::{ast_to_ascii_tree, AstNode, List, Position, Wsoc}; use crate::io::lexer::{Span, Token}; use ascii_tree::write_tree; use std::fmt::Debug; @@ -8,11 +8,11 @@ use std::fmt::Debug; pub(crate) struct Map<'a> { pub(crate) span: Span<'a>, pub(crate) identifier: Option>, - pub(crate) ws1: Option>, + pub(crate) ws1: Option>, pub(crate) open_brace: Token<'a>, - pub(crate) ws2: Option>, + pub(crate) ws2: Option>, pub(crate) pairs: Option, Term<'a>>>>, - pub(crate) ws3: Option>, + pub(crate) ws3: Option>, pub(crate) close_brace: Token<'a>, } impl AstNode for Map<'_> { @@ -70,9 +70,9 @@ impl std::fmt::Display for Map<'_> { pub(crate) struct Pair<'a, K, V> { pub(crate) span: Span<'a>, pub(crate) key: K, - pub(crate) ws1: Option>, + pub(crate) ws1: Option>, pub(crate) equal: Token<'a>, - pub(crate) ws2: Option>, + pub(crate) ws2: Option>, pub(crate) value: V, } impl AstNode for Pair<'_, K, V> { diff --git a/nemo/src/io/parser/ast/statement.rs b/nemo/src/io/parser/ast/statement.rs index 383b58272..a0762d0c8 100644 --- a/nemo/src/io/parser/ast/statement.rs +++ b/nemo/src/io/parser/ast/statement.rs @@ -1,6 +1,6 @@ use super::atom::Atom; use super::directive::Directive; -use super::{ast_to_ascii_tree, AstNode, List, Position}; +use super::{ast_to_ascii_tree, AstNode, List, Position, Wsoc}; use crate::io::lexer::{Span, Token}; use ascii_tree::write_tree; @@ -11,18 +11,18 @@ pub(crate) enum Statement<'a> { span: Span<'a>, doc_comment: Option>, atom: Atom<'a>, - ws: Option>, + ws: Option>, dot: 
Token<'a>, }, Rule { span: Span<'a>, doc_comment: Option>, head: List<'a, Atom<'a>>, - ws1: Option>, + ws1: Option>, arrow: Token<'a>, - ws2: Option>, + ws2: Option>, body: List<'a, Atom<'a>>, - ws3: Option>, + ws3: Option>, dot: Token<'a>, }, Whitespace(Token<'a>), diff --git a/nemo/src/io/parser/ast/term.rs b/nemo/src/io/parser/ast/term.rs index 17eddee16..060269687 100644 --- a/nemo/src/io/parser/ast/term.rs +++ b/nemo/src/io/parser/ast/term.rs @@ -1,6 +1,6 @@ use super::map::Map; use super::tuple::Tuple; -use super::{ast_to_ascii_tree, AstNode, List, Position}; +use super::{ast_to_ascii_tree, AstNode, List, Position, Wsoc}; use crate::io::lexer::{Span, Token}; use ascii_tree::write_tree; @@ -18,18 +18,18 @@ pub(crate) enum Term<'a> { Binary { span: Span<'a>, lhs: Box>, - ws1: Option>, + ws1: Option>, operation: Token<'a>, - ws2: Option>, + ws2: Option>, rhs: Box>, }, Aggregation { span: Span<'a>, operation: Token<'a>, open_paren: Token<'a>, - ws1: Option>, + ws1: Option>, terms: Box>>, - ws2: Option>, + ws2: Option>, close_paren: Token<'a>, }, Tuple(Box>), diff --git a/nemo/src/io/parser/ast/tuple.rs b/nemo/src/io/parser/ast/tuple.rs index 4b9e9da9f..f074eb6a1 100644 --- a/nemo/src/io/parser/ast/tuple.rs +++ b/nemo/src/io/parser/ast/tuple.rs @@ -1,5 +1,5 @@ use super::term::Term; -use super::{ast_to_ascii_tree, AstNode, List, Position}; +use super::{ast_to_ascii_tree, AstNode, List, Position, Wsoc}; use crate::io::lexer::{Span, Token}; use ascii_tree::write_tree; @@ -7,11 +7,11 @@ use ascii_tree::write_tree; pub(crate) struct Tuple<'a> { pub(crate) span: Span<'a>, pub(crate) identifier: Option>, - pub(crate) ws1: Option>, + pub(crate) ws1: Option>, pub(crate) open_paren: Token<'a>, - pub(crate) ws2: Option>, + pub(crate) ws2: Option>, pub(crate) terms: Option>>, - pub(crate) ws3: Option>, + pub(crate) ws3: Option>, pub(crate) close_paren: Token<'a>, } impl AstNode for Tuple<'_> { From 4f8b90ba4efe5e733d5358ba6fcf203785bca70f Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Wed, 29 May 2024 14:08:47 +0200 Subject: [PATCH 041/214] Fix test --- nemo/src/io/parser.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index 3a1394708..0bb7945a0 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -3857,7 +3857,7 @@ mod new { span: s!(85, 1, "@import"), }, ws1: Wsoc { - span: s!(91, 1, " "), + span: s!(92, 1, " "), token: vec![Token { kind: TokenKind::Whitespace, span: s!(92, 1, " "), From 91df274af138ad0fb34ca30caa3ca794af807270 Mon Sep 17 00:00:00 2001 From: logicallangs <> Date: Thu, 23 May 2024 11:44:08 +0100 Subject: [PATCH 042/214] feat: add language server --- Cargo.lock | 201 +++++-- Cargo.toml | 3 + nemo-language-server/Cargo.toml | 30 + nemo-language-server/README.md | 0 nemo-language-server/src/language_server.rs | 544 ++++++++++++++++++ .../src/language_server/nemo_position.rs | 69 +++ nemo-language-server/src/lib.rs | 19 + nemo-language-server/src/main.rs | 15 + nemo-wasm/Cargo.toml | 3 + nemo-wasm/README.md | 50 +- nemo-wasm/src/language_server.rs | 85 +++ nemo-wasm/src/lib.rs | 2 + nemo/Cargo.toml | 1 + nemo/src/io/lexer.rs | 31 +- nemo/src/io/parser.rs | 64 ++- nemo/src/io/parser/ast.rs | 102 +++- nemo/src/io/parser/ast/atom.rs | 41 +- nemo/src/io/parser/ast/directive.rs | 17 +- nemo/src/io/parser/ast/map.rs | 59 +- nemo/src/io/parser/ast/program.rs | 27 +- nemo/src/io/parser/ast/statement.rs | 27 +- nemo/src/io/parser/ast/term.rs | 98 +++- nemo/src/io/parser/ast/tuple.rs | 33 +- 
nemo/src/io/parser/types.rs | 34 +- 24 files changed, 1361 insertions(+), 194 deletions(-) create mode 100644 nemo-language-server/Cargo.toml create mode 100644 nemo-language-server/README.md create mode 100644 nemo-language-server/src/language_server.rs create mode 100644 nemo-language-server/src/language_server/nemo_position.rs create mode 100644 nemo-language-server/src/lib.rs create mode 100644 nemo-language-server/src/main.rs create mode 100644 nemo-wasm/src/language_server.rs diff --git a/Cargo.lock b/Cargo.lock index 46134e7f8..31fb2db39 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -147,6 +147,17 @@ dependencies = [ "tempfile", ] +[[package]] +name = "async-trait" +version = "0.1.79" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a507401cad91ec6a857ed5513a2073c82a9b9048762b885bb98655b306964681" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + [[package]] name = "atomic-waker" version = "1.1.2" @@ -164,6 +175,17 @@ dependencies = [ "winapi", ] +[[package]] +name = "auto_impl" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c87f3f15e7794432337fc718554eaa4dc8f04c9677a950ffe366f20a162ae42" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + [[package]] name = "autocfg" version = "1.3.0" @@ -376,16 +398,6 @@ dependencies = [ "windows-sys 0.48.0", ] -[[package]] -name = "console_error_panic_hook" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a06aeb73f470f66dcdbf7223caeebb85984942f22f1adb2a088cf9668146bbbc" -dependencies = [ - "cfg-if", - "wasm-bindgen", -] - [[package]] name = "core-foundation" version = "0.9.4" @@ -457,6 +469,19 @@ dependencies = [ "memchr", ] +[[package]] +name = "dashmap" +version = "5.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" +dependencies = [ + "cfg-if", + "hashbrown", + "lock_api", + "once_cell", + "parking_lot_core", +] + [[package]] name = "delegate" version = "0.12.0" @@ -679,6 +704,20 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" +[[package]] +name = "futures" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "645c6916888f6cb6350d2550b80fb63e734897a8498abe35cfb732b6487804b0" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + [[package]] name = "futures-channel" version = "0.3.30" @@ -686,6 +725,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" dependencies = [ "futures-core", + "futures-sink", ] [[package]] @@ -694,6 +734,23 @@ version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" +[[package]] +name = "futures-io" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" + +[[package]] +name = "futures-macro" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + 
[[package]] name = "futures-sink" version = "0.3.30" @@ -712,10 +769,16 @@ version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" dependencies = [ + "futures-channel", "futures-core", + "futures-io", + "futures-macro", + "futures-sink", "futures-task", + "memchr", "pin-project-lite", "pin-utils", + "slab", ] [[package]] @@ -1068,6 +1131,19 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "lsp-types" +version = "0.94.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c66bfd44a06ae10647fe3f8214762e9369fd4248df1350924b4ef9e770a85ea1" +dependencies = [ + "bitflags 1.3.2", + "serde", + "serde_json", + "serde_repr", + "url", +] + [[package]] name = "macros" version = "0.0.1" @@ -1186,6 +1262,7 @@ dependencies = [ "test-log", "thiserror", "tokio", + "tower-lsp", "unicode-ident", ] @@ -1245,21 +1322,6 @@ dependencies = [ "pyo3", ] -[[package]] -name = "nemo-wasm" -version = "0.5.2-dev" -dependencies = [ - "console_error_panic_hook", - "js-sys", - "nemo", - "nemo-physical", - "thiserror", - "wasm-bindgen", - "wasm-bindgen-futures", - "wasm-bindgen-test", - "web-sys", -] - [[package]] name = "nom" version = "5.1.3" @@ -1953,12 +2015,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "scoped-tls" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" - [[package]] name = "scopeguard" version = "1.2.0" @@ -2019,6 +2075,17 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_repr" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c64451ba24fc7a6a2d60fc75dd9c83c90903b19028d4eff35e88fc1e86564e9" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + [[package]] name = "serde_urlencoded" version = "0.7.1" @@ -2312,6 +2379,40 @@ version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c20c8dbed6283a09604c3e69b4b7eeb54e298b8a600d4d5ecb5ad39de609f1d0" +[[package]] +name = "tower-lsp" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4ba052b54a6627628d9b3c34c176e7eda8359b7da9acd497b9f20998d118508" +dependencies = [ + "async-trait", + "auto_impl", + "bytes", + "dashmap", + "futures", + "httparse", + "lsp-types", + "memchr", + "serde", + "serde_json", + "tokio", + "tokio-util", + "tower", + "tower-lsp-macros", + "tracing", +] + +[[package]] +name = "tower-lsp-macros" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84fd902d4e0b9a4b27f2f440108dc034e1758628a9b702f8ec61ad66355422fa" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + [[package]] name = "tower-service" version = "0.3.2" @@ -2325,9 +2426,21 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" dependencies = [ "pin-project-lite", + "tracing-attributes", "tracing-core", ] +[[package]] +name = "tracing-attributes" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + [[package]] name = "tracing-core" version = "0.1.32" @@ -2420,6 +2533,7 @@ dependencies = [ "form_urlencoded", "idna", "percent-encoding", + 
"serde", ] [[package]] @@ -2552,31 +2666,6 @@ version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" -[[package]] -name = "wasm-bindgen-test" -version = "0.3.42" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9bf62a58e0780af3e852044583deee40983e5886da43a271dd772379987667b" -dependencies = [ - "console_error_panic_hook", - "js-sys", - "scoped-tls", - "wasm-bindgen", - "wasm-bindgen-futures", - "wasm-bindgen-test-macro", -] - -[[package]] -name = "wasm-bindgen-test-macro" -version = "0.3.42" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7f89739351a2e03cb94beb799d47fb2cac01759b40ec441f7de39b00cbf7ef0" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.66", -] - [[package]] name = "web-sys" version = "0.3.69" diff --git a/Cargo.toml b/Cargo.toml index 03c3bb78d..cde9888d4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,6 +5,9 @@ members = [ "nemo-cli", "nemo-physical", "nemo-python", +] +exclude = [ + "nemo-language-server", "nemo-wasm", ] diff --git a/nemo-language-server/Cargo.toml b/nemo-language-server/Cargo.toml new file mode 100644 index 000000000..6e16e33f3 --- /dev/null +++ b/nemo-language-server/Cargo.toml @@ -0,0 +1,30 @@ +[package] +name = "nemo-language-server" +description.workspace = true +version.workspace = true +authors.workspace = true +edition.workspace = true +homepage.workspace = true +license.workspace = true +readme = "README.md" +repository.workspace = true +default-run = "nemo-language-server" + +[[bin]] +name = "nemo-language-server" +path = "src/main.rs" +required-features = ["tokio"] + +[features] +default = ["tokio"] +# Allows building for web assembly environments +js = [] +tokio = ["dep:tokio"] + +[dependencies] +line-index = "0.1.1" +nemo = { path = "../nemo", default-features = false } +futures = "0.3.21" +tokio = { version = "1.27.0", features = ["full"], optional = true } +tower-lsp = { version = "0.20.0", default-features = false } +tower-service = "0.3.2" diff --git a/nemo-language-server/README.md b/nemo-language-server/README.md new file mode 100644 index 000000000..e69de29bb diff --git a/nemo-language-server/src/language_server.rs b/nemo-language-server/src/language_server.rs new file mode 100644 index 000000000..35acce8c6 --- /dev/null +++ b/nemo-language-server/src/language_server.rs @@ -0,0 +1,544 @@ +use std::collections::HashMap; +use std::vec; + +use futures::lock::Mutex; +use line_index::{LineCol, LineIndex, WideEncoding}; +use nemo::io::parser::ast::program::Program; +use nemo::io::parser::ast::{AstNode, Position}; +use nemo::io::parser::new::parse_program_str; +use nemo_position::{ + lsp_position_to_nemo_position, nemo_position_to_lsp_position, PositionConversionError, +}; +use tower_lsp::lsp_types::{ + CompletionOptions, Diagnostic, DidChangeTextDocumentParams, DidOpenTextDocumentParams, + DocumentChangeOperation, DocumentChanges, DocumentSymbol, DocumentSymbolOptions, + DocumentSymbolParams, DocumentSymbolResponse, InitializeParams, InitializeResult, + InitializedParams, Location, MessageType, OneOf, OptionalVersionedTextDocumentIdentifier, + PrepareRenameResponse, Range, ReferenceParams, RenameOptions, RenameParams, ServerCapabilities, + TextDocumentEdit, TextDocumentPositionParams, TextDocumentSyncCapability, TextDocumentSyncKind, + TextEdit, Url, VersionedTextDocumentIdentifier, WorkDoneProgressOptions, WorkspaceEdit, +}; +use tower_lsp::{Client, LanguageServer}; + 
+mod nemo_position;
+
+#[derive(Debug)]
+pub struct Backend {
+    client: Client,
+    state: Mutex<BackendState>, // TODO: Replace with RwLock, see https://github.com/rust-lang/futures-rs/pull/2082
+}
+
+#[derive(Debug)]
+pub(crate) struct BackendState {
+    text_document_store: HashMap<Url, TextDocumentInfo>,
+}
+
+#[derive(Debug, Clone)]
+struct TextDocumentInfo {
+    /// Content of the text document
+    text: String,
+    // Version information so that the language client can check if the server operated on the up-to-date version
+    version: i32,
+}
+
+/// Converts a source position to an LSP position
+pub(crate) fn line_col_to_position(
+    line_index: &LineIndex,
+    line_col: LineCol,
+) -> Result<tower_lsp::lsp_types::Position, ()> {
+    let wide_line_col = line_index
+        .to_wide(WideEncoding::Utf16, line_col)
+        .ok_or(())?;
+
+    Ok(tower_lsp::lsp_types::Position {
+        line: wide_line_col.line,
+        character: wide_line_col.col,
+    })
+}
+
+impl Backend {
+    pub fn new(client: Client) -> Self {
+        Self {
+            client,
+            state: Mutex::new(BackendState {
+                text_document_store: HashMap::new(),
+            }),
+        }
+    }
+
+    async fn handle_change(&self, text_document: VersionedTextDocumentIdentifier, text: &str) {
+        self.state.lock().await.text_document_store.insert(
+            text_document.uri.clone(),
+            TextDocumentInfo {
+                text: text.to_string(),
+                version: text_document.version,
+            },
+        );
+
+        let line_index = LineIndex::new(text);
+
+        let (_program, errors) = parse_program_str(text);
+
+        let diagnostics = errors
+            .into_iter()
+            .map(|error| Diagnostic {
+                message: error.1,
+                range: Range::new(
+                    line_col_to_position(
+                        &line_index,
+                        LineCol {
+                            line: error.0.line - 1,
+                            col: error.0.column - 1,
+                        },
+                    )
+                    .unwrap(),
+                    line_col_to_position(
+                        &line_index,
+                        LineCol {
+                            line: error.0.line - 1,
+                            col: error.0.column - 1 + 1,
+                        },
+                    )
+                    .unwrap(),
+                ),
+                ..Default::default()
+            })
+            .collect();
+
+        self.client
+            .publish_diagnostics(
+                text_document.uri.clone(),
+                diagnostics,
+                Some(text_document.version),
+            )
+            .await;
+    }
+
+    async fn read_text_document_info(&self, uri: &Url) -> Option<TextDocumentInfo> {
+        if let Some(info) = self.state.lock().await.text_document_store.get(uri) {
+            Some(info.clone())
+        } else {
+            self.client
+                .log_message(
+                    MessageType::ERROR,
+                    format!("could not find text document with URI {uri}"),
+                )
+                .await;
+            None
+        }
+    }
+}
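The diagnostics flow in `handle_change` above turns each 1-based parser error position into a one-character-wide, 0-based LSP range. A small self-contained sketch of that index arithmetic (a toy function, not part of the patch), using the same `line-index` crate:

```rust
use line_index::LineCol;

/// Maps a 1-based (line, column) parser error position to a 0-based,
/// one-character-wide range, as handle_change does before converting to UTF-16.
fn error_range(line: u32, column: u32) -> (LineCol, LineCol) {
    let start = LineCol { line: line - 1, col: column - 1 };
    let end = LineCol { line: line - 1, col: column };
    (start, end)
}

fn main() {
    // An error at line 3, column 7 highlights (2, 6)..(2, 7).
    assert_eq!(error_range(3, 7), (LineCol { line: 2, col: 6 }, LineCol { line: 2, col: 7 }));
}
```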
+
+#[tower_lsp::async_trait]
+impl LanguageServer for Backend {
+    async fn initialize(
+        &self,
+        _: InitializeParams,
+    ) -> tower_lsp::jsonrpc::Result<InitializeResult> {
+        Ok(InitializeResult {
+            capabilities: ServerCapabilities {
+                text_document_sync: Some(TextDocumentSyncCapability::Kind(
+                    TextDocumentSyncKind::FULL,
+                )),
+                references_provider: Some(OneOf::Left(true)),
+                document_symbol_provider: Some(OneOf::Right(DocumentSymbolOptions {
+                    label: Some("Nemo".to_string()),
+                    work_done_progress_options: WorkDoneProgressOptions {
+                        ..Default::default()
+                    },
+                })),
+                rename_provider: Some(OneOf::Right(RenameOptions {
+                    prepare_provider: Some(true),
+                    work_done_progress_options: WorkDoneProgressOptions {
+                        ..Default::default()
+                    },
+                })),
+                completion_provider: Some(CompletionOptions {
+                    work_done_progress_options: WorkDoneProgressOptions {
+                        ..Default::default()
+                    },
+                    ..Default::default()
+                }),
+                ..Default::default()
+            },
+            ..Default::default()
+        })
+    }
+
+    async fn initialized(&self, _: InitializedParams) {
+        self.client
+            .log_message(MessageType::INFO, "server initialized")
+            .await;
+    }
+
+    async fn did_open(&self, params: DidOpenTextDocumentParams) {
+        self.handle_change(
+            VersionedTextDocumentIdentifier {
+                uri: params.text_document.uri,
+                version: params.text_document.version,
+            },
+            &params.text_document.text,
+        )
+        .await;
+    }
+
+    async fn did_change(&self, params: DidChangeTextDocumentParams) {
+        self.handle_change(params.text_document, &params.content_changes[0].text)
+            .await;
+    }
+
+    async fn references(
+        &self,
+        params: ReferenceParams,
+    ) -> tower_lsp::jsonrpc::Result<Option<Vec<Location>>> {
+        let info = self
+            .read_text_document_info(&params.text_document_position.text_document.uri)
+            .await;
+
+        match info {
+            Some(info) => {
+                let text = info.text;
+                let line_index = LineIndex::new(&text);
+                let position = lsp_position_to_nemo_position(
+                    &line_index,
+                    params.text_document_position.position,
+                )
+                .unwrap(); // TODO: handle unwrap
+
+                let (program, _errors) = parse_program_str(&text);
+
+                let node_path = find_in_ast(&program, position);
+
+                // Get the identifier most specific to the position
+                let identified_node = node_path_deepest_identifier(&node_path);
+                let identified_node = match identified_node {
+                    Some(identified_node) => identified_node,
+                    None => return Ok(None),
+                };
+
+                // Find other AST nodes with the same global identifier
+                let referenced_nodes =
+                    find_by_identifier(identified_node.scoping_node, &identified_node.identifier);
+
+                let locations = referenced_nodes
+                    .iter()
+                    .map(|node| Location {
+                        uri: params.text_document_position.text_document.uri.clone(),
+                        range: node_to_range_lsp(&line_index, *node),
+                    })
+                    .collect();
+
+                Ok(Some(locations))
+            }
+            None => Ok(None), // TODO: Handle error
+        }
+    }
+
+    async fn document_symbol(
+        &self,
+        params: DocumentSymbolParams,
+    ) -> tower_lsp::jsonrpc::Result<Option<DocumentSymbolResponse>> {
+        let info = self
+            .read_text_document_info(&params.text_document.uri)
+            .await;
+
+        match info {
+            Some(info) => {
+                let text = info.text;
+                let line_index = LineIndex::new(&text);
+
+                let (program, _errors) = parse_program_str(&text);
+
+                let document_symbol = ast_node_to_document_symbol(&line_index, &program);
+
+                if let Ok(document_symbol) = document_symbol {
+                    return Ok(document_symbol.map(|document_symbol| {
+                        DocumentSymbolResponse::Nested(document_symbol.children.unwrap())
+                    }));
+                }
+
+                Ok(None)
+            }
+            None => Ok(None), // TODO: Handle error
+        }
+    }
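Both `references` above and `rename` below hinge on the identifier/scope pairs that AST nodes expose: two nodes denote the same thing when their identifier strings match, and a node's scope string names the nearest enclosing node that may bound the search. A toy illustration of that convention (the string values mirror the `lsp_identifier` implementations introduced later in this patch; the helper itself is illustrative only):

```rust
/// A reference search for a node may stop at an ancestor iff the ancestor's
/// own identifier starts with the node's declared scope prefix.
fn bounds_search(ancestor_identifier: &str, node_scope: &str) -> bool {
    ancestor_identifier.starts_with(node_scope)
}

fn main() {
    // A rule variable is scoped to its statement: renaming `?x` in one rule
    // must not touch `?x` in another, so the enclosing statement bounds it.
    assert!(bounds_search("statement", "statement"));
    // An atom name is file-scoped, so no single statement bounds it.
    assert!(!bounds_search("statement", "file"));
}
```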
+
+    /// Finds references to the symbol that was renamed and sends edit operations to the language client
+    async fn rename(
+        &self,
+        params: RenameParams,
+    ) -> tower_lsp::jsonrpc::Result<Option<WorkspaceEdit>> {
+        let info = self
+            .read_text_document_info(&params.text_document_position.text_document.uri)
+            .await;
+
+        let info = match info {
+            Some(info) => info,
+            None => return Ok(None),
+        };
+
+        let text = info.text;
+        let line_index = LineIndex::new(&text);
+        let position =
+            lsp_position_to_nemo_position(&line_index, params.text_document_position.position)
+                .unwrap();
+
+        let (program, _errors) = parse_program_str(&text);
+
+        let node_path = find_in_ast(&program, position);
+
+        // Get the identifier most specific to the position
+        let identified_node = node_path_deepest_identifier(&node_path);
+        let identified_node = match identified_node {
+            Some(identified_node) => identified_node,
+            None => return Ok(None),
+        };
+
+        // Find other AST nodes with the same global identifier
+        let referenced_nodes =
+            find_by_identifier(identified_node.scoping_node, &identified_node.identifier);
+
+        let edit = TextDocumentEdit {
+            text_document: OptionalVersionedTextDocumentIdentifier {
+                uri: params.text_document_position.text_document.uri,
+                version: Some(info.version),
+            },
+            edits: referenced_nodes
+                .into_iter()
+                .filter_map(|node| {
+                    node.lsp_sub_node_to_rename().map(|renamed_node| {
+                        OneOf::Left(TextEdit {
+                            range: node_to_range_lsp(&line_index, renamed_node),
+                            new_text: params.new_name.clone(),
+                        })
+                    })
+                })
+                .collect(),
+        };
+
+        Ok(Some(WorkspaceEdit {
+            document_changes: Some(DocumentChanges::Operations(vec![
+                DocumentChangeOperation::Edit(edit),
+            ])),
+            ..Default::default()
+        }))
+    }
+
+    /// Tells the language client the range of the token that will be renamed
+    async fn prepare_rename(
+        &self,
+        params: TextDocumentPositionParams,
+    ) -> tower_lsp::jsonrpc::Result<Option<PrepareRenameResponse>> {
+        let info = self
+            .read_text_document_info(&params.text_document.uri)
+            .await;
+
+        let info = match info {
+            Some(info) => info,
+            None => return Ok(None),
+        };
+
+        let text = info.text;
+        let line_index = LineIndex::new(&text);
+        let position = lsp_position_to_nemo_position(&line_index, params.position).unwrap();
+
+        let (program, _errors) = parse_program_str(&text);
+
+        let node_path = find_in_ast(&program, position);
+
+        // Get the identifier most specific to the position
+        let identified_node = node_path_deepest_identifier(&node_path);
+
+        match identified_node {
+            Some(identified_node) => {
+                Ok(identified_node
+                    .node
+                    .lsp_sub_node_to_rename()
+                    .map(|renamed_node| {
+                        PrepareRenameResponse::Range(node_to_range_lsp(&line_index, renamed_node))
+                    }))
+            }
+            None => Ok(None),
+        }
+    }
+
+    async fn shutdown(&self) -> tower_lsp::jsonrpc::Result<()> {
+        Ok(())
+    }
+}
+
+struct IdentifiedNode<'a> {
+    node: &'a dyn AstNode,
+    identifier: String,
+    scoping_node: &'a dyn AstNode,
+}
+
+struct PartiallyIdentifiedNode<'a> {
+    node: &'a dyn AstNode,
+    identifier: String,
+    identifier_scope: String,
+}
+
+/// Get the identifier most specific to the position from the node path
+fn node_path_deepest_identifier<'a>(node_path: &[&'a dyn AstNode]) -> Option<IdentifiedNode<'a>> {
+    let mut info = None;
+
+    for node in node_path.iter().rev() {
+        match info {
+            None => {
+                if let Some((identifier, identifier_scope)) = node.lsp_identifier() {
+                    info = Some(PartiallyIdentifiedNode {
+                        node: *node,
+                        identifier,
+                        identifier_scope,
+                    });
+                }
+            }
+            Some(ref info) => {
+                if let Some(parent_identifier) = node.lsp_identifier()
+                    && parent_identifier.0.starts_with(&info.identifier_scope)
+                {
+                    return Some(IdentifiedNode {
+                        node: info.node,
+                        identifier: info.identifier.clone(),
+                        scoping_node: *node,
+                    });
+                }
+            }
+        }
+    }
+
+    info.map(|info| IdentifiedNode {
+        node: info.node,
+        identifier: info.identifier,
+        scoping_node: *node_path.first().unwrap(),
+    })
+}
+
+fn find_by_identifier<'a>(node: &'a dyn AstNode, identifier: &str) -> Vec<&'a dyn AstNode> {
+    let mut references = Vec::new();
+
+    find_by_identifier_recurse(node, identifier, &mut references);
+
+    references
+}
+
+fn find_by_identifier_recurse<'a>(
+    node: &'a dyn AstNode,
+    identifier: &str,
+    references: &mut Vec<&'a dyn AstNode>,
+) {
+    if node
+        .lsp_identifier()
+        .map(|(i, _)| i == identifier)
+        .unwrap_or(false)
+    {
+        references.push(node);
+    }
+
+    if let Some(children) = node.children() {
+        for child in children {
+            find_by_identifier_recurse(child, identifier, references);
+        }
+    };
+}
+
+fn find_in_ast<'a>(node: &'a Program<'a>, position: Position) -> Vec<&'a dyn AstNode> {
+    let mut path = Vec::new();
+
+    find_in_ast_recurse(node, position, &mut path);
+
+    path
+}
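`find_in_ast` returns the chain of nodes from the program root down to the cursor. The recursion below picks, on each level, roughly the last child that starts at or before the requested position. A sketch of that selection rule over plain byte offsets (a toy function, not part of the patch):

```rust
/// Toy version of the descent rule: given the (sorted) start offsets of a
/// node's children, choose the last child that begins at or before the cursor.
fn pick_child(child_starts: &[usize], cursor: usize) -> Option<usize> {
    child_starts
        .iter()
        .enumerate()
        .take_while(|(_, start)| **start <= cursor)
        .last()
        .map(|(index, _)| index)
}

fn main() {
    // The child starting at offset 10 is the one containing offset 12.
    assert_eq!(pick_child(&[0, 10, 25], 12), Some(1));
}
```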
+
+fn find_in_ast_recurse<'a>(
+    node: &'a dyn AstNode,
+    position: Position,
+    path: &mut Vec<&'a dyn AstNode>,
+) {
+    path.push(node);
+
+    if let Some(children) = node.children() {
+        for (child, next_child) in children.iter().zip(children.iter().skip(1)) {
+            if next_child.position() > position {
+                find_in_ast_recurse(*child, position, path);
+                return;
+            }
+        }
+        if let Some(child) = children.last() {
+            find_in_ast_recurse(*child, position, path);
+        }
+    };
+}
+
+fn node_to_range_lsp(line_index: &LineIndex, node: &dyn AstNode) -> Range {
+    Range {
+        start: nemo_position_to_lsp_position(line_index, node.position()).unwrap(), // TODO: Improve error handling
+        end: nemo_position_to_lsp_position(
+            line_index,
+            Position {
+                offset: node.position().offset + node.span().len(),
+                line: node.position().line + node.span().fragment().lines().count() as u32 - 1,
+                column: if node.span().fragment().lines().count() > 1 {
+                    1 + node.span().fragment().lines().last().unwrap().len() as u32 // TODO: Check if length is in correct encoding
+                } else {
+                    node.position().column + node.span().fragment().len() as u32 // TODO: Check if length is in correct encoding
+                },
+            },
+        )
+        .unwrap(),
+    }
+}
+
+fn ast_node_to_document_symbol(
+    line_index: &LineIndex,
+    node: &dyn AstNode,
+) -> Result<Option<DocumentSymbol>, PositionConversionError> {
+    let range = node_to_range_lsp(line_index, node);
+
+    let selection_range = range;
+
+    if let Some((name, kind)) = node.lsp_symbol_info() {
+        let children_results: Vec<_> = node
+            .children()
+            .into_iter()
+            .flatten()
+            .map(|child| ast_node_to_document_symbol(line_index, child))
+            .collect();
+        let mut children = Vec::with_capacity(children_results.len());
+        for child_result in children_results {
+            child_result?
+                .into_iter()
+                .for_each(|symbol| children.push(symbol))
+        }
+        let children = if children.is_empty() {
+            None
+        } else {
+            Some(children)
+        };
+
+        Ok(Some(
+            #[allow(deprecated)]
+            DocumentSymbol {
+                children,
+                detail: None,
+                kind,
+                name,
+                range,
+                selection_range,
+                tags: None,
+                deprecated: None,
+            },
+        ))
+    } else {
+        Ok(None)
+    }
+}
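`node_to_range_lsp` above derives a node's end position from its start position and the span text: on a multi-line fragment the end column restarts on the last line, otherwise it is the start column plus the fragment length. The same arithmetic as a standalone sketch over `(line, column)` pairs with 1-based columns, as in the parser (toy code, not part of the patch):

```rust
/// Toy version of the end-of-span arithmetic in `node_to_range_lsp`.
fn span_end(start_line: u32, start_column: u32, fragment: &str) -> (u32, u32) {
    let line_count = fragment.lines().count().max(1) as u32;
    if line_count > 1 {
        let last_line_len = fragment.lines().last().unwrap_or("").len() as u32;
        (start_line + line_count - 1, 1 + last_line_len)
    } else {
        (start_line, start_column + fragment.len() as u32)
    }
}

fn main() {
    assert_eq!(span_end(3, 5, "abc"), (3, 8)); // stays on line 3
    assert_eq!(span_end(3, 5, "ab\ncdef"), (4, 5)); // ends on line 4, column 1 + 4
}
```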
diff --git a/nemo-language-server/src/language_server/nemo_position.rs b/nemo-language-server/src/language_server/nemo_position.rs
new file mode 100644
index 000000000..4e155166e
--- /dev/null
+++ b/nemo-language-server/src/language_server/nemo_position.rs
@@ -0,0 +1,69 @@
+//! LSP position:
+//!
+//! * line: u32 index of the line, first line gets index 0
+//! * offset: u32 index of the UTF-16 code point within the line, first column gets index 0
+//!
+//! Nemo position:
+//!
+//! * line: u32 index of the line, first line gets index 1
+//! * offset: u32 index of the UTF-8 code point (byte) within the line, first column gets index 0
+
+use line_index::{LineCol, LineIndex, WideEncoding, WideLineCol};
+
+#[derive(Debug)]
+pub enum PositionConversionError {
+    NemoPosition(nemo::io::parser::ast::Position),
+    LspPosition(tower_lsp::lsp_types::Position),
+}
+
+fn line_col_to_nemo_position(
+    line_index: &LineIndex,
+    line_col: LineCol,
+) -> Result<nemo::io::parser::ast::Position, ()> {
+    Ok(nemo::io::parser::ast::Position {
+        line: line_col.line + 1,
+        column: line_col.col,
+        offset: line_index.offset(line_col).ok_or(())?.into(),
+    })
+}
+
+/// Converts an LSP position to a Nemo parser position
+pub fn lsp_position_to_nemo_position(
+    line_index: &LineIndex,
+    position: tower_lsp::lsp_types::Position,
+) -> Result<nemo::io::parser::ast::Position, PositionConversionError> {
+    let line_col = line_index
+        .to_utf8(
+            WideEncoding::Utf16,
+            WideLineCol {
+                line: position.line,
+                col: position.character,
+            },
+        )
+        .ok_or(PositionConversionError::LspPosition(position))?;
+
+    Ok(line_col_to_nemo_position(line_index, line_col).unwrap())
+}
+
+fn nemo_position_to_line_col(position: nemo::io::parser::ast::Position) -> LineCol {
+    LineCol {
+        line: position.line - 1,
+        col: position.column - 1,
+    }
+}
+
+/// Converts a source position to an LSP position
+pub fn nemo_position_to_lsp_position(
+    line_index: &LineIndex,
+    position: nemo::io::parser::ast::Position,
+) -> Result<tower_lsp::lsp_types::Position, PositionConversionError> {
+    // TODO: Find out what UTF encoding the nemo parser uses
+    let wide_line_col = line_index
+        .to_wide(WideEncoding::Utf16, nemo_position_to_line_col(position))
+        .ok_or(PositionConversionError::NemoPosition(position))?;
+
+    Ok(tower_lsp::lsp_types::Position {
+        line: wide_line_col.line,
+        character: wide_line_col.col,
+    })
+}
diff --git a/nemo-language-server/src/lib.rs b/nemo-language-server/src/lib.rs
new file mode 100644
index 000000000..de15066cd
--- /dev/null
+++ b/nemo-language-server/src/lib.rs
@@ -0,0 +1,19 @@
+#![feature(let_chains)]
+
+pub use language_server::Backend;
+
+pub use tower_lsp::{
+    jsonrpc::{Request, Response},
+    ClientSocket, ExitedError, LspService,
+};
+pub use tower_service::Service;
+
+mod language_server;
+
+pub fn create_language_server() -> (LspService<Backend>, ClientSocket) {
+    LspService::new(Backend::new)
+}
+
+// See https://doc.rust-lang.org/cargo/reference/features.html#mutually-exclusive-features
+#[cfg(all(feature = "js", feature = "tokio"))]
+compile_error!("feature \"js\" and feature \"tokio\" cannot be enabled at the same time");
diff --git a/nemo-language-server/src/main.rs b/nemo-language-server/src/main.rs
new file mode 100644
index 000000000..6a000d4af
--- /dev/null
+++ b/nemo-language-server/src/main.rs
@@ -0,0 +1,15 @@
+#![feature(let_chains)]
+
+use language_server::Backend;
+use tower_lsp::{LspService, Server};
+
+mod language_server;
+
+#[tokio::main]
+async fn main() {
+    let stdin = tokio::io::stdin();
+    let stdout = tokio::io::stdout();
+
+    let (service, socket) = LspService::new(Backend::new);
+    Server::new(stdin, stdout, socket).serve(service).await;
+}
diff --git a/nemo-wasm/Cargo.toml b/nemo-wasm/Cargo.toml
index 5054e1b31..432406182 100644
--- a/nemo-wasm/Cargo.toml
+++ b/nemo-wasm/Cargo.toml
@@ -21,6 +21,9 @@ console_error_panic_hook = "0.1.7"
 js-sys = "0.3.63"
 nemo = { path = "../nemo", features = [ "js" ], default-features = false }
 nemo-physical = { path = "../nemo-physical", default-features = false }
+nemo-language-server = { path = "../nemo-language-server", features = [ "js" ], default-features = false }
+futures = "0.3.21"
+gloo-utils = { version = "0.1", features = ["serde"] }
 thiserror = "1.0"
wasm-bindgen = "0.2" wasm-bindgen-futures = "0.4.37" diff --git a/nemo-wasm/README.md b/nemo-wasm/README.md index b979fbf03..f33d4b7fe 100644 --- a/nemo-wasm/README.md +++ b/nemo-wasm/README.md @@ -7,17 +7,17 @@ This crate provides a Web Assembly build and JavaScript/TypeScript bindings for ## Building -- Install [wasm-pack](https://rustwasm.github.io/wasm-pack/book/prerequisites/index.html) -- Build the library: +- Install [wasm-pack](https://rustwasm.github.io/wasm-pack/book/prerequisites/index.html) +- Build the library: ``` -wasm-pack build --target bundler --weak-refs -wasm-pack build --target bundler --weak-refs --release +wasm-pack build --out-dir nemoWASMBundler --target bundler --weak-refs --release +wasm-pack build --out-dir nemoWASMWeb --target web --weak-refs --release ``` -- In order to use the `FileSystemSyncAccessHandle` APIs, the `web_sys_unstable_apis` `cfg` flag needs to be set - - See https://rustwasm.github.io/docs/wasm-bindgen/web-sys/unstable-apis.html - - See https://rustwasm.github.io/wasm-bindgen/api/web_sys/struct.FileSystemSyncAccessHandle.html +- In order to use the `FileSystemSyncAccessHandle` APIs, the `web_sys_unstable_apis` `cfg` flag needs to be set + - See https://rustwasm.github.io/docs/wasm-bindgen/web-sys/unstable-apis.html + - See https://rustwasm.github.io/wasm-bindgen/api/web_sys/struct.FileSystemSyncAccessHandle.html ## Example usage @@ -31,21 +31,21 @@ const engine = new NemoEngine(program); engine.reason(); for (const predicate of program.getOutputPredicates()) { - const rows = new NemoResultsIterable(engine.getResult(predicate)); + const rows = new NemoResultsIterable(engine.getResult(predicate)); - for (const row of rows) { - console.log(row); - } + for (const row of rows) { + console.log(row); + } } // See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Iterators_and_Generators // Iterables are not directly supported yet, see https://github.com/rustwasm/wasm-bindgen/issues/1478 class NemoResultsIterable { - public constructor(private iterator: NemoResults) {} + public constructor(private iterator: NemoResults) {} - public [Symbol.iterator]() { - return this.iterator; - } + public [Symbol.iterator]() { + return this.iterator; + } } ``` @@ -59,22 +59,22 @@ const engine = new NemoEngine(program); engine.reason(); for (const predicate of program.getOutputPredicates()) { - const rows = new NemoResultsIterable(engine.getResult(predicate)); + const rows = new NemoResultsIterable(engine.getResult(predicate)); - for (const row of rows) { - console.log(row); - } + for (const row of rows) { + console.log(row); + } } // See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Iterators_and_Generators // Iterables are not directly supported yet, see https://github.com/rustwasm/wasm-bindgen/issues/1478 class NemoResultsIterable { - constructor(iterator) { - this.iterator = iterator; - } + constructor(iterator) { + this.iterator = iterator; + } - [Symbol.iterator]() { - return this.iterator; - } + [Symbol.iterator]() { + return this.iterator; + } } ``` diff --git a/nemo-wasm/src/language_server.rs b/nemo-wasm/src/language_server.rs new file mode 100644 index 000000000..046bb0114 --- /dev/null +++ b/nemo-wasm/src/language_server.rs @@ -0,0 +1,85 @@ +use std::pin::Pin; + +use futures::{FutureExt, SinkExt, StreamExt}; +use futures::{Sink, Stream}; +use gloo_utils::format::JsValueSerdeExt; +use js_sys::{Array, Promise}; +use nemo_language_server::{ + create_language_server, Backend, ExitedError, LspService, Request, Response, Service, +}; 
+use wasm_bindgen::prelude::wasm_bindgen;
+use wasm_bindgen::JsValue;
+use wasm_bindgen_futures::future_to_promise;
+
+/// Creates a Nemo language server
+/// The server is split up into multiple parts to allow concurrent sending/waiting for server/client-bound requests/responses.
+/// To enable this with `wasm_bindgen`, multiple structs are required to ensure exclusive access, see https://stackoverflow.com/questions/75712197/rust-wasm-bindgen-recursive-use-of-an-object-detected-which-would-lead-to-unsaf#77013978 .
+#[wasm_bindgen(js_name = "createNemoLanguageServer")]
+pub fn create_nemo_language_server() -> JsValue {
+    let (service, socket) = create_language_server();
+
+    let (request_stream, responses_sink) = socket.split();
+
+    let (a, b, c) = (
+        NemoLspChannelClientInitiated(service),
+        NemoLspRequestsServerInitiated(Box::pin(request_stream)),
+        NemoLspResponsesServerInitiated(Box::pin(responses_sink)),
+    );
+
+    let (a, b, c): (JsValue, JsValue, JsValue) = (a.into(), b.into(), c.into());
+
+    let array = Array::new();
+
+    array.push(&a);
+    array.push(&b);
+    array.push(&c);
+
+    array.into()
+}
+
+/// Handles requests initiated by the server
+#[wasm_bindgen]
+pub struct NemoLspRequestsServerInitiated(Pin<Box<dyn Stream<Item = Request>>>);
+
+/// Handles responses corresponding to requests initiated by the server
+#[wasm_bindgen]
+pub struct NemoLspResponsesServerInitiated(Pin<Box<dyn Sink<Response, Error = ExitedError>>>);
+
+#[wasm_bindgen]
+impl NemoLspRequestsServerInitiated {
+    #[wasm_bindgen(js_name = "getNextRequest")]
+    pub async fn next_request(&mut self) -> JsValue {
+        let request = self.0.next().await;
+
+        JsValue::from_serde(&request).unwrap()
+    }
+}
+
+#[wasm_bindgen]
+impl NemoLspResponsesServerInitiated {
+    /// Only one response may be sent at a time; wait for the promise to resolve before sending the next response
+    #[wasm_bindgen(js_name = "sendResponse")]
+    pub async fn send_response(&mut self, response_json_object: JsValue) {
+        let response = response_json_object.into_serde().unwrap();
+
+        self.0.send(response).await.unwrap();
+    }
+}
+
+/// Handles requests initiated by the client and the corresponding responses
+#[wasm_bindgen]
+pub struct NemoLspChannelClientInitiated(LspService<Backend>);
+
+#[wasm_bindgen]
+impl NemoLspChannelClientInitiated {
+    #[wasm_bindgen(js_name = "sendRequest")]
+    pub fn send_request(&mut self, request_json_object: JsValue) -> Promise {
+        let request = request_json_object.into_serde().unwrap();
+
+        future_to_promise(
+            self.0
+                .call(request)
+                .map(|response| Result::Ok(JsValue::from_serde(&response.unwrap()).unwrap())),
+        )
+    }
+}
diff --git a/nemo-wasm/src/lib.rs b/nemo-wasm/src/lib.rs
index d3be7676e..b5d7e8301 100644
--- a/nemo-wasm/src/lib.rs
+++ b/nemo-wasm/src/lib.rs
@@ -34,6 +34,8 @@ use wasm_bindgen::JsValue;
 use web_sys::Blob;
 use web_sys::FileReaderSync;
 
+mod language_server;
+
 #[wasm_bindgen]
 #[derive(Clone)]
 pub struct NemoProgram(nemo::model::Program);
diff --git a/nemo/Cargo.toml b/nemo/Cargo.toml
index 173a3de22..ea6dfc52e 100644
--- a/nemo/Cargo.toml
+++ b/nemo/Cargo.toml
@@ -44,6 +44,7 @@ bytesize = "1.2"
 ascii_tree = "0.1.1"
 serde_json = "1.0.108"
 serde = {version = "1.0.138", features = ["derive"] }
+tower-lsp = "0.20.0"
 dyn-clone = "1.0.16"
 unicode-ident = "1.0.12"
 
diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs
index 555e9e74f..c0e1c90b4 100644
--- a/nemo/src/io/lexer.rs
+++ b/nemo/src/io/lexer.rs
@@ -4,14 +4,15 @@ use std::{cell::RefCell, ops::Range};
 use nom::{
     branch::alt,
-    bytes::complete::{is_not, tag, take, take_till},
character::complete::{alpha1, alphanumeric1, digit1, line_ending, multispace1}, combinator::{all_consuming, cut, map, recognize}, - error::{ContextError, ParseError}, + error::{ParseError}, multi::{many0, many1}, sequence::{delimited, pair, tuple}, }; use nom_locate::LocatedSpan; +use tower_lsp::lsp_types::SymbolKind; #[derive(Debug)] pub(crate) enum NewParseError { @@ -23,7 +24,7 @@ pub(crate) enum NewParseError { SyntaxError(String), MissingTlDocComment, } -impl nom::error::ParseError> for NewParseError { +impl ParseError> for NewParseError { fn from_error_kind(input: Input, kind: nom::error::ErrorKind) -> Self { NewParseError::SyntaxError(kind.description().to_string()) } @@ -36,12 +37,12 @@ impl nom::error::ParseError> for NewParseError { pub(crate) type IResult = nom::IResult; use super::parser::{ - ast::Position, + ast::{AstNode, Position}, types::{Input, Label, ToRange}, }; #[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub(crate) struct Error(pub(crate) Position, pub(crate) String); +pub struct Error(pub Position, pub String); #[derive(Debug, Clone, Copy, PartialEq)] pub(crate) struct ParserState<'a> { @@ -64,7 +65,7 @@ impl ToRange for Span<'_> { } } -pub(crate) fn to_range<'a>(span: Span<'a>) -> Range { +pub(crate) fn to_range(span: Span<'_>) -> Range { let start = span.location_offset(); let end = start + span.fragment().len(); start..end @@ -228,7 +229,7 @@ impl std::fmt::Display for TokenKind { } #[derive(Debug, Copy, Clone, PartialEq)] -pub(crate) struct Token<'a> { +pub struct Token<'a> { pub(crate) kind: TokenKind, pub(crate) span: Span<'a>, } @@ -260,8 +261,8 @@ impl std::fmt::Display for Token<'_> { } } } -impl<'a> crate::io::parser::ast::AstNode for Token<'a> { - fn children(&self) -> Option> { +impl<'a> AstNode for Token<'a> { + fn children(&self) -> Option> { None::> } @@ -281,6 +282,18 @@ impl<'a> crate::io::parser::ast::AstNode for Token<'a> { true } + fn lsp_identifier(&self) -> Option<(String, String)> { + None + } + + fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> { + None + } + + fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> { + None + } + fn name(&self) -> String { String::from("Token") } diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index 0bb7945a0..b06986d68 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -22,7 +22,7 @@ use nom::{ use macros::traced; -pub(crate) mod ast; +pub mod ast; pub(crate) mod types; use types::{ConstraintOperator, IntermediateResult, Span}; @@ -2429,45 +2429,45 @@ mod test { } /// NEW PARSER -mod new { +pub mod new { use std::cell::RefCell; use super::ast::{ atom::*, directive::*, map::*, program::*, statement::*, term::*, tuple::*, List, Position, Wsoc, }; - use super::types::{Input, Label, ParserLabel, ToRange}; + use super::types::{Input, Label, ParserLabel}; use crate::io::lexer::{ arrow, at, caret, close_brace, close_paren, colon, comma, dot, equal, exclamation_mark, exp, greater, greater_equal, hash, less, less_equal, lex_comment, lex_doc_comment, - lex_ident, lex_iri, lex_number, lex_operators, lex_string, lex_toplevel_doc_comment, - lex_whitespace, map_err, minus, open_brace, open_paren, plus, question_mark, skip_to_dot, - slash, star, tilde, underscore, unequal, Error, NewParseError, ParserState, Span, Token, + lex_ident, lex_iri, lex_number, lex_string, lex_toplevel_doc_comment, + lex_whitespace, minus, open_brace, open_paren, plus, question_mark, skip_to_dot, + slash, star, tilde, underscore, unequal, Error, ParserState, Span, Token, TokenKind, }; - use 
crate::io::parser::ast::AstNode; - use nom::combinator::{all_consuming, cut, map, opt, recognize}; - use nom::error::{context, ContextError, ParseError}; - use nom::sequence::{delimited, pair}; + + use nom::combinator::{all_consuming, opt, recognize}; + use nom::error::{ParseError}; + use nom::sequence::{pair}; use nom::Parser; use nom::{ branch::alt, combinator::verify, - multi::{many0, many1, separated_list0}, + multi::{many0, many1}, sequence::tuple, IResult, }; fn outer_span<'a>(input: Span<'a>, rest_input: Span<'a>) -> Span<'a> { unsafe { - let span = Span::new_from_raw_offset( + + // dbg!(&input, &span, &rest_input); + Span::new_from_raw_offset( input.location_offset(), input.location_line(), &input[..(rest_input.location_offset() - input.location_offset())], (), - ); - // dbg!(&input, &span, &rest_input); - span + ) } } @@ -2516,14 +2516,14 @@ mod new { 'e, O: Copy, E: ParseError>, - F: nom::Parser, O, E>, + F: Parser, O, E>, >( mut parser: F, error_msg: impl ToString, error_output: O, errors: ParserState<'e>, ) -> impl FnMut(Input<'a, 'e>) -> IResult, O, E> { - move |input| match parser.parse(input.clone()) { + move |input| match parser.parse(input) { Ok(result) => Ok(result), Err(nom::Err::Error(_)) | Err(nom::Err::Failure(_)) => { let err = Error( @@ -2542,7 +2542,7 @@ mod new { } fn recover<'a, 'e, E>( - mut parser: impl nom::Parser, Statement<'a>, E>, + mut parser: impl Parser, Statement<'a>, E>, error_msg: impl ToString, errors: ParserState<'e>, ) -> impl FnMut(Input<'a, 'e>) -> IResult, Statement<'a>, E> { @@ -2567,7 +2567,7 @@ mod new { } fn report_label<'a, 's, O, E>( - mut parser: impl nom::Parser, O, E>, + mut parser: impl Parser, O, E>, label: ParserLabel, ) -> impl FnMut(Input<'a, 's>) -> IResult, O, E> { move |input| match parser.parse(input) { @@ -2594,7 +2594,7 @@ mod new { } fn report_error<'a, 's, O, E>( - mut parser: impl nom::Parser, O, E>, + mut parser: impl Parser, O, E>, ) -> impl FnMut(Input<'a, 's>) -> IResult, O, E> { move |input| match parser.parse(input) { Ok(result) => { @@ -2612,7 +2612,7 @@ mod new { .into_iter(); for label in labels { if let Some(last) = furthest_errors.last() { - if label.pos.offset >= (*last).0.offset { + if label.pos.offset >= last.0.offset { let err = Error(label.pos, format!("expected {:?}", label.label)); furthest_errors.push(err); @@ -2679,7 +2679,7 @@ mod new { } /// Parse a full program consisting of directives, facts, rules and comments. - fn parse_program<'a, 'e>(input: Input<'a, 'e>) -> (Program<'a>, Vec) { + fn parse_program<'a>(input: Input<'a, '_>) -> (Program<'a>, Vec) { let (rest_input, (tl_doc_comment, statements)) = all_consuming(pair( opt(lex_toplevel_doc_comment), many1(recover( @@ -2706,6 +2706,20 @@ mod new { ) } + pub fn parse_program_str(input: &str) -> (Program<'_>, Vec) { + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let parser_state = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input: Span::new(input), + parser_state, + }; + parse_program(input) + } + /// Parse whitespace that is between directives, facts, rules and comments. 
 fn parse_whitespace<'a, 'e>(input: Input<'a, 'e>) -> IResult<Input<'a, 'e>, Statement<'a>> {
     lex_whitespace(input).map(|(rest_input, ws)| (rest_input, Statement::Whitespace(ws)))
 }
@@ -3763,7 +3777,7 @@
                 }
             }],
         }
-        )
+        );
     }
 
     #[test]
@@ -4034,7 +4048,7 @@
             }),
             ],
         }
-        )
+        );
    }
 
     // #[test]
@@ -4160,7 +4174,7 @@
             })
             ],
         }
-        )
+        );
    }
 
     #[test]
diff --git a/nemo/src/io/parser/ast.rs b/nemo/src/io/parser/ast.rs
index 3ed70e89d..30d3a8aa6 100644
--- a/nemo/src/io/parser/ast.rs
+++ b/nemo/src/io/parser/ast.rs
@@ -1,37 +1,50 @@
 use nom::Offset;
+use tower_lsp::lsp_types::SymbolKind;
 
 use crate::io::lexer::{Span, Token};
+use ascii_tree::{write_tree, Tree};
 use std::fmt::Display;
-use ascii_tree::{Tree, write_tree};
 
 pub(crate) mod atom;
 pub(crate) mod directive;
 pub(crate) mod map;
-pub(crate) mod tuple;
-pub(crate) mod program;
+pub mod program;
 pub(crate) mod statement;
 pub(crate) mod term;
+pub(crate) mod tuple;
 
-pub(crate) trait AstNode: std::fmt::Debug + Display {
+pub trait AstNode: std::fmt::Debug + Display + Sync {
     fn children(&self) -> Option<Vec<&dyn AstNode>>;
     fn span(&self) -> Span;
     fn position(&self) -> Position;
     fn is_token(&self) -> bool;
+
     fn name(&self) -> String;
+
+    /// Returns an optional pair of the identifier and identifier scope.
+    ///
+    /// The identifier scope will scope this identifier up to any [`AstNode`]
+    /// that has an identifier that has this node's identifier scope as a prefix.
+    ///
+    /// This can be used to restrict rename operations to be local, e.g. for variable identifiers inside of rules.
+    fn lsp_identifier(&self) -> Option<(String, String)>;
+    fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)>;
+    fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode>;
 }
 
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
-pub(crate) struct Position {
-    pub(crate) offset: usize,
-    pub(crate) line: u32,
-    pub(crate) column: u32,
+// TODO: tidy up PartialOrd and Ord implementation
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct Position {
+    pub offset: usize,
+    pub line: u32,
+    pub column: u32,
 }
 
 /// Whitespace or Comment token
 #[derive(Debug, Clone, PartialEq)]
-pub(crate) struct Wsoc<'a> {
-    pub(crate) span: Span<'a>,
-    pub(crate) token: Vec<Token<'a>>
+pub struct Wsoc<'a> {
+    pub span: Span<'a>,
+    pub token: Vec<Token<'a>>,
 }
 impl AstNode for Wsoc<'_> {
     fn children(&self) -> Option<Vec<&dyn AstNode>> {
@@ -48,7 +61,11 @@
     }
 
     fn position(&self) -> Position {
-        Position { offset: self.span.location_offset(), line: self.span.location_line(), column: self.span.get_utf8_column() as u32 }
+        Position {
+            offset: self.span.location_offset(),
+            line: self.span.location_line(),
+            column: self.span.get_utf8_column() as u32,
+        }
     }
 
     fn is_token(&self) -> bool {
@@ -56,22 +73,39 @@
     }
 
     fn name(&self) -> String {
-        format!("Wsoc \x1b[34m@{}:{} \x1b[92m{:?}\x1b[0m", self.span.location_line(), self.span.get_utf8_column(), self.span.fragment())
+        format!(
+            "Wsoc \x1b[34m@{}:{} \x1b[92m{:?}\x1b[0m",
+            self.span.location_line(),
+            self.span.get_utf8_column(),
+            self.span.fragment()
+        )
+    }
+
+    fn lsp_identifier(&self) -> Option<(String, String)> {
+        None
+    }
+
+    fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> {
+        None
+    }
+
+    fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> {
+        None
     }
 }
+
 impl Display for Wsoc<'_> {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         todo!()
     }
 }
 
-
 #[derive(Debug, Clone, PartialEq)]
-pub(crate) struct List<'a, T> {
-    pub(crate) span: Span<'a>,
-    pub(crate) first: T,
+pub struct List<'a, T> {
+ pub span: Span<'a>, + pub first: T, // ([ws]?[,][ws]?[T])* - pub(crate) rest: Option>, Token<'a>, Option>, T)>>, + pub rest: Option>, Token<'a>, Option>, T)>>, } impl List<'_, T> { pub fn to_vec(&self) -> Vec { @@ -85,7 +119,7 @@ impl List<'_, T> { vec } } -impl std::iter::IntoIterator for List<'_, T> { +impl IntoIterator for List<'_, T> { type Item = T; type IntoIter = std::vec::IntoIter; @@ -137,9 +171,27 @@ impl AstNode for List<'_, T> { } fn name(&self) -> String { - format!("List \x1b[34m@{}:{} \x1b[92m{:?}\x1b[0m", self.span.location_line(), self.span.get_utf8_column(), self.span.fragment()) + format!( + "List \x1b[34m@{}:{} \x1b[92m{:?}\x1b[0m", + self.span.location_line(), + self.span.get_utf8_column(), + self.span.fragment() + ) + } + + fn lsp_identifier(&self) -> Option<(String, String)> { + None + } + + fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> { + None + } + + fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> { + Some((String::from("List"), SymbolKind::ARRAY)) } } + impl Display for List<'_, T> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let mut output = String::new(); @@ -175,9 +227,9 @@ pub(crate) fn ast_to_ascii_tree(node: &dyn AstNode) -> Tree { } mod test { - use super::*; - use super::{atom::Atom, directive::Directive, tuple::Tuple, program::Program, statement::Statement, term::Term, term::Primitive}; - use crate::io::lexer::TokenKind; + + + macro_rules! s { ($offset:literal,$line:literal,$str:literal) => { @@ -203,7 +255,7 @@ mod test { let span = Span::new(input); let ast = Program { span, - tl_doc_comment: Some(Token { + tl_doc_comment: Some(Token { kind: TokenKind::TlDocComment, span: s!(0, 1, "%! This is just a test file.\n%! So the documentation of the rules is not important.\n") }), diff --git a/nemo/src/io/parser/ast/atom.rs b/nemo/src/io/parser/ast/atom.rs index 46dbf0ac4..5410ae9d1 100644 --- a/nemo/src/io/parser/ast/atom.rs +++ b/nemo/src/io/parser/ast/atom.rs @@ -1,3 +1,5 @@ +use tower_lsp::lsp_types::SymbolKind; + use super::term::Term; use super::tuple::Tuple; use super::{ast_to_ascii_tree, AstNode, Wsoc}; @@ -6,7 +8,7 @@ use crate::io::lexer::{Span, Token}; use ascii_tree::write_tree; #[derive(Debug, Clone, PartialEq)] -pub(crate) enum Atom<'a> { +pub enum Atom<'a> { Positive(Tuple<'a>), Negative { span: Span<'a>, @@ -23,6 +25,17 @@ pub(crate) enum Atom<'a> { }, Map(Map<'a>), } + +impl Atom<'_> { + fn tuple(&self) -> Option<&Tuple<'_>> { + match &self { + Atom::Positive(tuple) => Some(tuple), + Atom::Negative { atom, .. 
} => Some(atom), + _ => None, + } + } +} + impl AstNode for Atom<'_> { fn children(&self) -> Option> { match self { @@ -93,6 +106,32 @@ impl AstNode for Atom<'_> { Atom::Map(_) => name!("Map Atom"), } } + + fn lsp_identifier(&self) -> Option<(String, String)> { + self.tuple().map(|tuple| ( + format!("atom/{}", tuple.identifier.unwrap().span().fragment()), + "file".to_string(), + )) + } + + fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> { + None + // TODO: + // match self.tuple() { + // Some(tuple) => Some(&tuple.identifier.unwrap()), + // None => None, + // } + } + + fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> { + match self.tuple() { + Some(tuple) => Some(( + format!("Atom: {}", tuple.identifier.unwrap().span.fragment()), + SymbolKind::FUNCTION, + )), + None => Some((String::from("Atom"), SymbolKind::FUNCTION)), + } + } } impl std::fmt::Display for Atom<'_> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { diff --git a/nemo/src/io/parser/ast/directive.rs b/nemo/src/io/parser/ast/directive.rs index 8b04c3c91..415b584d4 100644 --- a/nemo/src/io/parser/ast/directive.rs +++ b/nemo/src/io/parser/ast/directive.rs @@ -1,10 +1,12 @@ +use tower_lsp::lsp_types::SymbolKind; + use super::map::Map; use super::{ast_to_ascii_tree, AstNode, List, Position, Wsoc}; use crate::io::lexer::{Span, Token}; use ascii_tree::write_tree; #[derive(Debug, Clone, PartialEq)] -pub(crate) enum Directive<'a> { +pub enum Directive<'a> { // "@base ." Base { span: Span<'a>, @@ -261,7 +263,20 @@ impl AstNode for Directive<'_> { Directive::Output { .. } => name!("Output Directive"), } } + + fn lsp_identifier(&self) -> Option<(String, String)> { + None + } + + fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> { + None + } + + fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> { + Some((String::from("Directive"), SymbolKind::FUNCTION)) + } } + impl std::fmt::Display for Directive<'_> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let mut output = String::new(); diff --git a/nemo/src/io/parser/ast/map.rs b/nemo/src/io/parser/ast/map.rs index 473d01e92..cdafbd7f9 100644 --- a/nemo/src/io/parser/ast/map.rs +++ b/nemo/src/io/parser/ast/map.rs @@ -1,3 +1,5 @@ +use tower_lsp::lsp_types::SymbolKind; + use super::term::Term; use super::{ast_to_ascii_tree, AstNode, List, Position, Wsoc}; use crate::io::lexer::{Span, Token}; @@ -5,15 +7,15 @@ use ascii_tree::write_tree; use std::fmt::Debug; #[derive(Debug, Clone, PartialEq)] -pub(crate) struct Map<'a> { - pub(crate) span: Span<'a>, - pub(crate) identifier: Option>, - pub(crate) ws1: Option>, - pub(crate) open_brace: Token<'a>, - pub(crate) ws2: Option>, - pub(crate) pairs: Option, Term<'a>>>>, - pub(crate) ws3: Option>, - pub(crate) close_brace: Token<'a>, +pub struct Map<'a> { + pub span: Span<'a>, + pub identifier: Option>, + pub ws1: Option>, + pub open_brace: Token<'a>, + pub ws2: Option>, + pub pairs: Option, Term<'a>>>>, + pub ws3: Option>, + pub close_brace: Token<'a>, } impl AstNode for Map<'_> { fn children(&self) -> Option> { @@ -57,7 +59,20 @@ impl AstNode for Map<'_> { fn name(&self) -> String { String::from("Map") } + + fn lsp_identifier(&self) -> Option<(String, String)> { + None + } + + fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> { + None + } + + fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> { + Some((String::from("Map"), SymbolKind::STRUCT)) + } } + impl std::fmt::Display for Map<'_> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let mut 
output = String::new(); @@ -67,13 +82,13 @@ impl std::fmt::Display for Map<'_> { } #[derive(Debug, Clone, PartialEq)] -pub(crate) struct Pair<'a, K, V> { - pub(crate) span: Span<'a>, - pub(crate) key: K, - pub(crate) ws1: Option>, - pub(crate) equal: Token<'a>, - pub(crate) ws2: Option>, - pub(crate) value: V, +pub struct Pair<'a, K, V> { + pub span: Span<'a>, + pub key: K, + pub ws1: Option>, + pub equal: Token<'a>, + pub ws2: Option>, + pub value: V, } impl AstNode for Pair<'_, K, V> { fn children(&self) -> Option> { @@ -114,6 +129,18 @@ impl AstNode for Pair<'_, K, V> { self.span.fragment() ) } + + fn lsp_identifier(&self) -> Option<(String, String)> { + None + } + + fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> { + None + } + + fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> { + Some((String::from("Pair"), SymbolKind::ARRAY)) + } } impl std::fmt::Display for Pair<'_, K, V> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { diff --git a/nemo/src/io/parser/ast/program.rs b/nemo/src/io/parser/ast/program.rs index 466be7577..9331e59d5 100644 --- a/nemo/src/io/parser/ast/program.rs +++ b/nemo/src/io/parser/ast/program.rs @@ -1,14 +1,14 @@ -use ascii_tree::write_tree; +use tower_lsp::lsp_types::SymbolKind; -use super::statement::Statement; -use super::{ast_to_ascii_tree, AstNode, Position}; +use super::{ast_to_ascii_tree, statement::Statement, AstNode, Position}; use crate::io::lexer::{Span, Token}; +use ascii_tree::write_tree; #[derive(Debug, Clone, PartialEq)] -pub(crate) struct Program<'a> { - pub(crate) span: Span<'a>, - pub(crate) tl_doc_comment: Option>, - pub(crate) statements: Vec>, +pub struct Program<'a> { + pub span: Span<'a>, + pub tl_doc_comment: Option>, + pub statements: Vec>, } impl AstNode for Program<'_> { fn children(&self) -> Option> { @@ -58,7 +58,20 @@ impl AstNode for Program<'_> { ) } } + + fn lsp_identifier(&self) -> Option<(String, String)> { + None + } + + fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> { + None + } + + fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> { + Some(("File".to_string(), SymbolKind::FILE)) + } } + impl std::fmt::Display for Program<'_> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let mut output = String::new(); diff --git a/nemo/src/io/parser/ast/statement.rs b/nemo/src/io/parser/ast/statement.rs index a0762d0c8..f641dbd8e 100644 --- a/nemo/src/io/parser/ast/statement.rs +++ b/nemo/src/io/parser/ast/statement.rs @@ -1,3 +1,5 @@ +use tower_lsp::lsp_types::SymbolKind; + use super::atom::Atom; use super::directive::Directive; use super::{ast_to_ascii_tree, AstNode, List, Position, Wsoc}; @@ -5,7 +7,7 @@ use crate::io::lexer::{Span, Token}; use ascii_tree::write_tree; #[derive(Debug, Clone, PartialEq)] -pub(crate) enum Statement<'a> { +pub enum Statement<'a> { Directive(Directive<'a>), Fact { span: Span<'a>, @@ -123,6 +125,7 @@ impl AstNode for Statement<'_> { ) }; } + match self { Statement::Directive(_) => name!("Directive"), Statement::Fact { .. } => name!("Fact"), @@ -132,7 +135,29 @@ impl AstNode for Statement<'_> { Statement::Error(_) => name!("ERROR"), } } + + fn lsp_identifier(&self) -> Option<(String, String)> { + Some(("statement".to_string(), "statement".to_string())) + } + + fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> { + None + } + + fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> { + let name = match self { + Statement::Directive(_) => "Directive", + Statement::Fact { .. } => "Fact", + Statement::Rule { .. 
} => "Rule", + Statement::Whitespace(_ws) => return None, + Statement::Comment(_) => return None, + Statement::Error(_) => "Invalid", + }; + + Some((String::from(name), SymbolKind::CLASS)) + } } + impl std::fmt::Display for Statement<'_> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let mut output = String::new(); diff --git a/nemo/src/io/parser/ast/term.rs b/nemo/src/io/parser/ast/term.rs index 060269687..81ed9d47f 100644 --- a/nemo/src/io/parser/ast/term.rs +++ b/nemo/src/io/parser/ast/term.rs @@ -1,3 +1,5 @@ +use tower_lsp::lsp_types::SymbolKind; + use super::map::Map; use super::tuple::Tuple; use super::{ast_to_ascii_tree, AstNode, List, Position, Wsoc}; @@ -5,7 +7,7 @@ use crate::io::lexer::{Span, Token}; use ascii_tree::write_tree; #[derive(Debug, Clone, PartialEq)] -pub(crate) enum Term<'a> { +pub enum Term<'a> { Primitive(Primitive<'a>), Variable(Token<'a>), Existential(Token<'a>), @@ -36,6 +38,7 @@ pub(crate) enum Term<'a> { Map(Box>), Blank(Token<'a>), } + impl AstNode for Term<'_> { fn children(&self) -> Option> { match self { @@ -141,7 +144,7 @@ impl AstNode for Term<'_> { Term::Binary { .. } => name!("Binary Term"), Term::Aggregation { .. } => name!("Aggregation"), Term::Tuple(f) => { - if let Some(_) = f.identifier { + if f.identifier.is_some() { name!("Function Symbol") } else { name!("Tuple") @@ -151,6 +154,70 @@ impl AstNode for Term<'_> { Term::Blank(_) => name!("Blank"), } } + + fn lsp_identifier(&self) -> Option<(String, String)> { + match self { + Term::Variable(t) => Some(( + format!("variable/{}", t.span().fragment()), + "statement".to_string(), + )), + Term::Aggregation { operation, .. } => Some(( + format!("aggregation/{}", operation.span().fragment()), + "file".to_string(), + )), + Term::Tuple(tuple) => { + tuple.identifier.map(|identifier| ( + format!("function/{}", identifier.span().fragment()), + "file".to_string(), + )) + } + _ => None, + } + } + + fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> { + None + // TODO: + // match self { + // Term::Variable(t) => Some(t), + // Term::Aggregation { operation, .. } => Some(operation), + // Term::Tuple(tuple) => { + // if let Some(identifier) = tuple.identifier { + // Some(identifier) + // } else { + // None + // } + // } + // // Term::Function(named_tuple) => Some(&named_tuple.identifier), + // _ => None, + // } + } + + fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> { + match self { + Term::Primitive(_) => Some((String::from("Primitive term"), SymbolKind::CONSTANT)), + Term::Variable(t) => Some((format!("Variable: {}", t.span()), SymbolKind::VARIABLE)), + Term::UnaryPrefix { .. } => Some((String::from("Unary prefix"), SymbolKind::OPERATOR)), + Term::Blank { .. } => Some((String::from("Unary prefix"), SymbolKind::VARIABLE)), + Term::Existential { .. } => Some((String::from("Unary prefix"), SymbolKind::VARIABLE)), + Term::Binary { .. } => Some((String::from("Binary term"), SymbolKind::OPERATOR)), + Term::Aggregation { operation, .. 
} => Some(( + format!("Aggregation: {}", operation.span.fragment()), + SymbolKind::OPERATOR, + )), + Term::Tuple(tuple) => { + if let Some(identifier) = tuple.identifier { + Some(( + format!("Function: {}", identifier.span.fragment()), + SymbolKind::OPERATOR, + )) + } else { + Some((String::from("Tuple"), SymbolKind::ARRAY)) + } + } + Term::Map(map) => Some((String::from("Map"), SymbolKind::ARRAY)), + } + } } impl std::fmt::Display for Term<'_> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { @@ -180,6 +247,7 @@ pub(crate) enum Primitive<'a> { iri: Token<'a>, }, } + impl AstNode for Primitive<'_> { fn children(&self) -> Option> { match self { @@ -264,6 +332,18 @@ impl AstNode for Primitive<'_> { Primitive::RdfLiteral { .. } => name!("RDF Literal"), } } + + fn lsp_identifier(&self) -> Option<(String, String)> { + None + } + + fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> { + None + } + + fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> { + None + } } impl std::fmt::Display for Primitive<'_> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { @@ -279,6 +359,7 @@ pub(crate) struct Exponent<'a> { pub(crate) sign: Option>, pub(crate) number: Token<'a>, } + impl AstNode for Exponent<'_> { fn children(&self) -> Option> { let mut vec: Vec<&dyn AstNode> = Vec::new(); @@ -305,7 +386,20 @@ impl AstNode for Exponent<'_> { fn name(&self) -> String { todo!() } + + fn lsp_identifier(&self) -> Option<(String, String)> { + None + } + + fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> { + None + } + + fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> { + None + } } + impl std::fmt::Display for Exponent<'_> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { todo!() diff --git a/nemo/src/io/parser/ast/tuple.rs b/nemo/src/io/parser/ast/tuple.rs index f074eb6a1..459d22962 100644 --- a/nemo/src/io/parser/ast/tuple.rs +++ b/nemo/src/io/parser/ast/tuple.rs @@ -1,19 +1,22 @@ +use tower_lsp::lsp_types::SymbolKind; + use super::term::Term; use super::{ast_to_ascii_tree, AstNode, List, Position, Wsoc}; use crate::io::lexer::{Span, Token}; use ascii_tree::write_tree; #[derive(Debug, Clone, PartialEq)] -pub(crate) struct Tuple<'a> { - pub(crate) span: Span<'a>, - pub(crate) identifier: Option>, - pub(crate) ws1: Option>, - pub(crate) open_paren: Token<'a>, - pub(crate) ws2: Option>, - pub(crate) terms: Option>>, - pub(crate) ws3: Option>, - pub(crate) close_paren: Token<'a>, +pub struct Tuple<'a> { + pub span: Span<'a>, + pub identifier: Option>, + pub ws1: Option>, + pub open_paren: Token<'a>, + pub ws2: Option>, + pub terms: Option>>, + pub ws3: Option>, + pub close_paren: Token<'a>, } + impl AstNode for Tuple<'_> { fn children(&self) -> Option> { let mut vec: Vec<&dyn AstNode> = Vec::new(); @@ -61,6 +64,18 @@ impl AstNode for Tuple<'_> { self.span.fragment() ) } + + fn lsp_identifier(&self) -> Option<(String, String)> { + None + } + + fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> { + None + } + + fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> { + None + } } impl std::fmt::Display for Tuple<'_> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { diff --git a/nemo/src/io/parser/types.rs b/nemo/src/io/parser/types.rs index 6850a69c2..6dea14af0 100644 --- a/nemo/src/io/parser/types.rs +++ b/nemo/src/io/parser/types.rs @@ -39,11 +39,11 @@ pub type ParseResult<'a, T> = Result; #[error("Parse error on line {}, column {}: {}\nat {}{}", .line, .column, .source, .fragment, 
format_parse_error_context(.context))] pub struct LocatedParseError { #[source] - pub(super) source: ParseError, - pub(super) line: u32, - pub(super) column: usize, - pub(super) fragment: String, - pub(super) context: Vec, + pub source: ParseError, + pub line: u32, + pub column: usize, + pub fragment: String, + pub context: Vec, } impl LocatedParseError { @@ -479,9 +479,9 @@ impl<'a, T> nom::FindToken for Tokens<'a> { impl<'a> InputIter for Tokens<'a> { type Item = &'a Token<'a>; - type Iter = std::iter::Enumerate<::std::slice::Iter<'a, Token<'a>>>; + type Iter = std::iter::Enumerate>>; - type IterElem = ::std::slice::Iter<'a, Token<'a>>; + type IterElem = std::slice::Iter<'a, Token<'a>>; fn iter_indices(&self) -> Self::Iter { self.tok.iter().enumerate() @@ -506,7 +506,7 @@ impl<'a> InputIter for Tokens<'a> { } } } -impl<'a> nom::InputLength for Tokens<'a> { +impl<'a> InputLength for Tokens<'a> { fn input_len(&self) -> usize { self.tok.len() } @@ -529,7 +529,7 @@ impl<'a> InputTake for Tokens<'a> { ) } } -impl<'a> nom::InputTakeAtPosition for Tokens<'a> { +impl<'a> InputTakeAtPosition for Tokens<'a> { type Item = &'a Token<'a>; fn split_at_position>( @@ -690,7 +690,7 @@ impl<'a, 's> InputIter for Input<'a, 's> { } } -impl nom::InputLength for Input<'_, '_> { +impl InputLength for Input<'_, '_> { fn input_len(&self) -> usize { self.input.input_len() } @@ -719,13 +719,13 @@ impl InputTake for Input<'_, '_> { } } -impl nom::InputTakeAtPosition for Input<'_, '_> { +impl InputTakeAtPosition for Input<'_, '_> { type Item = char; fn split_at_position>( &self, predicate: P, - ) -> nom::IResult + ) -> IResult where P: Fn(Self::Item) -> bool, { @@ -738,8 +738,8 @@ impl nom::InputTakeAtPosition for Input<'_, '_> { fn split_at_position1>( &self, predicate: P, - e: nom::error::ErrorKind, - ) -> nom::IResult + e: ErrorKind, + ) -> IResult where P: Fn(Self::Item) -> bool, { @@ -749,7 +749,7 @@ impl nom::InputTakeAtPosition for Input<'_, '_> { fn split_at_position_complete>( &self, predicate: P, - ) -> nom::IResult + ) -> IResult where P: Fn(Self::Item) -> bool, { @@ -762,8 +762,8 @@ impl nom::InputTakeAtPosition for Input<'_, '_> { fn split_at_position1_complete>( &self, predicate: P, - e: nom::error::ErrorKind, - ) -> nom::IResult + e: ErrorKind, + ) -> IResult where P: Fn(Self::Item) -> bool, { From ccaf400a41a0d548c7e85c419e741537cd478492 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Wed, 29 May 2024 14:56:40 +0200 Subject: [PATCH 043/214] Refactor error reporting to use nom_supreme::error::ErrorTree --- Cargo.lock | 52 ++ nemo/Cargo.toml | 2 + nemo/src/io/lexer.rs | 420 +++++----- nemo/src/io/parser.rs | 1465 ++++++++++++++++++++--------------- nemo/src/io/parser/ast.rs | 41 +- nemo/src/io/parser/types.rs | 46 +- testfile2.rls | 15 +- 7 files changed, 1154 insertions(+), 887 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 31fb2db39..847bd5322 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -111,6 +111,12 @@ dependencies = [ "derive_arbitrary", ] +[[package]] +name = "arrayvec" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" + [[package]] name = "ascii_tree" version = "0.1.1" @@ -260,6 +266,15 @@ dependencies = [ "wyz", ] +[[package]] +name = "brownstone" +version = "3.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5839ee4f953e811bfdcf223f509cb2c6a3e1447959b0bff459405575bc17f22" +dependencies = [ + "arrayvec", +] + [[package]] name = 
"bstr" version = "1.9.1" @@ -1023,6 +1038,12 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "indent_write" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cfe9645a18782869361d9c8732246be7b410ad4e919d3609ebabdac00ba12c3" + [[package]] name = "indexmap" version = "2.2.6" @@ -1057,6 +1078,12 @@ version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" +[[package]] +name = "joinery" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72167d68f5fce3b8655487b8038691a3c9984ee769590f93f2a631f4ad64e4f5" + [[package]] name = "js-sys" version = "0.3.69" @@ -1243,6 +1270,8 @@ dependencies = [ "macros", "nemo-physical", "nom 7.1.3", + "nom-greedyerror", + "nom-supreme", "nom_locate", "num", "oxiri", @@ -1342,6 +1371,29 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "nom-greedyerror" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f359007d505b20cd6e4974ff0d5c8e4565f0f9e15823937238221ccb74b516" +dependencies = [ + "nom 7.1.3", + "nom_locate", +] + +[[package]] +name = "nom-supreme" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bd3ae6c901f1959588759ff51c95d24b491ecb9ff91aa9c2ef4acc5b1dcab27" +dependencies = [ + "brownstone", + "indent_write", + "joinery", + "memchr", + "nom 7.1.3", +] + [[package]] name = "nom_locate" version = "4.2.0" diff --git a/nemo/Cargo.toml b/nemo/Cargo.toml index ea6dfc52e..ab6f43231 100644 --- a/nemo/Cargo.toml +++ b/nemo/Cargo.toml @@ -47,6 +47,8 @@ serde = {version = "1.0.138", features = ["derive"] } tower-lsp = "0.20.0" dyn-clone = "1.0.16" unicode-ident = "1.0.12" +nom-greedyerror = "0.5.0" +nom-supreme = "0.8.0" [dev-dependencies] env_logger = "*" diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs index c0e1c90b4..cd2aa1cd9 100644 --- a/nemo/src/io/lexer.rs +++ b/nemo/src/io/lexer.rs @@ -6,11 +6,13 @@ use nom::{ branch::alt, bytes::complete::{is_not, tag, take, take_till}, character::complete::{alpha1, alphanumeric1, digit1, line_ending, multispace1}, - combinator::{all_consuming, cut, map, recognize}, - error::{ParseError}, + combinator::{all_consuming, cut, map, opt, recognize}, + error::{context, ContextError, ErrorKind, ParseError}, multi::{many0, many1}, sequence::{delimited, pair, tuple}, + IResult, }; +use nom_greedyerror::GreedyError; use nom_locate::LocatedSpan; use tower_lsp::lsp_types::SymbolKind; @@ -34,20 +36,21 @@ impl ParseError> for NewParseError { } } -pub(crate) type IResult = nom::IResult; - use super::parser::{ ast::{AstNode, Position}, - types::{Input, Label, ToRange}, + types::{Input, ToRange}, }; #[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct Error(pub Position, pub String); +pub struct Error { + pub pos: Position, + pub msg: String, + pub context: Vec<&'static str>, +} #[derive(Debug, Clone, Copy, PartialEq)] pub(crate) struct ParserState<'a> { pub(crate) errors: &'a RefCell>, - pub(crate) labels: &'a RefCell>, } impl ParserState<'_> { pub fn report_error(&self, error: Error) { @@ -299,25 +302,29 @@ impl<'a> AstNode for Token<'a> { } } -pub(crate) fn map_err<'a, 'e, O, E: ParseError>>( - mut f: impl nom::Parser, O, E>, - mut op: impl FnMut(E) -> NewParseError, -) -> impl FnMut(Input<'a, 'e>) -> IResult, O> { - move |input| { - f.parse(input).map_err(|e| match e { - nom::Err::Incomplete(err) => 
nom::Err::Incomplete(err), - nom::Err::Error(err) => nom::Err::Error(op(err)), - nom::Err::Failure(err) => nom::Err::Error(op(err)), - }) - } -} +// pub(crate) fn map_err<'a, 's, O, E: ParseError>>( +// mut f: impl nom::Parser, O, E>, +// mut op: impl FnMut(E) -> NewParseError, +// ) -> impl FnMut(Input<'a, 's>) -> IResult, O> { +// move |input| { +// f.parse(input).map_err(|e| match e { +// nom::Err::Incomplete(err) => nom::Err::Incomplete(err), +// nom::Err::Error(err) => nom::Err::Error(op(err)), +// nom::Err::Failure(err) => nom::Err::Error(op(err)), +// }) +// } +// } macro_rules! syntax { - ($func_name: ident, $tag_string: literal, $token: expr) => { - pub(crate) fn $func_name<'a, 'e>( - input: Input<'a, 'e>, - ) -> nom::IResult, Token<'a>> { - map(tag($tag_string), |span: Input| { + ($func_name: ident, $tag_str: literal, $token: expr) => { + pub(crate) fn $func_name< + 'a, + 's, + E: ParseError> + ContextError>, + >( + input: Input<'a, 's>, + ) -> IResult, Token<'a>, E> { + map(context($tag_str, tag($tag_str)), |span: Input| { Token::new($token, span.input) })(input) } @@ -344,32 +351,41 @@ syntax!(at, "@", TokenKind::At); syntax!(exp_lower, "e", TokenKind::Exponent); syntax!(exp_upper, "E", TokenKind::Exponent); -pub(crate) fn exp<'a, 'e>(input: Input<'a, 'e>) -> nom::IResult, Token<'a>> { - alt((exp_lower, exp_upper))(input) +pub(crate) fn exp<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, +) -> IResult, Token<'a>, E> { + context("lex exponent", alt((exp_lower, exp_upper)))(input) } -pub(crate) fn lex_punctuations<'a, 'e>( - input: Input<'a, 'e>, -) -> nom::IResult, Token<'a>> { - alt(( - arrow, - open_paren, - close_paren, - open_bracket, - close_bracket, - open_brace, - close_brace, - dot, - comma, - colon, - question_mark, - exclamation_mark, - tilde, - caret, - hash, - underscore, - at, - ))(input) +pub(crate) fn lex_punctuations< + 'a, + 's, + E: ParseError> + ContextError>, +>( + input: Input<'a, 's>, +) -> IResult, Token<'a>, E> { + context( + "lex punctuations", + alt(( + arrow, + open_paren, + close_paren, + open_bracket, + close_bracket, + open_brace, + close_brace, + dot, + comma, + colon, + question_mark, + exclamation_mark, + tilde, + caret, + hash, + underscore, + at, + )), + )(input) } syntax!(less, "<", TokenKind::Less); @@ -383,34 +399,42 @@ syntax!(minus, "-", TokenKind::Minus); syntax!(star, "*", TokenKind::Star); syntax!(slash, "/", TokenKind::Slash); -pub(crate) fn lex_operators<'a, 'e>( - input: Input<'a, 'e>, -) -> nom::IResult, Token<'a>> { - alt(( - less_equal, - greater_equal, - unequal, - less, - equal, - greater, - plus, - minus, - star, - slash, - ))(input) +pub(crate) fn lex_operators<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, +) -> IResult, Token<'a>, E> { + context( + "lex operators", + alt(( + less_equal, + greater_equal, + unequal, + less, + equal, + greater, + plus, + minus, + star, + slash, + )), + )(input) } -// pub(crate) fn lex_unary_prefix_operators<'a, 'e>( -// input: Input<'a, 'e>, -// ) -> IResult, Token<'a>> { +// pub(crate) fn lex_unary_prefix_operators<'a, 's>( +// input: Input<'a, 's>, +// ) -> IResult, Token<'a>> { // alt((plus, minus))(input) // } -pub(crate) fn lex_ident<'a, 'e>(input: Input<'a, 'e>) -> nom::IResult, Token<'a>> { - let (rest_input, ident) = recognize(pair( - alpha1, - many0(alt((alphanumeric1, tag("_"), tag("-")))), - ))(input)?; +pub(crate) fn lex_ident<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, +) -> IResult, Token<'a>, E> { + let (rest_input, ident) = 
context( + "lex identifier", + recognize(pair( + alpha1, + many0(alt((alphanumeric1, tag("_"), tag("-")))), + )), + )(input)?; let token = match *ident.input.fragment() { "base" => Token::new(TokenKind::Base, ident.input), "prefix" => Token::new(TokenKind::Prefix, ident.input), @@ -422,57 +446,97 @@ pub(crate) fn lex_ident<'a, 'e>(input: Input<'a, 'e>) -> nom::IResult(input: Input<'a, 'e>) -> nom::IResult, Token<'a>> { - recognize(delimited(tag("<"), is_not("> \n"), cut(tag(">"))))(input) - .map(|(rest, result)| (rest, Token::new(TokenKind::Iri, result.input))) +pub(crate) fn lex_iri<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, +) -> IResult, Token<'a>, E> { + context( + "lex iri", + recognize(delimited(tag("<"), is_not("> \n"), cut(tag(">")))), + )(input) + .map(|(rest, result)| (rest, Token::new(TokenKind::Iri, result.input))) } -pub(crate) fn lex_number<'a, 'e>(input: Input<'a, 'e>) -> nom::IResult, Token<'a>> { - digit1(input) +pub(crate) fn lex_number<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, +) -> IResult, Token<'a>, E> { + context("lex number", digit1)(input) .map(|(rest_input, result)| (rest_input, Token::new(TokenKind::Number, result.input))) } -pub(crate) fn lex_string<'a, 'e>(input: Input<'a, 'e>) -> nom::IResult, Token<'a>> { - recognize(delimited(tag("\""), is_not("\""), cut(tag("\""))))(input) - .map(|(rest, result)| (rest, Token::new(TokenKind::String, result.input))) +pub(crate) fn lex_string<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, +) -> IResult, Token<'a>, E> { + context( + "lex string", + recognize(delimited(tag("\""), is_not("\""), cut(tag("\"")))), + )(input) + .map(|(rest, result)| (rest, Token::new(TokenKind::String, result.input))) } -pub(crate) fn lex_comment<'a, 'e>(input: Input<'a, 'e>) -> nom::IResult, Token<'a>> { - recognize(tuple((tag("%"), many0(is_not("\n")), line_ending)))(input) - .map(|(rest, result)| (rest, Token::new(TokenKind::Comment, result.input))) +pub(crate) fn lex_comment<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, +) -> IResult, Token<'a>, E> { + context( + "comment", + recognize(tuple((tag("%"), many0(is_not("\n")), line_ending))), + )(input) + .map(|(rest, result)| (rest, Token::new(TokenKind::Comment, result.input))) } -pub(crate) fn lex_doc_comment<'a, 'e>( - input: Input<'a, 'e>, -) -> nom::IResult, Token<'a>> { - recognize(many1(tuple((tag("%%"), many0(is_not("\n")), line_ending))))(input) - .map(|(rest, result)| (rest, Token::new(TokenKind::DocComment, result.input))) +pub(crate) fn lex_doc_comment< + 'a, + 's, + E: ParseError> + ContextError>, +>( + input: Input<'a, 's>, +) -> IResult, Token<'a>, E> { + context( + "documentation comment", + recognize(many1(tuple((tag("%%"), many0(is_not("\n")), line_ending)))), + )(input) + .map(|(rest, result)| (rest, Token::new(TokenKind::DocComment, result.input))) } -pub(crate) fn lex_toplevel_doc_comment<'a, 'e>( - input: Input<'a, 'e>, -) -> nom::IResult, Token<'a>> { - recognize(many1(tuple((tag("%!"), many0(is_not("\n")), line_ending))))(input) - .map(|(rest, result)| (rest, Token::new(TokenKind::TlDocComment, result.input))) +pub(crate) fn lex_toplevel_doc_comment< + 'a, + 's, + E: ParseError> + ContextError>, +>( + input: Input<'a, 's>, +) -> IResult, Token<'a>, E> { + context( + "top level documentation comment", + recognize(many1(tuple((tag("%!"), many0(is_not("\n")), line_ending)))), + )(input) + .map(|(rest, result)| (rest, Token::new(TokenKind::TlDocComment, result.input))) } -pub(crate) fn 
lex_comments<'a, 'e>(input: Input<'a, 'e>) -> nom::IResult, Token<'a>> { - alt((lex_toplevel_doc_comment, lex_doc_comment, lex_comment))(input) +pub(crate) fn lex_comments<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, +) -> IResult, Token<'a>, E> { + context( + "comments", + alt((lex_toplevel_doc_comment, lex_doc_comment, lex_comment)), + )(input) } -pub(crate) fn lex_whitespace<'a, 'e>( - input: Input<'a, 'e>, -) -> nom::IResult, Token<'a>> { - multispace1(input).map(|(rest, result)| (rest, Token::new(TokenKind::Whitespace, result.input))) +pub(crate) fn lex_whitespace<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, +) -> IResult, Token<'a>, E> { + context("whitespace", multispace1)(input) + .map(|(rest, result)| (rest, Token::new(TokenKind::Whitespace, result.input))) } -pub(crate) fn lex_illegal<'a, 'e>(input: Input<'a, 'e>) -> nom::IResult, Token<'a>> { - take(1usize)(input).map(|(rest, result)| (rest, Token::new(TokenKind::Illegal, result.input))) +pub(crate) fn lex_illegal<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, +) -> IResult, Token<'a>, E> { + context("illegal character", take(1usize))(input) + .map(|(rest, result)| (rest, Token::new(TokenKind::Illegal, result.input))) } -pub(crate) fn lex_tokens<'a, 'e>( - input: Input<'a, 'e>, -) -> nom::IResult, Vec>> { +pub(crate) fn lex_tokens<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, +) -> IResult, Vec>, E> { all_consuming(many0(alt(( lex_iri, lex_operators, @@ -490,10 +554,12 @@ pub(crate) fn lex_tokens<'a, 'e>( }) } -pub(crate) fn skip_to_dot<'a, 'e>(input: Input<'a, 'e>) -> (Input<'a, 'e>, Token<'a>) { +pub(crate) fn skip_to_dot<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, +) -> (Input<'a, 's>, Token<'a>) { let (rest_input, error_input) = recognize(pair( take_till::<_, Input<'_, '_>, nom::error::Error<_>>(|c| c == '.'), - tag("."), + opt(tag(".")), ))(input) .expect("Skipping to the next dot should not fail!"); ( @@ -522,18 +588,14 @@ mod tests { fn empty_input() { let input = Span::new(""); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let errors = ParserState { - errors: &refcell, - labels: &labels, - }; + let errors = ParserState { errors: &refcell }; let input = Input { input, parser_state: errors, }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![T!(Eof, 0, 1, "")] ) } @@ -542,18 +604,14 @@ mod tests { fn base() { let input = Span::new("@base"); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let errors = ParserState { - errors: &refcell, - labels: &labels, - }; + let errors = ParserState { errors: &refcell }; let input = Input { input, parser_state: errors, }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![T!(At, 0, 1, "@"), T!(Base, 1, 1, "base"), T!(Eof, 5, 1, ""),] ) } @@ -562,18 +620,14 @@ mod tests { fn prefix() { let input = Span::new("@prefix"); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let errors = ParserState { - errors: &refcell, - labels: &labels, - }; + let errors = ParserState { errors: &refcell }; let input = Input { input, parser_state: errors, }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(At, 0, 1, "@"), T!(Prefix, 1, 1, "prefix"), @@ -586,18 +640,14 @@ mod tests { fn output() { let 
input = Span::new("@output"); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let errors = ParserState { - errors: &refcell, - labels: &labels, - }; + let errors = ParserState { errors: &refcell }; let input = Input { input, parser_state: errors, }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(At, 0, 1, "@"), T!(Output, 1, 1, "output"), @@ -610,18 +660,14 @@ mod tests { fn import() { let input = Span::new("@import"); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let errors = ParserState { - errors: &refcell, - labels: &labels, - }; + let errors = ParserState { errors: &refcell }; let input = Input { input, parser_state: errors, }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(At, 0, 1, "@"), T!(Import, 1, 1, "import"), @@ -634,18 +680,14 @@ mod tests { fn export() { let input = Span::new("@export"); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let errors = ParserState { - errors: &refcell, - labels: &labels, - }; + let errors = ParserState { errors: &refcell }; let input = Input { input, parser_state: errors, }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(At, 0, 1, "@"), T!(Export, 1, 1, "export"), @@ -658,18 +700,14 @@ mod tests { fn idents_with_keyword_prefix() { let input = Span::new("@baseA, @prefixB, @importC, @exportD, @outputE."); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let errors = ParserState { - errors: &refcell, - labels: &labels, - }; + let errors = ParserState { errors: &refcell }; let input = Input { input, parser_state: errors, }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(At, 0, 1, "@"), T!(Ident, 1, 1, "baseA"), @@ -699,18 +737,14 @@ mod tests { fn tokenize() { let input = Span::new("P(?X) :- A(?X).\t\n A(Human)."); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let errors = ParserState { - errors: &refcell, - labels: &labels, - }; + let errors = ParserState { errors: &refcell }; let input = Input { input, parser_state: errors, }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Ident, 0, 1, "P"), T!(OpenParen, 1, 1, "("), @@ -741,18 +775,14 @@ mod tests { fn comment() { let input = Span::new(" % Some Comment\n"); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let errors = ParserState { - errors: &refcell, - labels: &labels, - }; + let errors = ParserState { errors: &refcell }; let input = Input { input, parser_state: errors, }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Whitespace, 0, 1, " "), T!(Comment, 4, 1, "% Some Comment\n"), @@ -767,18 +797,14 @@ mod tests { fn ident() { let input = Span::new("some_Ident(Alice). 
%comment at the end of a line\n"); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let errors = ParserState { - errors: &refcell, - labels: &labels, - }; + let errors = ParserState { errors: &refcell }; let input = Input { input, parser_state: errors, }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Ident, 0, 1, "some_Ident"), T!(OpenParen, 10, 1, "("), @@ -796,18 +822,14 @@ mod tests { fn forbidden_ident() { let input = Span::new("_someIdent(Alice). %comment at the end of a line\n"); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let errors = ParserState { - errors: &refcell, - labels: &labels, - }; + let errors = ParserState { errors: &refcell }; let input = Input { input, parser_state: errors, }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Underscore, 0, 1, "_"), T!(Ident, 1, 1, "someIdent"), @@ -826,18 +848,14 @@ mod tests { fn iri() { let input = Span::new(""); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let errors = ParserState { - errors: &refcell, - labels: &labels, - }; + let errors = ParserState { errors: &refcell }; let input = Input { input, parser_state: errors, }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Iri, 0, 1, ""), T!(Eof, 31, 1, ""), @@ -849,18 +867,14 @@ mod tests { fn iri_pct_enc() { let input = Span::new("\n"); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let errors = ParserState { - errors: &refcell, - labels: &labels, - }; + let errors = ParserState { errors: &refcell }; let input = Input { input, parser_state: errors, }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Iri, 0, 1, ""), T!(Whitespace, 37, 1, "\n"), @@ -875,18 +889,14 @@ mod tests { fn constraints() { let input = Span::new("A(?X):-B(?X),?X<42,?X>3."); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let errors = ParserState { - errors: &refcell, - labels: &labels, - }; + let errors = ParserState { errors: &refcell }; let input = Input { input, parser_state: errors, }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Ident, 0, 1, "A"), T!(OpenParen, 1, 1, "("), @@ -919,18 +929,14 @@ mod tests { fn pct_enc_comment() { let input = Span::new("%d4 this should be a comment,\n% but the lexer can't distinguish a percent encoded value\n% in an iri from a comment :(\n"); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let errors = ParserState { - errors: &refcell, - labels: &labels, - }; + let errors = ParserState { errors: &refcell }; let input = Input { input, parser_state: errors, }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Comment, 0, 1, "%d4 this should be a comment,\n"), T!( @@ -949,18 +955,14 @@ mod tests { fn fact() { let input = Span::new("somePred(term1, term2)."); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let errors = ParserState { - errors: &refcell, - labels: &labels, - }; + let errors = ParserState { errors: &refcell }; let input = Input { input, parser_state: errors, }; 
assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Ident, 0, 1, "somePred"), T!(OpenParen, 8, 1, "("), @@ -979,18 +981,14 @@ mod tests { fn whitespace() { let input = Span::new(" \t \n\n\t \n"); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let errors = ParserState { - errors: &refcell, - labels: &labels, - }; + let errors = ParserState { errors: &refcell }; let input = Input { input, parser_state: errors, }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Whitespace, 0, 1, " \t \n\n\t \n"), T!(Eof, 12, 4, ""), @@ -1002,15 +1000,11 @@ mod tests { fn skip_to_dot() { let input = Span::new("some ?broken :- rule). A(Fact)."); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let errors = ParserState { - errors: &refcell, - labels: &labels, - }; + let errors = ParserState { errors: &refcell }; let input = Input { input, parser_state: errors, }; - dbg!(super::skip_to_dot(input)); + dbg!(super::skip_to_dot::>(input)); } } diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index b06986d68..a8d9f82e8 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -2430,25 +2430,26 @@ mod test { /// NEW PARSER pub mod new { + use std::borrow::BorrowMut; use std::cell::RefCell; use super::ast::{ atom::*, directive::*, map::*, program::*, statement::*, term::*, tuple::*, List, Position, Wsoc, }; - use super::types::{Input, Label, ParserLabel}; + use super::types::{Input, ToRange}; use crate::io::lexer::{ arrow, at, caret, close_brace, close_paren, colon, comma, dot, equal, exclamation_mark, exp, greater, greater_equal, hash, less, less_equal, lex_comment, lex_doc_comment, - lex_ident, lex_iri, lex_number, lex_string, lex_toplevel_doc_comment, - lex_whitespace, minus, open_brace, open_paren, plus, question_mark, skip_to_dot, - slash, star, tilde, underscore, unequal, Error, ParserState, Span, Token, + lex_ident, lex_iri, lex_number, lex_operators, lex_string, lex_toplevel_doc_comment, + lex_whitespace, minus, open_brace, open_paren, plus, question_mark, skip_to_dot, slash, + star, tilde, underscore, unequal, Error, NewParseError, ParserState, Span, Token, TokenKind, }; - - use nom::combinator::{all_consuming, opt, recognize}; - use nom::error::{ParseError}; - use nom::sequence::{pair}; + use crate::io::parser::ast::AstNode; + use nom::combinator::{all_consuming, cut, map, opt, recognize}; + use nom::error::{context, ContextError, ErrorKind, ParseError}; + use nom::sequence::{delimited, pair}; use nom::Parser; use nom::{ branch::alt, @@ -2457,10 +2458,11 @@ pub mod new { sequence::tuple, IResult, }; + use nom_greedyerror::GreedyError; + use nom_supreme::error::{ErrorTree, StackContext}; fn outer_span<'a>(input: Span<'a>, rest_input: Span<'a>) -> Span<'a> { unsafe { - // dbg!(&input, &span, &rest_input); Span::new_from_raw_offset( input.location_offset(), @@ -2511,29 +2513,45 @@ pub mod new { // } // } - fn expect< - 'a, - 'e, - O: Copy, - E: ParseError>, - F: Parser, O, E>, - >( + // fn context<'a, 's, O, E>( + // mut f: impl FnMut(Input<'a, 's>) -> IResult, O, E>, + // context: ParserContext, + // ) -> impl FnMut(Input<'a, 's>) -> IResult, O, E> { + // move |input| { + // let mut labels = *input.parser_state.labels.borrow_mut(); + // if let None = labels { + // labels = Some(Context { + // context: context.clone(), + // label: None, + // inner: vec![], + // }); + // labels + // 
} else { + // dbg!(&labels); + // labels + // }; + // f(input) + // } + // } + + fn expect<'a, 's, O: Copy, E: ParseError>, F: Parser, O, E>>( mut parser: F, error_msg: impl ToString, error_output: O, - errors: ParserState<'e>, - ) -> impl FnMut(Input<'a, 'e>) -> IResult, O, E> { + errors: ParserState<'s>, + ) -> impl FnMut(Input<'a, 's>) -> IResult, O, E> { move |input| match parser.parse(input) { Ok(result) => Ok(result), Err(nom::Err::Error(_)) | Err(nom::Err::Failure(_)) => { - let err = Error( - Position { + let err = Error { + pos: Position { offset: input.input.location_offset(), line: input.input.location_line(), column: input.input.get_utf8_column() as u32, }, - error_msg.to_string(), - ); + msg: error_msg.to_string(), + context: vec![], + }; errors.report_error(err); Ok((input, error_output)) } @@ -2541,116 +2559,231 @@ pub mod new { } } - fn recover<'a, 'e, E>( - mut parser: impl Parser, Statement<'a>, E>, + fn recover<'a, 's, E>( + mut parser: impl Parser, Statement<'a>, E>, error_msg: impl ToString, - errors: ParserState<'e>, - ) -> impl FnMut(Input<'a, 'e>) -> IResult, Statement<'a>, E> { - move |input: Input<'a, 'e>| match parser.parse(input) { + context: &'static str, + errors: ParserState<'s>, + ) -> impl FnMut(Input<'a, 's>) -> IResult, Statement<'a>, E> { + move |input: Input<'a, 's>| match parser.parse(input) { Ok(result) => Ok(result), Err(err) if input.input.is_empty() => Err(err), Err(nom::Err::Error(e)) | Err(nom::Err::Failure(e)) => { - let err = Error( - Position { + let err = Error { + pos: Position { offset: input.input.location_offset(), line: input.input.location_line(), column: input.input.get_utf8_column() as u32, }, - error_msg.to_string(), - ); + msg: error_msg.to_string(), + context: vec![context], + }; errors.report_error(err); - let (rest_input, token) = skip_to_dot(input); + let (rest_input, token) = skip_to_dot::>>(input); Ok((rest_input, Statement::Error(token))) } Err(err) => Err(err), } } - fn report_label<'a, 's, O, E>( - mut parser: impl Parser, O, E>, - label: ParserLabel, - ) -> impl FnMut(Input<'a, 's>) -> IResult, O, E> { + // fn report_label<'a, 's, O, E>( + // mut parser: impl nom::Parser, O, E>, + // label: ParserLabel, + // ) -> impl FnMut(Input<'a, 's>) -> IResult, O, E> { + // move |input| match parser.parse(input) { + // Ok(result) => Ok(result), + // Err(err) => { + // match err { + // nom::Err::Incomplete(_) => (), + // nom::Err::Error(_) | nom::Err::Failure(_) => { + // if !input.input.is_empty() { + // input.parser_state.labels.borrow_mut().push(Label { + // label, + // pos: Position { + // offset: input.input.location_offset(), + // line: input.input.location_line(), + // column: input.input.get_utf8_column() as u32, + // }, + // }) + // }; + // } + // }; + // Err(err) + // } + // } + // } + + // fn report_error<'a, 's, O, E>( + // mut parser: impl nom::Parser, O, E>, + // ) -> impl FnMut(Input<'a, 's>) -> IResult, O, E> { + // move |input| match parser.parse(input) { + // Ok(result) => { + // input.parser_state.labels.borrow_mut().inner.clear(); + // Ok(result) + // } + // Err(err) => { + // match err { + // nom::Err::Incomplete(_) => (), + // nom::Err::Error(_) | nom::Err::Failure(_) => { + // // println!("LABELS BEFORE REPORT!!!!: {:#?}", input.parser_state.labels); + // let mut furthest_errors: Vec = Vec::new(); + // let labels = + // as Clone>::clone(&input.parser_state.labels.borrow()) + // .into_iter(); + // for label in labels { + // if let Some(last) = furthest_errors.last() { + // if label.pos.offset >= (*last).0.offset 
{ + // let err = + // Error(label.pos, format!("expected {:?}", label.label)); + // furthest_errors.push(err); + // } + // } else { + // let err = Error(label.pos, format!("expected {:?}", label.label)); + // furthest_errors.push(err); + // }; + // } + // for err in furthest_errors { + // input.parser_state.report_error(err) + // } + // // for label in furthest_errors { + // // println!( + // // "Syntax error: Parser got stuck at line {} column {}, expected {:?}", + // // label.position.line, label.position.column, label.label + // // ); + // // println!( + // // "\n{}", + // // input + // // .parser_state + // // .source + // // .fragment() + // // .lines() + // // .collect::>() + // // .get((label.position.line - 1) as usize) + // // .unwrap() + // // ); + // // println!("{1:>0$}", label.position.column, "^"); + // // } + // } + // }; + // Err(err) + // } + // } + // } + fn report_error<'a, 's, O>( + mut parser: impl nom::Parser, O, ErrorTree>>, + ) -> impl FnMut(Input<'a, 's>) -> IResult, O, ErrorTree>> { move |input| match parser.parse(input) { Ok(result) => Ok(result), - Err(err) => { - match err { + Err(e) => { + if input.input.is_empty() { + return Err(e); + }; + match &e { nom::Err::Incomplete(_) => (), - nom::Err::Error(_) | nom::Err::Failure(_) => { - if !input.input.is_empty() { - input.parser_state.labels.borrow_mut().push(Label { - label, - pos: Position { - offset: input.input.location_offset(), - line: input.input.location_line(), - column: input.input.get_utf8_column() as u32, - }, - }) - }; + nom::Err::Error(err) | nom::Err::Failure(err) => { + let (deepest_pos, errors) = get_deepest_errors(err); + for error in errors { + input.parser_state.report_error(error); + } + // let error = Error(deepest_pos, format!("")); + // // input.parser_state.report_error(error) } }; - Err(err) + Err(e) } } } - fn report_error<'a, 's, O, E>( - mut parser: impl Parser, O, E>, - ) -> impl FnMut(Input<'a, 's>) -> IResult, O, E> { - move |input| match parser.parse(input) { - Ok(result) => { - input.parser_state.labels.borrow_mut().clear(); - Ok(result) + fn get_deepest_errors<'a, 's>(e: &'a ErrorTree>) -> (Position, Vec) { + match e { + ErrorTree::Base { location, kind } => { + let span = location.input; + let err_pos = Position { + offset: span.location_offset(), + line: span.location_line(), + column: span.get_utf8_column() as u32, + }; + ( + err_pos, + vec![Error { + pos: err_pos, + msg: format!("{}", e), + context: Vec::new(), + }], + ) } - Err(err) => { - match err { - nom::Err::Incomplete(_) => (), - nom::Err::Error(_) | nom::Err::Failure(_) => { - // println!("LABELS BEFORE REPORT!!!!: {:#?}", input.parser_state.labels); - let mut furthest_errors: Vec = Vec::new(); - let labels = - as Clone>::clone(&input.parser_state.labels.borrow()) - .into_iter(); - for label in labels { - if let Some(last) = furthest_errors.last() { - if label.pos.offset >= last.0.offset { - let err = - Error(label.pos, format!("expected {:?}", label.label)); - furthest_errors.push(err); - } - } else { - let err = Error(label.pos, format!("expected {:?}", label.label)); - furthest_errors.push(err); - }; + ErrorTree::Stack { base, contexts } => { + // let mut err_pos = Position::default(); + match &**base { + ErrorTree::Base { location, kind } => { + let span = location.input; + let err_pos = Position { + offset: span.location_offset(), + line: span.location_line(), + column: span.get_utf8_column() as u32, + }; + ( + err_pos, + vec![Error { + pos: err_pos, + msg: format!("{}", base), + context: context_strs(contexts), 
+ }], + ) + } + ErrorTree::Stack { base, contexts } => { + let (pos, mut deepest_errors) = get_deepest_errors(base); + let contexts = context_strs(contexts); + dbg!(&deepest_errors); + for mut error in &mut deepest_errors { + error.context.append(&mut contexts.clone()); } - for err in furthest_errors { - input.parser_state.report_error(err) + dbg!(&deepest_errors); + (pos, deepest_errors) + } + ErrorTree::Alt(error_tree) => { + let (pos, mut deepest_errors) = get_deepest_errors(base); + let contexts = context_strs(contexts); + for mut error in &mut deepest_errors { + error.context.append(&mut contexts.clone()); } - // for label in furthest_errors { - // println!( - // "Syntax error: Parser got stuck at line {} column {}, expected {:?}", - // label.position.line, label.position.column, label.label - // ); - // println!( - // "\n{}", - // input - // .parser_state - // .source - // .fragment() - // .lines() - // .collect::>() - // .get((label.position.line - 1) as usize) - // .unwrap() - // ); - // println!("{1:>0$}", label.position.column, "^"); - // } + (pos, deepest_errors) } - }; - Err(err) + } + } + ErrorTree::Alt(vec) => { + let mut return_vec: Vec = Vec::new(); + let mut deepest_pos = Position::default(); + for error in vec { + let (pos, mut deepest_errors) = get_deepest_errors(error); + if pos > deepest_pos { + deepest_pos = pos; + return_vec.clear(); + return_vec.append(&mut deepest_errors); + } else if pos == deepest_pos { + return_vec.append(&mut deepest_errors); + } + } + (deepest_pos, return_vec) } } } - fn wsoc0<'a, 's>(input: Input<'a, 's>) -> IResult, Option>> { + fn context_strs( + contexts: &Vec<(Input<'_, '_>, StackContext<&'static str>)>, + ) -> Vec<&'static str> { + contexts + .iter() + .map(|(_, c)| match c { + StackContext::Kind(k) => todo!(), + StackContext::Context(str) => *str, + }) + .collect() + } + + fn wsoc0<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Option>, E> { many0(alt((lex_whitespace, lex_comment)))(input).map(|(rest_input, vec)| { if vec.is_empty() { (rest_input, None) @@ -2666,7 +2799,9 @@ pub mod new { }) } - fn wsoc1<'a, 's>(input: Input<'a, 's>) -> IResult, Wsoc<'a>> { + fn wsoc1<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Wsoc<'a>, E> { many1(alt((lex_whitespace, lex_comment)))(input).map(|(rest_input, vec)| { ( rest_input, @@ -2679,74 +2814,82 @@ pub mod new { } /// Parse a full program consisting of directives, facts, rules and comments. - fn parse_program<'a>(input: Input<'a, '_>) -> (Program<'a>, Vec) { - let (rest_input, (tl_doc_comment, statements)) = all_consuming(pair( - opt(lex_toplevel_doc_comment), - many1(recover( - report_error(alt(( - // TODO: Discuss wether directives should only get parsed at the beginning of the source file - report_label(parse_rule, ParserLabel::Rule), - report_label(parse_fact, ParserLabel::Fact), - parse_whitespace, - report_label(parse_directive, ParserLabel::Directive), - parse_comment, - ))), - "failed to parse a statement", - input.parser_state, - )), - ))(input) - .expect("Parser can't fail. If it fails it's a bug! Pleas report it. 
Got"); - ( - Program { - span: input.input, - tl_doc_comment, - statements, - }, - rest_input.parser_state.errors.take(), - ) + fn parse_program<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> (Program<'a>, Vec) { + let result = context( + "program", + pair( + opt(lex_toplevel_doc_comment::>>), + many0(recover( + report_error(alt(( + // TODO: Discuss wether directives should only get parsed at the beginning of the source file + parse_rule, + parse_fact, + parse_whitespace, + parse_directive, + parse_comment, + ))), + "failed to parse statement", + "program", + input.parser_state, + )), + ), + )(input); + match result { + Ok((rest_input, (tl_doc_comment, statements))) => { + if !rest_input.input.is_empty() { + panic!("Parser did not consume all input. This is considered a bug. Please report it. Unparsed input is: {:?}", rest_input); + }; + ( + Program { + span: input.input, + tl_doc_comment, + statements, + }, + rest_input.parser_state.errors.take(), + ) + } + Err(e) => panic!( + "Parser can't fail. If it fails it's a bug! Please report it. Got: {:?}", + e + ), + } } pub fn parse_program_str(input: &str) -> (Program<'_>, Vec) { let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let parser_state = ParserState { - errors: &refcell, - labels: &labels, - }; + let parser_state = ParserState { errors: &refcell }; let input = Input { input: Span::new(input), parser_state, }; - parse_program(input) + parse_program::>>(input) } /// Parse whitespace that is between directives, facts, rules and comments. - fn parse_whitespace<'a, 'e>(input: Input<'a, 'e>) -> IResult, Statement<'a>> { + fn parse_whitespace<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Statement<'a>, E> { lex_whitespace(input).map(|(rest_input, ws)| (rest_input, Statement::Whitespace(ws))) } /// Parse normal comments that start with a `%` and ends at the line ending. - fn parse_comment<'a, 'e>(input: Input<'a, 'e>) -> IResult, Statement<'a>> { + fn parse_comment<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Statement<'a>, E> { lex_comment(input).map(|(rest_input, comment)| (rest_input, Statement::Comment(comment))) } /// Parse a fact of the form `predicateName(term1, term2, …).` - fn parse_fact<'a, 'e>(input: Input<'a, 'e>) -> IResult, Statement<'a>> { - dbg!(&input.parser_state.labels); - tuple(( - opt(lex_doc_comment), - parse_normal_atom, - wsoc0, - expect( - report_label(dot, ParserLabel::Dot), - "missing `.`", - Token { - kind: TokenKind::Error, - span: Span::new("ERROR! missing `.`"), - }, - input.parser_state, - ), - ))(input) + fn parse_fact<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Statement<'a>, E> { + // dbg!(&input.parser_state.labels); + context( + "fact", + tuple((opt(lex_doc_comment), parse_normal_atom, wsoc0, dot)), + )(input) .map(|(rest_input, (doc_comment, atom, ws, dot))| { ( rest_input, @@ -2762,25 +2905,22 @@ pub mod new { } /// Parse a rule of the form `headPredicate1(term1, term2, …), headPredicate2(term1, term2, …) :- bodyPredicate(term1, …), term1 >= (term2 + term3) * function(term1, …) .` - fn parse_rule<'a, 'e>(input: Input<'a, 'e>) -> IResult, Statement<'a>> { - tuple(( - opt(lex_doc_comment), - parse_head, - wsoc0, - report_label(arrow, ParserLabel::Arrow), - wsoc0, - parse_body, - wsoc0, - expect( - report_label(dot, ParserLabel::Dot), - "missing `.`", - Token { - kind: TokenKind::Error, - span: Span::new("ERROR! 
missing `.`"), - }, - input.parser_state, - ), - ))(input) + fn parse_rule<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Statement<'a>, E> { + context( + "rule", + tuple(( + opt(lex_doc_comment), + parse_head, + wsoc0, + arrow, + wsoc0, + parse_body, + wsoc0, + dot, + )), + )(input) .map( |(rest_input, (doc_comment, head, ws1, arrow, ws2, body, ws3, dot))| { ( @@ -2802,48 +2942,54 @@ pub mod new { } /// Parse the head atoms of a rule. - fn parse_head<'a, 'e>(input: Input<'a, 'e>) -> IResult, List<'a, Atom<'a>>> { - parse_list(parse_head_atoms)(input) + fn parse_head<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, List<'a, Atom<'a>>, E> { + context("rule head", parse_list(parse_head_atoms))(input) } /// Parse the body atoms of a rule. - fn parse_body<'a, 'e>(input: Input<'a, 'e>) -> IResult, List<'a, Atom<'a>>> { - parse_list(parse_body_atoms)(input) + fn parse_body<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, List<'a, Atom<'a>>, E> { + context("rule body", parse_list(parse_body_atoms))(input) } /// Parse the directives (@base, @prefix, @import, @export, @output). - fn parse_directive<'a, 'e>(input: Input<'a, 'e>) -> IResult, Statement<'a>> { - alt(( - parse_base_directive, - parse_prefix_directive, - parse_import_directive, - parse_export_directive, - parse_output_directive, - ))(input) + fn parse_directive<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Statement<'a>, E> { + context( + "directive", + alt(( + parse_base_directive, + parse_prefix_directive, + parse_import_directive, + parse_export_directive, + parse_output_directive, + )), + )(input) .map(|(rest, directive)| (rest, Statement::Directive(directive))) } /// Parse the base directive. - fn parse_base_directive<'a, 'e>(input: Input<'a, 'e>) -> IResult, Directive<'a>> { - tuple(( - opt(lex_doc_comment), - recognize(pair( - at, - verify(lex_ident, |token| token.kind == TokenKind::Base), + fn parse_base_directive<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Directive<'a>, E> { + context( + "base directive", + tuple(( + opt(lex_doc_comment), + recognize(pair( + at, + verify(lex_ident, |token| token.kind == TokenKind::Base), + )), + wsoc0, + lex_iri, + wsoc0, + dot, )), - wsoc0, - report_label(lex_iri, ParserLabel::Iri), - wsoc0, - expect( - report_label(dot, ParserLabel::Dot), - "missing `.`", - Token { - kind: TokenKind::Error, - span: Span::new("ERROR! missing `.`"), - }, - input.parser_state, - ), - ))(input) + )(input) .map(|(rest_input, (doc_comment, kw, ws1, base_iri, ws2, dot))| { ( rest_input, @@ -2864,30 +3010,29 @@ pub mod new { } /// Parse the prefix directive. 
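// Concretely: `@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .`
// A minimal sketch of driving this parser in isolation, mirroring the test
// setup used elsewhere in this file; the input string and the choice of
// ErrorTree as the error type are illustrative assumptions, not code from
// this patch:
//
// let refcell = RefCell::new(Vec::new());
// let parser_state = ParserState { errors: &refcell };
// let input = Input {
//     input: Span::new("@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> ."),
//     parser_state,
// };
// let (_rest, directive) =
//     parse_prefix_directive::<ErrorTree<Input<'_, '_>>>(input).unwrap();
// assert!(matches!(directive, Directive::Prefix { .. }));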
- fn parse_prefix_directive<'a, 'e>( - input: Input<'a, 'e>, - ) -> IResult, Directive<'a>> { - tuple(( - opt(lex_doc_comment), - recognize(pair( - at, - verify(lex_ident, |token| token.kind == TokenKind::Prefix), + fn parse_prefix_directive< + 'a, + 's, + E: ParseError> + ContextError>, + >( + input: Input<'a, 's>, + ) -> IResult, Directive<'a>, E> { + context( + "prefix directive", + tuple(( + opt(lex_doc_comment), + recognize(pair( + at, + verify(lex_ident, |token| token.kind == TokenKind::Prefix), + )), + wsoc0, + recognize(pair(opt(lex_ident), colon)), + wsoc0, + lex_iri, + wsoc0, + dot, )), - wsoc0, - report_label(recognize(pair(lex_ident, colon)), ParserLabel::Prefix), - wsoc0, - report_label(lex_iri, ParserLabel::Iri), - wsoc0, - expect( - report_label(dot, ParserLabel::Dot), - "missing `.`", - Token { - kind: TokenKind::Error, - span: Span::new("ERROR! missing `.`"), - }, - input.parser_state, - ), - ))(input) + )(input) .map( |(rest_input, (doc_comment, kw, ws1, prefix, ws2, prefix_iri, ws3, dot))| { ( @@ -2915,32 +3060,31 @@ pub mod new { } /// Parse the import directive. - fn parse_import_directive<'a, 'e>( - input: Input<'a, 'e>, - ) -> IResult, Directive<'a>> { - tuple(( - opt(lex_doc_comment), - recognize(pair( - at, - verify(lex_ident, |token| token.kind == TokenKind::Import), + fn parse_import_directive< + 'a, + 's, + E: ParseError> + ContextError>, + >( + input: Input<'a, 's>, + ) -> IResult, Directive<'a>, E> { + context( + "import directive", + tuple(( + opt(lex_doc_comment), + recognize(pair( + at, + verify(lex_ident, |token| token.kind == TokenKind::Import), + )), + wsoc1, + lex_ident, + wsoc0, + arrow, + wsoc0, + parse_map, + wsoc0, + dot, )), - wsoc1, - report_label(lex_ident, ParserLabel::Identifier), - wsoc0, - report_label(arrow, ParserLabel::Arrow), - wsoc0, - parse_map, - wsoc0, - expect( - report_label(dot, ParserLabel::Dot), - "missing `.`", - Token { - kind: TokenKind::Error, - span: Span::new("ERROR! missing `.`"), - }, - input.parser_state, - ), - ))(input) + )(input) .map( |(rest_input, (doc_comment, kw, ws1, predicate, ws2, arrow, ws3, map, ws4, dot))| { ( @@ -2967,32 +3111,31 @@ pub mod new { } /// Parse the export directive. - fn parse_export_directive<'a, 'e>( - input: Input<'a, 'e>, - ) -> IResult, Directive<'a>> { - tuple(( - opt(lex_doc_comment), - recognize(pair( - at, - verify(lex_ident, |token| token.kind == TokenKind::Export), + fn parse_export_directive< + 'a, + 's, + E: ParseError> + ContextError>, + >( + input: Input<'a, 's>, + ) -> IResult, Directive<'a>, E> { + context( + "export directive", + tuple(( + opt(lex_doc_comment), + recognize(pair( + at, + verify(lex_ident, |token| token.kind == TokenKind::Export), + )), + wsoc1, + lex_ident, + wsoc0, + arrow, + wsoc0, + parse_map, + wsoc0, + dot, )), - wsoc1, - report_label(lex_ident, ParserLabel::Identifier), - wsoc0, - report_label(arrow, ParserLabel::Arrow), - wsoc0, - parse_map, - wsoc0, - expect( - report_label(dot, ParserLabel::Dot), - "missing `.`", - Token { - kind: TokenKind::Error, - span: Span::new("ERROR! missing `.`"), - }, - input.parser_state, - ), - ))(input) + )(input) .map( |(rest_input, (doc_comment, kw, ws1, predicate, ws2, arrow, ws3, map, ws4, dot))| { ( @@ -3019,28 +3162,27 @@ pub mod new { } /// Parse the output directive. 
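// The output directive lists the predicates to be written out, e.g.
// `@output a, b, c .`. Two details of the grammar below are worth noting:
// the keyword must be followed by at least one whitespace or comment
// (`wsoc1`, unlike the `wsoc0` used after `@base`), and the predicate list
// is wrapped in `opt`, so an empty list is accepted as well. Accepted
// shapes (illustrative):
//
// @output result .
// @output a, b, c .
// @output .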
- fn parse_output_directive<'a, 'e>( - input: Input<'a, 'e>, - ) -> IResult, Directive<'a>> { - tuple(( - opt(lex_doc_comment), - recognize(pair( - at, - verify(lex_ident, |token| token.kind == TokenKind::Output), + fn parse_output_directive< + 'a, + 's, + E: ParseError> + ContextError>, + >( + input: Input<'a, 's>, + ) -> IResult, Directive<'a>, E> { + context( + "output directive", + tuple(( + opt(lex_doc_comment), + recognize(pair( + at, + verify(lex_ident, |token| token.kind == TokenKind::Output), + )), + wsoc1, + opt(parse_list(lex_ident)), + wsoc0, + dot, )), - wsoc1, - opt(parse_list(lex_ident)), - wsoc0, - expect( - report_label(dot, ParserLabel::Dot), - "missing `.`", - Token { - kind: TokenKind::Error, - span: Span::new("ERROR! missing `.`"), - }, - input.parser_state, - ), - ))(input) + )(input) .map( |(rest_input, (doc_comment, kw, ws1, predicates, ws2, dot))| { ( @@ -3063,9 +3205,9 @@ pub mod new { } // /// Parse a list of `ident1, ident2, …` - // fn parse_identifier_list<'a, 'e>( - // input: Input<'a, 'e>, - // ) -> IResult, List<'a, Token<'a>>> { + // fn parse_identifier_list<'a, 's, E: ParseError> + ContextError>>( + // input: Input<'a, 's>, + // ) -> IResult, List<'a, Token<'a>>, E> { // pair( // lex_ident, // many0(tuple(( @@ -3087,92 +3229,119 @@ pub mod new { // }) // } - fn parse_list<'a, 'e, T>( - parse_t: fn(Input<'a, 'e>) -> IResult, T>, - ) -> impl Fn(Input<'a, 'e>) -> IResult, List<'a, T>> { - move |input: Input<'a, 'e>| { - pair(parse_t, many0(tuple((wsoc0, comma, wsoc0, parse_t))))(input).map( - |(rest_input, (first, rest))| { - ( - rest_input, - List { - span: outer_span(input.input, rest_input.input), - first, - rest: if rest.is_empty() { None } else { Some(rest) }, - }, - ) - }, - ) + fn parse_list<'a, 's, T, E: ParseError> + ContextError>>( + parse_t: fn(Input<'a, 's>) -> IResult, T, E>, + ) -> impl Fn(Input<'a, 's>) -> IResult, List<'a, T>, E> { + move |input: Input<'a, 's>| { + context( + "list", + pair(parse_t, many0(tuple((wsoc0, comma, wsoc0, parse_t)))), + )(input) + .map(|(rest_input, (first, rest))| { + ( + rest_input, + List { + span: outer_span(input.input, rest_input.input), + first, + rest: if rest.is_empty() { None } else { Some(rest) }, + }, + ) + }) } } /// Parse the head atoms. The same as the body atoms except for disallowing negated atoms. - fn parse_head_atoms<'a, 'e>(input: Input<'a, 'e>) -> IResult, Atom<'a>> { - alt((parse_normal_atom, parse_infix_atom, parse_map_atom))(input) + fn parse_head_atoms<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Atom<'a>, E> { + context( + "rule head atoms", + alt((parse_normal_atom, parse_infix_atom, parse_map_atom)), + )(input) } /// Parse the body atoms. The same as the head atoms except for allowing negated atoms. - fn parse_body_atoms<'a, 'e>(input: Input<'a, 'e>) -> IResult, Atom<'a>> { - alt(( - parse_normal_atom, - parse_negative_atom, - parse_infix_atom, - parse_map_atom, - ))(input) + fn parse_body_atoms<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Atom<'a>, E> { + context( + "rule body atoms", + alt(( + parse_normal_atom, + parse_negative_atom, + parse_infix_atom, + parse_map_atom, + )), + )(input) } /// Parse an atom of the form `predicateName(term1, term2, …)`. 
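// Note that `parse_head_atoms` and `parse_body_atoms` above differ in
// exactly one alternative: negation is only offered in the body, so a
// negated atom in a rule head fails to parse and is surfaced through the
// error-recovery machinery instead. Illustrative contrast in the rule
// language:
//
// p(?X) :- q(?X), ~r(?X) .   % fine: the negated atom sits in the body
// ~p(?X) :- q(?X) .          % rejected: no negation alternative in the head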
- fn parse_normal_atom<'a, 'e>(input: Input<'a, 'e>) -> IResult, Atom<'a>> { - parse_named_tuple(input) + fn parse_normal_atom<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Atom<'a>, E> { + context("positive atom", parse_named_tuple)(input) .map(|(rest_input, named_tuple)| (rest_input, Atom::Positive(named_tuple))) } /// Parse an atom of the form `~predicateName(term1, term2, …)`. - fn parse_negative_atom<'a, 'e>(input: Input<'a, 'e>) -> IResult, Atom<'a>> { - pair(tilde, parse_named_tuple)(input).map(|(rest_input, (tilde, named_tuple))| { - ( - rest_input, - Atom::Negative { - span: outer_span(input.input, rest_input.input), - neg: tilde, - atom: named_tuple, - }, - ) - }) - } - - /// Parse an "infix atom" of the form `term1 term2`. - /// The supported infix operations are `<`, `<=`, `=`, `>=`, `>` and `!=`. - fn parse_infix_atom<'a, 'e>(input: Input<'a, 'e>) -> IResult, Atom<'a>> { - tuple((parse_term, wsoc0, parse_operation_token, wsoc0, parse_term))(input).map( - |(rest_input, (lhs, ws1, operation, ws2, rhs))| { + fn parse_negative_atom<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Atom<'a>, E> { + context("negative atom", pair(tilde, parse_named_tuple))(input).map( + |(rest_input, (tilde, named_tuple))| { ( rest_input, - Atom::InfixAtom { + Atom::Negative { span: outer_span(input.input, rest_input.input), - lhs, - ws1, - operation, - ws2, - rhs, + neg: tilde, + atom: named_tuple, }, ) }, ) } + /// Parse an "infix atom" of the form `term1 term2`. + /// The supported infix operations are `<`, `<=`, `=`, `>=`, `>` and `!=`. + fn parse_infix_atom<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Atom<'a>, E> { + context( + "infix atom", + tuple((parse_term, wsoc0, parse_operation_token, wsoc0, parse_term)), + )(input) + .map(|(rest_input, (lhs, ws1, operation, ws2, rhs))| { + ( + rest_input, + Atom::InfixAtom { + span: outer_span(input.input, rest_input.input), + lhs, + ws1, + operation, + ws2, + rhs, + }, + ) + }) + } + /// Parse a tuple with an optional name, like `ident(term1, term2)` /// or just `(int, int, skip)`. - fn parse_tuple<'a, 'e>(input: Input<'a, 'e>) -> IResult, Tuple<'a>> { - tuple(( - opt(lex_ident), - wsoc0, - open_paren, - wsoc0, - opt(parse_list(parse_term)), - wsoc0, - report_label(close_paren, ParserLabel::CloseParen), - ))(input) + fn parse_tuple<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Tuple<'a>, E> { + context( + "tuple", + tuple(( + opt(lex_ident), + wsoc0, + open_paren, + wsoc0, + opt(parse_list(parse_term)), + wsoc0, + close_paren, + )), + )(input) .map( |(rest_input, (identifier, ws1, open_paren, ws2, terms, ws3, close_paren))| { ( @@ -3194,16 +3363,21 @@ pub mod new { /// Parse a named tuple. This function is like `parse_tuple` with the difference, /// that is enforces the existence of an identifier for the tuple. 
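// Unlike `parse_tuple` above, where the identifier is wrapped in `opt`, the
// leading `lex_ident` here is mandatory. That keeps the grammar unambiguous:
// a bare parenthesized list is only a term, while an atom must carry a
// predicate name. Illustrative contrast:
//
// p(3, 5)   % named tuple; usable as an atom via parse_normal_atom
// (3, 5)    % anonymous tuple; only usable as a term via parse_tuple_term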
- fn parse_named_tuple<'a, 'e>(input: Input<'a, 'e>) -> IResult, Tuple<'a>> { - tuple(( - lex_ident, - wsoc0, - report_label(open_paren, ParserLabel::OpenParen), - wsoc0, - opt(parse_list(parse_term)), - wsoc0, - report_label(close_paren, ParserLabel::CloseParen), - ))(input) + fn parse_named_tuple<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Tuple<'a>, E> { + context( + "named tuple", + tuple(( + lex_ident, + wsoc0, + open_paren, + wsoc0, + opt(parse_list(parse_term)), + wsoc0, + close_paren, + )), + )(input) .map( |(rest_input, (identifier, ws1, open_paren, ws2, terms, ws3, close_paren))| { ( @@ -3225,16 +3399,21 @@ pub mod new { /// Parse a map. Maps are denoted with `{…}` and can haven an optional name, e.g. `csv {…}`. /// Inside the curly braces ist a list of pairs. - fn parse_map<'a, 'e>(input: Input<'a, 'e>) -> IResult, Map<'a>> { - tuple(( - opt(lex_ident), - wsoc0, - open_brace, - wsoc0, - opt(parse_list(parse_pair)), - wsoc0, - report_label(close_brace, ParserLabel::CloseBrace), - ))(input) + fn parse_map<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Map<'a>, E> { + context( + "map", + tuple(( + opt(lex_ident), + wsoc0, + open_brace, + wsoc0, + opt(parse_list(parse_pair)), + wsoc0, + close_brace, + )), + )(input) .map( |(rest_input, (identifier, ws1, open_brace, ws2, pairs, ws3, close_brace))| { ( @@ -3255,15 +3434,17 @@ pub mod new { } /// Parse a map in an atom position. - fn parse_map_atom<'a, 'e>(input: Input<'a, 'e>) -> IResult, Atom<'a>> { + fn parse_map_atom<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Atom<'a>, E> { parse_map(input).map(|(rest_input, map)| (rest_input, Atom::Map(map))) } // /// Parse a pair list of the form `key1 = value1, key2 = value2, …`. - // fn parse_pair_list<'a, 'e, E: ParseError> + ContextError>>( - // input: Input<'a, 'e>, + // fn parse_pair_list<'a, 's, E: ParseError> + ContextError>>( + // input: Input<'a, 's>, // state: Errors, - // ) -> IResult, Option, Term<'a>>>>, E> { + // ) -> IResult, Option, Term<'a>>>>, E> { // context( // "parse pair list", // opt(pair( @@ -3293,36 +3474,31 @@ pub mod new { // } /// Parse a pair of the form `key = value`. - fn parse_pair<'a, 'e>( - input: Input<'a, 'e>, - ) -> IResult, Pair<'a, Term<'a>, Term<'a>>> { - tuple(( - parse_term, - wsoc0, - report_label(equal, ParserLabel::Equal), - wsoc0, - parse_term, - ))(input) - .map(|(rest_input, (key, ws1, equal, ws2, value))| { - ( - rest_input, - Pair { - span: outer_span(input.input, rest_input.input), - key, - ws1, - equal, - ws2, - value, - }, - ) - }) + fn parse_pair<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Pair<'a, Term<'a>, Term<'a>>, E> { + context("pair", tuple((parse_term, wsoc0, equal, wsoc0, parse_term)))(input).map( + |(rest_input, (key, ws1, equal, ws2, value))| { + ( + rest_input, + Pair { + span: outer_span(input.input, rest_input.input), + key, + ws1, + equal, + ws2, + value, + }, + ) + }, + ) } // /// Parse a list of terms of the form `term1, term2, …`. - // fn parse_term_list<'a, 'e, E: ParseError> + ContextError>>( - // input: Input<'a, 'e>, + // fn parse_term_list<'a, 's, E: ParseError> + ContextError>>( + // input: Input<'a, 's>, // state: Errors, - // ) -> IResult, List<'a, Term<'a>>, E> { + // ) -> IResult, List<'a, Term<'a>>, E> { // context( // "parse term list", // pair( @@ -3350,39 +3526,50 @@ pub mod new { /// Parse a term. 
A term can be a primitive value (constant, number, string, …), /// a variable (universal or existential), a map, a function (-symbol), an arithmetic /// operation, an aggregation or an tuple of terms, e.g. `(term1, term2, …)`. - fn parse_term<'a, 'e>(input: Input<'a, 'e>) -> IResult, Term<'a>> { - report_error(alt(( - parse_binary_term, - parse_tuple_term, - // parse_unary_prefix_term, - parse_map_term, - parse_primitive_term, - parse_variable, - parse_existential, - parse_aggregation_term, - parse_blank, - )))(input) + fn parse_term<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Term<'a>, E> { + context( + "term", + alt(( + parse_binary_term, + parse_tuple_term, + // parse_unary_prefix_term, + parse_map_term, + parse_primitive_term, + parse_variable, + parse_existential, + parse_aggregation_term, + parse_blank, + )), + )(input) } /// Parse a primitive term (simple constant, iri constant, number, string). - fn parse_primitive_term<'a, 'e>(input: Input<'a, 'e>) -> IResult, Term<'a>> { - report_error(alt(( - parse_rdf_literal, - parse_ident, - parse_iri, - parse_number, - parse_string, - )))(input) + fn parse_primitive_term<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Term<'a>, E> { + context( + "primitive term", + alt(( + parse_rdf_literal, + parse_ident, + parse_iri, + parse_number, + parse_string, + )), + )(input) .map(|(rest_input, term)| (rest_input, Term::Primitive(term))) } /// Parse a rdf literal e.g. "2023-06-19"^^ - fn parse_rdf_literal<'a, 'e>(input: Input<'a, 'e>) -> IResult, Primitive<'a>> { - tuple(( - lex_string, - recognize(pair(caret, caret)), - report_label(lex_iri, ParserLabel::Iri), - ))(input) + fn parse_rdf_literal<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Primitive<'a>, E> { + context( + "RDF Literal", + tuple((lex_string, recognize(pair(caret, caret)), lex_iri)), + )(input) .map(|(rest_input, (string, carets, iri))| { ( rest_input, @@ -3399,26 +3586,37 @@ pub mod new { }) } - fn parse_ident<'a, 'e>(input: Input<'a, 'e>) -> IResult, Primitive<'a>> { + fn parse_ident<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Primitive<'a>, E> { lex_ident(input).map(|(rest_input, ident)| (rest_input, Primitive::Constant(ident))) } - fn parse_iri<'a, 'e>(input: Input<'a, 'e>) -> IResult, Primitive<'a>> { + fn parse_iri<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Primitive<'a>, E> { lex_iri(input).map(|(rest_input, iri)| (rest_input, Primitive::Iri(iri))) } - fn parse_number<'a, 'e>(input: Input<'a, 'e>) -> IResult, Primitive<'a>> { - report_error(alt((parse_decimal, parse_integer)))(input) + fn parse_number<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Primitive<'a>, E> { + context("number", alt((parse_decimal, parse_integer)))(input) } - fn parse_decimal<'a, 'e>(input: Input<'a, 'e>) -> IResult, Primitive<'a>> { - tuple(( - opt(alt((plus, minus))), - opt(lex_number), - dot, - lex_number, - opt(parse_exponent), - ))(input) + fn parse_decimal<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Primitive<'a>, E> { + context( + "decimal", + tuple(( + opt(alt((plus, minus))), + opt(lex_number), + dot, + lex_number, + opt(parse_exponent), + )), + )(input) .map(|(rest_input, (sign, before, dot, after, exponent))| { ( rest_input, @@ -3434,37 +3632,44 @@ pub mod new { }) } - fn parse_integer<'a, 'e>(input: Input<'a, 'e>) -> IResult, Primitive<'a>> { - 
pair(opt(alt((plus, minus))), lex_number)(input).map(|(rest_input, (sign, number))| { - ( - rest_input, - Primitive::Number { - span: outer_span(input.input, rest_input.input), - sign, - before: None, - dot: None, - after: number, - exponent: None, - }, - ) - }) + fn parse_integer<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Primitive<'a>, E> { + context("integer", pair(opt(alt((plus, minus))), lex_number))(input).map( + |(rest_input, (sign, number))| { + ( + rest_input, + Primitive::Number { + span: outer_span(input.input, rest_input.input), + sign, + before: None, + dot: None, + after: number, + exponent: None, + }, + ) + }, + ) } - fn parse_exponent<'a, 'e>(input: Input<'a, 'e>) -> IResult, Exponent<'a>> { - tuple(( - exp, - opt(alt((plus, minus))), - report_label(lex_number, ParserLabel::Number), - ))(input) + fn parse_exponent<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Exponent<'a>, E> { + context( + "exponent", + tuple((exp, opt(alt((plus, minus))), lex_number)), + )(input) .map(|(rest_input, (e, sign, number))| (rest_input, Exponent { e, sign, number })) } - fn parse_string<'a, 'e>(input: Input<'a, 'e>) -> IResult, Primitive<'a>> { + fn parse_string<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Primitive<'a>, E> { lex_string(input).map(|(rest_input, string)| (rest_input, Primitive::String(string))) } // /// Parse an unary term. - // fn parse_unary_prefix_term<'a, 'e>(input: Input<'a, 'e>) -> IResult, Term<'a>> { + // fn parse_unary_prefix_term<'a, 's, E: ParseError> + ContextError>>(input: Input<'a, 's>) -> IResult, Term<'a>, E> { // pair(lex_unary_prefix_operators, parse_term)(input).map( // |(rest_input, (operation, term))| { // ( @@ -3480,10 +3685,15 @@ pub mod new { // } /// Parse a binary infix operation of the form `term1 term2`. - fn parse_binary_term<'a, 'e>(input: Input<'a, 'e>) -> IResult, Term<'a>> { - pair( - parse_arithmetic_product, - opt(tuple((wsoc0, alt((plus, minus)), wsoc0, parse_binary_term))), + fn parse_binary_term<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Term<'a>, E> { + context( + "binary term", + pair( + parse_arithmetic_product, + opt(tuple((wsoc0, alt((plus, minus)), wsoc0, parse_binary_term))), + ), )(input) .map(|(rest_input, (lhs, opt))| { ( @@ -3506,15 +3716,24 @@ pub mod new { /// Parse an arithmetic product, i.e. an expression involving /// only `*` and `/` over subexpressions. 
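// Operator precedence falls out of this layering: `parse_binary_term`
// handles `+`/`-` and delegates to this function for `*`/`/`, which in turn
// delegates to `parse_arithmetic_factor`. Both layers recurse on their
// right-hand side, so chained operators parse right-associatively; a
// left-associative reading of `-` and `/`, if desired, would have to be
// restored in a later pass. Resulting tree shapes, shown schematically with
// the `Term::Binary` fields from above:
//
// 1 + 2 * 3  =>  Binary { lhs: 1, operation: +, rhs: Binary { lhs: 2, operation: *, rhs: 3 } }
// 1 - 2 - 3  =>  Binary { lhs: 1, operation: -, rhs: Binary { lhs: 2, operation: -, rhs: 3 } }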
- fn parse_arithmetic_product<'a, 'e>(input: Input<'a, 'e>) -> IResult, Term<'a>> { - pair( - parse_arithmetic_factor, - opt(tuple(( - wsoc0, - alt((star, slash)), - wsoc0, - parse_arithmetic_product, - ))), + fn parse_arithmetic_product< + 'a, + 's, + E: ParseError> + ContextError>, + >( + input: Input<'a, 's>, + ) -> IResult, Term<'a>, E> { + context( + "arithmetic product", + pair( + parse_arithmetic_factor, + opt(tuple(( + wsoc0, + alt((star, slash)), + wsoc0, + parse_arithmetic_product, + ))), + ), )(input) .map(|(rest_input, (lhs, opt))| { ( @@ -3535,14 +3754,23 @@ pub mod new { }) } - fn parse_arithmetic_factor<'a, 'e>(input: Input<'a, 'e>) -> IResult, Term<'a>> { - report_error(alt(( - parse_tuple_term, - parse_aggregation_term, - parse_primitive_term, - parse_variable, - parse_existential, - )))(input) + fn parse_arithmetic_factor< + 'a, + 's, + E: ParseError> + ContextError>, + >( + input: Input<'a, 's>, + ) -> IResult, Term<'a>, E> { + context( + "arithmetic factor", + alt(( + parse_tuple_term, + parse_aggregation_term, + parse_primitive_term, + parse_variable, + parse_existential, + )), + )(input) } // fn fold_arithmetic_expression<'a>( @@ -3567,15 +3795,24 @@ pub mod new { // } /// Parse an aggregation term of the form `#sum(…)`. - fn parse_aggregation_term<'a, 'e>(input: Input<'a, 'e>) -> IResult, Term<'a>> { - tuple(( - recognize(pair(hash, lex_ident)), - report_label(open_paren, ParserLabel::OpenParen), - wsoc0, - parse_list(parse_term), - wsoc0, - report_label(close_paren, ParserLabel::CloseParen), - ))(input) + fn parse_aggregation_term< + 'a, + 's, + E: ParseError> + ContextError>, + >( + input: Input<'a, 's>, + ) -> IResult, Term<'a>, E> { + context( + "aggregation term", + tuple(( + recognize(pair(hash, lex_ident)), + open_paren, + wsoc0, + parse_list(parse_term), + wsoc0, + close_paren, + )), + )(input) .map( |(rest_input, (operation, open_paren, ws1, terms, ws2, close_paren))| { ( @@ -3598,28 +3835,38 @@ pub mod new { } /// Parse a `_` - fn parse_blank<'a, 'e>(input: Input<'a, 'e>) -> IResult, Term<'a>> { - underscore(input).map(|(rest_input, underscore)| (rest_input, Term::Blank(underscore))) + fn parse_blank<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Term<'a>, E> { + context("blank", underscore)(input) + .map(|(rest_input, underscore)| (rest_input, Term::Blank(underscore))) } /// Parse a tuple term, either with a name (function symbol) or as a term (-list) with /// parenthesis. - fn parse_tuple_term<'a, 'e>(input: Input<'a, 'e>) -> IResult, Term<'a>> { - parse_tuple(input) + fn parse_tuple_term<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Term<'a>, E> { + context("tuple term", parse_tuple)(input) .map(|(rest_input, named_tuple)| (rest_input, Term::Tuple(Box::new(named_tuple)))) } /// Parse a map as a term. - fn parse_map_term<'a, 'e>(input: Input<'a, 'e>) -> IResult, Term<'a>> { - parse_map(input).map(|(rest_input, map)| (rest_input, Term::Map(Box::new(map)))) + fn parse_map_term<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Term<'a>, E> { + context("map term", parse_map)(input) + .map(|(rest_input, map)| (rest_input, Term::Map(Box::new(map)))) } /// Parse a variable. 
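// Universal variables are written `?Name`; their existential counterparts
// use `!Name` and are handled by `parse_existential` further down. In both
// cases `recognize` merges the marker and the identifier into a single span,
// so the resulting variable term carries the full surface form (e.g. `?X`)
// in one token rather than as two separate tokens. Illustrative input (an
// invented rule, not from the test suite):
//
// p(?X, !New) :- q(?X) .   % ?X is universal, !New is existential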
- fn parse_variable<'a, 'e>(input: Input<'a, 'e>) -> IResult, Term<'a>> { - recognize(pair( - question_mark, - report_label(lex_ident, ParserLabel::Identifier), - ))(input) + fn parse_variable<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Term<'a>, E> { + context( + "universal variable", + recognize(pair(question_mark, lex_ident)), + )(input) .map(|(rest_input, var)| { ( rest_input, @@ -3632,11 +3879,13 @@ pub mod new { } /// Parse an existential variable. - fn parse_existential<'a, 'e>(input: Input<'a, 'e>) -> IResult, Term<'a>> { - recognize(pair( - exclamation_mark, - report_label(lex_ident, ParserLabel::Identifier), - ))(input) + fn parse_existential<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Term<'a>, E> { + context( + "existential variable", + recognize(pair(exclamation_mark, lex_ident)), + )(input) .map(|(rest_input, existential)| { ( rest_input, @@ -3648,27 +3897,26 @@ pub mod new { }) } - // Order of functions is important, because of ordered choice and no backtracking + // Order of parser compinator is important, because of ordered choice and no backtracking /// Parse the operator for an infix atom. - fn parse_operation_token<'a, 'e>(input: Input<'a, 'e>) -> IResult, Token<'a>> { - report_error(alt(( - less_equal, - greater_equal, - equal, - unequal, - less, - greater, - )))(input) + fn parse_operation_token<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Token<'a>, E> { + context( + "operators", + alt((less_equal, greater_equal, equal, unequal, less, greater)), + )(input) } #[cfg(test)] mod tests { use std::{ cell::RefCell, - collections::{HashMap, HashSet}, + collections::{BTreeMap, HashMap, HashSet}, }; use nom::error::{convert_error, VerboseError}; + use nom_supreme::error::ErrorTree; use super::*; use crate::io::{ @@ -3692,9 +3940,9 @@ pub mod new { }; } - fn convert_located_span_error<'a, 'e>( + fn convert_located_span_error<'a, 's>( input: Span<'a>, - err: VerboseError>, + err: VerboseError>, ) -> String { convert_error( *(input.fragment()), @@ -3715,18 +3963,14 @@ pub mod new { // }; let input = Span::new("a(B,C)."); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let errors = ParserState { - errors: &refcell, - labels: &labels, - }; + let errors = ParserState { errors: &refcell }; let input = Input { input, parser_state: errors, }; assert_eq!( // parse_program::>(input).unwrap().1, - parse_program(input).0, + parse_program::>(input).0, Program { span: input.input, tl_doc_comment: None, @@ -3786,18 +4030,14 @@ pub mod new { r#"@base .@prefix rdfs:.@import sourceA:-csv{resource="sources/dataA.csv"}.@export a:-csv{}.@output a, b, c."#, ); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let errors = ParserState { - errors: &refcell, - labels: &labels, - }; + let errors = ParserState { errors: &refcell }; let input = Input { input, parser_state: errors, }; assert_eq!( // parse_program::>(input).unwrap().1, - parse_program(input).0, + parse_program::>(input).0, Program { tl_doc_comment: None, span: input.input, @@ -4070,18 +4310,14 @@ pub mod new { fn fact_with_ws() { let input = Span::new("some(Fact, with, whitespace) . 
% and a super useful comment\n"); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let errors = ParserState { - errors: &refcell, - labels: &labels, - }; + let errors = ParserState { errors: &refcell }; let input = Input { input, parser_state: errors, }; assert_eq!( // parse_program::>(input).unwrap().1, - parse_program(input).0, + parse_program::>(input).0, Program { span: input.input, tl_doc_comment: None, @@ -4197,17 +4433,13 @@ limeSpecies(?X, ?Name) :- taxon(?X, ?Name, ?Y), limeSpecies(?Y, ?N). oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters), ?age > 200, limeSpecies(?id,?species) ."#, ); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let errors = ParserState { - errors: &refcell, - labels: &labels, - }; + let errors = ParserState { errors: &refcell }; let input = Input { input, parser_state: errors, }; // let ast = parse_program::>(input); - let (ast, _) = parse_program(input); + let (ast, _) = parse_program::>(input); println!("{}", ast); assert_eq!( { @@ -4229,29 +4461,26 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters let str = std::fs::read_to_string(file).expect("testfile not found"); let input = Span::new(str.as_str()); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let parser_state = ParserState { - errors: &refcell, - labels: &labels, - }; + let parser_state = ParserState { errors: &refcell }; let input = Input { input, parser_state, }; // let result = parse_program::>(input); - let (ast, errors) = parse_program(input); + let (ast, errors) = parse_program::>>(input); println!("{}\n\n{:#?}", ast, errors); - let mut error_map: HashMap> = HashMap::new(); + let mut error_map: BTreeMap> = BTreeMap::new(); for error in errors { - if let Some(set) = error_map.get_mut(&error.0) { - set.insert(error.1); + if let Some(set) = error_map.get_mut(&error.pos) { + set.insert(error.msg); } else { let mut set = HashSet::new(); - set.insert(error.1); - error_map.insert(error.0, set); + set.insert(error.msg); + error_map.insert(error.pos, set); }; } dbg!(&error_map); + println!("\n\n"); // assert!(false); let lines: Vec<_> = str.lines().collect(); for (pos, str) in error_map { @@ -4271,17 +4500,13 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters { let input = Span::new("42"); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let parser_state = ParserState { - errors: &refcell, - labels: &labels, - }; + let parser_state = ParserState { errors: &refcell }; let input = Input { input, parser_state, }; // let result = parse_term::>(input); - let result = parse_term(input); + let result = parse_term::>(input); result.unwrap().1 }, Term::Primitive(Primitive::Number { @@ -4298,17 +4523,13 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters { let input = Span::new("35+7"); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let parser_state = ParserState { - errors: &refcell, - labels: &labels, - }; + let parser_state = ParserState { errors: &refcell }; let input = Input { input, parser_state, }; // let result = parse_term::>(input); - let result = parse_term(input); + let result = parse_term::>(input); result.unwrap().1 }, Term::Binary { @@ -4339,17 +4560,13 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters { let input = Span::new("6*7"); let refcell = 
RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let parser_state = ParserState { - errors: &refcell, - labels: &labels, - }; + let parser_state = ParserState { errors: &refcell }; let input = Input { input, parser_state, }; // let result = parse_term::>(input); - let result = parse_term(input); + let result = parse_term::>(input); result.unwrap().1 }, Term::Binary { @@ -4380,17 +4597,13 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters { let input = Span::new("49-7"); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let parser_state = ParserState { - errors: &refcell, - labels: &labels, - }; + let parser_state = ParserState { errors: &refcell }; let input = Input { input, parser_state, }; // let result = parse_term::>(input); - let result = parse_term(input); + let result = parse_term::>(input); result.unwrap().1 }, Term::Binary { @@ -4421,17 +4634,13 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters { let input = Span::new("84/2"); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let parser_state = ParserState { - errors: &refcell, - labels: &labels, - }; + let parser_state = ParserState { errors: &refcell }; let input = Input { input, parser_state, }; // let result = parse_term::>(input); - let result = parse_term(input); + let result = parse_term::>(input); result.unwrap().1 }, Term::Binary { @@ -4462,17 +4671,13 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters { let input = Span::new("5*7+7"); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let parser_state = ParserState { - errors: &refcell, - labels: &labels, - }; + let parser_state = ParserState { errors: &refcell }; let input = Input { input, parser_state, }; // let result = parse_term::>(input); - let result = parse_term(input); + let result = parse_term::>(input); result.unwrap().1 }, Term::Binary { @@ -4517,17 +4722,13 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters { let input = Span::new("7+5*7"); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let parser_state = ParserState { - errors: &refcell, - labels: &labels, - }; + let parser_state = ParserState { errors: &refcell }; let input = Input { input, parser_state, }; // let result = parse_term::>(input); - let result = parse_term(input); + let result = parse_term::>(input); result.unwrap().1 }, Term::Binary { @@ -4572,17 +4773,13 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters { let input = Span::new("(15+3*2-(7+35)*8)/3"); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let parser_state = ParserState { - errors: &refcell, - labels: &labels, - }; + let parser_state = ParserState { errors: &refcell }; let input = Input { input, parser_state, }; // let result = parse_term::>(input); - let result = parse_term(input); + let result = parse_term::>(input); // let result = parse_term::>(Span::new("(15+3*2-(7+35)*8)/3")); // match result { // Ok(ast) => { @@ -4744,17 +4941,13 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters { let input = Span::new("15+3*2-(7+35)*8/3"); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let parser_state = ParserState { - errors: &refcell, - labels: &labels, - }; + let parser_state = ParserState { errors: &refcell }; let 
input = Input { input, parser_state, }; // let result = parse_term::>(input); - let result = parse_term(input); + let result = parse_term::>(input); result.unwrap().1 }, Term::Binary { @@ -4898,17 +5091,15 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters { let input = Span::new("e42"); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let parser_state = ParserState { - errors: &refcell, - labels: &labels, - }; + let parser_state = ParserState { errors: &refcell }; let input = Input { input, parser_state, }; // parse_exponent::>(input) - parse_exponent(input).unwrap().1 + parse_exponent::>(input) + .unwrap() + .1 }, Exponent { e: T! {TokenKind::Exponent, 0,1,"e"}, @@ -4922,16 +5113,12 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters fn missing_dot() { let input = Span::new("some(Fact\nSome other, Fact.\nthird(fact)."); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let parser_state = ParserState { - errors: &refcell, - labels: &labels, - }; + let parser_state = ParserState { errors: &refcell }; let input = Input { input, parser_state, }; - let result = parse_program(input); + let result = parse_program::>(input); println!("{}\n\n{:#?}", result.0, result.1); // assert!(false); } @@ -4940,17 +5127,27 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters fn wsoc() { let input = Span::new(" \t\n % first comment\n % second comment\n"); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let parser_state = ParserState { - errors: &refcell, - labels: &labels, + let parser_state = ParserState { errors: &refcell }; + let input = Input { + input, + parser_state, }; + dbg!(wsoc0::>(input)); + dbg!(wsoc1::>(input)); + } + + #[test] + fn debug_test() { + let str = "asd"; + let input = Span::new(str); + let refcell = RefCell::new(Vec::new()); + let parser_state = ParserState { errors: &refcell }; let input = Input { input, parser_state, }; - dbg!(wsoc0(input)); - dbg!(wsoc1(input)); + let result = parse_program::>>(input); + dbg!(&result); } } } diff --git a/nemo/src/io/parser/ast.rs b/nemo/src/io/parser/ast.rs index 30d3a8aa6..41c767ec7 100644 --- a/nemo/src/io/parser/ast.rs +++ b/nemo/src/io/parser/ast.rs @@ -32,13 +32,37 @@ pub trait AstNode: std::fmt::Debug + Display + Sync { fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode>; } -// TODO: tidy up PartialOrd and Ord implementation -#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[derive(Debug, Clone, Copy, Hash)] pub struct Position { pub offset: usize, pub line: u32, pub column: u32, } +impl PartialEq for Position { + fn eq(&self, other: &Self) -> bool { + self.offset.eq(&other.offset) + } +} +impl Eq for Position {} +impl PartialOrd for Position { + fn partial_cmp(&self, other: &Self) -> Option { + self.offset.partial_cmp(&other.offset) + } +} +impl Ord for Position { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.offset.cmp(&other.offset) + } +} +impl Default for Position { + fn default() -> Self { + Position { + offset: 0, + line: 1, + column: 1, + } + } +} /// Whitespace or Comment token #[derive(Debug, Clone, PartialEq)] @@ -227,9 +251,16 @@ pub(crate) fn ast_to_ascii_tree(node: &dyn AstNode) -> Tree { } mod test { - - - + use super::*; + use super::{ + atom::Atom, + directive::Directive, + program::Program, + statement::Statement, + term::{Primitive, Term}, + tuple::Tuple, + }; + use 
crate::io::lexer::{Span, TokenKind}; macro_rules! s { ($offset:literal,$line:literal,$str:literal) => { diff --git a/nemo/src/io/parser/types.rs b/nemo/src/io/parser/types.rs index 6dea14af0..7031f1d26 100644 --- a/nemo/src/io/parser/types.rs +++ b/nemo/src/io/parser/types.rs @@ -805,35 +805,23 @@ where } } -pub(crate) trait ToRange { - fn to_range(&self) -> Range; +impl nom_greedyerror::Position for Input<'_, '_> { + fn position(&self) -> usize { + nom_greedyerror::Position::position(&self.input) + } } -#[derive(Debug, Clone, Copy, PartialEq)] -pub(crate) enum ParserLabel { - Rule, - Fact, - Directive, - Dot, - Arrow, - // Head, - // Body, - Comma, - Iri, - Prefix, - Identifier, - OpenParen, - CloseParen, - OpenBrace, - CloseBrace, - OpenBracket, - ClosePracket, - Equal, - Number, -} - -#[derive(Debug, Clone, Copy, PartialEq)] -pub(crate) struct Label { - pub(crate) label: ParserLabel, - pub(crate) pos: Position, +impl std::fmt::Display for Input<'_, '_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "line {}, column {}", + self.input.location_line(), + self.input.get_utf8_column() + ) + } +} + +pub(crate) trait ToRange { + fn to_range(&self) -> Range; } diff --git a/testfile2.rls b/testfile2.rls index 32ee5c5af..95963d67b 100644 --- a/testfile2.rls +++ b/testfile2.rls @@ -1,11 +1,14 @@ % Facts: -father(alice, bob). -mother(bob, carla). -father(bob, darius). -mother(alice, carla). +father( % father predicate means, that 'alice has father bob' + alice, + bob). +mother bob, carla). +father(bob darius). +mother(alice, carla . % Rules: -parent(?X, ?Y) :- mother(?X, ?Y). +parent(?X, ?Y) :- mother(?X, ?Y) parent(?X, ?Y) :- father(?X, ?Y). -parent( ?X , ?Y ) :- ~sibling( ?X , ?Y ) . +parent( ?X , ?Y :- ~sibling( ?X , ?Y ) . 
+ From c2fdc1efac8043e4ffdd857e305155ed6f352832 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Wed, 29 May 2024 14:57:56 +0200 Subject: [PATCH 044/214] Fix errors caused by Error struct change --- nemo-language-server/src/language_server.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/nemo-language-server/src/language_server.rs b/nemo-language-server/src/language_server.rs index 35acce8c6..e39252745 100644 --- a/nemo-language-server/src/language_server.rs +++ b/nemo-language-server/src/language_server.rs @@ -82,21 +82,21 @@ impl Backend { let diagnostics = errors .into_iter() .map(|error| Diagnostic { - message: error.1, + message: error.msg, range: Range::new( line_col_to_position( &line_index, LineCol { - line: error.0.line - 1, - col: error.0.column - 1, + line: error.pos.line - 1, + col: error.pos.column - 1, }, ) .unwrap(), line_col_to_position( &line_index, LineCol { - line: error.0.line - 1, - col: error.0.column - 1 + 1, + line: error.pos.line - 1, + col: error.pos.column - 1 + 1, }, ) .unwrap(), From fe80d865ac7e5b2cbe14fe9fbf056d2f64ff402d Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Fri, 31 May 2024 08:53:35 +0200 Subject: [PATCH 045/214] Make context parser compinator input generic and add Context enum --- nemo/src/io/lexer.rs | 325 ++++++++++++++------- nemo/src/io/parser.rs | 553 ++++++++++++++++++------------------ nemo/src/io/parser/types.rs | 6 + 3 files changed, 492 insertions(+), 392 deletions(-) diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs index cd2aa1cd9..f976edb49 100644 --- a/nemo/src/io/lexer.rs +++ b/nemo/src/io/lexer.rs @@ -2,40 +2,132 @@ use std::{cell::RefCell, ops::Range}; +use super::parser::new::context; use nom::{ branch::alt, bytes::complete::{is_not, tag, take, take_till}, character::complete::{alpha1, alphanumeric1, digit1, line_ending, multispace1}, combinator::{all_consuming, cut, map, opt, recognize}, - error::{context, ContextError, ErrorKind, ParseError}, + error::ParseError, multi::{many0, many1}, sequence::{delimited, pair, tuple}, IResult, }; -use nom_greedyerror::GreedyError; use nom_locate::LocatedSpan; +use nom_supreme::{context::ContextError, error::GenericErrorTree}; use tower_lsp::lsp_types::SymbolKind; -#[derive(Debug)] -pub(crate) enum NewParseError { - MissingWhitespace, - Rule, +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub(crate) enum Context { + Tag(&'static str), + Exponent, + Punctuations, + Operators, + Identifier, + Iri, + Number, + String, + Comment, + DocComment, + TlDocComment, + Comments, + Whitespace, + Illegal, + Program, Fact, + Rule, + RuleHead, + RuleBody, Directive, - Comment, - SyntaxError(String), - MissingTlDocComment, + DirectiveBase, + DirectivePrefix, + DirectiveImport, + DirectiveExport, + DirectiveOutput, + List, + HeadAtoms, + BodyAtoms, + PositiveAtom, + NegativeAtom, + InfixAtom, + Tuple, + NamedTuple, + Map, + Pair, + Term, + TermPrivimitive, + TermBinary, + TermAggregation, + TermTuple, + TermMap, + RdfLiteral, + Decimal, + Integer, + ArithmeticProduct, + ArithmeticFactor, + Blank, + UniversalVariable, + ExistentialVariable, } -impl ParseError> for NewParseError { - fn from_error_kind(input: Input, kind: nom::error::ErrorKind) -> Self { - NewParseError::SyntaxError(kind.description().to_string()) - } - - fn append(_: Input, _: nom::error::ErrorKind, other: Self) -> Self { - other +impl std::fmt::Display for Context { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Context::Tag(c) => write!(f, "{}", c), 
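// [Editor's note] These Display strings are the user-facing names of the
// parser contexts: report_error() in parser.rs collects the stacked Context
// values from the ErrorTree into Error.context (and builds Error.msg from
// Context::Tag entries), and this impl provides their readable form for
// diagnostics.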
+            Context::Exponent => write!(f, "exponent"),
+            Context::Punctuations => write!(f, "punctuations"),
+            Context::Operators => write!(f, "operators"),
+            Context::Identifier => write!(f, "identifier"),
+            Context::Iri => write!(f, "lex iri"),
+            Context::Number => write!(f, "lex number"),
+            Context::String => write!(f, "lex string"),
+            Context::Comment => write!(f, "lex comment"),
+            Context::DocComment => write!(f, "lex documentation comment"),
+            Context::TlDocComment => write!(f, "lex top level documentation comment"),
+            Context::Comments => write!(f, "comments"),
+            Context::Whitespace => write!(f, "lex whitespace"),
+            Context::Illegal => write!(f, "lex illegal character"),
+            Context::Program => write!(f, "program"),
+            Context::Fact => write!(f, "fact"),
+            Context::Rule => write!(f, "rule"),
+            Context::RuleHead => write!(f, "rule head"),
+            Context::RuleBody => write!(f, "rule body"),
+            Context::Directive => write!(f, "directive"),
+            Context::DirectiveBase => write!(f, "base directive"),
+            Context::DirectivePrefix => write!(f, "prefix directive"),
+            Context::DirectiveImport => write!(f, "import directive"),
+            Context::DirectiveExport => write!(f, "export directive"),
+            Context::DirectiveOutput => write!(f, "output directive"),
+            Context::List => write!(f, "list"),
+            Context::HeadAtoms => write!(f, "head atoms"),
+            Context::BodyAtoms => write!(f, "body atoms"),
+            Context::PositiveAtom => write!(f, "positive atom"),
+            Context::NegativeAtom => write!(f, "negative atom"),
+            Context::InfixAtom => write!(f, "infix atom"),
+            Context::Tuple => write!(f, "tuple"),
+            Context::NamedTuple => write!(f, "named tuple"),
+            Context::Map => write!(f, "map"),
+            Context::Pair => write!(f, "pair"),
+            Context::Term => write!(f, "term"),
+            Context::TermPrivimitive => write!(f, "primitive term"),
+            Context::TermBinary => write!(f, "binary term"),
+            Context::TermAggregation => write!(f, "aggregation term"),
+            Context::TermTuple => write!(f, "tuple term"),
+            Context::TermMap => write!(f, "map term"),
+            Context::RdfLiteral => write!(f, "rdf literal"),
+            Context::Decimal => write!(f, "decimal"),
+            Context::Integer => write!(f, "integer"),
+            Context::ArithmeticProduct => write!(f, "arithmetic product"),
+            Context::ArithmeticFactor => write!(f, "arithmetic factor"),
+            Context::Blank => write!(f, "blank"),
+            Context::UniversalVariable => write!(f, "universal variable"),
+            Context::ExistentialVariable => write!(f, "existential variable"),
+        }
+    }
+}
+
+pub(crate) type ErrorTree<I> =
+    GenericErrorTree<I, &'static str, Context, Box<dyn std::error::Error + Send + Sync + 'static>>;
+
 use super::parser::{
     ast::{AstNode, Position},
     types::{Input, ToRange},
@@ -45,7 +137,7 @@ use super::parser::{
 pub struct Error {
     pub pos: Position,
     pub msg: String,
-    pub context: Vec<&'static str>,
+    pub context: Vec<Context>,
 }
 
 #[derive(Debug, Clone, Copy, PartialEq)]
@@ -317,16 +409,16 @@ impl<'a> AstNode for Token<'a> {
 macro_rules!
syntax { ($func_name: ident, $tag_str: literal, $token: expr) => { - pub(crate) fn $func_name< - 'a, - 's, - E: ParseError> + ContextError>, - >( + pub(crate) fn $func_name<'a, 's, E>( input: Input<'a, 's>, - ) -> IResult, Token<'a>, E> { - map(context($tag_str, tag($tag_str)), |span: Input| { - Token::new($token, span.input) - })(input) + ) -> IResult, Token<'a>, E> + where + E: ParseError> + ContextError, Context>, + { + map( + context(Context::Tag($tag_str), tag($tag_str)), + |span: Input| Token::new($token, span.input), + )(input) } }; } @@ -351,21 +443,21 @@ syntax!(at, "@", TokenKind::At); syntax!(exp_lower, "e", TokenKind::Exponent); syntax!(exp_upper, "E", TokenKind::Exponent); -pub(crate) fn exp<'a, 's, E: ParseError> + ContextError>>( - input: Input<'a, 's>, -) -> IResult, Token<'a>, E> { - context("lex exponent", alt((exp_lower, exp_upper)))(input) +pub(crate) fn exp<'a, 's, E>(input: Input<'a, 's>) -> IResult, Token<'a>, E> +where + E: ParseError> + ContextError, Context>, +{ + context(Context::Exponent, alt((exp_lower, exp_upper)))(input) } -pub(crate) fn lex_punctuations< - 'a, - 's, - E: ParseError> + ContextError>, ->( +pub(crate) fn lex_punctuations<'a, 's, E>( input: Input<'a, 's>, -) -> IResult, Token<'a>, E> { +) -> IResult, Token<'a>, E> +where + E: ParseError> + ContextError, Context>, +{ context( - "lex punctuations", + Context::Punctuations, alt(( arrow, open_paren, @@ -399,11 +491,12 @@ syntax!(minus, "-", TokenKind::Minus); syntax!(star, "*", TokenKind::Star); syntax!(slash, "/", TokenKind::Slash); -pub(crate) fn lex_operators<'a, 's, E: ParseError> + ContextError>>( - input: Input<'a, 's>, -) -> IResult, Token<'a>, E> { +pub(crate) fn lex_operators<'a, 's, E>(input: Input<'a, 's>) -> IResult, Token<'a>, E> +where + E: ParseError> + ContextError, Context>, +{ context( - "lex operators", + Context::Operators, alt(( less_equal, greater_equal, @@ -425,11 +518,12 @@ pub(crate) fn lex_operators<'a, 's, E: ParseError> + ContextError< // alt((plus, minus))(input) // } -pub(crate) fn lex_ident<'a, 's, E: ParseError> + ContextError>>( - input: Input<'a, 's>, -) -> IResult, Token<'a>, E> { +pub(crate) fn lex_ident<'a, 's, E>(input: Input<'a, 's>) -> IResult, Token<'a>, E> +where + E: ParseError> + ContextError, Context>, +{ let (rest_input, ident) = context( - "lex identifier", + Context::Identifier, recognize(pair( alpha1, many0(alt((alphanumeric1, tag("_"), tag("-")))), @@ -446,97 +540,107 @@ pub(crate) fn lex_ident<'a, 's, E: ParseError> + ContextError> + ContextError>>( - input: Input<'a, 's>, -) -> IResult, Token<'a>, E> { +pub(crate) fn lex_iri<'a, 's, E>(input: Input<'a, 's>) -> IResult, Token<'a>, E> +where + E: ParseError> + ContextError, Context>, +{ context( - "lex iri", + Context::Iri, recognize(delimited(tag("<"), is_not("> \n"), cut(tag(">")))), )(input) .map(|(rest, result)| (rest, Token::new(TokenKind::Iri, result.input))) } -pub(crate) fn lex_number<'a, 's, E: ParseError> + ContextError>>( - input: Input<'a, 's>, -) -> IResult, Token<'a>, E> { - context("lex number", digit1)(input) +pub(crate) fn lex_number<'a, 's, E>(input: Input<'a, 's>) -> IResult, Token<'a>, E> +where + E: ParseError> + ContextError, Context>, +{ + context(Context::Number, digit1)(input) .map(|(rest_input, result)| (rest_input, Token::new(TokenKind::Number, result.input))) } -pub(crate) fn lex_string<'a, 's, E: ParseError> + ContextError>>( - input: Input<'a, 's>, -) -> IResult, Token<'a>, E> { +pub(crate) fn lex_string<'a, 's, E>(input: Input<'a, 's>) -> IResult, Token<'a>, E> +where + 
E: ParseError> + ContextError, Context>, +{ context( - "lex string", + Context::String, recognize(delimited(tag("\""), is_not("\""), cut(tag("\"")))), )(input) .map(|(rest, result)| (rest, Token::new(TokenKind::String, result.input))) } -pub(crate) fn lex_comment<'a, 's, E: ParseError> + ContextError>>( - input: Input<'a, 's>, -) -> IResult, Token<'a>, E> { +pub(crate) fn lex_comment<'a, 's, E>(input: Input<'a, 's>) -> IResult, Token<'a>, E> +where + E: ParseError> + ContextError, Context>, +{ context( - "comment", + Context::Comment, recognize(tuple((tag("%"), many0(is_not("\n")), line_ending))), )(input) .map(|(rest, result)| (rest, Token::new(TokenKind::Comment, result.input))) } -pub(crate) fn lex_doc_comment< - 'a, - 's, - E: ParseError> + ContextError>, ->( +pub(crate) fn lex_doc_comment<'a, 's, E>( input: Input<'a, 's>, -) -> IResult, Token<'a>, E> { +) -> IResult, Token<'a>, E> +where + E: ParseError> + ContextError, Context>, +{ context( - "documentation comment", + Context::DocComment, recognize(many1(tuple((tag("%%"), many0(is_not("\n")), line_ending)))), )(input) .map(|(rest, result)| (rest, Token::new(TokenKind::DocComment, result.input))) } -pub(crate) fn lex_toplevel_doc_comment< - 'a, - 's, - E: ParseError> + ContextError>, ->( +pub(crate) fn lex_toplevel_doc_comment<'a, 's, E>( input: Input<'a, 's>, -) -> IResult, Token<'a>, E> { +) -> IResult, Token<'a>, E> +where + E: ParseError> + ContextError, Context>, +{ context( - "top level documentation comment", + Context::TlDocComment, recognize(many1(tuple((tag("%!"), many0(is_not("\n")), line_ending)))), )(input) .map(|(rest, result)| (rest, Token::new(TokenKind::TlDocComment, result.input))) } -pub(crate) fn lex_comments<'a, 's, E: ParseError> + ContextError>>( - input: Input<'a, 's>, -) -> IResult, Token<'a>, E> { +pub(crate) fn lex_comments<'a, 's, E>(input: Input<'a, 's>) -> IResult, Token<'a>, E> +where + E: ParseError> + ContextError, Context>, +{ context( - "comments", + Context::Comments, alt((lex_toplevel_doc_comment, lex_doc_comment, lex_comment)), )(input) } -pub(crate) fn lex_whitespace<'a, 's, E: ParseError> + ContextError>>( +pub(crate) fn lex_whitespace<'a, 's, E>( input: Input<'a, 's>, -) -> IResult, Token<'a>, E> { - context("whitespace", multispace1)(input) +) -> IResult, Token<'a>, E> +where + E: ParseError> + ContextError, Context>, +{ + context(Context::Whitespace, multispace1)(input) .map(|(rest, result)| (rest, Token::new(TokenKind::Whitespace, result.input))) } -pub(crate) fn lex_illegal<'a, 's, E: ParseError> + ContextError>>( - input: Input<'a, 's>, -) -> IResult, Token<'a>, E> { - context("illegal character", take(1usize))(input) +pub(crate) fn lex_illegal<'a, 's, E>(input: Input<'a, 's>) -> IResult, Token<'a>, E> +where + E: ParseError> + ContextError, Context>, +{ + context(Context::Illegal, take(1usize))(input) .map(|(rest, result)| (rest, Token::new(TokenKind::Illegal, result.input))) } -pub(crate) fn lex_tokens<'a, 's, E: ParseError> + ContextError>>( +pub(crate) fn lex_tokens<'a, 's, E>( input: Input<'a, 's>, -) -> IResult, Vec>, E> { +) -> IResult, Vec>, E> +where + E: ParseError> + ContextError, Context>, +{ all_consuming(many0(alt(( lex_iri, lex_operators, @@ -554,9 +658,10 @@ pub(crate) fn lex_tokens<'a, 's, E: ParseError> + ContextError> + ContextError>>( - input: Input<'a, 's>, -) -> (Input<'a, 's>, Token<'a>) { +pub(crate) fn skip_to_dot<'a, 's, E>(input: Input<'a, 's>) -> (Input<'a, 's>, Token<'a>) +where + E: ParseError> + ContextError, Context>, +{ let (rest_input, error_input) = 
recognize(pair( take_till::<_, Input<'_, '_>, nom::error::Error<_>>(|c| c == '.'), opt(tag(".")), @@ -573,6 +678,8 @@ pub(crate) fn skip_to_dot<'a, 's, E: ParseError> + ContextError>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![T!(Eof, 0, 1, "")] ) } @@ -611,7 +718,7 @@ mod tests { }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![T!(At, 0, 1, "@"), T!(Base, 1, 1, "base"), T!(Eof, 5, 1, ""),] ) } @@ -627,7 +734,7 @@ mod tests { }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(At, 0, 1, "@"), T!(Prefix, 1, 1, "prefix"), @@ -647,7 +754,7 @@ mod tests { }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(At, 0, 1, "@"), T!(Output, 1, 1, "output"), @@ -667,7 +774,7 @@ mod tests { }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(At, 0, 1, "@"), T!(Import, 1, 1, "import"), @@ -687,7 +794,7 @@ mod tests { }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(At, 0, 1, "@"), T!(Export, 1, 1, "export"), @@ -707,7 +814,7 @@ mod tests { }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(At, 0, 1, "@"), T!(Ident, 1, 1, "baseA"), @@ -744,7 +851,7 @@ mod tests { }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Ident, 0, 1, "P"), T!(OpenParen, 1, 1, "("), @@ -782,7 +889,7 @@ mod tests { }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Whitespace, 0, 1, " "), T!(Comment, 4, 1, "% Some Comment\n"), @@ -804,7 +911,7 @@ mod tests { }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Ident, 0, 1, "some_Ident"), T!(OpenParen, 10, 1, "("), @@ -829,7 +936,7 @@ mod tests { }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Underscore, 0, 1, "_"), T!(Ident, 1, 1, "someIdent"), @@ -855,7 +962,7 @@ mod tests { }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Iri, 0, 1, ""), T!(Eof, 31, 1, ""), @@ -874,7 +981,7 @@ mod tests { }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Iri, 0, 1, ""), T!(Whitespace, 37, 1, "\n"), @@ -896,7 +1003,7 @@ mod tests { }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Ident, 0, 1, "A"), T!(OpenParen, 1, 1, "("), @@ -936,7 +1043,7 @@ mod tests { }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Comment, 0, 1, "%d4 this should be a comment,\n"), T!( @@ -962,7 +1069,7 @@ mod tests { }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Ident, 0, 1, "somePred"), T!(OpenParen, 8, 1, "("), @@ -988,7 +1095,7 @@ mod tests { }; assert_eq!( // lex_tokens::>(input).unwrap().1, - 
lex_tokens::>(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Whitespace, 0, 1, " \t \n\n\t \n"), T!(Eof, 12, 4, ""), @@ -1005,6 +1112,6 @@ mod tests { input, parser_state: errors, }; - dbg!(super::skip_to_dot::>(input)); + dbg!(super::skip_to_dot::>(input)); } } diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index a8d9f82e8..c6406aa2d 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -2443,12 +2443,12 @@ pub mod new { exp, greater, greater_equal, hash, less, less_equal, lex_comment, lex_doc_comment, lex_ident, lex_iri, lex_number, lex_operators, lex_string, lex_toplevel_doc_comment, lex_whitespace, minus, open_brace, open_paren, plus, question_mark, skip_to_dot, slash, - star, tilde, underscore, unequal, Error, NewParseError, ParserState, Span, Token, + star, tilde, underscore, unequal, Context, Error, ErrorTree, ParserState, Span, Token, TokenKind, }; use crate::io::parser::ast::AstNode; use nom::combinator::{all_consuming, cut, map, opt, recognize}; - use nom::error::{context, ContextError, ErrorKind, ParseError}; + use nom::error::{ErrorKind, ParseError}; use nom::sequence::{delimited, pair}; use nom::Parser; use nom::{ @@ -2458,8 +2458,8 @@ pub mod new { sequence::tuple, IResult, }; - use nom_greedyerror::GreedyError; - use nom_supreme::error::{ErrorTree, StackContext}; + use nom_supreme::{context::ContextError, error::StackContext}; + use sanitise_file_name::Stringy; fn outer_span<'a>(input: Span<'a>, rest_input: Span<'a>) -> Span<'a> { unsafe { @@ -2473,67 +2473,6 @@ pub mod new { } } - // fn ignore_ws_and_comments<'a, F, O, E: ParseError> + ContextError>>( - // inner: F, - // ) -> impl FnMut(Span<'a>) -> IResult, O, E> - // where - // F: Parser, O, E> + FnMut(Span<'a>) -> IResult, O, E>, - // { - // delimited( - // many0(alt((lex_whitespace, lex_comment))), - // inner, - // many0(alt((lex_whitespace, lex_comment))), - // ) - // } - - // fn expect<'a, F, E, T>( - // parser: F, - // error_msg: E, - // state: Errors, - // ) -> impl Fn(Span<'a>) -> IResult, T> - // where - // F: Fn(Span<'a>) -> IResult, T>, - // E: ToString, - // { - // move |input| match parser(input) { - // Ok((rest_input, output)) => Ok((rest_input, output)), - // Err(nom::Err::Error(nom::error::Error { input, .. })) - // | Err(nom::Err::Failure(nom::error::Error { input, .. 
})) => { - // let err = crate::io::lexer::Error(to_range(input), error_msg.to_string()); - // state.report_error(err); - // Ok(( - // input, - // Token { - // kind: TokenKind::Error, - // span: outer_span(input, rest_input), - // }, - // )) - // } - // Err(err) => Err(err), - // } - // } - - // fn context<'a, 's, O, E>( - // mut f: impl FnMut(Input<'a, 's>) -> IResult, O, E>, - // context: ParserContext, - // ) -> impl FnMut(Input<'a, 's>) -> IResult, O, E> { - // move |input| { - // let mut labels = *input.parser_state.labels.borrow_mut(); - // if let None = labels { - // labels = Some(Context { - // context: context.clone(), - // label: None, - // inner: vec![], - // }); - // labels - // } else { - // dbg!(&labels); - // labels - // }; - // f(input) - // } - // } - fn expect<'a, 's, O: Copy, E: ParseError>, F: Parser, O, E>>( mut parser: F, error_msg: impl ToString, @@ -2562,7 +2501,7 @@ pub mod new { fn recover<'a, 's, E>( mut parser: impl Parser, Statement<'a>, E>, error_msg: impl ToString, - context: &'static str, + context: Context, errors: ParserState<'s>, ) -> impl FnMut(Input<'a, 's>) -> IResult, Statement<'a>, E> { move |input: Input<'a, 's>| match parser.parse(input) { @@ -2578,7 +2517,7 @@ pub mod new { msg: error_msg.to_string(), context: vec![context], }; - errors.report_error(err); + // errors.report_error(err); let (rest_input, token) = skip_to_dot::>>(input); Ok((rest_input, Statement::Error(token))) } @@ -2586,91 +2525,8 @@ pub mod new { } } - // fn report_label<'a, 's, O, E>( - // mut parser: impl nom::Parser, O, E>, - // label: ParserLabel, - // ) -> impl FnMut(Input<'a, 's>) -> IResult, O, E> { - // move |input| match parser.parse(input) { - // Ok(result) => Ok(result), - // Err(err) => { - // match err { - // nom::Err::Incomplete(_) => (), - // nom::Err::Error(_) | nom::Err::Failure(_) => { - // if !input.input.is_empty() { - // input.parser_state.labels.borrow_mut().push(Label { - // label, - // pos: Position { - // offset: input.input.location_offset(), - // line: input.input.location_line(), - // column: input.input.get_utf8_column() as u32, - // }, - // }) - // }; - // } - // }; - // Err(err) - // } - // } - // } - - // fn report_error<'a, 's, O, E>( - // mut parser: impl nom::Parser, O, E>, - // ) -> impl FnMut(Input<'a, 's>) -> IResult, O, E> { - // move |input| match parser.parse(input) { - // Ok(result) => { - // input.parser_state.labels.borrow_mut().inner.clear(); - // Ok(result) - // } - // Err(err) => { - // match err { - // nom::Err::Incomplete(_) => (), - // nom::Err::Error(_) | nom::Err::Failure(_) => { - // // println!("LABELS BEFORE REPORT!!!!: {:#?}", input.parser_state.labels); - // let mut furthest_errors: Vec = Vec::new(); - // let labels = - // as Clone>::clone(&input.parser_state.labels.borrow()) - // .into_iter(); - // for label in labels { - // if let Some(last) = furthest_errors.last() { - // if label.pos.offset >= (*last).0.offset { - // let err = - // Error(label.pos, format!("expected {:?}", label.label)); - // furthest_errors.push(err); - // } - // } else { - // let err = Error(label.pos, format!("expected {:?}", label.label)); - // furthest_errors.push(err); - // }; - // } - // for err in furthest_errors { - // input.parser_state.report_error(err) - // } - // // for label in furthest_errors { - // // println!( - // // "Syntax error: Parser got stuck at line {} column {}, expected {:?}", - // // label.position.line, label.position.column, label.label - // // ); - // // println!( - // // "\n{}", - // // input - // // .parser_state - // // 
.source - // // .fragment() - // // .lines() - // // .collect::>() - // // .get((label.position.line - 1) as usize) - // // .unwrap() - // // ); - // // println!("{1:>0$}", label.position.column, "^"); - // // } - // } - // }; - // Err(err) - // } - // } - // } fn report_error<'a, 's, O>( - mut parser: impl nom::Parser, O, ErrorTree>>, + mut parser: impl Parser, O, ErrorTree>>, ) -> impl FnMut(Input<'a, 's>) -> IResult, O, ErrorTree>> { move |input| match parser.parse(input) { Ok(result) => Ok(result), @@ -2681,8 +2537,9 @@ pub mod new { match &e { nom::Err::Incomplete(_) => (), nom::Err::Error(err) | nom::Err::Failure(err) => { - let (deepest_pos, errors) = get_deepest_errors(err); + let (_deepest_pos, errors) = get_deepest_errors(err); for error in errors { + dbg!(&error); input.parser_state.report_error(error); } // let error = Error(deepest_pos, format!("")); @@ -2695,8 +2552,10 @@ pub mod new { } fn get_deepest_errors<'a, 's>(e: &'a ErrorTree>) -> (Position, Vec) { + dbg!(&e); match e { ErrorTree::Base { location, kind } => { + dbg!(&kind); let span = location.input; let err_pos = Position { offset: span.location_offset(), @@ -2707,7 +2566,7 @@ pub mod new { err_pos, vec![Error { pos: err_pos, - msg: format!("{}", e), + msg: "".to_string(), context: Vec::new(), }], ) @@ -2716,17 +2575,30 @@ pub mod new { // let mut err_pos = Position::default(); match &**base { ErrorTree::Base { location, kind } => { + dbg!(&kind); let span = location.input; let err_pos = Position { offset: span.location_offset(), line: span.location_line(), column: span.get_utf8_column() as u32, }; + let mut msg = String::from(""); + for (_, context) in contexts { + match context { + StackContext::Kind(_) => todo!(), + StackContext::Context(c) => match c { + Context::Tag(t) => { + msg.push_str(t); + } + _ => (), + }, + } + } ( err_pos, vec![Error { pos: err_pos, - msg: format!("{}", base), + msg, context: context_strs(contexts), }], ) @@ -2769,21 +2641,39 @@ pub mod new { } } - fn context_strs( - contexts: &Vec<(Input<'_, '_>, StackContext<&'static str>)>, - ) -> Vec<&'static str> { + fn context_strs(contexts: &Vec<(Input<'_, '_>, StackContext)>) -> Vec { contexts .iter() .map(|(_, c)| match c { StackContext::Kind(k) => todo!(), - StackContext::Context(str) => *str, + StackContext::Context(c) => *c, }) .collect() } - fn wsoc0<'a, 's, E: ParseError> + ContextError>>( - input: Input<'a, 's>, - ) -> IResult, Option>, E> { + pub(crate) fn context<'a, 's, P, E, F, O>( + context: P, + mut f: F, + ) -> impl FnMut(Input<'a, 's>) -> IResult, O, E> + where + P: Clone, + F: Parser, O, E>, + E: ContextError, P>, + { + move |i| match f.parse(i.clone()) { + Ok(o) => Ok(o), + Err(nom::Err::Incomplete(i)) => Err(nom::Err::Incomplete(i)), + Err(nom::Err::Error(e)) => Err(nom::Err::Error(E::add_context(i, context.clone(), e))), + Err(nom::Err::Failure(e)) => { + Err(nom::Err::Failure(E::add_context(i, context.clone(), e))) + } + } + } + + fn wsoc0<'a, 's, E>(input: Input<'a, 's>) -> IResult, Option>, E> + where + E: ParseError> + ContextError, Context>, + { many0(alt((lex_whitespace, lex_comment)))(input).map(|(rest_input, vec)| { if vec.is_empty() { (rest_input, None) @@ -2799,7 +2689,7 @@ pub mod new { }) } - fn wsoc1<'a, 's, E: ParseError> + ContextError>>( + fn wsoc1<'a, 's, E: ParseError> + ContextError, Context>>( input: Input<'a, 's>, ) -> IResult, Wsoc<'a>, E> { many1(alt((lex_whitespace, lex_comment)))(input).map(|(rest_input, vec)| { @@ -2814,11 +2704,15 @@ pub mod new { } /// Parse a full program consisting of directives, 
facts, rules and comments. - fn parse_program<'a, 's, E: ParseError> + ContextError>>( + fn parse_program< + 'a, + 's, + E: ParseError> + ContextError, Context>, + >( input: Input<'a, 's>, ) -> (Program<'a>, Vec) { let result = context( - "program", + Context::Program, pair( opt(lex_toplevel_doc_comment::>>), many0(recover( @@ -2831,7 +2725,7 @@ pub mod new { parse_comment, ))), "failed to parse statement", - "program", + Context::Program, input.parser_state, )), ), @@ -2868,26 +2762,34 @@ pub mod new { } /// Parse whitespace that is between directives, facts, rules and comments. - fn parse_whitespace<'a, 's, E: ParseError> + ContextError>>( + fn parse_whitespace< + 'a, + 's, + E: ParseError> + ContextError, Context>, + >( input: Input<'a, 's>, ) -> IResult, Statement<'a>, E> { lex_whitespace(input).map(|(rest_input, ws)| (rest_input, Statement::Whitespace(ws))) } /// Parse normal comments that start with a `%` and ends at the line ending. - fn parse_comment<'a, 's, E: ParseError> + ContextError>>( + fn parse_comment< + 'a, + 's, + E: ParseError> + ContextError, Context>, + >( input: Input<'a, 's>, ) -> IResult, Statement<'a>, E> { lex_comment(input).map(|(rest_input, comment)| (rest_input, Statement::Comment(comment))) } /// Parse a fact of the form `predicateName(term1, term2, …).` - fn parse_fact<'a, 's, E: ParseError> + ContextError>>( + fn parse_fact<'a, 's, E: ParseError> + ContextError, Context>>( input: Input<'a, 's>, ) -> IResult, Statement<'a>, E> { // dbg!(&input.parser_state.labels); context( - "fact", + Context::Fact, tuple((opt(lex_doc_comment), parse_normal_atom, wsoc0, dot)), )(input) .map(|(rest_input, (doc_comment, atom, ws, dot))| { @@ -2905,11 +2807,11 @@ pub mod new { } /// Parse a rule of the form `headPredicate1(term1, term2, …), headPredicate2(term1, term2, …) :- bodyPredicate(term1, …), term1 >= (term2 + term3) * function(term1, …) .` - fn parse_rule<'a, 's, E: ParseError> + ContextError>>( + fn parse_rule<'a, 's, E: ParseError> + ContextError, Context>>( input: Input<'a, 's>, ) -> IResult, Statement<'a>, E> { context( - "rule", + Context::Rule, tuple(( opt(lex_doc_comment), parse_head, @@ -2942,25 +2844,29 @@ pub mod new { } /// Parse the head atoms of a rule. - fn parse_head<'a, 's, E: ParseError> + ContextError>>( + fn parse_head<'a, 's, E: ParseError> + ContextError, Context>>( input: Input<'a, 's>, ) -> IResult, List<'a, Atom<'a>>, E> { - context("rule head", parse_list(parse_head_atoms))(input) + context(Context::RuleHead, parse_list(parse_head_atoms))(input) } /// Parse the body atoms of a rule. - fn parse_body<'a, 's, E: ParseError> + ContextError>>( + fn parse_body<'a, 's, E: ParseError> + ContextError, Context>>( input: Input<'a, 's>, ) -> IResult, List<'a, Atom<'a>>, E> { - context("rule body", parse_list(parse_body_atoms))(input) + context(Context::RuleBody, parse_list(parse_body_atoms))(input) } /// Parse the directives (@base, @prefix, @import, @export, @output). - fn parse_directive<'a, 's, E: ParseError> + ContextError>>( + fn parse_directive< + 'a, + 's, + E: ParseError> + ContextError, Context>, + >( input: Input<'a, 's>, ) -> IResult, Statement<'a>, E> { context( - "directive", + Context::Directive, alt(( parse_base_directive, parse_prefix_directive, @@ -2973,11 +2879,15 @@ pub mod new { } /// Parse the base directive. 
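
// [Editor's note] A base directive has the concrete form
//     @base <http://example.org/> .
// i.e. the `@base` keyword, an IRI in angle brackets, and the terminating
// dot. A minimal sketch of the same shape over plain &str (illustrative
// names, assuming nom 7; not the patch's API):

use nom::{
    bytes::complete::{tag, take_until},
    character::complete::multispace0,
    sequence::{delimited, preceded, terminated, tuple},
    IResult,
};

fn base_directive(input: &str) -> IResult<&str, &str> {
    preceded(
        tuple((tag("@"), tag("base"), multispace0)),
        terminated(
            // an IRI is delimited by angle brackets, as in lex_iri
            delimited(tag("<"), take_until(">"), tag(">")),
            tuple((multispace0, tag("."))),
        ),
    )(input)
}

// base_directive("@base <http://example.org/> .") == Ok(("", "http://example.org/"))
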
- fn parse_base_directive<'a, 's, E: ParseError> + ContextError>>( + fn parse_base_directive< + 'a, + 's, + E: ParseError> + ContextError, Context>, + >( input: Input<'a, 's>, ) -> IResult, Directive<'a>, E> { context( - "base directive", + Context::DirectiveBase, tuple(( opt(lex_doc_comment), recognize(pair( @@ -3013,12 +2923,12 @@ pub mod new { fn parse_prefix_directive< 'a, 's, - E: ParseError> + ContextError>, + E: ParseError> + ContextError, Context>, >( input: Input<'a, 's>, ) -> IResult, Directive<'a>, E> { context( - "prefix directive", + Context::DirectivePrefix, tuple(( opt(lex_doc_comment), recognize(pair( @@ -3063,12 +2973,12 @@ pub mod new { fn parse_import_directive< 'a, 's, - E: ParseError> + ContextError>, + E: ParseError> + ContextError, Context>, >( input: Input<'a, 's>, ) -> IResult, Directive<'a>, E> { context( - "import directive", + Context::DirectiveImport, tuple(( opt(lex_doc_comment), recognize(pair( @@ -3114,12 +3024,12 @@ pub mod new { fn parse_export_directive< 'a, 's, - E: ParseError> + ContextError>, + E: ParseError> + ContextError, Context>, >( input: Input<'a, 's>, ) -> IResult, Directive<'a>, E> { context( - "export directive", + Context::DirectiveExport, tuple(( opt(lex_doc_comment), recognize(pair( @@ -3165,12 +3075,12 @@ pub mod new { fn parse_output_directive< 'a, 's, - E: ParseError> + ContextError>, + E: ParseError> + ContextError, Context>, >( input: Input<'a, 's>, ) -> IResult, Directive<'a>, E> { context( - "output directive", + Context::DirectiveOutput, tuple(( opt(lex_doc_comment), recognize(pair( @@ -3205,7 +3115,7 @@ pub mod new { } // /// Parse a list of `ident1, ident2, …` - // fn parse_identifier_list<'a, 's, E: ParseError> + ContextError>>( + // fn parse_identifier_list<'a, 's, E: ParseError> + ContextError, Context>>( // input: Input<'a, 's>, // ) -> IResult, List<'a, Token<'a>>, E> { // pair( @@ -3229,12 +3139,17 @@ pub mod new { // }) // } - fn parse_list<'a, 's, T, E: ParseError> + ContextError>>( + fn parse_list< + 'a, + 's, + T, + E: ParseError> + ContextError, Context>, + >( parse_t: fn(Input<'a, 's>) -> IResult, T, E>, ) -> impl Fn(Input<'a, 's>) -> IResult, List<'a, T>, E> { move |input: Input<'a, 's>| { context( - "list", + Context::List, pair(parse_t, many0(tuple((wsoc0, comma, wsoc0, parse_t)))), )(input) .map(|(rest_input, (first, rest))| { @@ -3251,21 +3166,29 @@ pub mod new { } /// Parse the head atoms. The same as the body atoms except for disallowing negated atoms. - fn parse_head_atoms<'a, 's, E: ParseError> + ContextError>>( + fn parse_head_atoms< + 'a, + 's, + E: ParseError> + ContextError, Context>, + >( input: Input<'a, 's>, ) -> IResult, Atom<'a>, E> { context( - "rule head atoms", + Context::HeadAtoms, alt((parse_normal_atom, parse_infix_atom, parse_map_atom)), )(input) } /// Parse the body atoms. The same as the head atoms except for allowing negated atoms. - fn parse_body_atoms<'a, 's, E: ParseError> + ContextError>>( + fn parse_body_atoms< + 'a, + 's, + E: ParseError> + ContextError, Context>, + >( input: Input<'a, 's>, ) -> IResult, Atom<'a>, E> { context( - "rule body atoms", + Context::BodyAtoms, alt(( parse_normal_atom, parse_negative_atom, @@ -3276,18 +3199,26 @@ pub mod new { } /// Parse an atom of the form `predicateName(term1, term2, …)`. 
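
// [Editor's note] Head atoms must be positive, while body atoms may also be
// negated with `~`; both variants reuse the named-tuple parser. A minimal
// sketch of that split (illustrative types, assuming nom 7; not the patch's
// AST):

use nom::{
    branch::alt,
    character::complete::{alpha1, char},
    combinator::map,
    multi::separated_list1,
    sequence::{delimited, pair, preceded},
    IResult,
};

#[derive(Debug, PartialEq)]
enum Atom<'a> {
    Positive(&'a str, Vec<&'a str>),
    Negative(&'a str, Vec<&'a str>),
}

// named_tuple := ident '(' ident (',' ident)* ')'
fn named_tuple(input: &str) -> IResult<&str, (&str, Vec<&str>)> {
    pair(
        alpha1,
        delimited(char('('), separated_list1(char(','), alpha1), char(')')),
    )(input)
}

fn atom(input: &str) -> IResult<&str, Atom<'_>> {
    alt((
        map(preceded(char('~'), named_tuple), |(name, args)| {
            Atom::Negative(name, args)
        }),
        map(named_tuple, |(name, args)| Atom::Positive(name, args)),
    ))(input)
}

// atom("~p(a,b)") == Ok(("", Atom::Negative("p", vec!["a", "b"])))
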
- fn parse_normal_atom<'a, 's, E: ParseError> + ContextError>>( + fn parse_normal_atom< + 'a, + 's, + E: ParseError> + ContextError, Context>, + >( input: Input<'a, 's>, ) -> IResult, Atom<'a>, E> { - context("positive atom", parse_named_tuple)(input) + context(Context::PositiveAtom, parse_named_tuple)(input) .map(|(rest_input, named_tuple)| (rest_input, Atom::Positive(named_tuple))) } /// Parse an atom of the form `~predicateName(term1, term2, …)`. - fn parse_negative_atom<'a, 's, E: ParseError> + ContextError>>( + fn parse_negative_atom< + 'a, + 's, + E: ParseError> + ContextError, Context>, + >( input: Input<'a, 's>, ) -> IResult, Atom<'a>, E> { - context("negative atom", pair(tilde, parse_named_tuple))(input).map( + context(Context::NegativeAtom, pair(tilde, parse_named_tuple))(input).map( |(rest_input, (tilde, named_tuple))| { ( rest_input, @@ -3303,11 +3234,15 @@ pub mod new { /// Parse an "infix atom" of the form `term1 term2`. /// The supported infix operations are `<`, `<=`, `=`, `>=`, `>` and `!=`. - fn parse_infix_atom<'a, 's, E: ParseError> + ContextError>>( + fn parse_infix_atom< + 'a, + 's, + E: ParseError> + ContextError, Context>, + >( input: Input<'a, 's>, ) -> IResult, Atom<'a>, E> { context( - "infix atom", + Context::InfixAtom, tuple((parse_term, wsoc0, parse_operation_token, wsoc0, parse_term)), )(input) .map(|(rest_input, (lhs, ws1, operation, ws2, rhs))| { @@ -3327,11 +3262,11 @@ pub mod new { /// Parse a tuple with an optional name, like `ident(term1, term2)` /// or just `(int, int, skip)`. - fn parse_tuple<'a, 's, E: ParseError> + ContextError>>( + fn parse_tuple<'a, 's, E: ParseError> + ContextError, Context>>( input: Input<'a, 's>, ) -> IResult, Tuple<'a>, E> { context( - "tuple", + Context::Tuple, tuple(( opt(lex_ident), wsoc0, @@ -3363,11 +3298,15 @@ pub mod new { /// Parse a named tuple. This function is like `parse_tuple` with the difference, /// that is enforces the existence of an identifier for the tuple. - fn parse_named_tuple<'a, 's, E: ParseError> + ContextError>>( + fn parse_named_tuple< + 'a, + 's, + E: ParseError> + ContextError, Context>, + >( input: Input<'a, 's>, ) -> IResult, Tuple<'a>, E> { context( - "named tuple", + Context::NamedTuple, tuple(( lex_ident, wsoc0, @@ -3399,11 +3338,11 @@ pub mod new { /// Parse a map. Maps are denoted with `{…}` and can haven an optional name, e.g. `csv {…}`. /// Inside the curly braces ist a list of pairs. - fn parse_map<'a, 's, E: ParseError> + ContextError>>( + fn parse_map<'a, 's, E: ParseError> + ContextError, Context>>( input: Input<'a, 's>, ) -> IResult, Map<'a>, E> { context( - "map", + Context::Map, tuple(( opt(lex_ident), wsoc0, @@ -3434,14 +3373,18 @@ pub mod new { } /// Parse a map in an atom position. - fn parse_map_atom<'a, 's, E: ParseError> + ContextError>>( + fn parse_map_atom< + 'a, + 's, + E: ParseError> + ContextError, Context>, + >( input: Input<'a, 's>, ) -> IResult, Atom<'a>, E> { parse_map(input).map(|(rest_input, map)| (rest_input, Atom::Map(map))) } // /// Parse a pair list of the form `key1 = value1, key2 = value2, …`. - // fn parse_pair_list<'a, 's, E: ParseError> + ContextError>>( + // fn parse_pair_list<'a, 's, E: ParseError> + ContextError, Context>>( // input: Input<'a, 's>, // state: Errors, // ) -> IResult, Option, Term<'a>>>>, E> { @@ -3474,28 +3417,30 @@ pub mod new { // } /// Parse a pair of the form `key = value`. 
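
// [Editor's note] Map entries are `key = value` pairs; in the patch the `=`
// may be surrounded by whitespace or comments (wsoc0). A minimal sketch with
// plain whitespace instead (illustrative, assuming nom 7):

use nom::{
    character::complete::{alphanumeric1, char, multispace0},
    sequence::{delimited, separated_pair},
    IResult,
};

fn pair_entry(input: &str) -> IResult<&str, (&str, &str)> {
    separated_pair(
        alphanumeric1,
        // accepts `key=value`, `key = value`, `key =value`, …
        delimited(multispace0, char('='), multispace0),
        alphanumeric1,
    )(input)
}

// pair_entry("resource = dataA") == Ok(("", ("resource", "dataA")))
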
- fn parse_pair<'a, 's, E: ParseError> + ContextError>>( + fn parse_pair<'a, 's, E: ParseError> + ContextError, Context>>( input: Input<'a, 's>, ) -> IResult, Pair<'a, Term<'a>, Term<'a>>, E> { - context("pair", tuple((parse_term, wsoc0, equal, wsoc0, parse_term)))(input).map( - |(rest_input, (key, ws1, equal, ws2, value))| { - ( - rest_input, - Pair { - span: outer_span(input.input, rest_input.input), - key, - ws1, - equal, - ws2, - value, - }, - ) - }, - ) + context( + Context::Pair, + tuple((parse_term, wsoc0, equal, wsoc0, parse_term)), + )(input) + .map(|(rest_input, (key, ws1, equal, ws2, value))| { + ( + rest_input, + Pair { + span: outer_span(input.input, rest_input.input), + key, + ws1, + equal, + ws2, + value, + }, + ) + }) } // /// Parse a list of terms of the form `term1, term2, …`. - // fn parse_term_list<'a, 's, E: ParseError> + ContextError>>( + // fn parse_term_list<'a, 's, E: ParseError> + ContextError, Context>>( // input: Input<'a, 's>, // state: Errors, // ) -> IResult, List<'a, Term<'a>>, E> { @@ -3526,11 +3471,11 @@ pub mod new { /// Parse a term. A term can be a primitive value (constant, number, string, …), /// a variable (universal or existential), a map, a function (-symbol), an arithmetic /// operation, an aggregation or an tuple of terms, e.g. `(term1, term2, …)`. - fn parse_term<'a, 's, E: ParseError> + ContextError>>( + fn parse_term<'a, 's, E: ParseError> + ContextError, Context>>( input: Input<'a, 's>, ) -> IResult, Term<'a>, E> { context( - "term", + Context::Term, alt(( parse_binary_term, parse_tuple_term, @@ -3546,11 +3491,15 @@ pub mod new { } /// Parse a primitive term (simple constant, iri constant, number, string). - fn parse_primitive_term<'a, 's, E: ParseError> + ContextError>>( + fn parse_primitive_term< + 'a, + 's, + E: ParseError> + ContextError, Context>, + >( input: Input<'a, 's>, ) -> IResult, Term<'a>, E> { context( - "primitive term", + Context::TermPrivimitive, alt(( parse_rdf_literal, parse_ident, @@ -3563,11 +3512,15 @@ pub mod new { } /// Parse a rdf literal e.g. 
"2023-06-19"^^ - fn parse_rdf_literal<'a, 's, E: ParseError> + ContextError>>( + fn parse_rdf_literal< + 'a, + 's, + E: ParseError> + ContextError, Context>, + >( input: Input<'a, 's>, ) -> IResult, Primitive<'a>, E> { context( - "RDF Literal", + Context::RdfLiteral, tuple((lex_string, recognize(pair(caret, caret)), lex_iri)), )(input) .map(|(rest_input, (string, carets, iri))| { @@ -3586,29 +3539,33 @@ pub mod new { }) } - fn parse_ident<'a, 's, E: ParseError> + ContextError>>( + fn parse_ident<'a, 's, E: ParseError> + ContextError, Context>>( input: Input<'a, 's>, ) -> IResult, Primitive<'a>, E> { lex_ident(input).map(|(rest_input, ident)| (rest_input, Primitive::Constant(ident))) } - fn parse_iri<'a, 's, E: ParseError> + ContextError>>( + fn parse_iri<'a, 's, E: ParseError> + ContextError, Context>>( input: Input<'a, 's>, ) -> IResult, Primitive<'a>, E> { lex_iri(input).map(|(rest_input, iri)| (rest_input, Primitive::Iri(iri))) } - fn parse_number<'a, 's, E: ParseError> + ContextError>>( + fn parse_number<'a, 's, E: ParseError> + ContextError, Context>>( input: Input<'a, 's>, ) -> IResult, Primitive<'a>, E> { - context("number", alt((parse_decimal, parse_integer)))(input) + context(Context::Number, alt((parse_decimal, parse_integer)))(input) } - fn parse_decimal<'a, 's, E: ParseError> + ContextError>>( + fn parse_decimal< + 'a, + 's, + E: ParseError> + ContextError, Context>, + >( input: Input<'a, 's>, ) -> IResult, Primitive<'a>, E> { context( - "decimal", + Context::Decimal, tuple(( opt(alt((plus, minus))), opt(lex_number), @@ -3632,10 +3589,14 @@ pub mod new { }) } - fn parse_integer<'a, 's, E: ParseError> + ContextError>>( + fn parse_integer< + 'a, + 's, + E: ParseError> + ContextError, Context>, + >( input: Input<'a, 's>, ) -> IResult, Primitive<'a>, E> { - context("integer", pair(opt(alt((plus, minus))), lex_number))(input).map( + context(Context::Integer, pair(opt(alt((plus, minus))), lex_number))(input).map( |(rest_input, (sign, number))| { ( rest_input, @@ -3652,24 +3613,28 @@ pub mod new { ) } - fn parse_exponent<'a, 's, E: ParseError> + ContextError>>( + fn parse_exponent< + 'a, + 's, + E: ParseError> + ContextError, Context>, + >( input: Input<'a, 's>, ) -> IResult, Exponent<'a>, E> { context( - "exponent", + Context::Exponent, tuple((exp, opt(alt((plus, minus))), lex_number)), )(input) .map(|(rest_input, (e, sign, number))| (rest_input, Exponent { e, sign, number })) } - fn parse_string<'a, 's, E: ParseError> + ContextError>>( + fn parse_string<'a, 's, E: ParseError> + ContextError, Context>>( input: Input<'a, 's>, ) -> IResult, Primitive<'a>, E> { lex_string(input).map(|(rest_input, string)| (rest_input, Primitive::String(string))) } // /// Parse an unary term. - // fn parse_unary_prefix_term<'a, 's, E: ParseError> + ContextError>>(input: Input<'a, 's>) -> IResult, Term<'a>, E> { + // fn parse_unary_prefix_term<'a, 's, E: ParseError> + ContextError, Context>>(input: Input<'a, 's>) -> IResult, Term<'a>, E> { // pair(lex_unary_prefix_operators, parse_term)(input).map( // |(rest_input, (operation, term))| { // ( @@ -3685,11 +3650,15 @@ pub mod new { // } /// Parse a binary infix operation of the form `term1 term2`. 
- fn parse_binary_term<'a, 's, E: ParseError> + ContextError>>( + fn parse_binary_term< + 'a, + 's, + E: ParseError> + ContextError, Context>, + >( input: Input<'a, 's>, ) -> IResult, Term<'a>, E> { context( - "binary term", + Context::TermBinary, pair( parse_arithmetic_product, opt(tuple((wsoc0, alt((plus, minus)), wsoc0, parse_binary_term))), @@ -3719,12 +3688,12 @@ pub mod new { fn parse_arithmetic_product< 'a, 's, - E: ParseError> + ContextError>, + E: ParseError> + ContextError, Context>, >( input: Input<'a, 's>, ) -> IResult, Term<'a>, E> { context( - "arithmetic product", + Context::ArithmeticProduct, pair( parse_arithmetic_factor, opt(tuple(( @@ -3757,12 +3726,12 @@ pub mod new { fn parse_arithmetic_factor< 'a, 's, - E: ParseError> + ContextError>, + E: ParseError> + ContextError, Context>, >( input: Input<'a, 's>, ) -> IResult, Term<'a>, E> { context( - "arithmetic factor", + Context::ArithmeticFactor, alt(( parse_tuple_term, parse_aggregation_term, @@ -3798,12 +3767,12 @@ pub mod new { fn parse_aggregation_term< 'a, 's, - E: ParseError> + ContextError>, + E: ParseError> + ContextError, Context>, >( input: Input<'a, 's>, ) -> IResult, Term<'a>, E> { context( - "aggregation term", + Context::TermAggregation, tuple(( recognize(pair(hash, lex_ident)), open_paren, @@ -3835,36 +3804,48 @@ pub mod new { } /// Parse a `_` - fn parse_blank<'a, 's, E: ParseError> + ContextError>>( + fn parse_blank<'a, 's, E: ParseError> + ContextError, Context>>( input: Input<'a, 's>, ) -> IResult, Term<'a>, E> { - context("blank", underscore)(input) + context(Context::Blank, underscore)(input) .map(|(rest_input, underscore)| (rest_input, Term::Blank(underscore))) } /// Parse a tuple term, either with a name (function symbol) or as a term (-list) with /// parenthesis. - fn parse_tuple_term<'a, 's, E: ParseError> + ContextError>>( + fn parse_tuple_term< + 'a, + 's, + E: ParseError> + ContextError, Context>, + >( input: Input<'a, 's>, ) -> IResult, Term<'a>, E> { - context("tuple term", parse_tuple)(input) + context(Context::TermTuple, parse_tuple)(input) .map(|(rest_input, named_tuple)| (rest_input, Term::Tuple(Box::new(named_tuple)))) } /// Parse a map as a term. - fn parse_map_term<'a, 's, E: ParseError> + ContextError>>( + fn parse_map_term< + 'a, + 's, + E: ParseError> + ContextError, Context>, + >( input: Input<'a, 's>, ) -> IResult, Term<'a>, E> { - context("map term", parse_map)(input) + context(Context::TermMap, parse_map)(input) .map(|(rest_input, map)| (rest_input, Term::Map(Box::new(map)))) } /// Parse a variable. - fn parse_variable<'a, 's, E: ParseError> + ContextError>>( + fn parse_variable< + 'a, + 's, + E: ParseError> + ContextError, Context>, + >( input: Input<'a, 's>, ) -> IResult, Term<'a>, E> { context( - "universal variable", + Context::UniversalVariable, recognize(pair(question_mark, lex_ident)), )(input) .map(|(rest_input, var)| { @@ -3879,11 +3860,15 @@ pub mod new { } /// Parse an existential variable. - fn parse_existential<'a, 's, E: ParseError> + ContextError>>( + fn parse_existential< + 'a, + 's, + E: ParseError> + ContextError, Context>, + >( input: Input<'a, 's>, ) -> IResult, Term<'a>, E> { context( - "existential variable", + Context::ExistentialVariable, recognize(pair(exclamation_mark, lex_ident)), )(input) .map(|(rest_input, existential)| { @@ -3899,11 +3884,15 @@ pub mod new { // Order of parser compinator is important, because of ordered choice and no backtracking /// Parse the operator for an infix atom. 
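    /// Note that the ordered choice below must try the two-character operators
    /// (`less_equal`, `greater_equal`) before their one-character prefixes
    /// (`less`, `greater`): `alt` commits to the first branch that succeeds and
    /// does not backtrack, so with the reverse order an input such as
    ///
    /// ```text
    /// ?x <= 5    // operator must lex as "<=", not "<" followed by a stray "="
    /// ```
    ///
    /// would be mis-tokenized.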
- fn parse_operation_token<'a, 's, E: ParseError> + ContextError>>( + fn parse_operation_token< + 'a, + 's, + E: ParseError> + ContextError, Context>, + >( input: Input<'a, 's>, ) -> IResult, Token<'a>, E> { context( - "operators", + Context::Operators, alt((less_equal, greater_equal, equal, unequal, less, greater)), )(input) } @@ -3916,7 +3905,6 @@ pub mod new { }; use nom::error::{convert_error, VerboseError}; - use nom_supreme::error::ErrorTree; use super::*; use crate::io::{ @@ -3970,7 +3958,7 @@ pub mod new { }; assert_eq!( // parse_program::>(input).unwrap().1, - parse_program::>(input).0, + parse_program::>(input).0, Program { span: input.input, tl_doc_comment: None, @@ -4037,7 +4025,7 @@ pub mod new { }; assert_eq!( // parse_program::>(input).unwrap().1, - parse_program::>(input).0, + parse_program::>(input).0, Program { tl_doc_comment: None, span: input.input, @@ -4317,7 +4305,7 @@ pub mod new { }; assert_eq!( // parse_program::>(input).unwrap().1, - parse_program::>(input).0, + parse_program::>(input).0, Program { span: input.input, tl_doc_comment: None, @@ -4439,7 +4427,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters parser_state: errors, }; // let ast = parse_program::>(input); - let (ast, _) = parse_program::>(input); + let (ast, _) = parse_program::>(input); println!("{}", ast); assert_eq!( { @@ -4506,7 +4494,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters parser_state, }; // let result = parse_term::>(input); - let result = parse_term::>(input); + let result = parse_term::>(input); result.unwrap().1 }, Term::Primitive(Primitive::Number { @@ -4529,7 +4517,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters parser_state, }; // let result = parse_term::>(input); - let result = parse_term::>(input); + let result = parse_term::>(input); result.unwrap().1 }, Term::Binary { @@ -4566,7 +4554,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters parser_state, }; // let result = parse_term::>(input); - let result = parse_term::>(input); + let result = parse_term::>(input); result.unwrap().1 }, Term::Binary { @@ -4603,7 +4591,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters parser_state, }; // let result = parse_term::>(input); - let result = parse_term::>(input); + let result = parse_term::>(input); result.unwrap().1 }, Term::Binary { @@ -4640,7 +4628,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters parser_state, }; // let result = parse_term::>(input); - let result = parse_term::>(input); + let result = parse_term::>(input); result.unwrap().1 }, Term::Binary { @@ -4677,7 +4665,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters parser_state, }; // let result = parse_term::>(input); - let result = parse_term::>(input); + let result = parse_term::>(input); result.unwrap().1 }, Term::Binary { @@ -4728,7 +4716,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters parser_state, }; // let result = parse_term::>(input); - let result = parse_term::>(input); + let result = parse_term::>(input); result.unwrap().1 }, Term::Binary { @@ -4779,7 +4767,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters parser_state, }; // let result = parse_term::>(input); - let result = parse_term::>(input); + let result = parse_term::>(input); // let result = 
parse_term::>(Span::new("(15+3*2-(7+35)*8)/3")); // match result { // Ok(ast) => { @@ -4947,7 +4935,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters parser_state, }; // let result = parse_term::>(input); - let result = parse_term::>(input); + let result = parse_term::>(input); result.unwrap().1 }, Term::Binary { @@ -5097,9 +5085,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters parser_state, }; // parse_exponent::>(input) - parse_exponent::>(input) - .unwrap() - .1 + parse_exponent::>(input).unwrap().1 }, Exponent { e: T! {TokenKind::Exponent, 0,1,"e"}, @@ -5118,7 +5104,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters input, parser_state, }; - let result = parse_program::>(input); + let result = parse_program::>(input); println!("{}\n\n{:#?}", result.0, result.1); // assert!(false); } @@ -5132,8 +5118,8 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters input, parser_state, }; - dbg!(wsoc0::>(input)); - dbg!(wsoc1::>(input)); + dbg!(wsoc0::>(input)); + dbg!(wsoc1::>(input)); } #[test] @@ -5148,6 +5134,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters }; let result = parse_program::>>(input); dbg!(&result); + println!("{}", result.0); } } } diff --git a/nemo/src/io/parser/types.rs b/nemo/src/io/parser/types.rs index 7031f1d26..2b0832655 100644 --- a/nemo/src/io/parser/types.rs +++ b/nemo/src/io/parser/types.rs @@ -822,6 +822,12 @@ impl std::fmt::Display for Input<'_, '_> { } } +impl nom_supreme::context::ContextError for Input<'_, '_> { + fn add_context(location: I, ctx: C, other: Self) -> Self { + todo!() + } +} + pub(crate) trait ToRange { fn to_range(&self) -> Range; } From 9532b04705637dc50238900b42e1c6c9b5c75eea Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Fri, 31 May 2024 08:55:21 +0200 Subject: [PATCH 046/214] Make error reporting message a bit nicer --- nemo-language-server/src/language_server.rs | 36 +++++++++++++++++---- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/nemo-language-server/src/language_server.rs b/nemo-language-server/src/language_server.rs index e39252745..428633b54 100644 --- a/nemo-language-server/src/language_server.rs +++ b/nemo-language-server/src/language_server.rs @@ -79,24 +79,46 @@ impl Backend { let (_program, errors) = parse_program_str(text); - let diagnostics = errors + use std::collections::{BTreeMap, HashSet}; + let mut error_map: BTreeMap> = BTreeMap::new(); + for error in &errors { + if let Some(set) = error_map.get_mut(&error.pos) { + set.insert(error.msg.clone()); + } else { + let mut set = HashSet::new(); + set.insert(error.msg.clone()); + error_map.insert(error.pos, set); + }; + } + + let diagnostics = error_map .into_iter() - .map(|error| Diagnostic { - message: error.msg, + .map(|(pos, error_set)| Diagnostic { + message: /*error.msg*/ { + format!("expected{}", { + let mut string = String::new(); + for s in error_set { + string.push_str(" '"); + string.push_str(s.as_str()); + string.push_str("',"); + } + string + }) + }, range: Range::new( line_col_to_position( &line_index, LineCol { - line: error.pos.line - 1, - col: error.pos.column - 1, + line: pos.line - 1, + col: pos.column - 1, }, ) .unwrap(), line_col_to_position( &line_index, LineCol { - line: error.pos.line - 1, - col: error.pos.column - 1 + 1, + line: pos.line - 1, + col: pos.column - 1 + 1, }, ) .unwrap(), From 7fc6f0de5642453f18ed32ad9e5c15df2615a3d4 Mon Sep 17 00:00:00 2001 From: 
Jakob Steinberg Date: Wed, 5 Jun 2024 14:37:35 +0200 Subject: [PATCH 047/214] Remove debug printing and add prefixed name parsing --- nemo/src/io/lexer.rs | 26 +++++++++++++++++- nemo/src/io/parser.rs | 42 ++++++++++++++++++++--------- nemo/src/io/parser/ast/statement.rs | 2 +- nemo/src/io/parser/ast/term.rs | 34 ++++++++++++++++++----- 4 files changed, 84 insertions(+), 20 deletions(-) diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs index f976edb49..e50a4d652 100644 --- a/nemo/src/io/lexer.rs +++ b/nemo/src/io/lexer.rs @@ -61,6 +61,7 @@ pub(crate) enum Context { TermTuple, TermMap, RdfLiteral, + PrefixedConstant, Decimal, Integer, ArithmeticProduct, @@ -114,6 +115,7 @@ impl std::fmt::Display for Context { Context::TermTuple => write!(f, "tuple term"), Context::TermMap => write!(f, "map term"), Context::RdfLiteral => write!(f, "rdf literal"), + Context::PrefixedConstant => write!(f, "prefixed constant"), Context::Decimal => write!(f, "decimal"), Context::Integer => write!(f, "integer"), Context::ArithmeticProduct => write!(f, "arithmetic product"), @@ -229,6 +231,8 @@ pub(crate) enum TokenKind { // Multi-char tokens: /// Identifier for keywords and names Ident, + /// Identifier with a prefix, like `xsd:decimal` + PrefixedIdent, /// Variable like `?var` Variable, /// Existential Variable like `!var` @@ -300,6 +304,7 @@ impl std::fmt::Display for TokenKind { TokenKind::Slash => write!(f, "Slash"), TokenKind::Exponent => write!(f, "Exponent"), TokenKind::Ident => write!(f, "Ident"), + TokenKind::PrefixedIdent => write!(f, "Prefixed Ident"), TokenKind::Variable => write!(f, "Variable"), TokenKind::Existential => write!(f, "Existential"), TokenKind::Aggregate => write!(f, "Aggregate"), @@ -540,6 +545,25 @@ where Ok((rest_input, token)) } +pub(crate) fn lex_prefixed_ident<'a, 's, E>( + input: Input<'a, 's>, +) -> IResult, Token<'a>, E> +where + E: ParseError> + ContextError, Context>, +{ + recognize(tuple((opt(lex_ident), colon, lex_ident)))(input).map( + |(rest_input, prefixed_ident)| { + ( + rest_input, + Token { + kind: TokenKind::PrefixedIdent, + span: prefixed_ident.input, + }, + ) + }, + ) +} + pub(crate) fn lex_iri<'a, 's, E>(input: Input<'a, 's>) -> IResult, Token<'a>, E> where E: ParseError> + ContextError, Context>, @@ -602,7 +626,7 @@ where { context( Context::TlDocComment, - recognize(many1(tuple((tag("%!"), many0(is_not("\n")), line_ending)))), + recognize(many1(tuple((tag("%%%"), many0(is_not("\n")), line_ending)))), )(input) .map(|(rest, result)| (rest, Token::new(TokenKind::TlDocComment, result.input))) } diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index c6406aa2d..9b60b1382 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -2441,10 +2441,10 @@ pub mod new { use crate::io::lexer::{ arrow, at, caret, close_brace, close_paren, colon, comma, dot, equal, exclamation_mark, exp, greater, greater_equal, hash, less, less_equal, lex_comment, lex_doc_comment, - lex_ident, lex_iri, lex_number, lex_operators, lex_string, lex_toplevel_doc_comment, - lex_whitespace, minus, open_brace, open_paren, plus, question_mark, skip_to_dot, slash, - star, tilde, underscore, unequal, Context, Error, ErrorTree, ParserState, Span, Token, - TokenKind, + lex_ident, lex_iri, lex_number, lex_operators, lex_prefixed_ident, lex_string, + lex_toplevel_doc_comment, lex_whitespace, minus, open_brace, open_paren, plus, + question_mark, skip_to_dot, slash, star, tilde, underscore, unequal, Context, Error, + ErrorTree, ParserState, Span, Token, TokenKind, }; use 
crate::io::parser::ast::AstNode; use nom::combinator::{all_consuming, cut, map, opt, recognize}; @@ -2539,7 +2539,6 @@ pub mod new { nom::Err::Error(err) | nom::Err::Failure(err) => { let (_deepest_pos, errors) = get_deepest_errors(err); for error in errors { - dbg!(&error); input.parser_state.report_error(error); } // let error = Error(deepest_pos, format!("")); @@ -2552,10 +2551,8 @@ pub mod new { } fn get_deepest_errors<'a, 's>(e: &'a ErrorTree>) -> (Position, Vec) { - dbg!(&e); match e { ErrorTree::Base { location, kind } => { - dbg!(&kind); let span = location.input; let err_pos = Position { offset: span.location_offset(), @@ -2575,7 +2572,6 @@ pub mod new { // let mut err_pos = Position::default(); match &**base { ErrorTree::Base { location, kind } => { - dbg!(&kind); let span = location.input; let err_pos = Position { offset: span.location_offset(), @@ -2606,11 +2602,9 @@ pub mod new { ErrorTree::Stack { base, contexts } => { let (pos, mut deepest_errors) = get_deepest_errors(base); let contexts = context_strs(contexts); - dbg!(&deepest_errors); for mut error in &mut deepest_errors { error.context.append(&mut contexts.clone()); } - dbg!(&deepest_errors); (pos, deepest_errors) } ErrorTree::Alt(error_tree) => { @@ -3308,7 +3302,7 @@ pub mod new { context( Context::NamedTuple, tuple(( - lex_ident, + alt((lex_prefixed_ident, lex_ident)), wsoc0, open_paren, wsoc0, @@ -3502,6 +3496,7 @@ pub mod new { Context::TermPrivimitive, alt(( parse_rdf_literal, + parse_prefixed_ident, parse_ident, parse_iri, parse_number, @@ -3539,6 +3534,29 @@ pub mod new { }) } + fn parse_prefixed_ident<'a, 's, E>( + input: Input<'a, 's>, + ) -> IResult, Primitive<'a>, E> + where + E: ParseError> + ContextError, Context>, + { + context( + Context::PrefixedConstant, + tuple((opt(lex_ident), colon, lex_ident)), + )(input) + .map(|(rest_input, (prefix, colon, constant))| { + ( + rest_input, + Primitive::PrefixedConstant { + span: outer_span(input.input, rest_input.input), + prefix, + colon, + constant, + }, + ) + }) + } + fn parse_ident<'a, 's, E: ParseError> + ContextError, Context>>( input: Input<'a, 's>, ) -> IResult, Primitive<'a>, E> { @@ -4467,7 +4485,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters error_map.insert(error.pos, set); }; } - dbg!(&error_map); + // dbg!(&error_map); println!("\n\n"); // assert!(false); let lines: Vec<_> = str.lines().collect(); diff --git a/nemo/src/io/parser/ast/statement.rs b/nemo/src/io/parser/ast/statement.rs index f641dbd8e..d9a201500 100644 --- a/nemo/src/io/parser/ast/statement.rs +++ b/nemo/src/io/parser/ast/statement.rs @@ -132,7 +132,7 @@ impl AstNode for Statement<'_> { Statement::Rule { .. 
} => name!("Rule"), Statement::Whitespace(_) => name!("Whitespace"), Statement::Comment(_) => name!("Comment"), - Statement::Error(_) => name!("ERROR"), + Statement::Error(_) => name!("\x1b[1;31mERROR\x1b[0m"), } } diff --git a/nemo/src/io/parser/ast/term.rs b/nemo/src/io/parser/ast/term.rs index 81ed9d47f..7b33a95d8 100644 --- a/nemo/src/io/parser/ast/term.rs +++ b/nemo/src/io/parser/ast/term.rs @@ -165,12 +165,12 @@ impl AstNode for Term<'_> { format!("aggregation/{}", operation.span().fragment()), "file".to_string(), )), - Term::Tuple(tuple) => { - tuple.identifier.map(|identifier| ( - format!("function/{}", identifier.span().fragment()), - "file".to_string(), - )) - } + Term::Tuple(tuple) => tuple.identifier.map(|identifier| { + ( + format!("function/{}", identifier.span().fragment()), + "file".to_string(), + ) + }), _ => None, } } @@ -230,6 +230,12 @@ impl std::fmt::Display for Term<'_> { #[derive(Debug, Clone, PartialEq)] pub(crate) enum Primitive<'a> { Constant(Token<'a>), + PrefixedConstant { + span: Span<'a>, + prefix: Option>, + colon: Token<'a>, + constant: Token<'a>, + }, Number { span: Span<'a>, sign: Option>, @@ -252,6 +258,20 @@ impl AstNode for Primitive<'_> { fn children(&self) -> Option> { match self { Primitive::Constant(token) => Some(vec![token]), + Primitive::PrefixedConstant { + prefix, + colon, + constant, + .. + } => { + let mut vec: Vec<&dyn AstNode> = Vec::new(); + if let Some(prefix) = prefix { + vec.push(prefix); + } + vec.push(colon); + vec.push(constant); + Some(vec) + } Primitive::Number { sign, before, @@ -292,6 +312,7 @@ impl AstNode for Primitive<'_> { fn span(&self) -> Span { match self { Primitive::Constant(token) => token.span, + Primitive::PrefixedConstant { span, .. } => *span, Primitive::Number { span, .. } => *span, Primitive::String(token) => token.span, Primitive::Iri(token) => token.span, @@ -326,6 +347,7 @@ impl AstNode for Primitive<'_> { } match self { Primitive::Constant(_) => name!("Constant"), + Primitive::PrefixedConstant { .. } => name!("Prefixed Constant"), Primitive::Number { .. 
} => name!("Number"), Primitive::String(_) => name!("String"), Primitive::Iri(_) => name!("Iri"), From d393e2e7c736e333ccdce6f3db46292c7a38a8d1 Mon Sep 17 00:00:00 2001 From: logicallangs <> Date: Wed, 12 Jun 2024 12:58:35 +0000 Subject: [PATCH 048/214] update language server --- Cargo.lock | 517 +++++++++++++++--- Cargo.toml | 8 +- nemo-language-server/Cargo.toml | 3 +- nemo-language-server/src/language_server.rs | 389 +++++++------ .../src/language_server/nemo_position.rs | 28 +- nemo-language-server/src/lib.rs | 2 +- nemo/src/io/lexer.rs | 10 +- nemo/src/io/parser.rs | 4 +- nemo/src/io/parser/ast.rs | 61 ++- nemo/src/io/parser/ast/atom.rs | 30 +- nemo/src/io/parser/ast/directive.rs | 13 +- nemo/src/io/parser/ast/map.rs | 22 +- nemo/src/io/parser/ast/program.rs | 12 +- nemo/src/io/parser/ast/statement.rs | 13 +- nemo/src/io/parser/ast/term.rs | 84 ++- nemo/src/io/parser/ast/tuple.rs | 12 +- nemo/src/model/rule_model/syntax.rs | 1 - 17 files changed, 769 insertions(+), 440 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 847bd5322..36187cf68 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -102,6 +102,12 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "anyhow" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" + [[package]] name = "arbitrary" version = "1.3.2" @@ -155,9 +161,9 @@ dependencies = [ [[package]] name = "async-trait" -version = "0.1.79" +version = "0.1.80" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a507401cad91ec6a857ed5513a2073c82a9b9048762b885bb98655b306964681" +checksum = "c6fa2087f2753a7da8cc1c0dbfcf89579dd57458e36769de5ac750b4671737ca" dependencies = [ "proc-macro2", "quote", @@ -176,7 +182,7 @@ version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" dependencies = [ - "hermit-abi", + "hermit-abi 0.1.19", "libc", "winapi", ] @@ -200,9 +206,9 @@ checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" [[package]] name = "backtrace" -version = "0.3.72" +version = "0.3.73" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17c6a35df3749d2e8bb1b7b21a976d82b15548788d2735b9d82f329268f71a11" +checksum = "5cc23269a4f8976d0a4d2e7109211a419fe30e8d88d677cd60b6bc79c5732e0a" dependencies = [ "addr2line", "cc", @@ -282,7 +288,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "05efc5cfd9110c8416e471df0e96702d58690178e206e61b7173706673c93706" dependencies = [ "memchr", - "regex-automata 0.4.6", + "regex-automata 0.4.7", "serde", ] @@ -312,9 +318,9 @@ checksum = "a3e368af43e418a04d52505cf3dbc23dda4e3407ae2fa99fd0e4f308ce546acc" [[package]] name = "cc" -version = "1.0.98" +version = "1.0.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41c270e7540d725e65ac7f1b212ac8ce349719624d7bcff99f8e2e488e8cf03f" +checksum = "96c51067fd44124faa7f870b4b1c969379ad32b2ba805aa959430ceaa384f695" [[package]] name = "cexpr" @@ -359,9 +365,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.4" +version = "4.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90bc066a67923782aa8515dbaea16946c5bcc5addbd668bb80af688e53e548a0" +checksum = "5db83dced34638ad474f39f250d7fea9598bdd239eaced1bdf45d597da0f433f" dependencies = [ "clap_builder", "clap_derive", @@ -369,9 +375,9 @@ dependencies = [ 
[[package]] name = "clap_builder" -version = "4.5.2" +version = "4.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae129e2e766ae0ec03484e609954119f123cc1fe650337e155d03b022f24f7b4" +checksum = "f7e204572485eb3fbf28f871612191521df159bc3e15a9f5064c66dba3a8c05f" dependencies = [ "anstream", "anstyle", @@ -381,9 +387,9 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.4" +version = "4.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "528131438037fd55894f62d6e9f068b8f45ac57ffa77517819645d10aed04f64" +checksum = "c780290ccf4fb26629baa7a1081e68ced113f1d3ec302fa5948f1c381ebf06c6" dependencies = [ "heck 0.5.0", "proc-macro2", @@ -393,9 +399,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.7.0" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce" +checksum = "4b82cf0babdbd58558212896d1a4272303a57bdb245c2bf1147185fb45640e70" [[package]] name = "colorchoice" @@ -413,6 +419,16 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "console_error_panic_hook" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06aeb73f470f66dcdbf7223caeebb85984942f22f1adb2a088cf9668146bbbc" +dependencies = [ + "cfg-if", + "wasm-bindgen", +] + [[package]] name = "core-foundation" version = "0.9.4" @@ -546,6 +562,17 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "displaydoc" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "487585f4d0c6655fe74905e2504d8ad6908e4db67f744eb140876906c2f3175d" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + [[package]] name = "doc-comment" version = "0.3.3" @@ -727,6 +754,7 @@ checksum = "645c6916888f6cb6350d2550b80fb63e734897a8498abe35cfb732b6487804b0" dependencies = [ "futures-channel", "futures-core", + "futures-executor", "futures-io", "futures-sink", "futures-task", @@ -749,6 +777,17 @@ version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" +[[package]] +name = "futures-executor" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a576fc72ae164fca6b9db127eaa9a9dda0d61316034f33a0a0d4eda41f02b01d" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + [[package]] name = "futures-io" version = "0.3.30" @@ -830,8 +869,8 @@ dependencies = [ "aho-corasick", "bstr", "log", - "regex-automata 0.4.6", - "regex-syntax 0.8.3", + "regex-automata 0.4.7", + "regex-syntax 0.8.4", ] [[package]] @@ -845,6 +884,19 @@ dependencies = [ "walkdir", ] +[[package]] +name = "gloo-utils" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "037fcb07216cb3a30f7292bd0176b050b7b9a052ba830ef7d5d65f6dc64ba58e" +dependencies = [ + "js-sys", + "serde", + "serde_json", + "wasm-bindgen", + "web-sys", +] + [[package]] name = "h2" version = "0.4.5" @@ -895,6 +947,12 @@ dependencies = [ "libc", ] +[[package]] +name = "hermit-abi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" + [[package]] name = "howlong" version = "0.1.7" @@ -933,12 +991,12 @@ dependencies = [ [[package]] name = "http-body-util" -version = "0.1.1" +version = "0.1.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "0475f8b2ac86659c21b64320d5d653f9efe42acd2a4e560073ec61a155a34f1d" +checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f" dependencies = [ "bytes", - "futures-core", + "futures-util", "http", "http-body", "pin-project-lite", @@ -946,9 +1004,9 @@ dependencies = [ [[package]] name = "httparse" -version = "1.8.0" +version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904" +checksum = "d0e7a4dd27b9476dc40cb050d3632d3bba3a70ddbff012285f7f8559a1e7e545" [[package]] name = "humantime" @@ -1012,14 +1070,134 @@ dependencies = [ "tracing", ] +[[package]] +name = "icu_collections" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locid" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_locid_transform" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_locid_transform_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_locid_transform_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" + +[[package]] +name = "icu_normalizer" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "write16", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" + +[[package]] +name = "icu_properties" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f8ac670d7422d7f76b32e17a5db556510825b29ec9154f235977c9caba61036" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locid_transform", + "icu_properties_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" + +[[package]] +name = "icu_provider" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_provider_macros", + "stable_deref_trait", + "tinystr", + "writeable", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_provider_macros" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + [[package]] name = "idna" -version = "0.5.0" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" +checksum = "4716a3a0933a1d01c2f72450e89596eb51dd34ef3c211ccd875acdf1f8fe47ed" dependencies = [ - "unicode-bidi", - "unicode-normalization", + "icu_normalizer", + "icu_properties", + "smallvec", + "utf8_iter", ] [[package]] @@ -1032,7 +1210,7 @@ dependencies = [ "globset", "log", "memchr", - "regex-automata 0.4.6", + "regex-automata 0.4.7", "same-file", "walkdir", "winapi-util", @@ -1121,6 +1299,16 @@ dependencies = [ "windows-targets 0.52.5", ] +[[package]] +name = "line-index" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67d61795376ae2683928c218fda7d7d7db136fd38c06b7552904667f0d55580a" +dependencies = [ + "nohash-hasher", + "text-size", +] + [[package]] name = "linked-hash-map" version = "0.5.6" @@ -1133,6 +1321,12 @@ version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" +[[package]] +name = "litemap" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "643cb0b8d4fcc284004d5fd0d67ccf61dfffadb7f75e1e71bc420f4688a3a704" + [[package]] name = "lock_api" version = "0.4.12" @@ -1301,7 +1495,7 @@ version = "0.5.2-dev" dependencies = [ "assert_cmd", "assert_fs", - "clap 4.5.4", + "clap 4.5.7", "colored", "dir-test", "env_logger 0.11.3", @@ -1312,6 +1506,19 @@ dependencies = [ "test-log", ] +[[package]] +name = "nemo-language-server" +version = "0.5.2-dev" +dependencies = [ + "anyhow", + "futures", + "line-index", + "nemo", + "tokio", + "tower-lsp", + "tower-service", +] + [[package]] name = "nemo-physical" version = "0.5.2-dev" @@ -1351,6 +1558,30 @@ dependencies = [ "pyo3", ] +[[package]] +name = "nemo-wasm" +version = "0.5.2-dev" +dependencies = [ + "console_error_panic_hook", + "futures", + "gloo-utils", + "js-sys", + "nemo", + "nemo-language-server", + "nemo-physical", + "thiserror", + "wasm-bindgen", + "wasm-bindgen-futures", + "wasm-bindgen-test", + "web-sys", +] + +[[package]] +name = "nohash-hasher" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bf50223579dc7cdcfb3bfcacf7069ff68243f8c363f62ffa99cf000a6b9c451" + [[package]] name = "nom" version = "5.1.3" @@ -1494,11 +1725,21 @@ dependencies = [ "autocfg", ] +[[package]] +name = "num_cpus" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" +dependencies = [ + "hermit-abi 0.3.9", + "libc", +] + [[package]] name = "object" -version = "0.35.0" +version = "0.36.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8ec7ab813848ba4522158d5517a6093db1ded27575b070f4177b8d12b41db5e" +checksum = "576dfe1fc8f9df304abb159d767a29d0476f7750fbf8aa7ad07816004a207434" dependencies = [ "memchr", ] @@ -1883,14 +2124,14 @@ dependencies = [ [[package]] name = "regex" -version = "1.10.4" +version = "1.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" +checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" 
dependencies = [ "aho-corasick", "memchr", - "regex-automata 0.4.6", - "regex-syntax 0.8.3", + "regex-automata 0.4.7", + "regex-syntax 0.8.4", ] [[package]] @@ -1904,13 +2145,13 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.6" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" +checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" dependencies = [ "aho-corasick", "memchr", - "regex-syntax 0.8.3", + "regex-syntax 0.8.4", ] [[package]] @@ -1921,9 +2162,9 @@ checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" [[package]] name = "regex-syntax" -version = "0.8.3" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" +checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" [[package]] name = "reqwest" @@ -2067,6 +2308,12 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "scoped-tls" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" + [[package]] name = "scopeguard" version = "1.2.0" @@ -2190,6 +2437,12 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + [[package]] name = "streaming-iterator" version = "0.1.9" @@ -2236,6 +2489,17 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" +[[package]] +name = "synstructure" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + [[package]] name = "system-configuration" version = "0.5.1" @@ -2318,6 +2582,12 @@ dependencies = [ "syn 2.0.66", ] +[[package]] +name = "text-size" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f18aa187839b2bdb1ad2fa35ead8c4c2976b64e4363c386d45ac0f7ee85c9233" + [[package]] name = "textwrap" version = "0.11.0" @@ -2358,20 +2628,15 @@ dependencies = [ ] [[package]] -name = "tinyvec" -version = "1.6.0" +name = "tinystr" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" dependencies = [ - "tinyvec_macros", + "displaydoc", + "zerovec", ] -[[package]] -name = "tinyvec_macros" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" - [[package]] name = "tokio" version = "1.38.0" @@ -2382,11 +2647,24 @@ dependencies = [ "bytes", "libc", "mio", + "num_cpus", "pin-project-lite", "socket2", + "tokio-macros", "windows-sys 0.48.0", ] +[[package]] +name = "tokio-macros" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a" +dependencies = [ + "proc-macro2", + "quote", + 
"syn 2.0.66", +] + [[package]] name = "tokio-native-tls" version = "0.3.1" @@ -2537,27 +2815,12 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" -[[package]] -name = "unicode-bidi" -version = "0.3.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" - [[package]] name = "unicode-ident" version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" -[[package]] -name = "unicode-normalization" -version = "0.1.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5" -dependencies = [ - "tinyvec", -] - [[package]] name = "unicode-segmentation" version = "1.11.0" @@ -2578,9 +2841,9 @@ checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" [[package]] name = "url" -version = "2.5.0" +version = "2.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633" +checksum = "f7c25da092f0a868cdf09e8674cd3b7ef3a7d92a24253e663a2fb85e2496de56" dependencies = [ "form_urlencoded", "idna", @@ -2588,11 +2851,23 @@ dependencies = [ "serde", ] +[[package]] +name = "utf16_iter" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + [[package]] name = "utf8parse" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "valuable" @@ -2718,6 +2993,31 @@ version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" +[[package]] +name = "wasm-bindgen-test" +version = "0.3.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9bf62a58e0780af3e852044583deee40983e5886da43a271dd772379987667b" +dependencies = [ + "console_error_panic_hook", + "js-sys", + "scoped-tls", + "wasm-bindgen", + "wasm-bindgen-futures", + "wasm-bindgen-test-macro", +] + +[[package]] +name = "wasm-bindgen-test-macro" +version = "0.3.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7f89739351a2e03cb94beb799d47fb2cac01759b40ec441f7de39b00cbf7ef0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + [[package]] name = "web-sys" version = "0.3.69" @@ -2917,6 +3217,18 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "write16" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" + +[[package]] +name = "writeable" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" + [[package]] name = "wyz" version = "0.5.1" @@ 
-2932,6 +3244,30 @@ version = "0.8.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "791978798f0597cfc70478424c2b4fdc2b7a8024aaff78497ef00f24ef674193" +[[package]] +name = "yoke" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c5b1314b079b0930c31e3af543d8ee1757b1951ae1e1565ec704403a7240ca5" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28cc31741b18cb6f1d5ff12f5b7523e3d6eb0852bbbad19d73905511d9849b95" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", + "synstructure", +] + [[package]] name = "zerocopy" version = "0.7.34" @@ -2951,3 +3287,46 @@ dependencies = [ "quote", "syn 2.0.66", ] + +[[package]] +name = "zerofrom" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91ec111ce797d0e0784a1116d0ddcdbea84322cd79e5d5ad173daeba4f93ab55" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ea7b4a3637ea8669cedf0f1fd5c286a17f3de97b8dd5a70a6c167a1730e63a5" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", + "synstructure", +] + +[[package]] +name = "zerovec" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb2cc8827d6c0994478a15c53f374f46fbd41bea663d809b14744bc42e6b109c" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97cf56601ee5052b4417d90c8755c6683473c926039908196cf35d99f893ebe7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] diff --git a/Cargo.toml b/Cargo.toml index cde9888d4..2b98272e6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,12 +1,16 @@ [workspace] resolver = "2" +default-members = [ + "nemo", + "nemo-cli", + "nemo-physical", + "nemo-python", +] members = [ "nemo", "nemo-cli", "nemo-physical", "nemo-python", -] -exclude = [ "nemo-language-server", "nemo-wasm", ] diff --git a/nemo-language-server/Cargo.toml b/nemo-language-server/Cargo.toml index 6e16e33f3..6f0c258dd 100644 --- a/nemo-language-server/Cargo.toml +++ b/nemo-language-server/Cargo.toml @@ -22,9 +22,10 @@ js = [] tokio = ["dep:tokio"] [dependencies] +anyhow = "1.0" line-index = "0.1.1" nemo = { path = "../nemo", default-features = false } futures = "0.3.21" -tokio = { version = "1.27.0", features = ["full"], optional = true } +tokio = { version = "1.27.0", features = ["macros", "io-util", "rt-multi-thread"], optional = true } tower-lsp = { version = "0.20.0", default-features = false } tower-service = "0.3.2" diff --git a/nemo-language-server/src/language_server.rs b/nemo-language-server/src/language_server.rs index 428633b54..2a179937d 100644 --- a/nemo-language-server/src/language_server.rs +++ b/nemo-language-server/src/language_server.rs @@ -1,25 +1,26 @@ -use std::collections::HashMap; +use std::collections::{BTreeMap, BTreeSet, HashMap}; use std::vec; +use anyhow::anyhow; use futures::lock::Mutex; use line_index::{LineCol, LineIndex, WideEncoding}; use nemo::io::parser::ast::program::Program; use nemo::io::parser::ast::{AstNode, Position}; use nemo::io::parser::new::parse_program_str; -use nemo_position::{ - lsp_position_to_nemo_position, 
nemo_position_to_lsp_position, PositionConversionError, -}; +use nemo_position::{lsp_position_to_nemo_position, PositionConversionError}; use tower_lsp::lsp_types::{ - CompletionOptions, Diagnostic, DidChangeTextDocumentParams, DidOpenTextDocumentParams, - DocumentChangeOperation, DocumentChanges, DocumentSymbol, DocumentSymbolOptions, - DocumentSymbolParams, DocumentSymbolResponse, InitializeParams, InitializeResult, - InitializedParams, Location, MessageType, OneOf, OptionalVersionedTextDocumentIdentifier, - PrepareRenameResponse, Range, ReferenceParams, RenameOptions, RenameParams, ServerCapabilities, - TextDocumentEdit, TextDocumentPositionParams, TextDocumentSyncCapability, TextDocumentSyncKind, - TextEdit, Url, VersionedTextDocumentIdentifier, WorkDoneProgressOptions, WorkspaceEdit, + Diagnostic, DidChangeTextDocumentParams, DidOpenTextDocumentParams, DocumentChangeOperation, + DocumentChanges, DocumentSymbol, DocumentSymbolOptions, DocumentSymbolParams, + DocumentSymbolResponse, InitializeParams, InitializeResult, InitializedParams, Location, + MessageType, OneOf, OptionalVersionedTextDocumentIdentifier, PrepareRenameResponse, Range, + ReferenceParams, RenameOptions, RenameParams, ServerCapabilities, TextDocumentEdit, + TextDocumentPositionParams, TextDocumentSyncCapability, TextDocumentSyncKind, TextEdit, Url, + VersionedTextDocumentIdentifier, WorkDoneProgressOptions, WorkspaceEdit, }; use tower_lsp::{Client, LanguageServer}; +use self::nemo_position::nemo_range_to_lsp_range; + mod nemo_position; #[derive(Debug)] @@ -42,13 +43,13 @@ struct TextDocumentInfo { } /// Converts a source position to a LSP position -pub(crate) fn line_col_to_position( +pub(crate) fn line_col_to_lsp_position( line_index: &LineIndex, line_col: LineCol, -) -> Result { +) -> Result { let wide_line_col = line_index .to_wide(WideEncoding::Utf16, line_col) - .ok_or(())?; + .ok_or(PositionConversionError::LspLineCol(line_col))?; Ok(tower_lsp::lsp_types::Position { line: wide_line_col.line, @@ -56,6 +57,14 @@ pub(crate) fn line_col_to_position( }) } +fn jsonrpc_error(error: anyhow::Error) -> tower_lsp::jsonrpc::Error { + tower_lsp::jsonrpc::Error { + code: tower_lsp::jsonrpc::ErrorCode::ServerError(1), + message: error.to_string().into(), + data: None, + } +} + impl Backend { pub fn new(client: Client) -> Self { Self { @@ -66,7 +75,11 @@ impl Backend { } } - async fn handle_change(&self, text_document: VersionedTextDocumentIdentifier, text: &str) { + async fn handle_change( + &self, + text_document: VersionedTextDocumentIdentifier, + text: &str, + ) -> anyhow::Result<()> { self.state.lock().await.text_document_store.insert( text_document.uri.clone(), TextDocumentInfo { @@ -79,52 +92,50 @@ impl Backend { let (_program, errors) = parse_program_str(text); - use std::collections::{BTreeMap, HashSet}; - let mut error_map: BTreeMap> = BTreeMap::new(); - for error in &errors { - if let Some(set) = error_map.get_mut(&error.pos) { + // Group errors by position and deduplicate error + let mut errors_by_posision: BTreeMap> = BTreeMap::new(); + for error in errors { + if let Some(set) = errors_by_posision.get_mut(&error.pos) { set.insert(error.msg.clone()); } else { - let mut set = HashSet::new(); - set.insert(error.msg.clone()); - error_map.insert(error.pos, set); + errors_by_posision.insert(error.pos, std::iter::once(error.msg.clone()).collect()); }; } - let diagnostics = error_map + let diagnostics = errors_by_posision .into_iter() - .map(|(pos, error_set)| Diagnostic { - message: /*error.msg*/ { - format!("expected{}", { 
- let mut string = String::new(); - for s in error_set { - string.push_str(" '"); - string.push_str(s.as_str()); - string.push_str("',"); - } - string - }) - }, - range: Range::new( - line_col_to_position( - &line_index, - LineCol { - line: pos.line - 1, - col: pos.column - 1, - }, - ) - .unwrap(), - line_col_to_position( - &line_index, - LineCol { - line: pos.line - 1, - col: pos.column - 1 + 1, - }, - ) - .unwrap(), - ), - ..Default::default() + .map(|(pos, error_set)| { + Ok(Diagnostic { + message: format!( + "expected {}", + error_set + .iter() + .map(|s| format!("'{s}'")) + .collect::>() + .join(", ") + ), + range: Range::new( + line_col_to_lsp_position( + &line_index, + LineCol { + line: pos.line - 1, + col: pos.column - 1, + }, + ) + .unwrap(), + line_col_to_lsp_position( + &line_index, + LineCol { + line: pos.line - 1, + col: pos.column - 1 + 1, + }, + ) + .unwrap(), + ), + ..Default::default() + }) }) + .filter_map(|result: Result<_, PositionConversionError>| result.ok()) .collect(); self.client @@ -134,20 +145,15 @@ impl Backend { Some(text_document.version), ) .await; + + Ok(()) } - async fn read_text_document_info(&self, uri: &Url) -> Option { + async fn read_text_document_info(&self, uri: &Url) -> anyhow::Result { if let Some(info) = self.state.lock().await.text_document_store.get(uri) { - let a = info.clone(); - Some(a) + Ok(info.clone()) } else { - self.client - .log_message( - MessageType::ERROR, - "could not find text document with URI {uri}", - ) - .await; - None + Err(anyhow!("could not find text document with URI {uri}")) } } } @@ -176,13 +182,6 @@ impl LanguageServer for Backend { ..Default::default() }, })), - completion_provider: Some(CompletionOptions { - work_done_progress_options: WorkDoneProgressOptions { - ..Default::default() - }, - ..Default::default() - }), - ..Default::default() }, ..Default::default() @@ -196,19 +195,43 @@ impl LanguageServer for Backend { } async fn did_open(&self, params: DidOpenTextDocumentParams) { - self.handle_change( - VersionedTextDocumentIdentifier { - uri: params.text_document.uri, - version: params.text_document.version, - }, - ¶ms.text_document.text, - ) - .await; + if let Err(error) = self + .handle_change( + VersionedTextDocumentIdentifier { + uri: params.text_document.uri, + version: params.text_document.version, + }, + ¶ms.text_document.text, + ) + .await + { + self.client + .log_message( + MessageType::ERROR, + format!("error while handling textDocument/didOpen request: {error}"), + ) + .await; + } } async fn did_change(&self, params: DidChangeTextDocumentParams) { - self.handle_change(params.text_document, ¶ms.content_changes[0].text) - .await; + if let Err(error) = self + .handle_change( + VersionedTextDocumentIdentifier { + uri: params.text_document.uri, + version: params.text_document.version, + }, + ¶ms.content_changes[0].text, + ) + .await + { + self.client + .log_message( + MessageType::ERROR, + format!("error while handling textDocument/didChange request: {error}"), + ) + .await; + } } async fn references( @@ -217,46 +240,44 @@ impl LanguageServer for Backend { ) -> tower_lsp::jsonrpc::Result>> { let info = self .read_text_document_info(¶ms.text_document_position.text_document.uri) - .await; + .await + .map_err(jsonrpc_error)?; - match info { - Some(info) => { - let text = info.text; - let line_index = LineIndex::new(&text); - let position = lsp_position_to_nemo_position( - &line_index, - params.text_document_position.position, - ) - .unwrap(); // TODO handle unwrap + let text = info.text; + let line_index = 
LineIndex::new(&text);
+        let position =
+            lsp_position_to_nemo_position(&line_index, params.text_document_position.position)
+                .map_err(Into::into)
+                .map_err(jsonrpc_error)?;

-                let program = parse_program_str(&text);
-                let program = program.0;
+        let (program, _) = parse_program_str(&text);

-                let node_path = find_in_ast(&program, position);
+        let node_path = find_in_ast(&program, position);

-                // Get the identifier most specific to the position
-                let indentified_node = node_path_deepest_identifier(&node_path);
-                let indentified_node = match indentified_node {
-                    Some(indentified_node) => indentified_node,
-                    None => return Ok(None),
-                };
+        // Get the identifier most specific to the position
+        let indentified_node = node_path_deepest_identifier(&node_path);
+        let indentified_node = match indentified_node {
+            Some(indentified_node) => indentified_node,
+            None => return Ok(None),
+        };

-                // Find other AST nodes with the same global identifier
-                let referenced_nodes =
-                    find_by_identifier(indentified_node.scoping_node, &indentified_node.identifier);
+        // Find other AST nodes with the same global identifier
+        let referenced_nodes =
+            find_by_identifier(indentified_node.scoping_node, &indentified_node.identifier);

-                let locations = referenced_nodes
-                    .iter()
-                    .map(|node| Location {
-                        uri: params.text_document_position.text_document.uri.clone(),
-                        range: node_to_range_lsp(&line_index, *node),
-                    })
-                    .collect();
+        let locations = referenced_nodes
+            .iter()
+            .filter_map(|node| node_with_range(&line_index, *node))
+            .map(|(_node, range)| {
+                Ok(Location {
+                    uri: params.text_document_position.text_document.uri.clone(),
+                    range,
+                })
+            })
+            .filter_map(|result: Result<_, ()>| result.ok())
+            .collect();

-                Ok(Some(locations))
-            }
-            None => Ok(None), // TODO: Handle error
-        }
+        Ok(Some(locations))
     }

     async fn document_symbol(
@@ -265,28 +286,23 @@
     ) -> tower_lsp::jsonrpc::Result> {
         let info = self
             .read_text_document_info(&params.text_document.uri)
-            .await;
-
-        match info {
-            Some(info) => {
-                let text = info.text;
-                let line_index = LineIndex::new(&text);
+            .await
+            .map_err(jsonrpc_error)?;

-                let program = parse_program_str(&text);
-                let program = program.0;
+        let text = info.text;
+        let line_index = LineIndex::new(&text);

-                let document_symbol = ast_node_to_document_symbol(&line_index, &program);
+        let (program, _) = parse_program_str(&text);

-                if let Ok(document_symbol) = document_symbol {
-                    return Ok(document_symbol.map(|document_symbol| {
-                        DocumentSymbolResponse::Nested(document_symbol.children.unwrap())
-                    }));
-                }
+        let document_symbol = ast_node_to_document_symbol(&line_index, &program)
+            .map_err(Into::into)
+            .map_err(jsonrpc_error)?
+            .ok_or(anyhow!("program has no document symbol"))
+            .map_err(jsonrpc_error)?;

-                Ok(None)
-            }
-            None => Ok(None), // TODO: Handle error
-        }
+        Ok(Some(DocumentSymbolResponse::Nested(
+            document_symbol.children.unwrap_or(vec![]),
+        )))
     }

     /// Finds references to symbol that was renamed and sends edit operations to language client
@@ -296,21 +312,17 @@
     ) -> tower_lsp::jsonrpc::Result> {
         let info = self
             .read_text_document_info(&params.text_document_position.text_document.uri)
-            .await;
-
-        let info = match info {
-            Some(info) => info,
-            None => return Ok(None),
-        };
+            .await
+            .map_err(jsonrpc_error)?;

         let text = info.text;
         let line_index = LineIndex::new(&text);
         let position =
             lsp_position_to_nemo_position(&line_index, params.text_document_position.position)
-                .unwrap();
+                .map_err(Into::into)
+                .map_err(jsonrpc_error)?;

-        let program = parse_program_str(&text);
-        let program = program.0;
+        let (program, _) = parse_program_str(&text);

         let node_path = find_in_ast(&program, position);

@@ -333,13 +345,17 @@
                 edits: referenced_nodes
                     .into_iter()
                     .filter_map(|node| {
-                        node.lsp_sub_node_to_rename().map(|renamed_node| {
-                            OneOf::Left(TextEdit {
-                                range: node_to_range_lsp(&line_index, renamed_node),
-                                new_text: params.new_name.clone(),
+                        node.lsp_range_to_rename().map(|renamed_node_range| {
+                            Ok({
+                                OneOf::Left(TextEdit {
+                                    range: nemo_range_to_lsp_range(&line_index, renamed_node_range)
+                                        .map_err(|_error| ())?, // TODO: Print error,
+                                    new_text: params.new_name.clone(),
+                                })
                             })
                         })
                     })
+                    .filter_map(|result: Result<_, ()>| result.ok())
                     .collect(),
             };

@@ -358,19 +374,16 @@
     ) -> tower_lsp::jsonrpc::Result> {
         let info = self
             .read_text_document_info(&params.text_document.uri)
-            .await;
-
-        let info = match info {
-            Some(info) => info,
-            None => return Ok(None),
-        };
+            .await
+            .map_err(jsonrpc_error)?;

         let text = info.text;
         let line_index = LineIndex::new(&text);
-        let position = lsp_position_to_nemo_position(&line_index, params.position).unwrap();
+        let position = lsp_position_to_nemo_position(&line_index, params.position)
+            .map_err(Into::into)
+            .map_err(jsonrpc_error)?;

-        let program = parse_program_str(&text);
-        let program = program.0;
+        let (program, _) = parse_program_str(&text);

         let node_path = find_in_ast(&program, position);

         // Get the identifier most specific to the position
         let indentified_node = node_path_deepest_identifier(&node_path);

         match indentified_node {
-            Some(indentified_node) => {
-                Ok(indentified_node
-                    .node
-                    .lsp_sub_node_to_rename()
-                    .map(|renamed_node| {
-                        PrepareRenameResponse::Range(node_to_range_lsp(&line_index, renamed_node))
-                    }))
-            }
+            Some(indentified_node) => Ok(Some(PrepareRenameResponse::Range(
+                nemo_range_to_lsp_range(
+                    &line_index,
+                    indentified_node
+                        .node
+                        .lsp_range_to_rename()
+                        .ok_or_else(|| anyhow!("identified node can not be renamed"))
+                        .map_err(jsonrpc_error)?,
+                )
+                .map_err(Into::into)
+                .map_err(jsonrpc_error)?,
+            ))),
             None => Ok(None),
         }
     }

@@ -395,6 +412,15 @@
     }
 }

+fn node_with_range<'a>(
+    line_index: &LineIndex,
+    node: &'a dyn AstNode,
+) -> Option<(&'a dyn AstNode, Range)> {
+    nemo_range_to_lsp_range(line_index, node.range())
+        .map(|range| (node, range)) // TODO: Print error,
+        .ok()
+}
+
 struct IdentifiedNode<'a> {
     node: &'a dyn AstNode,
     identifier: String,

@@ -486,37 +512,12 @@ fn find_in_ast_recurse<'a>(
 ) {
     path.push(node);

-    if let Some(children) = node.children() {
-        for (child, next_child) in 
children.iter().zip(children.iter().skip(1)) { - if next_child.position() > position { - find_in_ast_recurse(*child, position, path); - return; - } - } - if let Some(child) = children.last() { + for child in node.children().iter().flatten() { + let range = child.range(); + if range.start <= position && position < range.end { find_in_ast_recurse(*child, position, path); + break; // Assume no nodes overlap } - }; -} - -fn node_to_range_lsp(line_index: &LineIndex, node: &dyn AstNode) -> Range { - Range { - start: nemo_position_to_lsp_position(line_index, node.position()).unwrap(), // TODO: Improve error handling - end: nemo_position_to_lsp_position( - line_index, - Position { - offset: node.position().offset + node.span().len(), - line: node.position().line + node.span().fragment().lines().count() as u32 - 1, - column: if node.span().fragment().lines().count() > 1 { - 1 + node.span().fragment().lines().last().unwrap().len() // TODO: Check if length is in correct encoding - as u32 - } else { - node.position().column + node.span().fragment().len() as u32 - // TODO: Check if length is in correct encoding - }, - }, - ) - .unwrap(), } } @@ -524,9 +525,7 @@ fn ast_node_to_document_symbol( line_index: &LineIndex, node: &dyn AstNode, ) -> Result, PositionConversionError> { - let range = node_to_range_lsp(line_index, node); - - let selection_range = range; + let range = nemo_range_to_lsp_range(line_index, node.range())?; if let Some((name, kind)) = node.lsp_symbol_info() { let children_results: Vec<_> = node @@ -555,7 +554,7 @@ fn ast_node_to_document_symbol( kind, name, range, - selection_range, + selection_range: range, tags: None, deprecated: None, }, diff --git a/nemo-language-server/src/language_server/nemo_position.rs b/nemo-language-server/src/language_server/nemo_position.rs index 4e155166e..782016991 100644 --- a/nemo-language-server/src/language_server/nemo_position.rs +++ b/nemo-language-server/src/language_server/nemo_position.rs @@ -8,22 +8,33 @@ //! * line: u32 index of the line, first line gets index 1 //! * offset: u32 index of the UTF-8 code point (byte) within the line, first column gets index 0 +use anyhow::anyhow; use line_index::{LineCol, LineIndex, WideEncoding, WideLineCol}; #[derive(Debug)] pub enum PositionConversionError { NemoPosition(nemo::io::parser::ast::Position), LspPosition(tower_lsp::lsp_types::Position), + LspLineCol(LineCol), +} + +impl From for anyhow::Error { + fn from(val: PositionConversionError) -> Self { + anyhow!("could not convert source code position: {:#?}", val) + } } fn line_col_to_nemo_position( line_index: &LineIndex, line_col: LineCol, -) -> Result { +) -> Result { Ok(nemo::io::parser::ast::Position { line: line_col.line + 1, column: line_col.col, - offset: line_index.offset(line_col).ok_or(())?.into(), + offset: line_index + .offset(line_col) + .ok_or(PositionConversionError::LspLineCol(line_col))? 
+ .into(), }) } @@ -42,7 +53,7 @@ pub fn lsp_position_to_nemo_position( ) .ok_or(PositionConversionError::LspPosition(position))?; - Ok(line_col_to_nemo_position(line_index, line_col).unwrap()) + line_col_to_nemo_position(line_index, line_col) } fn nemo_position_to_line_col(position: nemo::io::parser::ast::Position) -> LineCol { @@ -67,3 +78,14 @@ pub fn nemo_position_to_lsp_position( character: wide_line_col.col, }) } + +/// Converts a Nemo range to a LSP range +pub fn nemo_range_to_lsp_range( + line_index: &LineIndex, + range: nemo::io::parser::ast::Range, +) -> Result { + Ok(tower_lsp::lsp_types::Range { + start: nemo_position_to_lsp_position(line_index, range.start)?, + end: nemo_position_to_lsp_position(line_index, range.end)?, + }) +} diff --git a/nemo-language-server/src/lib.rs b/nemo-language-server/src/lib.rs index de15066cd..646a4a250 100644 --- a/nemo-language-server/src/lib.rs +++ b/nemo-language-server/src/lib.rs @@ -14,6 +14,6 @@ pub fn create_language_server() -> (LspService, ClientSocket) { LspService::new(Backend::new) } -// // See https://doc.rust-lang.org/cargo/reference/features.html#mutually-exclusive-features +// See https://doc.rust-lang.org/cargo/reference/features.html#mutually-exclusive-features #[cfg(all(feature = "js", feature = "tokio"))] compile_error!("feature \"js\" and feature \"tokio\" cannot be enabled at the same time"); diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs index e50a4d652..6072a126c 100644 --- a/nemo/src/io/lexer.rs +++ b/nemo/src/io/lexer.rs @@ -370,14 +370,6 @@ impl<'a> AstNode for Token<'a> { self.span } - fn position(&self) -> Position { - Position { - offset: self.span.location_offset(), - line: self.span.location_line(), - column: self.span.get_utf8_column() as u32, - } - } - fn is_token(&self) -> bool { true } @@ -386,7 +378,7 @@ impl<'a> AstNode for Token<'a> { None } - fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> { + fn lsp_range_to_rename(&self) -> Option { None } diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index 9b60b1382..0fe2f64e8 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -3869,7 +3869,7 @@ pub mod new { .map(|(rest_input, var)| { ( rest_input, - Term::Variable(Token { + Term::UniversalVariable(Token { kind: TokenKind::Variable, span: var.input, }), @@ -3892,7 +3892,7 @@ pub mod new { .map(|(rest_input, existential)| { ( rest_input, - Term::Existential(Token { + Term::ExistentialVariable(Token { kind: TokenKind::Existential, span: existential.input, }), diff --git a/nemo/src/io/parser/ast.rs b/nemo/src/io/parser/ast.rs index 41c767ec7..9acc56a59 100644 --- a/nemo/src/io/parser/ast.rs +++ b/nemo/src/io/parser/ast.rs @@ -1,4 +1,3 @@ -use nom::Offset; use tower_lsp::lsp_types::SymbolKind; use crate::io::lexer::{Span, Token}; @@ -16,7 +15,32 @@ pub(crate) mod tuple; pub trait AstNode: std::fmt::Debug + Display + Sync { fn children(&self) -> Option>; fn span(&self) -> Span; - fn position(&self) -> Position; + + fn range(&self) -> Range { + let span = self.span(); + + let start_position = Position { + offset: self.span().location_offset(), + line: self.span().location_line(), + column: self.span().get_utf8_column() as u32, + }; + + let end_position = Position { + offset: start_position.offset + span.len(), + line: start_position.line + span.fragment().lines().count() as u32 - 1, + column: if span.fragment().lines().count() > 1 { + 1 + span.fragment().lines().last().unwrap().len() as u32 // Column is on new line + } else { + start_position.column + span.fragment().len() as u32 
// Column is on same line + }, + }; + + Range { + start: start_position, + end: end_position, + } + } + fn is_token(&self) -> bool; fn name(&self) -> String; @@ -29,7 +53,8 @@ pub trait AstNode: std::fmt::Debug + Display + Sync { /// This can be used to restict rename operations to be local, e.g. for variable idenfiers inside of rules. fn lsp_identifier(&self) -> Option<(String, String)>; fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)>; - fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode>; + /// Range of the part of the node that should be renamed or [`None`] if the node can not be renamed + fn lsp_range_to_rename(&self) -> Option; } #[derive(Debug, Clone, Copy, Hash)] @@ -64,6 +89,12 @@ impl Default for Position { } } +#[derive(Debug, Clone, Copy, Hash)] +pub struct Range { + pub start: Position, + pub end: Position, +} + /// Whitespace or Comment token #[derive(Debug, Clone, PartialEq)] pub struct Wsoc<'a> { @@ -84,14 +115,6 @@ impl AstNode for Wsoc<'_> { self.span } - fn position(&self) -> Position { - Position { - offset: self.span.location_offset(), - line: self.span.location_line(), - column: self.span.get_utf8_column() as u32, - } - } - fn is_token(&self) -> bool { false } @@ -109,7 +132,7 @@ impl AstNode for Wsoc<'_> { None } - fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> { + fn lsp_range_to_rename(&self) -> Option { None } @@ -182,14 +205,6 @@ impl AstNode for List<'_, T> { self.span } - fn position(&self) -> Position { - Position { - offset: self.span.location_offset(), - line: self.span.location_line(), - column: self.span.get_utf8_column() as u32, - } - } - fn is_token(&self) -> bool { false } @@ -207,7 +222,7 @@ impl AstNode for List<'_, T> { None } - fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> { + fn lsp_range_to_rename(&self) -> Option { None } @@ -398,7 +413,7 @@ mod test { ws2: None, terms: Some(List { span: s!(304, 12, "?VarA"), - first: Term::Variable(Token { + first: Term::UniversalVariable(Token { kind: TokenKind::Variable, span: s!(304, 12, "?VarA"), }), @@ -425,7 +440,7 @@ mod test { ws2: None, terms: Some(List { span: s!(328, 12, "?Var, ConstB"), - first: Term::Variable(Token { + first: Term::UniversalVariable(Token { kind: TokenKind::Variable, span: s!(328, 12, "?VarA"), }), diff --git a/nemo/src/io/parser/ast/atom.rs b/nemo/src/io/parser/ast/atom.rs index 5410ae9d1..bbf6e88b4 100644 --- a/nemo/src/io/parser/ast/atom.rs +++ b/nemo/src/io/parser/ast/atom.rs @@ -1,9 +1,9 @@ use tower_lsp::lsp_types::SymbolKind; +use super::map::Map; use super::term::Term; use super::tuple::Tuple; -use super::{ast_to_ascii_tree, AstNode, Wsoc}; -use super::{map::Map, Position}; +use super::{ast_to_ascii_tree, AstNode, Range, Wsoc}; use crate::io::lexer::{Span, Token}; use ascii_tree::write_tree; @@ -74,15 +74,6 @@ impl AstNode for Atom<'_> { } } - fn position(&self) -> Position { - let span = self.span(); - Position { - offset: span.location_offset(), - line: span.location_line(), - column: span.get_utf8_column() as u32, - } - } - fn is_token(&self) -> bool { false } @@ -108,19 +99,18 @@ impl AstNode for Atom<'_> { } fn lsp_identifier(&self) -> Option<(String, String)> { - self.tuple().map(|tuple| ( + self.tuple().map(|tuple| { + ( format!("atom/{}", tuple.identifier.unwrap().span().fragment()), "file".to_string(), - )) + ) + }) } - fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> { - None - // TODO: - // match self.tuple() { - // Some(tuple) => Some(&tuple.identifier.unwrap()), - // None => None, - // } + fn lsp_range_to_rename(&self) 
-> Option { + self.tuple() + .and_then(|tuple| tuple.identifier) + .map(|identifier| identifier.range()) } fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> { diff --git a/nemo/src/io/parser/ast/directive.rs b/nemo/src/io/parser/ast/directive.rs index 415b584d4..86c50a315 100644 --- a/nemo/src/io/parser/ast/directive.rs +++ b/nemo/src/io/parser/ast/directive.rs @@ -1,7 +1,7 @@ use tower_lsp::lsp_types::SymbolKind; use super::map::Map; -use super::{ast_to_ascii_tree, AstNode, List, Position, Wsoc}; +use super::{ast_to_ascii_tree, AstNode, List, Range, Wsoc}; use crate::io::lexer::{Span, Token}; use ascii_tree::write_tree; @@ -230,15 +230,6 @@ impl AstNode for Directive<'_> { } } - fn position(&self) -> Position { - let span = self.span(); - Position { - offset: span.location_offset(), - line: span.location_line(), - column: span.get_utf8_column() as u32, - } - } - fn is_token(&self) -> bool { false } @@ -268,7 +259,7 @@ impl AstNode for Directive<'_> { None } - fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> { + fn lsp_range_to_rename(&self) -> Option { None } diff --git a/nemo/src/io/parser/ast/map.rs b/nemo/src/io/parser/ast/map.rs index cdafbd7f9..061231e13 100644 --- a/nemo/src/io/parser/ast/map.rs +++ b/nemo/src/io/parser/ast/map.rs @@ -1,7 +1,7 @@ use tower_lsp::lsp_types::SymbolKind; use super::term::Term; -use super::{ast_to_ascii_tree, AstNode, List, Position, Wsoc}; +use super::{ast_to_ascii_tree, AstNode, List, Position, Range, Wsoc}; use crate::io::lexer::{Span, Token}; use ascii_tree::write_tree; use std::fmt::Debug; @@ -44,14 +44,6 @@ impl AstNode for Map<'_> { self.span } - fn position(&self) -> Position { - Position { - offset: self.span.location_offset(), - line: self.span.location_line(), - column: self.span.get_utf8_column() as u32, - } - } - fn is_token(&self) -> bool { false } @@ -64,7 +56,7 @@ impl AstNode for Map<'_> { None } - fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> { + fn lsp_range_to_rename(&self) -> Option { None } @@ -109,14 +101,6 @@ impl AstNode for Pair<'_, K, V> { self.span } - fn position(&self) -> Position { - Position { - offset: self.span.location_offset(), - line: self.span.location_line(), - column: self.span.get_utf8_column() as u32, - } - } - fn is_token(&self) -> bool { false } @@ -134,7 +118,7 @@ impl AstNode for Pair<'_, K, V> { None } - fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> { + fn lsp_range_to_rename(&self) -> Option { None } diff --git a/nemo/src/io/parser/ast/program.rs b/nemo/src/io/parser/ast/program.rs index 9331e59d5..f2f111461 100644 --- a/nemo/src/io/parser/ast/program.rs +++ b/nemo/src/io/parser/ast/program.rs @@ -1,6 +1,6 @@ use tower_lsp::lsp_types::SymbolKind; -use super::{ast_to_ascii_tree, statement::Statement, AstNode, Position}; +use super::{ast_to_ascii_tree, statement::Statement, AstNode, Position, Range}; use crate::io::lexer::{Span, Token}; use ascii_tree::write_tree; @@ -29,14 +29,6 @@ impl AstNode for Program<'_> { self.span } - fn position(&self) -> Position { - Position { - offset: self.span.location_offset(), - line: self.span.location_line(), - column: self.span.get_utf8_column() as u32, - } - } - fn is_token(&self) -> bool { false } @@ -63,7 +55,7 @@ impl AstNode for Program<'_> { None } - fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> { + fn lsp_range_to_rename(&self) -> Option { None } diff --git a/nemo/src/io/parser/ast/statement.rs b/nemo/src/io/parser/ast/statement.rs index d9a201500..63014a6d3 100644 --- a/nemo/src/io/parser/ast/statement.rs +++ 
b/nemo/src/io/parser/ast/statement.rs @@ -2,7 +2,7 @@ use tower_lsp::lsp_types::SymbolKind; use super::atom::Atom; use super::directive::Directive; -use super::{ast_to_ascii_tree, AstNode, List, Position, Wsoc}; +use super::{ast_to_ascii_tree, AstNode, List, Position, Range, Wsoc}; use crate::io::lexer::{Span, Token}; use ascii_tree::write_tree; @@ -100,15 +100,6 @@ impl AstNode for Statement<'_> { } } - fn position(&self) -> Position { - let span = self.span(); - Position { - offset: span.location_offset(), - line: span.location_line(), - column: span.get_utf8_column() as u32, - } - } - fn is_token(&self) -> bool { false } @@ -140,7 +131,7 @@ impl AstNode for Statement<'_> { Some(("statement".to_string(), "statement".to_string())) } - fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> { + fn lsp_range_to_rename(&self) -> Option { None } diff --git a/nemo/src/io/parser/ast/term.rs b/nemo/src/io/parser/ast/term.rs index 7b33a95d8..beda1993d 100644 --- a/nemo/src/io/parser/ast/term.rs +++ b/nemo/src/io/parser/ast/term.rs @@ -2,15 +2,15 @@ use tower_lsp::lsp_types::SymbolKind; use super::map::Map; use super::tuple::Tuple; -use super::{ast_to_ascii_tree, AstNode, List, Position, Wsoc}; +use super::{ast_to_ascii_tree, AstNode, List, Range, Wsoc}; use crate::io::lexer::{Span, Token}; use ascii_tree::write_tree; #[derive(Debug, Clone, PartialEq)] pub enum Term<'a> { Primitive(Primitive<'a>), - Variable(Token<'a>), - Existential(Token<'a>), + UniversalVariable(Token<'a>), + ExistentialVariable(Token<'a>), // TODO: Is whitespace needed? Figure out how unary terms look UnaryPrefix { span: Span<'a>, @@ -43,8 +43,8 @@ impl AstNode for Term<'_> { fn children(&self) -> Option> { match self { Term::Primitive(token) => Some(vec![token]), - Term::Variable(token) => Some(vec![token]), - Term::Existential(token) => Some(vec![token]), + Term::UniversalVariable(token) => Some(vec![token]), + Term::ExistentialVariable(token) => Some(vec![token]), Term::UnaryPrefix { operation, term, .. } => Some(vec![operation, &**term]), @@ -100,8 +100,8 @@ impl AstNode for Term<'_> { fn span(&self) -> Span { match self { Term::Primitive(t) => t.span(), - Term::Variable(t) => t.span(), - Term::Existential(t) => t.span(), + Term::UniversalVariable(t) => t.span(), + Term::ExistentialVariable(t) => t.span(), Term::UnaryPrefix { span, .. } => *span, Term::Binary { span, .. } => *span, Term::Aggregation { span, .. } => *span, @@ -111,15 +111,6 @@ impl AstNode for Term<'_> { } } - fn position(&self) -> Position { - let span = self.span(); - Position { - offset: span.location_offset(), - line: span.location_line(), - column: span.get_utf8_column() as u32, - } - } - fn is_token(&self) -> bool { false } @@ -138,8 +129,8 @@ impl AstNode for Term<'_> { } match self { Term::Primitive(_) => name!("Primitive"), - Term::Variable(_) => name!("Variable"), - Term::Existential(_) => name!("Existential Variable"), + Term::UniversalVariable(_) => name!("Variable"), + Term::ExistentialVariable(_) => name!("Existential Variable"), Term::UnaryPrefix { .. } => name!("Unary Term"), Term::Binary { .. } => name!("Binary Term"), Term::Aggregation { .. 
} => name!("Aggregation"), @@ -157,7 +148,7 @@ impl AstNode for Term<'_> { fn lsp_identifier(&self) -> Option<(String, String)> { match self { - Term::Variable(t) => Some(( + Term::UniversalVariable(t) => Some(( format!("variable/{}", t.span().fragment()), "statement".to_string(), )), @@ -175,31 +166,31 @@ impl AstNode for Term<'_> { } } - fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> { - None - // TODO: - // match self { - // Term::Variable(t) => Some(t), - // Term::Aggregation { operation, .. } => Some(operation), - // Term::Tuple(tuple) => { - // if let Some(identifier) = tuple.identifier { - // Some(identifier) - // } else { - // None - // } - // } - // // Term::Function(named_tuple) => Some(&named_tuple.identifier), - // _ => None, - // } + fn lsp_range_to_rename(&self) -> Option { + match self { + Term::Primitive(_) => None, + Term::UniversalVariable(t) => Some(t.range()), + Term::UnaryPrefix { .. } => None, + Term::Blank { .. } => None, + Term::ExistentialVariable(t) => Some(t.range()), + Term::Binary { .. } => None, + Term::Aggregation { operation, .. } => Some(operation.range()), + Term::Tuple(tuple) => tuple.identifier.map(|identifier| identifier.range()), + Term::Map(_map) => None, + } } fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> { match self { Term::Primitive(_) => Some((String::from("Primitive term"), SymbolKind::CONSTANT)), - Term::Variable(t) => Some((format!("Variable: {}", t.span()), SymbolKind::VARIABLE)), + Term::UniversalVariable(t) => { + Some((format!("Variable: {}", t.span()), SymbolKind::VARIABLE)) + } Term::UnaryPrefix { .. } => Some((String::from("Unary prefix"), SymbolKind::OPERATOR)), Term::Blank { .. } => Some((String::from("Unary prefix"), SymbolKind::VARIABLE)), - Term::Existential { .. } => Some((String::from("Unary prefix"), SymbolKind::VARIABLE)), + Term::ExistentialVariable { .. } => { + Some((String::from("Existential"), SymbolKind::VARIABLE)) + } Term::Binary { .. } => Some((String::from("Binary term"), SymbolKind::OPERATOR)), Term::Aggregation { operation, .. 
} => Some(( format!("Aggregation: {}", operation.span.fragment()), @@ -215,7 +206,7 @@ impl AstNode for Term<'_> { Some((String::from("Tuple"), SymbolKind::ARRAY)) } } - Term::Map(map) => Some((String::from("Map"), SymbolKind::ARRAY)), + Term::Map(_map) => Some((String::from("Map"), SymbolKind::ARRAY)), } } } @@ -320,15 +311,6 @@ impl AstNode for Primitive<'_> { } } - fn position(&self) -> Position { - let span = self.span(); - Position { - offset: span.location_offset(), - line: span.location_line(), - column: span.get_utf8_column() as u32, - } - } - fn is_token(&self) -> bool { false } @@ -359,7 +341,7 @@ impl AstNode for Primitive<'_> { None } - fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> { + fn lsp_range_to_rename(&self) -> Option { None } @@ -397,10 +379,6 @@ impl AstNode for Exponent<'_> { todo!() } - fn position(&self) -> Position { - todo!() - } - fn is_token(&self) -> bool { todo!() } @@ -413,7 +391,7 @@ impl AstNode for Exponent<'_> { None } - fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> { + fn lsp_range_to_rename(&self) -> Option { None } diff --git a/nemo/src/io/parser/ast/tuple.rs b/nemo/src/io/parser/ast/tuple.rs index 459d22962..b6e5a0bca 100644 --- a/nemo/src/io/parser/ast/tuple.rs +++ b/nemo/src/io/parser/ast/tuple.rs @@ -1,7 +1,7 @@ use tower_lsp::lsp_types::SymbolKind; use super::term::Term; -use super::{ast_to_ascii_tree, AstNode, List, Position, Wsoc}; +use super::{ast_to_ascii_tree, AstNode, List, Position, Range, Wsoc}; use crate::io::lexer::{Span, Token}; use ascii_tree::write_tree; @@ -44,14 +44,6 @@ impl AstNode for Tuple<'_> { self.span } - fn position(&self) -> Position { - Position { - offset: self.span.location_offset(), - line: self.span.location_line(), - column: self.span.get_utf8_column() as u32, - } - } - fn is_token(&self) -> bool { false } @@ -69,7 +61,7 @@ impl AstNode for Tuple<'_> { None } - fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> { + fn lsp_range_to_rename(&self) -> Option { None } diff --git a/nemo/src/model/rule_model/syntax.rs b/nemo/src/model/rule_model/syntax.rs index 7f2f38326..e3b9cad78 100644 --- a/nemo/src/model/rule_model/syntax.rs +++ b/nemo/src/model/rule_model/syntax.rs @@ -1,7 +1,6 @@ //! Constants for strings that are relevant to the syntax of rules. //! These are kept in one location, since they are required in various //! places related to parsing and display. -use nemo_physical::datavalues::syntax; /// The "predicate name" used for the CSV format in import/export directives. 
 pub(crate) const FILE_FORMAT_CSV: &str = "csv";

From 29295fa3d9020d9f9f6413bb0af09c9cb918f50b Mon Sep 17 00:00:00 2001
From: Alex Ivliev
Date: Fri, 28 Jun 2024 13:13:08 +0200
Subject: [PATCH 049/214] Add test cases that highlight discrepancies between
 old and new parser

---
 nemo/src/io/lexer.rs  |   1 +
 nemo/src/io/parser.rs | 126 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 127 insertions(+)

diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs
index 6072a126c..8338c8f1c 100644
--- a/nemo/src/io/lexer.rs
+++ b/nemo/src/io/lexer.rs
@@ -1008,6 +1008,7 @@ mod tests {
 
     // FIXME: change the name of this test according to the correct name for `?X > 3`
     // (Constraints are Rules with an empty Head)
+    #[ignore]
     #[test]
     fn constraints() {
        let input = Span::new("A(?X):-B(?X),?X<42,?X>3.");
diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs
index 0fe2f64e8..2db963fb5 100644
--- a/nemo/src/io/parser.rs
+++ b/nemo/src/io/parser.rs
@@ -5154,5 +5154,131 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters
             dbg!(&result);
             println!("{}", result.0);
         }
+
+        // TODO: Instead of just checking for errors, this should compare the created AST
+        #[test]
+        fn parse_language_tag() {
+            let test_string = "fact(\"テスト\"@ja).";
+            let input = Span::new(&test_string);
+            let refcell = RefCell::new(Vec::new());
+            let parser_state = ParserState { errors: &refcell };
+            let input = Input {
+                input,
+                parser_state,
+            };
+            let result = parse_program::<ErrorTree<Input<'_, '_>>>(input);
+            assert!(result.1.is_empty());
+        }
+
+        // TODO: Instead of just checking for errors, this should compare the created AST
+        #[test]
+        fn parse_rdf_literal() {
+            let test_string = "fact(\"2023\"^^xsd:gYear).";
+            let input = Span::new(&test_string);
+            let refcell = RefCell::new(Vec::new());
+            let parser_state = ParserState { errors: &refcell };
+            let input = Input {
+                input,
+                parser_state,
+            };
+            let result = parse_program::<ErrorTree<Input<'_, '_>>>(input);
+            assert!(result.1.is_empty());
+        }
+
+        // TODO: Instead of just checking for errors, this should compare the created AST
+        #[test]
+        fn parse_floating_point_numbers() {
+            // https://regex101.com/r/ObowxD/5
+
+            let valid_numbers = vec![
+                "0.2",
+                "4534.34534345",
+                ".456456",
+                "1.",
+                "1e545",
+                "1.1e435",
+                ".1e232",
+                "1.e343",
+                "112E+12",
+                "12312.1231",
+                ".1231",
+                "1231",
+                "-1e+0",
+                "1e-1",
+            ];
+
+            let invalid_numbers = vec!["3", "E9", ".e3", "7E"];
+
+            for valid in valid_numbers {
+                let input = Span::new(valid);
+                let refcell = RefCell::new(Vec::new());
+                let parser_state = ParserState { errors: &refcell };
+                let input = Input {
+                    input,
+                    parser_state,
+                };
+
+                let result = parse_decimal::<ErrorTree<Input<'_, '_>>>(input);
+                assert!(result.is_ok())
+            }
+
+            for invalid in invalid_numbers {
+                let input = Span::new(invalid);
+                let refcell = RefCell::new(Vec::new());
+                let parser_state = ParserState { errors: &refcell };
+                let input = Input {
+                    input,
+                    parser_state,
+                };
+
+                let result = parse_decimal::<ErrorTree<Input<'_, '_>>>(input);
+                assert!(result.is_err())
+            }
+        }
+
+        // TODO: Instead of just checking for errors, this should compare the created AST
+        #[test]
+        fn parse_complex_comparison() {
+            let test_string = "complex(?X, ?Y) :- data(?X, ?Y), ABS(?X - ?Y) >= ?X * ?X.";
+            let input = Span::new(&test_string);
+            let refcell = RefCell::new(Vec::new());
+            let parser_state = ParserState { errors: &refcell };
+            let input = Input {
+                input,
+                parser_state,
+            };
+            let result = parse_program::<ErrorTree<Input<'_, '_>>>(input);
+            assert!(result.1.is_empty());
+        }
+
+        // TODO: Instead of just checking for errors, this should compare the created AST
+        
#[test] + fn parse_negation() { + let test_string = "R(?x, ?y, ?z) :- S(?x, ?y, ?z), ~T(?x, ?y), ~ T(a, ?z)."; // should allow for spaces + let input = Span::new(&test_string); + let refcell = RefCell::new(Vec::new()); + let parser_state = ParserState { errors: &refcell }; + let input = Input { + input, + parser_state, + }; + let result = parse_program::>>(input); + assert!(result.1.is_empty()); + } + + // TODO: Instead of just checking for errors, this should compare the created AST + #[test] + fn parse_trailing_comma() { + let test_string = "head(?X) :- body( (2,), (3, 4, ), ?X) ."; // should allow for spaces + let input = Span::new(&test_string); + let refcell = RefCell::new(Vec::new()); + let parser_state = ParserState { errors: &refcell }; + let input = Input { + input, + parser_state, + }; + let result = parse_program::>>(input); + assert!(result.1.is_empty()); + } } } From d35b6dd852309c3cbce7881a05961603aaf5e1b9 Mon Sep 17 00:00:00 2001 From: Alex Ivliev Date: Fri, 28 Jun 2024 13:10:18 +0200 Subject: [PATCH 050/214] Update integration tests to reflect new input syntax --- resources/testcases/basic/join.rls | 6 +++--- resources/testcases/basic/negation.rls | 8 ++++---- resources/testcases/basic/projection.rls | 2 +- resources/testcases/basic/union.rls | 6 +++--- .../regression/planning_engine/repeated_variables/run.rls | 6 +++--- .../regression/stratification/binary_negated.rls | 4 ++-- .../regression/symmetric_transitive_closure/run.rls | 4 ++-- resources/testcases/regression/wildcards/run.rls | 2 +- 8 files changed, 19 insertions(+), 19 deletions(-) diff --git a/resources/testcases/basic/join.rls b/resources/testcases/basic/join.rls index 9b3492c4f..996a4c204 100644 --- a/resources/testcases/basic/join.rls +++ b/resources/testcases/basic/join.rls @@ -1,6 +1,6 @@ -@source sourceA[3]: load-csv("sources/dataA.csv"). -@source sourceB[3]: load-csv("sources/dataB.csv"). -@source sourceC[3]: load-csv("sources/dataC.csv"). +@import sourceA :- csv { resource = "sources/dataA.csv" } . +@import sourceB :- csv { resource = "sources/dataB.csv" } . +@import sourceC :- csv { resource = "sources/dataC.csv" } . J1(?X, ?Y, ?Z) :- sourceA(?X, ?Z, ?Y), sourceB(?X, ?Y, ?T) . J2(?X, ?Y, ?Z) :- sourceA(?Z, ?Y, ?X), sourceC(?X, ?Y, ?T) . diff --git a/resources/testcases/basic/negation.rls b/resources/testcases/basic/negation.rls index 713b14fd6..0b0ee5862 100644 --- a/resources/testcases/basic/negation.rls +++ b/resources/testcases/basic/negation.rls @@ -1,7 +1,7 @@ -@source main[3]: load-csv("sources/main.csv"). -@source s1[1]: load-csv("sources/S1.csv"). -@source s2[2]: load-csv("sources/S2.csv"). -@source s3[3]: load-csv("sources/S3.csv"). +@import main :- csv { resource = "sources/main.csv" }. +@import s1 :- csv { resource = "sources/S1.csv" }. +@import s2 :- csv { resource = "sources/S2.csv" }. +@import s3 :- csv { resource = "sources/S3.csv" }. singlePositionX(?X, ?Y, ?Z) :- main(?X, ?Y, ?Z), ~s1(?X) . singlePositionY(?X, ?Y, ?Z) :- main(?X, ?Y, ?Z), ~s1(?Y) . diff --git a/resources/testcases/basic/projection.rls b/resources/testcases/basic/projection.rls index a3c7b92d3..57da6db6f 100644 --- a/resources/testcases/basic/projection.rls +++ b/resources/testcases/basic/projection.rls @@ -1,4 +1,4 @@ -@source data[3]: load-csv("sources/data.csv"). +@import data :- csv { resource = "sources/data.csv" }. A(?X, ?Z) :- data(?X, ?Y, ?Z) . B(?Y, ?X) :- A(?X, ?Y) . 
diff --git a/resources/testcases/basic/union.rls b/resources/testcases/basic/union.rls index 1402addc5..4258d3e1c 100644 --- a/resources/testcases/basic/union.rls +++ b/resources/testcases/basic/union.rls @@ -1,6 +1,6 @@ -@source sourceA[3]: load-csv("sources/dataA.csv"). -@source sourceB[3]: load-csv("sources/dataB.csv"). -@source sourceC[3]: load-csv("sources/dataC.csv"). +@import sourceA :- csv { resource = "sources/dataA.csv" }. +@import sourceB :- csv { resource = "sources/dataB.csv" }. +@import sourceC :- csv { resource = "sources/dataC.csv" }. ABC(?X, ?Y, ?Z) :- sourceA(?X, ?Y, ?Z) . ABC(?X, ?Y, ?Z) :- sourceB(?X, ?Y, ?Z) . diff --git a/resources/testcases/regression/planning_engine/repeated_variables/run.rls b/resources/testcases/regression/planning_engine/repeated_variables/run.rls index 49cb4ece7..b4d830ec7 100644 --- a/resources/testcases/regression/planning_engine/repeated_variables/run.rls +++ b/resources/testcases/regression/planning_engine/repeated_variables/run.rls @@ -1,6 +1,6 @@ -@source sourceA[3]: load-csv("sources/dataA.csv"). -@source sourceB[3]: load-csv("sources/dataB.csv"). -@source sourceC[3]: load-csv("sources/dataC.csv"). +@import sourceA :- csv { resource = "sources/dataA.csv" }. +@import sourceB :- csv { resource = "sources/dataB.csv" }. +@import sourceC :- csv { resource = "sources/dataC.csv" }. RepeatBody(?R, ?S) :- sourceA(?X, ?X, ?R), sourceB(?S, ?Y, ?Y) . RepeatHead(?X, ?Y, ?X, ?Y, ?Z, ?Z, ?X) :- sourceA(?X, ?Z, ?Y), sourceB(?X, ?Y, ?T) . diff --git a/resources/testcases/regression/stratification/binary_negated.rls b/resources/testcases/regression/stratification/binary_negated.rls index 0393f4ead..82ee56981 100644 --- a/resources/testcases/regression/stratification/binary_negated.rls +++ b/resources/testcases/regression/stratification/binary_negated.rls @@ -1,5 +1,5 @@ -@source equal[2]: load-csv("sources/equal.csv"). -@source data[2]: load-csv("sources/data.csv"). +@import equal :- csv { resource = "sources/equal.csv" }. +@import data :- csv { resource = "sources/data.csv" }. equal(b, b) . equal(c, c) . diff --git a/resources/testcases/regression/symmetric_transitive_closure/run.rls b/resources/testcases/regression/symmetric_transitive_closure/run.rls index 9eedc37ab..a1b24c09b 100644 --- a/resources/testcases/regression/symmetric_transitive_closure/run.rls +++ b/resources/testcases/regression/symmetric_transitive_closure/run.rls @@ -1,5 +1,5 @@ -@source city[1]: load-csv("city.csv"). -@source conn[2]: load-csv("conn.csv"). +@import city :- csv { resource = "city.csv" }. +@import conn :- csv { resource = "conn.csv" }. connected(?X,?Y) :- city(?X), city(?Y), conn(?X,?Y). conn(?X,?Y) :- conn(?Y,?X). diff --git a/resources/testcases/regression/wildcards/run.rls b/resources/testcases/regression/wildcards/run.rls index c323f7bfd..64c1ef223 100644 --- a/resources/testcases/regression/wildcards/run.rls +++ b/resources/testcases/regression/wildcards/run.rls @@ -1,4 +1,4 @@ -@source input[3]: load-csv("sources/main.csv"). +@import input :- csv { resource = "sources/main.csv" }. result(?x) :- input(?x, _, _). 
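The directive rewrites in the patch above all follow the same pattern: an
old-style `@source` declaration with an explicit arity becomes an `@import`
directive whose format parameters live in a map. A minimal before/after
sketch, using the `city` declaration from the symmetric_transitive_closure
hunk above (note that the arity annotation `[1]` is simply dropped in the
rewritten files):

    % old syntax: arity and format are part of the @source declaration
    @source city[1]: load-csv("city.csv").

    % new syntax: the format name heads a map of key-value parameters
    @import city :- csv { resource = "city.csv" }.
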
From 715e4b5097e1bddeccb90a8ba2c5bb121ccc6851 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Mon, 1 Jul 2024 11:21:44 +0200 Subject: [PATCH 051/214] Remove Whitespace in AST Nodes --- nemo/src/io/lexer.rs | 11 +- nemo/src/io/parser.rs | 377 ++++++---------------------- nemo/src/io/parser/ast.rs | 68 ++--- nemo/src/io/parser/ast/atom.rs | 10 - nemo/src/io/parser/ast/directive.rs | 84 ------- nemo/src/io/parser/ast/map.rs | 20 -- nemo/src/io/parser/ast/statement.rs | 25 -- nemo/src/io/parser/ast/term.rs | 20 -- nemo/src/io/parser/ast/tuple.rs | 12 - 9 files changed, 101 insertions(+), 526 deletions(-) diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs index 8338c8f1c..6b8811654 100644 --- a/nemo/src/io/lexer.rs +++ b/nemo/src/io/lexer.rs @@ -6,7 +6,7 @@ use super::parser::new::context; use nom::{ branch::alt, bytes::complete::{is_not, tag, take, take_till}, - character::complete::{alpha1, alphanumeric1, digit1, line_ending, multispace1}, + character::complete::{alpha1, alphanumeric1, digit1, line_ending, multispace0, multispace1}, combinator::{all_consuming, cut, map, opt, recognize}, error::ParseError, multi::{many0, many1}, @@ -674,14 +674,15 @@ where }) } -pub(crate) fn skip_to_dot<'a, 's, E>(input: Input<'a, 's>) -> (Input<'a, 's>, Token<'a>) +pub(crate) fn skip_to_statement_end<'a, 's, E>(input: Input<'a, 's>) -> (Input<'a, 's>, Token<'a>) where E: ParseError> + ContextError, Context>, { - let (rest_input, error_input) = recognize(pair( + let (rest_input, error_input) = recognize(tuple(( take_till::<_, Input<'_, '_>, nom::error::Error<_>>(|c| c == '.'), opt(tag(".")), - ))(input) + multispace0, + )))(input) .expect("Skipping to the next dot should not fail!"); ( rest_input, @@ -1129,6 +1130,6 @@ mod tests { input, parser_state: errors, }; - dbg!(super::skip_to_dot::>(input)); + dbg!(super::skip_to_statement_end::>(input)); } } diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index 2db963fb5..1f2e4e270 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -2443,10 +2443,11 @@ pub mod new { exp, greater, greater_equal, hash, less, less_equal, lex_comment, lex_doc_comment, lex_ident, lex_iri, lex_number, lex_operators, lex_prefixed_ident, lex_string, lex_toplevel_doc_comment, lex_whitespace, minus, open_brace, open_paren, plus, - question_mark, skip_to_dot, slash, star, tilde, underscore, unequal, Context, Error, - ErrorTree, ParserState, Span, Token, TokenKind, + question_mark, skip_to_statement_end, slash, star, tilde, underscore, unequal, Context, + Error, ErrorTree, ParserState, Span, Token, TokenKind, }; use crate::io::parser::ast::AstNode; + use nom::character::complete::multispace0; use nom::combinator::{all_consuming, cut, map, opt, recognize}; use nom::error::{ErrorKind, ParseError}; use nom::sequence::{delimited, pair}; @@ -2518,7 +2519,7 @@ pub mod new { context: vec![context], }; // errors.report_error(err); - let (rest_input, token) = skip_to_dot::>>(input); + let (rest_input, token) = skip_to_statement_end::>>(input); Ok((rest_input, Statement::Error(token))) } Err(err) => Err(err), @@ -2709,19 +2710,26 @@ pub mod new { Context::Program, pair( opt(lex_toplevel_doc_comment::>>), - many0(recover( - report_error(alt(( - // TODO: Discuss wether directives should only get parsed at the beginning of the source file - parse_rule, - parse_fact, - parse_whitespace, - parse_directive, - parse_comment, - ))), - "failed to parse statement", - Context::Program, - input.parser_state, - )), + delimited( + multispace0, + many0(recover( + 
report_error(delimited(
+                        multispace0,
+                        alt((
+                            // TODO: Discuss whether directives should only get parsed at the beginning of the source file
+                            parse_rule,
+                            parse_fact,
+                            parse_directive,
+                            parse_comment,
+                        )),
+                        multispace0,
+                    )),
                     "failed to parse statement",
                     Context::Program,
                     input.parser_state,
                 )),
+                multispace0,
+            ),
             ),
         )(input);
         match result {
@@ -2755,17 +2763,6 @@ pub mod new {
         parse_program::<ErrorTree<Input<'_, '_>>>(input)
     }
 
-    /// Parse whitespace that is between directives, facts, rules and comments.
-    fn parse_whitespace<
-        'a,
-        's,
-        E: ParseError<Input<'a, 's>> + ContextError<Input<'a, 's>, Context>,
-    >(
-        input: Input<'a, 's>,
-    ) -> IResult<Input<'a, 's>, Statement<'a>, E> {
-        lex_whitespace(input).map(|(rest_input, ws)| (rest_input, Statement::Whitespace(ws)))
-    }
-
     /// Parse normal comments that start with a `%` and ends at the line ending.
     fn parse_comment<
@@ -2786,14 +2783,13 @@ pub mod new {
             Context::Fact,
             tuple((opt(lex_doc_comment), parse_normal_atom, wsoc0, dot)),
         )(input)
-        .map(|(rest_input, (doc_comment, atom, ws, dot))| {
+        .map(|(rest_input, (doc_comment, atom, _ws, dot))| {
             (
                 rest_input,
                 Statement::Fact {
                     span: outer_span(input.input, rest_input.input),
                     doc_comment,
                     atom,
-                    ws,
                     dot,
                 },
             )
@@ -2818,18 +2814,15 @@ pub mod new {
             )),
         )(input)
         .map(
-            |(rest_input, (doc_comment, head, ws1, arrow, ws2, body, ws3, dot))| {
+            |(rest_input, (doc_comment, head, _ws1, arrow, _ws2, body, _ws3, dot))| {
                 (
                     rest_input,
                     Statement::Rule {
                         span: outer_span(input.input, rest_input.input),
                         doc_comment,
                         head,
-                        ws1,
                         arrow,
-                        ws2,
                         body,
-                        ws3,
                         dot,
                     },
                 )
@@ -2894,23 +2887,19 @@ pub mod new {
                 dot,
             )),
         )(input)
-        .map(|(rest_input, (doc_comment, kw, ws1, base_iri, ws2, dot))| {
-            (
-                rest_input,
-                Directive::Base {
-                    span: outer_span(input.input, rest_input.input),
-                    doc_comment,
-                    kw: Token {
-                        kind: TokenKind::Base,
-                        span: kw.input,
+        .map(
+            |(rest_input, (doc_comment, _kw, _ws1, base_iri, _ws2, dot))| {
+                (
+                    rest_input,
+                    Directive::Base {
+                        span: outer_span(input.input, rest_input.input),
+                        doc_comment,
+                        base_iri,
+                        dot,
                     },
-                    ws1,
-                    base_iri,
-                    ws2,
-                    dot,
-                },
-            )
-        })
+                )
+            },
+        )
     }
 
     /// Parse the prefix directive.
@@ -2938,24 +2927,17 @@ pub mod new { )), )(input) .map( - |(rest_input, (doc_comment, kw, ws1, prefix, ws2, prefix_iri, ws3, dot))| { + |(rest_input, (doc_comment, _kw, _ws1, prefix, _ws2, prefix_iri, _ws3, dot))| { ( rest_input, Directive::Prefix { span: outer_span(input.input, rest_input.input), doc_comment, - kw: Token { - kind: TokenKind::Prefix, - span: kw.input, - }, - ws1, prefix: Token { kind: TokenKind::Ident, span: prefix.input, }, - ws2, prefix_iri, - ws3, dot, }, ) @@ -2990,23 +2972,18 @@ pub mod new { )), )(input) .map( - |(rest_input, (doc_comment, kw, ws1, predicate, ws2, arrow, ws3, map, ws4, dot))| { + |( + rest_input, + (doc_comment, _kw, _ws1, predicate, _ws2, arrow, _ws3, map, _ws4, dot), + )| { ( rest_input, Directive::Import { span: outer_span(input.input, rest_input.input), doc_comment, - kw: Token { - kind: TokenKind::Import, - span: kw.input, - }, - ws1, predicate, - ws2, arrow, - ws3, map, - ws4, dot, }, ) @@ -3041,23 +3018,18 @@ pub mod new { )), )(input) .map( - |(rest_input, (doc_comment, kw, ws1, predicate, ws2, arrow, ws3, map, ws4, dot))| { + |( + rest_input, + (doc_comment, _kw, _ws1, predicate, _ws2, arrow, _ws3, map, _ws4, dot), + )| { ( rest_input, Directive::Export { span: outer_span(input.input, rest_input.input), doc_comment, - kw: Token { - kind: TokenKind::Export, - span: kw.input, - }, - ws1, predicate, - ws2, arrow, - ws3, map, - ws4, dot, }, ) @@ -3088,19 +3060,13 @@ pub mod new { )), )(input) .map( - |(rest_input, (doc_comment, kw, ws1, predicates, ws2, dot))| { + |(rest_input, (doc_comment, _kw, _ws1, predicates, _ws2, dot))| { ( rest_input, Directive::Output { span: outer_span(input.input, rest_input.input), doc_comment, - kw: Token { - kind: TokenKind::Output, - span: kw.input, - }, - ws1, predicates, - ws2, dot, }, ) @@ -3152,7 +3118,15 @@ pub mod new { List { span: outer_span(input.input, rest_input.input), first, - rest: if rest.is_empty() { None } else { Some(rest) }, + rest: if rest.is_empty() { + None + } else { + Some( + rest.into_iter() + .map(|(_ws1, comma, _ws2, t)| (comma, t)) + .collect(), + ) + }, }, ) }) @@ -3239,15 +3213,13 @@ pub mod new { Context::InfixAtom, tuple((parse_term, wsoc0, parse_operation_token, wsoc0, parse_term)), )(input) - .map(|(rest_input, (lhs, ws1, operation, ws2, rhs))| { + .map(|(rest_input, (lhs, _ws1, operation, _ws2, rhs))| { ( rest_input, Atom::InfixAtom { span: outer_span(input.input, rest_input.input), lhs, - ws1, operation, - ws2, rhs, }, ) @@ -3272,17 +3244,14 @@ pub mod new { )), )(input) .map( - |(rest_input, (identifier, ws1, open_paren, ws2, terms, ws3, close_paren))| { + |(rest_input, (identifier, _ws1, open_paren, _ws2, terms, _ws3, close_paren))| { ( rest_input, Tuple { span: outer_span(input.input, rest_input.input), identifier, - ws1, open_paren, - ws2, terms, - ws3, close_paren, }, ) @@ -3312,17 +3281,14 @@ pub mod new { )), )(input) .map( - |(rest_input, (identifier, ws1, open_paren, ws2, terms, ws3, close_paren))| { + |(rest_input, (identifier, _ws1, open_paren, _ws2, terms, _ws3, close_paren))| { ( rest_input, Tuple { span: outer_span(input.input, rest_input.input), identifier: Some(identifier), - ws1, open_paren, - ws2, terms, - ws3, close_paren, }, ) @@ -3348,17 +3314,14 @@ pub mod new { )), )(input) .map( - |(rest_input, (identifier, ws1, open_brace, ws2, pairs, ws3, close_brace))| { + |(rest_input, (identifier, _ws1, open_brace, _ws2, pairs, _ws3, close_brace))| { ( rest_input, Map { span: outer_span(input.input, rest_input.input), identifier, - ws1, open_brace, - ws2, pairs, - 
ws3, close_brace, }, ) @@ -3418,15 +3381,13 @@ pub mod new { Context::Pair, tuple((parse_term, wsoc0, equal, wsoc0, parse_term)), )(input) - .map(|(rest_input, (key, ws1, equal, ws2, value))| { + .map(|(rest_input, (key, _ws1, equal, _ws2, value))| { ( rest_input, Pair { span: outer_span(input.input, rest_input.input), key, - ws1, equal, - ws2, value, }, ) @@ -3685,13 +3646,11 @@ pub mod new { .map(|(rest_input, (lhs, opt))| { ( rest_input, - if let Some((ws1, operation, ws2, rhs)) = opt { + if let Some((_ws1, operation, _ws2, rhs)) = opt { Term::Binary { span: outer_span(input.input, rest_input.input), lhs: Box::new(lhs), - ws1, operation, - ws2, rhs: Box::new(rhs), } } else { @@ -3725,13 +3684,11 @@ pub mod new { .map(|(rest_input, (lhs, opt))| { ( rest_input, - if let Some((ws1, operation, ws2, rhs)) = opt { + if let Some((_ws1, operation, _ws2, rhs)) = opt { Term::Binary { span: outer_span(input.input, rest_input.input), lhs: Box::new(lhs), - ws1, operation, - ws2, rhs: Box::new(rhs), } } else { @@ -3801,7 +3758,7 @@ pub mod new { )), )(input) .map( - |(rest_input, (operation, open_paren, ws1, terms, ws2, close_paren))| { + |(rest_input, (operation, open_paren, _ws1, terms, _ws2, close_paren))| { ( rest_input, Term::Aggregation { @@ -3811,9 +3768,7 @@ pub mod new { span: operation.input, }, open_paren, - ws1, terms: Box::new(terms), - ws2, close_paren, }, ) @@ -3989,12 +3944,10 @@ pub mod new { kind: TokenKind::Ident, span: s!(0, 1, "a"), }), - ws1: None, open_paren: Token { kind: TokenKind::OpenParen, span: s!(1, 1, "("), }, - ws2: None, terms: Some(List { span: s!(2, 1, "B,C"), first: Term::Primitive(Primitive::Constant(Token { @@ -4002,25 +3955,21 @@ pub mod new { span: s!(2, 1, "B"), })), rest: Some(vec![( - None, Token { kind: TokenKind::Comma, span: s!(3, 1, ",") }, - None, Term::Primitive(Primitive::Constant(Token { kind: TokenKind::Ident, span: s!(4, 1, "C"), })), )]), }), - ws3: None, close_paren: Token { kind: TokenKind::CloseParen, span: s!(5, 1, ")"), }, }), - ws: None, dot: Token { kind: TokenKind::Dot, span: s!(6, 1, ".") @@ -4051,22 +4000,10 @@ pub mod new { Statement::Directive(Directive::Base { span: s!(0, 1, "@base ."), doc_comment: None, - kw: Token { - kind: TokenKind::Base, - span: s!(0, 1, "@base"), - }, - ws1: Some(Wsoc { - span: s!(5, 1, " "), - token: vec![Token { - kind: TokenKind::Whitespace, - span: s!(5, 1, " ") - }] - }), base_iri: Token { kind: TokenKind::Iri, span: s!(6, 1, "") }, - ws2: None, dot: Token { kind: TokenKind::Dot, span: s!(31, 1, ".") @@ -4079,27 +4016,14 @@ pub mod new { "@prefix rdfs:." 
), doc_comment: None, - kw: Token { - kind: TokenKind::Prefix, - span: s!(32, 1, "@prefix"), - }, - ws1: Some(Wsoc { - span: s!(39, 1, " "), - token: vec![Token { - kind: TokenKind::Whitespace, - span: s!(39, 1, " ") - }] - }), prefix: Token { kind: TokenKind::Ident, span: s!(40, 1, "rdfs:"), }, - ws2: None, prefix_iri: Token { kind: TokenKind::Iri, span: s!(45, 1, ""), }, - ws3: None, dot: Token { kind: TokenKind::Dot, span: s!(84, 1, ".") @@ -4112,39 +4036,24 @@ pub mod new { r#"@import sourceA:-csv{resource="sources/dataA.csv"}."# ), doc_comment: None, - kw: Token { - kind: TokenKind::Import, - span: s!(85, 1, "@import"), - }, - ws1: Wsoc { - span: s!(92, 1, " "), - token: vec![Token { - kind: TokenKind::Whitespace, - span: s!(92, 1, " "), - }] - }, predicate: Token { kind: TokenKind::Ident, span: s!(93, 1, "sourceA"), }, - ws2: None, arrow: Token { kind: TokenKind::Arrow, span: s!(100, 1, ":-"), }, - ws3: None, map: Map { span: s!(102, 1, r#"csv{resource="sources/dataA.csv"}"#), identifier: Some(Token { kind: TokenKind::Ident, span: s!(102, 1, "csv") }), - ws1: None, open_brace: Token { kind: TokenKind::OpenBrace, span: s!(105, 1, "{") }, - ws2: None, pairs: Some(List { span: s!(106, 1, "resource=\"sources/dataA.csv\""), first: Pair { @@ -4153,12 +4062,10 @@ pub mod new { kind: TokenKind::Ident, span: s!(106, 1, "resource"), })), - ws1: None, equal: Token { kind: TokenKind::Equal, span: s!(114, 1, "="), }, - ws2: None, value: Term::Primitive(Primitive::String(Token { kind: TokenKind::String, span: s!(115, 1, "\"sources/dataA.csv\""), @@ -4166,13 +4073,11 @@ pub mod new { }, rest: None, }), - ws3: None, close_brace: Token { kind: TokenKind::CloseBrace, span: s!(134, 1, "}") }, }, - ws4: None, dot: Token { kind: TokenKind::Dot, span: s!(135, 1, ".") @@ -4181,47 +4086,30 @@ pub mod new { Statement::Directive(Directive::Export { span: s!(136, 1, "@export a:-csv{}."), doc_comment: None, - kw: Token { - kind: TokenKind::Export, - span: s!(136, 1, "@export"), - }, - ws1: Wsoc { - span: s!(143, 1, " "), - token: vec![Token { - kind: TokenKind::Whitespace, - span: s!(143, 1, " "), - }] - }, predicate: Token { kind: TokenKind::Ident, span: s!(144, 1, "a"), }, - ws2: None, arrow: Token { kind: TokenKind::Arrow, span: s!(145, 1, ":-"), }, - ws3: None, map: Map { span: s!(147, 1, "csv{}"), identifier: Some(Token { kind: TokenKind::Ident, span: s!(147, 1, "csv"), }), - ws1: None, open_brace: Token { kind: TokenKind::OpenBrace, span: s!(150, 1, "{"), }, - ws2: None, pairs: None, - ws3: None, close_brace: Token { kind: TokenKind::CloseBrace, span: s!(151, 1, "}"), }, }, - ws4: None, dot: Token { kind: TokenKind::Dot, span: s!(152, 1, "."), @@ -4230,17 +4118,6 @@ pub mod new { Statement::Directive(Directive::Output { span: s!(153, 1, "@output a, b, c."), doc_comment: None, - kw: Token { - kind: TokenKind::Output, - span: s!(153, 1, "@output") - }, - ws1: Wsoc { - span: s!(160, 1, " "), - token: vec![Token { - kind: TokenKind::Whitespace, - span: s!(160, 1, " "), - }] - }, predicates: Some(List { span: s!(161, 1, "a, b, c"), first: Token { @@ -4249,36 +4126,20 @@ pub mod new { }, rest: Some(vec![ ( - None, Token { kind: TokenKind::Comma, span: s!(162, 1, ","), }, - Some(Wsoc { - span: s!(163, 1, " "), - token: vec![Token { - kind: TokenKind::Whitespace, - span: s!(163, 1, " "), - }] - }), Token { kind: TokenKind::Ident, span: s!(164, 1, "b"), }, ), ( - None, Token { kind: TokenKind::Comma, span: s!(165, 1, ","), }, - Some(Wsoc { - span: s!(166, 1, " "), - token: vec![Token { - kind: 
TokenKind::Whitespace, - span: s!(166, 1, " "), - }] - }), Token { kind: TokenKind::Ident, span: s!(167, 1, "c"), @@ -4286,7 +4147,6 @@ pub mod new { ), ]), }), - ws2: None, dot: Token { kind: TokenKind::Dot, span: s!(168, 1, "."), @@ -4337,12 +4197,10 @@ pub mod new { kind: TokenKind::Ident, span: s!(0, 1, "some"), }), - ws1: None, open_paren: Token { kind: TokenKind::OpenParen, span: s!(4, 1, "(") }, - ws2: None, terms: Some(List { span: s!(5, 1, "Fact, with, whitespace"), first: Term::Primitive(Primitive::Constant(Token { @@ -4351,36 +4209,20 @@ pub mod new { })), rest: Some(vec![ ( - None, Token { kind: TokenKind::Comma, span: s!(9, 1, ","), }, - Some(Wsoc { - span: s!(10, 1, " "), - token: vec![Token { - kind: TokenKind::Whitespace, - span: s!(10, 1, " "), - }] - }), Term::Primitive(Primitive::Constant(Token { kind: TokenKind::Ident, span: s!(11, 1, "with") })), ), ( - None, Token { kind: TokenKind::Comma, span: s!(15, 1, ","), }, - Some(Wsoc { - span: s!(16, 1, " "), - token: vec![Token { - kind: TokenKind::Whitespace, - span: s!(16, 1, " "), - }] - }), Term::Primitive(Primitive::Constant(Token { kind: TokenKind::Ident, span: s!(17, 1, "whitespace") @@ -4388,28 +4230,16 @@ pub mod new { ), ]), }), - ws3: None, close_paren: Token { kind: TokenKind::CloseParen, span: s!(27, 1, ")") }, }), - ws: Some(Wsoc { - span: s!(28, 1, " "), - token: vec![Token { - kind: TokenKind::Whitespace, - span: s!(28, 1, " "), - }] - }), dot: Token { kind: TokenKind::Dot, span: s!(29, 1, "."), }, }, - Statement::Whitespace(Token { - kind: TokenKind::Whitespace, - span: s!(30, 1, " ") - }), Statement::Comment(Token { kind: TokenKind::Comment, span: s!(31, 1, "% and a super useful comment\n") @@ -4447,18 +4277,19 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters // let ast = parse_program::>(input); let (ast, _) = parse_program::>(input); println!("{}", ast); - assert_eq!( - { - let mut string_from_tokens = String::new(); - for token in get_all_tokens(&ast) { - string_from_tokens.push_str(token.span().fragment()); - } - println!("String from Tokens:\n"); - println!("{}\n", string_from_tokens); - string_from_tokens - }, - *input.input.fragment(), - ); + // With the removal of whitespace in the AST this does not work anymore. + // assert_eq!( + // { + // let mut string_from_tokens = String::new(); + // for token in get_all_tokens(&ast) { + // string_from_tokens.push_str(token.span().fragment()); + // } + // println!("String from Tokens:\n"); + // println!("{}\n", string_from_tokens); + // string_from_tokens + // }, + // *input.input.fragment(), + // ); } #[test] @@ -4548,9 +4379,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters after: T! {Number, 0, 1, "35"}, exponent: None, })), - ws1: None, operation: T! {Plus, 2, 1, "+"}, - ws2: None, rhs: Box::new(Term::Primitive(Primitive::Number { span: s!(3, 1, "7"), sign: None, @@ -4585,9 +4414,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters after: T! {Number, 0,1,"6"}, exponent: None, })), - ws1: None, operation: T! {Star, 1,1,"*"}, - ws2: None, rhs: Box::new(Term::Primitive(Primitive::Number { span: s!(2, 1, "7"), sign: None, @@ -4622,9 +4449,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters after: T! {Number, 0, 1, "49"}, exponent: None, })), - ws1: None, operation: T! 
{Minus, 2, 1, "-"}, - ws2: None, rhs: Box::new(Term::Primitive(Primitive::Number { span: s!(3, 1, "7"), sign: None, @@ -4659,9 +4484,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters after: T! {Number, 0, 1, "84"}, exponent: None, })), - ws1: None, operation: T! {Slash, 2, 1, "/"}, - ws2: None, rhs: Box::new(Term::Primitive(Primitive::Number { span: s!(3, 1, "2"), sign: None, @@ -4698,9 +4521,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters after: T! {Number, 0,1,"5"}, exponent: None, })), - ws1: None, operation: T! {Star, 1,1,"*"}, - ws2: None, rhs: Box::new(Term::Primitive(Primitive::Number { span: s!(2, 1, "7"), sign: None, @@ -4710,9 +4531,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters exponent: None, })), }), - ws1: None, operation: T! {Plus, 3,1,"+"}, - ws2: None, rhs: Box::new(Term::Primitive(Primitive::Number { span: s!(4, 1, "7"), sign: None, @@ -4747,9 +4566,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters after: T! {Number, 0,1,"7"}, exponent: None })), - ws1: None, operation: T! {Plus, 1,1,"+"}, - ws2: None, rhs: Box::new(Term::Binary { span: s!(2, 1, "5*7"), lhs: Box::new(Term::Primitive(Primitive::Number { @@ -4760,9 +4577,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters after: T! {Number, 2,1,"5"}, exponent: None })), - ws1: None, operation: T! {Star, 3,1,"*"}, - ws2: None, rhs: Box::new(Term::Primitive(Primitive::Number { span: s!(4, 1, "7"), sign: None, @@ -4816,9 +4631,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters lhs: Box::new(Term::Tuple(Box::new(Tuple { span: s!(0, 1, "(15+3*2-(7+35)*8)"), identifier: None, - ws1: None, open_paren: T!(OpenParen, 0, 1, "("), - ws2: None, terms: Some(List { span: s!(1, 1, "15+3*2-(7+35)*8"), first: Term::Binary { @@ -4831,9 +4644,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters after: T! {Number, 1,1,"15"}, exponent: None, })), - ws1: None, operation: T! {Plus, 3,1,"+"}, - ws2: None, rhs: Box::new(Term::Binary { span: s!(4, 1, "3*2-(7+35)*8"), lhs: Box::new(Term::Binary { @@ -4846,9 +4657,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters after: T! {Number, 4,1,"3"}, exponent: None, })), - ws1: None, operation: T! {Star, 5,1,"*"}, - ws2: None, rhs: Box::new(Term::Primitive(Primitive::Number { span: s!(6, 1, "2"), sign: None, @@ -4858,17 +4667,13 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters exponent: None, })), }), - ws1: None, operation: T! {Minus, 7,1,"-"}, - ws2: None, rhs: Box::new(Term::Binary { span: s!(8, 1, "(7+35)*8"), lhs: Box::new(Term::Tuple(Box::new(Tuple { span: s!(8, 1, "(7+35)"), identifier: None, - ws1: None, open_paren: T! {OpenParen, 8, 1, "("}, - ws2: None, terms: Some(List { span: s!(9, 1, "7+35"), first: Term::Binary { @@ -4883,9 +4688,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters exponent: None, } )), - ws1: None, operation: T! {Plus, 10,1,"+"}, - ws2: None, rhs: Box::new(Term::Primitive( Primitive::Number { span: s!(11, 1, "35"), @@ -4899,12 +4702,9 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters }, rest: None }), - ws3: None, close_paren: T! {CloseParen, 13,1,")"}, }))), - ws1: None, operation: T! 
{Star, 14,1,"*"}, - ws2: None, rhs: Box::new(Term::Primitive(Primitive::Number { span: s!(15, 1, "8"), sign: None, @@ -4918,12 +4718,9 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters }, rest: None }), - ws3: None, close_paren: T!(CloseParen, 16, 1, ")") }))), - ws1: None, operation: T! {Slash, 17,1,"/"}, - ws2: None, rhs: Box::new(Term::Primitive(Primitive::Number { span: s!(18, 1, "3"), sign: None, @@ -4966,9 +4763,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters after: T! {Number, 0,1,"15"}, exponent: None, })), - ws1: None, operation: T! {Plus, 2,1,"+"}, - ws2: None, rhs: Box::new(Term::Binary { span: s!(3, 1, "3*2-(7+35)*8/3"), lhs: Box::new(Term::Binary { @@ -4981,9 +4776,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters after: T! {Number, 3,1,"3"}, exponent: None, })), - ws1: None, operation: T! {Star, 4,1,"*"}, - ws2: None, rhs: Box::new(Term::Primitive(Primitive::Number { span: s!(5, 1, "2"), sign: None, @@ -4993,17 +4786,13 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters exponent: None, })), }), - ws1: None, operation: T! {Minus, 6,1,"-"}, - ws2: None, rhs: Box::new(Term::Binary { span: s!(7, 1, "(7+35)*8/3"), lhs: Box::new(Term::Tuple(Box::new(Tuple { span: s!(7, 1, "(7+35)"), identifier: None, - ws1: None, open_paren: T! {OpenParen, 7,1,"("}, - ws2: None, terms: Some(List { span: s!(8, 1, "7+35"), first: Term::Binary { @@ -5016,9 +4805,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters after: T! {Number, 8,1,"7"}, exponent: None, })), - ws1: None, operation: T! {Plus, 9,1,"+"}, - ws2: None, rhs: Box::new(Term::Primitive(Primitive::Number { span: s!(10, 1, "35"), sign: None, @@ -5030,12 +4817,9 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters }, rest: None, }), - ws3: None, close_paren: T! {CloseParen, 12,1,")"}, }))), - ws1: None, operation: T! {Star, 13,1,"*"}, - ws2: None, rhs: Box::new(Term::Binary { span: s!(14, 1, "8/3"), lhs: Box::new(Term::Primitive(Primitive::Number { @@ -5046,9 +4830,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters after: T! {Number, 14,1,"8"}, exponent: None, })), - ws1: None, operation: T! 
{Slash, 15, 1, "/"}, - ws2: None, rhs: Box::new(Term::Primitive(Primitive::Number { span: s!(16, 1, "3"), sign: None, @@ -5219,6 +5001,8 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters }; let result = parse_decimal::>>(input); + // dbg!(&input); + // dbg!(&result); assert!(result.is_ok()) } @@ -5248,6 +5032,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters parser_state, }; let result = parse_program::>>(input); + // dbg!(&result); assert!(result.1.is_empty()); } diff --git a/nemo/src/io/parser/ast.rs b/nemo/src/io/parser/ast.rs index 9acc56a59..84c7bcbea 100644 --- a/nemo/src/io/parser/ast.rs +++ b/nemo/src/io/parser/ast.rs @@ -151,15 +151,15 @@ impl Display for Wsoc<'_> { pub struct List<'a, T> { pub span: Span<'a>, pub first: T, - // ([ws]?[,][ws]?[T])* - pub rest: Option>, Token<'a>, Option>, T)>>, + // (,T)* + pub rest: Option, T)>>, } impl List<'_, T> { pub fn to_vec(&self) -> Vec { let mut vec = Vec::new(); vec.push(self.first.clone()); if let Some(rest) = &self.rest { - for (_, _, _, item) in rest { + for (_, item) in rest { vec.push(item.clone()); } } @@ -175,7 +175,7 @@ impl IntoIterator for List<'_, T> { let mut vec = Vec::new(); vec.push(self.first); if let Some(rest) = self.rest { - for (_, _, _, item) in rest { + for (_, item) in rest { vec.push(item); } } @@ -187,14 +187,8 @@ impl AstNode for List<'_, T> { let mut vec: Vec<&dyn AstNode> = Vec::new(); vec.push(&self.first); if let Some(rest) = &self.rest { - for (ws1, delim, ws2, item) in rest { - if let Some(ws) = ws1 { - vec.push(ws); - }; + for (delim, item) in rest { vec.push(delim); - if let Some(ws) = ws2 { - vec.push(ws); - }; vec.push(item); } }; @@ -312,30 +306,19 @@ mod test { kind:TokenKind::DocComment, span:s!(84,3,"%% This is the prefix used for datatypes\n") }), - kw: Token{ - kind:TokenKind::Prefix, - span:s!(125,4,"@prefix") - }, - ws1:Some(Wsoc {span: s!(132, 4, " "), token: vec![Token{kind:TokenKind::Whitespace,span:s!(132,4," ")}] }), prefix: Token { kind: TokenKind::PrefixIdent, span: s!(133, 4, "xsd:"), }, - ws2: Some(Wsoc {span: s!(137, 4, " "), token: vec![Token{kind:TokenKind::Whitespace,span:s!(137,4," ")}] }), prefix_iri: Token { kind: TokenKind::Iri, span: s!(138, 4, ""), }, - ws3: None, dot: Token{ kind:TokenKind::Dot, span:s!(173,4,".") } }), - Statement::Whitespace(Token { - kind: TokenKind::Whitespace, - span: s!(174, 4, "\n\n"), - }), Statement::Comment(Token { kind: TokenKind::Comment, span: s!(176, 6, "% Facts\n"), @@ -352,12 +335,10 @@ mod test { kind: TokenKind::Ident, span: s!(222, 8, "somePredicate"), }), - ws1:None , open_paren:Token{ kind:TokenKind::OpenParen, span:s!(235,8,"(") } , - ws2:None , terms: Some(List { span: s!(236, 8, "ConstA, ConstB"), first: Term::Primitive(Primitive::Constant(Token { @@ -365,34 +346,26 @@ mod test { span: s!(236, 8, "ConstA"), })), rest: Some(vec![( - None, Token { kind: TokenKind::Comma, span: s!(242, 8, ","), }, - Some(Wsoc {span: s!(243, 8, " "), token: vec![Token{kind:TokenKind::Whitespace,span:s!(243,8," "),}] }), Term::Primitive(Primitive::Constant(Token { kind: TokenKind::Ident, span: s!(244, 8, "ConstB"), })), )]), }), - ws3: None , close_paren:Token { kind: TokenKind::CloseParen, span:s!(250,8,")") } }), - ws: None, dot: Token { kind: TokenKind::Dot, span: s!(251,8,".") } }, - Statement::Whitespace(Token { - kind: TokenKind::Whitespace, - span: s!(252, 8, "\n\n"), - }), Statement::Comment(Token { kind: TokenKind::Comment, span: s!(254, 10, "% Rules\n"), @@ -408,9 +381,7 @@ 
mod test { kind: TokenKind::Ident, span: s!(295, 12, "someHead"), }), - ws1: None, open_paren: Token { kind: TokenKind::OpenParen, span: s!(303,12,"(") }, - ws2: None, terms: Some(List { span: s!(304, 12, "?VarA"), first: Term::UniversalVariable(Token { @@ -419,14 +390,11 @@ mod test { }), rest: None, }), - ws3: None, close_paren: Token { kind: TokenKind::CloseParen, span: s!(309,12,")") }, }), rest: None, }, - ws1: Some(Wsoc {span: s!(310, 12, " "), token: vec![Token{kind:TokenKind::Whitespace,span:s!(310,12," ")}] }), arrow: Token{kind:TokenKind::Arrow, span:s!(311,12,":-")}, - ws2: Some(Wsoc {span: s!(313, 12, " "), token: vec![Token{kind:TokenKind::Whitespace,span:s!(313,12," ")}] }), body: List { span: s!(314, 12, "somePredicate(?VarA, ConstB)"), first: Atom::Positive(Tuple { @@ -435,9 +403,7 @@ mod test { kind: TokenKind::Ident, span: s!(314, 12, "somePredicate"), }), - ws1: None, open_paren: Token { kind: TokenKind::OpenParen, span: s!(327,12,"(") }, - ws2: None, terms: Some(List { span: s!(328, 12, "?Var, ConstB"), first: Term::UniversalVariable(Token { @@ -445,30 +411,22 @@ mod test { span: s!(328, 12, "?VarA"), }), rest: Some(vec![( - None, Token { kind: TokenKind::Comma, span: s!(333, 12, ","), }, - Some(Wsoc {span: s!(334, 12, " "), token: vec![Token{kind:TokenKind::Whitespace,span:s!(334,12," "),}] }), Term::Primitive(Primitive::Constant(Token { kind: TokenKind::Ident, span: s!(335, 12, "ConstB"), })), )]), }), - ws3: None, close_paren: Token { kind: TokenKind::CloseParen, span: s!(341, 12,")") }, }), rest: None, }, - ws3: None, dot: Token{kind:TokenKind::Dot,span:s!(342, 12,".")}, }, - Statement::Whitespace(Token { - kind: TokenKind::Whitespace, - span: s!(343, 12, " "), - }), Statement::Comment(Token { kind: TokenKind::Comment, span: s!(346, 12, "% all constants that are in relation with ConstB\n"), @@ -481,12 +439,14 @@ mod test { println!("{}", token); } - assert_eq!(input, { - let mut result = String::new(); - for token in &tokens1 { - result.push_str(token.span().fragment()); - } - result - }); + // This doesn't work anymore, because the whitespace and keywords got removed + // from the AST, so you can't directly recreate the input exactly. + // assert_eq!(input, { + // let mut result = String::new(); + // for token in &tokens1 { + // result.push_str(token.span().fragment()); + // } + // result + // }); } } diff --git a/nemo/src/io/parser/ast/atom.rs b/nemo/src/io/parser/ast/atom.rs index bbf6e88b4..6ad2d77ed 100644 --- a/nemo/src/io/parser/ast/atom.rs +++ b/nemo/src/io/parser/ast/atom.rs @@ -18,9 +18,7 @@ pub enum Atom<'a> { InfixAtom { span: Span<'a>, lhs: Term<'a>, - ws1: Option>, operation: Token<'a>, - ws2: Option>, rhs: Term<'a>, }, Map(Map<'a>), @@ -43,21 +41,13 @@ impl AstNode for Atom<'_> { Atom::Negative { neg, atom, .. } => Some(vec![neg, atom]), Atom::InfixAtom { lhs, - ws1, operation, - ws2, rhs, .. } => { let mut vec: Vec<&dyn AstNode> = Vec::new(); vec.push(lhs); - if let Some(ws) = ws1 { - vec.push(ws); - }; vec.push(operation); - if let Some(ws) = ws2 { - vec.push(ws); - }; vec.push(rhs); Some(vec) } diff --git a/nemo/src/io/parser/ast/directive.rs b/nemo/src/io/parser/ast/directive.rs index 86c50a315..11424d7e4 100644 --- a/nemo/src/io/parser/ast/directive.rs +++ b/nemo/src/io/parser/ast/directive.rs @@ -11,60 +11,40 @@ pub enum Directive<'a> { Base { span: Span<'a>, doc_comment: Option>, - kw: Token<'a>, - ws1: Option>, base_iri: Token<'a>, - ws2: Option>, dot: Token<'a>, }, // "@prefix wikidata: ."
Prefix { span: Span<'a>, doc_comment: Option>, - kw: Token<'a>, - ws1: Option>, prefix: Token<'a>, - ws2: Option>, prefix_iri: Token<'a>, - ws3: Option>, dot: Token<'a>, }, // "@import table :- csv{resource="path/to/file.csv"} ." Import { span: Span<'a>, doc_comment: Option>, - kw: Token<'a>, - ws1: Wsoc<'a>, predicate: Token<'a>, - ws2: Option>, arrow: Token<'a>, - ws3: Option>, map: Map<'a>, - ws4: Option>, dot: Token<'a>, }, // "@export result :- turtle{resource="out.ttl"} ." Export { span: Span<'a>, doc_comment: Option>, - kw: Token<'a>, - ws1: Wsoc<'a>, predicate: Token<'a>, - ws2: Option>, arrow: Token<'a>, - ws3: Option>, map: Map<'a>, - ws4: Option>, dot: Token<'a>, }, // "@output A, B, C." Output { span: Span<'a>, doc_comment: Option>, - kw: Token<'a>, - ws1: Wsoc<'a>, predicates: Option>>, - ws2: Option>, dot: Token<'a>, }, } @@ -73,10 +53,7 @@ impl AstNode for Directive<'_> { match self { Directive::Base { doc_comment, - kw, - ws1, base_iri, - ws2, dot, .. } => { @@ -84,25 +61,14 @@ impl AstNode for Directive<'_> { if let Some(dc) = doc_comment { vec.push(dc); }; - vec.push(kw); - if let Some(ws) = ws1 { - vec.push(ws); - }; vec.push(base_iri); - if let Some(ws) = ws2 { - vec.push(ws); - }; vec.push(dot); Some(vec) } Directive::Prefix { doc_comment, - kw, - ws1, prefix, - ws2, prefix_iri, - ws3, dot, .. } => { @@ -110,31 +76,16 @@ impl AstNode for Directive<'_> { if let Some(dc) = doc_comment { vec.push(dc); }; - vec.push(kw); - if let Some(ws) = ws1 { - vec.push(ws); - }; vec.push(prefix); - if let Some(ws) = ws2 { - vec.push(ws); - }; vec.push(prefix_iri); - if let Some(ws) = ws3 { - vec.push(ws); - }; vec.push(dot); Some(vec) } Directive::Import { doc_comment, - kw, - ws1, predicate, - ws2, arrow, - ws3, map, - ws4, dot, .. } => { @@ -142,33 +93,17 @@ impl AstNode for Directive<'_> { if let Some(dc) = doc_comment { vec.push(dc); }; - vec.push(kw); - vec.push(ws1); vec.push(predicate); - if let Some(ws) = ws2 { - vec.push(ws); - }; vec.push(arrow); - if let Some(ws) = ws3 { - vec.push(ws); - }; vec.push(map); - if let Some(ws) = ws4 { - vec.push(ws); - }; vec.push(dot); Some(vec) } Directive::Export { doc_comment, - kw, - ws1, predicate, - ws2, arrow, - ws3, map, - ws4, dot, .. 
} => { @@ -176,44 +111,25 @@ impl AstNode for Directive<'_> { if let Some(dc) = doc_comment { vec.push(dc); }; - vec.push(kw); - vec.push(ws1); vec.push(predicate); - if let Some(ws) = ws2 { - vec.push(ws); - }; vec.push(arrow); - if let Some(ws) = ws3 { - vec.push(ws); - }; vec.push(map); - if let Some(ws) = ws4 { - vec.push(ws); - }; vec.push(dot); Some(vec) } Directive::Output { span, doc_comment, - kw, - ws1, predicates, - ws2, dot, } => { let mut vec: Vec<&dyn AstNode> = Vec::new(); if let Some(dc) = doc_comment { vec.push(dc); }; - vec.push(kw); - vec.push(ws1); if let Some(p) = predicates { vec.push(p); }; - if let Some(ws) = ws2 { - vec.push(ws); - }; vec.push(dot); Some(vec) } diff --git a/nemo/src/io/parser/ast/map.rs b/nemo/src/io/parser/ast/map.rs index 061231e13..509d07e2b 100644 --- a/nemo/src/io/parser/ast/map.rs +++ b/nemo/src/io/parser/ast/map.rs @@ -10,11 +10,8 @@ use std::fmt::Debug; pub struct Map<'a> { pub span: Span<'a>, pub identifier: Option>, - pub ws1: Option>, pub open_brace: Token<'a>, - pub ws2: Option>, pub pairs: Option, Term<'a>>>>, - pub ws3: Option>, pub close_brace: Token<'a>, } impl AstNode for Map<'_> { @@ -23,19 +20,10 @@ impl AstNode for Map<'_> { if let Some(identifier) = &self.identifier { vec.push(identifier); }; - if let Some(ws) = &self.ws1 { - vec.push(ws); - } vec.push(&self.open_brace); - if let Some(ws) = &self.ws2 { - vec.push(ws); - } if let Some(pairs) = &self.pairs { vec.push(pairs); }; - if let Some(ws) = &self.ws3 { - vec.push(ws); - } vec.push(&self.close_brace); Some(vec) } @@ -77,22 +65,14 @@ impl std::fmt::Display for Map<'_> { pub struct Pair<'a, K, V> { pub span: Span<'a>, pub key: K, - pub ws1: Option>, pub equal: Token<'a>, - pub ws2: Option>, pub value: V, } impl AstNode for Pair<'_, K, V> { fn children(&self) -> Option> { let mut vec: Vec<&dyn AstNode> = Vec::new(); vec.push(&self.key); - if let Some(ws) = &self.ws1 { - vec.push(ws); - } vec.push(&self.equal); - if let Some(ws) = &self.ws2 { - vec.push(ws); - } vec.push(&self.value); Some(vec) } diff --git a/nemo/src/io/parser/ast/statement.rs b/nemo/src/io/parser/ast/statement.rs index 63014a6d3..6322b0a34 100644 --- a/nemo/src/io/parser/ast/statement.rs +++ b/nemo/src/io/parser/ast/statement.rs @@ -13,21 +13,16 @@ pub enum Statement<'a> { span: Span<'a>, doc_comment: Option>, atom: Atom<'a>, - ws: Option>, dot: Token<'a>, }, Rule { span: Span<'a>, doc_comment: Option>, head: List<'a, Atom<'a>>, - ws1: Option>, arrow: Token<'a>, - ws2: Option>, body: List<'a, Atom<'a>>, - ws3: Option>, dot: Token<'a>, }, - Whitespace(Token<'a>), Comment(Token<'a>), Error(Token<'a>), } @@ -38,7 +33,6 @@ impl AstNode for Statement<'_> { Statement::Fact { doc_comment, atom, - ws, dot, .. } => { @@ -47,20 +41,14 @@ impl AstNode for Statement<'_> { vec.push(dc); }; vec.push(atom); - if let Some(ws) = ws { - vec.push(ws); - } vec.push(dot); Some(vec) } Statement::Rule { doc_comment, head, - ws1, arrow, - ws2, body, - ws3, dot, .. } => { @@ -69,21 +57,11 @@ impl AstNode for Statement<'_> { vec.push(dc); }; vec.push(head); - if let Some(ws) = ws1 { - vec.push(ws); - }; vec.push(arrow); - if let Some(ws) = ws2 { - vec.push(ws); - }; vec.push(body); - if let Some(ws) = ws3 { - vec.push(ws); - }; vec.push(dot); Some(vec) } - Statement::Whitespace(ws) => Some(vec![ws]), Statement::Comment(c) => Some(vec![c]), Statement::Error(t) => Some(vec![t]), } @@ -94,7 +72,6 @@ impl AstNode for Statement<'_> { Statement::Directive(directive) => directive.span(), Statement::Fact { span, .. 
} => *span, Statement::Rule { span, .. } => *span, - Statement::Whitespace(ws) => ws.span(), Statement::Comment(c) => c.span(), Statement::Error(t) => t.span, } @@ -121,7 +98,6 @@ impl AstNode for Statement<'_> { Statement::Directive(_) => name!("Directive"), Statement::Fact { .. } => name!("Fact"), Statement::Rule { .. } => name!("Rule"), - Statement::Whitespace(_) => name!("Whitespace"), Statement::Comment(_) => name!("Comment"), Statement::Error(_) => name!("\x1b[1;31mERROR\x1b[0m"), } @@ -140,7 +116,6 @@ impl AstNode for Statement<'_> { Statement::Directive(_) => "Directive", Statement::Fact { .. } => "Fact", Statement::Rule { .. } => "Rule", - Statement::Whitespace(_ws) => return None, Statement::Comment(_) => return None, Statement::Error(_) => "Invalid", }; diff --git a/nemo/src/io/parser/ast/term.rs b/nemo/src/io/parser/ast/term.rs index beda1993d..db51d80e7 100644 --- a/nemo/src/io/parser/ast/term.rs +++ b/nemo/src/io/parser/ast/term.rs @@ -20,18 +20,14 @@ pub enum Term<'a> { Binary { span: Span<'a>, lhs: Box>, - ws1: Option>, operation: Token<'a>, - ws2: Option>, rhs: Box>, }, Aggregation { span: Span<'a>, operation: Token<'a>, open_paren: Token<'a>, - ws1: Option>, terms: Box>>, - ws2: Option>, close_paren: Token<'a>, }, Tuple(Box>), @@ -50,43 +46,27 @@ impl AstNode for Term<'_> { } => Some(vec![operation, &**term]), Term::Binary { lhs, - ws1, operation, - ws2, rhs, .. } => { let mut vec: Vec<&dyn AstNode> = Vec::new(); vec.push(&**lhs); - if let Some(ws) = ws1 { - vec.push(ws); - }; vec.push(operation); - if let Some(ws) = ws2 { - vec.push(ws); - }; vec.push(&**rhs); Some(vec) } Term::Aggregation { operation, open_paren, - ws1, terms, - ws2, close_paren, .. } => { let mut vec: Vec<&dyn AstNode> = Vec::new(); vec.push(operation); vec.push(open_paren); - if let Some(ws) = ws1 { - vec.push(ws); - } vec.push(&**terms); - if let Some(ws) = ws2 { - vec.push(ws); - } vec.push(close_paren); Some(vec) } diff --git a/nemo/src/io/parser/ast/tuple.rs b/nemo/src/io/parser/ast/tuple.rs index b6e5a0bca..9dd84df22 100644 --- a/nemo/src/io/parser/ast/tuple.rs +++ b/nemo/src/io/parser/ast/tuple.rs @@ -9,11 +9,8 @@ use ascii_tree::write_tree; pub struct Tuple<'a> { pub span: Span<'a>, pub identifier: Option>, - pub ws1: Option>, pub open_paren: Token<'a>, - pub ws2: Option>, pub terms: Option>>, - pub ws3: Option>, pub close_paren: Token<'a>, } @@ -23,19 +20,10 @@ impl AstNode for Tuple<'_> { if let Some(identifier) = &self.identifier { vec.push(identifier); } - if let Some(ws) = &self.ws1 { - vec.push(ws); - } vec.push(&self.open_paren); - if let Some(ws) = &self.ws2 { - vec.push(ws); - } if let Some(terms) = &self.terms { vec.push(terms); } - if let Some(ws) = &self.ws3 { - vec.push(ws); - } vec.push(&self.close_paren); Some(vec) } From 09febecabd648ca6fe74864b6cd57c64707b9aeb Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Mon, 1 Jul 2024 13:25:10 +0200 Subject: [PATCH 052/214] Remove Tokens --- nemo/src/io/lexer.rs | 561 ++++------------------------ nemo/src/io/parser.rs | 286 ++++---------- nemo/src/io/parser/ast.rs | 163 ++++---- nemo/src/io/parser/ast/atom.rs | 6 +- nemo/src/io/parser/ast/directive.rs | 36 +- nemo/src/io/parser/ast/map.rs | 8 +- nemo/src/io/parser/ast/program.rs | 2 +- nemo/src/io/parser/ast/statement.rs | 16 +- nemo/src/io/parser/ast/term.rs | 58 +-- nemo/src/io/parser/ast/tuple.rs | 6 +- nemo/src/io/parser/types.rs | 162 -------- 11 files changed, 263 insertions(+), 1041 deletions(-) diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs index 6b8811654..00b91e7d9 
100644 --- a/nemo/src/io/lexer.rs +++ b/nemo/src/io/lexer.rs @@ -161,6 +161,35 @@ impl ToRange for Span<'_> { start..end } } +impl AstNode for Span<'_> { + fn children(&self) -> Option> { + None + } + + fn span(&self) -> Span { + *self + } + + fn is_token(&self) -> bool { + true + } + + fn name(&self) -> String { + self.fragment().to_string() + } + + fn lsp_identifier(&self) -> Option<(String, String)> { + todo!() + } + + fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> { + todo!() + } + + fn lsp_range_to_rename(&self) -> Option { + todo!() + } +} pub(crate) fn to_range(span: Span<'_>) -> Range { let start = span.location_offset(); @@ -408,13 +437,13 @@ macro_rules! syntax { ($func_name: ident, $tag_str: literal, $token: expr) => { pub(crate) fn $func_name<'a, 's, E>( input: Input<'a, 's>, - ) -> IResult, Token<'a>, E> + ) -> IResult, Span<'a>, E> where E: ParseError> + ContextError, Context>, { map( context(Context::Tag($tag_str), tag($tag_str)), - |span: Input| Token::new($token, span.input), + |input: Input| input.input, )(input) } }; @@ -440,7 +469,7 @@ syntax!(at, "@", TokenKind::At); syntax!(exp_lower, "e", TokenKind::Exponent); syntax!(exp_upper, "E", TokenKind::Exponent); -pub(crate) fn exp<'a, 's, E>(input: Input<'a, 's>) -> IResult, Token<'a>, E> +pub(crate) fn exp<'a, 's, E>(input: Input<'a, 's>) -> IResult, Span<'a>, E> where E: ParseError> + ContextError, Context>, { @@ -449,7 +478,7 @@ where pub(crate) fn lex_punctuations<'a, 's, E>( input: Input<'a, 's>, -) -> IResult, Token<'a>, E> +) -> IResult, Span<'a>, E> where E: ParseError> + ContextError, Context>, { @@ -488,7 +517,7 @@ syntax!(minus, "-", TokenKind::Minus); syntax!(star, "*", TokenKind::Star); syntax!(slash, "/", TokenKind::Slash); -pub(crate) fn lex_operators<'a, 's, E>(input: Input<'a, 's>) -> IResult, Token<'a>, E> +pub(crate) fn lex_operators<'a, 's, E>(input: Input<'a, 's>) -> IResult, Span<'a>, E> where E: ParseError> + ContextError, Context>, { @@ -509,13 +538,7 @@ where )(input) } -// pub(crate) fn lex_unary_prefix_operators<'a, 's>( -// input: Input<'a, 's>, -// ) -> IResult, Token<'a>> { -// alt((plus, minus))(input) -// } - -pub(crate) fn lex_ident<'a, 's, E>(input: Input<'a, 's>) -> IResult, Token<'a>, E> +pub(crate) fn lex_ident<'a, 's, E>(input: Input<'a, 's>) -> IResult, Span<'a>, E> where E: ParseError> + ContextError, Context>, { @@ -527,36 +550,27 @@ where )), )(input)?; let token = match *ident.input.fragment() { - "base" => Token::new(TokenKind::Base, ident.input), - "prefix" => Token::new(TokenKind::Prefix, ident.input), - "import" => Token::new(TokenKind::Import, ident.input), - "export" => Token::new(TokenKind::Export, ident.input), - "output" => Token::new(TokenKind::Output, ident.input), - _ => Token::new(TokenKind::Ident, ident.input), + "base" => ident.input, + "prefix" => ident.input, + "import" => ident.input, + "export" => ident.input, + "output" => ident.input, + _ => ident.input, }; Ok((rest_input, token)) } pub(crate) fn lex_prefixed_ident<'a, 's, E>( input: Input<'a, 's>, -) -> IResult, Token<'a>, E> +) -> IResult, Span<'a>, E> where E: ParseError> + ContextError, Context>, { - recognize(tuple((opt(lex_ident), colon, lex_ident)))(input).map( - |(rest_input, prefixed_ident)| { - ( - rest_input, - Token { - kind: TokenKind::PrefixedIdent, - span: prefixed_ident.input, - }, - ) - }, - ) + recognize(tuple((opt(lex_ident), colon, lex_ident)))(input) + .map(|(rest_input, prefixed_ident)| (rest_input, prefixed_ident.input)) } -pub(crate) fn lex_iri<'a, 's, E>(input: Input<'a, 's>) 
-> IResult, Token<'a>, E> +pub(crate) fn lex_iri<'a, 's, E>(input: Input<'a, 's>) -> IResult, Span<'a>, E> where E: ParseError> + ContextError, Context>, { @@ -564,18 +578,17 @@ where Context::Iri, recognize(delimited(tag("<"), is_not("> \n"), cut(tag(">")))), )(input) - .map(|(rest, result)| (rest, Token::new(TokenKind::Iri, result.input))) + .map(|(rest, result)| (rest, result.input)) } -pub(crate) fn lex_number<'a, 's, E>(input: Input<'a, 's>) -> IResult, Token<'a>, E> +pub(crate) fn lex_number<'a, 's, E>(input: Input<'a, 's>) -> IResult, Span<'a>, E> where E: ParseError> + ContextError, Context>, { - context(Context::Number, digit1)(input) - .map(|(rest_input, result)| (rest_input, Token::new(TokenKind::Number, result.input))) + context(Context::Number, digit1)(input).map(|(rest_input, result)| (rest_input, result.input)) } -pub(crate) fn lex_string<'a, 's, E>(input: Input<'a, 's>) -> IResult, Token<'a>, E> +pub(crate) fn lex_string<'a, 's, E>(input: Input<'a, 's>) -> IResult, Span<'a>, E> where E: ParseError> + ContextError, Context>, { @@ -583,10 +596,10 @@ where Context::String, recognize(delimited(tag("\""), is_not("\""), cut(tag("\"")))), )(input) - .map(|(rest, result)| (rest, Token::new(TokenKind::String, result.input))) + .map(|(rest, result)| (rest, result.input)) } -pub(crate) fn lex_comment<'a, 's, E>(input: Input<'a, 's>) -> IResult, Token<'a>, E> +pub(crate) fn lex_comment<'a, 's, E>(input: Input<'a, 's>) -> IResult, Span<'a>, E> where E: ParseError> + ContextError, Context>, { @@ -594,12 +607,12 @@ where Context::Comment, recognize(tuple((tag("%"), many0(is_not("\n")), line_ending))), )(input) - .map(|(rest, result)| (rest, Token::new(TokenKind::Comment, result.input))) + .map(|(rest, result)| (rest, result.input)) } pub(crate) fn lex_doc_comment<'a, 's, E>( input: Input<'a, 's>, -) -> IResult, Token<'a>, E> +) -> IResult, Span<'a>, E> where E: ParseError> + ContextError, Context>, { @@ -607,12 +620,12 @@ where Context::DocComment, recognize(many1(tuple((tag("%%"), many0(is_not("\n")), line_ending)))), )(input) - .map(|(rest, result)| (rest, Token::new(TokenKind::DocComment, result.input))) + .map(|(rest, result)| (rest, result.input)) } pub(crate) fn lex_toplevel_doc_comment<'a, 's, E>( input: Input<'a, 's>, -) -> IResult, Token<'a>, E> +) -> IResult, Span<'a>, E> where E: ParseError> + ContextError, Context>, { @@ -620,10 +633,10 @@ where Context::TlDocComment, recognize(many1(tuple((tag("%%%"), many0(is_not("\n")), line_ending)))), )(input) - .map(|(rest, result)| (rest, Token::new(TokenKind::TlDocComment, result.input))) + .map(|(rest, result)| (rest, result.input)) } -pub(crate) fn lex_comments<'a, 's, E>(input: Input<'a, 's>) -> IResult, Token<'a>, E> +pub(crate) fn lex_comments<'a, 's, E>(input: Input<'a, 's>) -> IResult, Span<'a>, E> where E: ParseError> + ContextError, Context>, { @@ -633,48 +646,21 @@ where )(input) } -pub(crate) fn lex_whitespace<'a, 's, E>( - input: Input<'a, 's>, -) -> IResult, Token<'a>, E> -where - E: ParseError> + ContextError, Context>, -{ - context(Context::Whitespace, multispace1)(input) - .map(|(rest, result)| (rest, Token::new(TokenKind::Whitespace, result.input))) -} - -pub(crate) fn lex_illegal<'a, 's, E>(input: Input<'a, 's>) -> IResult, Token<'a>, E> +pub(crate) fn lex_whitespace<'a, 's, E>(input: Input<'a, 's>) -> IResult, Span<'a>, E> where E: ParseError> + ContextError, Context>, { - context(Context::Illegal, take(1usize))(input) - .map(|(rest, result)| (rest, Token::new(TokenKind::Illegal, result.input))) + 
context(Context::Whitespace, multispace1)(input).map(|(rest, result)| (rest, result.input)) } -pub(crate) fn lex_tokens<'a, 's, E>( - input: Input<'a, 's>, -) -> IResult, Vec>, E> +pub(crate) fn lex_illegal<'a, 's, E>(input: Input<'a, 's>) -> IResult, Span<'a>, E> where E: ParseError> + ContextError, Context>, { - all_consuming(many0(alt(( - lex_iri, - lex_operators, - lex_punctuations, - lex_ident, - lex_number, - lex_string, - lex_comments, - lex_whitespace, - lex_illegal, - ))))(input) - .map(|(span, mut vec)| { - vec.append(&mut vec![Token::new(TokenKind::Eof, span.input)]); - (span, vec) - }) + context(Context::Illegal, take(1usize))(input).map(|(rest, result)| (rest, result.input)) } -pub(crate) fn skip_to_statement_end<'a, 's, E>(input: Input<'a, 's>) -> (Input<'a, 's>, Token<'a>) +pub(crate) fn skip_to_statement_end<'a, 's, E>(input: Input<'a, 's>) -> (Input<'a, 's>, Span<'a>) where E: ParseError> + ContextError, Context>, { @@ -684,13 +670,7 @@ where multispace0, )))(input) .expect("Skipping to the next dot should not fail!"); - ( - rest_input, - Token { - kind: TokenKind::Error, - span: error_input.input, - }, - ) + (rest_input, error_input.input) } #[cfg(test)] @@ -709,420 +689,7 @@ mod tests { } #[test] - fn empty_input() { - let input = Span::new(""); - let refcell = RefCell::new(Vec::new()); - let errors = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state: errors, - }; - assert_eq!( - // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, - vec![T!(Eof, 0, 1, "")] - ) - } - - #[test] - fn base() { - let input = Span::new("@base"); - let refcell = RefCell::new(Vec::new()); - let errors = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state: errors, - }; - assert_eq!( - // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, - vec![T!(At, 0, 1, "@"), T!(Base, 1, 1, "base"), T!(Eof, 5, 1, ""),] - ) - } - - #[test] - fn prefix() { - let input = Span::new("@prefix"); - let refcell = RefCell::new(Vec::new()); - let errors = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state: errors, - }; - assert_eq!( - // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, - vec![ - T!(At, 0, 1, "@"), - T!(Prefix, 1, 1, "prefix"), - T!(Eof, 7, 1, ""), - ] - ) - } - - #[test] - fn output() { - let input = Span::new("@output"); - let refcell = RefCell::new(Vec::new()); - let errors = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state: errors, - }; - assert_eq!( - // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, - vec![ - T!(At, 0, 1, "@"), - T!(Output, 1, 1, "output"), - T!(Eof, 7, 1, ""), - ] - ) - } - - #[test] - fn import() { - let input = Span::new("@import"); - let refcell = RefCell::new(Vec::new()); - let errors = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state: errors, - }; - assert_eq!( - // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, - vec![ - T!(At, 0, 1, "@"), - T!(Import, 1, 1, "import"), - T!(Eof, 7, 1, ""), - ] - ) - } - - #[test] - fn export() { - let input = Span::new("@export"); - let refcell = RefCell::new(Vec::new()); - let errors = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state: errors, - }; - assert_eq!( - // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, - vec![ - T!(At, 0, 1, "@"), - T!(Export, 1, 1, "export"), - T!(Eof, 7, 1, ""), - ] - ) - } - - #[test] - fn idents_with_keyword_prefix() 
{ - let input = Span::new("@baseA, @prefixB, @importC, @exportD, @outputE."); - let refcell = RefCell::new(Vec::new()); - let errors = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state: errors, - }; - assert_eq!( - // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, - vec![ - T!(At, 0, 1, "@"), - T!(Ident, 1, 1, "baseA"), - T!(Comma, 6, 1, ","), - T!(Whitespace, 7, 1, " "), - T!(At, 8, 1, "@"), - T!(Ident, 9, 1, "prefixB"), - T!(Comma, 16, 1, ","), - T!(Whitespace, 17, 1, " "), - T!(At, 18, 1, "@"), - T!(Ident, 19, 1, "importC"), - T!(Comma, 26, 1, ","), - T!(Whitespace, 27, 1, " "), - T!(At, 28, 1, "@"), - T!(Ident, 29, 1, "exportD"), - T!(Comma, 36, 1, ","), - T!(Whitespace, 37, 1, " "), - T!(At, 38, 1, "@"), - T!(Ident, 39, 1, "outputE"), - T!(Dot, 46, 1, "."), - T!(Eof, 47, 1, ""), - ] - ) - } - - #[test] - fn tokenize() { - let input = Span::new("P(?X) :- A(?X).\t\n A(Human)."); - let refcell = RefCell::new(Vec::new()); - let errors = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state: errors, - }; - assert_eq!( - // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, - vec![ - T!(Ident, 0, 1, "P"), - T!(OpenParen, 1, 1, "("), - T!(QuestionMark, 2, 1, "?"), - T!(Ident, 3, 1, "X"), - T!(CloseParen, 4, 1, ")"), - T!(Whitespace, 5, 1, " "), - T!(Arrow, 6, 1, ":-"), - T!(Whitespace, 8, 1, " "), - T!(Ident, 9, 1, "A"), - T!(OpenParen, 10, 1, "("), - T!(QuestionMark, 11, 1, "?"), - T!(Ident, 12, 1, "X"), - T!(CloseParen, 13, 1, ")"), - T!(Dot, 14, 1, "."), - T!(Whitespace, 15, 1, "\t\n "), - T!(Ident, 21, 2, "A"), - T!(OpenParen, 22, 2, "("), - T!(Ident, 23, 2, "Human"), - T!(CloseParen, 28, 2, ")"), - T!(Dot, 29, 2, "."), - T!(Eof, 30, 2, ""), - ] - ) - } - - #[test] - fn comment() { - let input = Span::new(" % Some Comment\n"); - let refcell = RefCell::new(Vec::new()); - let errors = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state: errors, - }; - assert_eq!( - // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, - vec![ - T!(Whitespace, 0, 1, " "), - T!(Comment, 4, 1, "% Some Comment\n"), - T!(Eof, 19, 2, ""), - // T!(Comment, Span::new(0, 1, "% Some Comment\n")), - // T!(Eof, Span::new(15, 2, "")) - ] - ) - } - - #[test] - fn ident() { - let input = Span::new("some_Ident(Alice). %comment at the end of a line\n"); - let refcell = RefCell::new(Vec::new()); - let errors = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state: errors, - }; - assert_eq!( - // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, - vec![ - T!(Ident, 0, 1, "some_Ident"), - T!(OpenParen, 10, 1, "("), - T!(Ident, 11, 1, "Alice"), - T!(CloseParen, 16, 1, ")"), - T!(Dot, 17, 1, "."), - T!(Whitespace, 18, 1, " "), - T!(Comment, 19, 1, "%comment at the end of a line\n"), - T!(Eof, 49, 2, ""), - ] - ) - } - - #[test] - fn forbidden_ident() { - let input = Span::new("_someIdent(Alice). 
%comment at the end of a line\n"); - let refcell = RefCell::new(Vec::new()); - let errors = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state: errors, - }; - assert_eq!( - // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, - vec![ - T!(Underscore, 0, 1, "_"), - T!(Ident, 1, 1, "someIdent"), - T!(OpenParen, 10, 1, "("), - T!(Ident, 11, 1, "Alice"), - T!(CloseParen, 16, 1, ")"), - T!(Dot, 17, 1, "."), - T!(Whitespace, 18, 1, " "), - T!(Comment, 19, 1, "%comment at the end of a line\n"), - T!(Eof, 49, 2, ""), - ] - ) - } - - #[test] - fn iri() { - let input = Span::new(""); - let refcell = RefCell::new(Vec::new()); - let errors = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state: errors, - }; - assert_eq!( - // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, - vec![ - T!(Iri, 0, 1, ""), - T!(Eof, 31, 1, ""), - ] - ) - } - - #[test] - fn iri_pct_enc() { - let input = Span::new("\n"); - let refcell = RefCell::new(Vec::new()); - let errors = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state: errors, - }; - assert_eq!( - // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, - vec![ - T!(Iri, 0, 1, ""), - T!(Whitespace, 37, 1, "\n"), - T!(Eof, 38, 2, ""), - ] - ) - } - - // FIXME: change the name of this test according to the correct name for `?X > 3` - // (Constraints are Rules with an empty Head) - #[ignore] - #[test] - fn constraints() { - let input = Span::new("A(?X):-B(?X),?X<42,?X>3."); - let refcell = RefCell::new(Vec::new()); - let errors = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state: errors, - }; - assert_eq!( - // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, - vec![ - T!(Ident, 0, 1, "A"), - T!(OpenParen, 1, 1, "("), - T!(QuestionMark, 2, 1, "?"), - T!(Ident, 3, 1, "X"), - T!(CloseParen, 4, 1, ")"), - T!(Arrow, 5, 1, ":-"), - T!(Ident, 7, 1, "B"), - T!(OpenParen, 8, 1, "("), - T!(QuestionMark, 9, 1, "?"), - T!(Ident, 10, 1, "X"), - T!(CloseParen, 11, 1, ")"), - T!(Comma, 12, 1, ","), - T!(QuestionMark, 13, 1, "?"), - T!(Ident, 14, 1, "X"), - T!(Less, 15, 1, "<"), - T!(Number, 16, 1, "42"), - T!(Comma, 18, 1, ","), - T!(QuestionMark, 19, 1, "?"), - T!(Ident, 20, 1, "X"), - T!(Greater, 21, 1, ">"), - T!(Number, 22, 1, "3"), - T!(Dot, 23, 1, "."), - T!(Eof, 24, 1, ""), - ] - ) - } - - #[test] - fn pct_enc_comment() { - let input = Span::new("%d4 this should be a comment,\n% but the lexer can't distinguish a percent encoded value\n% in an iri from a comment :(\n"); - let refcell = RefCell::new(Vec::new()); - let errors = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state: errors, - }; - assert_eq!( - // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, - vec![ - T!(Comment, 0, 1, "%d4 this should be a comment,\n"), - T!( - Comment, - 30, - 2, - "% but the lexer can't distinguish a percent encoded value\n" - ), - T!(Comment, 88, 3, "% in an iri from a comment :(\n"), - T!(Eof, 118, 4, ""), - ] - ) - } - - #[test] - fn fact() { - let input = Span::new("somePred(term1, term2)."); - let refcell = RefCell::new(Vec::new()); - let errors = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state: errors, - }; - assert_eq!( - // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, - vec![ - T!(Ident, 0, 1, "somePred"), - T!(OpenParen, 8, 1, "("), - T!(Ident, 9, 1, "term1"), - T!(Comma, 14, 1, ","), - 
T!(Whitespace, 15, 1, " "), - T!(Ident, 16, 1, "term2"), - T!(CloseParen, 21, 1, ")"), - T!(Dot, 22, 1, "."), - T!(Eof, 23, 1, ""), - ] - ) - } - - #[test] - fn whitespace() { - let input = Span::new(" \t \n\n\t \n"); - let refcell = RefCell::new(Vec::new()); - let errors = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state: errors, - }; - assert_eq!( - // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, - vec![ - T!(Whitespace, 0, 1, " \t \n\n\t \n"), - T!(Eof, 12, 4, ""), - ] - ) - } - - #[test] - fn skip_to_dot() { + fn skip_to_statement_end() { let input = Span::new("some ?broken :- rule). A(Fact)."); let refcell = RefCell::new(Vec::new()); let errors = ParserState { errors: &refcell }; diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index 1f2e4e270..3e41b894a 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -2519,8 +2519,8 @@ pub mod new { context: vec![context], }; // errors.report_error(err); - let (rest_input, token) = skip_to_statement_end::>>(input); - Ok((rest_input, Statement::Error(token))) + let (rest_input, span) = skip_to_statement_end::>>(input); + Ok((rest_input, Statement::Error(span))) } Err(err) => Err(err), } @@ -2879,7 +2879,7 @@ pub mod new { opt(lex_doc_comment), recognize(pair( at, - verify(lex_ident, |token| token.kind == TokenKind::Base), + verify(lex_ident, |token| *token.fragment() == "base"), )), wsoc0, lex_iri, @@ -2916,7 +2916,7 @@ pub mod new { opt(lex_doc_comment), recognize(pair( at, - verify(lex_ident, |token| token.kind == TokenKind::Prefix), + verify(lex_ident, |token| *token.fragment() == "prefix"), )), wsoc0, recognize(pair(opt(lex_ident), colon)), @@ -2933,10 +2933,7 @@ pub mod new { Directive::Prefix { span: outer_span(input.input, rest_input.input), doc_comment, - prefix: Token { - kind: TokenKind::Ident, - span: prefix.input, - }, + prefix: prefix.input, prefix_iri, dot, }, @@ -2959,7 +2956,7 @@ pub mod new { opt(lex_doc_comment), recognize(pair( at, - verify(lex_ident, |token| token.kind == TokenKind::Import), + verify(lex_ident, |token| *token.fragment() == "import"), )), wsoc1, lex_ident, @@ -3005,7 +3002,7 @@ pub mod new { opt(lex_doc_comment), recognize(pair( at, - verify(lex_ident, |token| token.kind == TokenKind::Export), + verify(lex_ident, |token| *token.fragment() == "export"), )), wsoc1, lex_ident, @@ -3051,7 +3048,7 @@ pub mod new { opt(lex_doc_comment), recognize(pair( at, - verify(lex_ident, |token| token.kind == TokenKind::Output), + verify(lex_ident, |token| *token.fragment() == "output"), )), wsoc1, opt(parse_list(lex_ident)), @@ -3485,10 +3482,7 @@ pub mod new { Primitive::RdfLiteral { span: outer_span(input.input, rest_input.input), string, - carets: Token { - kind: TokenKind::Caret, - span: carets.input, - }, + carets: carets.input, iri, }, ) @@ -3763,10 +3757,7 @@ pub mod new { rest_input, Term::Aggregation { span: outer_span(input.input, rest_input.input), - operation: Token { - kind: TokenKind::Aggregate, - span: operation.input, - }, + operation: operation.input, open_paren, terms: Box::new(terms), close_paren, @@ -3821,15 +3812,7 @@ pub mod new { Context::UniversalVariable, recognize(pair(question_mark, lex_ident)), )(input) - .map(|(rest_input, var)| { - ( - rest_input, - Term::UniversalVariable(Token { - kind: TokenKind::Variable, - span: var.input, - }), - ) - }) + .map(|(rest_input, var)| (rest_input, Term::UniversalVariable(var.input))) } /// Parse an existential variable. 
@@ -3844,15 +3827,7 @@ pub mod new { Context::ExistentialVariable, recognize(pair(exclamation_mark, lex_ident)), )(input) - .map(|(rest_input, existential)| { - ( - rest_input, - Term::ExistentialVariable(Token { - kind: TokenKind::Existential, - span: existential.input, - }), - ) - }) + .map(|(rest_input, existential)| (rest_input, Term::ExistentialVariable(existential.input))) } // Order of parser compinator is important, because of ordered choice and no backtracking @@ -3863,7 +3838,7 @@ pub mod new { E: ParseError> + ContextError, Context>, >( input: Input<'a, 's>, - ) -> IResult, Token<'a>, E> { + ) -> IResult, Span<'a>, E> { context( Context::Operators, alt((less_equal, greater_equal, equal, unequal, less, greater)), @@ -3890,9 +3865,7 @@ pub mod new { macro_rules! T { ($tok_kind: expr, $offset: literal, $line: literal, $str: literal) => { - Token::new($tok_kind, unsafe { - Span::new_from_raw_offset($offset, $line, $str, ()) - }) + unsafe { Span::new_from_raw_offset($offset, $line, $str, ()) } }; } macro_rules! s { @@ -3940,40 +3913,19 @@ pub mod new { doc_comment: None, atom: Atom::Positive(Tuple { span: s!(0, 1, "a(B,C)"), - identifier: Some(Token { - kind: TokenKind::Ident, - span: s!(0, 1, "a"), - }), - open_paren: Token { - kind: TokenKind::OpenParen, - span: s!(1, 1, "("), - }, + identifier: Some(s!(0, 1, "a"),), + open_paren: s!(1, 1, "("), terms: Some(List { span: s!(2, 1, "B,C"), - first: Term::Primitive(Primitive::Constant(Token { - kind: TokenKind::Ident, - span: s!(2, 1, "B"), - })), + first: Term::Primitive(Primitive::Constant(s!(2, 1, "B"),)), rest: Some(vec![( - Token { - kind: TokenKind::Comma, - span: s!(3, 1, ",") - }, - Term::Primitive(Primitive::Constant(Token { - kind: TokenKind::Ident, - span: s!(4, 1, "C"), - })), + s!(3, 1, ","), + Term::Primitive(Primitive::Constant(s!(4, 1, "C"),)), )]), }), - close_paren: Token { - kind: TokenKind::CloseParen, - span: s!(5, 1, ")"), - }, + close_paren: s!(5, 1, ")"), }), - dot: Token { - kind: TokenKind::Dot, - span: s!(6, 1, ".") - } + dot: s!(6, 1, ".") }], } ); @@ -4000,14 +3952,8 @@ pub mod new { Statement::Directive(Directive::Base { span: s!(0, 1, "@base ."), doc_comment: None, - base_iri: Token { - kind: TokenKind::Iri, - span: s!(6, 1, "") - }, - dot: Token { - kind: TokenKind::Dot, - span: s!(31, 1, ".") - }, + base_iri: s!(6, 1, ""), + dot: s!(31, 1, "."), }), Statement::Directive(Directive::Prefix { span: s!( @@ -4016,18 +3962,9 @@ pub mod new { "@prefix rdfs:." 
), doc_comment: None, - prefix: Token { - kind: TokenKind::Ident, - span: s!(40, 1, "rdfs:"), - }, - prefix_iri: Token { - kind: TokenKind::Iri, - span: s!(45, 1, ""), - }, - dot: Token { - kind: TokenKind::Dot, - span: s!(84, 1, ".") - } + prefix: s!(40, 1, "rdfs:"), + prefix_iri: s!(45, 1, ""), + dot: s!(84, 1, ".") }), Statement::Directive(Directive::Import { span: s!( @@ -4036,121 +3973,59 @@ pub mod new { r#"@import sourceA:-csv{resource="sources/dataA.csv"}."# ), doc_comment: None, - predicate: Token { - kind: TokenKind::Ident, - span: s!(93, 1, "sourceA"), - }, - arrow: Token { - kind: TokenKind::Arrow, - span: s!(100, 1, ":-"), - }, + predicate: s!(93, 1, "sourceA"), + arrow: s!(100, 1, ":-"), map: Map { span: s!(102, 1, r#"csv{resource="sources/dataA.csv"}"#), - identifier: Some(Token { - kind: TokenKind::Ident, - span: s!(102, 1, "csv") - }), - open_brace: Token { - kind: TokenKind::OpenBrace, - span: s!(105, 1, "{") - }, + identifier: Some(s!(102, 1, "csv")), + open_brace: s!(105, 1, "{"), pairs: Some(List { span: s!(106, 1, "resource=\"sources/dataA.csv\""), first: Pair { span: s!(106, 1, "resource=\"sources/dataA.csv\""), - key: Term::Primitive(Primitive::Constant(Token { - kind: TokenKind::Ident, - span: s!(106, 1, "resource"), - })), - equal: Token { - kind: TokenKind::Equal, - span: s!(114, 1, "="), - }, - value: Term::Primitive(Primitive::String(Token { - kind: TokenKind::String, - span: s!(115, 1, "\"sources/dataA.csv\""), - })), + key: Term::Primitive(Primitive::Constant(s!( + 106, 1, "resource" + ),)), + equal: s!(114, 1, "="), + value: Term::Primitive(Primitive::String(s!( + 115, + 1, + "\"sources/dataA.csv\"" + ),)), }, rest: None, }), - close_brace: Token { - kind: TokenKind::CloseBrace, - span: s!(134, 1, "}") - }, + close_brace: s!(134, 1, "}"), }, - dot: Token { - kind: TokenKind::Dot, - span: s!(135, 1, ".") - } + dot: s!(135, 1, ".") }), Statement::Directive(Directive::Export { span: s!(136, 1, "@export a:-csv{}."), doc_comment: None, - predicate: Token { - kind: TokenKind::Ident, - span: s!(144, 1, "a"), - }, - arrow: Token { - kind: TokenKind::Arrow, - span: s!(145, 1, ":-"), - }, + predicate: s!(144, 1, "a"), + arrow: s!(145, 1, ":-"), map: Map { span: s!(147, 1, "csv{}"), - identifier: Some(Token { - kind: TokenKind::Ident, - span: s!(147, 1, "csv"), - }), - open_brace: Token { - kind: TokenKind::OpenBrace, - span: s!(150, 1, "{"), - }, + identifier: Some(s!(147, 1, "csv"),), + open_brace: s!(150, 1, "{"), + pairs: None, - close_brace: Token { - kind: TokenKind::CloseBrace, - span: s!(151, 1, "}"), - }, - }, - dot: Token { - kind: TokenKind::Dot, - span: s!(152, 1, "."), + close_brace: s!(151, 1, "}"), }, + dot: s!(152, 1, "."), }), Statement::Directive(Directive::Output { span: s!(153, 1, "@output a, b, c."), doc_comment: None, predicates: Some(List { span: s!(161, 1, "a, b, c"), - first: Token { - kind: TokenKind::Ident, - span: s!(161, 1, "a"), - }, + first: s!(161, 1, "a"), rest: Some(vec![ - ( - Token { - kind: TokenKind::Comma, - span: s!(162, 1, ","), - }, - Token { - kind: TokenKind::Ident, - span: s!(164, 1, "b"), - }, - ), - ( - Token { - kind: TokenKind::Comma, - span: s!(165, 1, ","), - }, - Token { - kind: TokenKind::Ident, - span: s!(167, 1, "c"), - }, - ), + (s!(162, 1, ","), s!(164, 1, "b"),), + (s!(165, 1, ","), s!(167, 1, "c"),), ]), }), - dot: Token { - kind: TokenKind::Dot, - span: s!(168, 1, "."), - } + dot: s!(168, 1, "."), }), ], } @@ -4193,57 +4068,31 @@ pub mod new { doc_comment: None, atom: Atom::Positive(Tuple { span: s!(0, 1, 
"some(Fact, with, whitespace)"), - identifier: Some(Token { - kind: TokenKind::Ident, - span: s!(0, 1, "some"), - }), - open_paren: Token { - kind: TokenKind::OpenParen, - span: s!(4, 1, "(") - }, + identifier: Some(s!(0, 1, "some"),), + open_paren: s!(4, 1, "("), terms: Some(List { span: s!(5, 1, "Fact, with, whitespace"), - first: Term::Primitive(Primitive::Constant(Token { - kind: TokenKind::Ident, - span: s!(5, 1, "Fact"), - })), + first: Term::Primitive(Primitive::Constant(s!(5, 1, "Fact"),)), rest: Some(vec![ ( - Token { - kind: TokenKind::Comma, - span: s!(9, 1, ","), - }, - Term::Primitive(Primitive::Constant(Token { - kind: TokenKind::Ident, - span: s!(11, 1, "with") - })), + s!(9, 1, ","), + Term::Primitive(Primitive::Constant(s!(11, 1, "with"))), ), ( - Token { - kind: TokenKind::Comma, - span: s!(15, 1, ","), - }, - Term::Primitive(Primitive::Constant(Token { - kind: TokenKind::Ident, - span: s!(17, 1, "whitespace") - })), + s!(15, 1, ","), + Term::Primitive(Primitive::Constant(s!( + 17, + 1, + "whitespace" + ))), ), ]), }), - close_paren: Token { - kind: TokenKind::CloseParen, - span: s!(27, 1, ")") - }, + close_paren: s!(27, 1, ")"), }), - dot: Token { - kind: TokenKind::Dot, - span: s!(29, 1, "."), - }, + dot: s!(29, 1, "."), }, - Statement::Comment(Token { - kind: TokenKind::Comment, - span: s!(31, 1, "% and a super useful comment\n") - }) + Statement::Comment(s!(31, 1, "% and a super useful comment\n")) ], } ); @@ -4305,7 +4154,8 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters }; // let result = parse_program::>(input); let (ast, errors) = parse_program::>>(input); - println!("{}\n\n{:#?}", ast, errors); + // println!("{}\n\n{:#?}", ast, errors); + println!("{}\n\n", ast); let mut error_map: BTreeMap> = BTreeMap::new(); for error in errors { if let Some(set) = error_map.get_mut(&error.pos) { diff --git a/nemo/src/io/parser/ast.rs b/nemo/src/io/parser/ast.rs index 84c7bcbea..5bfbffeb2 100644 --- a/nemo/src/io/parser/ast.rs +++ b/nemo/src/io/parser/ast.rs @@ -41,6 +41,7 @@ pub trait AstNode: std::fmt::Debug + Display + Sync { } } + // FIXME: With the removal of tokens is this method still usefull and/or should be renamed? fn is_token(&self) -> bool; fn name(&self) -> String; @@ -99,7 +100,7 @@ pub struct Range { #[derive(Debug, Clone, PartialEq)] pub struct Wsoc<'a> { pub span: Span<'a>, - pub token: Vec>, + pub token: Vec>, } impl AstNode for Wsoc<'_> { fn children(&self) -> Option> { @@ -152,7 +153,7 @@ pub struct List<'a, T> { pub span: Span<'a>, pub first: T, // (,T)* - pub rest: Option, T)>>, + pub rest: Option, T)>>, } impl List<'_, T> { pub fn to_vec(&self) -> Vec { @@ -250,7 +251,7 @@ pub(crate) fn ast_to_ascii_tree(node: &dyn AstNode) -> Tree { if let Some(children) = node.children() { for child in children { if child.is_token() { - vec.push(Tree::Leaf(vec![format!("{}", child)])); + vec.push(Tree::Leaf(vec![format!("\x1b[93m{:?}\x1b[0m", child.name())])); } else { vec.push(ast_to_ascii_tree(child)); } @@ -295,142 +296,108 @@ mod test { let span = Span::new(input); let ast = Program { span, - tl_doc_comment: Some(Token { - kind: TokenKind::TlDocComment, - span: s!(0, 1, "%! This is just a test file.\n%! So the documentation of the rules is not important.\n") - }), + tl_doc_comment: Some( + s!(0, 1, "%! This is just a test file.\n%! 
So the documentation of the rules is not important.\n") + ), statements: vec![ Statement::Directive(Directive::Prefix { span:s!(125,4,"@prefix xsd: ."), - doc_comment:Some(Token { - kind:TokenKind::DocComment, - span:s!(84,3,"%% This is the prefix used for datatypes\n") - }), - prefix: Token { - kind: TokenKind::PrefixIdent, - span: s!(133, 4, "xsd:"), - }, - prefix_iri: Token { - kind: TokenKind::Iri, - span: s!(138, 4, ""), - }, - dot: Token{ - kind:TokenKind::Dot, - span:s!(173,4,".") - } - }), - Statement::Comment(Token { - kind: TokenKind::Comment, - span: s!(176, 6, "% Facts\n"), + doc_comment:Some( + s!(84,3,"%% This is the prefix used for datatypes\n") + ), + prefix: + s!(133, 4, "xsd:"), + prefix_iri: + s!(138, 4, ""), + dot: + s!(173,4,".") }), + Statement::Comment( + s!(176, 6, "% Facts\n"), + ), Statement::Fact { span:s!(222,8,"somePredicate(ConstA, ConstB)."), - doc_comment: Some(Token { - kind: TokenKind::DocComment, - span:s!(184,7,"%% This is just an example predicate.\n") - }), + doc_comment: Some( + s!(184,7,"%% This is just an example predicate.\n") + ), atom: Atom::Positive(Tuple { span: s!(222,8,"somePredicate(ConstA, ConstB)"), - identifier: Some(Token { - kind: TokenKind::Ident, - span: s!(222, 8, "somePredicate"), - }), - open_paren:Token{ - kind:TokenKind::OpenParen, - span:s!(235,8,"(") - } , + identifier: Some( + s!(222, 8, "somePredicate"), + ), + open_paren: + s!(235,8,"(") + , terms: Some(List { span: s!(236, 8, "ConstA, ConstB"), - first: Term::Primitive(Primitive::Constant(Token { - kind: TokenKind::Ident, - span: s!(236, 8, "ConstA"), - })), + first: Term::Primitive(Primitive::Constant( s!(236, 8, "ConstA"), + )), rest: Some(vec![( - Token { - kind: TokenKind::Comma, - span: s!(242, 8, ","), - }, - Term::Primitive(Primitive::Constant(Token { - kind: TokenKind::Ident, - span: s!(244, 8, "ConstB"), - })), + s!(242, 8, ","), + Term::Primitive(Primitive::Constant( s!(244, 8, "ConstB"), + )), )]), }), - close_paren:Token { - kind: TokenKind::CloseParen, - span:s!(250,8,")") - } + close_paren: + s!(250,8,")") }), - dot: Token { - kind: TokenKind::Dot, - span: s!(251,8,".") - } + dot: + s!(251,8,".") + }, - Statement::Comment(Token { - kind: TokenKind::Comment, - span: s!(254, 10, "% Rules\n"), - }), + Statement::Comment( + s!(254, 10, "% Rules\n"), + ), Statement::Rule { span: s!(295,12,"someHead(?VarA) :- somePredicate(?VarA, ConstB)."), - doc_comment: Some(Token { kind: TokenKind::DocComment, span: s!(262,11,"%% This is just an example rule.\n") }), + doc_comment: Some(s!(262,11,"%% This is just an example rule.\n")), head: List { span: s!(295, 12, "someHead(?VarA)"), first: Atom::Positive(Tuple { span: s!(295,12,"someHead(?VarA)"), - identifier: Some(Token { - kind: TokenKind::Ident, - span: s!(295, 12, "someHead"), - }), - open_paren: Token { kind: TokenKind::OpenParen, span: s!(303,12,"(") }, + identifier: Some( + s!(295, 12, "someHead"), + ), + open_paren: s!(303,12,"(") , terms: Some(List { span: s!(304, 12, "?VarA"), - first: Term::UniversalVariable(Token { - kind: TokenKind::Variable, - span: s!(304, 12, "?VarA"), - }), + first: Term::UniversalVariable( s!(304, 12, "?VarA"), + ), rest: None, }), - close_paren: Token { kind: TokenKind::CloseParen, span: s!(309,12,")") }, + close_paren: s!(309,12,")") , }), rest: None, }, - arrow: Token{kind:TokenKind::Arrow, span:s!(311,12,":-")}, + arrow: s!(311,12,":-"), body: List { span: s!(314, 12, "somePredicate(?VarA, ConstB)"), first: Atom::Positive(Tuple { span: s!(314, 12,"somePredicate(?VarA, ConstB)"), - 
identifier: Some(Token { - kind: TokenKind::Ident, - span: s!(314, 12, "somePredicate"), - }), - open_paren: Token { kind: TokenKind::OpenParen, span: s!(327,12,"(") }, + identifier: Some( + s!(314, 12, "somePredicate"), + ), + open_paren: s!(327,12,"("), terms: Some(List { span: s!(328, 12, "?Var, ConstB"), - first: Term::UniversalVariable(Token { - kind: TokenKind::Variable, - span: s!(328, 12, "?VarA"), - }), + first: Term::UniversalVariable( s!(328, 12, "?VarA"), + ), rest: Some(vec![( - Token { - kind: TokenKind::Comma, - span: s!(333, 12, ","), - }, - Term::Primitive(Primitive::Constant(Token { - kind: TokenKind::Ident, - span: s!(335, 12, "ConstB"), - })), + s!(333, 12, ","), + + Term::Primitive(Primitive::Constant(s!(335, 12, "ConstB"), + )), )]), }), - close_paren: Token { kind: TokenKind::CloseParen, span: s!(341, 12,")") }, + close_paren: s!(341, 12,")") , }), rest: None, }, - dot: Token{kind:TokenKind::Dot,span:s!(342, 12,".")}, + dot: s!(342, 12,"."), }, - Statement::Comment(Token { - kind: TokenKind::Comment, - span: s!(346, 12, "% all constants that are in relation with ConstB\n"), - }), + Statement::Comment( + s!(346, 12, "% all constants that are in relation with ConstB\n"), + ), ], }; println!("{}", ast); diff --git a/nemo/src/io/parser/ast/atom.rs b/nemo/src/io/parser/ast/atom.rs index 6ad2d77ed..47ccc2e08 100644 --- a/nemo/src/io/parser/ast/atom.rs +++ b/nemo/src/io/parser/ast/atom.rs @@ -12,13 +12,13 @@ pub enum Atom<'a> { Positive(Tuple<'a>), Negative { span: Span<'a>, - neg: Token<'a>, + neg: Span<'a>, atom: Tuple<'a>, }, InfixAtom { span: Span<'a>, lhs: Term<'a>, - operation: Token<'a>, + operation: Span<'a>, rhs: Term<'a>, }, Map(Map<'a>), @@ -106,7 +106,7 @@ impl AstNode for Atom<'_> { fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> { match self.tuple() { Some(tuple) => Some(( - format!("Atom: {}", tuple.identifier.unwrap().span.fragment()), + format!("Atom: {}", tuple.identifier.unwrap().fragment()), SymbolKind::FUNCTION, )), None => Some((String::from("Atom"), SymbolKind::FUNCTION)), diff --git a/nemo/src/io/parser/ast/directive.rs b/nemo/src/io/parser/ast/directive.rs index 11424d7e4..4ccf406dd 100644 --- a/nemo/src/io/parser/ast/directive.rs +++ b/nemo/src/io/parser/ast/directive.rs @@ -10,42 +10,42 @@ pub enum Directive<'a> { // "@base ." Base { span: Span<'a>, - doc_comment: Option>, - base_iri: Token<'a>, - dot: Token<'a>, + doc_comment: Option>, + base_iri: Span<'a>, + dot: Span<'a>, }, // "@prefix wikidata: ." Prefix { span: Span<'a>, - doc_comment: Option>, - prefix: Token<'a>, - prefix_iri: Token<'a>, - dot: Token<'a>, + doc_comment: Option>, + prefix: Span<'a>, + prefix_iri: Span<'a>, + dot: Span<'a>, }, // "@import table :- csv{resource="path/to/file.csv"} ." Import { span: Span<'a>, - doc_comment: Option>, - predicate: Token<'a>, - arrow: Token<'a>, + doc_comment: Option>, + predicate: Span<'a>, + arrow: Span<'a>, map: Map<'a>, - dot: Token<'a>, + dot: Span<'a>, }, // "@export result :- turtle{resource="out.ttl"} ." Export { span: Span<'a>, - doc_comment: Option>, - predicate: Token<'a>, - arrow: Token<'a>, + doc_comment: Option>, + predicate: Span<'a>, + arrow: Span<'a>, map: Map<'a>, - dot: Token<'a>, + dot: Span<'a>, }, // "@output A, B, C." 
Output { span: Span<'a>, - doc_comment: Option>, - predicates: Option>>, - dot: Token<'a>, + doc_comment: Option>, + predicates: Option>>, + dot: Span<'a>, }, } impl AstNode for Directive<'_> { diff --git a/nemo/src/io/parser/ast/map.rs b/nemo/src/io/parser/ast/map.rs index 509d07e2b..143640d54 100644 --- a/nemo/src/io/parser/ast/map.rs +++ b/nemo/src/io/parser/ast/map.rs @@ -9,10 +9,10 @@ use std::fmt::Debug; #[derive(Debug, Clone, PartialEq)] pub struct Map<'a> { pub span: Span<'a>, - pub identifier: Option>, - pub open_brace: Token<'a>, + pub identifier: Option>, + pub open_brace: Span<'a>, pub pairs: Option, Term<'a>>>>, - pub close_brace: Token<'a>, + pub close_brace: Span<'a>, } impl AstNode for Map<'_> { fn children(&self) -> Option> { @@ -65,7 +65,7 @@ impl std::fmt::Display for Map<'_> { pub struct Pair<'a, K, V> { pub span: Span<'a>, pub key: K, - pub equal: Token<'a>, + pub equal: Span<'a>, pub value: V, } impl AstNode for Pair<'_, K, V> { diff --git a/nemo/src/io/parser/ast/program.rs b/nemo/src/io/parser/ast/program.rs index f2f111461..7b080bcab 100644 --- a/nemo/src/io/parser/ast/program.rs +++ b/nemo/src/io/parser/ast/program.rs @@ -7,7 +7,7 @@ use ascii_tree::write_tree; #[derive(Debug, Clone, PartialEq)] pub struct Program<'a> { pub span: Span<'a>, - pub tl_doc_comment: Option>, + pub tl_doc_comment: Option>, pub statements: Vec>, } impl AstNode for Program<'_> { diff --git a/nemo/src/io/parser/ast/statement.rs b/nemo/src/io/parser/ast/statement.rs index 6322b0a34..c9420ec06 100644 --- a/nemo/src/io/parser/ast/statement.rs +++ b/nemo/src/io/parser/ast/statement.rs @@ -11,20 +11,20 @@ pub enum Statement<'a> { Directive(Directive<'a>), Fact { span: Span<'a>, - doc_comment: Option>, + doc_comment: Option>, atom: Atom<'a>, - dot: Token<'a>, + dot: Span<'a>, }, Rule { span: Span<'a>, - doc_comment: Option>, + doc_comment: Option>, head: List<'a, Atom<'a>>, - arrow: Token<'a>, + arrow: Span<'a>, body: List<'a, Atom<'a>>, - dot: Token<'a>, + dot: Span<'a>, }, - Comment(Token<'a>), - Error(Token<'a>), + Comment(Span<'a>), + Error(Span<'a>), } impl AstNode for Statement<'_> { fn children(&self) -> Option> { @@ -73,7 +73,7 @@ impl AstNode for Statement<'_> { Statement::Fact { span, .. } => *span, Statement::Rule { span, .. } => *span, Statement::Comment(c) => c.span(), - Statement::Error(t) => t.span, + Statement::Error(t) => *t, } } diff --git a/nemo/src/io/parser/ast/term.rs b/nemo/src/io/parser/ast/term.rs index db51d80e7..32fde33ba 100644 --- a/nemo/src/io/parser/ast/term.rs +++ b/nemo/src/io/parser/ast/term.rs @@ -9,30 +9,30 @@ use ascii_tree::write_tree; #[derive(Debug, Clone, PartialEq)] pub enum Term<'a> { Primitive(Primitive<'a>), - UniversalVariable(Token<'a>), - ExistentialVariable(Token<'a>), + UniversalVariable(Span<'a>), + ExistentialVariable(Span<'a>), // TODO: Is whitespace needed? Figure out how unary terms look UnaryPrefix { span: Span<'a>, - operation: Token<'a>, + operation: Span<'a>, term: Box>, }, Binary { span: Span<'a>, lhs: Box>, - operation: Token<'a>, + operation: Span<'a>, rhs: Box>, }, Aggregation { span: Span<'a>, - operation: Token<'a>, - open_paren: Token<'a>, + operation: Span<'a>, + open_paren: Span<'a>, terms: Box>>, - close_paren: Token<'a>, + close_paren: Span<'a>, }, Tuple(Box>), Map(Box>), - Blank(Token<'a>), + Blank(Span<'a>), } impl AstNode for Term<'_> { @@ -173,13 +173,13 @@ impl AstNode for Term<'_> { } Term::Binary { .. } => Some((String::from("Binary term"), SymbolKind::OPERATOR)), Term::Aggregation { operation, .. 
} => Some(( - format!("Aggregation: {}", operation.span.fragment()), + format!("Aggregation: {}", operation.fragment()), SymbolKind::OPERATOR, )), Term::Tuple(tuple) => { if let Some(identifier) = tuple.identifier { Some(( - format!("Function: {}", identifier.span.fragment()), + format!("Function: {}", identifier.fragment()), SymbolKind::OPERATOR, )) } else { @@ -200,28 +200,28 @@ impl std::fmt::Display for Term<'_> { #[derive(Debug, Clone, PartialEq)] pub(crate) enum Primitive<'a> { - Constant(Token<'a>), + Constant(Span<'a>), PrefixedConstant { span: Span<'a>, - prefix: Option>, - colon: Token<'a>, - constant: Token<'a>, + prefix: Option>, + colon: Span<'a>, + constant: Span<'a>, }, Number { span: Span<'a>, - sign: Option>, - before: Option>, - dot: Option>, - after: Token<'a>, + sign: Option>, + before: Option>, + dot: Option>, + after: Span<'a>, exponent: Option>, }, - String(Token<'a>), - Iri(Token<'a>), + String(Span<'a>), + Iri(Span<'a>), RdfLiteral { span: Span<'a>, - string: Token<'a>, - carets: Token<'a>, - iri: Token<'a>, + string: Span<'a>, + carets: Span<'a>, + iri: Span<'a>, }, } @@ -282,11 +282,11 @@ impl AstNode for Primitive<'_> { fn span(&self) -> Span { match self { - Primitive::Constant(token) => token.span, + Primitive::Constant(span) => *span, Primitive::PrefixedConstant { span, .. } => *span, Primitive::Number { span, .. } => *span, - Primitive::String(token) => token.span, - Primitive::Iri(token) => token.span, + Primitive::String(span) => *span, + Primitive::Iri(span) => *span, Primitive::RdfLiteral { span, .. } => *span, } } @@ -339,9 +339,9 @@ impl std::fmt::Display for Primitive<'_> { #[derive(Debug, Clone, PartialEq)] pub(crate) struct Exponent<'a> { - pub(crate) e: Token<'a>, - pub(crate) sign: Option>, - pub(crate) number: Token<'a>, + pub(crate) e: Span<'a>, + pub(crate) sign: Option>, + pub(crate) number: Span<'a>, } impl AstNode for Exponent<'_> { diff --git a/nemo/src/io/parser/ast/tuple.rs b/nemo/src/io/parser/ast/tuple.rs index 9dd84df22..d3f5e7625 100644 --- a/nemo/src/io/parser/ast/tuple.rs +++ b/nemo/src/io/parser/ast/tuple.rs @@ -8,10 +8,10 @@ use ascii_tree::write_tree; #[derive(Debug, Clone, PartialEq)] pub struct Tuple<'a> { pub span: Span<'a>, - pub identifier: Option>, - pub open_paren: Token<'a>, + pub identifier: Option>, + pub open_paren: Span<'a>, pub terms: Option>>, - pub close_paren: Token<'a>, + pub close_paren: Span<'a>, } impl AstNode for Tuple<'_> { diff --git a/nemo/src/io/parser/types.rs b/nemo/src/io/parser/types.rs index 2b0832655..4ba1b045c 100644 --- a/nemo/src/io/parser/types.rs +++ b/nemo/src/io/parser/types.rs @@ -428,168 +428,6 @@ impl FromExternalError, DataValueCreationError> for LocatedParseError { } } -use crate::io::lexer::Token; - -#[derive(Debug, Copy, Clone, PartialEq)] -pub(crate) struct Tokens<'a> { - pub(crate) tok: &'a [Token<'a>], -} -impl<'a> Tokens<'a> { - fn new(vec: &'a [Token]) -> Tokens<'a> { - Tokens { tok: vec } - } -} -impl<'a> AsBytes for Tokens<'a> { - fn as_bytes(&self) -> &[u8] { - todo!() - } -} -impl<'a, T> nom::Compare for Tokens<'a> { - fn compare(&self, t: T) -> nom::CompareResult { - todo!() - } - - fn compare_no_case(&self, t: T) -> nom::CompareResult { - todo!() - } -} -// impl<'a> nom::ExtendInto for Tokens<'a> { -// type Item; - -// type Extender; - -// fn new_builder(&self) -> Self::Extender { -// todo!() -// } - -// fn extend_into(&self, acc: &mut Self::Extender) { -// todo!() -// } -// } -impl<'a, T> nom::FindSubstring for Tokens<'a> { - fn find_substring(&self, substr: T) -> Option { 
- todo!()
- }
-}
-impl<'a, T> nom::FindToken<T> for Tokens<'a> {
- fn find_token(&self, token: T) -> bool {
- todo!()
- }
-}
-impl<'a> InputIter for Tokens<'a> {
- type Item = &'a Token<'a>;
-
- type Iter = std::iter::Enumerate<std::slice::Iter<'a, Token<'a>>>;
-
- type IterElem = std::slice::Iter<'a, Token<'a>>;
-
- fn iter_indices(&self) -> Self::Iter {
- self.tok.iter().enumerate()
- }
-
- fn iter_elements(&self) -> Self::IterElem {
- self.tok.iter()
- }
-
- fn position<P>
(&self, predicate: P) -> Option - where - P: Fn(Self::Item) -> bool, - { - self.tok.iter().position(predicate) - } - - fn slice_index(&self, count: usize) -> Result { - if self.tok.len() >= count { - Ok(count) - } else { - Err(nom::Needed::Unknown) - } - } -} -impl<'a> InputLength for Tokens<'a> { - fn input_len(&self) -> usize { - self.tok.len() - } -} -impl<'a> InputTake for Tokens<'a> { - fn take(&self, count: usize) -> Self { - Tokens { - tok: &self.tok[0..count], - } - } - - fn take_split(&self, count: usize) -> (Self, Self) { - ( - Tokens { - tok: &self.tok[count..self.tok.len()], - }, - Tokens { - tok: &self.tok[0..count], - }, - ) - } -} -impl<'a> InputTakeAtPosition for Tokens<'a> { - type Item = &'a Token<'a>; - - fn split_at_position>( - &self, - predicate: P, - ) -> IResult - where - P: Fn(Self::Item) -> bool, - { - todo!() - } - - fn split_at_position1>( - &self, - predicate: P, - e: ErrorKind, - ) -> IResult - where - P: Fn(Self::Item) -> bool, - { - todo!() - } - - fn split_at_position_complete>( - &self, - predicate: P, - ) -> IResult - where - P: Fn(Self::Item) -> bool, - { - todo!() - } - - fn split_at_position1_complete>( - &self, - predicate: P, - e: ErrorKind, - ) -> IResult - where - P: Fn(Self::Item) -> bool, - { - todo!() - } -} -impl<'a> nom::Offset for Tokens<'a> { - fn offset(&self, second: &Self) -> usize { - todo!() - } -} -impl<'a, R> nom::ParseTo for Tokens<'a> { - fn parse_to(&self) -> Option { - todo!() - } -} -impl<'a, R> nom::Slice for Tokens<'a> { - fn slice(&self, range: R) -> Self { - todo!() - } -} - #[derive(Debug, Clone, Copy)] pub(crate) struct Input<'a, 's> { pub(crate) input: crate::io::lexer::Span<'a>, From 71a124f26d3212d1eaff5126bbbb70268f974d41 Mon Sep 17 00:00:00 2001 From: Alex Ivliev Date: Mon, 1 Jul 2024 07:18:05 +0200 Subject: [PATCH 053/214] Adjust logical model (WIP) --- nemo/src/lib.rs | 1 + nemo/src/rule_model.rs | 6 ++ nemo/src/rule_model/component.rs | 24 ++++++ nemo/src/rule_model/component/variable.rs | 96 +++++++++++++++++++++++ nemo/src/rule_model/error.rs | 9 +++ nemo/src/rule_model/origin.rs | 10 +++ 6 files changed, 146 insertions(+) create mode 100644 nemo/src/rule_model.rs create mode 100644 nemo/src/rule_model/component.rs create mode 100644 nemo/src/rule_model/component/variable.rs create mode 100644 nemo/src/rule_model/error.rs create mode 100644 nemo/src/rule_model/origin.rs diff --git a/nemo/src/lib.rs b/nemo/src/lib.rs index 246ab7f6c..89f3d962d 100644 --- a/nemo/src/lib.rs +++ b/nemo/src/lib.rs @@ -29,6 +29,7 @@ pub mod io; pub mod execution; pub mod model; +pub mod rule_model; pub mod util; mod program_analysis; diff --git a/nemo/src/rule_model.rs b/nemo/src/rule_model.rs new file mode 100644 index 000000000..b30b5c65d --- /dev/null +++ b/nemo/src/rule_model.rs @@ -0,0 +1,6 @@ +//! This module defines the representation of nemo programs + +pub(crate) mod origin; + +pub mod component; +pub mod error; diff --git a/nemo/src/rule_model/component.rs b/nemo/src/rule_model/component.rs new file mode 100644 index 000000000..803185dfb --- /dev/null +++ b/nemo/src/rule_model/component.rs @@ -0,0 +1,24 @@ +//! This module defines a logical component and ... 
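A minimal sketch of how the `ProgramComponent` trait declared below is meant to be used; `parse_component` is a hypothetical helper for illustration and is not part of this patch:

// Hypothetical driver for the ProgramComponent trait defined below,
// assuming the trait shape from this patch (parse + origin).
fn parse_component<C: ProgramComponent>(source: &str) -> Result<C, ProgramConstructionError> {
    let component = C::parse(source)?;
    // Every component can report where it came from.
    println!("component origin: {:?}", component.origin());
    Ok(component)
}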
+ +use std::fmt::{Debug, Display}; + +use crate::io::parser::ast::AstNode; + +use super::{ + error::ProgramConstructionError, + origin::{ComponentOrigin, OriginParseReference}, +}; + +pub trait ProgramComponent: Debug + Display { + type Node<'a>: AstNode; + + fn from_ast_node<'a>(node: Self::Node<'a>, origin: OriginParseReference) -> Self; + + fn parse(string: &str) -> Result + where + Self: Sized; + + fn origin(&self) -> &ComponentOrigin; +} + +pub mod variable; diff --git a/nemo/src/rule_model/component/variable.rs b/nemo/src/rule_model/component/variable.rs new file mode 100644 index 000000000..02a5f202e --- /dev/null +++ b/nemo/src/rule_model/component/variable.rs @@ -0,0 +1,96 @@ +use std::fmt::Display; + +use crate::{ + io::parser::ast::term::Term, + rule_model::{ + error::ProgramConstructionError, + origin::{ComponentOrigin, OriginParseReference}, + }, +}; + +use super::ProgramComponent; + +/// Name of a variable +#[derive(Debug, Clone)] +pub struct VariableName(String); + +impl VariableName { + fn new(name: String) -> Result { + // TODO: Validate name + if name.is_empty() { + return Err(ProgramConstructionError::InvalidVariableName(name)); + } + + Ok(Self::new_unvalidated(name)) + } + + fn new_unvalidated(name: String) -> Self { + Self(name) + } +} + +impl Display for VariableName { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } +} + +#[derive(Debug)] +pub struct UniversalVariale { + origin: ComponentOrigin, + + name: Option, +} + +impl UniversalVariale { + fn from_term(term: Term) -> Self { + todo!() + } +} + +impl Display for UniversalVariale { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match &self.name { + Some(name) => write!(f, "?{}", name), + None => write!(f, "_"), + } + } +} + +impl ProgramComponent for UniversalVariale { + type Node<'a> = Term<'a>; + + fn from_ast_node<'a>(node: Term<'a>, origin: OriginParseReference) -> Self { + if let Term::UniversalVariable(token) = node { + let string = token.span.to_string(); + } + + todo!() + } + + fn parse(string: &str) -> Result { + todo!() + } + + fn origin(&self) -> &ComponentOrigin { + &self.origin + } +} + +#[derive(Debug)] +pub struct ExistentialVariable { + origin: ComponentOrigin, + + name: VariableName, +} + +#[derive(Debug)] +pub enum Variable { + Universal(UniversalVariale), + Existential(ExistentialVariable), +} + +mod test { + #[test] + fn create_variable() {} +} diff --git a/nemo/src/rule_model/error.rs b/nemo/src/rule_model/error.rs new file mode 100644 index 000000000..8f9331064 --- /dev/null +++ b/nemo/src/rule_model/error.rs @@ -0,0 +1,9 @@ +use thiserror::Error; + +#[derive(Error, Debug)] +pub enum ProgramConstructionError { + #[error("invalid variable name: {0}")] + InvalidVariableName(String), + #[error("parse error")] // TODO: Return parser error here + ParseError, +} diff --git a/nemo/src/rule_model/origin.rs b/nemo/src/rule_model/origin.rs new file mode 100644 index 000000000..e2660002b --- /dev/null +++ b/nemo/src/rule_model/origin.rs @@ -0,0 +1,10 @@ +//! 
This module defines + +pub(crate) type OriginParseReference = usize; + +#[derive(Debug)] +pub enum ComponentOrigin { + Created, + Parsed(OriginParseReference), + Something(Box), +} From 813fb595df8893b123dbcfd972509a7ebec4c59d Mon Sep 17 00:00:00 2001 From: Alex Ivliev Date: Tue, 2 Jul 2024 22:24:54 +0200 Subject: [PATCH 054/214] Rebuild logical model --- nemo/src/io/parser/ast.rs | 2 +- nemo/src/io/parser/ast/term.rs | 2 +- nemo/src/rule_model.rs | 1 + nemo/src/rule_model/component.rs | 37 ++- nemo/src/rule_model/component/atom.rs | 98 +++++++ nemo/src/rule_model/component/fact.rs | 74 ++++++ .../src/rule_model/component/import_export.rs | 243 ++++++++++++++++++ nemo/src/rule_model/component/literal.rs | 59 +++++ nemo/src/rule_model/component/rule.rs | 224 ++++++++++++++++ nemo/src/rule_model/component/term.rs | 133 ++++++++++ .../rule_model/component/term/aggregate.rs | 96 +++++++ .../src/rule_model/component/term/function.rs | 107 ++++++++ nemo/src/rule_model/component/term/map.rs | 69 +++++ .../rule_model/component/term/operation.rs | 195 ++++++++++++++ .../rule_model/component/term/primitive.rs | 97 +++++++ .../component/term/primitive/ground.rs | 134 ++++++++++ .../component/term/primitive/variable.rs | 140 ++++++++++ .../term/primitive/variable/existential.rs | 103 ++++++++ .../term/primitive/variable/universal.rs | 123 +++++++++ nemo/src/rule_model/component/term/tuple.rs | 63 +++++ nemo/src/rule_model/component/variable.rs | 96 ------- nemo/src/rule_model/error.rs | 17 +- nemo/src/rule_model/origin.rs | 18 +- nemo/src/rule_model/program.rs | 26 ++ 24 files changed, 2039 insertions(+), 118 deletions(-) create mode 100644 nemo/src/rule_model/component/atom.rs create mode 100644 nemo/src/rule_model/component/fact.rs create mode 100644 nemo/src/rule_model/component/import_export.rs create mode 100644 nemo/src/rule_model/component/literal.rs create mode 100644 nemo/src/rule_model/component/rule.rs create mode 100644 nemo/src/rule_model/component/term.rs create mode 100644 nemo/src/rule_model/component/term/aggregate.rs create mode 100644 nemo/src/rule_model/component/term/function.rs create mode 100644 nemo/src/rule_model/component/term/map.rs create mode 100644 nemo/src/rule_model/component/term/operation.rs create mode 100644 nemo/src/rule_model/component/term/primitive.rs create mode 100644 nemo/src/rule_model/component/term/primitive/ground.rs create mode 100644 nemo/src/rule_model/component/term/primitive/variable.rs create mode 100644 nemo/src/rule_model/component/term/primitive/variable/existential.rs create mode 100644 nemo/src/rule_model/component/term/primitive/variable/universal.rs create mode 100644 nemo/src/rule_model/component/term/tuple.rs delete mode 100644 nemo/src/rule_model/component/variable.rs create mode 100644 nemo/src/rule_model/program.rs diff --git a/nemo/src/io/parser/ast.rs b/nemo/src/io/parser/ast.rs index 5bfbffeb2..931813397 100644 --- a/nemo/src/io/parser/ast.rs +++ b/nemo/src/io/parser/ast.rs @@ -143,7 +143,7 @@ impl AstNode for Wsoc<'_> { } impl Display for Wsoc<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { todo!() } } diff --git a/nemo/src/io/parser/ast/term.rs b/nemo/src/io/parser/ast/term.rs index 32fde33ba..07feb03f7 100644 --- a/nemo/src/io/parser/ast/term.rs +++ b/nemo/src/io/parser/ast/term.rs @@ -381,7 +381,7 @@ impl AstNode for Exponent<'_> { } impl std::fmt::Display for Exponent<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> 
std::fmt::Result { + fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { todo!() } } diff --git a/nemo/src/rule_model.rs b/nemo/src/rule_model.rs index b30b5c65d..f3422305c 100644 --- a/nemo/src/rule_model.rs +++ b/nemo/src/rule_model.rs @@ -4,3 +4,4 @@ pub(crate) mod origin; pub mod component; pub mod error; +pub mod program; diff --git a/nemo/src/rule_model/component.rs b/nemo/src/rule_model/component.rs index 803185dfb..5a925fb77 100644 --- a/nemo/src/rule_model/component.rs +++ b/nemo/src/rule_model/component.rs @@ -1,24 +1,33 @@ -//! This module defines a logical component and ... +//! This module defines the logical components that make up a program. -use std::fmt::{Debug, Display}; +pub mod atom; +pub mod fact; +pub mod import_export; +pub mod literal; +pub mod rule; +pub mod term; -use crate::io::parser::ast::AstNode; +use std::fmt::{Debug, Display}; -use super::{ - error::ProgramConstructionError, - origin::{ComponentOrigin, OriginParseReference}, -}; +use super::{error::ProgramConstructionError, origin::Origin}; -pub trait ProgramComponent: Debug + Display { - type Node<'a>: AstNode; +/// Trait implemented by objects that are part of the logical rule model of the nemo language. +pub trait ProgramComponent: Debug + Display + Clone + PartialEq + Eq { + /// Construct this object from a string. + fn parse(_string: &str) -> Result + where + Self: Sized; - fn from_ast_node<'a>(node: Self::Node<'a>, origin: OriginParseReference) -> Self; + /// Return the [Origin] of this component. + fn origin(&self) -> &Origin; - fn parse(string: &str) -> Result + /// Set the [Origin] of this component. + fn set_origin(self, origin: Origin) -> Self where Self: Sized; - fn origin(&self) -> &ComponentOrigin; + /// Validate this component + fn validate(&self) -> Result<(), ProgramConstructionError> + where + Self: Sized; } - -pub mod variable; diff --git a/nemo/src/rule_model/component/atom.rs b/nemo/src/rule_model/component/atom.rs new file mode 100644 index 000000000..7943ed9c7 --- /dev/null +++ b/nemo/src/rule_model/component/atom.rs @@ -0,0 +1,98 @@ +//! This module defines an [Atom]. + +use std::{fmt::Display, hash::Hash}; + +use crate::rule_model::{error::ProgramConstructionError, origin::Origin}; + +use super::{ + term::{Identifier, Term}, + ProgramComponent, +}; + +/// An atom +#[derive(Debug, Clone, Eq)] +pub struct Atom { + /// Origin of this component. + origin: Origin, + + /// Predicate name associated with this atom + name: Identifier, + /// Subterms of the function + terms: Vec, +} + +impl Atom { + /// Create a new [Atom]. + pub fn new(name: &str, subterms: Vec) -> Self { + Self { + origin: Origin::Created, + name: Identifier::new(name.to_string()), + terms: subterms, + } + } + + /// Return an iterator over the subterms of this atom. + pub fn subterms(&self) -> impl Iterator { + self.terms.iter() + } + + /// Return an mutable iterator over the subterms of this atom. 
+ pub fn subterms_mut(&mut self) -> impl Iterator { + self.terms.iter_mut() + } +} + +impl Display for Atom { + fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + todo!() + } +} + +impl PartialEq for Atom { + fn eq(&self, other: &Self) -> bool { + self.origin == other.origin && self.name == other.name && self.terms == other.terms + } +} + +impl Hash for Atom { + fn hash(&self, state: &mut H) { + self.name.hash(state); + self.terms.hash(state); + } +} + +impl ProgramComponent for Atom { + fn parse(_string: &str) -> Result + where + Self: Sized, + { + todo!() + } + + fn origin(&self) -> &Origin { + &self.origin + } + + fn set_origin(mut self, origin: Origin) -> Self + where + Self: Sized, + { + self.origin = origin; + self + } + + fn validate(&self) -> Result<(), ProgramConstructionError> + where + Self: Sized, + { + if !self.name.is_valid() { + todo!() + } + + for term in self.subterms() { + term.validate()?; + } + + Ok(()) + } +} diff --git a/nemo/src/rule_model/component/fact.rs b/nemo/src/rule_model/component/fact.rs new file mode 100644 index 000000000..69afd6fdb --- /dev/null +++ b/nemo/src/rule_model/component/fact.rs @@ -0,0 +1,74 @@ +//! This module defines [Fact]. + +use std::{fmt::Display, hash::Hash}; + +use crate::rule_model::origin::Origin; + +use super::{term::Term, ProgramComponent}; + +/// A (ground) fact +#[derive(Debug, Clone, Eq)] +pub struct Fact { + /// Origin of this component + origin: Origin, + + terms: Vec, +} + +impl Fact { + /// Return an iterator over the subterms of this fact. + pub fn subterms(&self) -> impl Iterator { + self.terms.iter() + } + + /// Return an mutable iterator over the subterms of this fact. + pub fn subterms_mut(&mut self) -> impl Iterator { + self.terms.iter_mut() + } +} + +impl Display for Fact { + fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + todo!() + } +} + +impl PartialEq for Fact { + fn eq(&self, other: &Self) -> bool { + self.terms == other.terms + } +} + +impl Hash for Fact { + fn hash(&self, state: &mut H) { + self.terms.hash(state); + } +} + +impl ProgramComponent for Fact { + fn parse(_string: &str) -> Result + where + Self: Sized, + { + todo!() + } + + fn origin(&self) -> &Origin { + todo!() + } + + fn set_origin(mut self, origin: Origin) -> Self + where + Self: Sized, + { + self.origin = origin; + self + } + + fn validate(&self) -> Result<(), crate::rule_model::error::ProgramConstructionError> + where + Self: Sized, + { + todo!() + } +} diff --git a/nemo/src/rule_model/component/import_export.rs b/nemo/src/rule_model/component/import_export.rs new file mode 100644 index 000000000..37f09f3d8 --- /dev/null +++ b/nemo/src/rule_model/component/import_export.rs @@ -0,0 +1,243 @@ +//! Import and export directives are a direct representation of the syntactic information +//! given in rule files. + +use std::{fmt::Display, hash::Hash}; + +use nemo_physical::datavalues::MapDataValue; + +use crate::rule_model::origin::Origin; + +use super::{term::Identifier, ProgramComponent}; + +/// The different supported variants of the RDF format. +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)] +pub enum RdfVariant { + /// An unspecified format, using the resource name as a heuristic. 
+ #[default] + Unspecified, + /// RDF 1.1 N-Triples + NTriples, + /// RDF 1.1 N-Quads + NQuads, + /// RDF 1.1 Turtle + Turtle, + /// RDF 1.1 RDF/XML + RDFXML, + /// RDF 1.1 TriG + TriG, +} + +impl Display for RdfVariant { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::NTriples => write!(f, "RDF N-Triples"), + Self::NQuads => write!(f, "RDF N-Quads"), + Self::Turtle => write!(f, "RDF Turtle"), + Self::RDFXML => write!(f, "RDF/XML"), + Self::TriG => write!(f, "RDF TriG"), + Self::Unspecified => write!(f, "RDF"), + } + } +} + +/// Supported file formats. +#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] +pub enum FileFormat { + /// Comma-separated values + CSV, + /// Delimiter-separated values + DSV, + /// Tab-separated values + TSV, + /// RDF Triples or Quads, with the given format variant. + RDF(RdfVariant), + /// JSON objects + JSON, +} + +impl Display for FileFormat { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::DSV => write!(f, "DSV"), + Self::CSV => write!(f, "CSV"), + Self::TSV => write!(f, "TSV"), + Self::JSON => write!(f, "JSON"), + Self::RDF(variant) => write!(f, "{variant}"), + } + } +} + +/// An import/export specification. This object captures all information that is typically +/// present in an import or export directive in a Nemo program, including the main format, +/// optional attributes that define additional parameters, and an indentifier to map the data +/// to or from (i.e., a predicate name). +#[derive(Debug, Clone, Eq)] +pub(crate) struct ImportExportDirective { + /// Origin of this component + origin: Origin, + + /// The predicate we're handling. + predicate: Identifier, + /// The file format and resource we're using. + format: FileFormat, + /// The attributes we've been given. + attributes: MapDataValue, +} + +impl PartialEq for ImportExportDirective { + fn eq(&self, other: &Self) -> bool { + self.predicate == other.predicate + && self.format == other.format + && self.attributes == other.attributes + } +} + +impl Hash for ImportExportDirective { + fn hash(&self, state: &mut H) { + self.predicate.hash(state); + self.format.hash(state); + self.attributes.hash(state); + } +} + +/// An import specification. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ImportDirective(pub(crate) ImportExportDirective); + +impl ImportDirective { + /// Create a new [ImportDirective]. + pub fn new(predicate: Identifier, format: FileFormat, attributes: MapDataValue) -> Self { + Self(ImportExportDirective { + origin: Origin::default(), + predicate, + format, + attributes, + }) + } + + /// Return the predicate. + pub fn predicate(&self) -> &Identifier { + &self.0.predicate + } + + /// Return the file format. + pub fn file_format(&self) -> FileFormat { + self.0.format + } + + /// Return the attributes. 
+ pub fn attributes(&self) -> &MapDataValue { + &self.0.attributes + } +} + +impl From for ImportDirective { + fn from(value: ImportExportDirective) -> Self { + Self(value) + } +} + +impl Display for ImportDirective { + fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + todo!() + } +} + +impl ProgramComponent for ImportDirective { + fn parse(_string: &str) -> Result + where + Self: Sized, + { + todo!() + } + + fn origin(&self) -> &Origin { + &self.0.origin + } + + fn set_origin(mut self, origin: Origin) -> Self + where + Self: Sized, + { + self.0.origin = origin; + self + } + + fn validate(&self) -> Result<(), crate::rule_model::error::ProgramConstructionError> + where + Self: Sized, + { + todo!() + } +} + +/// An export specification. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct ExportDirective(pub(crate) ImportExportDirective); + +impl ExportDirective { + /// Create a new [ExportDirective]. + pub fn new(predicate: Identifier, format: FileFormat, attributes: MapDataValue) -> Self { + Self(ImportExportDirective { + origin: Origin::default(), + predicate, + format, + attributes, + }) + } + + /// Return the predicate. + pub fn predicate(&self) -> &Identifier { + &self.0.predicate + } + + /// Return the file format. + pub fn file_format(&self) -> FileFormat { + self.0.format + } + + /// Return the attributes. + pub fn attributes(&self) -> &MapDataValue { + &self.0.attributes + } +} + +impl From for ExportDirective { + fn from(value: ImportExportDirective) -> Self { + Self(value) + } +} + +impl Display for ExportDirective { + fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + todo!() + } +} + +impl ProgramComponent for ExportDirective { + fn parse(_string: &str) -> Result + where + Self: Sized, + { + todo!() + } + + fn origin(&self) -> &Origin { + &self.0.origin + } + + fn set_origin(mut self, origin: Origin) -> Self + where + Self: Sized, + { + self.0.origin = origin; + self + } + + fn validate(&self) -> Result<(), crate::rule_model::error::ProgramConstructionError> + where + Self: Sized, + { + todo!() + } +} diff --git a/nemo/src/rule_model/component/literal.rs b/nemo/src/rule_model/component/literal.rs new file mode 100644 index 000000000..3514b65b1 --- /dev/null +++ b/nemo/src/rule_model/component/literal.rs @@ -0,0 +1,59 @@ +//! 
This module defines [Literal] + +use std::{fmt::Display, hash::Hash}; + +use crate::rule_model::error::ProgramConstructionError; + +use super::{atom::Atom, term::operation::Operation, ProgramComponent}; + +/// A literal that can either be a positive or negative atom or an operation +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum Literal { + /// Positive atom + Positive(Atom), + /// Negative atom + Negative(Atom), + /// Operation + Operation(Operation), +} + +impl Display for Literal { + fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + todo!() + } +} + +impl ProgramComponent for Literal { + fn parse(_string: &str) -> Result + where + Self: Sized, + { + todo!() + } + + fn origin(&self) -> &crate::rule_model::origin::Origin { + match self { + Literal::Positive(positive) => positive.origin(), + Literal::Negative(negative) => negative.origin(), + Literal::Operation(operation) => operation.origin(), + } + } + + fn set_origin(self, origin: crate::rule_model::origin::Origin) -> Self + where + Self: Sized, + { + match self { + Literal::Positive(positive) => Literal::Positive(positive.set_origin(origin)), + Literal::Negative(negative) => Literal::Negative(negative.set_origin(origin)), + Literal::Operation(operation) => Literal::Operation(operation.set_origin(origin)), + } + } + + fn validate(&self) -> Result<(), ProgramConstructionError> + where + Self: Sized, + { + todo!() + } +} diff --git a/nemo/src/rule_model/component/rule.rs b/nemo/src/rule_model/component/rule.rs new file mode 100644 index 000000000..e06f3fef4 --- /dev/null +++ b/nemo/src/rule_model/component/rule.rs @@ -0,0 +1,224 @@ +//! This module defines [Rule] and [RuleBuilder] + +use std::{fmt::Display, hash::Hash}; + +use crate::rule_model::origin::Origin; + +use super::{atom::Atom, literal::Literal, term::operation::Operation, ProgramComponent}; + +/// A rule +#[derive(Debug, Clone, Eq)] +pub struct Rule { + /// Origin of this component + origin: Origin, + + /// Name of the rule + name: Option, + + /// Head of the rule + head: Vec, + /// Body of the rule + body: Vec, +} + +impl Rule { + /// Create a new [Rule]. + pub fn new(head: Vec, body: Vec) -> Self { + Self { + origin: Origin::Created, + name: None, + head, + body, + } + } + + /// Set the name of the rule. + pub fn set_name(mut self, name: &str) -> Self { + self.name = Some(name.to_string()); + self + } +} + +impl Display for Rule { + fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + todo!() + } +} + +impl PartialEq for Rule { + fn eq(&self, other: &Self) -> bool { + self.head == other.head && self.body == other.body + } +} + +impl Hash for Rule { + fn hash(&self, state: &mut H) { + self.head.hash(state); + self.body.hash(state); + } +} + +impl ProgramComponent for Rule { + fn parse(_string: &str) -> Result + where + Self: Sized, + { + todo!() + } + + fn origin(&self) -> &Origin { + &self.origin + } + + fn set_origin(mut self, origin: Origin) -> Self + where + Self: Sized, + { + self.origin = origin; + self + } + + fn validate(&self) -> Result<(), crate::rule_model::error::ProgramConstructionError> + where + Self: Sized, + { + todo!() + } +} + +/// Builder for a rule +#[derive(Debug, Default)] +pub struct RuleBuilder { + /// Origin of the rule + origin: Origin, + + /// Builder for the head of the rule + head: RuleHeadBuilder, + /// Builder for the body of the rule + body: RuleBodyBuilder, +} + +impl RuleBuilder { + /// Set the [Origin] of the built rule. 
+ pub fn origin(mut self, origin: Origin) -> Self { + self.origin = origin; + self + } + + /// Return a builder for the body of the rule. + pub fn body(self) -> RuleBodySubBuilder { + RuleBodySubBuilder { builder: self } + } + + /// Return a builder for the head of the rule. + pub fn head(self) -> RuleHeadSubBuilder { + RuleHeadSubBuilder { builder: self } + } + + /// Finish building and return a [Rule]. + pub fn finalize(self) -> Rule { + Rule::new(self.head.finalize(), self.body.finalize()).set_origin(self.origin) + } +} + +/// Builder for the rule body +#[derive(Debug, Default)] +pub struct RuleBodyBuilder { + /// Current list of [Literal]s + literals: Vec, +} + +impl RuleBodyBuilder { + /// Add a positive atom to the body of the rule. + pub fn add_positive_atom(mut self, atom: Atom) -> Self { + self.literals.push(Literal::Positive(atom)); + self + } + + /// Add a negative atom to the body of the rule. + pub fn add_negative_atom(mut self, atom: Atom) -> Self { + self.literals.push(Literal::Negative(atom)); + self + } + + /// Add an operation to the body of the rule. + pub fn add_operation(mut self, opreation: Operation) -> Self { + self.literals.push(Literal::Operation(opreation)); + self + } + + /// Finish building and return a list of [Literal]s. + pub fn finalize(self) -> Vec { + self.literals + } +} + +/// Subbuilder for building the body of a rule +#[derive(Debug)] +pub struct RuleBodySubBuilder { + builder: RuleBuilder, +} + +impl RuleBodySubBuilder { + /// Add a positive atom to the body of the rule. + pub fn add_positive_atom(mut self, atom: Atom) -> Self { + self.builder.body = self.builder.body.add_positive_atom(atom); + self + } + + /// Add a negative atom to the body of the rule. + pub fn add_negative_atom(mut self, atom: Atom) -> Self { + self.builder.body = self.builder.body.add_negative_atom(atom); + self + } + + /// Add an operation to the body of the rule. + pub fn add_operation(mut self, opreation: Operation) -> Self { + self.builder.body = self.builder.body.add_operation(opreation); + self + } + + /// Return to the [RuleBuilder] + pub fn done(self) -> RuleBuilder { + self.builder + } +} + +/// Builder for the rule head +#[derive(Debug, Default)] +pub struct RuleHeadBuilder { + /// Current list of [Atom]s + atoms: Vec, +} + +impl RuleHeadBuilder { + /// Add another atom to the head of the rule. + pub fn add_atom(mut self, atom: Atom) -> Self { + self.atoms.push(atom); + self + } + + /// Finish building and return a list of [Atom]s. + pub fn finalize(self) -> Vec { + self.atoms + } +} + +/// Subbuilder for building the head of a rule +#[derive(Debug)] +pub struct RuleHeadSubBuilder { + builder: RuleBuilder, +} + +impl RuleHeadSubBuilder { + /// Add another atom to the head of the rule. + pub fn add_atom(mut self, atom: Atom) -> Self { + self.builder.head = self.builder.head.add_atom(atom); + self + } + + /// Return to the [RuleBuilder] + pub fn done(self) -> RuleBuilder { + self.builder + } +} diff --git a/nemo/src/rule_model/component/term.rs b/nemo/src/rule_model/component/term.rs new file mode 100644 index 000000000..2616ff44a --- /dev/null +++ b/nemo/src/rule_model/component/term.rs @@ -0,0 +1,133 @@ +//! This module defines [Term]. 
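A short sketch of the `Term` constructors this file introduces; the names follow the API added below, and the wrapper function is illustrative only:

// Illustrative use of the Term constructors defined below.
fn example_terms() -> Vec<Term> {
    let x = Term::universal_variable("x"); // displays as "?x"
    let blank = Term::anonymous_variable(); // displays as "_"
    let e = Term::existential_variable("e"); // displays as "!e"
    let n = Term::integer(42); // ground integer term
    vec![x, blank, e, n]
}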
+ +use std::fmt::{Debug, Display}; + +use function::FunctionTerm; +use map::Map; +use nemo_physical::datavalues::AnyDataValue; +use operation::Operation; +use primitive::{ground::GroundTerm, variable::Variable, Primitive}; + +use crate::rule_model::{error::ProgramConstructionError, origin::Origin}; + +use super::ProgramComponent; + +pub mod aggregate; +pub mod function; +pub mod map; +pub mod operation; +pub mod primitive; +pub mod tuple; + +/// Name of a term +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct Identifier(String); + +impl Identifier { + /// Create a new [Identifier]. + pub fn new(name: String) -> Self { + Self(name) + } + + /// Validate term name. + pub fn is_valid(&self) -> bool { + !self.0.is_empty() + } +} + +/// TODO +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)] +pub enum Term { + /// Unstructured, primitive term + Primitive(Primitive), + /// Abstract function over a list of terms + FunctionTerm(FunctionTerm), + /// Map of terms + Map(Map), + /// Operation applied to a list of terms + Operation(Operation), +} + +impl Term { + /// Create a universal variable term. + pub fn universal_variable(name: &str) -> Self { + Self::Primitive(Primitive::Variable(Variable::universal(name))) + } + + /// Create a anynmous variable term. + pub fn anonymous_variable() -> Self { + Self::Primitive(Primitive::Variable(Variable::anonymous())) + } + + /// Create a existential variable term. + pub fn existential_variable(name: &str) -> Self { + Self::Primitive(Primitive::Variable(Variable::existential(name))) + } + + /// Create an integer term + pub fn integer(number: i64) -> Self { + Self::Primitive(Primitive::Ground(GroundTerm::new( + AnyDataValue::new_integer_from_i64(number), + ))) + } +} + +impl Display for Term { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Term::Primitive(primitive) => write!(f, "{}", primitive), + Term::FunctionTerm(function) => write!(f, "{}", function), + Term::Map(map) => write!(f, "{}", map), + Term::Operation(operation) => write!(f, "{}", operation), + } + } +} + +impl ProgramComponent for Term { + fn parse(_string: &str) -> Result + where + Self: Sized, + { + todo!() + } + + fn origin(&self) -> &Origin { + match self { + Term::Primitive(primitive) => primitive.origin(), + Term::FunctionTerm(function) => function.origin(), + Term::Map(map) => map.origin(), + Term::Operation(operation) => operation.origin(), + } + } + + fn set_origin(self, origin: Origin) -> Self + where + Self: Sized, + { + match self { + Term::Primitive(primitive) => Term::Primitive(primitive.set_origin(origin)), + Term::FunctionTerm(function) => Term::FunctionTerm(function.set_origin(origin)), + Term::Map(map) => Term::Map(map.set_origin(origin)), + Term::Operation(operation) => Term::Operation(operation.set_origin(origin)), + } + } + + fn validate(&self) -> Result<(), ProgramConstructionError> + where + Self: Sized, + { + todo!() + } +} + +// impl ASTConstructable for Term { +// type Node<'a> = crate::io::parser::ast::term::Term<'a>; + +// fn from_ast_node<'a>( +// node: Self::Node<'a>, +// origin: crate::rule_model::origin::ExternalReference, +// context: &super::ASTContext, +// ) -> Self { +// todo!() +// } +// } diff --git a/nemo/src/rule_model/component/term/aggregate.rs b/nemo/src/rule_model/component/term/aggregate.rs new file mode 100644 index 000000000..1d6764d75 --- /dev/null +++ b/nemo/src/rule_model/component/term/aggregate.rs @@ -0,0 +1,96 @@ +//! 
This module defines [Aggregate] + +use std::{fmt::Display, hash::Hash}; + +use crate::rule_model::{component::ProgramComponent, origin::Origin}; + +use super::{primitive::variable::Variable, Term}; + +/// Aggregate operation on logical values +#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub enum AggregateKind { + /// Count of distinct values + CountValues, + /// Minimum numerical value + MinNumber, + /// Maximum numerical value + MaxNumber, + /// Sum of numerical values + SumOfNumbers, +} + +/// An aggregate +#[derive(Debug, Clone, Eq)] +pub struct Aggregate { + /// Origin of this component + origin: Origin, + + /// Type of aggrgate operation + kind: AggregateKind, + /// Expression over which to aggragte + aggregate: Term, + /// Distinct variables + distinct: Vec, +} + +impl Aggregate { + /// Create a new [Aggregate]. + pub fn new(kind: AggregateKind, aggregate: Term, distinct: Vec) -> Self { + Self { + origin: Origin::default(), + kind, + aggregate, + distinct, + } + } +} + +impl Display for Aggregate { + fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + todo!() + } +} + +impl PartialEq for Aggregate { + fn eq(&self, other: &Self) -> bool { + self.kind == other.kind + && self.aggregate == other.aggregate + && self.distinct == other.distinct + } +} + +impl Hash for Aggregate { + fn hash(&self, state: &mut H) { + self.kind.hash(state); + self.aggregate.hash(state); + self.distinct.hash(state); + } +} + +impl ProgramComponent for Aggregate { + fn parse(_string: &str) -> Result + where + Self: Sized, + { + todo!() + } + + fn origin(&self) -> &Origin { + &self.origin + } + + fn set_origin(mut self, origin: Origin) -> Self + where + Self: Sized, + { + self.origin = origin; + self + } + + fn validate(&self) -> Result<(), crate::rule_model::error::ProgramConstructionError> + where + Self: Sized, + { + todo!() + } +} diff --git a/nemo/src/rule_model/component/term/function.rs b/nemo/src/rule_model/component/term/function.rs new file mode 100644 index 000000000..84942a5ab --- /dev/null +++ b/nemo/src/rule_model/component/term/function.rs @@ -0,0 +1,107 @@ +//! This module defines [FunctionTerm] + +use std::{fmt::Display, hash::Hash}; + +use crate::rule_model::{ + component::ProgramComponent, error::ProgramConstructionError, origin::Origin, +}; + +use super::{Identifier, Term}; + +/// Function term +#[derive(Debug, Clone, Eq)] +pub struct FunctionTerm { + /// Origin of this component + origin: Origin, + + /// Name of the function + name: Identifier, + /// Subterms of the function + terms: Vec, +} + +impl FunctionTerm { + /// Create a new [FunctionTerm]. + pub fn new(name: &str, subterms: Vec) -> Self { + Self { + origin: Origin::Created, + name: Identifier::new(name.to_string()), + terms: subterms, + } + } + + /// Return an iterator over the subterms of this function term. + pub fn subterms(&self) -> impl Iterator { + self.terms.iter() + } + + /// Return an mutable iterator over the subterms of this function terms. 
+ pub fn subterms_mut(&mut self) -> impl Iterator { + self.terms.iter_mut() + } +} + +impl Display for FunctionTerm { + fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + todo!() + } +} + +impl PartialEq for FunctionTerm { + fn eq(&self, other: &Self) -> bool { + self.origin == other.origin && self.name == other.name && self.terms == other.terms + } +} + +impl PartialOrd for FunctionTerm { + fn partial_cmp(&self, other: &Self) -> Option { + match self.name.partial_cmp(&other.name) { + Some(core::cmp::Ordering::Equal) => {} + ord => return ord, + } + self.terms.partial_cmp(&other.terms) + } +} + +impl Hash for FunctionTerm { + fn hash(&self, state: &mut H) { + self.name.hash(state); + self.terms.hash(state); + } +} + +impl ProgramComponent for FunctionTerm { + fn parse(_string: &str) -> Result + where + Self: Sized, + { + todo!() + } + + fn origin(&self) -> &Origin { + &self.origin + } + + fn set_origin(mut self, origin: Origin) -> Self + where + Self: Sized, + { + self.origin = origin; + self + } + + fn validate(&self) -> Result<(), ProgramConstructionError> + where + Self: Sized, + { + if !self.name.is_valid() { + todo!() + } + + for term in self.subterms() { + term.validate()? + } + + Ok(()) + } +} diff --git a/nemo/src/rule_model/component/term/map.rs b/nemo/src/rule_model/component/term/map.rs new file mode 100644 index 000000000..a353d323b --- /dev/null +++ b/nemo/src/rule_model/component/term/map.rs @@ -0,0 +1,69 @@ +//! This module defines [Map] + +use std::{collections::BTreeMap, fmt::Display, hash::Hash}; + +use crate::rule_model::{component::ProgramComponent, origin::Origin}; + +use super::Term; + +/// Map term +#[derive(Debug, Clone, Eq)] +pub struct Map { + /// Origin of this component + origin: Origin, + + /// Map associating [Term]s with [Term]s + map: BTreeMap, +} + +impl Display for Map { + fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + todo!() + } +} + +impl PartialEq for Map { + fn eq(&self, other: &Self) -> bool { + self.map == other.map + } +} + +impl PartialOrd for Map { + fn partial_cmp(&self, other: &Self) -> Option { + self.map.partial_cmp(&other.map) + } +} + +impl Hash for Map { + fn hash(&self, state: &mut H) { + self.map.hash(state); + } +} + +impl ProgramComponent for Map { + fn parse(_string: &str) -> Result + where + Self: Sized, + { + todo!() + } + + fn origin(&self) -> &Origin { + todo!() + } + + fn set_origin(mut self, origin: Origin) -> Self + where + Self: Sized, + { + self.origin = origin; + self + } + + fn validate(&self) -> Result<(), crate::rule_model::error::ProgramConstructionError> + where + Self: Sized, + { + todo!() + } +} diff --git a/nemo/src/rule_model/component/term/operation.rs b/nemo/src/rule_model/component/term/operation.rs new file mode 100644 index 000000000..f847d8896 --- /dev/null +++ b/nemo/src/rule_model/component/term/operation.rs @@ -0,0 +1,195 @@ +//! This module defines [Operation]. 
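`OperationKind` below is a flat enum of built-in operations, so consumers can dispatch on it directly; the symbol mapping in this sketch is purely illustrative and not defined by the patch:

// Illustrative dispatch over a few OperationKind variants from below.
fn infix_symbol(kind: OperationKind) -> Option<&'static str> {
    match kind {
        OperationKind::Equal => Some("="),
        OperationKind::Unequals => Some("!="),
        OperationKind::NumericSum => Some("+"),
        OperationKind::NumericSubtraction => Some("-"),
        _ => None, // the remaining variants are function-style builtins
    }
}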
+ +use std::{fmt::Display, hash::Hash}; + +use crate::rule_model::{component::ProgramComponent, origin::Origin}; + +use super::Term; + +/// Supported operations +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, PartialOrd)] +pub enum OperationKind { + /// Equality + Equal, + /// Inequality + Unequals, + /// Sum of numeric values + NumericSum, + /// Subtraction between two numeric values + NumericSubtraction, + /// Product of numeric values + NumericProduct, + /// Division between two numeric values + NumericDivision, + /// Logarithm of a numeric value to some numeric base + NumericLogarithm, + /// Numeric value raised to another numeric value + NumericPower, + /// Remainder of a division between two numeric values + NumericRemainder, + /// Numeric greater than comparison + NumericGreaterthan, + /// Numeric greater than or equals comparison + NumericGreaterthaneq, + /// Numeric less than comparison + NumericLessthan, + /// Numeric less than or equals comparison + NumericLessthaneq, + /// Lexicographic comparison between strings + StringCompare, + /// Check whether string is contained in another, correspondng to SPARQL function CONTAINS. + StringContains, + /// String starting at some start position + StringSubstring, + /// First part of a string split by some other string + StringBefore, + /// Second part of a string split by some other string + StringAfter, + /// Whether string starts with a certain string + StringStarts, + /// Whether string ends with a certain string + StringEnds, + /// Boolean negation + BooleanNegation, + /// Cast to double + CastToDouble, + /// Cast to float + CastToFloat, + /// Cast to integer + CastToInteger, + /// Canonical string representation of a value + CanonicalString, + /// Check if value is an integer + CheckIsInteger, + /// Check if value is a float + CheckIsFloat, + /// Check if value is a double + CheckIsDouble, + /// Check if value is an iri + CheckIsIri, + /// Check if value is numeric + CheckIsNumeric, + /// Check if value is a null + CheckIsNull, + /// Check if value is a string + CheckIsString, + /// Get datatype of a value + Datatype, + /// Get language tag of a languaged tagged string + LanguageTag, + /// Lexical value + LexicalValue, + /// Absolute value of a numeric value + NumericAbsolute, + /// Cosine of a numeric value + NumericCosine, + /// Rounding up of a numeric value + NumericCeil, + /// Rounding down of a numeric value + NumericFloor, + /// Additive inverse of a numeric value + NumericNegation, + /// Rounding of a numeric value + NumericRound, + /// Sine of a numeric value + NumericSine, + /// Square root of a numeric value + NumericSquareroot, + /// Tangent of a numeric value + NumericTangent, + /// Length of a string value + StringLength, + /// Reverse of a string value + StringReverse, + /// String converted to lowercase letters + StringLowercase, + /// String converted to uppercase letters + StringUppercase, + /// Bitwise and operation + BitAnd, + /// Bitwise or operation + BitOr, + /// Bitwise xor operation + BitXor, + /// Conjunction of boolean values + BooleanConjunction, + /// Disjunction of boolean values + BooleanDisjunction, + /// Minimum of numeric values + NumericMinimum, + /// Maximum of numeric values + NumericMaximum, + /// Lukasiewicz norm of numeric values + NumericLukasiewicz, + /// Concatentation of two string values, correspondng to SPARQL function CONCAT. 
+ StringConcatenation, +} + +/// Operation that can be applied to terms +#[derive(Debug, Clone, Eq)] +pub struct Operation { + /// Origin of this component + origin: Origin, + + /// The kind of operation + kind: OperationKind, + /// The input arguments for the operation + subterms: Vec, +} + +impl Display for Operation { + fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + todo!() + } +} + +impl PartialEq for Operation { + fn eq(&self, other: &Self) -> bool { + self.kind == other.kind && self.subterms == other.subterms + } +} + +impl PartialOrd for Operation { + fn partial_cmp(&self, other: &Self) -> Option { + match self.kind.partial_cmp(&other.kind) { + Some(core::cmp::Ordering::Equal) => {} + ord => return ord, + } + self.subterms.partial_cmp(&other.subterms) + } +} + +impl Hash for Operation { + fn hash(&self, state: &mut H) { + self.kind.hash(state); + self.subterms.hash(state); + } +} + +impl ProgramComponent for Operation { + fn parse(_string: &str) -> Result + where + Self: Sized, + { + todo!() + } + + fn origin(&self) -> &Origin { + todo!() + } + + fn set_origin(mut self, origin: Origin) -> Self + where + Self: Sized, + { + self.origin = origin; + self + } + + fn validate(&self) -> Result<(), crate::rule_model::error::ProgramConstructionError> + where + Self: Sized, + { + todo!() + } +} diff --git a/nemo/src/rule_model/component/term/primitive.rs b/nemo/src/rule_model/component/term/primitive.rs new file mode 100644 index 000000000..15d065b21 --- /dev/null +++ b/nemo/src/rule_model/component/term/primitive.rs @@ -0,0 +1,97 @@ +//! This module defines [PrimitiveTerm]. + +pub mod ground; +pub mod variable; + +use std::{fmt::Display, hash::Hash}; + +use ground::GroundTerm; +use variable::Variable; + +use crate::rule_model::{component::ProgramComponent, origin::Origin}; + +/// Primitive term +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)] +pub enum Primitive { + /// Variable + Variable(Variable), + /// Ground term + Ground(GroundTerm), +} + +impl Primitive { + /// Return `true` when this term is not a variable and `false` otherwise. 
+ pub fn is_ground(&self) -> bool { + matches!(self, Self::Ground(_)) + } +} + +impl Display for Primitive { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Primitive::Variable(variable) => variable.fmt(f), + Primitive::Ground(ground) => ground.fmt(f), + } + } +} + +impl ProgramComponent for Primitive { + fn parse(_string: &str) -> Result + where + Self: Sized, + { + todo!() + } + + fn origin(&self) -> &Origin { + match self { + Self::Variable(variable) => variable.origin(), + Self::Ground(ground) => ground.origin(), + } + } + + fn set_origin(self, origin: Origin) -> Self + where + Self: Sized, + { + match self { + Self::Variable(variable) => Self::Variable(variable.set_origin(origin)), + Self::Ground(ground) => Self::Ground(ground.set_origin(origin)), + } + } + + fn validate(&self) -> Result<(), crate::rule_model::error::ProgramConstructionError> + where + Self: Sized, + { + match self { + Primitive::Variable(variable) => variable.validate(), + Primitive::Ground(ground) => ground.validate(), + } + } +} + +// impl ASTConstructable for Primitive { +// type Node<'a> = Term<'a>; + +// fn from_ast_node<'a>( +// node: Self::Node<'a>, +// origin: ExternalReference, +// context: &ASTContext, +// ) -> Self { +// match node { +// Term::Primitive(primitive) => { +// Primitive::Ground(GroundTerm::from_ast_node(primitive, origin, context)) +// } +// Term::Blank(token) => { +// let value: AnyDataValue = todo!(); + +// Primitive::Ground(GroundTerm::create_parsed(value, origin)) +// } +// Term::UniversalVariable(_) | Term::ExistentialVariable(_) => { +// Primitive::Variable(Variable::from_ast_node(node, origin, context)) +// } +// _ => unreachable!("TODO"), +// } +// } +// } diff --git a/nemo/src/rule_model/component/term/primitive/ground.rs b/nemo/src/rule_model/component/term/primitive/ground.rs new file mode 100644 index 000000000..2cc76e577 --- /dev/null +++ b/nemo/src/rule_model/component/term/primitive/ground.rs @@ -0,0 +1,134 @@ +//! This module defines [GroundTerm]. + +use std::{fmt::Display, hash::Hash}; + +use nemo_physical::datavalues::AnyDataValue; + +use crate::rule_model::{ + component::ProgramComponent, error::ProgramConstructionError, origin::Origin, +}; + +/// Primitive ground term +#[derive(Debug, Clone, Eq)] +pub struct GroundTerm { + /// Origin of this component + origin: Origin, + /// Value of this term + value: AnyDataValue, +} + +impl GroundTerm { + /// Create a new [GroundTerm]. 
+ pub fn new(value: AnyDataValue) -> Self { + Self { + origin: Origin::Created, + value, + } + } +} + +impl Display for GroundTerm { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.value.fmt(f) + } +} + +impl PartialEq for GroundTerm { + fn eq(&self, other: &Self) -> bool { + self.value == other.value + } +} + +impl PartialOrd for GroundTerm { + fn partial_cmp(&self, other: &Self) -> Option { + self.value.partial_cmp(&other.value) + } +} + +impl Hash for GroundTerm { + fn hash(&self, state: &mut H) { + self.value.hash(state); + } +} + +impl ProgramComponent for GroundTerm { + fn parse(_string: &str) -> Result + where + Self: Sized, + { + todo!() + } + + fn origin(&self) -> &Origin { + &self.origin + } + + fn set_origin(mut self, origin: Origin) -> Self + where + Self: Sized, + { + self.origin = origin; + self + } + + fn validate(&self) -> Result<(), ProgramConstructionError> + where + Self: Sized, + { + Ok(()) + } +} + +// impl ASTConstructable for GroundTerm { +// type Node<'a> = Primitive<'a>; + +// fn from_ast_node<'a>( +// node: Self::Node<'a>, +// origin: ExternalReference, +// context: &ASTContext, +// ) -> Self { +// match node { +// Primitive::Constant(token) => { +// Self::create_parsed(AnyDataValue::new_iri(token.to_string()), origin) +// } +// Primitive::PrefixedConstant { +// prefix, constant, .. +// } => { +// let prefixed_constant = prefix +// .map(|token| { +// context +// .prefixes +// .get(&token.to_string()) +// .cloned() +// .unwrap_or(token.to_string()) // TODO: We could also panic here +// }) +// .unwrap_or(String::from("")) +// + &constant.to_string(); + +// Self::create_parsed(AnyDataValue::new_iri(prefixed_constant), origin) +// } +// Primitive::Number { +// span, +// sign, +// before, +// dot, +// after, +// exponent, +// } => { +// // TODO: Create number values +// // Self::create_parsed(AnyDataValue:: span.to_string(), origin) +// todo!() +// } +// Primitive::String(string) => { +// Self::create_parsed(AnyDataValue::new_plain_string(string.to_string()), origin) +// } +// Primitive::Iri(iri) => { +// Self::create_parsed(AnyDataValue::new_iri(iri.to_string()), origin) +// } +// Primitive::RdfLiteral { string, iri, .. } => Self::create_parsed( +// AnyDataValue::new_other(string.to_string(), iri.to_string()), +// origin, +// ), +// } +// } +// } diff --git a/nemo/src/rule_model/component/term/primitive/variable.rs b/nemo/src/rule_model/component/term/primitive/variable.rs new file mode 100644 index 000000000..d98ea3caf --- /dev/null +++ b/nemo/src/rule_model/component/term/primitive/variable.rs @@ -0,0 +1,140 @@ +//! This module defines [Variable] + +use std::fmt::Display; + +use existential::ExistentialVariable; +use universal::UniversalVariable; + +use crate::rule_model::{error::ProgramConstructionError, origin::Origin}; + +use super::ProgramComponent; + +pub mod existential; +pub mod universal; + +/// Name of a variable +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct VariableName(String); + +impl VariableName { + /// Create a new [VariableName]. + fn new(name: String) -> Self { + Self(name) + } + + /// Validate variable name. 
+ pub fn is_valid(&self) -> bool { + !self.0.is_empty() + } +} + +impl Display for VariableName { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } +} + +/// Variable that can be bound to a specific value +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)] +pub enum Variable { + /// Universal variable + Universal(UniversalVariable), + /// Existential variable + Existential(ExistentialVariable), +} + +impl Variable { + /// Create a new universal variable. + pub fn universal(name: &str) -> Self { + Self::Universal(UniversalVariable::new(name)) + } + + /// Create a new existential variable. + pub fn existential(name: &str) -> Self { + Self::Existential(ExistentialVariable::new(name)) + } + + /// Create a new anonymous variable. + pub fn anonymous() -> Self { + Self::Universal(UniversalVariable::new_anonymous()) + } + + /// Return the name of the variable or `None` if it is anonymous + pub fn name(&self) -> Option { + match self { + Variable::Universal(variable) => variable.name(), + Variable::Existential(variable) => Some(variable.name()), + } + } +} + +impl Display for Variable { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Variable::Universal(variable) => variable.fmt(f), + Variable::Existential(variable) => variable.fmt(f), + } + } +} + +impl ProgramComponent for Variable { + fn parse(_string: &str) -> Result + where + Self: Sized, + { + todo!() + } + + fn origin(&self) -> &Origin { + match self { + Variable::Universal(variable) => variable.origin(), + Variable::Existential(variable) => variable.origin(), + } + } + + fn set_origin(self, origin: Origin) -> Self + where + Self: Sized, + { + match self { + Variable::Universal(variable) => Self::Universal(variable.set_origin(origin)), + Variable::Existential(variable) => Self::Existential(variable.set_origin(origin)), + } + } + + fn validate(&self) -> Result<(), ProgramConstructionError> + where + Self: Sized, + { + match &self { + Variable::Universal(universal) => { + universal.validate()?; + } + Variable::Existential(existential) => { + existential.validate()?; + } + } + + Ok(()) + } +} + +// impl ASTConstructable for Variable { +// type Node<'a> = Term<'a>; + +// fn from_ast_node<'a>( +// node: Self::Node<'a>, +// origin: ExternalReference, +// context: &ASTContext, +// ) -> Self { +// match node { +// Term::UniversalVariable(_) => { +// Variable::Universal(UniversalVariable::from_ast_node(node, origin, context)) +// } +// Term::ExistentialVariable(_) => { +// Variable::Existential(ExistentialVariable::from_ast_node(node, origin, context)) +// } +// _ => unreachable!("TODO"), +// } +// } +// } diff --git a/nemo/src/rule_model/component/term/primitive/variable/existential.rs b/nemo/src/rule_model/component/term/primitive/variable/existential.rs new file mode 100644 index 000000000..f584aab57 --- /dev/null +++ b/nemo/src/rule_model/component/term/primitive/variable/existential.rs @@ -0,0 +1,103 @@ +//! This module defines [ExistentialVariable]. + +use std::{fmt::Display, hash::Hash}; + +use crate::rule_model::{ + component::ProgramComponent, error::ProgramConstructionError, origin::Origin, +}; + +use super::VariableName; + +/// Variable that allows to assert the existence of an object +#[derive(Debug, Clone, Eq)] +pub struct ExistentialVariable { + /// Origin of this component + origin: Origin, + + /// Name of the variable + name: VariableName, +} + +impl ExistentialVariable { + /// Create a new [ExistentialVariable]. 
+ pub fn new(name: &str) -> Self { + Self { + origin: Origin::Created, + name: VariableName::new(name.to_string()), + } + } + + /// Return the name of this variable. + pub fn name(&self) -> String { + self.name.to_string() + } +} + +impl Display for ExistentialVariable { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "!{}", self.name) + } +} + +impl PartialEq for ExistentialVariable { + fn eq(&self, other: &Self) -> bool { + self.name == other.name + } +} + +impl PartialOrd for ExistentialVariable { + fn partial_cmp(&self, other: &Self) -> Option { + self.name.partial_cmp(&other.name) + } +} + +impl Hash for ExistentialVariable { + fn hash(&self, state: &mut H) { + self.name.hash(state); + } +} + +impl ProgramComponent for ExistentialVariable { + fn parse(_string: &str) -> Result + where + Self: Sized, + { + todo!() + } + + fn origin(&self) -> &Origin { + &self.origin + } + + fn set_origin(mut self, origin: Origin) -> Self + where + Self: Sized, + { + self.origin = origin; + self + } + + fn validate(&self) -> Result<(), ProgramConstructionError> + where + Self: Sized, + { + todo!() + } +} + +// impl ASTConstructable for ExistentialVariable { +// type Node<'a> = Term<'a>; + +// fn from_ast_node<'a>(node: Term<'a>, origin: ExternalReference, _context: &ASTContext) -> Self { +// if let Term::UniversalVariable(token) = node { +// let name = token.span.to_string(); + +// Self { +// origin: Origin::External(origin), +// name: VariableName::new(name), +// } +// } else { +// unreachable!("TODO") +// } +// } +// } diff --git a/nemo/src/rule_model/component/term/primitive/variable/universal.rs b/nemo/src/rule_model/component/term/primitive/variable/universal.rs new file mode 100644 index 000000000..e2dae3680 --- /dev/null +++ b/nemo/src/rule_model/component/term/primitive/variable/universal.rs @@ -0,0 +1,123 @@ +//! This module defines [UniversalVariable]. + +use std::{fmt::Display, hash::Hash}; + +use crate::rule_model::{ + component::ProgramComponent, error::ProgramConstructionError, origin::Origin, +}; + +use super::VariableName; + +/// Variable that can be bound to a specific value +/// +/// Universal variables may not have a name, +/// in which case we call them anonymous. +#[derive(Debug, Clone, Eq)] +pub struct UniversalVariable { + /// Origin of this component + origin: Origin, + + /// Name of the variable + /// + /// This can be `None` in case this is an anonymous variable. + name: Option, +} + +impl UniversalVariable { + /// Create a new named [UniversalVariable] + pub fn new(name: &str) -> Self { + Self { + origin: Origin::Created, + name: Some(VariableName::new(name.to_string())), + } + } + + /// Create a new anonymous [UniversalVariable] + pub fn new_anonymous() -> Self { + Self { + origin: Origin::Created, + name: None, + } + } + + /// Return the name of this variable, + /// or `None` if the variable is unnamed. 
+ pub fn name(&self) -> Option { + self.name.as_ref().map(|name| name.to_string()) + } + + /// Return `true` if this is an anonymous variable, + /// and `false` otherwise + pub fn is_anonymous(&self) -> bool { + self.name.is_none() + } +} + +impl Display for UniversalVariable { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match &self.name { + Some(name) => write!(f, "?{}", name), + None => write!(f, "_"), + } + } +} + +impl PartialEq for UniversalVariable { + fn eq(&self, other: &Self) -> bool { + self.name == other.name + } +} + +impl PartialOrd for UniversalVariable { + fn partial_cmp(&self, other: &Self) -> Option { + self.name.partial_cmp(&other.name) + } +} + +impl Hash for UniversalVariable { + fn hash(&self, state: &mut H) { + self.name.hash(state); + } +} + +impl ProgramComponent for UniversalVariable { + fn parse(_string: &str) -> Result { + todo!() + } + + fn origin(&self) -> &Origin { + &self.origin + } + + fn set_origin(mut self, origin: Origin) -> Self + where + Self: Sized, + { + self.origin = origin; + self + } + + fn validate(&self) -> Result<(), ProgramConstructionError> + where + Self: Sized, + { + todo!() + } +} + +// impl ASTConstructable for UniversalVariable { +// type Node<'a> = Term<'a>; + +// fn from_ast_node<'a>(node: Term<'a>, origin: ExternalReference, _context: &ASTContext) -> Self { +// if let Term::UniversalVariable(token) = node { +// let name = token.span.to_string(); + +// Self { +// origin: Origin::External(origin), +// name: Some(VariableName::new(name)), +// } +// } else { +// unreachable!("TODO") +// } +// } +// } diff --git a/nemo/src/rule_model/component/term/tuple.rs b/nemo/src/rule_model/component/term/tuple.rs new file mode 100644 index 000000000..ff010f14c --- /dev/null +++ b/nemo/src/rule_model/component/term/tuple.rs @@ -0,0 +1,63 @@ +//! This module defines [Tuple]. 
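Before the tuple definition, a brief sketch of the variable behavior added in the two modules above; the assertions mirror the `Display` impls as written, and the wrapper function is illustrative:

// Illustrative check of the variable Display impls from above.
fn variable_display_examples() {
    let named = UniversalVariable::new("x");
    let anon = UniversalVariable::new_anonymous();
    let exist = ExistentialVariable::new("e");
    assert_eq!(named.to_string(), "?x");
    assert_eq!(anon.to_string(), "_");
    assert_eq!(exist.to_string(), "!e");
    assert!(anon.is_anonymous());
}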
+ +use std::{fmt::Display, hash::Hash}; + +use crate::rule_model::{component::ProgramComponent, origin::Origin}; + +use super::Term; + +/// An ordered list of terms +#[derive(Debug, Clone, Eq)] +pub struct Tuple { + /// Origin of this component + origin: Origin, + + /// Ordered list of terms contained in this tuple + terms: Vec, +} + +impl Display for Tuple { + fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + todo!() + } +} + +impl PartialEq for Tuple { + fn eq(&self, other: &Self) -> bool { + self.terms == other.terms + } +} + +impl Hash for Tuple { + fn hash(&self, state: &mut H) { + self.terms.hash(state); + } +} + +impl ProgramComponent for Tuple { + fn parse(_string: &str) -> Result + where + Self: Sized, + { + todo!() + } + + fn origin(&self) -> &Origin { + todo!() + } + + fn set_origin(mut self, origin: Origin) -> Self + where + Self: Sized, + { + self.origin = origin; + self + } + + fn validate(&self) -> Result<(), crate::rule_model::error::ProgramConstructionError> + where + Self: Sized, + { + todo!() + } +} diff --git a/nemo/src/rule_model/component/variable.rs b/nemo/src/rule_model/component/variable.rs deleted file mode 100644 index 02a5f202e..000000000 --- a/nemo/src/rule_model/component/variable.rs +++ /dev/null @@ -1,96 +0,0 @@ -use std::fmt::Display; - -use crate::{ - io::parser::ast::term::Term, - rule_model::{ - error::ProgramConstructionError, - origin::{ComponentOrigin, OriginParseReference}, - }, -}; - -use super::ProgramComponent; - -/// Name of a variable -#[derive(Debug, Clone)] -pub struct VariableName(String); - -impl VariableName { - fn new(name: String) -> Result { - // TODO: Validate name - if name.is_empty() { - return Err(ProgramConstructionError::InvalidVariableName(name)); - } - - Ok(Self::new_unvalidated(name)) - } - - fn new_unvalidated(name: String) -> Self { - Self(name) - } -} - -impl Display for VariableName { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - self.0.fmt(f) - } -} - -#[derive(Debug)] -pub struct UniversalVariale { - origin: ComponentOrigin, - - name: Option, -} - -impl UniversalVariale { - fn from_term(term: Term) -> Self { - todo!() - } -} - -impl Display for UniversalVariale { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match &self.name { - Some(name) => write!(f, "?{}", name), - None => write!(f, "_"), - } - } -} - -impl ProgramComponent for UniversalVariale { - type Node<'a> = Term<'a>; - - fn from_ast_node<'a>(node: Term<'a>, origin: OriginParseReference) -> Self { - if let Term::UniversalVariable(token) = node { - let string = token.span.to_string(); - } - - todo!() - } - - fn parse(string: &str) -> Result { - todo!() - } - - fn origin(&self) -> &ComponentOrigin { - &self.origin - } -} - -#[derive(Debug)] -pub struct ExistentialVariable { - origin: ComponentOrigin, - - name: VariableName, -} - -#[derive(Debug)] -pub enum Variable { - Universal(UniversalVariale), - Existential(ExistentialVariable), -} - -mod test { - #[test] - fn create_variable() {} -} diff --git a/nemo/src/rule_model/error.rs b/nemo/src/rule_model/error.rs index 8f9331064..236195a5e 100644 --- a/nemo/src/rule_model/error.rs +++ b/nemo/src/rule_model/error.rs @@ -1,9 +1,24 @@ +//! 
This module defines [ProgramConstructionError]
+
 use thiserror::Error;
+use super::component::{
+    atom::Atom,
+    fact::Fact,
+    term::{primitive::variable::Variable, Term},
+};
+
+/// Error returned during the construction of objects from nemo's logical rule model
 #[derive(Error, Debug)]
 pub enum ProgramConstructionError {
-    #[error("invalid variable name: {0}")]
+    #[error("variable \"{0}\" has an invalid name")]
     InvalidVariableName(String),
+    #[error("term \"{0}\" has an invalid name")]
+    InvalidIdentifier(String),
+    #[error("atom \"{0}\" has an invalid name")]
+    InvalidAtomName(String),
+    #[error("fact {0} contains non-ground terms")]
+    NonGroundFact(Fact),
     #[error("parse error")] // TODO: Return parser error here
     ParseError,
 }
diff --git a/nemo/src/rule_model/origin.rs b/nemo/src/rule_model/origin.rs
index e2660002b..994ae7d52 100644
--- a/nemo/src/rule_model/origin.rs
+++ b/nemo/src/rule_model/origin.rs
@@ -1,10 +1,18 @@
 //! This module defines
 
-pub(crate) type OriginParseReference = usize;
+pub(crate) type ExternalReference = usize;
 
-#[derive(Debug)]
-pub enum ComponentOrigin {
+/// Origin of a program component
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+pub enum Origin {
+    /// Component was created via a constructor
     Created,
-    Parsed(OriginParseReference),
-    Something(Box<ComponentOrigin>),
+    /// Component was created due to translation from an external input, e.g., parsing
+    External(ExternalReference),
+}
+
+impl Default for Origin {
+    fn default() -> Self {
+        Self::Created
+    }
 }
diff --git a/nemo/src/rule_model/program.rs b/nemo/src/rule_model/program.rs
new file mode 100644
index 000000000..8f884614c
--- /dev/null
+++ b/nemo/src/rule_model/program.rs
@@ -0,0 +1,26 @@
+//! This module defines [Program].
+
+use super::component::{
+    fact::Fact,
+    import_export::{ExportDirective, ImportDirective},
+    rule::Rule,
+};
+
+/// Representation of a nemo program
+#[derive(Debug)]
+pub struct Program {
+    /// Imported resources
+    imports: Vec<ImportDirective>,
+    /// Exported resources
+    exports: Vec<ExportDirective>,
+    /// Rules
+    rules: Vec<Rule>,
+    /// Facts
+    facts: Vec<Fact>,
+}
+
+impl Program {
+    pub fn from_ast() -> Self {
+        todo!()
+    }
+}

From bf02e1968be25057b6a95e3bb03638dcfd8b5728 Mon Sep 17 00:00:00 2001
From: Alex Ivliev
Date: Wed, 3 Jul 2024 09:50:38 +0200
Subject: [PATCH 055/214] Translate AST model into logical model

---
 nemo/src/io/parser/ast.rs | 1 +
 nemo/src/io/parser/ast/term.rs | 2 +-
 nemo/src/rule_model/component.rs | 2 +
 nemo/src/rule_model/component/base.rs | 30 ++
 nemo/src/rule_model/component/output.rs | 25 ++
 nemo/src/rule_model/component/rule.rs | 12 +
 nemo/src/rule_model/component/term.rs | 33 ++-
 .../rule_model/component/term/operation.rs | 81 ++++++
 nemo/src/rule_model/component/term/tuple.rs | 16 ++
 nemo/src/rule_model/program.rs | 269 +++++++++++++++++-
 10 files changed, 449 insertions(+), 22 deletions(-)
 create mode 100644 nemo/src/rule_model/component/base.rs
 create mode 100644 nemo/src/rule_model/component/output.rs

diff --git a/nemo/src/io/parser/ast.rs b/nemo/src/io/parser/ast.rs
index 931813397..17fc7c492 100644
--- a/nemo/src/io/parser/ast.rs
+++ b/nemo/src/io/parser/ast.rs
@@ -167,6 +167,7 @@ impl List<'_, T> {
         vec
     }
 }
+
 impl IntoIterator for List<'_, T> {
     type Item = T;
 
diff --git a/nemo/src/io/parser/ast/term.rs b/nemo/src/io/parser/ast/term.rs
index 07feb03f7..d57044ae6 100644
--- a/nemo/src/io/parser/ast/term.rs
+++ b/nemo/src/io/parser/ast/term.rs
@@ -199,7 +199,7 @@ impl std::fmt::Display for Term<'_> {
 }
 
 #[derive(Debug, Clone, PartialEq)]
-pub(crate) enum Primitive<'a> {
+pub enum Primitive<'a> {
     Constant(Span<'a>),
PrefixedConstant { span: Span<'a>, diff --git a/nemo/src/rule_model/component.rs b/nemo/src/rule_model/component.rs index 5a925fb77..b218f9097 100644 --- a/nemo/src/rule_model/component.rs +++ b/nemo/src/rule_model/component.rs @@ -1,9 +1,11 @@ //! This module defines the logical components that make up a program. pub mod atom; +pub mod base; pub mod fact; pub mod import_export; pub mod literal; +pub mod output; pub mod rule; pub mod term; diff --git a/nemo/src/rule_model/component/base.rs b/nemo/src/rule_model/component/base.rs new file mode 100644 index 000000000..a2c597ac9 --- /dev/null +++ b/nemo/src/rule_model/component/base.rs @@ -0,0 +1,30 @@ +//! This module defines [Base] + +use std::fmt::Display; + +use crate::rule_model::origin::Origin; + +/// TODO +#[derive(Debug, Clone)] +pub struct Base { + /// Origin of this component + origin: Origin, + + base: String, +} + +impl Base { + /// Create a new [Base] + pub fn new(base: String) -> Self { + Self { + origin: Origin::default(), + base, + } + } +} + +impl Display for Base { + fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + todo!() + } +} diff --git a/nemo/src/rule_model/component/output.rs b/nemo/src/rule_model/component/output.rs new file mode 100644 index 000000000..44389cb08 --- /dev/null +++ b/nemo/src/rule_model/component/output.rs @@ -0,0 +1,25 @@ +//! This module defines [Output] + +use crate::rule_model::origin::Origin; + +use super::term::Identifier; + +/// TODO +#[derive(Debug, Clone)] +pub struct Output { + /// Origin of this component + origin: Origin, + + /// + predicate: Identifier, +} + +impl Output { + /// Create a mew [Output] + pub fn new(predicate: Identifier) -> Self { + Self { + origin: Origin::default(), + predicate, + } + } +} diff --git a/nemo/src/rule_model/component/rule.rs b/nemo/src/rule_model/component/rule.rs index e06f3fef4..d5a3e934b 100644 --- a/nemo/src/rule_model/component/rule.rs +++ b/nemo/src/rule_model/component/rule.rs @@ -147,6 +147,12 @@ impl RuleBodyBuilder { self } + /// Add a literal to the body of the rule. + pub fn add_literal(mut self, literal: Literal) -> Self { + self.literals.push(literal); + self + } + /// Finish building and return a list of [Literal]s. pub fn finalize(self) -> Vec { self.literals @@ -178,6 +184,12 @@ impl RuleBodySubBuilder { self } + /// Add a literal to the body of the rule. + pub fn add_literal(mut self, literal: Literal) -> Self { + self.builder.body = self.builder.body.add_literal(literal); + self + } + /// Return to the [RuleBuilder] pub fn done(self) -> RuleBuilder { self.builder diff --git a/nemo/src/rule_model/component/term.rs b/nemo/src/rule_model/component/term.rs index 2616ff44a..3d202b85c 100644 --- a/nemo/src/rule_model/component/term.rs +++ b/nemo/src/rule_model/component/term.rs @@ -1,5 +1,12 @@ //! This module defines [Term]. 
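The new components above all initialize `origin: Origin::default()`. A minimal sketch of the intended lifecycle of `Origin` (assuming the module path `nemo::rule_model::origin` from the diffs): components built by hand stay `Created`, while AST translation stamps the positional index of the source node.

    use nemo::rule_model::origin::Origin; // path assumed

    fn main() {
        // Constructors leave components marked as freshly created...
        assert_eq!(Origin::default(), Origin::Created);

        // ...while translation records where in the AST a component came
        // from, e.g. the third subterm of a parsed atom:
        let from_ast = Origin::External(2);
        assert_ne!(Origin::default(), from_ast);
    }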
+pub mod aggregate; +pub mod function; +pub mod map; +pub mod operation; +pub mod primitive; +pub mod tuple; + use std::fmt::{Debug, Display}; use function::FunctionTerm; @@ -7,18 +14,12 @@ use map::Map; use nemo_physical::datavalues::AnyDataValue; use operation::Operation; use primitive::{ground::GroundTerm, variable::Variable, Primitive}; +use tuple::Tuple; use crate::rule_model::{error::ProgramConstructionError, origin::Origin}; use super::ProgramComponent; -pub mod aggregate; -pub mod function; -pub mod map; -pub mod operation; -pub mod primitive; -pub mod tuple; - /// Name of a term #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct Identifier(String); @@ -46,6 +47,8 @@ pub enum Term { Map(Map), /// Operation applied to a list of terms Operation(Operation), + /// Tuple + Tuple(Tuple), } impl Term { @@ -64,6 +67,11 @@ impl Term { Self::Primitive(Primitive::Variable(Variable::existential(name))) } + /// Create a groud term. + pub fn ground(value: AnyDataValue) -> Self { + Self::Primitive(Primitive::Ground(GroundTerm::new(value))) + } + /// Create an integer term pub fn integer(number: i64) -> Self { Self::Primitive(Primitive::Ground(GroundTerm::new( @@ -73,13 +81,8 @@ impl Term { } impl Display for Term { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Term::Primitive(primitive) => write!(f, "{}", primitive), - Term::FunctionTerm(function) => write!(f, "{}", function), - Term::Map(map) => write!(f, "{}", map), - Term::Operation(operation) => write!(f, "{}", operation), - } + fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + todo!() } } @@ -97,6 +100,7 @@ impl ProgramComponent for Term { Term::FunctionTerm(function) => function.origin(), Term::Map(map) => map.origin(), Term::Operation(operation) => operation.origin(), + Term::Tuple(tuple) => tuple.origin(), } } @@ -109,6 +113,7 @@ impl ProgramComponent for Term { Term::FunctionTerm(function) => Term::FunctionTerm(function.set_origin(origin)), Term::Map(map) => Term::Map(map.set_origin(origin)), Term::Operation(operation) => Term::Operation(operation.set_origin(origin)), + Term::Tuple(tuple) => Term::Tuple(tuple.set_origin(origin)), } } diff --git a/nemo/src/rule_model/component/term/operation.rs b/nemo/src/rule_model/component/term/operation.rs index f847d8896..b447ba431 100644 --- a/nemo/src/rule_model/component/term/operation.rs +++ b/nemo/src/rule_model/component/term/operation.rs @@ -125,6 +125,71 @@ pub enum OperationKind { StringConcatenation, } +impl OperationKind { + /// Return the [OperationKind] corresponding to the given operation name or `None` if there is no such operation. 
+    pub fn from_name(name: &str) -> Option<Self> {
+        Some(match name {
+            "+" => Self::NumericSum,
+            "-" => Self::NumericSubtraction,
+            "/" => Self::NumericDivision,
+            "*" => Self::NumericProduct,
+            "<" => Self::NumericLessthan,
+            ">" => Self::NumericGreaterthan,
+            "<=" => Self::NumericLessthaneq,
+            ">=" => Self::NumericGreaterthaneq,
+            "isInteger" => Self::CheckIsInteger,
+            "isFloat" => Self::CheckIsFloat,
+            "isDouble" => Self::CheckIsDouble,
+            "isIri" => Self::CheckIsIri,
+            "isNumeric" => Self::CheckIsNumeric,
+            "isNull" => Self::CheckIsNull,
+            "isString" => Self::CheckIsString,
+            "ABS" => Self::NumericAbsolute,
+            "SQRT" => Self::NumericSquareroot,
+            "NOT" => Self::BooleanNegation,
+            "fullStr" => Self::CanonicalString,
+            "STR" => Self::LexicalValue,
+            "SIN" => Self::NumericSine,
+            "COS" => Self::NumericCosine,
+            "TAN" => Self::NumericTangent,
+            "STRLEN" => Self::StringLength,
+            "STRREV" => Self::StringReverse,
+            "UCASE" => Self::StringUppercase,
+            "LCASE" => Self::StringLowercase,
+            "ROUND" => Self::NumericRound,
+            "CEIL" => Self::NumericCeil,
+            "FLOOR" => Self::NumericFloor,
+            "DATATYPE" => Self::Datatype,
+            "LANG" => Self::LanguageTag,
+            "INT" => Self::CastToInteger,
+            "DOUBLE" => Self::CastToDouble,
+            "FLOAT" => Self::CastToFloat,
+            "LOG" => Self::NumericLogarithm,
+            "POW" => Self::NumericPower,
+            "COMPARE" => Self::StringCompare,
+            "CONTAINS" => Self::StringContains,
+            "SUBSTR" => Self::StringSubstring,
+            "STRSTARTS" => Self::StringStarts,
+            "STRENDS" => Self::StringEnds,
+            "STRBEFORE" => Self::StringBefore,
+            "STRAFTER" => Self::StringAfter,
+            "REM" => Self::NumericRemainder,
+            "BITAND" => Self::BitAnd,
+            "BITOR" => Self::BitOr,
+            "BITXOR" => Self::BitXor,
+            "MAX" => Self::NumericMaximum,
+            "MIN" => Self::NumericMinimum,
+            "LUKA" => Self::NumericLukasiewicz,
+            "SUM" => Self::NumericSum,
+            "PROD" => Self::NumericProduct,
+            "AND" => Self::BooleanConjunction,
+            "OR" => Self::BooleanDisjunction,
+            "CONCAT" => Self::StringConcatenation,
+            _ => return None,
+        })
+    }
+}
+
 /// Operation that can be applied to terms
 #[derive(Debug, Clone, Eq)]
 pub struct Operation {
@@ -137,6 +202,22 @@ pub struct Operation {
     subterms: Vec<Term>,
 }
 
+impl Operation {
+    /// Create a new [Operation]
+    pub fn new(kind: OperationKind, subterms: Vec<Term>) -> Self {
+        Self {
+            origin: Origin::default(),
+            kind,
+            subterms,
+        }
+    }
+
+    /// Create a new [Operation] giving the string name of the operation.
+    pub fn new_from_name(operation: &str, subterms: Vec<Term>) -> Option<Self> {
+        Some(Self::new(OperationKind::from_name(operation)?, subterms))
+    }
+}
+
 impl Display for Operation {
     fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         todo!()
diff --git a/nemo/src/rule_model/component/term/tuple.rs b/nemo/src/rule_model/component/term/tuple.rs
index ff010f14c..92a6eeca1 100644
--- a/nemo/src/rule_model/component/term/tuple.rs
+++ b/nemo/src/rule_model/component/term/tuple.rs
@@ -16,6 +16,16 @@ pub struct Tuple {
    terms: Vec<Term>,
 }
 
+impl Tuple {
+    /// Create a new [Tuple].
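A short usage sketch of the lookup above (crate paths assumed): known names resolve to a builtin OperationKind, and matching is exact-case, so `isInteger` resolves while `strlen` does not. A `None` result is the signal the later AST translation uses to fall back to an uninterpreted function term.

    use nemo::rule_model::component::term::operation::OperationKind; // path assumed

    fn main() {
        // Builtin names resolve to operation kinds...
        assert!(matches!(
            OperationKind::from_name("STRLEN"),
            Some(OperationKind::StringLength)
        ));
        // ...lookup is exact-case...
        assert!(OperationKind::from_name("strlen").is_none());
        // ...and unknown names yield `None`, so `ancestor(...)` stays an
        // uninterpreted function term.
        assert!(OperationKind::from_name("ancestor").is_none());
    }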
+    pub fn new(terms: Vec<Term>) -> Self {
+        Self {
+            origin: Origin::default(),
+            terms,
+        }
+    }
+
 impl Display for Tuple {
     fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         todo!()
@@ -34,6 +44,12 @@ impl Hash for Tuple {
     }
 }
 
+impl PartialOrd for Tuple {
+    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
+        self.terms.partial_cmp(&other.terms)
+    }
+}
+
 impl ProgramComponent for Tuple {
     fn parse(_string: &str) -> Result
     where
diff --git a/nemo/src/rule_model/program.rs b/nemo/src/rule_model/program.rs
index 8f884614c..4f6ea9e17 100644
--- a/nemo/src/rule_model/program.rs
+++ b/nemo/src/rule_model/program.rs
@@ -1,13 +1,30 @@
 //! This module defines [Program].
 
-use super::component::{
-    fact::Fact,
-    import_export::{ExportDirective, ImportDirective},
-    rule::Rule,
+use nemo_physical::datavalues::AnyDataValue;
+
+use crate::{io::parser::ast, rule_model::component::term::tuple::Tuple};
+
+use super::{
+    component::{
+        atom::Atom,
+        base::Base,
+        fact::Fact,
+        import_export::{ExportDirective, ImportDirective},
+        literal::Literal,
+        output::Output,
+        rule::{Rule, RuleBuilder},
+        term::{
+            function::FunctionTerm,
+            operation::{Operation, OperationKind},
+            Term,
+        },
+        ProgramComponent,
+    },
+    origin::Origin,
 };
 
 /// Representation of a nemo program
-#[derive(Debug)]
+#[derive(Debug, Default)]
 pub struct Program {
     /// Imported resources
     imports: Vec<ImportDirective>,
@@ -17,10 +34,248 @@ pub struct Program {
     rules: Vec<Rule>,
     /// Facts
     facts: Vec<Fact>,
+    /// Base
+    base: Option<Base>,
+    /// Outputs
+    outputs: Vec<Output>,
 }
 
 impl Program {
-    pub fn from_ast() -> Self {
-        todo!()
+    /// Build a [Program] from an [ast::program::Program].
+    pub fn from_ast(ast_program: ast::program::Program) -> Self {
+        let mut program = Program::default();
+
+        for (statement_index, statement) in ast_program.statements.iter().enumerate() {
+            match statement {
+                ast::statement::Statement::Directive(directive) => {
+                    program.ast_build_directive(directive);
+                }
+                ast::statement::Statement::Fact {
+                    span,
+                    doc_comment,
+                    atom,
+                    dot,
+                } => todo!(),
+                ast::statement::Statement::Rule { head, body, .. } => {
+                    program.ast_build_rule(head, body);
+                }
+                ast::statement::Statement::Comment(_) => todo!(),
+                ast::statement::Statement::Error(_) => todo!(),
+            }
+        }
+
+        program
+    }
+
+    fn ast_build_rule(
+        &mut self,
+        head: &ast::List<ast::atom::Atom>,
+        body: &ast::List<ast::atom::Atom>,
+    ) {
+        let origin = Origin::External(self.rules.len());
+
+        let rule_builder = RuleBuilder::default().origin(origin);
+        let mut head_builder = rule_builder.head();
+
+        // TODO: Implement a normal iterator to avoid cloning
+        for (head_index, head_atom) in head.clone().into_iter().enumerate() {
+            let origin = Origin::External(head_index);
+            if let Literal::Positive(atom) = Self::ast_build_literal(origin, &head_atom) {
+                head_builder = head_builder.add_atom(atom);
+            } else {
+                unreachable!("head must only contain positive atoms")
+            }
+        }
+
+        let mut body_builder = head_builder.done().body();
+
+        // TODO: Implement a normal iterator to avoid cloning
+        for (body_index, body_atom) in head.clone().into_iter().enumerate() {
+            let origin = Origin::External(body_index);
+            body_builder = body_builder.add_literal(Self::ast_build_literal(origin, &body_atom));
+        }
+
+        self.rules.push(body_builder.done().finalize());
+    }
+
+    fn ast_build_literal(origin: Origin, atom: &ast::atom::Atom) -> Literal {
+        match atom {
+            ast::atom::Atom::Positive(positive_atom) => {
+                Literal::Positive(Self::ast_build_atom(origin, positive_atom))
+            }
+            ast::atom::Atom::Negative {
+                atom: negative_atom,
+                ..
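The translation above drives the fluent rule builder from the earlier commit; a minimal standalone sketch of the same flow for a rule like `p(?X) :- q(?X)` (the entry points are taken from the diffs, the crate paths are assumed):

    use nemo::rule_model::component::{
        atom::Atom, literal::Literal, rule::RuleBuilder, term::Term,
    }; // paths assumed

    fn main() {
        // Head atoms first, then body literals, then finalize.
        let _rule = RuleBuilder::default()
            .head()
            .add_atom(Atom::new("p", vec![Term::universal_variable("X")]))
            .done()
            .body()
            .add_literal(Literal::Positive(Atom::new(
                "q",
                vec![Term::universal_variable("X")],
            )))
            .done()
            .finalize();
    }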
+ } => Literal::Negative(Self::ast_build_atom(origin, negative_atom)), + ast::atom::Atom::InfixAtom { + lhs, + operation, + rhs, + .. + } => { + let left = Self::ast_build_inner_term(Origin::External(0), lhs); + let right = Self::ast_build_inner_term(Origin::External(1), rhs); + + Literal::Operation( + Operation::new_from_name(&operation.to_string(), vec![left, right]) + .expect("unkown infix operation"), + ) + } + ast::atom::Atom::Map(_) => { + // Return unsupported error + todo!() + } + } + } + + fn ast_build_atom(origin: Origin, atom: &ast::tuple::Tuple) -> Atom { + let predicate_name = atom + .identifier + .expect("Atom must have a predicate name") + .to_string(); + let subterms = match &atom.terms { + Some(terms) => terms.to_vec(), + None => vec![], + }; + + let mut translated_subterms = Vec::new(); + + for (term_index, subterm) in subterms.into_iter().enumerate() { + let origin = Origin::External(term_index); + translated_subterms.push(Self::ast_build_inner_term(origin, &subterm)); + } + + Atom::new(&predicate_name, translated_subterms).set_origin(origin) + } + + fn ast_build_inner_term(origin: Origin, term: &ast::term::Term) -> Term { + match term { + ast::term::Term::Primitive(primitive) => Self::ast_build_primitive(origin, primitive), + ast::term::Term::UniversalVariable(name) => Term::universal_variable(&name.to_string()), + ast::term::Term::ExistentialVariable(name) => { + Term::existential_variable(&name.to_string()) + } + ast::term::Term::UnaryPrefix { + operation, term, .. + } => { + // TODO: Currently no associated function with this + todo!() + } + ast::term::Term::Binary { + lhs, + operation, + rhs, + .. + } => { + let left = Self::ast_build_inner_term(Origin::External(0), lhs); + let right = Self::ast_build_inner_term(Origin::External(1), rhs); + + Term::Operation( + Operation::new_from_name(&operation.to_string(), vec![left, right]) + .expect("unrecognized binary operation"), + ) + } + ast::term::Term::Aggregation { + operation, terms, .. + } => { + todo!() + } + ast::term::Term::Tuple(tuple) => Self::ast_build_inner_tuple(origin, tuple), + ast::term::Term::Map(_) => todo!(), + ast::term::Term::Blank(_) => todo!(), + } + .set_origin(origin) + } + + fn ast_build_primitive(origin: Origin, primitive: &ast::term::Primitive) -> Term { + match primitive { + ast::term::Primitive::Constant(value) => { + Term::ground(AnyDataValue::new_iri(value.to_string())) + } + ast::term::Primitive::PrefixedConstant { + span, + prefix, + colon, + constant, + } => todo!(), + ast::term::Primitive::Number { + span, + sign, + before, + dot, + after, + exponent, + } => todo!(), + ast::term::Primitive::String(string) => { + Term::ground(AnyDataValue::new_plain_string(string.to_string())) + } + ast::term::Primitive::Iri(iri) => Term::ground(AnyDataValue::new_iri(iri.to_string())), + ast::term::Primitive::RdfLiteral { string, iri, .. 
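`ast_build_primitive` funnels every literal value into `Term::ground` over a nemo-physical `AnyDataValue`. A small sketch of that mapping in isolation (constructors as used in the diff, crate paths assumed; the datatype IRI is a made-up example):

    use nemo_physical::datavalues::AnyDataValue;
    use nemo::rule_model::component::term::Term; // path assumed

    fn main() {
        // IRIs, plain strings and other typed literals all become ground terms:
        let iri = Term::ground(AnyDataValue::new_iri("https://example.org/a".to_string()));
        let text = Term::ground(AnyDataValue::new_plain_string("hello".to_string()));
        let typed = Term::ground(AnyDataValue::new_other(
            "42".to_string(),
            "https://example.org/datatype".to_string(),
        ));
        let _ = (iri, text, typed);
    }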
} => { + Term::ground(AnyDataValue::new_other(string.to_string(), iri.to_string())) + } + } + .set_origin(origin) + } + + fn ast_build_inner_tuple(origin: Origin, tuple: &ast::tuple::Tuple) -> Term { + let subterms = match &tuple.terms { + Some(terms) => terms.to_vec(), + None => vec![], + }; + + let mut translated_subterms = Vec::new(); + + for (term_index, subterm) in subterms.into_iter().enumerate() { + let origin = Origin::External(term_index); + translated_subterms.push(Self::ast_build_inner_term(origin, &subterm)); + } + + match tuple.identifier { + Some(name) => match OperationKind::from_name(&name.to_string()) { + Some(kind) => Term::Operation(Operation::new(kind, translated_subterms)), + None => { + Term::FunctionTerm(FunctionTerm::new(&name.to_string(), translated_subterms)) + } + }, + None => Term::Tuple(Tuple::new(translated_subterms)), + } + } + + fn ast_build_directive(&mut self, directive: &ast::directive::Directive) { + match directive { + ast::directive::Directive::Base { base_iri, .. } => { + self.base = Some(Base::new(base_iri.to_string())); + // TODO: Set origin + } + ast::directive::Directive::Prefix { + span, + doc_comment, + prefix, + prefix_iri, + dot, + } => todo!(), + ast::directive::Directive::Import { + span, + doc_comment, + predicate, + arrow, + map, + dot, + } => todo!(), + ast::directive::Directive::Export { + span, + doc_comment, + predicate, + arrow, + map, + dot, + } => todo!(), + ast::directive::Directive::Output { + span, + doc_comment, + predicates, + dot, + } => todo!(), + } } } From 1ff95df6208a7eba599f05bb920370d398506419 Mon Sep 17 00:00:00 2001 From: Alex Ivliev Date: Wed, 3 Jul 2024 09:52:14 +0200 Subject: [PATCH 056/214] Translate AST model into logical model (WIP) --- nemo/src/rule_model/program.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo/src/rule_model/program.rs b/nemo/src/rule_model/program.rs index 4f6ea9e17..8509f6cb8 100644 --- a/nemo/src/rule_model/program.rs +++ b/nemo/src/rule_model/program.rs @@ -90,7 +90,7 @@ impl Program { let mut body_builder = head_builder.done().body(); // TODO: Implement a normal iterator to avoid cloning - for (body_index, body_atom) in head.clone().into_iter().enumerate() { + for (body_index, body_atom) in body.clone().into_iter().enumerate() { let origin = Origin::External(body_index); body_builder = body_builder.add_literal(Self::ast_build_literal(origin, &body_atom)); } From a096dc4aaab08b724b90017de26395388655f4b3 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Tue, 9 Jul 2024 21:33:38 +0200 Subject: [PATCH 057/214] Add NamedTuple type --- nemo/src/io/parser.rs | 152 +++++++++++--------------- nemo/src/io/parser/ast.rs | 144 ++++++++++++------------ nemo/src/io/parser/ast/atom.rs | 24 ++-- nemo/src/io/parser/ast/named_tuple.rs | 63 +++++++++++ nemo/src/io/parser/ast/term.rs | 53 ++++----- nemo/src/io/parser/ast/tuple.rs | 4 - nemo/src/rule_model/program.rs | 55 ++++++---- 7 files changed, 273 insertions(+), 222 deletions(-) create mode 100644 nemo/src/io/parser/ast/named_tuple.rs diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index 3e41b894a..8fc1225b5 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -2433,6 +2433,7 @@ pub mod new { use std::borrow::BorrowMut; use std::cell::RefCell; + use super::ast::named_tuple::NamedTuple; use super::ast::{ atom::*, directive::*, map::*, program::*, statement::*, term::*, tuple::*, List, Position, Wsoc, @@ -2834,14 +2835,14 @@ pub mod new { fn parse_head<'a, 's, E: ParseError> + ContextError, Context>>( 
input: Input<'a, 's>, ) -> IResult, List<'a, Atom<'a>>, E> { - context(Context::RuleHead, parse_list(parse_head_atoms))(input) + context(Context::RuleHead, parse_list(parse_atoms))(input) } /// Parse the body atoms of a rule. fn parse_body<'a, 's, E: ParseError> + ContextError, Context>>( input: Input<'a, 's>, ) -> IResult, List<'a, Atom<'a>>, E> { - context(Context::RuleBody, parse_list(parse_body_atoms))(input) + context(Context::RuleBody, parse_list(parse_atoms))(input) } /// Parse the directives (@base, @prefix, @import, @export, @output). @@ -3130,26 +3131,8 @@ pub mod new { } } - /// Parse the head atoms. The same as the body atoms except for disallowing negated atoms. - fn parse_head_atoms< - 'a, - 's, - E: ParseError> + ContextError, Context>, - >( - input: Input<'a, 's>, - ) -> IResult, Atom<'a>, E> { - context( - Context::HeadAtoms, - alt((parse_normal_atom, parse_infix_atom, parse_map_atom)), - )(input) - } - - /// Parse the body atoms. The same as the head atoms except for allowing negated atoms. - fn parse_body_atoms< - 'a, - 's, - E: ParseError> + ContextError, Context>, - >( + /// Parse the different atom variants. + fn parse_atoms<'a, 's, E: ParseError> + ContextError, Context>>( input: Input<'a, 's>, ) -> IResult, Atom<'a>, E> { context( @@ -3223,16 +3206,13 @@ pub mod new { }) } - /// Parse a tuple with an optional name, like `ident(term1, term2)` - /// or just `(int, int, skip)`. + /// Parse a tuple like `(int, int, skip)`. A 1-tuple is denoted `(,)` (with a trailing comma) to distinquish it from parenthesised expressions. fn parse_tuple<'a, 's, E: ParseError> + ContextError, Context>>( input: Input<'a, 's>, ) -> IResult, Tuple<'a>, E> { context( Context::Tuple, tuple(( - opt(lex_ident), - wsoc0, open_paren, wsoc0, opt(parse_list(parse_term)), @@ -3241,12 +3221,11 @@ pub mod new { )), )(input) .map( - |(rest_input, (identifier, _ws1, open_paren, _ws2, terms, _ws3, close_paren))| { + |(rest_input, (open_paren, _ws1, terms, _ws2, close_paren))| { ( rest_input, Tuple { span: outer_span(input.input, rest_input.input), - identifier, open_paren, terms, close_paren, @@ -3264,33 +3243,21 @@ pub mod new { E: ParseError> + ContextError, Context>, >( input: Input<'a, 's>, - ) -> IResult, Tuple<'a>, E> { + ) -> IResult, NamedTuple<'a>, E> { context( Context::NamedTuple, - tuple(( - alt((lex_prefixed_ident, lex_ident)), - wsoc0, - open_paren, - wsoc0, - opt(parse_list(parse_term)), - wsoc0, - close_paren, - )), + tuple((alt((lex_prefixed_ident, lex_ident)), wsoc0, parse_tuple)), )(input) - .map( - |(rest_input, (identifier, _ws1, open_paren, _ws2, terms, _ws3, close_paren))| { - ( - rest_input, - Tuple { - span: outer_span(input.input, rest_input.input), - identifier: Some(identifier), - open_paren, - terms, - close_paren, - }, - ) - }, - ) + .map(|(rest_input, (identifier, _ws, tuple))| { + ( + rest_input, + NamedTuple { + span: outer_span(input.input, rest_input.input), + identifier, + tuple, + }, + ) + }) } /// Parse a map. Maps are denoted with `{…}` and can haven an optional name, e.g. `csv {…}`. 
@@ -3911,19 +3878,22 @@ pub mod new { statements: vec![Statement::Fact { span: s!(0, 1, "a(B,C)."), doc_comment: None, - atom: Atom::Positive(Tuple { + atom: Atom::Positive(NamedTuple { span: s!(0, 1, "a(B,C)"), - identifier: Some(s!(0, 1, "a"),), - open_paren: s!(1, 1, "("), - terms: Some(List { - span: s!(2, 1, "B,C"), - first: Term::Primitive(Primitive::Constant(s!(2, 1, "B"),)), - rest: Some(vec![( - s!(3, 1, ","), - Term::Primitive(Primitive::Constant(s!(4, 1, "C"),)), - )]), - }), - close_paren: s!(5, 1, ")"), + identifier: s!(0, 1, "a"), + tuple: Tuple { + span: s!(1, 1, "(B,C)"), + open_paren: s!(1, 1, "("), + terms: Some(List { + span: s!(2, 1, "B,C"), + first: Term::Primitive(Primitive::Constant(s!(2, 1, "B"),)), + rest: Some(vec![( + s!(3, 1, ","), + Term::Primitive(Primitive::Constant(s!(4, 1, "C"),)), + )]), + }), + close_paren: s!(5, 1, ")"), + } }), dot: s!(6, 1, ".") }], @@ -4066,29 +4036,36 @@ pub mod new { Statement::Fact { span: s!(0, 1, "some(Fact, with, whitespace) ."), doc_comment: None, - atom: Atom::Positive(Tuple { + atom: Atom::Positive(NamedTuple { span: s!(0, 1, "some(Fact, with, whitespace)"), - identifier: Some(s!(0, 1, "some"),), - open_paren: s!(4, 1, "("), - terms: Some(List { - span: s!(5, 1, "Fact, with, whitespace"), - first: Term::Primitive(Primitive::Constant(s!(5, 1, "Fact"),)), - rest: Some(vec![ - ( - s!(9, 1, ","), - Term::Primitive(Primitive::Constant(s!(11, 1, "with"))), - ), - ( - s!(15, 1, ","), - Term::Primitive(Primitive::Constant(s!( - 17, - 1, - "whitespace" - ))), - ), - ]), - }), - close_paren: s!(27, 1, ")"), + identifier: s!(0, 1, "some"), + tuple: Tuple { + span: s!(4, 1, "(Fact, with, whitespace)"), + open_paren: s!(4, 1, "("), + terms: Some(List { + span: s!(5, 1, "Fact, with, whitespace"), + first: Term::Primitive(Primitive::Constant(s!( + 5, 1, "Fact" + ),)), + rest: Some(vec![ + ( + s!(9, 1, ","), + Term::Primitive(Primitive::Constant(s!( + 11, 1, "with" + ))), + ), + ( + s!(15, 1, ","), + Term::Primitive(Primitive::Constant(s!( + 17, + 1, + "whitespace" + ))), + ), + ]), + }), + close_paren: s!(27, 1, ")"), + } }), dot: s!(29, 1, "."), }, @@ -4480,7 +4457,6 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters span: s!(0, 1, "(15+3*2-(7+35)*8)/3"), lhs: Box::new(Term::Tuple(Box::new(Tuple { span: s!(0, 1, "(15+3*2-(7+35)*8)"), - identifier: None, open_paren: T!(OpenParen, 0, 1, "("), terms: Some(List { span: s!(1, 1, "15+3*2-(7+35)*8"), @@ -4522,7 +4498,6 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters span: s!(8, 1, "(7+35)*8"), lhs: Box::new(Term::Tuple(Box::new(Tuple { span: s!(8, 1, "(7+35)"), - identifier: None, open_paren: T! {OpenParen, 8, 1, "("}, terms: Some(List { span: s!(9, 1, "7+35"), @@ -4641,7 +4616,6 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters span: s!(7, 1, "(7+35)*8/3"), lhs: Box::new(Term::Tuple(Box::new(Tuple { span: s!(7, 1, "(7+35)"), - identifier: None, open_paren: T! 
{OpenParen, 7,1,"("}, terms: Some(List { span: s!(8, 1, "7+35"), diff --git a/nemo/src/io/parser/ast.rs b/nemo/src/io/parser/ast.rs index 17fc7c492..70643578c 100644 --- a/nemo/src/io/parser/ast.rs +++ b/nemo/src/io/parser/ast.rs @@ -1,6 +1,6 @@ use tower_lsp::lsp_types::SymbolKind; -use crate::io::lexer::{Span, Token}; +use crate::io::lexer::Span; use ascii_tree::{write_tree, Tree}; use std::fmt::Display; @@ -11,6 +11,7 @@ pub mod program; pub(crate) mod statement; pub(crate) mod term; pub(crate) mod tuple; +pub(crate) mod named_tuple; pub trait AstNode: std::fmt::Debug + Display + Sync { fn children(&self) -> Option>; @@ -155,35 +156,19 @@ pub struct List<'a, T> { // (,T)* pub rest: Option, T)>>, } -impl List<'_, T> { - pub fn to_vec(&self) -> Vec { +impl<'a, T> List<'a, T> { + pub fn to_item_vec(&'a self) -> Vec<&'a T> { let mut vec = Vec::new(); - vec.push(self.first.clone()); + vec.push(&self.first); if let Some(rest) = &self.rest { for (_, item) in rest { - vec.push(item.clone()); + vec.push(&item); } } vec } } -impl IntoIterator for List<'_, T> { - type Item = T; - - type IntoIter = std::vec::IntoIter; - - fn into_iter(self) -> Self::IntoIter { - let mut vec = Vec::new(); - vec.push(self.first); - if let Some(rest) = self.rest { - for (_, item) in rest { - vec.push(item); - } - } - vec.into_iter() - } -} impl AstNode for List<'_, T> { fn children(&self) -> Option> { let mut vec: Vec<&dyn AstNode> = Vec::new(); @@ -235,6 +220,27 @@ impl Display for List<'_, T> { } } +impl<'a, T> IntoIterator for &'a List<'a, T> { + type Item = &'a T; + + type IntoIter = ListIterator<&'a T>; + + fn into_iter(self) -> Self::IntoIter { + ListIterator(self.to_item_vec().into_iter()) + } +} + +#[derive(Debug)] +pub struct ListIterator(std::vec::IntoIter); + +impl Iterator for ListIterator { + type Item = T; + + fn next(&mut self) -> Option { + self.0.next() + } +} + pub(crate) fn get_all_tokens(node: &dyn AstNode) -> Vec<&dyn AstNode> { let mut vec = Vec::new(); if let Some(children) = node.children() { @@ -262,6 +268,8 @@ pub(crate) fn ast_to_ascii_tree(node: &dyn AstNode) -> Tree { } mod test { + use named_tuple::NamedTuple; + use super::*; use super::{ atom::Atom, @@ -321,26 +329,26 @@ mod test { doc_comment: Some( s!(184,7,"%% This is just an example predicate.\n") ), - atom: Atom::Positive(Tuple { + atom: Atom::Positive(NamedTuple { span: s!(222,8,"somePredicate(ConstA, ConstB)"), - identifier: Some( - s!(222, 8, "somePredicate"), - ), - open_paren: - s!(235,8,"(") - , - terms: Some(List { - span: s!(236, 8, "ConstA, ConstB"), - first: Term::Primitive(Primitive::Constant( s!(236, 8, "ConstA"), - )), - rest: Some(vec![( - s!(242, 8, ","), - Term::Primitive(Primitive::Constant( s!(244, 8, "ConstB"), + identifier: s!(222, 8, "somePredicate"), + tuple: Tuple { + span: s!(235,8,"(ConstA, ConstB)"), + open_paren: + s!(235,8,"(") + , + terms: Some(List { + span: s!(236, 8, "ConstA, ConstB"), + first: Term::Primitive(Primitive::Constant( s!(236, 8, "ConstA"), )), - )]), - }), - close_paren: - s!(250,8,")") + rest: Some(vec![( + s!(242, 8, ","), + Term::Primitive(Primitive::Constant( s!(244, 8, "ConstB"), + )), + )]), + }), + close_paren: s!(250,8,")") + } }), dot: s!(251,8,".") @@ -354,43 +362,45 @@ mod test { doc_comment: Some(s!(262,11,"%% This is just an example rule.\n")), head: List { span: s!(295, 12, "someHead(?VarA)"), - first: Atom::Positive(Tuple { + first: Atom::Positive(NamedTuple { span: s!(295,12,"someHead(?VarA)"), - identifier: Some( - s!(295, 12, "someHead"), - ), - open_paren: 
s!(303,12,"(") , - terms: Some(List { - span: s!(304, 12, "?VarA"), - first: Term::UniversalVariable( s!(304, 12, "?VarA"), - ), - rest: None, - }), - close_paren: s!(309,12,")") , + identifier: s!(295, 12, "someHead"), + tuple: Tuple { + span: s!(303,12,"(?VarA)"), + open_paren: s!(303,12,"(") , + terms: Some(List { + span: s!(304, 12, "?VarA"), + first: Term::UniversalVariable( s!(304, 12, "?VarA"), + ), + rest: None, + }), + close_paren: s!(309,12,")") , + } }), rest: None, }, arrow: s!(311,12,":-"), body: List { span: s!(314, 12, "somePredicate(?VarA, ConstB)"), - first: Atom::Positive(Tuple { + first: Atom::Positive(NamedTuple { span: s!(314, 12,"somePredicate(?VarA, ConstB)"), - identifier: Some( - s!(314, 12, "somePredicate"), - ), - open_paren: s!(327,12,"("), - terms: Some(List { - span: s!(328, 12, "?Var, ConstB"), - first: Term::UniversalVariable( s!(328, 12, "?VarA"), - ), - rest: Some(vec![( - s!(333, 12, ","), + identifier: s!(314, 12, "somePredicate"), + tuple: Tuple { + span: s!(327,12,"(?VarA, ConstB)"), + open_paren: s!(327,12,"("), + terms: Some(List { + span: s!(328, 12, "?Var, ConstB"), + first: Term::UniversalVariable( s!(328, 12, "?VarA"), + ), + rest: Some(vec![( + s!(333, 12, ","), - Term::Primitive(Primitive::Constant(s!(335, 12, "ConstB"), - )), - )]), - }), - close_paren: s!(341, 12,")") , + Term::Primitive(Primitive::Constant(s!(335, 12, "ConstB"), + )), + )]), + }), + close_paren: s!(341, 12,")") , + } }), rest: None, }, diff --git a/nemo/src/io/parser/ast/atom.rs b/nemo/src/io/parser/ast/atom.rs index 47ccc2e08..cc0217366 100644 --- a/nemo/src/io/parser/ast/atom.rs +++ b/nemo/src/io/parser/ast/atom.rs @@ -1,19 +1,19 @@ use tower_lsp::lsp_types::SymbolKind; use super::map::Map; +use super::named_tuple::NamedTuple; use super::term::Term; -use super::tuple::Tuple; -use super::{ast_to_ascii_tree, AstNode, Range, Wsoc}; -use crate::io::lexer::{Span, Token}; +use super::{ast_to_ascii_tree, AstNode, Range}; +use crate::io::lexer::Span; use ascii_tree::write_tree; #[derive(Debug, Clone, PartialEq)] pub enum Atom<'a> { - Positive(Tuple<'a>), + Positive(NamedTuple<'a>), Negative { span: Span<'a>, neg: Span<'a>, - atom: Tuple<'a>, + atom: NamedTuple<'a>, }, InfixAtom { span: Span<'a>, @@ -25,7 +25,7 @@ pub enum Atom<'a> { } impl Atom<'_> { - fn tuple(&self) -> Option<&Tuple<'_>> { + fn named_tuple(&self) -> Option<&NamedTuple<'_>> { match &self { Atom::Positive(tuple) => Some(tuple), Atom::Negative { atom, .. 
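The reworked `List` API from this commit replaces the consuming iterator with a borrowing one (`to_item_vec` plus `IntoIterator for &List`), so callers can enumerate items without requiring `T: Clone`. A sketch of the resulting call-site pattern (path assumed):

    use nemo::io::parser::ast::List; // path assumed

    // Collect each item together with its position, borrowing instead of
    // cloning; this is the shape the AST-to-logical translation needs for
    // assigning `Origin::External` indices.
    fn positional_items<'a, T>(list: &'a List<'a, T>) -> Vec<(usize, &'a T)> {
        list.into_iter().enumerate().collect()
    }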
} => Some(atom), @@ -89,24 +89,24 @@ impl AstNode for Atom<'_> { } fn lsp_identifier(&self) -> Option<(String, String)> { - self.tuple().map(|tuple| { + self.named_tuple().map(|named_tuple| { ( - format!("atom/{}", tuple.identifier.unwrap().span().fragment()), + format!("atom/{}", named_tuple.identifier.fragment()), "file".to_string(), ) }) } fn lsp_range_to_rename(&self) -> Option { - self.tuple() - .and_then(|tuple| tuple.identifier) + self.named_tuple() + .and_then(|named_tuple| Some(named_tuple.identifier)) .map(|identifier| identifier.range()) } fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> { - match self.tuple() { + match self.named_tuple() { Some(tuple) => Some(( - format!("Atom: {}", tuple.identifier.unwrap().fragment()), + format!("Atom: {}", tuple.identifier.fragment()), SymbolKind::FUNCTION, )), None => Some((String::from("Atom"), SymbolKind::FUNCTION)), diff --git a/nemo/src/io/parser/ast/named_tuple.rs b/nemo/src/io/parser/ast/named_tuple.rs new file mode 100644 index 000000000..ef14633b1 --- /dev/null +++ b/nemo/src/io/parser/ast/named_tuple.rs @@ -0,0 +1,63 @@ +use std::fmt::Display; + +use crate::io::{lexer::Span, parser::ast::ast_to_ascii_tree}; + +use super::{tuple::Tuple, AstNode}; +use ascii_tree::write_tree; + +#[derive(Debug, Clone, PartialEq)] +pub struct NamedTuple<'a> { + pub span: Span<'a>, + pub identifier: Span<'a>, + pub tuple: Tuple<'a>, +} + +impl AstNode for NamedTuple<'_> { + // NOTE: This flattens the tuple children into the vec. An alternative could be + // vec![&self.identifier, &self.tuple] but then you always have an `Tuple` as an + // child + fn children(&self) -> Option> { + let mut vec: Vec<&dyn AstNode> = vec![&self.identifier]; + if let Some(mut children) = self.tuple.children() { + vec.append(&mut children); + }; + Some(vec) + } + + fn span(&self) -> Span { + self.span + } + + fn is_token(&self) -> bool { + false + } + + fn name(&self) -> String { + format!( + "NamedTuple \x1b[34m@{}:{} \x1b[92m{:?}\x1b[0m", + self.span.location_line(), + self.span.get_utf8_column(), + self.span.fragment() + ) + } + + fn lsp_identifier(&self) -> Option<(String, String)> { + todo!() + } + + fn lsp_symbol_info(&self) -> Option<(String, tower_lsp::lsp_types::SymbolKind)> { + todo!() + } + + fn lsp_range_to_rename(&self) -> Option { + todo!() + } +} + +impl Display for NamedTuple<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut output = String::new(); + write_tree(&mut output, &ast_to_ascii_tree(self))?; + write!(f, "{output}") + } +} diff --git a/nemo/src/io/parser/ast/term.rs b/nemo/src/io/parser/ast/term.rs index d57044ae6..52671a29a 100644 --- a/nemo/src/io/parser/ast/term.rs +++ b/nemo/src/io/parser/ast/term.rs @@ -1,6 +1,7 @@ use tower_lsp::lsp_types::SymbolKind; use super::map::Map; +use super::named_tuple::NamedTuple; use super::tuple::Tuple; use super::{ast_to_ascii_tree, AstNode, List, Range, Wsoc}; use crate::io::lexer::{Span, Token}; @@ -31,6 +32,7 @@ pub enum Term<'a> { close_paren: Span<'a>, }, Tuple(Box>), + NamedTuple(Box>), Map(Box>), Blank(Span<'a>), } @@ -71,7 +73,8 @@ impl AstNode for Term<'_> { Some(vec) } // TODO: check whether directly the children or Some(vec![named_tuple]) should get returned (for fidelity in ast) - Term::Tuple(named_tuple) => named_tuple.children(), + Term::Tuple(tuple) => tuple.children(), + Term::NamedTuple(named_tuple) => named_tuple.children(), Term::Map(map) => map.children(), Term::Blank(token) => Some(vec![token]), } @@ -79,15 +82,16 @@ impl AstNode for Term<'_> { fn 
span(&self) -> Span { match self { - Term::Primitive(t) => t.span(), - Term::UniversalVariable(t) => t.span(), - Term::ExistentialVariable(t) => t.span(), + Term::Primitive(p) => p.span(), + Term::UniversalVariable(span) => *span, + Term::ExistentialVariable(span) => *span, Term::UnaryPrefix { span, .. } => *span, Term::Binary { span, .. } => *span, Term::Aggregation { span, .. } => *span, - Term::Tuple(named_tuple) => named_tuple.span(), + Term::Tuple(tuple) => tuple.span(), + Term::NamedTuple(named_tuple) => named_tuple.span(), Term::Map(map) => map.span(), - Term::Blank(t) => t.span(), + Term::Blank(span) => *span, } } @@ -114,13 +118,8 @@ impl AstNode for Term<'_> { Term::UnaryPrefix { .. } => name!("Unary Term"), Term::Binary { .. } => name!("Binary Term"), Term::Aggregation { .. } => name!("Aggregation"), - Term::Tuple(f) => { - if f.identifier.is_some() { - name!("Function Symbol") - } else { - name!("Tuple") - } - } + Term::Tuple(_) => name!("Tuple"), + Term::NamedTuple(_) => name!("Function"), Term::Map(_) => name!("Map"), Term::Blank(_) => name!("Blank"), } @@ -136,12 +135,10 @@ impl AstNode for Term<'_> { format!("aggregation/{}", operation.span().fragment()), "file".to_string(), )), - Term::Tuple(tuple) => tuple.identifier.map(|identifier| { - ( - format!("function/{}", identifier.span().fragment()), - "file".to_string(), - ) - }), + Term::NamedTuple(named_tuple) => Some(( + format!("function/{}", named_tuple.identifier.span().fragment()), + "file".to_string(), + )), _ => None, } } @@ -155,7 +152,8 @@ impl AstNode for Term<'_> { Term::ExistentialVariable(t) => Some(t.range()), Term::Binary { .. } => None, Term::Aggregation { operation, .. } => Some(operation.range()), - Term::Tuple(tuple) => tuple.identifier.map(|identifier| identifier.range()), + Term::Tuple(_) => None, + Term::NamedTuple(named_tuple) => Some(named_tuple.identifier.range()), Term::Map(_map) => None, } } @@ -176,16 +174,11 @@ impl AstNode for Term<'_> { format!("Aggregation: {}", operation.fragment()), SymbolKind::OPERATOR, )), - Term::Tuple(tuple) => { - if let Some(identifier) = tuple.identifier { - Some(( - format!("Function: {}", identifier.fragment()), - SymbolKind::OPERATOR, - )) - } else { - Some((String::from("Tuple"), SymbolKind::ARRAY)) - } - } + Term::Tuple(_) => Some((String::from("Tuple"), SymbolKind::ARRAY)), + Term::NamedTuple(named_tuple) => Some(( + format!("Function: {}", named_tuple.identifier.fragment()), + SymbolKind::OPERATOR, + )), Term::Map(_map) => Some((String::from("Map"), SymbolKind::ARRAY)), } } diff --git a/nemo/src/io/parser/ast/tuple.rs b/nemo/src/io/parser/ast/tuple.rs index d3f5e7625..1442632c7 100644 --- a/nemo/src/io/parser/ast/tuple.rs +++ b/nemo/src/io/parser/ast/tuple.rs @@ -8,7 +8,6 @@ use ascii_tree::write_tree; #[derive(Debug, Clone, PartialEq)] pub struct Tuple<'a> { pub span: Span<'a>, - pub identifier: Option>, pub open_paren: Span<'a>, pub terms: Option>>, pub close_paren: Span<'a>, @@ -17,9 +16,6 @@ pub struct Tuple<'a> { impl AstNode for Tuple<'_> { fn children(&self) -> Option> { let mut vec: Vec<&dyn AstNode> = Vec::new(); - if let Some(identifier) = &self.identifier { - vec.push(identifier); - } vec.push(&self.open_paren); if let Some(terms) = &self.terms { vec.push(terms); diff --git a/nemo/src/rule_model/program.rs b/nemo/src/rule_model/program.rs index 8509f6cb8..907ad82d0 100644 --- a/nemo/src/rule_model/program.rs +++ b/nemo/src/rule_model/program.rs @@ -77,8 +77,8 @@ impl Program { let rule_builder = RuleBuilder::default().origin(origin); let mut 
head_builder = rule_builder.head(); - // TODO: Implement a normal iterator to avoid cloning - for (head_index, head_atom) in head.clone().into_iter().enumerate() { + // TODO: check whether the list iterator implementation is good + for (head_index, head_atom) in head.to_item_vec().iter().enumerate() { let origin = Origin::External(head_index); if let Literal::Positive(atom) = Self::ast_build_literal(origin, &head_atom) { head_builder = head_builder.add_atom(atom); @@ -89,8 +89,8 @@ impl Program { let mut body_builder = head_builder.done().body(); - // TODO: Implement a normal iterator to avoid cloning - for (body_index, body_atom) in body.clone().into_iter().enumerate() { + // TODO: check wether the list iterator implementation is good + for (body_index, body_atom) in body.into_iter().enumerate() { let origin = Origin::External(body_index); body_builder = body_builder.add_literal(Self::ast_build_literal(origin, &body_atom)); } @@ -128,13 +128,10 @@ impl Program { } } - fn ast_build_atom(origin: Origin, atom: &ast::tuple::Tuple) -> Atom { - let predicate_name = atom - .identifier - .expect("Atom must have a predicate name") - .to_string(); - let subterms = match &atom.terms { - Some(terms) => terms.to_vec(), + fn ast_build_atom(origin: Origin, atom: &ast::named_tuple::NamedTuple) -> Atom { + let predicate_name = atom.identifier.to_string(); + let subterms = match &atom.tuple.terms { + Some(terms) => terms.to_item_vec(), None => vec![], }; @@ -181,6 +178,9 @@ impl Program { todo!() } ast::term::Term::Tuple(tuple) => Self::ast_build_inner_tuple(origin, tuple), + ast::term::Term::NamedTuple(named_tuple) => { + Self::ast_build_inner_named_tuple(origin, named_tuple) + } ast::term::Term::Map(_) => todo!(), ast::term::Term::Blank(_) => todo!(), } @@ -219,7 +219,7 @@ impl Program { fn ast_build_inner_tuple(origin: Origin, tuple: &ast::tuple::Tuple) -> Term { let subterms = match &tuple.terms { - Some(terms) => terms.to_vec(), + Some(terms) => terms.to_item_vec(), None => vec![], }; @@ -230,14 +230,29 @@ impl Program { translated_subterms.push(Self::ast_build_inner_term(origin, &subterm)); } - match tuple.identifier { - Some(name) => match OperationKind::from_name(&name.to_string()) { - Some(kind) => Term::Operation(Operation::new(kind, translated_subterms)), - None => { - Term::FunctionTerm(FunctionTerm::new(&name.to_string(), translated_subterms)) - } - }, - None => Term::Tuple(Tuple::new(translated_subterms)), + Term::Tuple(Tuple::new(translated_subterms)) + } + + fn ast_build_inner_named_tuple( + origin: Origin, + named_tuple: &ast::named_tuple::NamedTuple, + ) -> Term { + let subterms = match &named_tuple.tuple.terms { + Some(terms) => terms.to_item_vec(), + None => vec![], + }; + + let mut translated_subterms = Vec::new(); + + for (term_index, subterm) in subterms.into_iter().enumerate() { + let origin = Origin::External(term_index); + translated_subterms.push(Self::ast_build_inner_term(origin, &subterm)); + } + + let name = &named_tuple.identifier.to_string(); + match OperationKind::from_name(name) { + Some(kind) => Term::Operation(Operation::new(kind, translated_subterms)), + None => Term::FunctionTerm(FunctionTerm::new(name, translated_subterms)), } } From a803f73de8c4772c9bf8a1e947745877db708632 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Thu, 8 Feb 2024 22:24:14 +0100 Subject: [PATCH 058/214] Add module, enum and first few lexer functions --- nemo/src/io.rs | 1 + nemo/src/io/lexer.rs | 198 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 199 insertions(+) create mode 
100644 nemo/src/io/lexer.rs diff --git a/nemo/src/io.rs b/nemo/src/io.rs index e2fdb18ef..46defcab6 100644 --- a/nemo/src/io.rs +++ b/nemo/src/io.rs @@ -6,6 +6,7 @@ pub mod compression_format; pub mod export_manager; pub mod formats; pub mod import_manager; +pub mod lexer; pub mod parser; pub mod resource_providers; diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs new file mode 100644 index 000000000..25e0119db --- /dev/null +++ b/nemo/src/io/lexer.rs @@ -0,0 +1,198 @@ +//! Lexical tokenization of rulewerk-style rules. + +use nom::{ + branch::alt, + bytes::complete::{is_a, is_not, tag}, + character::complete::multispace0, + combinator::{peek, recognize}, + error::ParseError, + multi::many0, + sequence::{delimited, tuple}, + IResult, Parser, +}; +use nom_locate::LocatedSpan; + +type Span<'a> = LocatedSpan<&'a str>; + +/// All the tokens the input gets parsed into. +#[derive(Debug, PartialEq)] +enum Token<'a> { + // Directives + Base(Span<'a>), + Prefix(Span<'a>), + Import(Span<'a>), + Export(Span<'a>), + // Syntactic symbols + QuestionMark(Span<'a>), + BracketOpen(Span<'a>), + BracketClose(Span<'a>), + SquaredBracketOpen(Span<'a>), + SquaredBracketClose(Span<'a>), + CurlyBracketOpen(Span<'a>), + CurlyBracketClose(Span<'a>), + Dot(Span<'a>), + Comma(Span<'a>), + Colon(Span<'a>), + ImplicationArrow(Span<'a>), + Greater(Span<'a>), + Equal(Span<'a>), + Less(Span<'a>), + Not(Span<'a>), + DoubleCaret(Span<'a>), + Hash(Span<'a>), + Underscore(Span<'a>), + AtSign(Span<'a>), + // Names or values + Identifier(Span<'a>), + IRI(Span<'a>), + Integer(Span<'a>), + Float(Span<'a>), + String(Span<'a>), + // miscellaneous + Comment(Span<'a>), + Illegal(Span<'a>), + EOF(Span<'a>), +} + +// FIXME: Figure out when erros occur +fn tokenize<'a>(input: Span<'a>) -> Vec> { + let (rest, vec) = many0(ignore_ws(alt((comment, base, prefix, import, export))))(input) + .expect("An error occured"); + vec +} + +fn ignore_ws<'a, F, O, E: ParseError>>( + inner: F, +) -> impl FnMut(Span<'a>) -> IResult, O, E> +where + F: Parser, O, E>, +{ + delimited(multispace0, inner, multispace0) +} + +fn comment<'a>(input: Span<'a>) -> IResult, Token<'a>> { + recognize(tuple(( + tag("%"), + is_not("\n\r"), + alt((tag("\n\r"), tag("\n"))), + )))(input) + .map(|(rest, span)| (rest, Token::Comment(span))) +} + +/// Recognize the `@base` directive +fn base<'a>(input: Span<'a>) -> IResult, Token<'a>> { + tag("@base")(input).map(|(rest, span)| (rest, Token::Base(span))) +} + +fn prefix<'a>(input: Span<'a>) -> IResult, Token<'a>> { + tag("@prefix")(input).map(|(rest, span)| (rest, Token::Prefix(span))) +} + +fn import<'a>(input: Span<'a>) -> IResult, Token<'a>> { + tag("@import")(input).map(|(rest, span)| (rest, Token::Import(span))) +} + +fn export<'a>(input: Span<'a>) -> IResult, Token<'a>> { + tag("@export")(input).map(|(rest, span)| (rest, Token::Export(span))) +} + +#[cfg(test)] +mod test { + use nom::multi::many0; + + use super::{Span, Token}; + // is `tag` the right denomination? + #[test] + fn base_tag() { + assert_eq!( + super::base(Span::new("@base")).unwrap().1, + Token::Base(unsafe { Span::new_from_raw_offset(0, 1, "@base", ()) }) + ); + } + + // is `tag` the right denomination? + #[test] + fn prefix_tag() { + assert_eq!( + super::prefix(Span::new("@prefix")).unwrap().1, + Token::Prefix(unsafe { Span::new_from_raw_offset(0, 1, "@prefix", ()) }) + ); + } + + // is `tag` the right denomination? 
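The expected values in these tests rebuild located spans by hand through `unsafe { Span::new_from_raw_offset(..) }`. A small helper, not part of the patch, would keep the unsafety in one place:

    use nom_locate::LocatedSpan;

    type Span<'a> = LocatedSpan<&'a str>;

    /// Build a `Span` at a known byte offset and line number, as the tests do.
    /// Safety contract: the caller promises that `offset` and `line` are
    /// consistent with where `fragment` occurs in the original input.
    fn span_at(offset: usize, line: u32, fragment: &str) -> Span {
        unsafe { Span::new_from_raw_offset(offset, line, fragment, ()) }
    }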
+ #[test] + fn import_tag() { + assert_eq!( + super::import(Span::new("@import")).unwrap().1, + Token::Import(unsafe { Span::new_from_raw_offset(0, 1, "@import", ()) }) + ); + } + + // is `tag` the right denomination? + #[test] + fn export_tag() { + assert_eq!( + super::export(Span::new("@export")).unwrap().1, + Token::Export(unsafe { Span::new_from_raw_offset(0, 1, "@export", ()) }) + ); + } + + #[test] + fn comment() { + assert_eq!( + super::comment(Span::new( + "% Some meaningful comment with some other %'s in it\n" + )) + .unwrap() + .1, + Token::Comment(unsafe { + Span::new_from_raw_offset( + 0, + 1, + "% Some meaningful comment with some other %'s in it\n", + (), + ) + }) + ); + assert_eq!( + super::comment(Span::new( + "% Some meaningful comment with some other %'s in it\n\r" + )) + .unwrap() + .1, + Token::Comment(unsafe { + Span::new_from_raw_offset( + 0, + 1, + "% Some meaningful comment with some other %'s in it\n\r", + (), + ) + }) + ); + assert_eq!( + super::comment(Span::new( + "% Some meaningful comment\n%that is more than one line long\n" + )) + .unwrap() + .1, + Token::Comment(unsafe { + Span::new_from_raw_offset(0, 1, "% Some meaningful comment\n", ()) + }) + ); + assert_eq!( + many0(super::comment)(Span::new( + "% Some meaningful comment\n%that is more than one line long\n" + )) + .unwrap() + .1, + vec![ + Token::Comment(unsafe { + Span::new_from_raw_offset(0, 1, "% Some meaningful comment\n", ()) + }), + Token::Comment(unsafe { + Span::new_from_raw_offset(26, 2, "%that is more than one line long\n", ()) + }) + ] + ); + } +} From eae7f57b0a3d6de8ff82d3f2daa3df55b2c6d134 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Thu, 15 Feb 2024 19:27:32 +0100 Subject: [PATCH 059/214] Switch to character based tokenizer --- Cargo.lock | 1 + nemo/Cargo.toml | 1 + nemo/src/io/lexer.rs | 552 +++++++++++++++++++++++++++++++------------ 3 files changed, 401 insertions(+), 153 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3a2bf0955..46134e7f8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1186,6 +1186,7 @@ dependencies = [ "test-log", "thiserror", "tokio", + "unicode-ident", ] [[package]] diff --git a/nemo/Cargo.toml b/nemo/Cargo.toml index 5ccc66f02..173a3de22 100644 --- a/nemo/Cargo.toml +++ b/nemo/Cargo.toml @@ -45,6 +45,7 @@ ascii_tree = "0.1.1" serde_json = "1.0.108" serde = {version = "1.0.138", features = ["derive"] } dyn-clone = "1.0.16" +unicode-ident = "1.0.12" [dev-dependencies] env_logger = "*" diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs index 25e0119db..a8a4b8415 100644 --- a/nemo/src/io/lexer.rs +++ b/nemo/src/io/lexer.rs @@ -1,198 +1,444 @@ //! Lexical tokenization of rulewerk-style rules. -use nom::{ - branch::alt, - bytes::complete::{is_a, is_not, tag}, - character::complete::multispace0, - combinator::{peek, recognize}, - error::ParseError, - multi::many0, - sequence::{delimited, tuple}, - IResult, Parser, -}; -use nom_locate::LocatedSpan; - -type Span<'a> = LocatedSpan<&'a str>; +use std::str::Chars; -/// All the tokens the input gets parsed into. 
-#[derive(Debug, PartialEq)] -enum Token<'a> { - // Directives - Base(Span<'a>), - Prefix(Span<'a>), - Import(Span<'a>), - Export(Span<'a>), - // Syntactic symbols - QuestionMark(Span<'a>), - BracketOpen(Span<'a>), - BracketClose(Span<'a>), - SquaredBracketOpen(Span<'a>), - SquaredBracketClose(Span<'a>), - CurlyBracketOpen(Span<'a>), - CurlyBracketClose(Span<'a>), - Dot(Span<'a>), - Comma(Span<'a>), - Colon(Span<'a>), - ImplicationArrow(Span<'a>), - Greater(Span<'a>), - Equal(Span<'a>), - Less(Span<'a>), - Not(Span<'a>), - DoubleCaret(Span<'a>), - Hash(Span<'a>), - Underscore(Span<'a>), - AtSign(Span<'a>), - // Names or values - Identifier(Span<'a>), - IRI(Span<'a>), - Integer(Span<'a>), - Float(Span<'a>), - String(Span<'a>), - // miscellaneous - Comment(Span<'a>), - Illegal(Span<'a>), - EOF(Span<'a>), -} +const EOF_CHAR: char = '\0'; -// FIXME: Figure out when erros occur -fn tokenize<'a>(input: Span<'a>) -> Vec> { - let (rest, vec) = many0(ignore_ws(alt((comment, base, prefix, import, export))))(input) - .expect("An error occured"); - vec +#[derive(Debug)] +struct Lexer<'a> { + chars: Chars<'a>, } -fn ignore_ws<'a, F, O, E: ParseError>>( - inner: F, -) -> impl FnMut(Span<'a>) -> IResult, O, E> -where - F: Parser, O, E>, -{ - delimited(multispace0, inner, multispace0) -} +impl Lexer<'_> { + fn new(input: &str) -> Lexer { + Lexer { + chars: input.chars(), + } + } + fn peek(&self, count: usize) -> char { + self.chars.clone().nth(count - 1).unwrap_or(EOF_CHAR) + } + fn bump(&mut self) -> Option { + self.chars.next() + } + fn is_eof(&self) -> bool { + self.chars.as_str().is_empty() + } + fn bump_while(&mut self, mut predicate: impl FnMut(char) -> bool) { + while predicate(self.peek(1)) && !self.is_eof() { + self.bump(); + } + } + fn advance_token(&mut self) -> TokenKind { + use TokenKind::*; + let first_char = match self.bump() { + Some(c) => c, + None => return Eof, + }; + match first_char { + '%' => match (self.peek(1), self.peek(2)) { + (n1, n2) if n1.is_digit(16) && n2.is_digit(16) => self.pct_encoded(), + _ => self.comment(), + }, + '\n' => Whitespace(true), + c if is_whitespace(c) => self.whitespace(), + c if unicode_ident::is_xid_start(c) => self.ident(), + c @ '0'..='9' => self.number(), + '?' => QuestionMark, + '!' => ExclamationMark, + '(' => OpenParen, + ')' => CloseParen, + '[' => OpenBracket, + ']' => CloseBracket, + '{' => OpenBrace, + '}' => CloseBrace, + '.' 
=> Dot, + ',' => Comma, + ':' => Colon, + ';' => Semicolon, + '>' => Greater, + '=' => Equal, + '<' => Less, + '~' => Tilde, + '^' => Caret, + '#' => Hash, + '_' => Underscore, + '@' => At, + '+' => Plus, + '-' => Minus, + '*' => Star, + '/' => Slash, + '$' => Dollar, + '&' => Ampersand, + '\'' => Apostrophe, + _ => todo!(), + } + } -fn comment<'a>(input: Span<'a>) -> IResult, Token<'a>> { - recognize(tuple(( - tag("%"), - is_not("\n\r"), - alt((tag("\n\r"), tag("\n"))), - )))(input) - .map(|(rest, span)| (rest, Token::Comment(span))) + fn number(&mut self) -> TokenKind { + self.bump_while(is_hex_digit); + TokenKind::Number + } + fn pct_encoded(&mut self) -> TokenKind { + self.bump(); + self.bump(); + TokenKind::PctEncoded + } + fn comment(&mut self) -> TokenKind { + self.bump_while(|c| c != '\n'); + self.bump(); + TokenKind::Comment + } + fn whitespace(&mut self) -> TokenKind { + self.bump_while(|c| is_whitespace(c) && c != '\n'); + if '\n' == self.peek(1) { + self.bump(); + return TokenKind::Whitespace(true); + } + TokenKind::Whitespace(false) + } + fn ident(&mut self) -> TokenKind { + self.bump_while(unicode_ident::is_xid_continue); + TokenKind::Ident + } } -/// Recognize the `@base` directive -fn base<'a>(input: Span<'a>) -> IResult, Token<'a>> { - tag("@base")(input).map(|(rest, span)| (rest, Token::Base(span))) +fn is_hex_digit(c: char) -> bool { + c.is_digit(16) } -fn prefix<'a>(input: Span<'a>) -> IResult, Token<'a>> { - tag("@prefix")(input).map(|(rest, span)| (rest, Token::Prefix(span))) +fn is_whitespace(c: char) -> bool { + // support also vertical tab, form feed, NEXT LINE (latin1), + // LEFT-TO-RIGHT MARK, RIGHT-TO-LEFT MARK, LINE SEPARATOR and PARAGRAPH SEPARATOR? + matches!(c, ' ' | '\n' | '\t' | '\r') } -fn import<'a>(input: Span<'a>) -> IResult, Token<'a>> { - tag("@import")(input).map(|(rest, span)| (rest, Token::Import(span))) +fn is_ident(s: &str) -> bool { + let mut chars = s.chars(); + if let Some(char) = chars.next() { + unicode_ident::is_xid_start(char) && chars.all(unicode_ident::is_xid_continue) + } else { + false + } } -fn export<'a>(input: Span<'a>) -> IResult, Token<'a>> { - tag("@export")(input).map(|(rest, span)| (rest, Token::Export(span))) +/// All the tokens the input gets parsed into. +#[derive(Debug, PartialEq, Copy, Clone)] +enum TokenKind { + // Syntactic symbols: + /// '?' + QuestionMark, + /// '!' + ExclamationMark, + /// '(' + OpenParen, + /// ')' + CloseParen, + /// '[' + OpenBracket, + /// ']' + CloseBracket, + /// '{' + OpenBrace, + /// '}' + CloseBrace, + /// '.' 
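The tests further down all drive the lexer with the same loop; factored out, the driver is just the following sketch against the types above (TokenKind is Copy and PartialEq, so the comparison is cheap):

    /// Tokenize `input` to completion, including the trailing `Eof`.
    fn lex_all(input: &str) -> Vec<TokenKind> {
        let mut lexer = Lexer::new(input);
        let mut tokens = Vec::new();
        loop {
            let token = lexer.advance_token();
            tokens.push(token);
            if token == TokenKind::Eof {
                break;
            }
        }
        tokens
    }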
+ Dot, + /// ',' + Comma, + /// ':' + Colon, + /// ';' + Semicolon, + /// '>' + Greater, + /// '=' + Equal, + /// '<' + Less, + /// '~' + Tilde, + /// '^' + Caret, + /// '#' + Hash, + /// '_' + Underscore, + /// '@' + At, + /// '+' + Plus, + /// '-' + Minus, + /// '*' + Star, + /// '/' + Slash, + /// '$' + Dollar, + /// '&' + Ampersand, + /// "'" + Apostrophe, + // Multi-char tokens: + /// Identifier for keywords and predicate names + Ident, + /// All other Utf8 characters that can be used in an IRI + Utf8Chars, + /// Percent-encoded characters in IRIs + PctEncoded, + /// Base 10 digits + Number, + /// A string literal + String, + /// A comment, starting with `%` + Comment, + /// A comment, starting with `%%` + DocComment, + /// bool: ends_with_newline + Whitespace(bool), + /// catch all token + Illegal, + /// signals end of file + Eof, } #[cfg(test)] mod test { - use nom::multi::many0; + use super::TokenKind::*; + use crate::io::lexer::Lexer; - use super::{Span, Token}; - // is `tag` the right denomination? #[test] - fn base_tag() { + fn tokenize() { assert_eq!( - super::base(Span::new("@base")).unwrap().1, - Token::Base(unsafe { Span::new_from_raw_offset(0, 1, "@base", ()) }) - ); + { + let mut vec = vec![]; + let mut lexer = Lexer::new("P(?X) :- A(?X).\t\n A(Human)."); + loop { + let tok = lexer.advance_token(); + vec.push(tok.clone()); + if tok == Eof { + break; + } + } + vec + }, + vec![ + Ident, + OpenParen, + QuestionMark, + Ident, + CloseParen, + Whitespace(false), + Colon, + Minus, + Whitespace(false), + Ident, + OpenParen, + QuestionMark, + Ident, + CloseParen, + Dot, + Whitespace(true), + Whitespace(false), + Ident, + OpenParen, + Ident, + CloseParen, + Dot, + Eof + ] + ) } - // is `tag` the right denomination? #[test] - fn prefix_tag() { + fn comment() { assert_eq!( - super::prefix(Span::new("@prefix")).unwrap().1, - Token::Prefix(unsafe { Span::new_from_raw_offset(0, 1, "@prefix", ()) }) - ); + { + let mut vec = vec![]; + let mut lexer = Lexer::new("% Some Comment\n"); + loop { + let tok = lexer.advance_token(); + vec.push(tok.clone()); + if tok == Eof { + break; + } + } + vec + }, + vec![Comment, Eof] + ) } - // is `tag` the right denomination? #[test] - fn import_tag() { + fn pct_enc_with_comment() { assert_eq!( - super::import(Span::new("@import")).unwrap().1, - Token::Import(unsafe { Span::new_from_raw_offset(0, 1, "@import", ()) }) - ); + { + let mut vec = vec![]; + let mut lexer = Lexer::new("%38%a3% Some Comment\n"); + loop { + let tok = lexer.advance_token(); + vec.push(tok.clone()); + if tok == Eof { + break; + } + } + vec + }, + vec![PctEncoded, PctEncoded, Comment, Eof] + ) } - // is `tag` the right denomination? #[test] - fn export_tag() { + fn ident() { assert_eq!( - super::export(Span::new("@export")).unwrap().1, - Token::Export(unsafe { Span::new_from_raw_offset(0, 1, "@export", ()) }) - ); + { + let mut vec = vec![]; + let mut lexer = Lexer::new("some_Ident(Alice). 
%comment at the end of a line\n"); + loop { + let tok = lexer.advance_token(); + vec.push(tok.clone()); + if tok == Eof { + break; + } + } + vec + }, + vec![ + Ident, + OpenParen, + Ident, + CloseParen, + Dot, + Whitespace(false), + Comment, + Eof + ] + ) } #[test] - fn comment() { + #[should_panic] + fn forbidden_ident() { assert_eq!( - super::comment(Span::new( - "% Some meaningful comment with some other %'s in it\n" - )) - .unwrap() - .1, - Token::Comment(unsafe { - Span::new_from_raw_offset( - 0, - 1, - "% Some meaningful comment with some other %'s in it\n", - (), - ) - }) - ); + { + let mut vec = vec![]; + let mut lexer = Lexer::new("_someIdent(Alice). %comment at the end of a line\n"); + loop { + let tok = lexer.advance_token(); + vec.push(tok.clone()); + if tok == Eof { + break; + } + } + vec + }, + vec![ + Ident, + OpenParen, + Ident, + CloseParen, + Dot, + Whitespace(false), + Comment, + Eof + ] + ) + } + + #[test] + fn iri() { assert_eq!( - super::comment(Span::new( - "% Some meaningful comment with some other %'s in it\n\r" - )) - .unwrap() - .1, - Token::Comment(unsafe { - Span::new_from_raw_offset( - 0, - 1, - "% Some meaningful comment with some other %'s in it\n\r", - (), - ) - }) - ); + { + let mut vec = vec![]; + let mut lexer = Lexer::new(""); + loop { + let tok = lexer.advance_token(); + vec.push(tok.clone()); + if tok == Eof { + break; + } + } + vec + }, + vec![ + Less, Ident, Colon, Slash, Slash, Ident, Dot, Ident, Dot, Ident, Slash, Greater, + Eof + ] + ) + } + + #[test] + fn iri_pct_enc() { assert_eq!( - super::comment(Span::new( - "% Some meaningful comment\n%that is more than one line long\n" - )) - .unwrap() - .1, - Token::Comment(unsafe { - Span::new_from_raw_offset(0, 1, "% Some meaningful comment\n", ()) - }) - ); + { + let mut vec = vec![]; + let mut lexer = Lexer::new("\n"); + loop { + let tok = lexer.advance_token(); + vec.push(tok.clone()); + if tok == Eof { + break; + } + } + vec + }, + vec![ + Less, + Ident, + Colon, + Slash, + Slash, + Ident, + PctEncoded, + PctEncoded, + Ident, + PctEncoded, + PctEncoded, + Dot, + Ident, + Dot, + Ident, + Greater, + Whitespace(true), + Eof + ] + ) + } + + #[test] + fn pct_enc_comment() { assert_eq!( - many0(super::comment)(Span::new( - "% Some meaningful comment\n%that is more than one line long\n" - )) - .unwrap() - .1, + { + let mut vec = vec![]; + let mut lexer = Lexer::new("%d4 this should be a comment,\n% but the lexer can't distinguish a percent encoded value\n% in an iri from a comment :(\n"); + loop { + let tok = lexer.advance_token(); + vec.push(tok.clone()); + if tok == Eof { + break; + } + } + vec + }, vec![ - Token::Comment(unsafe { - Span::new_from_raw_offset(0, 1, "% Some meaningful comment\n", ()) - }), - Token::Comment(unsafe { - Span::new_from_raw_offset(26, 2, "%that is more than one line long\n", ()) - }) + PctEncoded, + Whitespace(false), + Ident, + Whitespace(false), + Ident, + Whitespace(false), + Ident, + Whitespace(false), + Ident, + Whitespace(false), + Ident, + Comma, + Whitespace(true), + Comment, + Comment, + Eof ] - ); + ) } } From b21cd174eae5339783190f391445ebb25a86a399 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Sat, 17 Feb 2024 08:07:29 +0100 Subject: [PATCH 060/214] Add ucschar and iprivate lexing for IRIs --- nemo/src/io/lexer.rs | 58 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs index a8a4b8415..806a786c4 100644 --- a/nemo/src/io/lexer.rs +++ b/nemo/src/io/lexer.rs @@ 
-71,6 +71,26 @@ impl Lexer<'_> { '$' => Dollar, '&' => Ampersand, '\'' => Apostrophe, + '\u{A0}'..='\u{D7FF}' + | '\u{F900}'..='\u{FDCF}' + | '\u{FDF0}'..='\u{FFEF}' + | '\u{10000}'..='\u{1FFFD}' + | '\u{20000}'..='\u{2FFFD}' + | '\u{30000}'..='\u{3FFFD}' + | '\u{40000}'..='\u{4FFFD}' + | '\u{50000}'..='\u{5FFFD}' + | '\u{60000}'..='\u{6FFFD}' + | '\u{70000}'..='\u{7FFFD}' + | '\u{80000}'..='\u{8FFFD}' + | '\u{90000}'..='\u{9FFFD}' + | '\u{A0000}'..='\u{AFFFD}' + | '\u{B0000}'..='\u{BFFFD}' + | '\u{C0000}'..='\u{CFFFD}' + | '\u{D0000}'..='\u{DFFFD}' + | '\u{E1000}'..='\u{EFFFD}' => self.ucschar(), + '\u{E000}'..='\u{F8FF}' | '\u{F0000}'..='\u{FFFFD}' | '\u{100000}'..='\u{10FFFD}' => { + self.iprivate() + } _ => todo!(), } } @@ -101,6 +121,16 @@ impl Lexer<'_> { self.bump_while(unicode_ident::is_xid_continue); TokenKind::Ident } + + fn ucschar(&mut self) -> TokenKind { + self.bump_while(is_ucschar); + TokenKind::UcsChars + } + + fn iprivate(&mut self) -> TokenKind { + self.bump_while(is_iprivate); + TokenKind::Iprivate + } } fn is_hex_digit(c: char) -> bool { @@ -122,6 +152,30 @@ fn is_ident(s: &str) -> bool { } } +fn is_ucschar(c: char) -> bool { + matches!(c, '\u{A0}'..='\u{D7FF}' + | '\u{F900}'..='\u{FDCF}' + | '\u{FDF0}'..='\u{FFEF}' + | '\u{10000}'..='\u{1FFFD}' + | '\u{20000}'..='\u{2FFFD}' + | '\u{30000}'..='\u{3FFFD}' + | '\u{40000}'..='\u{4FFFD}' + | '\u{50000}'..='\u{5FFFD}' + | '\u{60000}'..='\u{6FFFD}' + | '\u{70000}'..='\u{7FFFD}' + | '\u{80000}'..='\u{8FFFD}' + | '\u{90000}'..='\u{9FFFD}' + | '\u{A0000}'..='\u{AFFFD}' + | '\u{B0000}'..='\u{BFFFD}' + | '\u{C0000}'..='\u{CFFFD}' + | '\u{D0000}'..='\u{DFFFD}' + | '\u{E1000}'..='\u{EFFFD}') +} + +fn is_iprivate(c: char) -> bool { + matches!(c, '\u{E000}'..='\u{F8FF}' | '\u{F0000}'..='\u{FFFFD}' | '\u{100000}'..='\u{10FFFD}') +} + /// All the tokens the input gets parsed into. 
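// Sketch (editorial, not from the patch): the predicates above mirror RFC
// 3987's `ucschar` and `iprivate` productions, so a hypothetical check would
// behave like this:
//     assert!(is_ucschar('é'));         // U+00E9 falls in A0..=D7FF
//     assert!(!is_ucschar('a'));        // ASCII identifiers are handled elsewhere
//     assert!(is_iprivate('\u{E000}')); // start of the basic private-use area
//     assert!(!is_iprivate('é'));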
#[derive(Debug, PartialEq, Copy, Clone)] enum TokenKind { @@ -184,7 +238,9 @@ enum TokenKind { /// Identifier for keywords and predicate names Ident, /// All other Utf8 characters that can be used in an IRI - Utf8Chars, + UcsChars, + /// Characters in private use areas + Iprivate, /// Percent-encoded characters in IRIs PctEncoded, /// Base 10 digits From a4309b3d1af5dde704c8a8e0daca01a9b48f8065 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Tue, 20 Feb 2024 15:49:40 +0100 Subject: [PATCH 061/214] Add loop and Span to tokenizer --- nemo/src/io/lexer.rs | 586 +++++++++++++++++++++++++------------------ 1 file changed, 343 insertions(+), 243 deletions(-) diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs index 806a786c4..697236b3c 100644 --- a/nemo/src/io/lexer.rs +++ b/nemo/src/io/lexer.rs @@ -4,17 +4,50 @@ use std::str::Chars; const EOF_CHAR: char = '\0'; -#[derive(Debug)] +#[derive(Debug, Copy, Clone, PartialEq)] +struct Span<'a> { + offset: usize, + line: usize, + // size: usize, + fragment: &'a str, +} +// impl<'a> Span<'a> { +impl<'a> Span<'a> { + fn new(offset: usize, line: usize, input: &'a str) -> Span<'a> { + // fn new(offset: usize, line: usize, size: usize) -> Span { + Span { + offset, + line, + fragment: input, + // size, + } + } +} + +#[derive(Debug, Clone)] struct Lexer<'a> { + input: &'a str, + len_remaining: usize, + offset: usize, + lines: usize, chars: Chars<'a>, } - -impl Lexer<'_> { - fn new(input: &str) -> Lexer { +impl<'a> Lexer<'a> { + fn new(input: &'a str) -> Lexer<'a> { Lexer { + input, + len_remaining: input.len(), + offset: 0, + lines: 1, chars: input.chars(), } } + fn consumed_char_length(&self) -> usize { + self.len_remaining - self.chars.as_str().len() + } + fn update_remaining_len(&mut self) { + self.len_remaining = self.chars.as_str().len(); + } fn peek(&self, count: usize) -> char { self.chars.clone().nth(count - 1).unwrap_or(EOF_CHAR) } @@ -29,69 +62,102 @@ impl Lexer<'_> { self.bump(); } } - fn advance_token(&mut self) -> TokenKind { + fn get_tokens(&mut self) -> Vec { use TokenKind::*; - let first_char = match self.bump() { - Some(c) => c, - None => return Eof, - }; - match first_char { - '%' => match (self.peek(1), self.peek(2)) { - (n1, n2) if n1.is_digit(16) && n2.is_digit(16) => self.pct_encoded(), - _ => self.comment(), - }, - '\n' => Whitespace(true), - c if is_whitespace(c) => self.whitespace(), - c if unicode_ident::is_xid_start(c) => self.ident(), - c @ '0'..='9' => self.number(), - '?' => QuestionMark, - '!' => ExclamationMark, - '(' => OpenParen, - ')' => CloseParen, - '[' => OpenBracket, - ']' => CloseBracket, - '{' => OpenBrace, - '}' => CloseBrace, - '.' 
=> Dot, - ',' => Comma, - ':' => Colon, - ';' => Semicolon, - '>' => Greater, - '=' => Equal, - '<' => Less, - '~' => Tilde, - '^' => Caret, - '#' => Hash, - '_' => Underscore, - '@' => At, - '+' => Plus, - '-' => Minus, - '*' => Star, - '/' => Slash, - '$' => Dollar, - '&' => Ampersand, - '\'' => Apostrophe, - '\u{A0}'..='\u{D7FF}' - | '\u{F900}'..='\u{FDCF}' - | '\u{FDF0}'..='\u{FFEF}' - | '\u{10000}'..='\u{1FFFD}' - | '\u{20000}'..='\u{2FFFD}' - | '\u{30000}'..='\u{3FFFD}' - | '\u{40000}'..='\u{4FFFD}' - | '\u{50000}'..='\u{5FFFD}' - | '\u{60000}'..='\u{6FFFD}' - | '\u{70000}'..='\u{7FFFD}' - | '\u{80000}'..='\u{8FFFD}' - | '\u{90000}'..='\u{9FFFD}' - | '\u{A0000}'..='\u{AFFFD}' - | '\u{B0000}'..='\u{BFFFD}' - | '\u{C0000}'..='\u{CFFFD}' - | '\u{D0000}'..='\u{DFFFD}' - | '\u{E1000}'..='\u{EFFFD}' => self.ucschar(), - '\u{E000}'..='\u{F8FF}' | '\u{F0000}'..='\u{FFFFD}' | '\u{100000}'..='\u{10FFFD}' => { - self.iprivate() - } - _ => todo!(), + let mut vec = Vec::new(); + loop { + let old_line_num = self.lines; + let first_char = match self.bump() { + Some(c) => c, + None => { + let eof_tok = Token::new( + Eof, + Span::new( + self.offset, + self.lines, + &self.input[self.offset..self.offset], + ), + ); + vec.push(eof_tok); + return vec; + } + }; + let token_kind = match first_char { + '%' => match (self.peek(1), self.peek(2)) { + (n1, n2) if n1.is_digit(16) && n2.is_digit(16) => self.pct_encoded(), + _ => self.comment(), + }, + '\n' => { + self.lines += 1; + Whitespace + } + c if is_whitespace(c) => self.whitespace(), + c if unicode_ident::is_xid_start(c) => self.ident(), + c @ '0'..='9' => self.number(), + '?' => QuestionMark, + '!' => ExclamationMark, + '(' => OpenParen, + ')' => CloseParen, + '[' => OpenBracket, + ']' => CloseBracket, + '{' => OpenBrace, + '}' => CloseBrace, + '.' 
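// Note (editorial): the loop above only terminates by pushing an Eof token
// whose span covers the empty tail of the input; e.g. get_tokens() on "."
// should end with
//     Token::new(Eof, Span::new(1, 1, ""))
// as the rewritten unit tests below expect.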
=> Dot, + ',' => Comma, + ':' => Colon, + ';' => Semicolon, + '>' => Greater, + '=' => Equal, + '<' => Less, + '~' => Tilde, + '^' => Caret, + '#' => Hash, + '_' => Underscore, + '@' => At, + '+' => Plus, + '-' => Minus, + '*' => Star, + '/' => Slash, + '$' => Dollar, + '&' => Ampersand, + '\'' => Apostrophe, + '\u{A0}'..='\u{D7FF}' + | '\u{F900}'..='\u{FDCF}' + | '\u{FDF0}'..='\u{FFEF}' + | '\u{10000}'..='\u{1FFFD}' + | '\u{20000}'..='\u{2FFFD}' + | '\u{30000}'..='\u{3FFFD}' + | '\u{40000}'..='\u{4FFFD}' + | '\u{50000}'..='\u{5FFFD}' + | '\u{60000}'..='\u{6FFFD}' + | '\u{70000}'..='\u{7FFFD}' + | '\u{80000}'..='\u{8FFFD}' + | '\u{90000}'..='\u{9FFFD}' + | '\u{A0000}'..='\u{AFFFD}' + | '\u{B0000}'..='\u{BFFFD}' + | '\u{C0000}'..='\u{CFFFD}' + | '\u{D0000}'..='\u{DFFFD}' + | '\u{E1000}'..='\u{EFFFD}' => self.ucschar(), + '\u{E000}'..='\u{F8FF}' + | '\u{F0000}'..='\u{FFFFD}' + | '\u{100000}'..='\u{10FFFD}' => self.iprivate(), + _ => todo!(), + }; + let tok_len = self.consumed_char_length(); + + // let fragment = &*self.input; + let token = Token::new( + token_kind, + Span::new( + self.offset, + old_line_num, + &self.input[self.offset..(self.offset + tok_len)], + ), + // Span::new(self.offset, self.lines, tok_len), + ); + self.offset += tok_len; + self.update_remaining_len(); + vec.push(token); } } @@ -107,15 +173,17 @@ impl Lexer<'_> { fn comment(&mut self) -> TokenKind { self.bump_while(|c| c != '\n'); self.bump(); + self.lines += 1; TokenKind::Comment } fn whitespace(&mut self) -> TokenKind { self.bump_while(|c| is_whitespace(c) && c != '\n'); if '\n' == self.peek(1) { self.bump(); - return TokenKind::Whitespace(true); + self.lines += 1; + return TokenKind::Whitespace; } - TokenKind::Whitespace(false) + TokenKind::Whitespace } fn ident(&mut self) -> TokenKind { self.bump_while(unicode_ident::is_xid_continue); @@ -176,6 +244,18 @@ fn is_iprivate(c: char) -> bool { matches!(c, '\u{E000}'..='\u{F8FF}' | '\u{F0000}'..='\u{FFFFD}' | '\u{100000}'..='\u{10FFFD}') } +#[derive(Debug, Copy, Clone, PartialEq)] +pub(crate) struct Token<'a> { + kind: TokenKind, + span: Span<'a>, +} +// impl<'a> Token<'a> { +impl<'a> Token<'a> { + fn new(kind: TokenKind, span: Span<'a>) -> Token<'a> { + Token { kind, span } + } +} + /// All the tokens the input gets parsed into. 
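// Sketch (editorial, not from the patch): each lexeme now carries its
// provenance, so for the input "@base" the first token comes out as
//     Token::new(At, Span::new(0, 1, "@"))
// followed by an Ident over "base" at offset 1, which is exactly the shape
// the rewritten tests below assert.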
#[derive(Debug, PartialEq, Copy, Clone)] enum TokenKind { @@ -252,7 +332,7 @@ enum TokenKind { /// A comment, starting with `%%` DocComment, /// bool: ends_with_newline - Whitespace(bool), + Whitespace, /// catch all token Illegal, /// signals end of file @@ -262,113 +342,155 @@ enum TokenKind { #[cfg(test)] mod test { use super::TokenKind::*; - use crate::io::lexer::Lexer; + use crate::io::lexer::{Lexer, Span, Token}; + + #[test] + fn empty_input() { + let mut lexer = Lexer::new(""); + assert_eq!( + lexer.get_tokens(), + vec![Token::new(Eof, Span::new(0, 1, ""))] + ) + } + + #[test] + fn base() { + let mut lexer = Lexer::new("@base"); + assert_eq!( + lexer.get_tokens(), + vec![ + Token::new(At, Span::new(0, 1, "@")), + Token::new(Ident, Span::new(1, 1, "base")), + Token::new(Eof, Span::new(5, 1, "")), + ] + ) + } + + #[test] + fn prefix() { + let mut lexer = Lexer::new("@prefix"); + assert_eq!( + lexer.get_tokens(), + vec![ + Token::new(At, Span::new(0, 1, "@")), + Token::new(Ident, Span::new(1, 1, "prefix")), + Token::new(Eof, Span::new(7, 1, "")), + ] + ) + } + + #[test] + fn output() { + let mut lexer = Lexer::new("@output"); + assert_eq!( + lexer.get_tokens(), + vec![ + Token::new(At, Span::new(0, 1, "@")), + Token::new(Ident, Span::new(1, 1, "output")), + Token::new(Eof, Span::new(7, 1, "")), + ] + ) + } + + #[test] + fn import() { + let mut lexer = Lexer::new("@import"); + assert_eq!( + lexer.get_tokens(), + vec![ + Token::new(At, Span::new(0, 1, "@")), + Token::new(Ident, Span::new(1, 1, "import")), + Token::new(Eof, Span::new(7, 1, "")), + ] + ) + } + + #[test] + fn export() { + let mut lexer = Lexer::new("@export"); + assert_eq!( + lexer.get_tokens(), + vec![ + Token::new(At, Span::new(0, 1, "@")), + Token::new(Ident, Span::new(1, 1, "export")), + Token::new(Eof, Span::new(7, 1, "")), + ] + ) + } #[test] fn tokenize() { + let mut lexer = Lexer::new("P(?X) :- A(?X).\t\n A(Human)."); assert_eq!( - { - let mut vec = vec![]; - let mut lexer = Lexer::new("P(?X) :- A(?X).\t\n A(Human)."); - loop { - let tok = lexer.advance_token(); - vec.push(tok.clone()); - if tok == Eof { - break; - } - } - vec - }, + lexer.get_tokens(), vec![ - Ident, - OpenParen, - QuestionMark, - Ident, - CloseParen, - Whitespace(false), - Colon, - Minus, - Whitespace(false), - Ident, - OpenParen, - QuestionMark, - Ident, - CloseParen, - Dot, - Whitespace(true), - Whitespace(false), - Ident, - OpenParen, - Ident, - CloseParen, - Dot, - Eof + Token::new(Ident, Span::new(0, 1, "P")), + Token::new(OpenParen, Span::new(1, 1, "(")), + Token::new(QuestionMark, Span::new(2, 1, "?")), + Token::new(Ident, Span::new(3, 1, "X")), + Token::new(CloseParen, Span::new(4, 1, ")")), + Token::new(Whitespace, Span::new(5, 1, " ")), + Token::new(Colon, Span::new(6, 1, ":")), + Token::new(Minus, Span::new(7, 1, "-")), + Token::new(Whitespace, Span::new(8, 1, " ")), + Token::new(Ident, Span::new(9, 1, "A")), + Token::new(OpenParen, Span::new(10, 1, "(")), + Token::new(QuestionMark, Span::new(11, 1, "?")), + Token::new(Ident, Span::new(12, 1, "X")), + Token::new(CloseParen, Span::new(13, 1, ")")), + Token::new(Dot, Span::new(14, 1, ".")), + Token::new(Whitespace, Span::new(15, 1, "\t\n")), + Token::new(Whitespace, Span::new(17, 2, " ")), + Token::new(Ident, Span::new(21, 2, "A")), + Token::new(OpenParen, Span::new(22, 2, "(")), + Token::new(Ident, Span::new(23, 2, "Human")), + Token::new(CloseParen, Span::new(28, 2, ")")), + Token::new(Dot, Span::new(29, 2, ".")), + Token::new(Eof, Span::new(30, 2, "")), ] ) } #[test] fn comment() { + 
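// Note (editorial): a comment token keeps its trailing '\n' in the fragment,
// so the Eof that follows it starts on line 2.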
let mut lexer = Lexer::new("% Some Comment\n"); assert_eq!( - { - let mut vec = vec![]; - let mut lexer = Lexer::new("% Some Comment\n"); - loop { - let tok = lexer.advance_token(); - vec.push(tok.clone()); - if tok == Eof { - break; - } - } - vec - }, - vec![Comment, Eof] + lexer.get_tokens(), + vec![ + Token::new(Comment, Span::new(0, 1, "% Some Comment\n")), + Token::new(Eof, Span::new(15, 2, "")) + ] ) } #[test] fn pct_enc_with_comment() { + let mut lexer = Lexer::new("%38%a3% Some Comment\n"); assert_eq!( - { - let mut vec = vec![]; - let mut lexer = Lexer::new("%38%a3% Some Comment\n"); - loop { - let tok = lexer.advance_token(); - vec.push(tok.clone()); - if tok == Eof { - break; - } - } - vec - }, - vec![PctEncoded, PctEncoded, Comment, Eof] + lexer.get_tokens(), + vec![ + Token::new(PctEncoded, Span::new(0, 1, "%38")), + Token::new(PctEncoded, Span::new(3, 1, "%a3")), + Token::new(Comment, Span::new(6, 1, "% Some Comment\n")), + Token::new(Eof, Span::new(21, 2, "")), + ] ) } #[test] fn ident() { + let mut lexer = Lexer::new("some_Ident(Alice). %comment at the end of a line\n"); assert_eq!( - { - let mut vec = vec![]; - let mut lexer = Lexer::new("some_Ident(Alice). %comment at the end of a line\n"); - loop { - let tok = lexer.advance_token(); - vec.push(tok.clone()); - if tok == Eof { - break; - } - } - vec - }, + lexer.get_tokens(), vec![ - Ident, - OpenParen, - Ident, - CloseParen, - Dot, - Whitespace(false), - Comment, - Eof + Token::new(Ident, Span::new(0, 1, "some_Ident")), + Token::new(OpenParen, Span::new(10, 1, "(")), + Token::new(Ident, Span::new(11, 1, "Alice")), + Token::new(CloseParen, Span::new(16, 1, ")")), + Token::new(Dot, Span::new(17, 1, ".")), + Token::new(Whitespace, Span::new(18, 1, " ")), + Token::new(Comment, Span::new(19, 1, "%comment at the end of a line\n")), + Token::new(Eof, Span::new(49, 2, "")), ] ) } @@ -376,124 +498,102 @@ mod test { #[test] #[should_panic] fn forbidden_ident() { + let mut lexer = Lexer::new("_someIdent(Alice). %comment at the end of a line\n"); assert_eq!( - { - let mut vec = vec![]; - let mut lexer = Lexer::new("_someIdent(Alice). 
%comment at the end of a line\n"); - loop { - let tok = lexer.advance_token(); - vec.push(tok.clone()); - if tok == Eof { - break; - } - } - vec - }, + lexer.get_tokens(), vec![ - Ident, - OpenParen, - Ident, - CloseParen, - Dot, - Whitespace(false), - Comment, - Eof + Token::new(Ident, Span::new(0, 1, "_someIdent")), + Token::new(OpenParen, Span::new(10, 1, "(")), + Token::new(Ident, Span::new(11, 1, "Alice")), + Token::new(CloseParen, Span::new(16, 1, ")")), + Token::new(Dot, Span::new(17, 1, ".")), + Token::new(Whitespace, Span::new(18, 1, " ")), + Token::new(Comment, Span::new(19, 1, "%comment at the end of a line\n")), + Token::new(Eof, Span::new(49, 2, "")), ] ) } #[test] fn iri() { + let mut lexer = Lexer::new(""); assert_eq!( - { - let mut vec = vec![]; - let mut lexer = Lexer::new(""); - loop { - let tok = lexer.advance_token(); - vec.push(tok.clone()); - if tok == Eof { - break; - } - } - vec - }, + lexer.get_tokens(), vec![ - Less, Ident, Colon, Slash, Slash, Ident, Dot, Ident, Dot, Ident, Slash, Greater, - Eof + Token::new(Less, Span::new(0, 1, "<")), + Token::new(Ident, Span::new(1, 1, "https")), + Token::new(Colon, Span::new(6, 1, ":")), + Token::new(Slash, Span::new(7, 1, "/")), + Token::new(Slash, Span::new(8, 1, "/")), + Token::new(Ident, Span::new(9, 1, "résumé")), + Token::new(Dot, Span::new(17, 1, ".")), + Token::new(Ident, Span::new(18, 1, "example")), + Token::new(Dot, Span::new(25, 1, ".")), + Token::new(Ident, Span::new(26, 1, "org")), + Token::new(Slash, Span::new(29, 1, "/")), + Token::new(Greater, Span::new(30, 1, ">")), + Token::new(Eof, Span::new(31, 1, "")), ] ) } #[test] fn iri_pct_enc() { + let mut lexer = Lexer::new("\n"); assert_eq!( - { - let mut vec = vec![]; - let mut lexer = Lexer::new("\n"); - loop { - let tok = lexer.advance_token(); - vec.push(tok.clone()); - if tok == Eof { - break; - } - } - vec - }, + lexer.get_tokens(), vec![ - Less, - Ident, - Colon, - Slash, - Slash, - Ident, - PctEncoded, - PctEncoded, - Ident, - PctEncoded, - PctEncoded, - Dot, - Ident, - Dot, - Ident, - Greater, - Whitespace(true), - Eof + Token::new(Less, Span::new(0, 1, "<")), + Token::new(Ident, Span::new(1, 1, "http")), + Token::new(Colon, Span::new(5, 1, ":")), + Token::new(Slash, Span::new(6, 1, "/")), + Token::new(Slash, Span::new(7, 1, "/")), + Token::new(Ident, Span::new(8, 1, "r")), + Token::new(PctEncoded, Span::new(9, 1, "%C3")), + Token::new(PctEncoded, Span::new(12, 1, "%A9")), + Token::new(Ident, Span::new(15, 1, "sum")), + Token::new(PctEncoded, Span::new(18, 1, "%C3")), + Token::new(PctEncoded, Span::new(21, 1, "%A9")), + Token::new(Dot, Span::new(24, 1, ".")), + Token::new(Ident, Span::new(25, 1, "example")), + Token::new(Dot, Span::new(32, 1, ".")), + Token::new(Ident, Span::new(33, 1, "org")), + Token::new(Greater, Span::new(36, 1, ">")), + Token::new(Whitespace, Span::new(37, 1, "\n")), + Token::new(Eof, Span::new(38, 2, "")), ] ) } #[test] fn pct_enc_comment() { + let mut lexer = Lexer::new("%d4 this should be a comment,\n% but the lexer can't distinguish a percent encoded value\n% in an iri from a comment :(\n"); assert_eq!( - { - let mut vec = vec![]; - let mut lexer = Lexer::new("%d4 this should be a comment,\n% but the lexer can't distinguish a percent encoded value\n% in an iri from a comment :(\n"); - loop { - let tok = lexer.advance_token(); - vec.push(tok.clone()); - if tok == Eof { - break; - } - } - vec - }, + lexer.get_tokens(), vec![ - PctEncoded, - Whitespace(false), - Ident, - Whitespace(false), - Ident, - Whitespace(false), - Ident, - 
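// Note (editorial): span offsets count bytes, not characters; in the `iri`
// test above "résumé" occupies 8 bytes (each 'é' is two bytes), which is why
// the Dot that follows it starts at offset 17 = 9 + 8.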
Whitespace(false), - Ident, - Whitespace(false), - Ident, - Comma, - Whitespace(true), - Comment, - Comment, - Eof + Token::new(PctEncoded, Span::new(0, 1, "%d4")), + Token::new(Whitespace, Span::new(3, 1, " ")), + Token::new(Ident, Span::new(4, 1, "this")), + Token::new(Whitespace, Span::new(8, 1, " ")), + Token::new(Ident, Span::new(9, 1, "should")), + Token::new(Whitespace, Span::new(15, 1, " ")), + Token::new(Ident, Span::new(16, 1, "be")), + Token::new(Whitespace, Span::new(18, 1, " ")), + Token::new(Ident, Span::new(19, 1, "a")), + Token::new(Whitespace, Span::new(20, 1, " ")), + Token::new(Ident, Span::new(21, 1, "comment")), + Token::new(Comma, Span::new(28, 1, ",")), + Token::new(Whitespace, Span::new(29, 1, "\n")), + Token::new( + Comment, + Span::new( + 30, + 2, + "% but the lexer can't distinguish a percent encoded value\n" + ) + ), + Token::new(Comment, Span::new(88, 3, "% in an iri from a comment :(\n")), + Token::new(Eof, Span::new(118, 4, "")), ] ) } From 0fc9124e51fb60adab44111328ac3a39279ba3af Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Wed, 13 Mar 2024 08:29:04 +0100 Subject: [PATCH 062/214] Add ast --- nemo/src/io/parser/ast.rs | 282 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 282 insertions(+) create mode 100644 nemo/src/io/parser/ast.rs diff --git a/nemo/src/io/parser/ast.rs b/nemo/src/io/parser/ast.rs new file mode 100644 index 000000000..02c9c2d93 --- /dev/null +++ b/nemo/src/io/parser/ast.rs @@ -0,0 +1,282 @@ +use std::collections::BTreeMap; + +use crate::io::lexer::Token; + +struct Position { + offset: usize, + line: u32, + column: u32, +} + +pub(crate) type Program<'a> = Vec>; + +#[derive(Debug, PartialEq)] +pub(crate) enum Statement<'a> { + Directive(Directive<'a>), + Fact { + atom: Atom<'a>, + }, + Rule { + head: Vec>, + body: Vec>, + }, +} + +#[derive(Debug, PartialEq)] +pub(crate) enum Directive<'a> { + Base { + kw: Token<'a>, + base_iri: Token<'a>, + }, + Prefix { + kw: Token<'a>, + prefix: Token<'a>, + prefix_iri: Token<'a>, + }, + Import { + kw: Token<'a>, + predicate: Token<'a>, + map: Map<'a>, + }, + Export { + kw: Token<'a>, + predicate: Token<'a>, + map: Map<'a>, + }, + Output { + kw: Token<'a>, + predicates: Vec>, + }, +} + +#[derive(Debug, PartialEq)] +pub(crate) enum Atom<'a> { + Atom { + predicate: Token<'a>, + terms: Vec>, + }, + InfixAtom { + operation: Token<'a>, + lhs: Term<'a>, + rhs: Term<'a>, + }, + Map(Map<'a>), +} + +#[derive(Debug, PartialEq)] +pub(crate) enum Literal<'a> { + Positive(Atom<'a>), + Negative(Atom<'a>), +} + +#[derive(Debug, PartialEq)] +pub(crate) enum Term<'a> { + Primitive(Token<'a>), + Binary { + operation: Token<'a>, + lhs: Box>, + rhs: Box>, + }, + Unary { + operation: Token<'a>, + term: Box>, + }, + Aggregation { + operation: Token<'a>, + terms: Vec>, + }, + Function { + identifier: Token<'a>, + terms: Vec>, + }, + Map(Map<'a>), +} + +#[derive(Debug, PartialEq)] +struct Map<'a> { + identifier: Option>, + pairs: BTreeMap, Term<'a>>, +} + +#[derive(Debug, PartialEq)] +pub(crate) enum Node<'a> { + Statement(&'a Statement<'a>), + Directive(&'a Directive<'a>), + RuleHead(&'a Vec>), + RuleBody(&'a Vec>), + Atom(&'a Atom<'a>), + Term(&'a Term<'a>), + Terms(&'a Vec>), + Map(&'a Map<'a>), + KeyWord(&'a Token<'a>), + BaseIri(&'a Token<'a>), + Prefix(&'a Token<'a>), + PrefixIri(&'a Token<'a>), + Predicate(&'a Token<'a>), + Predicates(&'a Vec>), + Operation(&'a Token<'a>), + Lhs(&'a Term<'a>), + Rhs(&'a Term<'a>), + Identifier(&'a Token<'a>), + Pairs(&'a BTreeMap, Term<'a>>), + MapIdentifier(&'a Option>), + 
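// Sketch (editorial, not from the patch): under this AST, a fact such as
// `a(B,C).` is expected to come out roughly as
//     Statement::Fact { atom: Atom::Atom {
//         predicate: /* Token "a" */,
//         terms: vec![Term::Primitive(/* "B" */), Term::Primitive(/* "C" */)],
//     } }
// (the parser producing exactly this shape lands two patches later).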
Primitive(&'a Token<'a>), +} + +trait AstNode { + fn children(&self) -> Vec; + // fn position(&self) -> Position; +} + +impl<'a> AstNode for Program<'a> { + fn children(&self) -> Vec { + let mut vec = Vec::new(); + for statement in self { + vec.push(Node::Statement(statement)) + } + vec + } + + // fn position(&self) -> Position { + // let first = self.get(0); + // match first { + // Some(elem) => { + // let span; + // match elem { + // Statement::Directive(directive) => match directive { + // Directive::Base { kw, base_iri } => span = kw.span, + // Directive::Prefix { + // kw, + // prefix, + // prefix_iri, + // } => span = kw.span, + // Directive::Import { kw, predicate, map } => span = kw.span, + // Directive::Export { kw, predicate, map } => span = kw.span, + // Directive::Output { kw, predicates } => span = kw.span, + // }, + // Statement::Fact { atom } => match atom { + // Atom::Atom { predicate, terms } => todo!(), + // Atom::InfixAtom { operation, lhs, rhs } => todo!(), + // Atom::Map(_) => todo!(), + // }, + // Statement::Rule { head, body } => todo!(), + // }; + // } + // None => Position { + // offset: 0, + // line: 1, + // column: 0, + // }, + // } + // } +} + +impl<'a> AstNode for Statement<'a> { + fn children(&self) -> Vec { + match self { + Statement::Directive(directive) => directive.children(), + Statement::Fact { atom } => vec![Node::Atom(atom)], + Statement::Rule { head, body } => { + vec![Node::RuleHead(head), Node::RuleBody(body)] + } + } + } + + // fn position(&self) -> Position { + // todo!() + // } +} + +impl<'a> AstNode for Directive<'a> { + fn children(&self) -> Vec { + match self { + Directive::Base { kw, base_iri } => { + vec![Node::KeyWord(kw), Node::BaseIri(base_iri)] + } + Directive::Prefix { + kw, + prefix, + prefix_iri, + } => vec![ + Node::KeyWord(kw), + Node::Prefix(prefix), + Node::PrefixIri(prefix_iri), + ], + Directive::Import { kw, predicate, map } => vec![ + Node::KeyWord(kw), + Node::Predicate(predicate), + Node::Map(map), + ], + Directive::Export { kw, predicate, map } => vec![ + Node::KeyWord(kw), + Node::Predicate(predicate), + Node::Map(map), + ], + Directive::Output { kw, predicates } => { + vec![Node::KeyWord(kw), Node::Predicates(predicates)] + } + } + } + + // fn position(&self) -> Position { + // todo!() + // } +} + +impl<'a> AstNode for Atom<'a> { + fn children(&self) -> Vec { + match self { + Atom::Atom { predicate, terms } => { + vec![Node::KeyWord(predicate), Node::Terms(terms)] + } + Atom::InfixAtom { + operation, + lhs, + rhs, + } => vec![Node::Operation(operation), Node::Lhs(lhs), Node::Rhs(rhs)], + Atom::Map(map) => map.children(), + } + } + + // fn position(&self) -> Position { + // todo!() + // } +} + +impl<'a> AstNode for Term<'a> { + fn children(&self) -> Vec { + match self { + Term::Primitive(prim) => vec![Node::Primitive(prim)], + Term::Binary { + operation, + lhs, + rhs, + } => vec![Node::Operation(operation), Node::Lhs(lhs), Node::Rhs(rhs)], + Term::Unary { operation, term } => vec![Node::Operation(operation), Node::Term(term)], + Term::Aggregation { operation, terms } => { + vec![Node::Operation(operation), Node::Terms(terms)] + } + Term::Function { identifier, terms } => { + vec![Node::Identifier(identifier), Node::Terms(terms)] + } + Term::Map(map) => map.children(), + } + } + + // fn position(&self) -> Position { + // todo!() + // } +} + +impl<'a> AstNode for Map<'a> { + fn children(&self) -> Vec { + vec![ + Node::MapIdentifier(&self.identifier), + Node::Pairs(&self.pairs), + ] + } + + // fn position(&self) -> Position { + 
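// Note (editorial): `children` gives every node kind a uniform child list, so
// a generic pre-order walk can be sketched as matching each `Node` variant
// back to its underlying `AstNode` and recursing; positions are still open
// (see the commented-out `position` stubs).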
// todo!() + // } +} From cbe7474426dc751e4d4bfb164a114b1bf27706f0 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Wed, 13 Mar 2024 08:30:32 +0100 Subject: [PATCH 063/214] Refactor lexer to use nom --- nemo/src/io/lexer.rs | 787 ++++++++++++++++++++----------------------- 1 file changed, 366 insertions(+), 421 deletions(-) diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs index 697236b3c..c0b6ccb0d 100644 --- a/nemo/src/io/lexer.rs +++ b/nemo/src/io/lexer.rs @@ -1,264 +1,21 @@ //! Lexical tokenization of rulewerk-style rules. -use std::str::Chars; +use nom::{ + branch::alt, + bytes::complete::{is_not, tag, take}, + character::complete::{alpha1, alphanumeric1, digit1, line_ending, multispace1}, + combinator::{all_consuming, map, recognize}, + multi::many0, + sequence::{delimited, pair, tuple}, + IResult, +}; +use nom_locate::LocatedSpan; -const EOF_CHAR: char = '\0'; - -#[derive(Debug, Copy, Clone, PartialEq)] -struct Span<'a> { - offset: usize, - line: usize, - // size: usize, - fragment: &'a str, -} -// impl<'a> Span<'a> { -impl<'a> Span<'a> { - fn new(offset: usize, line: usize, input: &'a str) -> Span<'a> { - // fn new(offset: usize, line: usize, size: usize) -> Span { - Span { - offset, - line, - fragment: input, - // size, - } - } -} - -#[derive(Debug, Clone)] -struct Lexer<'a> { - input: &'a str, - len_remaining: usize, - offset: usize, - lines: usize, - chars: Chars<'a>, -} -impl<'a> Lexer<'a> { - fn new(input: &'a str) -> Lexer<'a> { - Lexer { - input, - len_remaining: input.len(), - offset: 0, - lines: 1, - chars: input.chars(), - } - } - fn consumed_char_length(&self) -> usize { - self.len_remaining - self.chars.as_str().len() - } - fn update_remaining_len(&mut self) { - self.len_remaining = self.chars.as_str().len(); - } - fn peek(&self, count: usize) -> char { - self.chars.clone().nth(count - 1).unwrap_or(EOF_CHAR) - } - fn bump(&mut self) -> Option { - self.chars.next() - } - fn is_eof(&self) -> bool { - self.chars.as_str().is_empty() - } - fn bump_while(&mut self, mut predicate: impl FnMut(char) -> bool) { - while predicate(self.peek(1)) && !self.is_eof() { - self.bump(); - } - } - fn get_tokens(&mut self) -> Vec { - use TokenKind::*; - let mut vec = Vec::new(); - loop { - let old_line_num = self.lines; - let first_char = match self.bump() { - Some(c) => c, - None => { - let eof_tok = Token::new( - Eof, - Span::new( - self.offset, - self.lines, - &self.input[self.offset..self.offset], - ), - ); - vec.push(eof_tok); - return vec; - } - }; - let token_kind = match first_char { - '%' => match (self.peek(1), self.peek(2)) { - (n1, n2) if n1.is_digit(16) && n2.is_digit(16) => self.pct_encoded(), - _ => self.comment(), - }, - '\n' => { - self.lines += 1; - Whitespace - } - c if is_whitespace(c) => self.whitespace(), - c if unicode_ident::is_xid_start(c) => self.ident(), - c @ '0'..='9' => self.number(), - '?' => QuestionMark, - '!' => ExclamationMark, - '(' => OpenParen, - ')' => CloseParen, - '[' => OpenBracket, - ']' => CloseBracket, - '{' => OpenBrace, - '}' => CloseBrace, - '.' 
=> Dot, - ',' => Comma, - ':' => Colon, - ';' => Semicolon, - '>' => Greater, - '=' => Equal, - '<' => Less, - '~' => Tilde, - '^' => Caret, - '#' => Hash, - '_' => Underscore, - '@' => At, - '+' => Plus, - '-' => Minus, - '*' => Star, - '/' => Slash, - '$' => Dollar, - '&' => Ampersand, - '\'' => Apostrophe, - '\u{A0}'..='\u{D7FF}' - | '\u{F900}'..='\u{FDCF}' - | '\u{FDF0}'..='\u{FFEF}' - | '\u{10000}'..='\u{1FFFD}' - | '\u{20000}'..='\u{2FFFD}' - | '\u{30000}'..='\u{3FFFD}' - | '\u{40000}'..='\u{4FFFD}' - | '\u{50000}'..='\u{5FFFD}' - | '\u{60000}'..='\u{6FFFD}' - | '\u{70000}'..='\u{7FFFD}' - | '\u{80000}'..='\u{8FFFD}' - | '\u{90000}'..='\u{9FFFD}' - | '\u{A0000}'..='\u{AFFFD}' - | '\u{B0000}'..='\u{BFFFD}' - | '\u{C0000}'..='\u{CFFFD}' - | '\u{D0000}'..='\u{DFFFD}' - | '\u{E1000}'..='\u{EFFFD}' => self.ucschar(), - '\u{E000}'..='\u{F8FF}' - | '\u{F0000}'..='\u{FFFFD}' - | '\u{100000}'..='\u{10FFFD}' => self.iprivate(), - _ => todo!(), - }; - let tok_len = self.consumed_char_length(); - - // let fragment = &*self.input; - let token = Token::new( - token_kind, - Span::new( - self.offset, - old_line_num, - &self.input[self.offset..(self.offset + tok_len)], - ), - // Span::new(self.offset, self.lines, tok_len), - ); - self.offset += tok_len; - self.update_remaining_len(); - vec.push(token); - } - } - - fn number(&mut self) -> TokenKind { - self.bump_while(is_hex_digit); - TokenKind::Number - } - fn pct_encoded(&mut self) -> TokenKind { - self.bump(); - self.bump(); - TokenKind::PctEncoded - } - fn comment(&mut self) -> TokenKind { - self.bump_while(|c| c != '\n'); - self.bump(); - self.lines += 1; - TokenKind::Comment - } - fn whitespace(&mut self) -> TokenKind { - self.bump_while(|c| is_whitespace(c) && c != '\n'); - if '\n' == self.peek(1) { - self.bump(); - self.lines += 1; - return TokenKind::Whitespace; - } - TokenKind::Whitespace - } - fn ident(&mut self) -> TokenKind { - self.bump_while(unicode_ident::is_xid_continue); - TokenKind::Ident - } - - fn ucschar(&mut self) -> TokenKind { - self.bump_while(is_ucschar); - TokenKind::UcsChars - } - - fn iprivate(&mut self) -> TokenKind { - self.bump_while(is_iprivate); - TokenKind::Iprivate - } -} - -fn is_hex_digit(c: char) -> bool { - c.is_digit(16) -} - -fn is_whitespace(c: char) -> bool { - // support also vertical tab, form feed, NEXT LINE (latin1), - // LEFT-TO-RIGHT MARK, RIGHT-TO-LEFT MARK, LINE SEPARATOR and PARAGRAPH SEPARATOR? 
- matches!(c, ' ' | '\n' | '\t' | '\r') -} - -fn is_ident(s: &str) -> bool { - let mut chars = s.chars(); - if let Some(char) = chars.next() { - unicode_ident::is_xid_start(char) && chars.all(unicode_ident::is_xid_continue) - } else { - false - } -} - -fn is_ucschar(c: char) -> bool { - matches!(c, '\u{A0}'..='\u{D7FF}' - | '\u{F900}'..='\u{FDCF}' - | '\u{FDF0}'..='\u{FFEF}' - | '\u{10000}'..='\u{1FFFD}' - | '\u{20000}'..='\u{2FFFD}' - | '\u{30000}'..='\u{3FFFD}' - | '\u{40000}'..='\u{4FFFD}' - | '\u{50000}'..='\u{5FFFD}' - | '\u{60000}'..='\u{6FFFD}' - | '\u{70000}'..='\u{7FFFD}' - | '\u{80000}'..='\u{8FFFD}' - | '\u{90000}'..='\u{9FFFD}' - | '\u{A0000}'..='\u{AFFFD}' - | '\u{B0000}'..='\u{BFFFD}' - | '\u{C0000}'..='\u{CFFFD}' - | '\u{D0000}'..='\u{DFFFD}' - | '\u{E1000}'..='\u{EFFFD}') -} - -fn is_iprivate(c: char) -> bool { - matches!(c, '\u{E000}'..='\u{F8FF}' | '\u{F0000}'..='\u{FFFFD}' | '\u{100000}'..='\u{10FFFD}') -} - -#[derive(Debug, Copy, Clone, PartialEq)] -pub(crate) struct Token<'a> { - kind: TokenKind, - span: Span<'a>, -} -// impl<'a> Token<'a> { -impl<'a> Token<'a> { - fn new(kind: TokenKind, span: Span<'a>) -> Token<'a> { - Token { kind, span } - } -} +pub(crate) type Span<'a> = LocatedSpan<&'a str>; /// All the tokens the input gets parsed into. #[derive(Debug, PartialEq, Copy, Clone)] -enum TokenKind { +pub(crate) enum TokenKind { // Syntactic symbols: /// '?' QuestionMark, @@ -282,14 +39,20 @@ enum TokenKind { Comma, /// ':' Colon, - /// ';' - Semicolon, + /// `:-` + Arrow, /// '>' Greater, + /// `>=` + GreaterEqual, /// '=' Equal, + /// `<=` + LessEqual, /// '<' Less, + /// `!=` + Unequal, /// '~' Tilde, /// '^' @@ -308,292 +71,474 @@ enum TokenKind { Star, /// '/' Slash, - /// '$' - Dollar, - /// '&' - Ampersand, - /// "'" - Apostrophe, // Multi-char tokens: /// Identifier for keywords and predicate names Ident, - /// All other Utf8 characters that can be used in an IRI - UcsChars, - /// Characters in private use areas - Iprivate, - /// Percent-encoded characters in IRIs - PctEncoded, + /// IRI, delimited with `<` and `>` + Iri, /// Base 10 digits Number, - /// A string literal + /// A string literal, delimited with `"` String, /// A comment, starting with `%` Comment, /// A comment, starting with `%%` DocComment, - /// bool: ends_with_newline + /// ` `, `\t`, `\r` or `\n` Whitespace, + /// base directive keyword + Base, + /// prefix directive keyword + Prefix, + /// import directive keyword + Import, + /// export directive keyword + Export, + /// output directive keyword + Output, /// catch all token Illegal, /// signals end of file Eof, } +#[derive(Debug, Copy, Clone, PartialEq)] +pub(crate) struct Token<'a> { + pub(crate) kind: TokenKind, + pub(crate) span: Span<'a>, +} +impl<'a> Token<'a> { + fn new(kind: TokenKind, span: Span<'a>) -> Token<'a> { + Token { kind, span } + } +} + +macro_rules! 
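// Sketch (editorial, not from the patch): each invocation such as
// `syntax!(dot, ".", TokenKind::Dot)` below should expand to a one-token
// recognizer, roughly
//     pub(crate) fn dot<'a>(input: Span) -> IResult<Span, Token> {
//         map(tag("."), |span| Token::new(TokenKind::Dot, span))(input)
//     }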
syntax { + ($func_name: ident, $tag_string: literal, $token: expr) => { + pub(crate) fn $func_name<'a>(input: Span) -> IResult { + map(tag($tag_string), |span| Token::new($token, span))(input) + } + }; +} + +syntax!(open_paren, "(", TokenKind::OpenParen); +syntax!(close_paren, ")", TokenKind::CloseParen); +syntax!(open_bracket, "[", TokenKind::OpenBracket); +syntax!(close_bracket, "]", TokenKind::CloseBracket); +syntax!(open_brace, "{", TokenKind::OpenBrace); +syntax!(close_brace, "}", TokenKind::CloseBrace); +syntax!(dot, ".", TokenKind::Dot); +syntax!(comma, ",", TokenKind::Comma); +syntax!(colon, ":", TokenKind::Colon); +syntax!(arrow, ":-", TokenKind::Arrow); +syntax!(question_mark, "?", TokenKind::QuestionMark); +syntax!(exclamation_mark, "!", TokenKind::ExclamationMark); +syntax!(tilde, "~", TokenKind::Tilde); +syntax!(caret, "^", TokenKind::Caret); +syntax!(hash, "#", TokenKind::Hash); +syntax!(underscore, "_", TokenKind::Underscore); +syntax!(at, "@", TokenKind::At); + +pub(crate) fn lex_punctuations(input: Span) -> IResult { + alt(( + arrow, + open_paren, + close_paren, + open_bracket, + close_bracket, + open_brace, + close_brace, + dot, + comma, + colon, + question_mark, + exclamation_mark, + tilde, + caret, + hash, + underscore, + at, + ))(input) +} + +syntax!(less, "<", TokenKind::Less); +syntax!(less_equal, "<=", TokenKind::LessEqual); +syntax!(equal, "=", TokenKind::Equal); +syntax!(greater_equal, ">=", TokenKind::GreaterEqual); +syntax!(greater, ">", TokenKind::Greater); +syntax!(unequals, "!=", TokenKind::Unequal); +syntax!(plus, "+", TokenKind::Plus); +syntax!(minus, "-", TokenKind::Minus); +syntax!(star, "*", TokenKind::Star); +syntax!(slash, "/", TokenKind::Slash); + +pub(crate) fn lex_operators(input: Span) -> IResult { + alt(( + less_equal, + greater_equal, + unequals, + less, + equal, + greater, + plus, + minus, + star, + slash, + ))(input) +} + +pub(crate) fn lex_ident(input: Span) -> IResult { + let (rest, result) = recognize(pair( + alpha1, + many0(alt((alphanumeric1, tag("_"), tag("-")))), + ))(input)?; + let token = match *result.fragment() { + "base" => Token::new(TokenKind::Base, result), + "prefix" => Token::new(TokenKind::Prefix, result), + "import" => Token::new(TokenKind::Import, result), + "export" => Token::new(TokenKind::Export, result), + "output" => Token::new(TokenKind::Output, result), + _ => Token::new(TokenKind::Ident, result), + }; + Ok((rest, token)) +} + +pub(crate) fn lex_iri(input: Span) -> IResult { + recognize(delimited(tag("<"), is_not("> \n"), tag(">")))(input) + .map(|(rest, result)| (rest, Token::new(TokenKind::Iri, result))) +} + +pub(crate) fn lex_number(input: Span) -> IResult { + digit1(input).map(|(rest, result)| (rest, Token::new(TokenKind::Number, result))) +} + +pub(crate) fn lex_string(input: Span) -> IResult { + recognize(delimited(tag("\""), is_not("\""), tag("\"")))(input) + .map(|(rest, result)| (rest, Token::new(TokenKind::String, result))) +} + +pub(crate) fn lex_comment(input: Span) -> IResult { + recognize(tuple((tag("%"), many0(is_not("\r\n")), line_ending)))(input) + .map(|(rest, result)| (rest, Token::new(TokenKind::Comment, result))) +} + +pub(crate) fn lex_doc_comment(input: Span) -> IResult { + recognize(tuple((tag("%%"), many0(is_not("\r\n")), line_ending)))(input) + .map(|(rest, result)| (rest, Token::new(TokenKind::DocComment, result))) +} + +pub(crate) fn lex_whitespace(input: Span) -> IResult { + multispace1(input).map(|(rest, result)| (rest, Token::new(TokenKind::Whitespace, result))) +} + +pub(crate) fn 
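// Note (editorial): `lex_ident` above always consumes the longest identifier
// first and only then checks the fragment against the keyword list, so
//     "prefix"  lexes as TokenKind::Prefix, while
//     "prefixB" lexes as TokenKind::Ident,
// which the `idents_with_keyword_prefix` test below relies on.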
lex_illegal(input: Span) -> IResult { + take(1usize)(input).map(|(rest, result)| (rest, Token::new(TokenKind::Illegal, result))) +} + +pub(crate) fn lex_tokens(input: Span) -> IResult> { + all_consuming(many0(alt(( + lex_iri, + lex_operators, + lex_punctuations, + lex_ident, + lex_number, + lex_string, + lex_comment, + lex_whitespace, + lex_illegal, + ))))(input) + .map(|(span, mut vec)| { + vec.append(&mut vec![Token::new(TokenKind::Eof, span)]); + (span, vec) + }) +} + #[cfg(test)] mod test { use super::TokenKind::*; - use crate::io::lexer::{Lexer, Span, Token}; + use super::*; + + macro_rules! T { + ($tok_kind: expr, $offset: literal, $line: literal, $str: literal) => { + Token::new($tok_kind, unsafe { + Span::new_from_raw_offset($offset, $line, $str, ()) + }) + }; + } #[test] fn empty_input() { - let mut lexer = Lexer::new(""); - assert_eq!( - lexer.get_tokens(), - vec![Token::new(Eof, Span::new(0, 1, ""))] - ) + let input = Span::new(""); + assert_eq!(lex_tokens(input).unwrap().1, vec![T!(Eof, 0, 1, "")]) } #[test] fn base() { - let mut lexer = Lexer::new("@base"); + let input = Span::new("@base"); assert_eq!( - lexer.get_tokens(), - vec![ - Token::new(At, Span::new(0, 1, "@")), - Token::new(Ident, Span::new(1, 1, "base")), - Token::new(Eof, Span::new(5, 1, "")), - ] + lex_tokens(input).unwrap().1, + vec![T!(At, 0, 1, "@"), T!(Base, 1, 1, "base"), T!(Eof, 5, 1, ""),] ) } #[test] fn prefix() { - let mut lexer = Lexer::new("@prefix"); + let input = Span::new("@prefix"); assert_eq!( - lexer.get_tokens(), + lex_tokens(input).unwrap().1, vec![ - Token::new(At, Span::new(0, 1, "@")), - Token::new(Ident, Span::new(1, 1, "prefix")), - Token::new(Eof, Span::new(7, 1, "")), + T!(At, 0, 1, "@"), + T!(Prefix, 1, 1, "prefix"), + T!(Eof, 7, 1, ""), ] ) } #[test] fn output() { - let mut lexer = Lexer::new("@output"); + let input = Span::new("@output"); assert_eq!( - lexer.get_tokens(), + lex_tokens(input).unwrap().1, vec![ - Token::new(At, Span::new(0, 1, "@")), - Token::new(Ident, Span::new(1, 1, "output")), - Token::new(Eof, Span::new(7, 1, "")), + T!(At, 0, 1, "@"), + T!(Output, 1, 1, "output"), + T!(Eof, 7, 1, ""), ] ) } #[test] fn import() { - let mut lexer = Lexer::new("@import"); + let input = Span::new("@import"); assert_eq!( - lexer.get_tokens(), + lex_tokens(input).unwrap().1, vec![ - Token::new(At, Span::new(0, 1, "@")), - Token::new(Ident, Span::new(1, 1, "import")), - Token::new(Eof, Span::new(7, 1, "")), + T!(At, 0, 1, "@"), + T!(Import, 1, 1, "import"), + T!(Eof, 7, 1, ""), ] ) } #[test] fn export() { - let mut lexer = Lexer::new("@export"); + let input = Span::new("@export"); assert_eq!( - lexer.get_tokens(), + lex_tokens(input).unwrap().1, vec![ - Token::new(At, Span::new(0, 1, "@")), - Token::new(Ident, Span::new(1, 1, "export")), - Token::new(Eof, Span::new(7, 1, "")), + T!(At, 0, 1, "@"), + T!(Export, 1, 1, "export"), + T!(Eof, 7, 1, ""), ] ) } #[test] - fn tokenize() { - let mut lexer = Lexer::new("P(?X) :- A(?X).\t\n A(Human)."); + fn idents_with_keyword_prefix() { + let input = Span::new("@baseA, @prefixB, @importC, @exportD, @outputE."); assert_eq!( - lexer.get_tokens(), + lex_tokens(input).unwrap().1, vec![ - Token::new(Ident, Span::new(0, 1, "P")), - Token::new(OpenParen, Span::new(1, 1, "(")), - Token::new(QuestionMark, Span::new(2, 1, "?")), - Token::new(Ident, Span::new(3, 1, "X")), - Token::new(CloseParen, Span::new(4, 1, ")")), - Token::new(Whitespace, Span::new(5, 1, " ")), - Token::new(Colon, Span::new(6, 1, ":")), - Token::new(Minus, Span::new(7, 1, "-")), - 
Token::new(Whitespace, Span::new(8, 1, " ")), - Token::new(Ident, Span::new(9, 1, "A")), - Token::new(OpenParen, Span::new(10, 1, "(")), - Token::new(QuestionMark, Span::new(11, 1, "?")), - Token::new(Ident, Span::new(12, 1, "X")), - Token::new(CloseParen, Span::new(13, 1, ")")), - Token::new(Dot, Span::new(14, 1, ".")), - Token::new(Whitespace, Span::new(15, 1, "\t\n")), - Token::new(Whitespace, Span::new(17, 2, " ")), - Token::new(Ident, Span::new(21, 2, "A")), - Token::new(OpenParen, Span::new(22, 2, "(")), - Token::new(Ident, Span::new(23, 2, "Human")), - Token::new(CloseParen, Span::new(28, 2, ")")), - Token::new(Dot, Span::new(29, 2, ".")), - Token::new(Eof, Span::new(30, 2, "")), + T!(At, 0, 1, "@"), + T!(Ident, 1, 1, "baseA"), + T!(Comma, 6, 1, ","), + T!(Whitespace, 7, 1, " "), + T!(At, 8, 1, "@"), + T!(Ident, 9, 1, "prefixB"), + T!(Comma, 16, 1, ","), + T!(Whitespace, 17, 1, " "), + T!(At, 18, 1, "@"), + T!(Ident, 19, 1, "importC"), + T!(Comma, 26, 1, ","), + T!(Whitespace, 27, 1, " "), + T!(At, 28, 1, "@"), + T!(Ident, 29, 1, "exportD"), + T!(Comma, 36, 1, ","), + T!(Whitespace, 37, 1, " "), + T!(At, 38, 1, "@"), + T!(Ident, 39, 1, "outputE"), + T!(Dot, 46, 1, "."), + T!(Eof, 47, 1, ""), ] ) } #[test] - fn comment() { - let mut lexer = Lexer::new("% Some Comment\n"); + fn tokenize() { + let input = Span::new("P(?X) :- A(?X).\t\n A(Human)."); assert_eq!( - lexer.get_tokens(), + lex_tokens(input).unwrap().1, vec![ - Token::new(Comment, Span::new(0, 1, "% Some Comment\n")), - Token::new(Eof, Span::new(15, 2, "")) + T!(Ident, 0, 1, "P"), + T!(OpenParen, 1, 1, "("), + T!(QuestionMark, 2, 1, "?"), + T!(Ident, 3, 1, "X"), + T!(CloseParen, 4, 1, ")"), + T!(Whitespace, 5, 1, " "), + T!(Arrow, 6, 1, ":-"), + T!(Whitespace, 8, 1, " "), + T!(Ident, 9, 1, "A"), + T!(OpenParen, 10, 1, "("), + T!(QuestionMark, 11, 1, "?"), + T!(Ident, 12, 1, "X"), + T!(CloseParen, 13, 1, ")"), + T!(Dot, 14, 1, "."), + T!(Whitespace, 15, 1, "\t\n "), + T!(Ident, 21, 2, "A"), + T!(OpenParen, 22, 2, "("), + T!(Ident, 23, 2, "Human"), + T!(CloseParen, 28, 2, ")"), + T!(Dot, 29, 2, "."), + T!(Eof, 30, 2, ""), ] ) } #[test] - fn pct_enc_with_comment() { - let mut lexer = Lexer::new("%38%a3% Some Comment\n"); + fn comment() { + let input = Span::new("% Some Comment\n"); assert_eq!( - lexer.get_tokens(), + lex_tokens(input).unwrap().1, vec![ - Token::new(PctEncoded, Span::new(0, 1, "%38")), - Token::new(PctEncoded, Span::new(3, 1, "%a3")), - Token::new(Comment, Span::new(6, 1, "% Some Comment\n")), - Token::new(Eof, Span::new(21, 2, "")), + T!(Comment, 0, 1, "% Some Comment\n"), + T!(Eof, 15, 2, ""), + // T!(Comment, Span::new(0, 1, "% Some Comment\n")), + // T!(Eof, Span::new(15, 2, "")) ] ) } #[test] fn ident() { - let mut lexer = Lexer::new("some_Ident(Alice). %comment at the end of a line\n"); + let input = Span::new("some_Ident(Alice). 
%comment at the end of a line\n"); assert_eq!( - lexer.get_tokens(), + lex_tokens(input).unwrap().1, vec![ - Token::new(Ident, Span::new(0, 1, "some_Ident")), - Token::new(OpenParen, Span::new(10, 1, "(")), - Token::new(Ident, Span::new(11, 1, "Alice")), - Token::new(CloseParen, Span::new(16, 1, ")")), - Token::new(Dot, Span::new(17, 1, ".")), - Token::new(Whitespace, Span::new(18, 1, " ")), - Token::new(Comment, Span::new(19, 1, "%comment at the end of a line\n")), - Token::new(Eof, Span::new(49, 2, "")), + T!(Ident, 0, 1, "some_Ident"), + T!(OpenParen, 10, 1, "("), + T!(Ident, 11, 1, "Alice"), + T!(CloseParen, 16, 1, ")"), + T!(Dot, 17, 1, "."), + T!(Whitespace, 18, 1, " "), + T!(Comment, 19, 1, "%comment at the end of a line\n"), + T!(Eof, 49, 2, ""), ] ) } #[test] - #[should_panic] fn forbidden_ident() { - let mut lexer = Lexer::new("_someIdent(Alice). %comment at the end of a line\n"); + let input = Span::new("_someIdent(Alice). %comment at the end of a line\n"); assert_eq!( - lexer.get_tokens(), + lex_tokens(input).unwrap().1, vec![ - Token::new(Ident, Span::new(0, 1, "_someIdent")), - Token::new(OpenParen, Span::new(10, 1, "(")), - Token::new(Ident, Span::new(11, 1, "Alice")), - Token::new(CloseParen, Span::new(16, 1, ")")), - Token::new(Dot, Span::new(17, 1, ".")), - Token::new(Whitespace, Span::new(18, 1, " ")), - Token::new(Comment, Span::new(19, 1, "%comment at the end of a line\n")), - Token::new(Eof, Span::new(49, 2, "")), + T!(Underscore, 0, 1, "_"), + T!(Ident, 1, 1, "someIdent"), + T!(OpenParen, 10, 1, "("), + T!(Ident, 11, 1, "Alice"), + T!(CloseParen, 16, 1, ")"), + T!(Dot, 17, 1, "."), + T!(Whitespace, 18, 1, " "), + T!(Comment, 19, 1, "%comment at the end of a line\n"), + T!(Eof, 49, 2, ""), ] ) } #[test] fn iri() { - let mut lexer = Lexer::new(""); + let input = Span::new(""); assert_eq!( - lexer.get_tokens(), + lex_tokens(input).unwrap().1, vec![ - Token::new(Less, Span::new(0, 1, "<")), - Token::new(Ident, Span::new(1, 1, "https")), - Token::new(Colon, Span::new(6, 1, ":")), - Token::new(Slash, Span::new(7, 1, "/")), - Token::new(Slash, Span::new(8, 1, "/")), - Token::new(Ident, Span::new(9, 1, "résumé")), - Token::new(Dot, Span::new(17, 1, ".")), - Token::new(Ident, Span::new(18, 1, "example")), - Token::new(Dot, Span::new(25, 1, ".")), - Token::new(Ident, Span::new(26, 1, "org")), - Token::new(Slash, Span::new(29, 1, "/")), - Token::new(Greater, Span::new(30, 1, ">")), - Token::new(Eof, Span::new(31, 1, "")), + T!(Iri, 0, 1, ""), + T!(Eof, 31, 1, ""), ] ) } #[test] fn iri_pct_enc() { - let mut lexer = Lexer::new("\n"); + let input = Span::new("\n"); assert_eq!( - lexer.get_tokens(), + lex_tokens(input).unwrap().1, vec![ - Token::new(Less, Span::new(0, 1, "<")), - Token::new(Ident, Span::new(1, 1, "http")), - Token::new(Colon, Span::new(5, 1, ":")), - Token::new(Slash, Span::new(6, 1, "/")), - Token::new(Slash, Span::new(7, 1, "/")), - Token::new(Ident, Span::new(8, 1, "r")), - Token::new(PctEncoded, Span::new(9, 1, "%C3")), - Token::new(PctEncoded, Span::new(12, 1, "%A9")), - Token::new(Ident, Span::new(15, 1, "sum")), - Token::new(PctEncoded, Span::new(18, 1, "%C3")), - Token::new(PctEncoded, Span::new(21, 1, "%A9")), - Token::new(Dot, Span::new(24, 1, ".")), - Token::new(Ident, Span::new(25, 1, "example")), - Token::new(Dot, Span::new(32, 1, ".")), - Token::new(Ident, Span::new(33, 1, "org")), - Token::new(Greater, Span::new(36, 1, ">")), - Token::new(Whitespace, Span::new(37, 1, "\n")), - Token::new(Eof, Span::new(38, 2, "")), + T!(Iri, 0, 1, ""), + 
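// Note (editorial): with `lex_iri`, everything between '<' and '>' is now one
// Iri token, where the character-based lexer produced a dozen separate
// punctuation, ident and percent-encoding tokens for the same input.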
T!(Whitespace, 37, 1, "\n"), + T!(Eof, 38, 2, ""), + ] + ) + } + + // FIXME: change the name of this test according to the correct name for `?X > 3` + // (Constraints are Rules with an empty Head) + #[test] + fn constraints() { + let input = Span::new("A(?X):-B(?X),?X<42,?X>3."); + assert_eq!( + lex_tokens(input).unwrap().1, + vec![ + T!(Ident, 0, 1, "A"), + T!(OpenParen, 1, 1, "("), + T!(QuestionMark, 2, 1, "?"), + T!(Ident, 3, 1, "X"), + T!(CloseParen, 4, 1, ")"), + T!(Arrow, 5, 1, ":-"), + T!(Ident, 7, 1, "B"), + T!(OpenParen, 8, 1, "("), + T!(QuestionMark, 9, 1, "?"), + T!(Ident, 10, 1, "X"), + T!(CloseParen, 11, 1, ")"), + T!(Comma, 12, 1, ","), + T!(QuestionMark, 13, 1, "?"), + T!(Ident, 14, 1, "X"), + T!(Less, 15, 1, "<"), + T!(Number, 16, 1, "42"), + T!(Comma, 18, 1, ","), + T!(QuestionMark, 19, 1, "?"), + T!(Ident, 20, 1, "X"), + T!(Greater, 21, 1, ">"), + T!(Number, 22, 1, "3"), + T!(Dot, 23, 1, "."), + T!(Eof, 24, 1, ""), ] ) } #[test] fn pct_enc_comment() { - let mut lexer = Lexer::new("%d4 this should be a comment,\n% but the lexer can't distinguish a percent encoded value\n% in an iri from a comment :(\n"); + let input = Span::new("%d4 this should be a comment,\n% but the lexer can't distinguish a percent encoded value\n% in an iri from a comment :(\n"); assert_eq!( - lexer.get_tokens(), + lex_tokens(input).unwrap().1, vec![ - Token::new(PctEncoded, Span::new(0, 1, "%d4")), - Token::new(Whitespace, Span::new(3, 1, " ")), - Token::new(Ident, Span::new(4, 1, "this")), - Token::new(Whitespace, Span::new(8, 1, " ")), - Token::new(Ident, Span::new(9, 1, "should")), - Token::new(Whitespace, Span::new(15, 1, " ")), - Token::new(Ident, Span::new(16, 1, "be")), - Token::new(Whitespace, Span::new(18, 1, " ")), - Token::new(Ident, Span::new(19, 1, "a")), - Token::new(Whitespace, Span::new(20, 1, " ")), - Token::new(Ident, Span::new(21, 1, "comment")), - Token::new(Comma, Span::new(28, 1, ",")), - Token::new(Whitespace, Span::new(29, 1, "\n")), - Token::new( + T!(Comment, 0, 1, "%d4 this should be a comment,\n"), + T!( Comment, - Span::new( - 30, - 2, - "% but the lexer can't distinguish a percent encoded value\n" - ) + 30, + 2, + "% but the lexer can't distinguish a percent encoded value\n" ), - Token::new(Comment, Span::new(88, 3, "% in an iri from a comment :(\n")), - Token::new(Eof, Span::new(118, 4, "")), + T!(Comment, 88, 3, "% in an iri from a comment :(\n"), + T!(Eof, 118, 4, ""), + ] + ) + } + + #[test] + fn fact() { + let input = Span::new("somePred(term1, term2)."); + assert_eq!( + lex_tokens(input).unwrap().1, + vec![ + T!(Ident, 0, 1, "somePred"), + T!(OpenParen, 8, 1, "("), + T!(Ident, 9, 1, "term1"), + T!(Comma, 14, 1, ","), + T!(Whitespace, 15, 1, " "), + T!(Ident, 16, 1, "term2"), + T!(CloseParen, 21, 1, ")"), + T!(Dot, 22, 1, "."), + T!(Eof, 23, 1, ""), ] ) } From 2131e067d246d4c5179ede57f33497e3f6d29a85 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Wed, 13 Mar 2024 08:31:26 +0100 Subject: [PATCH 064/214] Add ast module and start to add new parser functions --- nemo/src/io/parser.rs | 112 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index f90abdccb..cd90bb337 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -22,6 +22,7 @@ use nom::{ use macros::traced; +mod ast; mod types; use types::{ConstraintOperator, IntermediateResult, Span}; @@ -2426,3 +2427,114 @@ mod test { ); } } + +// NEW PARSER +mod new { + use super::ast::{self, Term}; + use 
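// Note (editorial): this first cut of the new parser still drives the lexer
// combinators directly over `Span` (see `parse_fact` below); the commented-out
// `tag_token!` instantiations and the `Tokens` slice type from the next patch
// prepare for parsing over a pre-lexed token stream instead.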
super::types::Tokens; + use crate::io::lexer::{self, lex_ident, Span, TokenKind}; + use nom::{ + branch::alt, + bytes::complete::take, + combinator::verify, + multi::{many1, separated_list0}, + sequence::tuple, + IResult, + }; + + macro_rules! tag_token ( + ($func_name:ident, $tag: expr) => ( + fn $func_name(tokens: Tokens) -> IResult { + verify(take(1usize), |t: &Tokens| t.tok[0].kind == $tag)(tokens) + } + ) + ); + + // tag_token!(ident, TokenKind::Ident); + // tag_token!(open_paren, TokenKind::OpenParen); + // tag_token!(close_paren, TokenKind::CloseParen); + // tag_token!(comma, TokenKind::Comma); + // tag_token!(dot, TokenKind::Dot); + + fn parse_program<'a>(input: Span<'a>) -> ast::Program<'a> { + let (_, statements) = + // many1(alt((/*parse_rule, */parse_fact/*, parse_directive*/)))(input).unwrap(); + many1(parse_fact)(input).unwrap(); + dbg!(&statements); + let mut program = ast::Program::new(); + for statement in statements { + program.push(statement) + } + program + } + + // fn parse_rule<'a>(input: Tokens) -> IResult> {} + + fn parse_fact<'a>(input: Span<'a>) -> IResult> { + tuple(( + lex_ident, + lexer::open_paren, + separated_list0(lexer::comma, lex_ident), + lexer::close_paren, + lexer::dot, + ))(input) + .map(|(rest, result)| { + let mut terms = Vec::new(); + for x in result.2 { + terms.push(Term::Primitive(x)) + } + ( + rest, + ast::Statement::Fact { + atom: ast::Atom::Atom { + predicate: result.0, + terms, + }, + }, + ) + }) + } + + // fn parse_directive<'a>(input: Tokens) -> IResult> {} + + #[cfg(test)] + mod test { + use super::*; + use crate::io::{lexer::*, parser::ast::*}; + + macro_rules! S { + ($offset:literal,$line:literal,$str:literal) => { + unsafe { Span::new_from_raw_offset($offset, $line, $str, ()) } + }; + } + + #[test] + fn fact() { + // let input = Tokens { + // tok: &lex_tokens(Span::new("a(B,C).")).unwrap().1, + // }; + let input = Span::new("a(B,C)."); + assert_eq!( + parse_program(input), + vec![ast::Statement::Fact { + atom: ast::Atom::Atom { + predicate: Token { + kind: TokenKind::Ident, + span: S!(0, 1, "a"), + }, + terms: vec![ + Term::Primitive(Token { + kind: TokenKind::Ident, + span: S!(2, 1, "B"), + }), + Term::Primitive(Token { + kind: TokenKind::Ident, + span: S!(4, 1, "C"), + }), + ], + }, + }] + ) + } + } +} From 592dfefed816c53d45f0e53b7ad1b74281fd3b0b Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Wed, 13 Mar 2024 08:32:47 +0100 Subject: [PATCH 065/214] Add Tokens type --- nemo/src/io/parser/types.rs | 162 ++++++++++++++++++++++++++++++++++++ 1 file changed, 162 insertions(+) diff --git a/nemo/src/io/parser/types.rs b/nemo/src/io/parser/types.rs index dc40559b2..a65730809 100644 --- a/nemo/src/io/parser/types.rs +++ b/nemo/src/io/parser/types.rs @@ -422,3 +422,165 @@ impl FromExternalError, DataValueCreationError> for LocatedParseError { ParseError::ExternalError(Box::new(e.into())).at(input) } } + +use crate::io::lexer::Token; + +#[derive(Debug, Copy, Clone, PartialEq)] +pub(crate) struct Tokens<'a> { + pub(crate) tok: &'a [Token<'a>], +} +impl<'a> Tokens<'a> { + fn new(vec: &'a [Token]) -> Tokens<'a> { + Tokens { tok: vec } + } +} +impl<'a> nom::AsBytes for Tokens<'a> { + fn as_bytes(&self) -> &[u8] { + todo!() + } +} +impl<'a, T> nom::Compare for Tokens<'a> { + fn compare(&self, t: T) -> nom::CompareResult { + todo!() + } + + fn compare_no_case(&self, t: T) -> nom::CompareResult { + todo!() + } +} +// impl<'a> nom::ExtendInto for Tokens<'a> { +// type Item; + +// type Extender; + +// fn new_builder(&self) -> Self::Extender { +// 
todo!() +// } + +// fn extend_into(&self, acc: &mut Self::Extender) { +// todo!() +// } +// } +impl<'a, T> nom::FindSubstring for Tokens<'a> { + fn find_substring(&self, substr: T) -> Option { + todo!() + } +} +impl<'a, T> nom::FindToken for Tokens<'a> { + fn find_token(&self, token: T) -> bool { + todo!() + } +} +impl<'a> nom::InputIter for Tokens<'a> { + type Item = &'a Token<'a>; + + type Iter = std::iter::Enumerate<::std::slice::Iter<'a, Token<'a>>>; + + type IterElem = ::std::slice::Iter<'a, Token<'a>>; + + fn iter_indices(&self) -> Self::Iter { + self.tok.iter().enumerate() + } + + fn iter_elements(&self) -> Self::IterElem { + self.tok.iter() + } + + fn position
<P>
(&self, predicate: P) -> Option + where + P: Fn(Self::Item) -> bool, + { + self.tok.iter().position(predicate) + } + + fn slice_index(&self, count: usize) -> Result { + if self.tok.len() >= count { + Ok(count) + } else { + Err(nom::Needed::Unknown) + } + } +} +impl<'a> nom::InputLength for Tokens<'a> { + fn input_len(&self) -> usize { + self.tok.len() + } +} +impl<'a> nom::InputTake for Tokens<'a> { + fn take(&self, count: usize) -> Self { + Tokens { + tok: &self.tok[0..count], + } + } + + fn take_split(&self, count: usize) -> (Self, Self) { + ( + Tokens { + tok: &self.tok[count..self.tok.len()], + }, + Tokens { + tok: &self.tok[0..count], + }, + ) + } +} +impl<'a> nom::InputTakeAtPosition for Tokens<'a> { + type Item = &'a Token<'a>; + + fn split_at_position>( + &self, + predicate: P, + ) -> IResult + where + P: Fn(Self::Item) -> bool, + { + todo!() + } + + fn split_at_position1>( + &self, + predicate: P, + e: ErrorKind, + ) -> IResult + where + P: Fn(Self::Item) -> bool, + { + todo!() + } + + fn split_at_position_complete>( + &self, + predicate: P, + ) -> IResult + where + P: Fn(Self::Item) -> bool, + { + todo!() + } + + fn split_at_position1_complete>( + &self, + predicate: P, + e: ErrorKind, + ) -> IResult + where + P: Fn(Self::Item) -> bool, + { + todo!() + } +} +impl<'a> nom::Offset for Tokens<'a> { + fn offset(&self, second: &Self) -> usize { + todo!() + } +} +impl<'a, R> nom::ParseTo for Tokens<'a> { + fn parse_to(&self) -> Option { + todo!() + } +} +impl<'a, R> nom::Slice for Tokens<'a> { + fn slice(&self, range: R) -> Self { + todo!() + } +} From 5c3743edc1dc6de9c4f43a4daccb4e4d11439ff4 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Wed, 13 Mar 2024 08:34:02 +0100 Subject: [PATCH 066/214] Change mod visibility to pub(crate) --- nemo-physical/src/datavalues.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo-physical/src/datavalues.rs b/nemo-physical/src/datavalues.rs index e4f307c2b..37486431f 100644 --- a/nemo-physical/src/datavalues.rs +++ b/nemo-physical/src/datavalues.rs @@ -2,7 +2,7 @@ //! Data values are conceived on this level as canonical representations of unique (semantic) //! values across a number of domains (integers, strings, etc.). -pub(crate) mod syntax; +pub mod syntax; /// Module to define the general [DataValue] trait. pub mod datavalue; From 652107dea480011958c0b529583152e5a3c2b796 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Wed, 13 Mar 2024 08:35:00 +0100 Subject: [PATCH 067/214] Add import to physical syntax module --- nemo/src/model/rule_model/syntax.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/nemo/src/model/rule_model/syntax.rs b/nemo/src/model/rule_model/syntax.rs index e3b9cad78..7f2f38326 100644 --- a/nemo/src/model/rule_model/syntax.rs +++ b/nemo/src/model/rule_model/syntax.rs @@ -1,6 +1,7 @@ //! Constants for strings that are relevant to the syntax of rules. //! These are kept in one location, since they are required in various //! places related to parsing and display. +use nemo_physical::datavalues::syntax; /// The "predicate name" used for the CSV format in import/export directives. 
pub(crate) const FILE_FORMAT_CSV: &str = "csv"; From a7991bcae3070285ab6c023c753d59d08c385032 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Tue, 19 Mar 2024 13:31:56 +0100 Subject: [PATCH 068/214] Rename unequals to unequal --- nemo/src/io/lexer.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs index c0b6ccb0d..5bf30cd6e 100644 --- a/nemo/src/io/lexer.rs +++ b/nemo/src/io/lexer.rs @@ -166,7 +166,7 @@ syntax!(less_equal, "<=", TokenKind::LessEqual); syntax!(equal, "=", TokenKind::Equal); syntax!(greater_equal, ">=", TokenKind::GreaterEqual); syntax!(greater, ">", TokenKind::Greater); -syntax!(unequals, "!=", TokenKind::Unequal); +syntax!(unequal, "!=", TokenKind::Unequal); syntax!(plus, "+", TokenKind::Plus); syntax!(minus, "-", TokenKind::Minus); syntax!(star, "*", TokenKind::Star); @@ -176,7 +176,7 @@ pub(crate) fn lex_operators(input: Span) -> IResult { alt(( less_equal, greater_equal, - unequals, + unequal, less, equal, greater, From a18fdbae106e088aecf6034d2757bed63088636f Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Tue, 19 Mar 2024 13:35:17 +0100 Subject: [PATCH 069/214] Unify functions and predicates with NamedTuple and refactor rule literals into NegativeAtom --- nemo/src/io/parser/ast.rs | 82 +++++++++++++++++++++++---------------- 1 file changed, 49 insertions(+), 33 deletions(-) diff --git a/nemo/src/io/parser/ast.rs b/nemo/src/io/parser/ast.rs index 02c9c2d93..48589ed38 100644 --- a/nemo/src/io/parser/ast.rs +++ b/nemo/src/io/parser/ast.rs @@ -10,7 +10,7 @@ struct Position { pub(crate) type Program<'a> = Vec>; -#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Clone)] pub(crate) enum Statement<'a> { Directive(Directive<'a>), Fact { @@ -18,11 +18,11 @@ pub(crate) enum Statement<'a> { }, Rule { head: Vec>, - body: Vec>, + body: Vec>, }, } -#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Clone)] pub(crate) enum Directive<'a> { Base { kw: Token<'a>, @@ -43,18 +43,17 @@ pub(crate) enum Directive<'a> { predicate: Token<'a>, map: Map<'a>, }, + // maybe will be deprecated Output { kw: Token<'a>, predicates: Vec>, }, } -#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Clone)] pub(crate) enum Atom<'a> { - Atom { - predicate: Token<'a>, - terms: Vec>, - }, + Atom(NamedTuple<'a>), + NegativeAtom(NamedTuple<'a>), InfixAtom { operation: Token<'a>, lhs: Term<'a>, @@ -63,47 +62,55 @@ pub(crate) enum Atom<'a> { Map(Map<'a>), } -#[derive(Debug, PartialEq)] -pub(crate) enum Literal<'a> { - Positive(Atom<'a>), - Negative(Atom<'a>), -} - -#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Clone)] pub(crate) enum Term<'a> { Primitive(Token<'a>), + Unary { + operation: Token<'a>, + term: Box>, + }, Binary { operation: Token<'a>, lhs: Box>, rhs: Box>, }, - Unary { - operation: Token<'a>, - term: Box>, - }, Aggregation { operation: Token<'a>, terms: Vec>, }, - Function { - identifier: Token<'a>, - terms: Vec>, - }, + Function(NamedTuple<'a>), Map(Map<'a>), } -#[derive(Debug, PartialEq)] -struct Map<'a> { - identifier: Option>, - pairs: BTreeMap, Term<'a>>, +#[derive(Debug, PartialEq, Clone)] +pub(crate) struct NamedTuple<'a> { + pub(crate) identifier: Token<'a>, + pub(crate) terms: Vec>, +} + +#[derive(Debug, PartialEq, Clone)] +pub(crate) struct Map<'a> { + pub(crate) identifier: Option>, + pub(crate) pairs: Vec, Term<'a>>>, } -#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Clone)] +pub(crate) struct Pair { + key: K, + value: V, +} +impl Pair { + pub fn new(key: K, value: V) -> Pair { + 
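+        // Convenience constructor; the map parser builds one `Pair` for each
+        // `key = value` entry it reads.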
Pair { key, value } + } +} + +#[derive(Debug, PartialEq, Clone)] pub(crate) enum Node<'a> { Statement(&'a Statement<'a>), Directive(&'a Directive<'a>), RuleHead(&'a Vec>), - RuleBody(&'a Vec>), + RuleBody(&'a Vec>), Atom(&'a Atom<'a>), Term(&'a Term<'a>), Terms(&'a Vec>), @@ -118,7 +125,7 @@ pub(crate) enum Node<'a> { Lhs(&'a Term<'a>), Rhs(&'a Term<'a>), Identifier(&'a Token<'a>), - Pairs(&'a BTreeMap, Term<'a>>), + Pairs(&'a Vec, Term<'a>>>), MapIdentifier(&'a Option>), Primitive(&'a Token<'a>), } @@ -226,8 +233,17 @@ impl<'a> AstNode for Directive<'a> { impl<'a> AstNode for Atom<'a> { fn children(&self) -> Vec { match self { - Atom::Atom { predicate, terms } => { - vec![Node::KeyWord(predicate), Node::Terms(terms)] + Atom::Atom(named_tuple) => { + vec![ + Node::Identifier(&named_tuple.identifier), + Node::Terms(&named_tuple.terms), + ] + } + Atom::NegativeAtom(named_tuple) => { + vec![ + Node::Identifier(&named_tuple.identifier), + Node::Terms(&named_tuple.terms), + ] } Atom::InfixAtom { operation, @@ -256,7 +272,7 @@ impl<'a> AstNode for Term<'a> { Term::Aggregation { operation, terms } => { vec![Node::Operation(operation), Node::Terms(terms)] } - Term::Function { identifier, terms } => { + Term::Function(NamedTuple { identifier, terms }) => { vec![Node::Identifier(identifier), Node::Terms(terms)] } Term::Map(map) => map.children(), From 0bd00097a8fde12fe8eab3c8c4a2cabc1ded965c Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Tue, 19 Mar 2024 18:57:09 +0100 Subject: [PATCH 070/214] Add Variable TokenKind --- nemo/src/io/lexer.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs index 5bf30cd6e..216f732bf 100644 --- a/nemo/src/io/lexer.rs +++ b/nemo/src/io/lexer.rs @@ -72,8 +72,10 @@ pub(crate) enum TokenKind { /// '/' Slash, // Multi-char tokens: - /// Identifier for keywords and predicate names + /// Identifier for keywords and names Ident, + /// Variable, + Variable, /// IRI, delimited with `<` and `>` Iri, /// Base 10 digits From 3428b13cfe2f348578539eef17860de2b7916552 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Tue, 19 Mar 2024 18:58:38 +0100 Subject: [PATCH 071/214] Add Variable Term and change Pair field visibility --- nemo/src/io/parser/ast.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/nemo/src/io/parser/ast.rs b/nemo/src/io/parser/ast.rs index 48589ed38..7a2a2eebd 100644 --- a/nemo/src/io/parser/ast.rs +++ b/nemo/src/io/parser/ast.rs @@ -65,6 +65,7 @@ pub(crate) enum Atom<'a> { #[derive(Debug, PartialEq, Clone)] pub(crate) enum Term<'a> { Primitive(Token<'a>), + Variable(Token<'a>), Unary { operation: Token<'a>, term: Box>, @@ -96,8 +97,8 @@ pub(crate) struct Map<'a> { #[derive(Debug, PartialEq, Clone)] pub(crate) struct Pair { - key: K, - value: V, + pub(crate) key: K, + pub(crate) value: V, } impl Pair { pub fn new(key: K, value: V) -> Pair { @@ -128,6 +129,7 @@ pub(crate) enum Node<'a> { Pairs(&'a Vec, Term<'a>>>), MapIdentifier(&'a Option>), Primitive(&'a Token<'a>), + Variable(&'a Token<'a>), } trait AstNode { @@ -262,7 +264,8 @@ impl<'a> AstNode for Atom<'a> { impl<'a> AstNode for Term<'a> { fn children(&self) -> Vec { match self { - Term::Primitive(prim) => vec![Node::Primitive(prim)], + Term::Primitive(primitive) => vec![Node::Primitive(primitive)], + Term::Variable(var) => vec![Node::Variable(var)], Term::Binary { operation, lhs, From a74e9803f51355c14d0244c41fb3d510b63de781 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Tue, 19 Mar 2024 18:59:21 +0100 
Subject: [PATCH 072/214] Add str_concat crate --- Cargo.lock | 7 +++++++ nemo/Cargo.toml | 1 + 2 files changed, 8 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 46134e7f8..50e220b21 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1183,6 +1183,7 @@ dependencies = [ "sanitise-file-name", "serde", "serde_json", + "str-concat", "test-log", "thiserror", "tokio", @@ -2071,6 +2072,12 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "str-concat" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3468939e48401c4fe3cdf5e5cef50951c2808ed549d1467fde249f1fcb602634" + [[package]] name = "streaming-iterator" version = "0.1.9" diff --git a/nemo/Cargo.toml b/nemo/Cargo.toml index 173a3de22..411883468 100644 --- a/nemo/Cargo.toml +++ b/nemo/Cargo.toml @@ -46,6 +46,7 @@ serde_json = "1.0.108" serde = {version = "1.0.138", features = ["derive"] } dyn-clone = "1.0.16" unicode-ident = "1.0.12" +str-concat = "0.2.0" [dev-dependencies] env_logger = "*" From 7048c1bcfb6b7f61f03dede3b534434c82871cbf Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Tue, 19 Mar 2024 19:02:00 +0100 Subject: [PATCH 073/214] Add parser functions and tests --- nemo/src/io/parser.rs | 484 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 445 insertions(+), 39 deletions(-) diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index cd90bb337..67121271b 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -2428,11 +2428,23 @@ mod test { } } -// NEW PARSER +/// NEW PARSER mod new { - use super::ast::{self, Term}; + use std::collections::{BTreeMap, HashMap}; + use std::ffi::c_ushort; + + use super::ast::{self, NamedTuple, Pair, Term}; use super::types::Tokens; - use crate::io::lexer::{self, lex_ident, Span, TokenKind}; + use crate::io::lexer::{ + self, close_brace, comma, equal, greater, greater_equal, less, less_equal, lex_comment, + lex_doc_comment, lex_ident, lex_iri, lex_number, lex_string, lex_whitespace, open_brace, + unequal, Span, Token, TokenKind, + }; + use nom::combinator::{all_consuming, opt}; + use nom::error::ParseError; + use nom::multi::{many0, separated_list1}; + use nom::sequence::{delimited, pair}; + use nom::Parser; use nom::{ branch::alt, bytes::complete::take, @@ -2442,25 +2454,42 @@ mod new { IResult, }; - macro_rules! tag_token ( - ($func_name:ident, $tag: expr) => ( - fn $func_name(tokens: Tokens) -> IResult { - verify(take(1usize), |t: &Tokens| t.tok[0].kind == $tag)(tokens) - } - ) - ); + fn combine_spans<'a>(span1: Span<'a>, span2: Span<'a>) -> Result, str_concat::Error> { + // SAFETY: The concatenation of strings is okay, because they originated from the same source string. + // The raw offset is okay, because the offset of another span is used. 
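+        // `str_concat::concat` only succeeds when the two fragments are
+        // adjacent in memory; otherwise it returns `Err(NotAdjacent)`, which
+        // the `combine_spans_error` test below exercises.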
+ unsafe { + let fragment = str_concat::concat(span1.fragment(), span2.fragment())?; + Ok(Span::new_from_raw_offset( + span1.location_offset(), + span1.location_line(), + fragment, + (), + )) + } + } - // tag_token!(ident, TokenKind::Ident); - // tag_token!(open_paren, TokenKind::OpenParen); - // tag_token!(close_paren, TokenKind::CloseParen); - // tag_token!(comma, TokenKind::Comma); - // tag_token!(dot, TokenKind::Dot); + fn ignore_ws_and_comments<'a, F, O>( + inner: F, + ) -> impl FnMut(Span<'a>) -> IResult, O, nom::error::Error>> + where + F: Parser, O, nom::error::Error>> + + std::ops::FnMut(Span<'a>) -> IResult, O, nom::error::Error>>, + { + delimited( + many0(alt((lex_whitespace, lex_comment, lex_doc_comment))), + inner, + many0(alt((lex_whitespace, lex_comment, lex_doc_comment))), + ) + } fn parse_program<'a>(input: Span<'a>) -> ast::Program<'a> { - let (_, statements) = - // many1(alt((/*parse_rule, */parse_fact/*, parse_directive*/)))(input).unwrap(); - many1(parse_fact)(input).unwrap(); - dbg!(&statements); + let (_, statements) = all_consuming(many1(ignore_ws_and_comments(alt(( + parse_fact, + parse_rule, + parse_directive, + )))))(input) + .unwrap(); + // many0(parse_fact)(input).unwrap(); let mut program = ast::Program::new(); for statement in statements { program.push(statement) @@ -2468,34 +2497,245 @@ mod new { program } - // fn parse_rule<'a>(input: Tokens) -> IResult> {} - fn parse_fact<'a>(input: Span<'a>) -> IResult> { tuple(( - lex_ident, - lexer::open_paren, - separated_list0(lexer::comma, lex_ident), - lexer::close_paren, - lexer::dot, + ignore_ws_and_comments(parse_named_tuple), + ignore_ws_and_comments(lexer::dot), ))(input) - .map(|(rest, result)| { - let mut terms = Vec::new(); - for x in result.2 { - terms.push(Term::Primitive(x)) - } + .map(|(rest, (atom, _))| { ( rest, ast::Statement::Fact { - atom: ast::Atom::Atom { - predicate: result.0, - terms, - }, + atom: ast::Atom::Atom(atom), }, ) }) } - // fn parse_directive<'a>(input: Tokens) -> IResult> {} + fn parse_rule<'a>(input: Span<'a>) -> IResult> { + tuple(( + ignore_ws_and_comments(separated_list1( + lexer::comma, + ignore_ws_and_comments(parse_named_tuple), + )), + ignore_ws_and_comments(lexer::arrow), + ignore_ws_and_comments(separated_list1( + lexer::comma, + ignore_ws_and_comments(pair(opt(lexer::tilde), parse_named_tuple)), + )), + ignore_ws_and_comments(lexer::dot), + ))(input) + .map(|(rest, (head, _, body, _))| { + ( + rest, + ast::Statement::Rule { + head: head.iter().map(|x| ast::Atom::Atom(x.clone())).collect(), + body: body + .iter() + .map(|(tilde, atom)| { + if let None = tilde { + ast::Atom::Atom(atom.clone()) + } else { + ast::Atom::NegativeAtom(atom.clone()) + } + }) + .collect(), + }, + ) + }) + } + + fn parse_directive<'a>(input: Span<'a>) -> IResult> { + alt(( + ignore_ws_and_comments(parse_base_directive), + ignore_ws_and_comments(parse_prefix_directive), + ignore_ws_and_comments(parse_import_directive), + ignore_ws_and_comments(parse_export_directive), + ignore_ws_and_comments(parse_output_directive), + ))(input) + .map(|(rest, directive)| (rest, ast::Statement::Directive(directive))) + } + + fn parse_base_directive<'a>(input: Span<'a>) -> IResult> { + tuple(( + lexer::at, + verify(lex_ident, |token| token.kind == TokenKind::Base), + ignore_ws_and_comments(lex_iri), + ignore_ws_and_comments(lexer::dot), + ))(input) + .map(|(rest, (_, kw, base_iri, _))| (rest, ast::Directive::Base { kw, base_iri })) + } + + fn parse_prefix_directive<'a>(input: Span<'a>) -> IResult> { + tuple(( + 
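+            // Expected token sequence: `@prefix`, identifier, `:`, IRI, `.`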
lexer::at, + verify(lex_ident, |token| token.kind == TokenKind::Prefix), + ignore_ws_and_comments(lex_ident), + ignore_ws_and_comments(lexer::colon), + ignore_ws_and_comments(lex_iri), + ignore_ws_and_comments(lexer::dot), + ))(input) + .map(|(rest, (_, kw, prefix, _, prefix_iri, _))| { + ( + rest, + ast::Directive::Prefix { + kw, + prefix, + prefix_iri, + }, + ) + }) + } + + fn parse_import_directive<'a>(input: Span<'a>) -> IResult> { + tuple(( + lexer::at, + verify(lex_ident, |token| token.kind == TokenKind::Import), + ignore_ws_and_comments(lex_ident), + ignore_ws_and_comments(lexer::arrow), + ignore_ws_and_comments(parse_map), + ignore_ws_and_comments(lexer::dot), + ))(input) + .map(|(rest, (_, kw, predicate, _, map, _))| { + (rest, ast::Directive::Import { kw, predicate, map }) + }) + } + + fn parse_export_directive<'a>(input: Span<'a>) -> IResult> { + tuple(( + lexer::at, + verify(lex_ident, |token| token.kind == TokenKind::Export), + ignore_ws_and_comments(lex_ident), + ignore_ws_and_comments(lexer::arrow), + ignore_ws_and_comments(parse_map), + ignore_ws_and_comments(lexer::dot), + ))(input) + .map(|(rest, (_, kw, predicate, _, map, _))| { + (rest, ast::Directive::Export { kw, predicate, map }) + }) + } + + fn parse_output_directive<'a>(input: Span<'a>) -> IResult> { + tuple(( + lexer::at, + verify(lex_ident, |token| token.kind == TokenKind::Output), + ignore_ws_and_comments(separated_list0( + lexer::comma, + ignore_ws_and_comments(lex_ident), + )), + ignore_ws_and_comments(lexer::dot), + ))(input) + .map(|(rest, (_, kw, predicates, _))| (rest, ast::Directive::Output { kw, predicates })) + } + + fn parse_atom<'a>(input: Span<'a>) -> IResult> { + todo!("`parse_atom`!") + } + + fn parse_negative_atom<'a>(input: Span<'a>) -> IResult> { + todo!("`parse_negative_atom`!") + } + + fn parse_infix_atom<'a>(input: Span<'a>) -> IResult> { + tuple((parse_term, parse_operation_token, parse_term))(input).map( + |(rest, (lhs, operation, rhs))| { + ( + rest, + ast::Atom::InfixAtom { + operation, + lhs, + rhs, + }, + ) + }, + ) + } + + fn parse_named_tuple<'a>(input: Span<'a>) -> IResult> { + tuple(( + lex_ident, + lexer::open_paren, + // ignore_ws_and_comments(separated_list0(lexer::comma, parse_term)), + ignore_ws_and_comments(separated_list0(comma, ignore_ws_and_comments(parse_term))), + ignore_ws_and_comments(lexer::close_paren), + ))(input) + .map(|(rest, (identifier, _, terms, _))| (rest, NamedTuple { identifier, terms })) + } + + fn parse_map<'a>(input: Span<'a>) -> IResult> { + tuple(( + opt(lex_ident), + ignore_ws_and_comments(open_brace), + separated_list0( + ignore_ws_and_comments(comma), + ignore_ws_and_comments(tuple((parse_term, equal, parse_term))), + ), + ignore_ws_and_comments(close_brace), + ))(input) + .map(|(rest, (identifier, _, vec_of_pairs, _))| { + let mut pairs = Vec::new(); + for (key, _, value) in vec_of_pairs { + pairs.push(Pair::new(key, value)); + } + (rest, ast::Map { identifier, pairs }) + }) + } + + fn parse_term<'a>(input: Span<'a>) -> IResult> { + // alt(( + // parse_primitive_term, + // parse_unary_term, + // parse_binary_term, + // parse_aggregation_term, + // parse_function_term, + // parse_map_term, + // ))(input) + ignore_ws_and_comments(alt((parse_primitive_term, parse_variable)))(input) + } + + fn parse_primitive_term<'a>(input: Span<'a>) -> IResult> { + alt((lex_ident, lex_iri, lex_number, lex_string))(input) + .map(|(rest, term)| (rest, ast::Term::Primitive(term))) + } + + fn parse_unary_term<'a>(input: Span<'a>) -> IResult> { + 
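+        // Stubbed for now; one plausible implementation, which a later patch
+        // in this series adopts, is pair(lex_operators, parse_term) mapped
+        // into Term::Unary { operation, term }.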
todo!("`parse_unary_term`!") + } + + fn parse_binary_term<'a>(input: Span<'a>) -> IResult> { + todo!("`parse_binary_term`!") + } + + fn parse_aggregation_term<'a>(input: Span<'a>) -> IResult> { + todo!("`parse_aggregation_term`!") + } + + fn parse_function_term<'a>(input: Span<'a>) -> IResult> { + todo!("`parse_function_term`!") + } + + fn parse_map_term<'a>(input: Span<'a>) -> IResult> { + todo!("`parse_map_term`!") + } + + fn parse_variable<'a>(input: Span<'a>) -> IResult> { + ignore_ws_and_comments(pair(lexer::question_mark, lex_ident))(input).map( + |(rest, (question_mark, ident))| { + ( + rest, + ast::Term::Variable(Token { + kind: TokenKind::Variable, + span: combine_spans(question_mark.span, ident.span) + .expect("Spans were not adjacent in memory"), + }), + ) + }, + ) + } + + fn parse_operation_token<'a>(input: Span<'a>) -> IResult> { + alt((equal, unequal, less, less_equal, greater, greater_equal))(input) + } #[cfg(test)] mod test { @@ -2517,8 +2757,8 @@ mod new { assert_eq!( parse_program(input), vec![ast::Statement::Fact { - atom: ast::Atom::Atom { - predicate: Token { + atom: ast::Atom::Atom(NamedTuple { + identifier: Token { kind: TokenKind::Ident, span: S!(0, 1, "a"), }, @@ -2532,9 +2772,175 @@ mod new { span: S!(4, 1, "C"), }), ], - }, + }), }] ) } + + #[test] + fn syntax() { + let input = Span::new( + r#"@base .@prefix rdfs:.@import sourceA:-csv{resource="sources/dataA.csv"}.@export a:-csv{}.@output a."#, + ); + assert_eq!( + parse_program(input), + vec![ + ast::Statement::Directive(Directive::Base { + kw: Token { + kind: TokenKind::Base, + span: S!(1, 1, "base"), + }, + base_iri: Token { + kind: TokenKind::Iri, + span: S!(6, 1, "") + } + }), + ast::Statement::Directive(Directive::Prefix { + kw: Token { + kind: TokenKind::Prefix, + span: S!(33, 1, "prefix"), + }, + prefix: Token { + kind: TokenKind::Ident, + span: S!(40, 1, "rdfs"), + }, + prefix_iri: Token { + kind: TokenKind::Iri, + span: S!(45, 1, ""), + }, + }), + ast::Statement::Directive(Directive::Import { + kw: Token { + kind: TokenKind::Import, + span: S!(86, 1, "import"), + }, + predicate: Token { + kind: TokenKind::Ident, + span: S!(93, 1, "sourceA"), + }, + map: Map { + identifier: Some(Token { + kind: TokenKind::Ident, + span: S!(102, 1, "csv") + }), + pairs: vec![Pair { + key: Term::Primitive(Token { + kind: TokenKind::Ident, + span: S!(106, 1, "resource"), + }), + value: Term::Primitive(Token { + kind: TokenKind::String, + span: S!(115, 1, "\"sources/dataA.csv\""), + }) + }], + }, + }), + ast::Statement::Directive(Directive::Export { + kw: Token { + kind: TokenKind::Export, + span: S!(137, 1, "export"), + }, + predicate: Token { + kind: TokenKind::Ident, + span: S!(144, 1, "a"), + }, + map: Map { + identifier: Some(Token { + kind: TokenKind::Ident, + span: S!(147, 1, "csv"), + }), + pairs: vec![] + } + }), + ast::Statement::Directive(Directive::Output { + kw: Token { + kind: TokenKind::Output, + span: S!(154, 1, "output") + }, + predicates: vec![Token { + kind: TokenKind::Ident, + span: S!(161, 1, "a") + }] + }), + ] + ) + } + + #[test] + fn ignore_ws_and_comments() { + let input = Span::new(" Hi %cool comment\n"); + assert_eq!( + super::ignore_ws_and_comments(lex_ident)(input), + Ok(( + S!(22, 2, ""), + Token { + kind: TokenKind::Ident, + span: S!(3, 1, "Hi") + } + )) + ) + } + + #[test] + fn fact_with_ws() { + let input = Span::new("some(Fact, with, whitespace) . 
% and a super useful comment\n"); + assert_eq!( + parse_program(input), + vec![ast::Statement::Fact { + atom: Atom::Atom(NamedTuple { + identifier: Token { + kind: TokenKind::Ident, + span: S!(0, 1, "some"), + }, + terms: vec![ + Term::Primitive(Token { + kind: TokenKind::Ident, + span: S!(5, 1, "Fact") + }), + Term::Primitive(Token { + kind: TokenKind::Ident, + span: S!(11, 1, "with") + }), + Term::Primitive(Token { + kind: TokenKind::Ident, + span: S!(17, 1, "whitespace") + }), + ] + }) + }] + ) + } + + #[test] + fn combine_spans() { + use nom::bytes::complete::tag; + let source = "Some Input ;)"; + let input = Span::new(source); + let (input, first) = tag::<&str, Span, nom::error::Error<_>>("Some ")(input).unwrap(); + let (input, second) = tag::<&str, Span, nom::error::Error<_>>("Input")(input).unwrap(); + let span = super::combine_spans(first, second); + assert_eq!(span, Ok(Span::new("Some Input"))) + } + + #[test] + fn combine_spans_error() { + use nom::bytes::complete::tag; + let source = "Some Input ;)"; + let input = Span::new(source); + let (input, first) = tag::<&str, Span, nom::error::Error<_>>("Some")(input).unwrap(); + let (input, _) = tag::<&str, Span, nom::error::Error<_>>(" ")(input).unwrap(); + let (input, second) = tag::<&str, Span, nom::error::Error<_>>("Input")(input).unwrap(); + let span = super::combine_spans(first, second); + assert_eq!(span, Err(str_concat::Error::NotAdjacent)) + } + + #[test] + fn parser_test() { + let str = + std::fs::read_to_string("../testfile.rls").expect("`../testfile.rls` not found"); + let input = Span::new(str.as_str()); + dbg!(parse_program(input)); + // assert!(false); + } } } From 48829f63675ee65a04c63e7fc4879bdfa3b975fb Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Tue, 9 Apr 2024 09:05:34 +0200 Subject: [PATCH 074/214] Extend Ast nodes and implement AstNode trait --- Cargo.lock | 7 - nemo/Cargo.toml | 1 - nemo/src/io/lexer.rs | 127 ++- nemo/src/io/parser.rs | 1146 ++++++++++++++++++------- nemo/src/io/parser/ast.rs | 549 ++++++------ nemo/src/io/parser/ast/atom.rs | 62 ++ nemo/src/io/parser/ast/directive.rs | 212 +++++ nemo/src/io/parser/ast/map.rs | 72 ++ nemo/src/io/parser/ast/named_tuple.rs | 40 + nemo/src/io/parser/ast/program.rs | 29 + nemo/src/io/parser/ast/statement.rs | 96 +++ nemo/src/io/parser/ast/term.rs | 103 +++ 12 files changed, 1820 insertions(+), 624 deletions(-) create mode 100644 nemo/src/io/parser/ast/atom.rs create mode 100644 nemo/src/io/parser/ast/directive.rs create mode 100644 nemo/src/io/parser/ast/map.rs create mode 100644 nemo/src/io/parser/ast/named_tuple.rs create mode 100644 nemo/src/io/parser/ast/program.rs create mode 100644 nemo/src/io/parser/ast/statement.rs create mode 100644 nemo/src/io/parser/ast/term.rs diff --git a/Cargo.lock b/Cargo.lock index 50e220b21..46134e7f8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1183,7 +1183,6 @@ dependencies = [ "sanitise-file-name", "serde", "serde_json", - "str-concat", "test-log", "thiserror", "tokio", @@ -2072,12 +2071,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "str-concat" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3468939e48401c4fe3cdf5e5cef50951c2808ed549d1467fde249f1fcb602634" - [[package]] name = "streaming-iterator" version = "0.1.9" diff --git a/nemo/Cargo.toml b/nemo/Cargo.toml index 411883468..173a3de22 100644 --- a/nemo/Cargo.toml +++ b/nemo/Cargo.toml @@ -46,7 +46,6 @@ serde_json = "1.0.108" serde = {version = "1.0.138", features = ["derive"] } dyn-clone = "1.0.16" 
unicode-ident = "1.0.12" -str-concat = "0.2.0" [dev-dependencies] env_logger = "*" diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs index 216f732bf..521b6b82c 100644 --- a/nemo/src/io/lexer.rs +++ b/nemo/src/io/lexer.rs @@ -5,12 +5,14 @@ use nom::{ bytes::complete::{is_not, tag, take}, character::complete::{alpha1, alphanumeric1, digit1, line_ending, multispace1}, combinator::{all_consuming, map, recognize}, - multi::many0, + multi::{many0, many1}, sequence::{delimited, pair, tuple}, IResult, }; use nom_locate::LocatedSpan; +use super::parser::ast::Position; + pub(crate) type Span<'a> = LocatedSpan<&'a str>; /// All the tokens the input gets parsed into. @@ -86,6 +88,8 @@ pub(crate) enum TokenKind { Comment, /// A comment, starting with `%%` DocComment, + /// A comment, starting with `%!` + TlDocComment, /// ` `, `\t`, `\r` or `\n` Whitespace, /// base directive keyword @@ -98,11 +102,63 @@ pub(crate) enum TokenKind { Export, /// output directive keyword Output, + /// Ident for prefixes + PrefixIdent, /// catch all token Illegal, /// signals end of file Eof, } +impl std::fmt::Display for TokenKind { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + TokenKind::QuestionMark => write!(f, "QuestionMark"), + TokenKind::ExclamationMark => write!(f, "ExclamationMark"), + TokenKind::OpenParen => write!(f, "OpenParen"), + TokenKind::CloseParen => write!(f, "CloseParen"), + TokenKind::OpenBracket => write!(f, "OpenBracket"), + TokenKind::CloseBracket => write!(f, "CloseBracket"), + TokenKind::OpenBrace => write!(f, "OpenBrace"), + TokenKind::CloseBrace => write!(f, "CloseBrace"), + TokenKind::Dot => write!(f, "Dot"), + TokenKind::Comma => write!(f, "Comma"), + TokenKind::Colon => write!(f, "Colon"), + TokenKind::Arrow => write!(f, "Arrow"), + TokenKind::Greater => write!(f, "Greater"), + TokenKind::GreaterEqual => write!(f, "GreaterEqual"), + TokenKind::Equal => write!(f, "Equal"), + TokenKind::LessEqual => write!(f, "LessEqual"), + TokenKind::Less => write!(f, "Less"), + TokenKind::Unequal => write!(f, "Unequal"), + TokenKind::Tilde => write!(f, "Tilde"), + TokenKind::Caret => write!(f, "Caret"), + TokenKind::Hash => write!(f, "Hash"), + TokenKind::Underscore => write!(f, "Underscore"), + TokenKind::At => write!(f, "At"), + TokenKind::Plus => write!(f, "Plus"), + TokenKind::Minus => write!(f, "Minus"), + TokenKind::Star => write!(f, "Star"), + TokenKind::Slash => write!(f, "Slash"), + TokenKind::Ident => write!(f, "Ident"), + TokenKind::Variable => write!(f, "Variable"), + TokenKind::Iri => write!(f, "Iri"), + TokenKind::Number => write!(f, "Number"), + TokenKind::String => write!(f, "String"), + TokenKind::Comment => write!(f, "Comment"), + TokenKind::DocComment => write!(f, "DocComment"), + TokenKind::TlDocComment => write!(f, "TlDocComment"), + TokenKind::Whitespace => write!(f, "Whitespace"), + TokenKind::Base => write!(f, "Base"), + TokenKind::Prefix => write!(f, "Prefix"), + TokenKind::Import => write!(f, "Import"), + TokenKind::Export => write!(f, "Export"), + TokenKind::Output => write!(f, "Output"), + TokenKind::PrefixIdent => write!(f, "PrefixIdent"), + TokenKind::Illegal => write!(f, "Illegal"), + TokenKind::Eof => write!(f, "Eof"), + } + } +} #[derive(Debug, Copy, Clone, PartialEq)] pub(crate) struct Token<'a> { @@ -110,10 +166,43 @@ pub(crate) struct Token<'a> { pub(crate) span: Span<'a>, } impl<'a> Token<'a> { - fn new(kind: TokenKind, span: Span<'a>) -> Token<'a> { + pub(crate) fn new(kind: TokenKind, span: Span<'a>) -> Token<'a> { Token 
{ kind, span } } } +impl std::fmt::Display for Token<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let offset = self.span.location_offset(); + let line = self.span.location_line(); + let fragment = self.span.fragment(); + if self.span.extra == () { + write!(f, "T!{{{0}, S!({offset}, {line}, {fragment})}}", self.kind) + } else { + write!( + f, + "T!{{{0}, S!({offset}, {line}, {fragment}, {1:?})}}", + self.kind, self.span.extra + ) + } + } +} +impl<'a> crate::io::parser::ast::AstNode for Token<'a> { + fn children(&self) -> Option> { + None::> + } + + fn span(&self) -> Span { + self.span + } + + // fn position(&self) -> Position { + // Position { + // offset: self.span.location_offset(), + // line: self.span.location_line(), + // column: self.span.get_column() as u32, + // } + // } +} macro_rules! syntax { ($func_name: ident, $tag_string: literal, $token: expr) => { @@ -220,15 +309,24 @@ pub(crate) fn lex_string(input: Span) -> IResult { } pub(crate) fn lex_comment(input: Span) -> IResult { - recognize(tuple((tag("%"), many0(is_not("\r\n")), line_ending)))(input) + recognize(tuple((tag("%"), many0(is_not("\n")), line_ending)))(input) .map(|(rest, result)| (rest, Token::new(TokenKind::Comment, result))) } pub(crate) fn lex_doc_comment(input: Span) -> IResult { - recognize(tuple((tag("%%"), many0(is_not("\r\n")), line_ending)))(input) + recognize(many1(tuple((tag("%%"), many0(is_not("\n")), line_ending))))(input) .map(|(rest, result)| (rest, Token::new(TokenKind::DocComment, result))) } +pub(crate) fn lex_toplevel_doc_comment(input: Span) -> IResult { + recognize(many1(tuple((tag("%!"), many0(is_not("\n")), line_ending))))(input) + .map(|(rest, result)| (rest, Token::new(TokenKind::TlDocComment, result))) +} + +pub(crate) fn lex_comments(input: Span) -> IResult { + alt((lex_toplevel_doc_comment, lex_doc_comment, lex_comment))(input) +} + pub(crate) fn lex_whitespace(input: Span) -> IResult { multispace1(input).map(|(rest, result)| (rest, Token::new(TokenKind::Whitespace, result))) } @@ -245,7 +343,7 @@ pub(crate) fn lex_tokens(input: Span) -> IResult> { lex_ident, lex_number, lex_string, - lex_comment, + lex_comments, lex_whitespace, lex_illegal, ))))(input) @@ -398,12 +496,13 @@ mod test { #[test] fn comment() { - let input = Span::new("% Some Comment\n"); + let input = Span::new(" % Some Comment\n"); assert_eq!( lex_tokens(input).unwrap().1, vec![ - T!(Comment, 0, 1, "% Some Comment\n"), - T!(Eof, 15, 2, ""), + T!(Whitespace, 0, 1, " "), + T!(Comment, 4, 1, "% Some Comment\n"), + T!(Eof, 19, 2, ""), // T!(Comment, Span::new(0, 1, "% Some Comment\n")), // T!(Eof, Span::new(15, 2, "")) ] @@ -544,4 +643,16 @@ mod test { ] ) } + + #[test] + fn whitespace() { + let input = Span::new(" \t \n\n\t \n"); + assert_eq!( + lex_tokens(input).unwrap().1, + vec![ + T!(Whitespace, 0, 1, " \t \n\n\t \n"), + T!(Eof, 12, 4, ""), + ] + ) + } } diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index 67121271b..b29afe701 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -22,7 +22,7 @@ use nom::{ use macros::traced; -mod ast; +pub(crate) mod ast; mod types; use types::{ConstraintOperator, IntermediateResult, Span}; @@ -2430,41 +2430,36 @@ mod test { /// NEW PARSER mod new { - use std::collections::{BTreeMap, HashMap}; - use std::ffi::c_ushort; - - use super::ast::{self, NamedTuple, Pair, Term}; - use super::types::Tokens; + use super::ast::{ + atom::*, directive::*, map::*, named_tuple::*, program::*, statement::*, term::*, List, + }; use crate::io::lexer::{ 
- self, close_brace, comma, equal, greater, greater_equal, less, less_equal, lex_comment, - lex_doc_comment, lex_ident, lex_iri, lex_number, lex_string, lex_whitespace, open_brace, - unequal, Span, Token, TokenKind, + arrow, at, close_brace, close_paren, colon, comma, dot, equal, greater, greater_equal, + less, less_equal, lex_comment, lex_doc_comment, lex_ident, lex_iri, lex_number, + lex_operators, lex_string, lex_toplevel_doc_comment, lex_whitespace, open_brace, + open_paren, question_mark, tilde, unequal, Span, Token, TokenKind, }; - use nom::combinator::{all_consuming, opt}; - use nom::error::ParseError; - use nom::multi::{many0, separated_list1}; + use nom::combinator::{all_consuming, opt, recognize}; use nom::sequence::{delimited, pair}; use nom::Parser; use nom::{ branch::alt, - bytes::complete::take, combinator::verify, - multi::{many1, separated_list0}, + multi::{many0, many1, separated_list0}, sequence::tuple, IResult, }; - fn combine_spans<'a>(span1: Span<'a>, span2: Span<'a>) -> Result, str_concat::Error> { - // SAFETY: The concatenation of strings is okay, because they originated from the same source string. - // The raw offset is okay, because the offset of another span is used. + fn outer_span<'a>(input: Span<'a>, rest_input: Span<'a>) -> Span<'a> { unsafe { - let fragment = str_concat::concat(span1.fragment(), span2.fragment())?; - Ok(Span::new_from_raw_offset( - span1.location_offset(), - span1.location_line(), - fragment, + let span = Span::new_from_raw_offset( + input.location_offset(), + input.location_line(), + &input[..(rest_input.location_offset() - input.location_offset())], (), - )) + ); + // dbg!(&input, &span, &rest_input); + span } } @@ -2473,264 +2468,575 @@ mod new { ) -> impl FnMut(Span<'a>) -> IResult, O, nom::error::Error>> where F: Parser, O, nom::error::Error>> - + std::ops::FnMut(Span<'a>) -> IResult, O, nom::error::Error>>, + + FnMut(Span<'a>) -> IResult, O, nom::error::Error>>, { delimited( - many0(alt((lex_whitespace, lex_comment, lex_doc_comment))), + many0(alt((lex_whitespace, lex_comment))), inner, - many0(alt((lex_whitespace, lex_comment, lex_doc_comment))), + many0(alt((lex_whitespace, lex_comment))), ) } - fn parse_program<'a>(input: Span<'a>) -> ast::Program<'a> { - let (_, statements) = all_consuming(many1(ignore_ws_and_comments(alt(( - parse_fact, - parse_rule, - parse_directive, - )))))(input) - .unwrap(); - // many0(parse_fact)(input).unwrap(); - let mut program = ast::Program::new(); - for statement in statements { - program.push(statement) + fn parse_program<'a>(input: Span<'a>) -> Program<'a> { + let span = input.clone(); + let (_, (tl_doc_comment, statements)) = all_consuming(pair( + opt(lex_toplevel_doc_comment), + many1(alt(( + parse_fact, + parse_rule, + parse_whitespace, + parse_directive, + parse_comment, + ))), + ))(input) + .expect("Expect EOF"); + Program { + span, + tl_doc_comment, + statements, } - program } - fn parse_fact<'a>(input: Span<'a>) -> IResult> { + fn parse_whitespace<'a>(input: Span<'a>) -> IResult> { + lex_whitespace(input).map(|(rest, ws)| (rest, Statement::Whitespace(ws))) + } + + fn parse_comment<'a>(input: Span<'a>) -> IResult> { + lex_comment(input).map(|(rest, comment)| (rest, Statement::Comment(comment))) + } + + fn parse_fact<'a>(input: Span<'a>) -> IResult> { + let input_span = input; tuple(( - ignore_ws_and_comments(parse_named_tuple), - ignore_ws_and_comments(lexer::dot), + opt(lex_doc_comment), + parse_normal_atom, + opt(lex_whitespace), + dot, ))(input) - .map(|(rest, (atom, _))| { + 
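+        // Fold the parsed pieces into Statement::Fact, keeping the optional
+        // doc comment, trailing whitespace and the dot so the AST preserves
+        // exact source spans.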
.map(|(rest_input, (doc_comment, atom, ws, dot))| { ( - rest, - ast::Statement::Fact { - atom: ast::Atom::Atom(atom), + rest_input, + Statement::Fact { + span: outer_span(input_span, rest_input), + doc_comment, + atom, + ws, + dot, }, ) }) } - fn parse_rule<'a>(input: Span<'a>) -> IResult> { + fn parse_rule<'a>(input: Span<'a>) -> IResult> { + let input_span = input; tuple(( - ignore_ws_and_comments(separated_list1( - lexer::comma, - ignore_ws_and_comments(parse_named_tuple), - )), - ignore_ws_and_comments(lexer::arrow), - ignore_ws_and_comments(separated_list1( - lexer::comma, - ignore_ws_and_comments(pair(opt(lexer::tilde), parse_named_tuple)), + opt(lex_doc_comment), + parse_head, + opt(lex_whitespace), + arrow, + opt(lex_whitespace), + parse_body, + opt(lex_whitespace), + dot, + ))(input) + .map( + |(rest_input, (doc_comment, head, ws1, arrow, ws2, body, ws3, dot))| { + ( + rest_input, + Statement::Rule { + span: outer_span(input_span, rest_input), + doc_comment, + head, + ws1, + arrow, + ws2, + body, + ws3, + dot, + }, + ) + }, + ) + } + + fn parse_head<'a>(input: Span<'a>) -> IResult>> { + parse_atom_list(input, parse_head_atoms) + } + + fn parse_body<'a>(input: Span<'a>) -> IResult>> { + parse_atom_list(input, parse_body_atoms) + } + + fn parse_directive<'a>(input: Span<'a>) -> IResult> { + alt(( + parse_base_directive, + parse_prefix_directive, + parse_import_directive, + parse_export_directive, + parse_output_directive, + ))(input) + .map(|(rest, directive)| (rest, Statement::Directive(directive))) + } + + fn parse_base_directive<'a>(input: Span<'a>) -> IResult> { + let input_span = input.clone(); + tuple(( + opt(lex_doc_comment), + recognize(pair( + at, + verify(lex_ident, |token| token.kind == TokenKind::Base), )), - ignore_ws_and_comments(lexer::dot), + opt(lex_whitespace), + lex_iri, + opt(lex_whitespace), + dot, ))(input) - .map(|(rest, (head, _, body, _))| { + .map(|(rest_input, (doc_comment, kw, ws1, base_iri, ws2, dot))| { ( - rest, - ast::Statement::Rule { - head: head.iter().map(|x| ast::Atom::Atom(x.clone())).collect(), - body: body - .iter() - .map(|(tilde, atom)| { - if let None = tilde { - ast::Atom::Atom(atom.clone()) - } else { - ast::Atom::NegativeAtom(atom.clone()) - } - }) - .collect(), + rest_input, + Directive::Base { + span: outer_span(input_span, rest_input), + doc_comment, + kw: Token { + kind: TokenKind::Base, + span: kw, + }, + ws1, + base_iri, + ws2, + dot, }, ) }) } - fn parse_directive<'a>(input: Span<'a>) -> IResult> { - alt(( - ignore_ws_and_comments(parse_base_directive), - ignore_ws_and_comments(parse_prefix_directive), - ignore_ws_and_comments(parse_import_directive), - ignore_ws_and_comments(parse_export_directive), - ignore_ws_and_comments(parse_output_directive), + fn parse_prefix_directive<'a>(input: Span<'a>) -> IResult> { + let input_span = input.clone(); + tuple(( + opt(lex_doc_comment), + recognize(pair( + at, + verify(lex_ident, |token| token.kind == TokenKind::Prefix), + )), + opt(lex_whitespace), + recognize(pair(lex_ident, colon)), + opt(lex_whitespace), + lex_iri, + opt(lex_whitespace), + dot, + ))(input) + .map( + |(rest_input, (doc_comment, kw, ws1, prefix, ws2, prefix_iri, ws3, dot))| { + ( + rest_input, + Directive::Prefix { + span: outer_span(input_span, rest_input), + doc_comment, + kw: Token { + kind: TokenKind::Prefix, + span: kw, + }, + ws1, + prefix: Token { + kind: TokenKind::Ident, + span: prefix, + }, + ws2, + prefix_iri, + ws3, + dot, + }, + ) + }, + ) + } + + fn parse_import_directive<'a>(input: Span<'a>) -> IResult> { 
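+        // Recognizes `@import predicate :- map .`, e.g. the syntax test's
+        // `@import sourceA:-csv{resource="sources/dataA.csv"}.`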
+ let input_span = input.clone(); + tuple(( + opt(lex_doc_comment), + recognize(pair( + at, + verify(lex_ident, |token| token.kind == TokenKind::Import), + )), + lex_whitespace, + lex_ident, + opt(lex_whitespace), + arrow, + opt(lex_whitespace), + parse_map, + opt(lex_whitespace), + dot, ))(input) - .map(|(rest, directive)| (rest, ast::Statement::Directive(directive))) + .map( + |(rest_input, (doc_comment, kw, ws1, predicate, ws2, arrow, ws3, map, ws4, dot))| { + ( + rest_input, + Directive::Import { + span: outer_span(input_span, rest_input), + doc_comment, + kw: Token { + kind: TokenKind::Import, + span: kw, + }, + ws1, + predicate, + ws2, + arrow, + ws3, + map, + ws4, + dot, + }, + ) + }, + ) } - fn parse_base_directive<'a>(input: Span<'a>) -> IResult> { + fn parse_export_directive<'a>(input: Span<'a>) -> IResult> { + let input_span = input.clone(); tuple(( - lexer::at, - verify(lex_ident, |token| token.kind == TokenKind::Base), - ignore_ws_and_comments(lex_iri), - ignore_ws_and_comments(lexer::dot), + opt(lex_doc_comment), + recognize(pair( + at, + verify(lex_ident, |token| token.kind == TokenKind::Export), + )), + lex_whitespace, + lex_ident, + opt(lex_whitespace), + arrow, + opt(lex_whitespace), + parse_map, + opt(lex_whitespace), + dot, ))(input) - .map(|(rest, (_, kw, base_iri, _))| (rest, ast::Directive::Base { kw, base_iri })) + .map( + |(rest_input, (doc_comment, kw, ws1, predicate, ws2, arrow, ws3, map, ws4, dot))| { + ( + rest_input, + Directive::Export { + span: outer_span(input_span, rest_input), + doc_comment, + kw: Token { + kind: TokenKind::Export, + span: kw, + }, + ws1, + predicate, + ws2, + arrow, + ws3, + map, + ws4, + dot, + }, + ) + }, + ) } - fn parse_prefix_directive<'a>(input: Span<'a>) -> IResult> { + fn parse_output_directive<'a>(input: Span<'a>) -> IResult> { + let input_span = input.clone(); tuple(( - lexer::at, - verify(lex_ident, |token| token.kind == TokenKind::Prefix), - ignore_ws_and_comments(lex_ident), - ignore_ws_and_comments(lexer::colon), - ignore_ws_and_comments(lex_iri), - ignore_ws_and_comments(lexer::dot), + opt(lex_doc_comment), + at, + verify(lex_ident, |token| token.kind == TokenKind::Output), + ignore_ws_and_comments(separated_list0(comma, ignore_ws_and_comments(lex_ident))), + ignore_ws_and_comments(dot), ))(input) - .map(|(rest, (_, kw, prefix, _, prefix_iri, _))| { + .map(|(rest_input, (doc_comment, _, kw, predicates, _))| { ( - rest, - ast::Directive::Prefix { + rest_input, + Directive::Output { + span: outer_span(input_span, rest_input), + doc_comment, kw, - prefix, - prefix_iri, + predicates, }, ) }) } - fn parse_import_directive<'a>(input: Span<'a>) -> IResult> { - tuple(( - lexer::at, - verify(lex_ident, |token| token.kind == TokenKind::Import), - ignore_ws_and_comments(lex_ident), - ignore_ws_and_comments(lexer::arrow), - ignore_ws_and_comments(parse_map), - ignore_ws_and_comments(lexer::dot), - ))(input) - .map(|(rest, (_, kw, predicate, _, map, _))| { - (rest, ast::Directive::Import { kw, predicate, map }) + fn parse_atom_list<'a>( + input: Span<'a>, + parse_atom: fn(Span<'a>) -> IResult>, + ) -> IResult>> { + let input_span = input.clone(); + pair( + parse_atom, + many0(tuple(( + opt(lex_whitespace), + comma, + opt(lex_whitespace), + parse_atom, + ))), + )(input) + .map(|(rest_input, (first, rest))| { + ( + rest_input, + List { + span: outer_span(input_span, rest_input), + first, + rest: if rest.is_empty() { None } else { Some(rest) }, + }, + ) }) } - fn parse_export_directive<'a>(input: Span<'a>) -> IResult> { - tuple(( - 
lexer::at, - verify(lex_ident, |token| token.kind == TokenKind::Export), - ignore_ws_and_comments(lex_ident), - ignore_ws_and_comments(lexer::arrow), - ignore_ws_and_comments(parse_map), - ignore_ws_and_comments(lexer::dot), - ))(input) - .map(|(rest, (_, kw, predicate, _, map, _))| { - (rest, ast::Directive::Export { kw, predicate, map }) - }) + fn parse_head_atoms<'a>(input: Span<'a>) -> IResult> { + alt((parse_normal_atom, parse_infix_atom, parse_map_atom))(input) } - fn parse_output_directive<'a>(input: Span<'a>) -> IResult> { - tuple(( - lexer::at, - verify(lex_ident, |token| token.kind == TokenKind::Output), - ignore_ws_and_comments(separated_list0( - lexer::comma, - ignore_ws_and_comments(lex_ident), - )), - ignore_ws_and_comments(lexer::dot), + fn parse_body_atoms<'a>(input: Span<'a>) -> IResult> { + alt(( + parse_normal_atom, + parse_negative_atom, + parse_infix_atom, + parse_map_atom, ))(input) - .map(|(rest, (_, kw, predicates, _))| (rest, ast::Directive::Output { kw, predicates })) } - fn parse_atom<'a>(input: Span<'a>) -> IResult> { - todo!("`parse_atom`!") + fn parse_normal_atom<'a>(input: Span<'a>) -> IResult> { + parse_named_tuple(input) + .map(|(rest_input, named_tuple)| (rest_input, Atom::Positive(named_tuple))) + } + + fn parse_negative_atom<'a>(input: Span<'a>) -> IResult> { + let input_span = input.clone(); + pair(tilde, parse_named_tuple)(input).map(|(rest_input, (tilde, named_tuple))| { + ( + rest_input, + Atom::Negative { + span: outer_span(input_span, rest_input), + neg: tilde, + atom: named_tuple, + }, + ) + }) } - fn parse_negative_atom<'a>(input: Span<'a>) -> IResult> { - todo!("`parse_negative_atom`!") + fn parse_infix_atom<'a>(input: Span<'a>) -> IResult> { + let input_span = input.clone(); + tuple(( + parse_term, + opt(lex_whitespace), + parse_operation_token, + opt(lex_whitespace), + parse_term, + ))(input) + .map(|(rest_input, (lhs, ws1, operation, ws2, rhs))| { + ( + rest_input, + Atom::InfixAtom { + span: outer_span(input_span, rest_input), + lhs, + ws1, + operation, + ws2, + rhs, + }, + ) + }) } - fn parse_infix_atom<'a>(input: Span<'a>) -> IResult> { - tuple((parse_term, parse_operation_token, parse_term))(input).map( - |(rest, (lhs, operation, rhs))| { + fn parse_named_tuple<'a>(input: Span<'a>) -> IResult> { + let input_span = input.clone(); + tuple(( + lex_ident, + opt(lex_whitespace), + open_paren, + opt(lex_whitespace), + opt(parse_term_list), + opt(lex_whitespace), + close_paren, + ))(input) + .map( + |(rest_input, (identifier, ws1, open_paren, ws2, terms, ws3, close_paren))| { ( - rest, - ast::Atom::InfixAtom { - operation, - lhs, - rhs, + rest_input, + NamedTuple { + span: outer_span(input_span, rest_input), + identifier, + ws1, + open_paren, + ws2, + terms, + ws3, + close_paren, }, ) }, ) } - fn parse_named_tuple<'a>(input: Span<'a>) -> IResult> { + fn parse_map<'a>(input: Span<'a>) -> IResult> { + let input_span = input.clone(); tuple(( - lex_ident, - lexer::open_paren, - // ignore_ws_and_comments(separated_list0(lexer::comma, parse_term)), - ignore_ws_and_comments(separated_list0(comma, ignore_ws_and_comments(parse_term))), - ignore_ws_and_comments(lexer::close_paren), + opt(lex_ident), + opt(lex_whitespace), + open_brace, + opt(lex_whitespace), + parse_pair_list, + opt(lex_whitespace), + close_brace, ))(input) - .map(|(rest, (identifier, _, terms, _))| (rest, NamedTuple { identifier, terms })) + .map( + |(rest_input, (identifier, ws1, open_brace, ws2, pairs, ws3, close_brace))| { + ( + rest_input, + Map { + span: outer_span(input_span, 
rest_input), + identifier, + ws1, + open_brace, + ws2, + pairs, + ws3, + close_brace, + }, + ) + }, + ) } - fn parse_map<'a>(input: Span<'a>) -> IResult> { - tuple(( - opt(lex_ident), - ignore_ws_and_comments(open_brace), - separated_list0( - ignore_ws_and_comments(comma), - ignore_ws_and_comments(tuple((parse_term, equal, parse_term))), - ), - ignore_ws_and_comments(close_brace), + fn parse_map_atom<'a>(input: Span<'a>) -> IResult> { + parse_map(input).map(|(rest_input, map)| (rest_input, Atom::Map(map))) + } + + fn parse_pair_list<'a>( + input: Span<'a>, + ) -> IResult, Term<'a>>>>> { + let input_span = input.clone(); + opt(pair( + parse_pair, + many0(tuple(( + opt(lex_whitespace), + comma, + opt(lex_whitespace), + parse_pair, + ))), ))(input) - .map(|(rest, (identifier, _, vec_of_pairs, _))| { - let mut pairs = Vec::new(); - for (key, _, value) in vec_of_pairs { - pairs.push(Pair::new(key, value)); + .map(|(rest_input, pair_list)| { + if let Some((first, rest)) = pair_list { + ( + rest_input, + Some(List { + span: outer_span(input_span, rest_input), + first, + rest: if rest.is_empty() { None } else { Some(rest) }, + }), + ) + } else { + (rest_input, None) } - (rest, ast::Map { identifier, pairs }) }) } - fn parse_term<'a>(input: Span<'a>) -> IResult> { - // alt(( - // parse_primitive_term, - // parse_unary_term, - // parse_binary_term, - // parse_aggregation_term, - // parse_function_term, - // parse_map_term, - // ))(input) - ignore_ws_and_comments(alt((parse_primitive_term, parse_variable)))(input) + fn parse_pair<'a>(input: Span<'a>) -> IResult, Term<'a>>> { + let input_span = input.clone(); + tuple(( + parse_term, + opt(lex_whitespace), + equal, + opt(lex_whitespace), + parse_term, + ))(input) + .map(|(rest_input, (key, ws1, equal, ws2, value))| { + ( + rest_input, + Pair { + span: outer_span(input_span, rest_input), + key, + ws1, + equal, + ws2, + value, + }, + ) + }) } - fn parse_primitive_term<'a>(input: Span<'a>) -> IResult> { + fn parse_term_list<'a>(input: Span<'a>) -> IResult>> { + let input_span = input.clone(); + pair( + parse_term, + many0(tuple(( + opt(lex_whitespace), + comma, + opt(lex_whitespace), + parse_term, + ))), + )(input) + .map(|(rest_input, (first, rest))| { + ( + rest_input, + List { + span: outer_span(input_span, rest_input), + first, + rest: if rest.is_empty() { None } else { Some(rest) }, + }, + ) + }) + } + + fn parse_term<'a>(input: Span<'a>) -> IResult> { + alt(( + parse_primitive_term, + parse_variable, + parse_unary_term, + // parse_binary_term, + // parse_aggregation_term, + parse_function_term, + parse_map_term, + ))(input) + } + + fn parse_primitive_term<'a>(input: Span<'a>) -> IResult> { alt((lex_ident, lex_iri, lex_number, lex_string))(input) - .map(|(rest, term)| (rest, ast::Term::Primitive(term))) + .map(|(rest_input, term)| (rest_input, Term::Primitive(term))) } - fn parse_unary_term<'a>(input: Span<'a>) -> IResult> { - todo!("`parse_unary_term`!") + fn parse_unary_term<'a>(input: Span<'a>) -> IResult> { + let input_span = input.clone(); + pair(lex_operators, parse_term)(input).map(|(rest_input, (operation, term))| { + ( + rest_input, + Term::Unary { + span: outer_span(input_span, rest_input), + operation, + term: Box::new(term), + }, + ) + }) } - fn parse_binary_term<'a>(input: Span<'a>) -> IResult> { + fn parse_binary_term<'a>(input: Span<'a>) -> IResult> { todo!("`parse_binary_term`!") } - fn parse_aggregation_term<'a>(input: Span<'a>) -> IResult> { + fn parse_aggregation_term<'a>(input: Span<'a>) -> IResult> { 
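+        // Still a stub; aggregation terms (presumably written with the
+        // lexer's Hash token, e.g. `#sum(...)`) are not parsed yet.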
todo!("`parse_aggregation_term`!") } - fn parse_function_term<'a>(input: Span<'a>) -> IResult> { - todo!("`parse_function_term`!") + fn parse_function_term<'a>(input: Span<'a>) -> IResult> { + parse_named_tuple(input) + .map(|(rest_input, named_tuple)| (rest_input, Term::Function(Box::new(named_tuple)))) } - fn parse_map_term<'a>(input: Span<'a>) -> IResult> { - todo!("`parse_map_term`!") + fn parse_map_term<'a>(input: Span<'a>) -> IResult> { + parse_map(input).map(|(rest_input, map)| (rest_input, Term::Map(Box::new(map)))) } - fn parse_variable<'a>(input: Span<'a>) -> IResult> { - ignore_ws_and_comments(pair(lexer::question_mark, lex_ident))(input).map( - |(rest, (question_mark, ident))| { - ( - rest, - ast::Term::Variable(Token { - kind: TokenKind::Variable, - span: combine_spans(question_mark.span, ident.span) - .expect("Spans were not adjacent in memory"), - }), - ) - }, - ) + fn parse_variable<'a>(input: Span<'a>) -> IResult> { + recognize(pair(question_mark, lex_ident))(input).map(|(rest, var)| { + ( + rest, + Term::Variable(Token { + kind: TokenKind::Variable, + span: var, + }), + ) + }) } fn parse_operation_token<'a>(input: Span<'a>) -> IResult> { @@ -2740,7 +3046,13 @@ mod new { #[cfg(test)] mod test { use super::*; - use crate::io::{lexer::*, parser::ast::*}; + use crate::io::{ + lexer::*, + parser::ast::*, + // parser::ast::{ + // atom::*, directive::*, map::*, named_tuple::*, program::*, statement::*, term::*, + // }, + }; macro_rules! S { ($offset:literal,$line:literal,$str:literal) => { @@ -2756,24 +3068,56 @@ mod new { let input = Span::new("a(B,C)."); assert_eq!( parse_program(input), - vec![ast::Statement::Fact { - atom: ast::Atom::Atom(NamedTuple { - identifier: Token { - kind: TokenKind::Ident, - span: S!(0, 1, "a"), - }, - terms: vec![ - Term::Primitive(Token { + Program { + span: input, + tl_doc_comment: None, + statements: vec![Statement::Fact { + span: S!(0, 1, "a(B,C)."), + doc_comment: None, + atom: Atom::Positive(NamedTuple { + span: S!(0, 1, "a(B,C)"), + identifier: Token { kind: TokenKind::Ident, - span: S!(2, 1, "B"), - }), - Term::Primitive(Token { - kind: TokenKind::Ident, - span: S!(4, 1, "C"), + span: S!(0, 1, "a"), + }, + ws1: None, + open_paren: Token { + kind: TokenKind::OpenParen, + span: S!(1, 1, "("), + }, + ws2: None, + terms: Some(List { + span: S!(2, 1, "B,C"), + first: Term::Primitive(Token { + kind: TokenKind::Ident, + span: S!(2, 1, "B"), + }), + rest: Some(vec![( + None, + Token { + kind: TokenKind::Comma, + span: S!(3, 1, ",") + }, + None, + Term::Primitive(Token { + kind: TokenKind::Ident, + span: S!(4, 1, "C"), + }), + )]), }), - ], - }), - }] + ws3: None, + close_paren: Token { + kind: TokenKind::CloseParen, + span: S!(5, 1, ")"), + }, + }), + ws: None, + dot: Token { + kind: TokenKind::Dot, + span: S!(6, 1, ".") + } + }], + } ) } @@ -2784,85 +3128,191 @@ mod new { ); assert_eq!( parse_program(input), - vec![ - ast::Statement::Directive(Directive::Base { - kw: Token { - kind: TokenKind::Base, - span: S!(1, 1, "base"), - }, - base_iri: Token { - kind: TokenKind::Iri, - span: S!(6, 1, "") - } - }), - ast::Statement::Directive(Directive::Prefix { - kw: Token { - kind: TokenKind::Prefix, - span: S!(33, 1, "prefix"), - }, - prefix: Token { - kind: TokenKind::Ident, - span: S!(40, 1, "rdfs"), - }, - prefix_iri: Token { - kind: TokenKind::Iri, - span: S!(45, 1, ""), - }, - }), - ast::Statement::Directive(Directive::Import { - kw: Token { - kind: TokenKind::Import, - span: S!(86, 1, "import"), - }, - predicate: Token { - kind: TokenKind::Ident, - 
span: S!(93, 1, "sourceA"), - }, - map: Map { - identifier: Some(Token { - kind: TokenKind::Ident, - span: S!(102, 1, "csv") + Program { + tl_doc_comment: None, + span: input, + statements: vec![ + Statement::Directive(Directive::Base { + span: S!(0, 1, "@base ."), + doc_comment: None, + kw: Token { + kind: TokenKind::Base, + span: S!(0, 1, "@base"), + }, + ws1: Some(Token { + kind: TokenKind::Whitespace, + span: S!(5, 1, " ") + }), + base_iri: Token { + kind: TokenKind::Iri, + span: S!(6, 1, "") + }, + ws2: None, + dot: Token { + kind: TokenKind::Dot, + span: S!(31, 1, ".") + }, + }), + Statement::Directive(Directive::Prefix { + span: S!( + 32, + 1, + "@prefix rdfs:." + ), + doc_comment: None, + kw: Token { + kind: TokenKind::Prefix, + span: S!(32, 1, "@prefix"), + }, + ws1: Some(Token { + kind: TokenKind::Whitespace, + span: S!(39, 1, " ") }), - pairs: vec![Pair { - key: Term::Primitive(Token { + prefix: Token { + kind: TokenKind::Ident, + span: S!(40, 1, "rdfs:"), + }, + ws2: None, + prefix_iri: Token { + kind: TokenKind::Iri, + span: S!(45, 1, ""), + }, + ws3: None, + dot: Token { + kind: TokenKind::Dot, + span: S!(84, 1, ".") + } + }), + Statement::Directive(Directive::Import { + span: S!( + 85, + 1, + r#"@import sourceA:-csv{resource="sources/dataA.csv"}."# + ), + doc_comment: None, + kw: Token { + kind: TokenKind::Import, + span: S!(85, 1, "@import"), + }, + ws1: Token { + kind: TokenKind::Whitespace, + span: S!(92, 1, " "), + }, + predicate: Token { + kind: TokenKind::Ident, + span: S!(93, 1, "sourceA"), + }, + ws2: None, + arrow: Token { + kind: TokenKind::Arrow, + span: S!(100, 1, ":-"), + }, + ws3: None, + map: Map { + span: S!(102, 1, r#"csv{resource="sources/dataA.csv"}"#), + identifier: Some(Token { kind: TokenKind::Ident, - span: S!(106, 1, "resource"), + span: S!(102, 1, "csv") }), - value: Term::Primitive(Token { - kind: TokenKind::String, - span: S!(115, 1, "\"sources/dataA.csv\""), - }) - }], - }, - }), - ast::Statement::Directive(Directive::Export { - kw: Token { - kind: TokenKind::Export, - span: S!(137, 1, "export"), - }, - predicate: Token { - kind: TokenKind::Ident, - span: S!(144, 1, "a"), - }, - map: Map { - identifier: Some(Token { + ws1: None, + open_brace: Token { + kind: TokenKind::OpenBrace, + span: S!(105, 1, "{") + }, + ws2: None, + pairs: Some(List { + span: S!(106, 1, "resource=\"sources/dataA.csv\""), + first: Pair { + span: S!(106, 1, "resource=\"sources/dataA.csv\""), + key: Term::Primitive(Token { + kind: TokenKind::Ident, + span: S!(106, 1, "resource"), + }), + ws1: None, + equal: Token { + kind: TokenKind::Equal, + span: S!(114, 1, "="), + }, + ws2: None, + value: Term::Primitive(Token { + kind: TokenKind::String, + span: S!(115, 1, "\"sources/dataA.csv\""), + }) + }, + rest: None, + }), + ws3: None, + close_brace: Token { + kind: TokenKind::CloseBrace, + span: S!(134, 1, "}") + }, + }, + ws4: None, + dot: Token { + kind: TokenKind::Dot, + span: S!(135, 1, ".") + } + }), + Statement::Directive(Directive::Export { + span: S!(136, 1, "@export a:-csv{}."), + doc_comment: None, + kw: Token { + kind: TokenKind::Export, + span: S!(136, 1, "@export"), + }, + ws1: Token { + kind: TokenKind::Whitespace, + span: S!(143, 1, " "), + }, + predicate: Token { kind: TokenKind::Ident, - span: S!(147, 1, "csv"), - }), - pairs: vec![] - } - }), - ast::Statement::Directive(Directive::Output { - kw: Token { - kind: TokenKind::Output, - span: S!(154, 1, "output") - }, - predicates: vec![Token { - kind: TokenKind::Ident, - span: S!(161, 1, "a") - }] - }), - ] + span: 
S!(144, 1, "a"), + }, + ws2: None, + arrow: Token { + kind: TokenKind::Arrow, + span: S!(145, 1, ":-"), + }, + ws3: None, + map: Map { + span: S!(147, 1, "csv{}"), + identifier: Some(Token { + kind: TokenKind::Ident, + span: S!(147, 1, "csv"), + }), + ws1: None, + open_brace: Token { + kind: TokenKind::OpenBrace, + span: S!(150, 1, "{"), + }, + ws2: None, + pairs: None, + ws3: None, + close_brace: Token { + kind: TokenKind::CloseBrace, + span: S!(151, 1, "}"), + }, + }, + ws4: None, + dot: Token { + kind: TokenKind::Dot, + span: S!(152, 1, "."), + }, + }), + Statement::Directive(Directive::Output { + span: S!(153, 1, "@output a."), + doc_comment: None, + kw: Token { + kind: TokenKind::Output, + span: S!(154, 1, "output") + }, + predicates: vec![Token { + kind: TokenKind::Ident, + span: S!(161, 1, "a") + }], + }), + ], + } ) } @@ -2886,54 +3336,92 @@ mod new { let input = Span::new("some(Fact, with, whitespace) . % and a super useful comment\n"); assert_eq!( parse_program(input), - vec![ast::Statement::Fact { - atom: Atom::Atom(NamedTuple { - identifier: Token { - kind: TokenKind::Ident, - span: S!(0, 1, "some"), - }, - terms: vec![ - Term::Primitive(Token { - kind: TokenKind::Ident, - span: S!(5, 1, "Fact") - }), - Term::Primitive(Token { - kind: TokenKind::Ident, - span: S!(11, 1, "with") + Program { + span: input, + tl_doc_comment: None, + statements: vec![ + Statement::Fact { + span: S!(0, 1, "some(Fact, with, whitespace) ."), + doc_comment: None, + atom: Atom::Positive(NamedTuple { + span: S!(0, 1, "some(Fact, with, whitespace)"), + identifier: Token { + kind: TokenKind::Ident, + span: S!(0, 1, "some"), + }, + ws1: None, + open_paren: Token { + kind: TokenKind::OpenParen, + span: S!(4, 1, "(") + }, + ws2: None, + terms: Some(List { + span: S!(5, 1, "Fact, with, whitespace"), + first: Term::Primitive(Token { + kind: TokenKind::Ident, + span: S!(5, 1, "Fact"), + }), + rest: Some(vec![ + ( + None, + Token { + kind: TokenKind::Comma, + span: S!(9, 1, ","), + }, + Some(Token { + kind: TokenKind::Whitespace, + span: S!(10, 1, " "), + }), + Term::Primitive(Token { + kind: TokenKind::Ident, + span: S!(11, 1, "with") + }), + ), + ( + None, + Token { + kind: TokenKind::Comma, + span: S!(15, 1, ","), + }, + Some(Token { + kind: TokenKind::Whitespace, + span: S!(16, 1, " "), + }), + Term::Primitive(Token { + kind: TokenKind::Ident, + span: S!(17, 1, "whitespace") + }), + ), + ]), + }), + ws3: None, + close_paren: Token { + kind: TokenKind::CloseParen, + span: S!(27, 1, ")") + }, }), - Term::Primitive(Token { - kind: TokenKind::Ident, - span: S!(17, 1, "whitespace") + ws: Some(Token { + kind: TokenKind::Whitespace, + span: S!(28, 1, " "), }), - ] - }) - }] + dot: Token { + kind: TokenKind::Dot, + span: S!(29, 1, "."), + }, + }, + Statement::Whitespace(Token { + kind: TokenKind::Whitespace, + span: S!(30, 1, " ") + }), + Statement::Comment(Token { + kind: TokenKind::Comment, + span: S!(31, 1, "% and a super useful comment\n") + }) + ], + } ) } - #[test] - fn combine_spans() { - use nom::bytes::complete::tag; - let source = "Some Input ;)"; - let input = Span::new(source); - let (input, first) = tag::<&str, Span, nom::error::Error<_>>("Some ")(input).unwrap(); - let (input, second) = tag::<&str, Span, nom::error::Error<_>>("Input")(input).unwrap(); - let span = super::combine_spans(first, second); - assert_eq!(span, Ok(Span::new("Some Input"))) - } - - #[test] - fn combine_spans_error() { - use nom::bytes::complete::tag; - let source = "Some Input ;)"; - let input = Span::new(source); - let (input, 
first) = tag::<&str, Span, nom::error::Error<_>>("Some")(input).unwrap();
-            let (input, _) = tag::<&str, Span, nom::error::Error<_>>(" ")(input).unwrap();
-            let (input, second) = tag::<&str, Span, nom::error::Error<_>>("Input")(input).unwrap();
-            let span = super::combine_spans(first, second);
-            assert_eq!(span, Err(str_concat::Error::NotAdjacent))
-        }
-
         #[test]
         fn parser_test() {
             let str =
diff --git a/nemo/src/io/parser/ast.rs b/nemo/src/io/parser/ast.rs
index 7a2a2eebd..03c5e6ece 100644
--- a/nemo/src/io/parser/ast.rs
+++ b/nemo/src/io/parser/ast.rs
@@ -1,301 +1,292 @@
-use std::collections::BTreeMap;
+use crate::io::lexer::{Span, Token};
+use std::fmt::Display;
 
-use crate::io::lexer::Token;
+pub(crate) mod atom;
+pub(crate) mod directive;
+pub(crate) mod map;
+pub(crate) mod named_tuple;
+pub(crate) mod program;
+pub(crate) mod statement;
+pub(crate) mod term;
 
-struct Position {
-    offset: usize,
-    line: u32,
-    column: u32,
-}
-
-pub(crate) type Program<'a> = Vec<Statement<'a>>;
-
-#[derive(Debug, PartialEq, Clone)]
-pub(crate) enum Statement<'a> {
-    Directive(Directive<'a>),
-    Fact {
-        atom: Atom<'a>,
-    },
-    Rule {
-        head: Vec<Atom<'a>>,
-        body: Vec<Atom<'a>>,
-    },
-}
-
-#[derive(Debug, PartialEq, Clone)]
-pub(crate) enum Directive<'a> {
-    Base {
-        kw: Token<'a>,
-        base_iri: Token<'a>,
-    },
-    Prefix {
-        kw: Token<'a>,
-        prefix: Token<'a>,
-        prefix_iri: Token<'a>,
-    },
-    Import {
-        kw: Token<'a>,
-        predicate: Token<'a>,
-        map: Map<'a>,
-    },
-    Export {
-        kw: Token<'a>,
-        predicate: Token<'a>,
-        map: Map<'a>,
-    },
-    // maybe will be deprecated
-    Output {
-        kw: Token<'a>,
-        predicates: Vec<Token<'a>>,
-    },
-}
-
-#[derive(Debug, PartialEq, Clone)]
-pub(crate) enum Atom<'a> {
-    Atom(NamedTuple<'a>),
-    NegativeAtom(NamedTuple<'a>),
-    InfixAtom {
-        operation: Token<'a>,
-        lhs: Term<'a>,
-        rhs: Term<'a>,
-    },
-    Map(Map<'a>),
-}
-
-#[derive(Debug, PartialEq, Clone)]
-pub(crate) enum Term<'a> {
-    Primitive(Token<'a>),
-    Variable(Token<'a>),
-    Unary {
-        operation: Token<'a>,
-        term: Box<Term<'a>>,
-    },
-    Binary {
-        operation: Token<'a>,
-        lhs: Box<Term<'a>>,
-        rhs: Box<Term<'a>>,
-    },
-    Aggregation {
-        operation: Token<'a>,
-        terms: Vec<Term<'a>>,
-    },
-    Function(NamedTuple<'a>),
-    Map(Map<'a>),
-}
-
-#[derive(Debug, PartialEq, Clone)]
-pub(crate) struct NamedTuple<'a> {
-    pub(crate) identifier: Token<'a>,
-    pub(crate) terms: Vec<Term<'a>>,
-}
-
-#[derive(Debug, PartialEq, Clone)]
-pub(crate) struct Map<'a> {
-    pub(crate) identifier: Option<Token<'a>>,
-    pub(crate) pairs: Vec<Pair<Term<'a>, Term<'a>>>,
-}
-
-#[derive(Debug, PartialEq, Clone)]
-pub(crate) struct Pair<K, V> {
-    pub(crate) key: K,
-    pub(crate) value: V,
-}
-impl<K, V> Pair<K, V> {
-    pub fn new(key: K, value: V) -> Pair<K, V> {
-        Pair { key, value }
-    }
+pub(crate) trait AstNode: std::fmt::Debug {
+    fn children(&self) -> Option<Vec<&dyn AstNode>>;
+    fn span(&self) -> Span;
+    // fn position(&self) -> Position;
 }
 
-#[derive(Debug, PartialEq, Clone)]
-pub(crate) enum Node<'a> {
-    Statement(&'a Statement<'a>),
-    Directive(&'a Directive<'a>),
-    RuleHead(&'a Vec<Atom<'a>>),
-    RuleBody(&'a Vec<Atom<'a>>),
-    Atom(&'a Atom<'a>),
-    Term(&'a Term<'a>),
-    Terms(&'a Vec<Term<'a>>),
-    Map(&'a Map<'a>),
-    KeyWord(&'a Token<'a>),
-    BaseIri(&'a Token<'a>),
-    Prefix(&'a Token<'a>),
-    PrefixIri(&'a Token<'a>),
-    Predicate(&'a Token<'a>),
-    Predicates(&'a Vec<Token<'a>>),
-    Operation(&'a Token<'a>),
-    Lhs(&'a Term<'a>),
-    Rhs(&'a Term<'a>),
-    Identifier(&'a Token<'a>),
-    Pairs(&'a Vec<Pair<Term<'a>, Term<'a>>>),
-    MapIdentifier(&'a Option<Token<'a>>),
-    Primitive(&'a Token<'a>),
-    Variable(&'a Token<'a>),
+pub(crate) struct Position {
+    pub(crate) offset: usize,
+    pub(crate) line: u32,
+    pub(crate) column: u32,
 }
 
-trait AstNode {
-    fn children(&self) -> Vec<Node>;
-    // fn position(&self) -> Position;
 }
-
+#[derive(Debug, Clone, PartialEq)]
+pub(crate) struct List<'a, T> {
+    pub(crate) span: Span<'a>,
+    pub(crate) first: T,
+    // ([ws]?[,][ws]?[T])*
+    pub(crate) rest: Option<Vec<(Option<Token<'a>>, Token<'a>, Option<Token<'a>>, T)>>,
 }
-
-impl<'a> AstNode for Program<'a> {
-    fn children(&self) -> Vec<Node> {
+impl<T: AstNode> AstNode for List<'_, T> {
+    fn children(&self) -> Option<Vec<&dyn AstNode>> {
         let mut vec = Vec::new();
-        for statement in self {
-            vec.push(Node::Statement(statement))
-        }
-        vec
-    }
-
-    // fn position(&self) -> Position {
-    //     let first = self.get(0);
-    //     match first {
-    //         Some(elem) => {
-    //             let span;
-    //             match elem {
-    //                 Statement::Directive(directive) => match directive {
-    //                     Directive::Base { kw, base_iri } => span = kw.span,
-    //                     Directive::Prefix {
-    //                         kw,
-    //                         prefix,
-    //                         prefix_iri,
-    //                     } => span = kw.span,
-    //                     Directive::Import { kw, predicate, map } => span = kw.span,
-    //                     Directive::Export { kw, predicate, map } => span = kw.span,
-    //                     Directive::Output { kw, predicates } => span = kw.span,
-    //                 },
-    //                 Statement::Fact { atom } => match atom {
-    //                     Atom::Atom { predicate, terms } => todo!(),
-    //                     Atom::InfixAtom { operation, lhs, rhs } => todo!(),
-    //                     Atom::Map(_) => todo!(),
-    //                 },
-    //                 Statement::Rule { head, body } => todo!(),
-    //             };
-    //         }
-    //         None => Position {
-    //             offset: 0,
-    //             line: 1,
-    //             column: 0,
-    //         },
-    //     }
-    // }
-}
-
-impl<'a> AstNode for Statement<'a> {
-    fn children(&self) -> Vec<Node> {
-        match self {
-            Statement::Directive(directive) => directive.children(),
-            Statement::Fact { atom } => vec![Node::Atom(atom)],
-            Statement::Rule { head, body } => {
-                vec![Node::RuleHead(head), Node::RuleBody(body)]
+        vec.push(&self.first as &dyn AstNode);
+        if let Some(rest) = &self.rest {
+            for (ws1, delim, ws2, item) in rest {
+                if let Some(ws) = ws1 {
+                    vec.push(ws);
+                };
+                vec.push(delim);
+                if let Some(ws) = ws2 {
+                    vec.push(ws);
+                };
+                vec.push(item);
             }
-        }
+        };
+        Some(vec)
     }
-    // fn position(&self) -> Position {
-    //     todo!()
-    // }
-}
-
-impl<'a> AstNode for Directive<'a> {
-    fn children(&self) -> Vec<Node> {
-        match self {
-            Directive::Base { kw, base_iri } => {
-                vec![Node::KeyWord(kw), Node::BaseIri(base_iri)]
-            }
-            Directive::Prefix {
-                kw,
-                prefix,
-                prefix_iri,
-            } => vec![
-                Node::KeyWord(kw),
-                Node::Prefix(prefix),
-                Node::PrefixIri(prefix_iri),
-            ],
-            Directive::Import { kw, predicate, map } => vec![
-                Node::KeyWord(kw),
-                Node::Predicate(predicate),
-                Node::Map(map),
-            ],
-            Directive::Export { kw, predicate, map } => vec![
-                Node::KeyWord(kw),
-                Node::Predicate(predicate),
-                Node::Map(map),
-            ],
-            Directive::Output { kw, predicates } => {
-                vec![Node::KeyWord(kw), Node::Predicates(predicates)]
-            }
-        }
+    fn span(&self) -> Span {
+        self.span
     }
-
-    // fn position(&self) -> Position {
-    //     todo!()
-    // }
 }
 
-impl<'a> AstNode for Atom<'a> {
-    fn children(&self) -> Vec<Node> {
-        match self {
-            Atom::Atom(named_tuple) => {
-                vec![
-                    Node::Identifier(&named_tuple.identifier),
-                    Node::Terms(&named_tuple.terms),
-                ]
-            }
-            Atom::NegativeAtom(named_tuple) => {
-                vec![
-                    Node::Identifier(&named_tuple.identifier),
-                    Node::Terms(&named_tuple.terms),
-                ]
-            }
-            Atom::InfixAtom {
-                operation,
-                lhs,
-                rhs,
-            } => vec![Node::Operation(operation), Node::Lhs(lhs), Node::Rhs(rhs)],
-            Atom::Map(map) => map.children(),
+fn get_all_tokens(node: &dyn AstNode) -> Vec<&dyn AstNode> {
+    let mut vec = Vec::new();
+    if let Some(children) = node.children() {
+        for child in children {
+            vec.append(&mut get_all_tokens(child));
         }
-    }
-
-    // fn position(&self) -> Position {
-    //     todo!()
-    // }
+    } else {
+        
vec.push(node); + }; + vec } -impl<'a> AstNode for Term<'a> { - fn children(&self) -> Vec { - match self { - Term::Primitive(primitive) => vec![Node::Primitive(primitive)], - Term::Variable(var) => vec![Node::Variable(var)], - Term::Binary { - operation, - lhs, - rhs, - } => vec![Node::Operation(operation), Node::Lhs(lhs), Node::Rhs(rhs)], - Term::Unary { operation, term } => vec![Node::Operation(operation), Node::Term(term)], - Term::Aggregation { operation, terms } => { - vec![Node::Operation(operation), Node::Terms(terms)] - } - Term::Function(NamedTuple { identifier, terms }) => { - vec![Node::Identifier(identifier), Node::Terms(terms)] - } - Term::Map(map) => map.children(), - } +mod test { + use super::*; + use super::{atom::Atom, directive::Directive, named_tuple::NamedTuple, program::Program, statement::Statement, term::Term}; + use crate::io::lexer::TokenKind; + + macro_rules! s { + ($offset:literal,$line:literal,$str:literal) => { + unsafe { Span::new_from_raw_offset($offset, $line, $str, ()) } + }; } - // fn position(&self) -> Position { - // todo!() - // } -} + #[test] + fn ast_traversal() { + let input = "\ + %! This is just a test file.\n\ + %! So the documentation of the rules is not important.\n\ + %% This is the prefix used for datatypes\n\ + @prefix xsd: .\n\ + \n\ + % Facts\n\ + %% This is just an example predicate.\n\ + somePredicate(ConstA, ConstB).\n\ + \n\ + % Rules\n\ + %% This is just an example rule.\n\ + someHead(?VarA) :- somePredicate(?VarA, ConstB). % all constants that are in relation with ConstB\n"; + let span = Span::new(input); + let ast = Program { + span, + tl_doc_comment: Some(Token { + kind: TokenKind::TlDocComment, + span: s!(0, 1, "%! This is just a test file.\n%! So the documentation of the rules is not important.\n") + }), + statements: vec![ + Statement::Directive(Directive::Prefix { + span:s!(125,4,"@prefix xsd: ."), + doc_comment:Some(Token { + kind:TokenKind::DocComment, + span:s!(84,3,"%% This is the prefix used for datatypes\n") + }), + kw: Token{ + kind:TokenKind::Prefix, + span:s!(125,4,"@prefix") + } , + ws1:Some(Token{ + kind:TokenKind::Whitespace, + span:s!(132,4," ") + }) , + prefix: Token { + kind: TokenKind::PrefixIdent, + span: s!(133, 4, "xsd:"), + }, + ws2: Some(Token{ + kind:TokenKind::Whitespace, + span:s!(137,4," ") + }), + prefix_iri: Token { + kind: TokenKind::Iri, + span: s!(138, 4, ""), + }, + ws3: None, + dot: Token{ + kind:TokenKind::Dot, + span:s!(173,4,".") + } + }), + Statement::Whitespace(Token { + kind: TokenKind::Whitespace, + span: s!(174, 4, "\n\n"), + }), + Statement::Comment(Token { + kind: TokenKind::Comment, + span: s!(176, 6, "% Facts\n"), + }), + Statement::Fact { + span:s!(222,8,"somePredicate(ConstA, ConstB)."), + doc_comment: Some(Token { + kind: TokenKind::DocComment, + span:s!(184,7,"%% This is just an example predicate.\n") + }), + atom: Atom::Positive(NamedTuple { + span: s!(222,8,"somePredicate(ConstA, ConstB)"), + identifier: Token { + kind: TokenKind::Ident, + span: s!(222, 8, "somePredicate"), + }, + ws1:None , + open_paren:Token{ + kind:TokenKind::OpenParen, + span:s!(235,8,"(") + } , + ws2:None , + terms: Some(List { + span: s!(236, 8, "ConstA, ConstB"), + first: Term::Primitive(Token { + kind: TokenKind::Ident, + span: s!(236, 8, "ConstA"), + }), + rest: Some(vec![( + None, + Token { + kind: TokenKind::Comma, + span: s!(242, 8, ","), + }, + Some(Token { + kind: TokenKind::Whitespace, + span: s!(243, 8, " "), + }), + Term::Primitive(Token { + kind: TokenKind::Ident, + span: s!(244, 8, "ConstB"), 
+ }), + )]), + }), + ws3: None , + close_paren:Token { + kind: TokenKind::CloseParen, + span:s!(250,8,")") + } + }), + ws: None, + dot: Token { + kind: TokenKind::Dot, + span: s!(251,8,".") + } + }, + Statement::Whitespace(Token { + kind: TokenKind::Whitespace, + span: s!(252, 8, "\n\n"), + }), + Statement::Comment(Token { + kind: TokenKind::Comment, + span: s!(254, 10, "% Rules\n"), + }), + Statement::Rule { + span: s!(295,12,"someHead(?VarA) :- somePredicate(?VarA, ConstB)."), + doc_comment: Some(Token { kind: TokenKind::DocComment, span: s!(262,11,"%% This is just an example rule.\n") }), + head: List { + span: s!(295, 12, "someHead(?VarA)"), + first: Atom::Positive(NamedTuple { + span: s!(295,12,"someHead(?VarA)"), + identifier: Token { + kind: TokenKind::Ident, + span: s!(295, 12, "someHead"), + }, + ws1: None, + open_paren: Token { kind: TokenKind::OpenParen, span: s!(303,12,"(") }, + ws2: None, + terms: Some(List { + span: s!(304, 12, "?VarA"), + first: Term::Variable(Token { + kind: TokenKind::Variable, + span: s!(304, 12, "?VarA"), + }), + rest: None, + }), + ws3: None, + close_paren: Token { kind: TokenKind::CloseParen, span: s!(309,12,")") }, + }), + rest: None, + }, + ws1: Some(Token{kind:TokenKind::Whitespace,span:s!(310,12," ")}), + arrow: Token{kind:TokenKind::Arrow, span:s!(311,12,":-")}, + ws2: Some(Token{kind:TokenKind::Whitespace,span:s!(313,12," ")}), + body: List { + span: s!(314, 12, "somePredicate(?VarA, ConstB)"), + first: Atom::Positive(NamedTuple { + span: s!(314, 12,"somePredicate(?VarA, ConstB)"), + identifier: Token { + kind: TokenKind::Ident, + span: s!(314, 12, "somePredicate"), + }, + ws1: None, + open_paren: Token { kind: TokenKind::OpenParen, span: s!(327,12,"(") }, + ws2: None, + terms: Some(List { + span: s!(328, 12, "?Var, ConstB"), + first: Term::Variable(Token { + kind: TokenKind::Variable, + span: s!(328, 12, "?VarA"), + }), + rest: Some(vec![( + None, + Token { + kind: TokenKind::Comma, + span: s!(333, 12, ","), + }, + Some(Token { + kind: TokenKind::Whitespace, + span: s!(334, 12, " "), + }), + Term::Primitive(Token { + kind: TokenKind::Ident, + span: s!(335, 12, "ConstB"), + }), + )]), + }), + ws3: None, + close_paren: Token { kind: TokenKind::CloseParen, span: s!(341, 12,")") }, + }), + rest: None, + }, + ws3: None, + dot: Token{kind:TokenKind::Dot,span:s!(342, 12,".")}, + }, + Statement::Whitespace(Token { + kind: TokenKind::Whitespace, + span: s!(343, 12, " "), + }), + Statement::Comment(Token { + kind: TokenKind::Comment, + span: s!(346, 12, "% all constants that are in relation with ConstB\n"), + }), + ], + }; -impl<'a> AstNode for Map<'a> { - fn children(&self) -> Vec { - vec![ - Node::MapIdentifier(&self.identifier), - Node::Pairs(&self.pairs), - ] + let tokens1 = get_all_tokens(&ast); + assert_eq!(input, { + let mut result = String::new(); + for token in tokens1 { + result.push_str(token.span().fragment()); + } + result + }); } - - // fn position(&self) -> Position { - // todo!() - // } } diff --git a/nemo/src/io/parser/ast/atom.rs b/nemo/src/io/parser/ast/atom.rs new file mode 100644 index 000000000..f78e9b8a5 --- /dev/null +++ b/nemo/src/io/parser/ast/atom.rs @@ -0,0 +1,62 @@ +use super::map::Map; +use super::named_tuple::NamedTuple; +use super::term::Term; +use super::AstNode; +use crate::io::lexer::{Span, Token}; + +#[derive(Debug, Clone, PartialEq)] +pub(crate) enum Atom<'a> { + Positive(NamedTuple<'a>), + Negative { + span: Span<'a>, + neg: Token<'a>, + atom: NamedTuple<'a>, + }, + InfixAtom { + span: Span<'a>, + lhs: Term<'a>, + 
ws1: Option<Token<'a>>,
+        operation: Token<'a>,
+        ws2: Option<Token<'a>>,
+        rhs: Term<'a>,
+    },
+    Map(Map<'a>),
+}
+impl AstNode for Atom<'_> {
+    fn children(&self) -> Option<Vec<&dyn AstNode>> {
+        match self {
+            Atom::Positive(named_tuple) => named_tuple.children(),
+            Atom::Negative { neg, atom, .. } => Some(vec![neg, atom]),
+            Atom::InfixAtom {
+                lhs,
+                ws1,
+                operation,
+                ws2,
+                rhs,
+                ..
+            } => {
+                let mut vec = Vec::new();
+                vec.push(lhs as &dyn AstNode);
+                if let Some(ws) = ws1 {
+                    vec.push(ws);
+                };
+                vec.push(operation);
+                if let Some(ws) = ws2 {
+                    vec.push(ws);
+                };
+                vec.push(rhs);
+                Some(vec)
+            }
+            Atom::Map(map) => map.children(),
+        }
+    }
+
+    fn span(&self) -> Span {
+        match self {
+            Atom::Positive(named_tuple) => named_tuple.span(),
+            Atom::Negative { span, .. } => *span,
+            Atom::InfixAtom { span, .. } => *span,
+            Atom::Map(map) => map.span(),
+        }
+    }
+}
diff --git a/nemo/src/io/parser/ast/directive.rs b/nemo/src/io/parser/ast/directive.rs
new file mode 100644
index 000000000..a716a91d3
--- /dev/null
+++ b/nemo/src/io/parser/ast/directive.rs
@@ -0,0 +1,212 @@
+use super::map::Map;
+use super::AstNode;
+use crate::io::lexer::{Span, Token};
+
+#[derive(Debug, Clone, PartialEq)]
+pub(crate) enum Directive<'a> {
+    // "@base ."
+    Base {
+        span: Span<'a>,
+        doc_comment: Option<Token<'a>>,
+        kw: Token<'a>,
+        ws1: Option<Token<'a>>,
+        base_iri: Token<'a>,
+        ws2: Option<Token<'a>>,
+        dot: Token<'a>,
+    },
+    // "@prefix wikidata: <http://www.wikidata.org/entity/> ."
+    Prefix {
+        span: Span<'a>,
+        doc_comment: Option<Token<'a>>,
+        kw: Token<'a>,
+        ws1: Option<Token<'a>>,
+        prefix: Token<'a>,
+        ws2: Option<Token<'a>>,
+        prefix_iri: Token<'a>,
+        ws3: Option<Token<'a>>,
+        dot: Token<'a>,
+    },
+    // "@import table :- csv{resource="path/to/file.csv"} ."
+    Import {
+        span: Span<'a>,
+        doc_comment: Option<Token<'a>>,
+        kw: Token<'a>,
+        ws1: Token<'a>,
+        predicate: Token<'a>,
+        ws2: Option<Token<'a>>,
+        arrow: Token<'a>,
+        ws3: Option<Token<'a>>,
+        map: Map<'a>,
+        ws4: Option<Token<'a>>,
+        dot: Token<'a>,
+    },
+    // "@export result :- turtle{resource="out.ttl"} ."
+    Export {
+        span: Span<'a>,
+        doc_comment: Option<Token<'a>>,
+        kw: Token<'a>,
+        ws1: Token<'a>,
+        predicate: Token<'a>,
+        ws2: Option<Token<'a>>,
+        arrow: Token<'a>,
+        ws3: Option<Token<'a>>,
+        map: Map<'a>,
+        ws4: Option<Token<'a>>,
+        dot: Token<'a>,
+    },
+    // maybe will get deprecated
+    Output {
+        span: Span<'a>,
+        doc_comment: Option<Token<'a>>,
+        kw: Token<'a>,
+        predicates: Vec<Token<'a>>,
+    },
+}
+impl AstNode for Directive<'_> {
+    fn children(&self) -> Option<Vec<&dyn AstNode>> {
+        match self {
+            Directive::Base {
+                span,
+                doc_comment,
+                kw,
+                ws1,
+                base_iri,
+                ws2,
+                dot,
+            } => {
+                let mut vec = Vec::new();
+                if let Some(dc) = doc_comment {
+                    vec.push(dc as &dyn AstNode);
+                };
+                vec.push(kw);
+                if let Some(ws) = ws1 {
+                    vec.push(ws);
+                };
+                vec.push(base_iri);
+                if let Some(ws) = ws2 {
+                    vec.push(ws);
+                };
+                vec.push(dot);
+                Some(vec)
+            }
+            Directive::Prefix {
+                span,
+                doc_comment,
+                kw,
+                ws1,
+                prefix,
+                ws2,
+                prefix_iri,
+                ws3,
+                dot,
+            } => {
+                let mut vec = Vec::new();
+                if let Some(dc) = doc_comment {
+                    vec.push(dc as &dyn AstNode);
+                };
+                vec.push(kw);
+                if let Some(ws) = ws1 {
+                    vec.push(ws);
+                };
+                vec.push(prefix);
+                if let Some(ws) = ws2 {
+                    vec.push(ws);
+                };
+                vec.push(prefix_iri);
+                if let Some(ws) = ws3 {
+                    vec.push(ws);
+                };
+                vec.push(dot);
+                Some(vec)
+            }
+            Directive::Import {
+                span,
+                doc_comment,
+                kw,
+                ws1,
+                predicate,
+                ws2,
+                arrow,
+                ws3,
+                map,
+                ws4,
+                dot,
+            } => {
+                let mut vec = Vec::new();
+                if let Some(dc) = doc_comment {
+                    vec.push(dc as &dyn AstNode);
+                };
+                vec.push(kw);
+                vec.push(ws1);
+                vec.push(predicate);
+                if let Some(ws) = ws2 {
+                    vec.push(ws);
+                };
+                vec.push(arrow);
+                if let Some(ws) = ws3 {
+                    vec.push(ws);
+                };
+                vec.push(map);
+                if let Some(ws) = ws4 {
+                    vec.push(ws);
+                };
+                vec.push(dot);
+                Some(vec)
+            }
+            Directive::Export {
+                span,
+                doc_comment,
+                kw,
+                ws1,
+                predicate,
+                ws2,
+                arrow,
+                ws3,
+                map,
+                ws4,
+                dot,
+            } => {
+                let mut vec = Vec::new();
+                if let Some(dc) = doc_comment {
+                    vec.push(dc as &dyn AstNode);
+                };
+                vec.push(kw);
+                vec.push(ws1);
+                vec.push(predicate);
+                if let Some(ws) = ws2 {
+                    vec.push(ws);
+                };
+                vec.push(arrow);
+                if let Some(ws) = ws3 {
+                    vec.push(ws);
+                };
+                vec.push(map);
+                if let Some(ws) = ws4 {
+                    vec.push(ws);
+                };
+                vec.push(dot);
+                Some(vec)
+            }
+            Directive::Output { .. } => todo!(),
+        }
+    }
+
+    fn span(&self) -> Span {
+        match self {
+            Directive::Base { span, .. } => *span,
+            Directive::Prefix { span, .. } => *span,
+            Directive::Import { span, .. } => *span,
+            Directive::Export { span, .. } => *span,
+            Directive::Output { span, .. } => *span,
+        }
+    }
+
+    // fn position(&self) -> Position {
+    //     let span = self.span();
+    //     Position {
+    //         offset: span.location_offset(),
+    //         line: span.location_line(),
+    //         column: span.get_column() as u32,
+    //     }
+    // }
+}
diff --git a/nemo/src/io/parser/ast/map.rs b/nemo/src/io/parser/ast/map.rs
new file mode 100644
index 000000000..34261a51a
--- /dev/null
+++ b/nemo/src/io/parser/ast/map.rs
@@ -0,0 +1,72 @@
+use super::term::Term;
+use super::{AstNode, List};
+use crate::io::lexer::{Span, Token};
+use std::fmt::Debug;
+
+#[derive(Debug, Clone, PartialEq)]
+pub(crate) struct Map<'a> {
+    pub(crate) span: Span<'a>,
+    pub(crate) identifier: Option<Token<'a>>,
+    pub(crate) ws1: Option<Token<'a>>,
+    pub(crate) open_brace: Token<'a>,
+    pub(crate) ws2: Option<Token<'a>>,
+    pub(crate) pairs: Option<List<'a, Pair<'a, Term<'a>, Term<'a>>>>,
+    pub(crate) ws3: Option<Token<'a>>,
+    pub(crate) close_brace: Token<'a>,
+}
+impl AstNode for Map<'_> {
+    fn children(&self) -> Option<Vec<&dyn AstNode>> {
+        let mut vec = Vec::new();
+        if let Some(identifier) = &self.identifier {
+            vec.push(identifier as &dyn AstNode);
+        };
+        if let Some(ws) = &self.ws1 {
+            vec.push(ws);
+        }
+        vec.push(&self.open_brace);
+        if let Some(ws) = &self.ws2 {
+            vec.push(ws);
+        }
+        if let Some(pairs) = &self.pairs {
+            vec.push(pairs);
+        };
+        if let Some(ws) = &self.ws3 {
+            vec.push(ws);
+        }
+        vec.push(&self.close_brace);
+        Some(vec)
+    }
+
+    fn span(&self) -> Span {
+        self.span
+    }
+}
+
+#[derive(Debug, Clone, PartialEq)]
+pub(crate) struct Pair<'a, K, V> {
+    pub(crate) span: Span<'a>,
+    pub(crate) key: K,
+    pub(crate) ws1: Option<Token<'a>>,
+    pub(crate) equal: Token<'a>,
+    pub(crate) ws2: Option<Token<'a>>,
+    pub(crate) value: V,
+}
+impl<K: AstNode, V: AstNode> AstNode for Pair<'_, K, V> {
+    fn children(&self) -> Option<Vec<&dyn AstNode>> {
+        let mut vec = Vec::new();
+        vec.push(&self.key as &dyn AstNode);
+        if let Some(ws) = &self.ws1 {
+            vec.push(ws);
+        }
+        vec.push(&self.equal);
+        if let Some(ws) = &self.ws2 {
+            vec.push(ws);
+        }
+        vec.push(&self.value);
+        Some(vec)
+    }
+
+    fn span(&self) -> Span {
+        self.span
+    }
+}
diff --git a/nemo/src/io/parser/ast/named_tuple.rs b/nemo/src/io/parser/ast/named_tuple.rs
new file mode 100644
index 000000000..1a338e9c5
--- /dev/null
+++ b/nemo/src/io/parser/ast/named_tuple.rs
@@ -0,0 +1,40 @@
+use super::term::Term;
+use super::{AstNode, List};
+use crate::io::lexer::{Span, Token};
+
+#[derive(Debug, Clone, PartialEq)]
+pub(crate) struct NamedTuple<'a> {
+    pub(crate) span: Span<'a>,
+    pub(crate) identifier: Token<'a>,
+    pub(crate) ws1: Option<Token<'a>>,
+    pub(crate) open_paren: Token<'a>,
+    pub(crate) ws2: Option<Token<'a>>,
+    pub(crate) terms: Option<List<'a, Term<'a>>>,
+    pub(crate) ws3: Option<Token<'a>>,
+    pub(crate) close_paren: Token<'a>,
+}
+impl AstNode for NamedTuple<'_> {
+    fn children(&self) -> Option<Vec<&dyn AstNode>> {
+        let mut vec = Vec::new();
+        vec.push(&self.identifier as &dyn AstNode);
+        if let Some(ws) = &self.ws1 {
+            vec.push(ws);
+        }
+        vec.push(&self.open_paren);
+        if let Some(ws) = &self.ws2 {
+            vec.push(ws);
+        }
+        if let Some(terms) = &self.terms {
+            vec.push(terms);
+        }
+        if let Some(ws) = &self.ws3 {
+            vec.push(ws);
+        }
+        vec.push(&self.close_paren);
+        Some(vec)
+    }
+
+    fn span(&self) -> Span {
+        self.span
+    }
+}
diff --git a/nemo/src/io/parser/ast/program.rs b/nemo/src/io/parser/ast/program.rs
new file mode 100644
index 000000000..2ec5b5826
--- /dev/null
+++ b/nemo/src/io/parser/ast/program.rs
@@ -0,0 +1,29 @@
+use super::statement::Statement;
+use super::AstNode;
+use crate::io::lexer::{Span, Token};
+
+#[derive(Debug, Clone, PartialEq)]
+pub(crate) struct Program<'a> {
+    pub(crate) span: Span<'a>,
+    pub(crate) tl_doc_comment: Option<Token<'a>>,
+    pub(crate) statements: Vec<Statement<'a>>,
+}
+impl AstNode for Program<'_> {
+    fn children(&self) -> Option<Vec<&dyn AstNode>> {
+        let mut vec = Vec::new();
+        if let Some(dc) = &self.tl_doc_comment {
+            vec.push(dc as &dyn AstNode);
+        };
+        // NOTE: The current implementation puts the doc comment and all the
+        // statements in the same vec, so there is no need to implement AstNode
+        // for Vec<Statement>, which would be hard for the fn span() implementation
+        for statement in &self.statements {
+            vec.push(statement);
+        }
+        Some(vec)
+    }
+
+    fn span(&self) -> Span {
+        self.span
+    }
+}
diff --git a/nemo/src/io/parser/ast/statement.rs b/nemo/src/io/parser/ast/statement.rs
new file mode 100644
index 000000000..57ab9c335
--- /dev/null
+++ b/nemo/src/io/parser/ast/statement.rs
@@ -0,0 +1,96 @@
+use super::atom::Atom;
+use super::directive::Directive;
+use super::{AstNode, List};
+use crate::io::lexer::{Span, Token};
+
+#[derive(Debug, Clone, PartialEq)]
+pub(crate) enum Statement<'a> {
+    Directive(Directive<'a>),
+    Fact {
+        span: Span<'a>,
+        doc_comment: Option<Token<'a>>,
+        atom: Atom<'a>,
+        ws: Option<Token<'a>>,
+        dot: Token<'a>,
+    },
+    Rule {
+        span: Span<'a>,
+        doc_comment: Option<Token<'a>>,
+        head: List<'a, Atom<'a>>,
+        ws1: Option<Token<'a>>,
+        arrow: Token<'a>,
+        ws2: Option<Token<'a>>,
+        body: List<'a, Atom<'a>>,
+        ws3: Option<Token<'a>>,
+        dot: Token<'a>,
+    },
+    Whitespace(Token<'a>),
+    Comment(Token<'a>),
+}
+impl AstNode for Statement<'_> {
+    fn children(&self) -> Option<Vec<&dyn AstNode>> {
+        match self {
+            Statement::Directive(directive) => directive.children(),
+            Statement::Fact {
+                doc_comment,
+                atom,
+                ws,
+                dot,
+                ..
+            } => {
+                let mut vec = Vec::new();
+                if let Some(dc) = doc_comment {
+                    vec.push(dc as &dyn AstNode);
+                };
+                vec.push(atom);
+                if let Some(ws) = ws {
+                    vec.push(ws);
+                }
+                vec.push(dot);
+                Some(vec)
+            }
+            Statement::Rule {
+                doc_comment,
+                head,
+                ws1,
+                arrow,
+                ws2,
+                body,
+                ws3,
+                dot,
+                ..
+            } => {
+                let mut vec = Vec::new();
+                if let Some(dc) = doc_comment {
+                    vec.push(dc as &dyn AstNode);
+                };
+                vec.push(head as &dyn AstNode);
+                if let Some(ws) = ws1 {
+                    vec.push(ws);
+                };
+                vec.push(arrow);
+                if let Some(ws) = ws2 {
+                    vec.push(ws);
+                };
+                vec.push(body);
+                if let Some(ws) = ws3 {
+                    vec.push(ws);
+                };
+                vec.push(dot);
+                Some(vec)
+            }
+            Statement::Whitespace(ws) => Some(vec![ws]),
+            Statement::Comment(c) => Some(vec![c]),
+        }
+    }
+
+    fn span(&self) -> Span {
+        match self {
+            Statement::Directive(directive) => directive.span(),
+            Statement::Fact { span, .. } => *span,
+            Statement::Rule { span, .. } => *span,
+            Statement::Whitespace(ws) => ws.span(),
+            Statement::Comment(c) => c.span(),
+        }
+    }
+}
diff --git a/nemo/src/io/parser/ast/term.rs b/nemo/src/io/parser/ast/term.rs
new file mode 100644
index 000000000..8b35182e3
--- /dev/null
+++ b/nemo/src/io/parser/ast/term.rs
@@ -0,0 +1,103 @@
+use super::map::Map;
+use super::named_tuple::NamedTuple;
+use super::AstNode;
+use super::List;
+use crate::io::lexer::{Span, Token};
+
+#[derive(Debug, Clone, PartialEq)]
+pub(crate) enum Term<'a> {
+    Primitive(Token<'a>),
+    Variable(Token<'a>),
+    // TODO: Is whitespace needed? Figure out how unary terms look
+    Unary {
+        span: Span<'a>,
+        operation: Token<'a>,
+        term: Box<Term<'a>>,
+    },
+    Binary {
+        span: Span<'a>,
+        lhs: Box<Term<'a>>,
+        ws1: Option<Token<'a>>,
+        operation: Token<'a>,
+        ws2: Option<Token<'a>>,
+        rhs: Box<Term<'a>>,
+    },
+    Aggregation {
+        span: Span<'a>,
+        operation: Token<'a>,
+        open_paren: Token<'a>,
+        ws1: Option<Token<'a>>,
+        terms: Box<List<'a, Term<'a>>>,
+        ws2: Option<Token<'a>>,
+        close_paren: Token<'a>,
+    },
+    Function(Box<NamedTuple<'a>>),
+    Map(Box<Map<'a>>),
+}
+impl AstNode for Term<'_> {
+    fn children(&self) -> Option<Vec<&dyn AstNode>> {
+        match self {
+            Term::Primitive(token) => Some(vec![token]),
+            Term::Variable(token) => Some(vec![token]),
+            Term::Unary {
+                operation, term, ..
+            } => Some(vec![operation, &**term]),
+            Term::Binary {
+                lhs,
+                ws1,
+                operation,
+                ws2,
+                rhs,
+                ..
+            } => {
+                let mut vec = Vec::new();
+                vec.push(&**lhs as &dyn AstNode);
+                if let Some(ws) = ws1 {
+                    vec.push(ws);
+                };
+                vec.push(operation);
+                if let Some(ws) = ws2 {
+                    vec.push(ws);
+                };
+                vec.push(&**rhs);
+                Some(vec)
+            }
+            Term::Aggregation {
+                operation,
+                open_paren,
+                ws1,
+                terms,
+                ws2,
+                close_paren,
+                ..
+            } => {
+                let mut vec = Vec::new();
+                vec.push(operation as &dyn AstNode);
+                vec.push(open_paren);
+                if let Some(ws) = ws1 {
+                    vec.push(ws);
+                }
+                vec.push(&**terms);
+                if let Some(ws) = ws2 {
+                    vec.push(ws);
+                }
+                vec.push(close_paren);
+                Some(vec)
+            }
+            Term::Function(named_tuple) => named_tuple.children(),
+            Term::Map(map) => map.children(),
+        }
+    }
+
+    fn span(&self) -> Span {
+        match self {
+            Term::Primitive(t) => t.span(),
+            Term::Variable(t) => t.span(),
+            Term::Unary { span, .. } => *span,
+            Term::Binary { span, .. } => *span,
+            Term::Aggregation { span, .. } => *span,
+            Term::Function(named_tuple) => named_tuple.span(),
+            Term::Map(map) => map.span(),
+        }
+    }
+}
From c69ddbd795ea2696ab1055c99ae84b6e8fd4af39 Mon Sep 17 00:00:00 2001
From: Jakob Steinberg
Date: Tue, 9 Apr 2024 09:06:06 +0200
Subject: [PATCH 075/214] Change trivial conversion lint from 'deny' to 'warn'

---
 nemo/src/lib.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nemo/src/lib.rs b/nemo/src/lib.rs
index 246ab7f6c..8d5d47603 100644
--- a/nemo/src/lib.rs
+++ b/nemo/src/lib.rs
@@ -4,10 +4,10 @@
 #![deny(
     missing_debug_implementations,
     missing_copy_implementations,
-    trivial_casts,
     trivial_numeric_casts
 )]
 #![warn(
+    trivial_casts,
     missing_docs,
     unused_import_braces,
     unused_qualifications,
From 9103359aa022f671b40d632d7760ceba902b2914 Mon Sep 17 00:00:00 2001
From: Jakob Steinberg
Date: Tue, 9 Apr 2024 09:12:37 +0200
Subject: [PATCH 076/214] Add Testfile

---
 nemo/src/io/parser.rs |  3 +--
 testfile.rls          | 18 ++++++++++++++++++
 2 files changed, 19 insertions(+), 2 deletions(-)
 create mode 100644 testfile.rls

diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs
index b29afe701..6f5ca55e8 100644
--- a/nemo/src/io/parser.rs
+++ b/nemo/src/io/parser.rs
@@ -3424,8 +3424,7 @@ mod new {
 
     #[test]
     fn parser_test() {
-        let str =
-            std::fs::read_to_string("../testfile.rls").expect("`../testfile.rls` not found");
+        let str = std::fs::read_to_string("../testfile.rls").expect("testfile not found");
         let input = Span::new(str.as_str());
         dbg!(parse_program(input));
         // assert!(false);
diff --git a/testfile.rls b/testfile.rls
new file mode 100644
index 000000000..b5f5d6db9
--- /dev/null
+++ b/testfile.rls
@@ -0,0 +1,18 @@
+@base .
+@prefix rdfs: .
+@import sourceA :- csv { resource = "sources/dataA.csv" } .
+@export a :- csv {} .
+@output a .
+
+% Facts:
+father(alice, bob).
+mother(bob, carla).
+father(bob, darius).
+mother(alice, carla).
+
+% Rules:
+parent(?X, ?Y) :- mother(?X, ?Y). 
+parent(?X, ?Y) :- father(?X, ?Y). +parent( ?X , ?Y ) :- ~sibling( ?X , ?Y ) . +a(?x) :- b(?x, B) . +s(4) :- s(3). From 73433c9c64b30a5dbf0eb96da0c237e2913dd92d Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Tue, 9 Apr 2024 10:04:54 +0200 Subject: [PATCH 077/214] Add and implement position method on AstNode trait --- nemo/src/io/parser/ast.rs | 12 +++++++++++- nemo/src/io/parser/ast/atom.rs | 9 +++++++++ nemo/src/io/parser/ast/directive.rs | 16 ++++++++-------- nemo/src/io/parser/ast/map.rs | 7 +++++++ nemo/src/io/parser/ast/named_tuple.rs | 8 ++++++++ nemo/src/io/parser/ast/program.rs | 8 ++++++++ nemo/src/io/parser/ast/statement.rs | 9 +++++++++ nemo/src/io/parser/ast/term.rs | 9 +++++++++ 8 files changed, 69 insertions(+), 9 deletions(-) diff --git a/nemo/src/io/parser/ast.rs b/nemo/src/io/parser/ast.rs index 03c5e6ece..de97d4d06 100644 --- a/nemo/src/io/parser/ast.rs +++ b/nemo/src/io/parser/ast.rs @@ -1,3 +1,5 @@ +use nom::Offset; + use crate::io::lexer::{Span, Token}; use std::fmt::Display; @@ -12,7 +14,7 @@ pub(crate) mod term; pub(crate) trait AstNode: std::fmt::Debug { fn children(&self) -> Option>; fn span(&self) -> Span; - // fn position(&self) -> Position; + fn position(&self) -> Position; } pub(crate) struct Position { @@ -50,6 +52,14 @@ impl AstNode for List<'_, T> { fn span(&self) -> Span { self.span } + + fn position(&self) -> Position { + Position { + offset: self.span.location_offset(), + line: self.span.location_line(), + column: self.span.get_column() as u32, + } + } } fn get_all_tokens(node: &dyn AstNode) -> Vec<&dyn AstNode> { diff --git a/nemo/src/io/parser/ast/atom.rs b/nemo/src/io/parser/ast/atom.rs index f78e9b8a5..bbe916a39 100644 --- a/nemo/src/io/parser/ast/atom.rs +++ b/nemo/src/io/parser/ast/atom.rs @@ -59,4 +59,13 @@ impl AstNode for Atom<'_> { Atom::Map(map) => map.span(), } } + + fn position(&self) -> super::Position { + let span = self.span(); + super::Position { + offset: span.location_offset(), + line: span.location_line(), + column: span.get_column() as u32, + } + } } diff --git a/nemo/src/io/parser/ast/directive.rs b/nemo/src/io/parser/ast/directive.rs index a716a91d3..b3d17c9c8 100644 --- a/nemo/src/io/parser/ast/directive.rs +++ b/nemo/src/io/parser/ast/directive.rs @@ -201,12 +201,12 @@ impl AstNode for Directive<'_> { } } - // fn position(&self) -> Position { - // let span = self.span(); - // Position { - // offset: span.location_offset(), - // line: span.location_line(), - // column: span.get_column() as u32, - // } - // } + fn position(&self) -> Position { + let span = self.span(); + Position { + offset: span.location_offset(), + line: span.location_line(), + column: span.get_column() as u32, + } + } } diff --git a/nemo/src/io/parser/ast/map.rs b/nemo/src/io/parser/ast/map.rs index 34261a51a..ea420a02a 100644 --- a/nemo/src/io/parser/ast/map.rs +++ b/nemo/src/io/parser/ast/map.rs @@ -69,4 +69,11 @@ impl AstNode for Pair<'_, K, V> { fn span(&self) -> Span { self.span } + fn position(&self) -> Position { + Position { + offset: self.span.location_offset(), + line: self.span.location_line(), + column: self.span.get_column() as u32, + } + } } diff --git a/nemo/src/io/parser/ast/named_tuple.rs b/nemo/src/io/parser/ast/named_tuple.rs index 1a338e9c5..f961dcb07 100644 --- a/nemo/src/io/parser/ast/named_tuple.rs +++ b/nemo/src/io/parser/ast/named_tuple.rs @@ -37,4 +37,12 @@ impl AstNode for NamedTuple<'_> { fn span(&self) -> Span { self.span } + + fn position(&self) -> Position { + Position { + offset: self.span.location_offset(), + line: 
self.span.location_line(), + column: self.span.get_column() as u32, + } + } } diff --git a/nemo/src/io/parser/ast/program.rs b/nemo/src/io/parser/ast/program.rs index 2ec5b5826..23dc43cb1 100644 --- a/nemo/src/io/parser/ast/program.rs +++ b/nemo/src/io/parser/ast/program.rs @@ -26,4 +26,12 @@ impl AstNode for Program<'_> { fn span(&self) -> Span { self.span } + + fn position(&self) -> Position { + Position { + offset: self.span.location_offset(), + line: self.span.location_line(), + column: self.span.get_column() as u32, + } + } } diff --git a/nemo/src/io/parser/ast/statement.rs b/nemo/src/io/parser/ast/statement.rs index 57ab9c335..c126ec480 100644 --- a/nemo/src/io/parser/ast/statement.rs +++ b/nemo/src/io/parser/ast/statement.rs @@ -93,4 +93,13 @@ impl AstNode for Statement<'_> { Statement::Comment(c) => c.span(), } } + + fn position(&self) -> Position { + let span = self.span(); + Position { + offset: span.location_offset(), + line: span.location_line(), + column: span.get_column() as u32, + } + } } diff --git a/nemo/src/io/parser/ast/term.rs b/nemo/src/io/parser/ast/term.rs index 8b35182e3..3fcb15190 100644 --- a/nemo/src/io/parser/ast/term.rs +++ b/nemo/src/io/parser/ast/term.rs @@ -100,4 +100,13 @@ impl AstNode for Term<'_> { Term::Map(map) => map.span(), } } + + fn position(&self) -> Position { + let span = self.span(); + Position { + offset: span.location_offset(), + line: span.location_line(), + column: span.get_column() as u32, + } + } } From a33cb26e95006d06d99c9bac349072312828b215 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Tue, 9 Apr 2024 10:24:46 +0200 Subject: [PATCH 078/214] Fix fn position() implementation --- nemo/src/io/lexer.rs | 14 +++++++------- nemo/src/io/parser/ast.rs | 7 ++++++- nemo/src/io/parser/ast/atom.rs | 8 ++++---- nemo/src/io/parser/ast/directive.rs | 4 ++-- nemo/src/io/parser/ast/map.rs | 13 +++++++++++-- nemo/src/io/parser/ast/named_tuple.rs | 4 ++-- nemo/src/io/parser/ast/program.rs | 4 ++-- nemo/src/io/parser/ast/statement.rs | 4 ++-- nemo/src/io/parser/ast/term.rs | 3 ++- 9 files changed, 38 insertions(+), 23 deletions(-) diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs index 521b6b82c..d55231291 100644 --- a/nemo/src/io/lexer.rs +++ b/nemo/src/io/lexer.rs @@ -195,13 +195,13 @@ impl<'a> crate::io::parser::ast::AstNode for Token<'a> { self.span } - // fn position(&self) -> Position { - // Position { - // offset: self.span.location_offset(), - // line: self.span.location_line(), - // column: self.span.get_column() as u32, - // } - // } + fn position(&self) -> Position { + Position { + offset: self.span.location_offset(), + line: self.span.location_line(), + column: self.span.get_utf8_column() as u32, + } + } } macro_rules! 
syntax { diff --git a/nemo/src/io/parser/ast.rs b/nemo/src/io/parser/ast.rs index de97d4d06..62e1632aa 100644 --- a/nemo/src/io/parser/ast.rs +++ b/nemo/src/io/parser/ast.rs @@ -15,6 +15,7 @@ pub(crate) trait AstNode: std::fmt::Debug { fn children(&self) -> Option>; fn span(&self) -> Span; fn position(&self) -> Position; + // fn is_token(&self) -> bool; } pub(crate) struct Position { @@ -57,9 +58,13 @@ impl AstNode for List<'_, T> { Position { offset: self.span.location_offset(), line: self.span.location_line(), - column: self.span.get_column() as u32, + column: self.span.get_utf8_column() as u32, } } + + // fn is_token(&self) -> bool { + // false + // } } fn get_all_tokens(node: &dyn AstNode) -> Vec<&dyn AstNode> { diff --git a/nemo/src/io/parser/ast/atom.rs b/nemo/src/io/parser/ast/atom.rs index bbe916a39..a554fe378 100644 --- a/nemo/src/io/parser/ast/atom.rs +++ b/nemo/src/io/parser/ast/atom.rs @@ -1,7 +1,7 @@ -use super::map::Map; use super::named_tuple::NamedTuple; use super::term::Term; use super::AstNode; +use super::{map::Map, Position}; use crate::io::lexer::{Span, Token}; #[derive(Debug, Clone, PartialEq)] @@ -60,12 +60,12 @@ impl AstNode for Atom<'_> { } } - fn position(&self) -> super::Position { + fn position(&self) -> Position { let span = self.span(); - super::Position { + Position { offset: span.location_offset(), line: span.location_line(), - column: span.get_column() as u32, + column: span.get_utf8_column() as u32, } } } diff --git a/nemo/src/io/parser/ast/directive.rs b/nemo/src/io/parser/ast/directive.rs index b3d17c9c8..9d5e1c28c 100644 --- a/nemo/src/io/parser/ast/directive.rs +++ b/nemo/src/io/parser/ast/directive.rs @@ -1,5 +1,5 @@ -use super::map::Map; use super::AstNode; +use super::{map::Map, Position}; use crate::io::lexer::{Span, Token}; #[derive(Debug, Clone, PartialEq)] @@ -206,7 +206,7 @@ impl AstNode for Directive<'_> { Position { offset: span.location_offset(), line: span.location_line(), - column: span.get_column() as u32, + column: span.get_utf8_column() as u32, } } } diff --git a/nemo/src/io/parser/ast/map.rs b/nemo/src/io/parser/ast/map.rs index ea420a02a..c6d352d14 100644 --- a/nemo/src/io/parser/ast/map.rs +++ b/nemo/src/io/parser/ast/map.rs @@ -1,5 +1,5 @@ use super::term::Term; -use super::{AstNode, List}; +use super::{AstNode, List, Position}; use crate::io::lexer::{Span, Token}; use std::fmt::Debug; @@ -40,6 +40,14 @@ impl AstNode for Map<'_> { fn span(&self) -> Span { self.span } + + fn position(&self) -> Position { + Position { + offset: self.span.location_offset(), + line: self.span.location_line(), + column: self.span.get_utf8_column() as u32, + } + } } #[derive(Debug, Clone, PartialEq)] @@ -69,11 +77,12 @@ impl AstNode for Pair<'_, K, V> { fn span(&self) -> Span { self.span } + fn position(&self) -> Position { Position { offset: self.span.location_offset(), line: self.span.location_line(), - column: self.span.get_column() as u32, + column: self.span.get_utf8_column() as u32, } } } diff --git a/nemo/src/io/parser/ast/named_tuple.rs b/nemo/src/io/parser/ast/named_tuple.rs index f961dcb07..36695d12e 100644 --- a/nemo/src/io/parser/ast/named_tuple.rs +++ b/nemo/src/io/parser/ast/named_tuple.rs @@ -1,5 +1,5 @@ use super::term::Term; -use super::{AstNode, List}; +use super::{AstNode, List, Position}; use crate::io::lexer::{Span, Token}; #[derive(Debug, Clone, PartialEq)] @@ -42,7 +42,7 @@ impl AstNode for NamedTuple<'_> { Position { offset: self.span.location_offset(), line: self.span.location_line(), - column: self.span.get_column() as u32, + 
column: self.span.get_utf8_column() as u32, } } } diff --git a/nemo/src/io/parser/ast/program.rs b/nemo/src/io/parser/ast/program.rs index 23dc43cb1..8f99c7f8b 100644 --- a/nemo/src/io/parser/ast/program.rs +++ b/nemo/src/io/parser/ast/program.rs @@ -1,5 +1,5 @@ -use super::statement::Statement; use super::AstNode; +use super::{statement::Statement, Position}; use crate::io::lexer::{Span, Token}; #[derive(Debug, Clone, PartialEq)] @@ -31,7 +31,7 @@ impl AstNode for Program<'_> { Position { offset: self.span.location_offset(), line: self.span.location_line(), - column: self.span.get_column() as u32, + column: self.span.get_utf8_column() as u32, } } } diff --git a/nemo/src/io/parser/ast/statement.rs b/nemo/src/io/parser/ast/statement.rs index c126ec480..284f28ccb 100644 --- a/nemo/src/io/parser/ast/statement.rs +++ b/nemo/src/io/parser/ast/statement.rs @@ -1,6 +1,6 @@ use super::atom::Atom; use super::directive::Directive; -use super::{AstNode, List}; +use super::{AstNode, List, Position}; use crate::io::lexer::{Span, Token}; #[derive(Debug, Clone, PartialEq)] @@ -99,7 +99,7 @@ impl AstNode for Statement<'_> { Position { offset: span.location_offset(), line: span.location_line(), - column: span.get_column() as u32, + column: span.get_utf8_column() as u32, } } } diff --git a/nemo/src/io/parser/ast/term.rs b/nemo/src/io/parser/ast/term.rs index 3fcb15190..bcf29961f 100644 --- a/nemo/src/io/parser/ast/term.rs +++ b/nemo/src/io/parser/ast/term.rs @@ -2,6 +2,7 @@ use super::map::Map; use super::named_tuple::NamedTuple; use super::AstNode; use super::List; +use super::Position; use crate::io::lexer::{Span, Token}; #[derive(Debug, Clone, PartialEq)] @@ -106,7 +107,7 @@ impl AstNode for Term<'_> { Position { offset: span.location_offset(), line: span.location_line(), - column: span.get_column() as u32, + column: span.get_utf8_column() as u32, } } } From 3554c33c6b276ebd293fb1a1363fe3b9c132eef7 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Tue, 9 Apr 2024 10:29:09 +0200 Subject: [PATCH 079/214] Add method is_token for trait AstNode and implement it --- nemo/src/io/lexer.rs | 4 ++++ nemo/src/io/parser/ast.rs | 8 ++++---- nemo/src/io/parser/ast/atom.rs | 4 ++++ nemo/src/io/parser/ast/directive.rs | 4 ++++ nemo/src/io/parser/ast/map.rs | 8 ++++++++ nemo/src/io/parser/ast/named_tuple.rs | 4 ++++ nemo/src/io/parser/ast/program.rs | 4 ++++ nemo/src/io/parser/ast/statement.rs | 4 ++++ nemo/src/io/parser/ast/term.rs | 4 ++++ 9 files changed, 40 insertions(+), 4 deletions(-) diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs index d55231291..142abcafd 100644 --- a/nemo/src/io/lexer.rs +++ b/nemo/src/io/lexer.rs @@ -202,6 +202,10 @@ impl<'a> crate::io::parser::ast::AstNode for Token<'a> { column: self.span.get_utf8_column() as u32, } } + + fn is_token(&self) -> bool { + true + } } macro_rules! 
syntax { diff --git a/nemo/src/io/parser/ast.rs b/nemo/src/io/parser/ast.rs index 62e1632aa..34b8a4f05 100644 --- a/nemo/src/io/parser/ast.rs +++ b/nemo/src/io/parser/ast.rs @@ -15,7 +15,7 @@ pub(crate) trait AstNode: std::fmt::Debug { fn children(&self) -> Option>; fn span(&self) -> Span; fn position(&self) -> Position; - // fn is_token(&self) -> bool; + fn is_token(&self) -> bool; } pub(crate) struct Position { @@ -62,9 +62,9 @@ impl AstNode for List<'_, T> { } } - // fn is_token(&self) -> bool { - // false - // } + fn is_token(&self) -> bool { + false + } } fn get_all_tokens(node: &dyn AstNode) -> Vec<&dyn AstNode> { diff --git a/nemo/src/io/parser/ast/atom.rs b/nemo/src/io/parser/ast/atom.rs index a554fe378..f3949e56a 100644 --- a/nemo/src/io/parser/ast/atom.rs +++ b/nemo/src/io/parser/ast/atom.rs @@ -68,4 +68,8 @@ impl AstNode for Atom<'_> { column: span.get_utf8_column() as u32, } } + + fn is_token(&self) -> bool { + false + } } diff --git a/nemo/src/io/parser/ast/directive.rs b/nemo/src/io/parser/ast/directive.rs index 9d5e1c28c..9788d7cf0 100644 --- a/nemo/src/io/parser/ast/directive.rs +++ b/nemo/src/io/parser/ast/directive.rs @@ -209,4 +209,8 @@ impl AstNode for Directive<'_> { column: span.get_utf8_column() as u32, } } + + fn is_token(&self) -> bool { + false + } } diff --git a/nemo/src/io/parser/ast/map.rs b/nemo/src/io/parser/ast/map.rs index c6d352d14..61657df30 100644 --- a/nemo/src/io/parser/ast/map.rs +++ b/nemo/src/io/parser/ast/map.rs @@ -48,6 +48,10 @@ impl AstNode for Map<'_> { column: self.span.get_utf8_column() as u32, } } + + fn is_token(&self) -> bool { + false + } } #[derive(Debug, Clone, PartialEq)] @@ -85,4 +89,8 @@ impl AstNode for Pair<'_, K, V> { column: self.span.get_utf8_column() as u32, } } + + fn is_token(&self) -> bool { + false + } } diff --git a/nemo/src/io/parser/ast/named_tuple.rs b/nemo/src/io/parser/ast/named_tuple.rs index 36695d12e..f9379dc70 100644 --- a/nemo/src/io/parser/ast/named_tuple.rs +++ b/nemo/src/io/parser/ast/named_tuple.rs @@ -45,4 +45,8 @@ impl AstNode for NamedTuple<'_> { column: self.span.get_utf8_column() as u32, } } + + fn is_token(&self) -> bool { + false + } } diff --git a/nemo/src/io/parser/ast/program.rs b/nemo/src/io/parser/ast/program.rs index 8f99c7f8b..a868da4e7 100644 --- a/nemo/src/io/parser/ast/program.rs +++ b/nemo/src/io/parser/ast/program.rs @@ -34,4 +34,8 @@ impl AstNode for Program<'_> { column: self.span.get_utf8_column() as u32, } } + + fn is_token(&self) -> bool { + false + } } diff --git a/nemo/src/io/parser/ast/statement.rs b/nemo/src/io/parser/ast/statement.rs index 284f28ccb..8dcb990d8 100644 --- a/nemo/src/io/parser/ast/statement.rs +++ b/nemo/src/io/parser/ast/statement.rs @@ -102,4 +102,8 @@ impl AstNode for Statement<'_> { column: span.get_utf8_column() as u32, } } + + fn is_token(&self) -> bool { + false + } } diff --git a/nemo/src/io/parser/ast/term.rs b/nemo/src/io/parser/ast/term.rs index bcf29961f..f96b1969c 100644 --- a/nemo/src/io/parser/ast/term.rs +++ b/nemo/src/io/parser/ast/term.rs @@ -110,4 +110,8 @@ impl AstNode for Term<'_> { column: span.get_utf8_column() as u32, } } + + fn is_token(&self) -> bool { + false + } } From 180c54dcf043e1bbb281f855b86331186640d606 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Wed, 10 Apr 2024 15:33:58 +0200 Subject: [PATCH 080/214] Change trivial cast lint back to deny and add local exceptions --- nemo/src/io/parser/ast.rs | 1 + nemo/src/io/parser/ast/atom.rs | 1 + nemo/src/io/parser/ast/directive.rs | 4 ++++ nemo/src/io/parser/ast/map.rs | 2 ++ 
nemo/src/io/parser/ast/named_tuple.rs | 1 + nemo/src/io/parser/ast/program.rs | 1 + nemo/src/io/parser/ast/statement.rs | 4 +++- nemo/src/io/parser/ast/term.rs | 2 ++ nemo/src/lib.rs | 2 +- 9 files changed, 16 insertions(+), 2 deletions(-) diff --git a/nemo/src/io/parser/ast.rs b/nemo/src/io/parser/ast.rs index 34b8a4f05..7c00c2eb3 100644 --- a/nemo/src/io/parser/ast.rs +++ b/nemo/src/io/parser/ast.rs @@ -34,6 +34,7 @@ pub(crate) struct List<'a, T> { impl AstNode for List<'_, T> { fn children(&self) -> Option> { let mut vec = Vec::new(); + #[allow(trivial_casts)] vec.push(&self.first as &dyn AstNode); if let Some(rest) = &self.rest { for (ws1, delim, ws2, item) in rest { diff --git a/nemo/src/io/parser/ast/atom.rs b/nemo/src/io/parser/ast/atom.rs index f3949e56a..3bd65b00f 100644 --- a/nemo/src/io/parser/ast/atom.rs +++ b/nemo/src/io/parser/ast/atom.rs @@ -36,6 +36,7 @@ impl AstNode for Atom<'_> { .. } => { let mut vec = Vec::new(); + #[allow(trivial_casts)] vec.push(lhs as &dyn AstNode); if let Some(ws) = ws1 { vec.push(ws); diff --git a/nemo/src/io/parser/ast/directive.rs b/nemo/src/io/parser/ast/directive.rs index 9788d7cf0..c0d103697 100644 --- a/nemo/src/io/parser/ast/directive.rs +++ b/nemo/src/io/parser/ast/directive.rs @@ -76,6 +76,7 @@ impl AstNode for Directive<'_> { } => { let mut vec = Vec::new(); if let Some(dc) = doc_comment { + #[allow(trivial_casts)] vec.push(dc as &dyn AstNode); }; vec.push(kw); @@ -102,6 +103,7 @@ impl AstNode for Directive<'_> { } => { let mut vec = Vec::new(); if let Some(dc) = doc_comment { + #[allow(trivial_casts)] vec.push(dc as &dyn AstNode); }; vec.push(kw); @@ -134,6 +136,7 @@ impl AstNode for Directive<'_> { } => { let mut vec = Vec::new(); if let Some(dc) = doc_comment { + #[allow(trivial_casts)] vec.push(dc as &dyn AstNode); }; vec.push(kw); @@ -168,6 +171,7 @@ impl AstNode for Directive<'_> { } => { let mut vec = Vec::new(); if let Some(dc) = doc_comment { + #[allow(trivial_casts)] vec.push(dc as &dyn AstNode); }; vec.push(kw); diff --git a/nemo/src/io/parser/ast/map.rs b/nemo/src/io/parser/ast/map.rs index 61657df30..5ac13ec03 100644 --- a/nemo/src/io/parser/ast/map.rs +++ b/nemo/src/io/parser/ast/map.rs @@ -18,6 +18,7 @@ impl AstNode for Map<'_> { fn children(&self) -> Option> { let mut vec = Vec::new(); if let Some(identifier) = &self.identifier { + #[allow(trivial_casts)] vec.push(identifier as &dyn AstNode); }; if let Some(ws) = &self.ws1 { @@ -66,6 +67,7 @@ pub(crate) struct Pair<'a, K, V> { impl AstNode for Pair<'_, K, V> { fn children(&self) -> Option> { let mut vec = Vec::new(); + #[allow(trivial_casts)] vec.push(&self.key as &dyn AstNode); if let Some(ws) = &self.ws1 { vec.push(ws); diff --git a/nemo/src/io/parser/ast/named_tuple.rs b/nemo/src/io/parser/ast/named_tuple.rs index f9379dc70..e8e05df9a 100644 --- a/nemo/src/io/parser/ast/named_tuple.rs +++ b/nemo/src/io/parser/ast/named_tuple.rs @@ -16,6 +16,7 @@ pub(crate) struct NamedTuple<'a> { impl AstNode for NamedTuple<'_> { fn children(&self) -> Option> { let mut vec = Vec::new(); + #[allow(trivial_casts)] vec.push(&self.identifier as &dyn AstNode); if let Some(ws) = &self.ws1 { vec.push(ws); diff --git a/nemo/src/io/parser/ast/program.rs b/nemo/src/io/parser/ast/program.rs index a868da4e7..8872c2e56 100644 --- a/nemo/src/io/parser/ast/program.rs +++ b/nemo/src/io/parser/ast/program.rs @@ -12,6 +12,7 @@ impl AstNode for Program<'_> { fn children(&self) -> Option> { let mut vec = Vec::new(); if let Some(dc) = &self.tl_doc_comment { + #[allow(trivial_casts)] vec.push(dc as &dyn 
AstNode); }; // NOTE: The current implementation puts the doc comment and all the diff --git a/nemo/src/io/parser/ast/statement.rs b/nemo/src/io/parser/ast/statement.rs index 8dcb990d8..2404f1298 100644 --- a/nemo/src/io/parser/ast/statement.rs +++ b/nemo/src/io/parser/ast/statement.rs @@ -40,6 +40,7 @@ impl AstNode for Statement<'_> { } => { let mut vec = Vec::new(); if let Some(dc) = doc_comment { + #[allow(trivial_casts)] vec.push(dc as &dyn AstNode); }; vec.push(atom); @@ -62,9 +63,10 @@ impl AstNode for Statement<'_> { } => { let mut vec = Vec::new(); if let Some(dc) = doc_comment { + #[allow(trivial_casts)] vec.push(dc as &dyn AstNode); }; - vec.push(head as &dyn AstNode); + vec.push(head); if let Some(ws) = ws1 { vec.push(ws); }; diff --git a/nemo/src/io/parser/ast/term.rs b/nemo/src/io/parser/ast/term.rs index f96b1969c..372e41853 100644 --- a/nemo/src/io/parser/ast/term.rs +++ b/nemo/src/io/parser/ast/term.rs @@ -52,6 +52,7 @@ impl AstNode for Term<'_> { .. } => { let mut vec = Vec::new(); + #[allow(trivial_casts)] vec.push(&**lhs as &dyn AstNode); if let Some(ws) = ws1 { vec.push(ws); @@ -73,6 +74,7 @@ impl AstNode for Term<'_> { .. } => { let mut vec = Vec::new(); + #[allow(trivial_casts)] vec.push(operation as &dyn AstNode); vec.push(open_paren); if let Some(ws) = ws1 { diff --git a/nemo/src/lib.rs b/nemo/src/lib.rs index 8d5d47603..246ab7f6c 100644 --- a/nemo/src/lib.rs +++ b/nemo/src/lib.rs @@ -4,10 +4,10 @@ #![deny( missing_debug_implementations, missing_copy_implementations, + trivial_casts, trivial_numeric_casts )] #![warn( - trivial_casts, missing_docs, unused_import_braces, unused_qualifications, From 64652567043ab7cd6bebd79054005147a1eb7fe3 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Thu, 11 Apr 2024 10:49:29 +0200 Subject: [PATCH 081/214] Add method name to trait AstNode and implement Display for all Ast nodes --- nemo/src/io/lexer.rs | 12 +++++++-- nemo/src/io/parser/ast.rs | 39 ++++++++++++++++++++++++--- nemo/src/io/parser/ast/atom.rs | 23 +++++++++++++--- nemo/src/io/parser/ast/directive.rs | 22 +++++++++++++-- nemo/src/io/parser/ast/map.rs | 25 ++++++++++++++++- nemo/src/io/parser/ast/named_tuple.rs | 14 +++++++++- nemo/src/io/parser/ast/program.rs | 17 ++++++++++-- nemo/src/io/parser/ast/statement.rs | 22 +++++++++++++-- nemo/src/io/parser/ast/term.rs | 24 ++++++++++++++--- 9 files changed, 178 insertions(+), 20 deletions(-) diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs index 142abcafd..1cc390707 100644 --- a/nemo/src/io/lexer.rs +++ b/nemo/src/io/lexer.rs @@ -176,11 +176,15 @@ impl std::fmt::Display for Token<'_> { let line = self.span.location_line(); let fragment = self.span.fragment(); if self.span.extra == () { - write!(f, "T!{{{0}, S!({offset}, {line}, {fragment})}}", self.kind) + write!( + f, + "T!{{{0}, S!({offset}, {line}, {fragment:?})}}", + self.kind + ) } else { write!( f, - "T!{{{0}, S!({offset}, {line}, {fragment}, {1:?})}}", + "T!{{{0}, S!({offset}, {line}, {fragment:?}, {1:?})}}", self.kind, self.span.extra ) } @@ -206,6 +210,10 @@ impl<'a> crate::io::parser::ast::AstNode for Token<'a> { fn is_token(&self) -> bool { true } + + fn name(&self) -> String { + String::from("Token") + } } macro_rules! 
syntax { diff --git a/nemo/src/io/parser/ast.rs b/nemo/src/io/parser/ast.rs index 7c00c2eb3..7f1094e8d 100644 --- a/nemo/src/io/parser/ast.rs +++ b/nemo/src/io/parser/ast.rs @@ -2,6 +2,7 @@ use nom::Offset; use crate::io::lexer::{Span, Token}; use std::fmt::Display; +use ascii_tree::{Tree, write_tree}; pub(crate) mod atom; pub(crate) mod directive; @@ -11,11 +12,12 @@ pub(crate) mod program; pub(crate) mod statement; pub(crate) mod term; -pub(crate) trait AstNode: std::fmt::Debug { +pub(crate) trait AstNode: std::fmt::Debug + Display { fn children(&self) -> Option>; fn span(&self) -> Span; fn position(&self) -> Position; fn is_token(&self) -> bool; + fn name(&self) -> String; } pub(crate) struct Position { @@ -66,9 +68,20 @@ impl AstNode for List<'_, T> { fn is_token(&self) -> bool { false } + + fn name(&self) -> String { + String::from("List") + } +} +impl Display for List<'_, T> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut output = String::new(); + write_tree(&mut output, &ast_to_ascii_tree(self))?; + write!(f, "{output}") + } } -fn get_all_tokens(node: &dyn AstNode) -> Vec<&dyn AstNode> { +pub(crate) fn get_all_tokens(node: &dyn AstNode) -> Vec<&dyn AstNode> { let mut vec = Vec::new(); if let Some(children) = node.children() { for child in children { @@ -80,6 +93,20 @@ fn get_all_tokens(node: &dyn AstNode) -> Vec<&dyn AstNode> { vec } +pub(crate) fn ast_to_ascii_tree(node: &dyn AstNode) -> Tree { + let mut vec = Vec::new(); + if let Some(children) = node.children() { + for child in children { + if child.is_token() { + vec.push(Tree::Leaf(vec![format!("{}", child)])); + } else { + vec.push(ast_to_ascii_tree(child)); + } + } + } + Tree::Node(node.name(), vec) +} + mod test { use super::*; use super::{atom::Atom, directive::Directive, named_tuple::NamedTuple, program::Program, statement::Statement, term::Term}; @@ -295,11 +322,15 @@ mod test { }), ], }; - + println!("{}", ast); let tokens1 = get_all_tokens(&ast); + for token in &tokens1 { + println!("{}", token); + } + assert_eq!(input, { let mut result = String::new(); - for token in tokens1 { + for token in &tokens1 { result.push_str(token.span().fragment()); } result diff --git a/nemo/src/io/parser/ast/atom.rs b/nemo/src/io/parser/ast/atom.rs index 3bd65b00f..419ef048d 100644 --- a/nemo/src/io/parser/ast/atom.rs +++ b/nemo/src/io/parser/ast/atom.rs @@ -1,8 +1,9 @@ use super::named_tuple::NamedTuple; use super::term::Term; -use super::AstNode; +use super::{ast_to_ascii_tree, AstNode}; use super::{map::Map, Position}; use crate::io::lexer::{Span, Token}; +use ascii_tree::write_tree; #[derive(Debug, Clone, PartialEq)] pub(crate) enum Atom<'a> { @@ -25,7 +26,7 @@ pub(crate) enum Atom<'a> { impl AstNode for Atom<'_> { fn children(&self) -> Option> { match self { - Atom::Positive(named_tuple) => named_tuple.children(), + Atom::Positive(named_tuple) => Some(vec![named_tuple]), Atom::Negative { neg, atom, .. } => Some(vec![neg, atom]), Atom::InfixAtom { lhs, @@ -48,7 +49,7 @@ impl AstNode for Atom<'_> { vec.push(rhs); Some(vec) } - Atom::Map(map) => map.children(), + Atom::Map(map) => Some(vec![map]), } } @@ -73,4 +74,20 @@ impl AstNode for Atom<'_> { fn is_token(&self) -> bool { false } + + fn name(&self) -> String { + match self { + Atom::Positive(_) => "Positive Atom".into(), + Atom::Negative { .. } => "Negative Atom".into(), + Atom::InfixAtom { .. 
} => "Infix Atom".into(), + Atom::Map(_) => "Map Atom".into(), + } + } +} +impl std::fmt::Display for Atom<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut output = String::new(); + write_tree(&mut output, &ast_to_ascii_tree(self))?; + write!(f, "{output}") + } } diff --git a/nemo/src/io/parser/ast/directive.rs b/nemo/src/io/parser/ast/directive.rs index c0d103697..857b53f6c 100644 --- a/nemo/src/io/parser/ast/directive.rs +++ b/nemo/src/io/parser/ast/directive.rs @@ -1,6 +1,7 @@ -use super::AstNode; -use super::{map::Map, Position}; +use super::map::Map; +use super::{ast_to_ascii_tree, AstNode, Position}; use crate::io::lexer::{Span, Token}; +use ascii_tree::write_tree; #[derive(Debug, Clone, PartialEq)] pub(crate) enum Directive<'a> { @@ -217,4 +218,21 @@ impl AstNode for Directive<'_> { fn is_token(&self) -> bool { false } + + fn name(&self) -> String { + match self { + Directive::Base { .. } => "Base Directive".into(), + Directive::Prefix { .. } => "Prefix Directive".into(), + Directive::Import { .. } => "Import Directive".into(), + Directive::Export { .. } => "Export Directive".into(), + Directive::Output { .. } => "Output Directive".into(), + } + } +} +impl std::fmt::Display for Directive<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut output = String::new(); + write_tree(&mut output, &ast_to_ascii_tree(self))?; + write!(f, "{output}") + } } diff --git a/nemo/src/io/parser/ast/map.rs b/nemo/src/io/parser/ast/map.rs index 5ac13ec03..0e043471d 100644 --- a/nemo/src/io/parser/ast/map.rs +++ b/nemo/src/io/parser/ast/map.rs @@ -1,6 +1,7 @@ use super::term::Term; -use super::{AstNode, List, Position}; +use super::{ast_to_ascii_tree, AstNode, List, Position}; use crate::io::lexer::{Span, Token}; +use ascii_tree::write_tree; use std::fmt::Debug; #[derive(Debug, Clone, PartialEq)] @@ -53,6 +54,17 @@ impl AstNode for Map<'_> { fn is_token(&self) -> bool { false } + + fn name(&self) -> String { + String::from("Map") + } +} +impl std::fmt::Display for Map<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut output = String::new(); + write_tree(&mut output, &ast_to_ascii_tree(self))?; + write!(f, "{output}") + } } #[derive(Debug, Clone, PartialEq)] @@ -95,4 +107,15 @@ impl AstNode for Pair<'_, K, V> { fn is_token(&self) -> bool { false } + + fn name(&self) -> String { + String::from("Pair") + } +} +impl std::fmt::Display for Pair<'_, K, V> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut output = String::new(); + write_tree(&mut output, &ast_to_ascii_tree(self))?; + write!(f, "{output}") + } } diff --git a/nemo/src/io/parser/ast/named_tuple.rs b/nemo/src/io/parser/ast/named_tuple.rs index e8e05df9a..3aa5b7e82 100644 --- a/nemo/src/io/parser/ast/named_tuple.rs +++ b/nemo/src/io/parser/ast/named_tuple.rs @@ -1,6 +1,7 @@ use super::term::Term; -use super::{AstNode, List, Position}; +use super::{ast_to_ascii_tree, AstNode, List, Position}; use crate::io::lexer::{Span, Token}; +use ascii_tree::write_tree; #[derive(Debug, Clone, PartialEq)] pub(crate) struct NamedTuple<'a> { @@ -50,4 +51,15 @@ impl AstNode for NamedTuple<'_> { fn is_token(&self) -> bool { false } + + fn name(&self) -> String { + String::from("Named Tuple") + } +} +impl std::fmt::Display for NamedTuple<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut output = String::new(); + write_tree(&mut output, &ast_to_ascii_tree(self))?; + write!(f, "{output}") + } } 
diff --git a/nemo/src/io/parser/ast/program.rs b/nemo/src/io/parser/ast/program.rs index 8872c2e56..443697dd9 100644 --- a/nemo/src/io/parser/ast/program.rs +++ b/nemo/src/io/parser/ast/program.rs @@ -1,5 +1,7 @@ -use super::AstNode; -use super::{statement::Statement, Position}; +use ascii_tree::write_tree; + +use super::statement::Statement; +use super::{ast_to_ascii_tree, AstNode, Position}; use crate::io::lexer::{Span, Token}; #[derive(Debug, Clone, PartialEq)] @@ -39,4 +41,15 @@ impl AstNode for Program<'_> { fn is_token(&self) -> bool { false } + + fn name(&self) -> String { + String::from("Program") + } +} +impl std::fmt::Display for Program<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut output = String::new(); + write_tree(&mut output, &ast_to_ascii_tree(self))?; + write!(f, "{output}") + } } diff --git a/nemo/src/io/parser/ast/statement.rs b/nemo/src/io/parser/ast/statement.rs index 2404f1298..84cfa3e61 100644 --- a/nemo/src/io/parser/ast/statement.rs +++ b/nemo/src/io/parser/ast/statement.rs @@ -1,7 +1,8 @@ use super::atom::Atom; use super::directive::Directive; -use super::{AstNode, List, Position}; +use super::{ast_to_ascii_tree, AstNode, List, Position}; use crate::io::lexer::{Span, Token}; +use ascii_tree::write_tree; #[derive(Debug, Clone, PartialEq)] pub(crate) enum Statement<'a> { @@ -30,7 +31,7 @@ pub(crate) enum Statement<'a> { impl AstNode for Statement<'_> { fn children(&self) -> Option> { match self { - Statement::Directive(directive) => directive.children(), + Statement::Directive(directive) => Some(vec![directive]), Statement::Fact { doc_comment, atom, @@ -108,4 +109,21 @@ impl AstNode for Statement<'_> { fn is_token(&self) -> bool { false } + + fn name(&self) -> String { + match self { + Statement::Directive(_) => "Directive".into(), + Statement::Fact { .. } => "Fact".into(), + Statement::Rule { .. } => "Rule".into(), + Statement::Whitespace(_) => "Whitespace".into(), + Statement::Comment(_) => "Comment".into(), + } + } +} +impl std::fmt::Display for Statement<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut output = String::new(); + write_tree(&mut output, &ast_to_ascii_tree(self))?; + write!(f, "{output}") + } } diff --git a/nemo/src/io/parser/ast/term.rs b/nemo/src/io/parser/ast/term.rs index 372e41853..fe5febeda 100644 --- a/nemo/src/io/parser/ast/term.rs +++ b/nemo/src/io/parser/ast/term.rs @@ -1,9 +1,8 @@ use super::map::Map; use super::named_tuple::NamedTuple; -use super::AstNode; -use super::List; -use super::Position; +use super::{ast_to_ascii_tree, AstNode, List, Position}; use crate::io::lexer::{Span, Token}; +use ascii_tree::write_tree; #[derive(Debug, Clone, PartialEq)] pub(crate) enum Term<'a> { @@ -116,4 +115,23 @@ impl AstNode for Term<'_> { fn is_token(&self) -> bool { false } + + fn name(&self) -> String { + match self { + Term::Primitive(_) => "Primitive".into(), + Term::Variable(_) => "Variable".into(), + Term::Unary { .. } => "Unary Term".into(), + Term::Binary { .. } => "Binary Term".into(), + Term::Aggregation { .. 
} => "Aggregation".into(), + Term::Function(_) => "Function Symbol".into(), + Term::Map(_) => "Map".into(), + } + } +} +impl std::fmt::Display for Term<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut output = String::new(); + write_tree(&mut output, &ast_to_ascii_tree(self))?; + write!(f, "{output}") + } } From b63608a97eabf5736a6c8e8b4c7277c6a5bd0e0d Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Thu, 11 Apr 2024 10:52:18 +0200 Subject: [PATCH 082/214] Change order of parser function --- nemo/src/io/parser.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index 6f5ca55e8..a1b540786 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -2981,13 +2981,13 @@ mod new { fn parse_term<'a>(input: Span<'a>) -> IResult> { alt(( + parse_map_term, + parse_function_term, parse_primitive_term, parse_variable, parse_unary_term, // parse_binary_term, // parse_aggregation_term, - parse_function_term, - parse_map_term, ))(input) } @@ -3044,7 +3044,7 @@ mod new { } #[cfg(test)] - mod test { + mod tests { use super::*; use crate::io::{ lexer::*, From 87959e97227e1484d9620a74c268166139f22fe3 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Thu, 11 Apr 2024 11:11:08 +0200 Subject: [PATCH 083/214] Change NamedTuple to Tuple to support unnamed tuples --- nemo/src/io/parser.rs | 43 ++++++++++++++----- nemo/src/io/parser/ast.rs | 22 +++++----- nemo/src/io/parser/ast/atom.rs | 6 +-- nemo/src/io/parser/ast/term.rs | 4 +- .../parser/ast/{named_tuple.rs => tuple.rs} | 12 +++--- 5 files changed, 56 insertions(+), 31 deletions(-) rename nemo/src/io/parser/ast/{named_tuple.rs => tuple.rs} (86%) diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index a1b540786..a22898573 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -2431,7 +2431,7 @@ mod test { /// NEW PARSER mod new { use super::ast::{ - atom::*, directive::*, map::*, named_tuple::*, program::*, statement::*, term::*, List, + atom::*, directive::*, map::*, program::*, statement::*, term::*, tuple::*, List, }; use crate::io::lexer::{ arrow, at, close_brace, close_paren, colon, comma, dot, equal, greater, greater_equal, @@ -2839,10 +2839,10 @@ mod new { }) } - fn parse_named_tuple<'a>(input: Span<'a>) -> IResult> { + fn parse_named_tuple<'a>(input: Span<'a>) -> IResult> { let input_span = input.clone(); tuple(( - lex_ident, + opt(lex_ident), opt(lex_whitespace), open_paren, opt(lex_whitespace), @@ -2854,7 +2854,7 @@ mod new { |(rest_input, (identifier, ws1, open_paren, ws2, terms, ws3, close_paren))| { ( rest_input, - NamedTuple { + Tuple { span: outer_span(input_span, rest_input), identifier, ws1, @@ -3074,12 +3074,12 @@ mod new { statements: vec![Statement::Fact { span: S!(0, 1, "a(B,C)."), doc_comment: None, - atom: Atom::Positive(NamedTuple { + atom: Atom::Positive(Tuple { span: S!(0, 1, "a(B,C)"), - identifier: Token { + identifier: Some(Token { kind: TokenKind::Ident, span: S!(0, 1, "a"), - }, + }), ws1: None, open_paren: Token { kind: TokenKind::OpenParen, @@ -3343,12 +3343,12 @@ mod new { Statement::Fact { span: S!(0, 1, "some(Fact, with, whitespace) ."), doc_comment: None, - atom: Atom::Positive(NamedTuple { + atom: Atom::Positive(Tuple { span: S!(0, 1, "some(Fact, with, whitespace)"), - identifier: Token { + identifier: Some(Token { kind: TokenKind::Ident, span: S!(0, 1, "some"), - }, + }), ws1: None, open_paren: Token { kind: TokenKind::OpenParen, @@ -3422,6 +3422,29 @@ mod new { ) } + #[test] + fn 
display_program() { + let input = Span::new( + r#"% This example finds trees of (some species of lime/linden tree) in Dresden, +% which are more than 200 years old. +% +% It shows how to load (typed) data from (compressed) CSV files, how to +% perform a recursive reachability query, and how to use datatype built-in to +% find old trees. It can be modified to use a different species or genus of +% plant, and by changing the required age. + +@import tree :- csv{format=(string, string, int, int), resource="https://raw.githubusercontent.com/knowsys/nemo-examples/main/examples/lime-trees/dresden-trees-ages-heights.csv"} . % location URL, species, age, height in m +@import taxon :- csv{format=(string, string, string), resource="https://raw.githubusercontent.com/knowsys/nemo-examples/main/examples/lime-trees/wikidata-taxon-name-parent.csv.gz"} . % location URL, species, age, height in m + +limeSpecies(?X, "Tilia") :- taxon(?X, "Tilia", ?P). +limeSpecies(?X, ?Name) :- taxon(?X, ?Name, ?Y), limeSpecies(?Y, ?N). + +oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters), ?age > 200, limeSpecies(?id,?species) ."#, + ); + println!("{}", parse_program(input)); + // assert!(false); + } + #[test] fn parser_test() { let str = std::fs::read_to_string("../testfile.rls").expect("testfile not found"); diff --git a/nemo/src/io/parser/ast.rs b/nemo/src/io/parser/ast.rs index 7f1094e8d..8be761f36 100644 --- a/nemo/src/io/parser/ast.rs +++ b/nemo/src/io/parser/ast.rs @@ -7,7 +7,7 @@ use ascii_tree::{Tree, write_tree}; pub(crate) mod atom; pub(crate) mod directive; pub(crate) mod map; -pub(crate) mod named_tuple; +pub(crate) mod tuple; pub(crate) mod program; pub(crate) mod statement; pub(crate) mod term; @@ -109,7 +109,7 @@ pub(crate) fn ast_to_ascii_tree(node: &dyn AstNode) -> Tree { mod test { use super::*; - use super::{atom::Atom, directive::Directive, named_tuple::NamedTuple, program::Program, statement::Statement, term::Term}; + use super::{atom::Atom, directive::Directive, tuple::Tuple, program::Program, statement::Statement, term::Term}; use crate::io::lexer::TokenKind; macro_rules! 
s { @@ -187,12 +187,12 @@ mod test { kind: TokenKind::DocComment, span:s!(184,7,"%% This is just an example predicate.\n") }), - atom: Atom::Positive(NamedTuple { + atom: Atom::Positive(Tuple { span: s!(222,8,"somePredicate(ConstA, ConstB)"), - identifier: Token { + identifier: Some(Token { kind: TokenKind::Ident, span: s!(222, 8, "somePredicate"), - }, + }), ws1:None , open_paren:Token{ kind:TokenKind::OpenParen, @@ -246,12 +246,12 @@ mod test { doc_comment: Some(Token { kind: TokenKind::DocComment, span: s!(262,11,"%% This is just an example rule.\n") }), head: List { span: s!(295, 12, "someHead(?VarA)"), - first: Atom::Positive(NamedTuple { + first: Atom::Positive(Tuple { span: s!(295,12,"someHead(?VarA)"), - identifier: Token { + identifier: Some(Token { kind: TokenKind::Ident, span: s!(295, 12, "someHead"), - }, + }), ws1: None, open_paren: Token { kind: TokenKind::OpenParen, span: s!(303,12,"(") }, ws2: None, @@ -273,12 +273,12 @@ mod test { ws2: Some(Token{kind:TokenKind::Whitespace,span:s!(313,12," ")}), body: List { span: s!(314, 12, "somePredicate(?VarA, ConstB)"), - first: Atom::Positive(NamedTuple { + first: Atom::Positive(Tuple { span: s!(314, 12,"somePredicate(?VarA, ConstB)"), - identifier: Token { + identifier: Some(Token { kind: TokenKind::Ident, span: s!(314, 12, "somePredicate"), - }, + }), ws1: None, open_paren: Token { kind: TokenKind::OpenParen, span: s!(327,12,"(") }, ws2: None, diff --git a/nemo/src/io/parser/ast/atom.rs b/nemo/src/io/parser/ast/atom.rs index 419ef048d..e75c9fb4f 100644 --- a/nemo/src/io/parser/ast/atom.rs +++ b/nemo/src/io/parser/ast/atom.rs @@ -1,5 +1,5 @@ -use super::named_tuple::NamedTuple; use super::term::Term; +use super::tuple::Tuple; use super::{ast_to_ascii_tree, AstNode}; use super::{map::Map, Position}; use crate::io::lexer::{Span, Token}; @@ -7,11 +7,11 @@ use ascii_tree::write_tree; #[derive(Debug, Clone, PartialEq)] pub(crate) enum Atom<'a> { - Positive(NamedTuple<'a>), + Positive(Tuple<'a>), Negative { span: Span<'a>, neg: Token<'a>, - atom: NamedTuple<'a>, + atom: Tuple<'a>, }, InfixAtom { span: Span<'a>, diff --git a/nemo/src/io/parser/ast/term.rs b/nemo/src/io/parser/ast/term.rs index fe5febeda..964400d88 100644 --- a/nemo/src/io/parser/ast/term.rs +++ b/nemo/src/io/parser/ast/term.rs @@ -1,5 +1,5 @@ use super::map::Map; -use super::named_tuple::NamedTuple; +use super::tuple::Tuple; use super::{ast_to_ascii_tree, AstNode, List, Position}; use crate::io::lexer::{Span, Token}; use ascii_tree::write_tree; @@ -31,7 +31,7 @@ pub(crate) enum Term<'a> { ws2: Option>, close_paren: Token<'a>, }, - Function(Box>), + Function(Box>), Map(Box>), } impl AstNode for Term<'_> { diff --git a/nemo/src/io/parser/ast/named_tuple.rs b/nemo/src/io/parser/ast/tuple.rs similarity index 86% rename from nemo/src/io/parser/ast/named_tuple.rs rename to nemo/src/io/parser/ast/tuple.rs index 3aa5b7e82..cf1ec38f7 100644 --- a/nemo/src/io/parser/ast/named_tuple.rs +++ b/nemo/src/io/parser/ast/tuple.rs @@ -4,9 +4,9 @@ use crate::io::lexer::{Span, Token}; use ascii_tree::write_tree; #[derive(Debug, Clone, PartialEq)] -pub(crate) struct NamedTuple<'a> { +pub(crate) struct Tuple<'a> { pub(crate) span: Span<'a>, - pub(crate) identifier: Token<'a>, + pub(crate) identifier: Option>, pub(crate) ws1: Option>, pub(crate) open_paren: Token<'a>, pub(crate) ws2: Option>, @@ -14,11 +14,13 @@ pub(crate) struct NamedTuple<'a> { pub(crate) ws3: Option>, pub(crate) close_paren: Token<'a>, } -impl AstNode for NamedTuple<'_> { +impl AstNode for Tuple<'_> { fn children(&self) -> 
Option> { let mut vec = Vec::new(); #[allow(trivial_casts)] - vec.push(&self.identifier as &dyn AstNode); + if let Some(identifier) = &self.identifier { + vec.push(identifier as &dyn AstNode); + } if let Some(ws) = &self.ws1 { vec.push(ws); } @@ -56,7 +58,7 @@ impl AstNode for NamedTuple<'_> { String::from("Named Tuple") } } -impl std::fmt::Display for NamedTuple<'_> { +impl std::fmt::Display for Tuple<'_> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let mut output = String::new(); write_tree(&mut output, &ast_to_ascii_tree(self))?; From 7a90c2b379a09c68a2400040c78e1ac0c2d84a94 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Thu, 11 Apr 2024 15:16:38 +0200 Subject: [PATCH 084/214] Fix parsing of output directives --- nemo/src/io/parser.rs | 111 ++++++++++++++++++++++++---- nemo/src/io/parser/ast/directive.rs | 9 ++- 2 files changed, 101 insertions(+), 19 deletions(-) diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index a22898573..980c2e946 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -2739,19 +2739,54 @@ mod new { let input_span = input.clone(); tuple(( opt(lex_doc_comment), - at, - verify(lex_ident, |token| token.kind == TokenKind::Output), - ignore_ws_and_comments(separated_list0(comma, ignore_ws_and_comments(lex_ident))), - ignore_ws_and_comments(dot), + recognize(pair( + at, + verify(lex_ident, |token| token.kind == TokenKind::Output), + )), + lex_whitespace, + opt(parse_identifier_list), + opt(lex_whitespace), + dot, ))(input) - .map(|(rest_input, (doc_comment, _, kw, predicates, _))| { + .map( + |(rest_input, (doc_comment, kw, ws1, predicates, ws2, dot))| { + ( + rest_input, + Directive::Output { + span: outer_span(input_span, rest_input), + doc_comment, + kw: Token { + kind: TokenKind::Output, + span: kw, + }, + ws1, + predicates, + ws2, + dot, + }, + ) + }, + ) + } + + fn parse_identifier_list<'a>(input: Span<'a>) -> IResult>> { + let input_span = input.clone(); + pair( + lex_ident, + many0(tuple(( + opt(lex_whitespace), + comma, + opt(lex_whitespace), + lex_ident, + ))), + )(input) + .map(|(rest_input, (first, rest))| { ( rest_input, - Directive::Output { + List { span: outer_span(input_span, rest_input), - doc_comment, - kw, - predicates, + first, + rest: if rest.is_empty() { None } else { Some(rest) }, }, ) }) @@ -3124,7 +3159,7 @@ mod new { #[test] fn syntax() { let input = Span::new( - r#"@base .@prefix rdfs:.@import sourceA:-csv{resource="sources/dataA.csv"}.@export a:-csv{}.@output a."#, + r#"@base .@prefix rdfs:.@import sourceA:-csv{resource="sources/dataA.csv"}.@export a:-csv{}.@output a, b, c."#, ); assert_eq!( parse_program(input), @@ -3300,16 +3335,60 @@ mod new { }, }), Statement::Directive(Directive::Output { - span: S!(153, 1, "@output a."), + span: S!(153, 1, "@output a, b, c."), doc_comment: None, kw: Token { kind: TokenKind::Output, - span: S!(154, 1, "output") + span: S!(153, 1, "@output") }, - predicates: vec![Token { - kind: TokenKind::Ident, - span: S!(161, 1, "a") - }], + ws1: Token { + kind: TokenKind::Whitespace, + span: S!(160, 1, " "), + }, + predicates: Some(List { + span: S!(161, 1, "a, b, c"), + first: Token { + kind: TokenKind::Ident, + span: S!(161, 1, "a"), + }, + rest: Some(vec![ + ( + None, + Token { + kind: TokenKind::Comma, + span: S!(162, 1, ","), + }, + Some(Token { + kind: TokenKind::Whitespace, + span: S!(163, 1, " "), + }), + Token { + kind: TokenKind::Ident, + span: S!(164, 1, "b"), + }, + ), + ( + None, + Token { + kind: TokenKind::Comma, + span: S!(165, 1, ","), + }, + 
Some(Token { + kind: TokenKind::Whitespace, + span: S!(166, 1, " "), + }), + Token { + kind: TokenKind::Ident, + span: S!(167, 1, "c"), + }, + ), + ]), + }), + ws2: None, + dot: Token { + kind: TokenKind::Dot, + span: S!(168, 1, "."), + } }), ], } diff --git a/nemo/src/io/parser/ast/directive.rs b/nemo/src/io/parser/ast/directive.rs index 857b53f6c..cf024ed8c 100644 --- a/nemo/src/io/parser/ast/directive.rs +++ b/nemo/src/io/parser/ast/directive.rs @@ -1,5 +1,5 @@ use super::map::Map; -use super::{ast_to_ascii_tree, AstNode, Position}; +use super::{ast_to_ascii_tree, AstNode, List, Position}; use crate::io::lexer::{Span, Token}; use ascii_tree::write_tree; @@ -55,12 +55,15 @@ pub(crate) enum Directive<'a> { ws4: Option>, dot: Token<'a>, }, - // maybe will get deprecated + // "@output A, B, C." Output { span: Span<'a>, doc_comment: Option>, kw: Token<'a>, - predicates: Vec>, + ws1: Token<'a>, + predicates: Option>>, + ws2: Option>, + dot: Token<'a>, }, } impl AstNode for Directive<'_> { From 854e344a69866a82ddc721a7b35e5f65f3315465 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Fri, 12 Apr 2024 12:20:34 +0200 Subject: [PATCH 085/214] Change order of parser functions because of ordered choice --- nemo/src/io/parser.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index 980c2e946..936ef2618 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -3074,8 +3074,9 @@ mod new { }) } + // Order of functions is important, because of ordered choice and no backtracking fn parse_operation_token<'a>(input: Span<'a>) -> IResult> { - alt((equal, unequal, less, less_equal, greater, greater_equal))(input) + alt((less_equal, greater_equal, equal, unequal, less, greater))(input) } #[cfg(test)] From db213c1f199e60ac8cf68a831abca80a0d033eeb Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Fri, 12 Apr 2024 12:21:30 +0200 Subject: [PATCH 086/214] Fix 'name' method of tuple --- nemo/src/io/parser/ast/tuple.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo/src/io/parser/ast/tuple.rs b/nemo/src/io/parser/ast/tuple.rs index cf1ec38f7..9d771d289 100644 --- a/nemo/src/io/parser/ast/tuple.rs +++ b/nemo/src/io/parser/ast/tuple.rs @@ -55,7 +55,7 @@ impl AstNode for Tuple<'_> { } fn name(&self) -> String { - String::from("Named Tuple") + String::from("Tuple") } } impl std::fmt::Display for Tuple<'_> { From 43c46bdd27ff7a4b78c5e9ebd5cab6e64ee2b37c Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Mon, 15 Apr 2024 21:05:53 +0200 Subject: [PATCH 087/214] Rename fn parse_named_tuple to parse_tuple --- nemo/src/io/parser.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index 936ef2618..41f969966 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -2832,13 +2832,13 @@ mod new { } fn parse_normal_atom<'a>(input: Span<'a>) -> IResult> { - parse_named_tuple(input) + parse_tuple(input) .map(|(rest_input, named_tuple)| (rest_input, Atom::Positive(named_tuple))) } fn parse_negative_atom<'a>(input: Span<'a>) -> IResult> { let input_span = input.clone(); - pair(tilde, parse_named_tuple)(input).map(|(rest_input, (tilde, named_tuple))| { + pair(tilde, parse_tuple)(input).map(|(rest_input, (tilde, named_tuple))| { ( rest_input, Atom::Negative { @@ -2874,7 +2874,7 @@ mod new { }) } - fn parse_named_tuple<'a>(input: Span<'a>) -> IResult> { + fn parse_tuple<'a>(input: Span<'a>) -> IResult> { let input_span = input.clone(); tuple(( 
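                // `opt(lex_ident)` below is what makes the tuple name optional, so a
                // single parser covers both named tuples like `f(x, y)` and unnamed
                // ones like `(x, y)`.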
opt(lex_ident), @@ -3054,7 +3054,7 @@ mod new { } fn parse_function_term<'a>(input: Span<'a>) -> IResult> { - parse_named_tuple(input) + parse_tuple(input) .map(|(rest_input, named_tuple)| (rest_input, Term::Function(Box::new(named_tuple)))) } From 19e7f4725b42a779b71aec776435584bc87b0fb2 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Mon, 15 Apr 2024 21:25:10 +0200 Subject: [PATCH 088/214] Add parse_aggregation_term function --- nemo/src/io/lexer.rs | 7 +++++++ nemo/src/io/parser.rs | 42 ++++++++++++++++++++++++++++++++++-------- 2 files changed, 41 insertions(+), 8 deletions(-) diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs index 1cc390707..53a495e0d 100644 --- a/nemo/src/io/lexer.rs +++ b/nemo/src/io/lexer.rs @@ -78,6 +78,8 @@ pub(crate) enum TokenKind { Ident, /// Variable, Variable, + /// Aggregate identifier like `#sum` + Aggregate, /// IRI, delimited with `<` and `>` Iri, /// Base 10 digits @@ -141,6 +143,7 @@ impl std::fmt::Display for TokenKind { TokenKind::Slash => write!(f, "Slash"), TokenKind::Ident => write!(f, "Ident"), TokenKind::Variable => write!(f, "Variable"), + TokenKind::Aggregate => write!(f, "Aggregate"), TokenKind::Iri => write!(f, "Iri"), TokenKind::Number => write!(f, "Number"), TokenKind::String => write!(f, "String"), @@ -290,6 +293,10 @@ pub(crate) fn lex_operators(input: Span) -> IResult { ))(input) } +pub(crate) fn lex_unary_operators(input: Span) -> IResult { + alt((plus, minus))(input) +} + pub(crate) fn lex_ident(input: Span) -> IResult { let (rest, result) = recognize(pair( alpha1, diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index 41f969966..c5345d2e2 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -2435,9 +2435,9 @@ mod new { }; use crate::io::lexer::{ arrow, at, close_brace, close_paren, colon, comma, dot, equal, greater, greater_equal, - less, less_equal, lex_comment, lex_doc_comment, lex_ident, lex_iri, lex_number, - lex_operators, lex_string, lex_toplevel_doc_comment, lex_whitespace, open_brace, - open_paren, question_mark, tilde, unequal, Span, Token, TokenKind, + hash, less, less_equal, lex_comment, lex_doc_comment, lex_ident, lex_iri, lex_number, + lex_operators, lex_string, lex_toplevel_doc_comment, lex_unary_operators, lex_whitespace, + open_brace, open_paren, question_mark, tilde, unequal, Span, Token, TokenKind, }; use nom::combinator::{all_consuming, opt, recognize}; use nom::sequence::{delimited, pair}; @@ -2506,7 +2506,7 @@ mod new { } fn parse_fact<'a>(input: Span<'a>) -> IResult> { - let input_span = input; + // let input_span = input; tuple(( opt(lex_doc_comment), parse_normal_atom, @@ -2517,7 +2517,7 @@ mod new { ( rest_input, Statement::Fact { - span: outer_span(input_span, rest_input), + span: outer_span(input, rest_input), doc_comment, atom, ws, @@ -3022,7 +3022,7 @@ mod new { parse_variable, parse_unary_term, // parse_binary_term, - // parse_aggregation_term, + parse_aggregation_term, ))(input) } @@ -3033,7 +3033,7 @@ mod new { fn parse_unary_term<'a>(input: Span<'a>) -> IResult> { let input_span = input.clone(); - pair(lex_operators, parse_term)(input).map(|(rest_input, (operation, term))| { + pair(lex_unary_operators, parse_term)(input).map(|(rest_input, (operation, term))| { ( rest_input, Term::Unary { @@ -3050,7 +3050,33 @@ mod new { } fn parse_aggregation_term<'a>(input: Span<'a>) -> IResult> { - todo!("`parse_aggregation_term`!") + tuple(( + recognize(pair(hash, lex_ident)), + open_paren, + opt(lex_whitespace), + parse_term_list, + opt(lex_whitespace), + close_paren, + ))(input) 
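            // `recognize(pair(hash, lex_ident))` above yields the combined span of the
            // `#` and the identifier, so the whole prefix (e.g. `#sum`) becomes one
            // Aggregate token in the `.map` below.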
+ .map( + |(rest_input, (operation, open_paren, ws1, terms, ws2, close_paren))| { + ( + rest_input, + Term::Aggregation { + span: outer_span(input, rest_input), + operation: Token { + kind: TokenKind::Aggregate, + span: operation, + }, + open_paren, + ws1, + terms: Box::new(terms), + ws2, + close_paren, + }, + ) + }, + ) } fn parse_function_term<'a>(input: Span<'a>) -> IResult> { From 5bf6cab4140be670fb71fcf932166e559cd38e5f Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Tue, 16 Apr 2024 08:03:19 +0200 Subject: [PATCH 089/214] Add parsing of existential variables --- nemo/src/io/lexer.rs | 4 +++- nemo/src/io/parser.rs | 26 ++++++++++++++++++++------ nemo/src/io/parser/ast/term.rs | 4 ++++ 3 files changed, 27 insertions(+), 7 deletions(-) diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs index 53a495e0d..bcecf1bee 100644 --- a/nemo/src/io/lexer.rs +++ b/nemo/src/io/lexer.rs @@ -76,8 +76,10 @@ pub(crate) enum TokenKind { // Multi-char tokens: /// Identifier for keywords and names Ident, - /// Variable, + /// Variable like `?var` Variable, + /// Existential Variable like `!var` + Existential, /// Aggregate identifier like `#sum` Aggregate, /// IRI, delimited with `<` and `>` diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index c5345d2e2..e1c48ef4c 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -2434,10 +2434,11 @@ mod new { atom::*, directive::*, map::*, program::*, statement::*, term::*, tuple::*, List, }; use crate::io::lexer::{ - arrow, at, close_brace, close_paren, colon, comma, dot, equal, greater, greater_equal, - hash, less, less_equal, lex_comment, lex_doc_comment, lex_ident, lex_iri, lex_number, - lex_operators, lex_string, lex_toplevel_doc_comment, lex_unary_operators, lex_whitespace, - open_brace, open_paren, question_mark, tilde, unequal, Span, Token, TokenKind, + arrow, at, close_brace, close_paren, colon, comma, dot, equal, exclamation_mark, greater, + greater_equal, hash, less, less_equal, lex_comment, lex_doc_comment, lex_ident, lex_iri, + lex_number, lex_operators, lex_string, lex_toplevel_doc_comment, lex_unary_operators, + lex_whitespace, open_brace, open_paren, question_mark, tilde, unequal, Span, Token, + TokenKind, }; use nom::combinator::{all_consuming, opt, recognize}; use nom::sequence::{delimited, pair}; @@ -3020,6 +3021,7 @@ mod new { parse_function_term, parse_primitive_term, parse_variable, + parse_existential, parse_unary_term, // parse_binary_term, parse_aggregation_term, @@ -3089,9 +3091,9 @@ mod new { } fn parse_variable<'a>(input: Span<'a>) -> IResult> { - recognize(pair(question_mark, lex_ident))(input).map(|(rest, var)| { + recognize(pair(question_mark, lex_ident))(input).map(|(rest_input, var)| { ( - rest, + rest_input, Term::Variable(Token { kind: TokenKind::Variable, span: var, @@ -3100,6 +3102,18 @@ mod new { }) } + fn parse_existential<'a>(input: Span<'a>) -> IResult> { + recognize(pair(exclamation_mark, lex_ident))(input).map(|(rest_input, existential)| { + ( + rest_input, + Term::Existential(Token { + kind: TokenKind::Existential, + span: existential, + }), + ) + }) + } + // Order of functions is important, because of ordered choice and no backtracking fn parse_operation_token<'a>(input: Span<'a>) -> IResult> { alt((less_equal, greater_equal, equal, unequal, less, greater))(input) diff --git a/nemo/src/io/parser/ast/term.rs b/nemo/src/io/parser/ast/term.rs index 964400d88..44ccc1ee3 100644 --- a/nemo/src/io/parser/ast/term.rs +++ b/nemo/src/io/parser/ast/term.rs @@ -8,6 +8,7 @@ use ascii_tree::write_tree; 
pub(crate) enum Term<'a> { Primitive(Token<'a>), Variable(Token<'a>), + Existential(Token<'a>), // TODO: Is whitespace needed? Figure out how unary terms look Unary { span: Span<'a>, @@ -39,6 +40,7 @@ impl AstNode for Term<'_> { match self { Term::Primitive(token) => Some(vec![token]), Term::Variable(token) => Some(vec![token]), + Term::Existential(token) => Some(vec![token]), Term::Unary { operation, term, .. } => Some(vec![operation, &**term]), @@ -95,6 +97,7 @@ impl AstNode for Term<'_> { match self { Term::Primitive(t) => t.span(), Term::Variable(t) => t.span(), + Term::Existential(t) => t.span(), Term::Unary { span, .. } => *span, Term::Binary { span, .. } => *span, Term::Aggregation { span, .. } => *span, @@ -120,6 +123,7 @@ impl AstNode for Term<'_> { match self { Term::Primitive(_) => "Primitive".into(), Term::Variable(_) => "Variable".into(), + Term::Existential(_) => "Existential Variable".into(), Term::Unary { .. } => "Unary Term".into(), Term::Binary { .. } => "Binary Term".into(), Term::Aggregation { .. } => "Aggregation".into(), From 4f85a9c19670727897c4feaa312ed4182bc7b7ce Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Sat, 20 Apr 2024 12:49:06 +0200 Subject: [PATCH 090/214] Add parsing of binary infix arithmetic terms --- nemo/src/io/lexer.rs | 10 +- nemo/src/io/parser.rs | 532 ++++++++++++++++++++++++++-- nemo/src/io/parser/ast.rs | 2 +- nemo/src/io/parser/ast/atom.rs | 19 +- nemo/src/io/parser/ast/directive.rs | 55 ++- nemo/src/io/parser/ast/map.rs | 7 +- nemo/src/io/parser/ast/program.rs | 7 +- nemo/src/io/parser/ast/statement.rs | 21 +- nemo/src/io/parser/ast/term.rs | 45 ++- nemo/src/io/parser/ast/tuple.rs | 7 +- 10 files changed, 640 insertions(+), 65 deletions(-) diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs index bcecf1bee..9082f98bf 100644 --- a/nemo/src/io/lexer.rs +++ b/nemo/src/io/lexer.rs @@ -145,6 +145,7 @@ impl std::fmt::Display for TokenKind { TokenKind::Slash => write!(f, "Slash"), TokenKind::Ident => write!(f, "Ident"), TokenKind::Variable => write!(f, "Variable"), + TokenKind::Existential => write!(f, "Existential"), TokenKind::Aggregate => write!(f, "Aggregate"), TokenKind::Iri => write!(f, "Iri"), TokenKind::Number => write!(f, "Number"), @@ -179,17 +180,20 @@ impl std::fmt::Display for Token<'_> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let offset = self.span.location_offset(); let line = self.span.location_line(); + let column = self.span.get_utf8_column(); let fragment = self.span.fragment(); if self.span.extra == () { write!( f, - "T!{{{0}, S!({offset}, {line}, {fragment:?})}}", + // "T!{{{0}, S!({offset}, {line}, {fragment:?})}}", + "\x1b[93mTOKEN {0} \x1b[34m@{line}:{column} ({offset}) \x1b[93m{fragment:?}\x1b[0m", self.kind ) } else { write!( f, - "T!{{{0}, S!({offset}, {line}, {fragment:?}, {1:?})}}", + // "T!{{{0}, S!({offset}, {line}, {fragment:?}, {1:?})}}", + "\x1b[93mTOKEN {0} \x1b[34m@{line}:{column} ({offset}) \x1b[93m{fragment:?}\x1b[0m, {1:?}\x1b[0m", self.kind, self.span.extra ) } @@ -295,7 +299,7 @@ pub(crate) fn lex_operators(input: Span) -> IResult { ))(input) } -pub(crate) fn lex_unary_operators(input: Span) -> IResult { +pub(crate) fn lex_unary_prefix_operators(input: Span) -> IResult { alt((plus, minus))(input) } diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index e1c48ef4c..235218c9b 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -2436,10 +2436,11 @@ mod new { use crate::io::lexer::{ arrow, at, close_brace, close_paren, colon, comma, dot, equal, 
exclamation_mark, greater, greater_equal, hash, less, less_equal, lex_comment, lex_doc_comment, lex_ident, lex_iri, - lex_number, lex_operators, lex_string, lex_toplevel_doc_comment, lex_unary_operators, - lex_whitespace, open_brace, open_paren, question_mark, tilde, unequal, Span, Token, - TokenKind, + lex_number, lex_operators, lex_string, lex_toplevel_doc_comment, + lex_unary_prefix_operators, lex_whitespace, minus, open_brace, open_paren, plus, + question_mark, slash, star, tilde, unequal, Span, Token, TokenKind, }; + use crate::io::parser::ast::AstNode; use nom::combinator::{all_consuming, opt, recognize}; use nom::sequence::{delimited, pair}; use nom::Parser; @@ -2478,8 +2479,9 @@ mod new { ) } + /// Parse a full program consisting of directives, facts, rules and comments. fn parse_program<'a>(input: Span<'a>) -> Program<'a> { - let span = input.clone(); + // let span = input.clone(); let (_, (tl_doc_comment, statements)) = all_consuming(pair( opt(lex_toplevel_doc_comment), many1(alt(( @@ -2492,20 +2494,23 @@ mod new { ))(input) .expect("Expect EOF"); Program { - span, + span: input, tl_doc_comment, statements, } } + /// Parse whitespace that is between directives, facts, rules and comments. fn parse_whitespace<'a>(input: Span<'a>) -> IResult> { lex_whitespace(input).map(|(rest, ws)| (rest, Statement::Whitespace(ws))) } + /// Parse normal comments that start with a `%` and ends at the line ending. fn parse_comment<'a>(input: Span<'a>) -> IResult> { lex_comment(input).map(|(rest, comment)| (rest, Statement::Comment(comment))) } + /// Parse a fact of the form `predicateName(term1, term2, …).` fn parse_fact<'a>(input: Span<'a>) -> IResult> { // let input_span = input; tuple(( @@ -2528,8 +2533,9 @@ mod new { }) } + /// Parse a rule of the form `headPredicate1(term1, term2, …), headPredicate2(term1, term2, …) :- bodyPredicate(term1, …), term1 >= (term2 + term3) * function(term1, …) .` fn parse_rule<'a>(input: Span<'a>) -> IResult> { - let input_span = input; + // let input_span = input; tuple(( opt(lex_doc_comment), parse_head, @@ -2545,7 +2551,7 @@ mod new { ( rest_input, Statement::Rule { - span: outer_span(input_span, rest_input), + span: outer_span(input, rest_input), doc_comment, head, ws1, @@ -2560,14 +2566,17 @@ mod new { ) } + /// Parse the head atoms of a rule. fn parse_head<'a>(input: Span<'a>) -> IResult>> { parse_atom_list(input, parse_head_atoms) } + /// Parse the body atoms of a rule. fn parse_body<'a>(input: Span<'a>) -> IResult>> { parse_atom_list(input, parse_body_atoms) } + /// Parse the directives (@base, @prefix, @import, @export, @output). fn parse_directive<'a>(input: Span<'a>) -> IResult> { alt(( parse_base_directive, @@ -2579,6 +2588,7 @@ mod new { .map(|(rest, directive)| (rest, Statement::Directive(directive))) } + /// Parse the base directive. fn parse_base_directive<'a>(input: Span<'a>) -> IResult> { let input_span = input.clone(); tuple(( @@ -2611,6 +2621,7 @@ mod new { }) } + /// Parse the prefix directive. fn parse_prefix_directive<'a>(input: Span<'a>) -> IResult> { let input_span = input.clone(); tuple(( @@ -2652,6 +2663,7 @@ mod new { ) } + /// Parse the import directive. fn parse_import_directive<'a>(input: Span<'a>) -> IResult> { let input_span = input.clone(); tuple(( @@ -2694,6 +2706,7 @@ mod new { ) } + /// Parse the export directive. fn parse_export_directive<'a>(input: Span<'a>) -> IResult> { let input_span = input.clone(); tuple(( @@ -2736,6 +2749,7 @@ mod new { ) } + /// Parse the output directive. 
 fn parse_output_directive<'a>(input: Span<'a>) -> IResult<Span<'a>, Directive<'a>> {
     let input_span = input.clone();
     tuple((
@@ -2770,6 +2784,7 @@ mod new {
         )
     }
 
+    /// Parse a list of `ident1, ident2, …`
     fn parse_identifier_list<'a>(input: Span<'a>) -> IResult<Span<'a>, List<'a, Token<'a>>> {
         let input_span = input.clone();
         pair(
@@ -2793,6 +2808,7 @@ mod new {
         })
     }
 
+    /// Parse a list of atoms, like `atom1(…), atom2(…), infix = atom, …`
     fn parse_atom_list<'a>(
         input: Span<'a>,
         parse_atom: fn(Span<'a>) -> IResult<Span<'a>, Atom<'a>>,
@@ -2819,10 +2835,12 @@ mod new {
         })
     }
 
+    /// Parse the head atoms. The same as the body atoms except for disallowing negated atoms.
     fn parse_head_atoms<'a>(input: Span<'a>) -> IResult<Span<'a>, Atom<'a>> {
         alt((parse_normal_atom, parse_infix_atom, parse_map_atom))(input)
     }
 
+    /// Parse the body atoms. The same as the head atoms except for allowing negated atoms.
     fn parse_body_atoms<'a>(input: Span<'a>) -> IResult<Span<'a>, Atom<'a>> {
         alt((
             parse_normal_atom,
@@ -2832,14 +2850,16 @@ mod new {
         ))(input)
     }
 
+    /// Parse an atom of the form `predicateName(term1, term2, …)`.
     fn parse_normal_atom<'a>(input: Span<'a>) -> IResult<Span<'a>, Atom<'a>> {
-        parse_tuple(input)
+        parse_named_tuple(input)
             .map(|(rest_input, named_tuple)| (rest_input, Atom::Positive(named_tuple)))
     }
 
+    /// Parse an atom of the form `~predicateName(term1, term2, …)`.
     fn parse_negative_atom<'a>(input: Span<'a>) -> IResult<Span<'a>, Atom<'a>> {
         let input_span = input.clone();
-        pair(tilde, parse_tuple)(input).map(|(rest_input, (tilde, named_tuple))| {
+        pair(tilde, parse_named_tuple)(input).map(|(rest_input, (tilde, named_tuple))| {
             (
                 rest_input,
                 Atom::Negative {
@@ -2851,6 +2871,8 @@ mod new {
         })
     }
 
+    /// Parse an "infix atom" of the form `term1 <operator> term2`.
+    /// The supported infix operations are `<`, `<=`, `=`, `>=`, `>` and `!=`.
     fn parse_infix_atom<'a>(input: Span<'a>) -> IResult<Span<'a>, Atom<'a>> {
         let input_span = input.clone();
         tuple((
@@ -2875,6 +2897,8 @@ mod new {
         })
     }
 
+    /// Parse a tuple with an optional name, like `ident(term1, term2)`
+    /// or just `(int, int, skip)`.
     fn parse_tuple<'a>(input: Span<'a>) -> IResult<Span<'a>, Tuple<'a>> {
         let input_span = input.clone();
         tuple((
@@ -2905,6 +2929,40 @@ mod new {
         )
     }
 
+    /// Parse a named tuple. This function is like `parse_tuple` with the difference
+    /// that it enforces the existence of an identifier for the tuple.
+    fn parse_named_tuple<'a>(input: Span<'a>) -> IResult<Span<'a>, Tuple<'a>> {
+        let input_span = input.clone();
+        tuple((
+            lex_ident,
+            opt(lex_whitespace),
+            open_paren,
+            opt(lex_whitespace),
+            opt(parse_term_list),
+            opt(lex_whitespace),
+            close_paren,
+        ))(input)
+        .map(
+            |(rest_input, (identifier, ws1, open_paren, ws2, terms, ws3, close_paren))| {
+                (
+                    rest_input,
+                    Tuple {
+                        span: outer_span(input_span, rest_input),
+                        identifier: Some(identifier),
+                        ws1,
+                        open_paren,
+                        ws2,
+                        terms,
+                        ws3,
+                        close_paren,
+                    },
+                )
+            },
+        )
+    }
+
+    /// Parse a map. Maps are denoted with `{…}` and can have an optional name, e.g. `csv {…}`.
+    /// Inside the curly braces is a list of pairs.
     fn parse_map<'a>(input: Span<'a>) -> IResult<Span<'a>, Map<'a>> {
         let input_span = input.clone();
         tuple((
@@ -2935,10 +2993,12 @@ mod new {
         )
     }
 
+    /// Parse a map in an atom position.
     fn parse_map_atom<'a>(input: Span<'a>) -> IResult<Span<'a>, Atom<'a>> {
         parse_map(input).map(|(rest_input, map)| (rest_input, Atom::Map(map)))
     }
 
+    /// Parse a pair list of the form `key1 = value1, key2 = value2, …`.
     fn parse_pair_list<'a>(
         input: Span<'a>,
     ) -> IResult<Span<'a>, Option<List<'a, Pair<'a, Term<'a>, Term<'a>>>>> {
@@ -2968,6 +3028,7 @@ mod new {
         })
     }
 
+    /// Parse a pair of the form `key = value`.
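    // A typical instance, as it appears in the import/export maps of the tests:
    // `resource = "sources/dataA.csv"` — a key term, `=`, and a value term.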
 fn parse_pair<'a>(input: Span<'a>) -> IResult<Span<'a>, Pair<'a, Term<'a>, Term<'a>>> {
     let input_span = input.clone();
     tuple((
@@ -2992,6 +3053,7 @@ mod new {
         })
     }
 
+    /// Parse a list of terms of the form `term1, term2, …`.
     fn parse_term_list<'a>(input: Span<'a>) -> IResult<Span<'a>, List<'a, Term<'a>>> {
         let input_span = input.clone();
         pair(
@@ -3015,42 +3077,138 @@ mod new {
         })
     }
 
+    /// Parse a term. A term can be a primitive value (constant, number, string, …),
+    /// a variable (universal or existential), a map, a function symbol, an arithmetic
+    /// operation, an aggregation or a tuple of terms, e.g. `(term1, term2, …)`.
     fn parse_term<'a>(input: Span<'a>) -> IResult<Span<'a>, Term<'a>> {
         alt((
+            parse_binary_term,
+            parse_tuple_term,
+            parse_unary_prefix_term,
             parse_map_term,
-            parse_function_term,
             parse_primitive_term,
             parse_variable,
             parse_existential,
-            parse_unary_term,
-            // parse_binary_term,
            parse_aggregation_term,
         ))(input)
     }
 
+    /// Parse a primitive term (simple constant, IRI constant, number, string).
     fn parse_primitive_term<'a>(input: Span<'a>) -> IResult<Span<'a>, Term<'a>> {
         alt((lex_ident, lex_iri, lex_number, lex_string))(input)
             .map(|(rest_input, term)| (rest_input, Term::Primitive(term)))
     }
 
-    fn parse_unary_term<'a>(input: Span<'a>) -> IResult<Span<'a>, Term<'a>> {
+    /// Parse a unary term.
+    fn parse_unary_prefix_term<'a>(input: Span<'a>) -> IResult<Span<'a>, Term<'a>> {
         let input_span = input.clone();
-        pair(lex_unary_operators, parse_term)(input).map(|(rest_input, (operation, term))| {
+        pair(lex_unary_prefix_operators, parse_term)(input).map(
+            |(rest_input, (operation, term))| {
+                (
+                    rest_input,
+                    Term::UnaryPrefix {
+                        span: outer_span(input_span, rest_input),
+                        operation,
+                        term: Box::new(term),
+                    },
+                )
+            },
+        )
+    }
+
+    /// Parse a binary infix operation of the form `term1 <operator> term2`.
+    fn parse_binary_term<'a>(input: Span<'a>) -> IResult<Span<'a>, Term<'a>> {
+        pair(
+            parse_arithmetic_product,
+            opt(tuple((
+                opt(lex_whitespace),
+                alt((plus, minus)),
+                opt(lex_whitespace),
+                parse_binary_term,
+            ))),
+        )(input)
+        .map(|(rest_input, (lhs, opt))| {
             (
                 rest_input,
-                Term::Unary {
-                    span: outer_span(input_span, rest_input),
-                    operation,
-                    term: Box::new(term),
+                if let Some((ws1, operation, ws2, rhs)) = opt {
+                    Term::Binary {
+                        span: outer_span(input, rest_input),
+                        lhs: Box::new(lhs),
+                        ws1,
+                        operation,
+                        ws2,
+                        rhs: Box::new(rhs),
+                    }
+                } else {
+                    lhs
                 },
             )
         })
     }
 
-    fn parse_binary_term<'a>(input: Span<'a>) -> IResult<Span<'a>, Term<'a>> {
-        todo!("`parse_binary_term`!")
+    /// Parse an arithmetic product, i.e. an expression involving
+    /// only `*` and `/` over subexpressions.
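    // Taken together, `parse_binary_term` and `parse_arithmetic_product` encode the
    // usual two-level expression grammar (PEG reading, `?` marks an optional part):
    //
    //   term    ::= product (('+' | '-') term)?
    //   product ::= factor  (('*' | '/') product)?
    //
    // so `*` and `/` bind tighter than `+` and `-` (`7+5*7` groups as `7+(5*7)`),
    // and the right-recursion makes the operators right-associative, which is what
    // the `arithmetic_expressions` test below spells out for inputs like
    // `15+3*2-(7+35)*8/3`.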
+ fn parse_arithmetic_product<'a>(input: Span<'a>) -> IResult> { + pair( + parse_arithmetic_factor, + opt(tuple(( + opt(lex_whitespace), + alt((star, slash)), + opt(lex_whitespace), + parse_arithmetic_product, + ))), + )(input) + .map(|(rest_input, (lhs, opt))| { + ( + rest_input, + if let Some((ws1, operation, ws2, rhs)) = opt { + Term::Binary { + span: outer_span(input, rest_input), + lhs: Box::new(lhs), + ws1, + operation, + ws2, + rhs: Box::new(rhs), + } + } else { + lhs + }, + ) + }) + } + + fn parse_arithmetic_factor<'a>(input: Span<'a>) -> IResult> { + alt(( + parse_tuple_term, + parse_aggregation_term, + parse_primitive_term, + parse_variable, + parse_existential, + ))(input) + } + + fn fold_arithmetic_expression<'a>( + initial: Term<'a>, + sequence: Vec<(Option>, Token<'a>, Option>, Term<'a>)>, + span_vec: Vec>, + ) -> Term<'a> { + sequence + .into_iter() + .enumerate() + .fold(initial, |acc, (i, pair)| { + let (ws1, operation, ws2, expression) = pair; + Term::Binary { + span: span_vec[i], + lhs: Box::new(acc), + ws1, + operation, + ws2, + rhs: Box::new(expression), + } + }) } + /// Parse an aggregation term of the form `#sum(…)`. fn parse_aggregation_term<'a>(input: Span<'a>) -> IResult> { tuple(( recognize(pair(hash, lex_ident)), @@ -3081,15 +3239,19 @@ mod new { ) } - fn parse_function_term<'a>(input: Span<'a>) -> IResult> { + /// Parse a tuple term, either with a name (function symbol) or as a term (-list) with + /// parenthesis. + fn parse_tuple_term<'a>(input: Span<'a>) -> IResult> { parse_tuple(input) - .map(|(rest_input, named_tuple)| (rest_input, Term::Function(Box::new(named_tuple)))) + .map(|(rest_input, named_tuple)| (rest_input, Term::Tuple(Box::new(named_tuple)))) } + /// Parse a map as a term. fn parse_map_term<'a>(input: Span<'a>) -> IResult> { parse_map(input).map(|(rest_input, map)| (rest_input, Term::Map(Box::new(map)))) } + /// Parse a variable. fn parse_variable<'a>(input: Span<'a>) -> IResult> { recognize(pair(question_mark, lex_ident))(input).map(|(rest_input, var)| { ( @@ -3102,6 +3264,7 @@ mod new { }) } + /// Parse an existential variable. fn parse_existential<'a>(input: Span<'a>) -> IResult> { recognize(pair(exclamation_mark, lex_ident))(input).map(|(rest_input, existential)| { ( @@ -3115,6 +3278,7 @@ mod new { } // Order of functions is important, because of ordered choice and no backtracking + /// Parse the operator for an infix atom. fn parse_operation_token<'a>(input: Span<'a>) -> IResult> { alt((less_equal, greater_equal, equal, unequal, less, greater))(input) } @@ -3561,16 +3725,334 @@ limeSpecies(?X, ?Name) :- taxon(?X, ?Name, ?Y), limeSpecies(?Y, ?N). oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters), ?age > 200, limeSpecies(?id,?species) ."#, ); - println!("{}", parse_program(input)); - // assert!(false); + let ast = parse_program(input); + println!("{}", ast); + assert_eq!( + { + let mut result = String::new(); + for token in get_all_tokens(&ast) { + result.push_str(token.span().fragment()); + } + println!("{}", result); + result + }, + *input.fragment(), + ); } #[test] fn parser_test() { let str = std::fs::read_to_string("../testfile.rls").expect("testfile not found"); let input = Span::new(str.as_str()); - dbg!(parse_program(input)); + println!("{}", parse_program(input)); // assert!(false); } + + #[test] + fn arithmetic_expressions() { + use TokenKind::*; + macro_rules! 
T { + ($tok_kind: expr, $offset: literal, $line: literal, $str: literal) => { + Token::new($tok_kind, unsafe { + Span::new_from_raw_offset($offset, $line, $str, ()) + }) + }; + } + macro_rules! s { + ($offset:literal,$line:literal,$str:literal) => { + unsafe { Span::new_from_raw_offset($offset, $line, $str, ()) } + }; + } + + assert_eq!( + { + let result = parse_term(Span::new("42")); + result.unwrap().1 + }, + Term::Primitive(T! {Number, 0, 1, "42"}), + ); + + assert_eq!( + { + let result = parse_term(Span::new("35+7")); + result.unwrap().1 + }, + Term::Binary { + span: s!(0, 1, "35+7"), + lhs: Box::new(Term::Primitive(T! {Number, 0, 1, "35"})), + ws1: None, + operation: T! {Plus, 2, 1, "+"}, + ws2: None, + rhs: Box::new(Term::Primitive(T! {Number, 3, 1, "7"})) + } + ); + + assert_eq!( + { + let result = parse_term(Span::new("6*7")); + result.unwrap().1 + }, + Term::Binary { + span: s!(0, 1, "6*7"), + lhs: Box::new(Term::Primitive(T! {Number, 0,1,"6"})), + ws1: None, + operation: T! {Star, 1,1,"*"}, + ws2: None, + rhs: Box::new(Term::Primitive(T! {Number, 2,1,"7"})), + } + ); + + assert_eq!( + { + let result = parse_term(Span::new("49-7")); + result.unwrap().1 + }, + Term::Binary { + span: s!(0, 1, "49-7"), + lhs: Box::new(Term::Primitive(T! {Number, 0, 1, "49"})), + ws1: None, + operation: T! {Minus, 2, 1, "-"}, + ws2: None, + rhs: Box::new(Term::Primitive(T! {Number, 3, 1, "7"})) + } + ); + + assert_eq!( + { + let result = parse_term(Span::new("84/2")); + result.unwrap().1 + }, + Term::Binary { + span: s!(0, 1, "84/2"), + lhs: Box::new(Term::Primitive(T! {Number, 0, 1, "84"})), + ws1: None, + operation: T! {Slash, 2, 1, "/"}, + ws2: None, + rhs: Box::new(Term::Primitive(T! {Number, 3, 1, "2"})) + } + ); + + assert_eq!( + { + let result = parse_term(Span::new("5*7+7")); + result.unwrap().1 + }, + Term::Binary { + span: s!(0, 1, "5*7+7"), + lhs: Box::new(Term::Binary { + span: s!(0, 1, "5*7"), + lhs: Box::new(Term::Primitive(T! {Number, 0,1,"5"})), + ws1: None, + operation: T! {Star, 1,1,"*"}, + ws2: None, + rhs: Box::new(Term::Primitive(T! {Number, 2,1,"7"})) + }), + ws1: None, + operation: T! {Plus, 3,1,"+"}, + ws2: None, + rhs: Box::new(Term::Primitive(T! {Number, 4,1,"7"})), + } + ); + + assert_eq!( + { + let result = parse_term(Span::new("7+5*7")); + result.unwrap().1 + }, + Term::Binary { + span: s!(0, 1, "7+5*7"), + lhs: Box::new(Term::Primitive(T! {Number, 0,1,"7"})), + ws1: None, + operation: T! {Plus, 1,1,"+"}, + ws2: None, + rhs: Box::new(Term::Binary { + span: s!(2, 1, "5*7"), + lhs: Box::new(Term::Primitive(T! {Number, 2,1,"5"})), + ws1: None, + operation: T! {Star, 3,1,"*"}, + ws2: None, + rhs: Box::new(Term::Primitive(T! {Number, 4,1,"7"})) + }), + } + ); + + assert_eq!( + { + let result = parse_term(Span::new("(15+3*2-(7+35)*8)/3")); + result.unwrap().1 + }, + Term::Binary { + span: s!(0, 1, "(15+3*2-(7+35)*8)/3"), + lhs: Box::new(Term::Tuple(Box::new(Tuple { + span: s!(0, 1, "(15+3*2-(7+35)*8)"), + identifier: None, + ws1: None, + open_paren: T!(OpenParen, 0, 1, "("), + ws2: None, + terms: Some(List { + span: s!(1, 1, "15+3*2-(7+35)*8"), + first: Term::Binary { + span: s!(1, 1, "15+3*2-(7+35)*8"), + lhs: Box::new(Term::Primitive(T! {Number, 1,1,"15"})), + ws1: None, + operation: T! {Plus, 3,1,"+"}, + ws2: None, + rhs: Box::new(Term::Binary { + span: s!(4, 1, "3*2-(7+35)*8"), + lhs: Box::new(Term::Binary { + span: s!(4, 1, "3*2"), + lhs: Box::new(Term::Primitive(T! {Number, 4,1,"3"})), + ws1: None, + operation: T! 
{Star, 5,1,"*"}, + ws2: None, + rhs: Box::new(Term::Primitive(T! {Number, 6,1,"2"})), + }), + ws1: None, + operation: T! {Minus, 7,1,"-"}, + ws2: None, + rhs: Box::new(Term::Binary { + span: s!(8, 1, "(7+35)*8"), + lhs: Box::new(Term::Tuple(Box::new(Tuple { + span: s!(8, 1, "(7+35)"), + identifier: None, + ws1: None, + open_paren: T! {OpenParen, 8, 1, "("}, + ws2: None, + terms: Some(List { + span: s!(9, 1, "7+35"), + first: Term::Binary { + span: s!(9, 1, "7+35"), + lhs: Box::new(Term::Primitive( + T! {Number, 9,1,"7"} + )), + ws1: None, + operation: T! {Plus, 10,1,"+"}, + ws2: None, + rhs: Box::new(Term::Primitive( + T! {Number, 11,1,"35"} + )), + }, + rest: None + }), + ws3: None, + close_paren: T! {CloseParen, 13,1,")"}, + }))), + ws1: None, + operation: T! {Star, 14,1,"*"}, + ws2: None, + rhs: Box::new(Term::Primitive(T! {Number, 15,1,"8"})), + }), + }), + }, + rest: None + }), + ws3: None, + close_paren: T!(CloseParen, 16, 1, ")") + }))), + ws1: None, + operation: T! {Slash, 17,1,"/"}, + ws2: None, + rhs: Box::new(Term::Primitive(T! {Number, 18,1,"3"})), + } + ); + // Term::Binary { + // span: s!(), + // lhs: Box::new(), + // ws1: None, + // operation: , + // ws2: None, + // rhs: Box::new(), + // } + + assert_eq!( + { + let result = parse_term(Span::new("15+3*2-(7+35)*8/3")); + result.unwrap().1 + }, + Term::Binary { + span: s!(0, 1, "15+3*2-(7+35)*8/3"), + lhs: Box::new(Term::Primitive(T! {Number, 0,1,"15"})), + ws1: None, + operation: T! {Plus, 2,1,"+"}, + ws2: None, + rhs: Box::new(Term::Binary { + span: s!(3, 1, "3*2-(7+35)*8/3"), + lhs: Box::new(Term::Binary { + span: s!(3, 1, "3*2"), + lhs: Box::new(Term::Primitive(T! {Number, 3,1,"3"})), + ws1: None, + operation: T! {Star, 4,1,"*"}, + ws2: None, + rhs: Box::new(Term::Primitive(T! {Number, 5,1,"2"})), + }), + ws1: None, + operation: T! {Minus, 6,1,"-"}, + ws2: None, + rhs: Box::new(Term::Binary { + span: s!(7, 1, "(7+35)*8/3"), + lhs: Box::new(Term::Tuple(Box::new(Tuple { + span: s!(7, 1, "(7+35)"), + identifier: None, + ws1: None, + open_paren: T! {OpenParen, 7,1,"("}, + ws2: None, + terms: Some(List { + span: s!(8, 1, "7+35"), + first: Term::Binary { + span: s!(8, 1, "7+35"), + lhs: Box::new(Term::Primitive(T! {Number, 8,1,"7"})), + ws1: None, + operation: T! {Plus, 9,1,"+"}, + ws2: None, + rhs: Box::new(Term::Primitive(T! {Number, 10,1,"35"})), + }, + rest: None, + }), + ws3: None, + close_paren: T! {CloseParen, 12,1,")"}, + }))), + ws1: None, + operation: T! {Star, 13,1,"*"}, + ws2: None, + rhs: Box::new(Term::Binary { + span: s!(14, 1, "8/3"), + lhs: Box::new(Term::Primitive(T! {Number, 14,1,"8"})), + ws1: None, + operation: T! {Slash, 15, 1, "/"}, + ws2: None, + rhs: Box::new(Term::Primitive(T! 
{Number, 16,1,"3"})), + }), + }), + }), + } + ); + + // assert_eq!({ + // let result = parse_term(Span::new("1*2*3*4*5")); + // result.unwrap().1 + // },); + + // assert_eq!({ + // let result = parse_term(Span::new("(5+3)")); + // result.unwrap().1 + // },); + + // assert_eq!({ + // let result = parse_term(Span::new("( int , int , string , skip )")); + // result.unwrap().1 + // },); + + // assert_eq!({ + // let result = parse_term(Span::new("(14+4)+3")); + // result.unwrap().1 + // },); + + // assert_eq!({ + // let result = parse_term(Span::new( + // "(3 + #sum(?X, ?Y)) * (LENGTH(\"Hello, World!\") + 3)", + // )); + // result.unwrap().1 + // },); + } } } diff --git a/nemo/src/io/parser/ast.rs b/nemo/src/io/parser/ast.rs index 8be761f36..1c838a026 100644 --- a/nemo/src/io/parser/ast.rs +++ b/nemo/src/io/parser/ast.rs @@ -70,7 +70,7 @@ impl AstNode for List<'_, T> { } fn name(&self) -> String { - String::from("List") + format!("List \x1b[34m@{}:{} \x1b[92m{:?}\x1b[0m", self.span.location_line(), self.span.get_utf8_column(), self.span.fragment()) } } impl Display for List<'_, T> { diff --git a/nemo/src/io/parser/ast/atom.rs b/nemo/src/io/parser/ast/atom.rs index e75c9fb4f..52e8d5383 100644 --- a/nemo/src/io/parser/ast/atom.rs +++ b/nemo/src/io/parser/ast/atom.rs @@ -76,11 +76,22 @@ impl AstNode for Atom<'_> { } fn name(&self) -> String { + macro_rules! name { + ($name:literal) => { + format!( + "{} \x1b[34m@{}:{} \x1b[92m{:?}\x1b[0m", + $name, + self.span().location_line(), + self.span().get_utf8_column(), + self.span().fragment() + ) + }; + } match self { - Atom::Positive(_) => "Positive Atom".into(), - Atom::Negative { .. } => "Negative Atom".into(), - Atom::InfixAtom { .. } => "Infix Atom".into(), - Atom::Map(_) => "Map Atom".into(), + Atom::Positive(_) => name!("Positive Atom"), + Atom::Negative { .. } => name!("Negative Atom"), + Atom::InfixAtom { .. } => name!("Infix Atom"), + Atom::Map(_) => name!("Map Atom"), } } } diff --git a/nemo/src/io/parser/ast/directive.rs b/nemo/src/io/parser/ast/directive.rs index cf024ed8c..c75351256 100644 --- a/nemo/src/io/parser/ast/directive.rs +++ b/nemo/src/io/parser/ast/directive.rs @@ -70,13 +70,13 @@ impl AstNode for Directive<'_> { fn children(&self) -> Option> { match self { Directive::Base { - span, doc_comment, kw, ws1, base_iri, ws2, dot, + .. } => { let mut vec = Vec::new(); if let Some(dc) = doc_comment { @@ -95,7 +95,6 @@ impl AstNode for Directive<'_> { Some(vec) } Directive::Prefix { - span, doc_comment, kw, ws1, @@ -104,6 +103,7 @@ impl AstNode for Directive<'_> { prefix_iri, ws3, dot, + .. } => { let mut vec = Vec::new(); if let Some(dc) = doc_comment { @@ -126,7 +126,6 @@ impl AstNode for Directive<'_> { Some(vec) } Directive::Import { - span, doc_comment, kw, ws1, @@ -137,6 +136,7 @@ impl AstNode for Directive<'_> { map, ws4, dot, + .. } => { let mut vec = Vec::new(); if let Some(dc) = doc_comment { @@ -161,7 +161,6 @@ impl AstNode for Directive<'_> { Some(vec) } Directive::Export { - span, doc_comment, kw, ws1, @@ -172,6 +171,7 @@ impl AstNode for Directive<'_> { map, ws4, dot, + .. } => { let mut vec = Vec::new(); if let Some(dc) = doc_comment { @@ -195,7 +195,31 @@ impl AstNode for Directive<'_> { vec.push(dot); Some(vec) } - Directive::Output { .. 
} => todo!(), + Directive::Output { + span, + doc_comment, + kw, + ws1, + predicates, + ws2, + dot, + } => { + let mut vec = Vec::new(); + if let Some(dc) = doc_comment { + #[allow(trivial_casts)] + vec.push(dc as &dyn AstNode); + }; + vec.push(kw); + vec.push(ws1); + if let Some(p) = predicates { + vec.push(p); + }; + if let Some(ws) = ws2 { + vec.push(ws); + }; + vec.push(dot); + Some(vec) + } } } @@ -223,12 +247,23 @@ impl AstNode for Directive<'_> { } fn name(&self) -> String { + macro_rules! name { + ($name:literal) => { + format!( + "{} \x1b[34m@{}:{} \x1b[92m{:?}\x1b[0m", + $name, + self.span().location_line(), + self.span().get_utf8_column(), + self.span().fragment() + ) + }; + } match self { - Directive::Base { .. } => "Base Directive".into(), - Directive::Prefix { .. } => "Prefix Directive".into(), - Directive::Import { .. } => "Import Directive".into(), - Directive::Export { .. } => "Export Directive".into(), - Directive::Output { .. } => "Output Directive".into(), + Directive::Base { .. } => name!("Base Directive"), + Directive::Prefix { .. } => name!("Prefix Directive"), + Directive::Import { .. } => name!("Import Directive"), + Directive::Export { .. } => name!("Export Directive"), + Directive::Output { .. } => name!("Output Directive"), } } } diff --git a/nemo/src/io/parser/ast/map.rs b/nemo/src/io/parser/ast/map.rs index 0e043471d..669963ae8 100644 --- a/nemo/src/io/parser/ast/map.rs +++ b/nemo/src/io/parser/ast/map.rs @@ -109,7 +109,12 @@ impl AstNode for Pair<'_, K, V> { } fn name(&self) -> String { - String::from("Pair") + format!( + "Pair \x1b[34m@{}:{} \x1b[92m{:?}\x1b[0m", + self.span.location_line(), + self.span.get_utf8_column(), + self.span.fragment() + ) } } impl std::fmt::Display for Pair<'_, K, V> { diff --git a/nemo/src/io/parser/ast/program.rs b/nemo/src/io/parser/ast/program.rs index 443697dd9..a7397f7ec 100644 --- a/nemo/src/io/parser/ast/program.rs +++ b/nemo/src/io/parser/ast/program.rs @@ -43,7 +43,12 @@ impl AstNode for Program<'_> { } fn name(&self) -> String { - String::from("Program") + format!( + "Program \x1b[34m@{}:{} \x1b[92m\"{}…\"\x1b[0m", + self.span.location_line(), + self.span.get_utf8_column(), + &self.span.fragment()[..60], + ) } } impl std::fmt::Display for Program<'_> { diff --git a/nemo/src/io/parser/ast/statement.rs b/nemo/src/io/parser/ast/statement.rs index 84cfa3e61..e26ec39dd 100644 --- a/nemo/src/io/parser/ast/statement.rs +++ b/nemo/src/io/parser/ast/statement.rs @@ -111,12 +111,23 @@ impl AstNode for Statement<'_> { } fn name(&self) -> String { + macro_rules! name { + ($name:literal) => { + format!( + "{} \x1b[34m@{}:{} \x1b[92m{:?}\x1b[0m", + $name, + self.span().location_line(), + self.span().get_utf8_column(), + self.span().fragment() + ) + }; + } match self { - Statement::Directive(_) => "Directive".into(), - Statement::Fact { .. } => "Fact".into(), - Statement::Rule { .. } => "Rule".into(), - Statement::Whitespace(_) => "Whitespace".into(), - Statement::Comment(_) => "Comment".into(), + Statement::Directive(_) => name!("Directive"), + Statement::Fact { .. } => name!("Fact"), + Statement::Rule { .. } => name!("Rule"), + Statement::Whitespace(_) => name!("Whitespace"), + Statement::Comment(_) => name!("Comment"), } } } diff --git a/nemo/src/io/parser/ast/term.rs b/nemo/src/io/parser/ast/term.rs index 44ccc1ee3..a058e401b 100644 --- a/nemo/src/io/parser/ast/term.rs +++ b/nemo/src/io/parser/ast/term.rs @@ -10,7 +10,7 @@ pub(crate) enum Term<'a> { Variable(Token<'a>), Existential(Token<'a>), // TODO: Is whitespace needed? 
Figure out how unary terms look - Unary { + UnaryPrefix { span: Span<'a>, operation: Token<'a>, term: Box>, @@ -32,7 +32,7 @@ pub(crate) enum Term<'a> { ws2: Option>, close_paren: Token<'a>, }, - Function(Box>), + Tuple(Box>), Map(Box>), } impl AstNode for Term<'_> { @@ -41,7 +41,7 @@ impl AstNode for Term<'_> { Term::Primitive(token) => Some(vec![token]), Term::Variable(token) => Some(vec![token]), Term::Existential(token) => Some(vec![token]), - Term::Unary { + Term::UnaryPrefix { operation, term, .. } => Some(vec![operation, &**term]), Term::Binary { @@ -88,7 +88,7 @@ impl AstNode for Term<'_> { vec.push(close_paren); Some(vec) } - Term::Function(named_tuple) => named_tuple.children(), + Term::Tuple(named_tuple) => named_tuple.children(), Term::Map(map) => map.children(), } } @@ -98,10 +98,10 @@ impl AstNode for Term<'_> { Term::Primitive(t) => t.span(), Term::Variable(t) => t.span(), Term::Existential(t) => t.span(), - Term::Unary { span, .. } => *span, + Term::UnaryPrefix { span, .. } => *span, Term::Binary { span, .. } => *span, Term::Aggregation { span, .. } => *span, - Term::Function(named_tuple) => named_tuple.span(), + Term::Tuple(named_tuple) => named_tuple.span(), Term::Map(map) => map.span(), } } @@ -120,15 +120,32 @@ impl AstNode for Term<'_> { } fn name(&self) -> String { + macro_rules! name { + ($name:literal) => { + format!( + "{} \x1b[34m@{}:{} \x1b[92m{:?}\x1b[0m", + $name, + self.span().location_line(), + self.span().get_utf8_column(), + self.span().fragment() + ) + }; + } match self { - Term::Primitive(_) => "Primitive".into(), - Term::Variable(_) => "Variable".into(), - Term::Existential(_) => "Existential Variable".into(), - Term::Unary { .. } => "Unary Term".into(), - Term::Binary { .. } => "Binary Term".into(), - Term::Aggregation { .. } => "Aggregation".into(), - Term::Function(_) => "Function Symbol".into(), - Term::Map(_) => "Map".into(), + Term::Primitive(_) => name!("Primitive"), + Term::Variable(_) => name!("Variable"), + Term::Existential(_) => name!("Existential Variable"), + Term::UnaryPrefix { .. } => name!("Unary Term"), + Term::Binary { .. } => name!("Binary Term"), + Term::Aggregation { .. 
} => name!("Aggregation"), + Term::Tuple(f) => { + if let Some(_) = f.identifier { + name!("Function Symbol") + } else { + name!("Tuple") + } + } + Term::Map(_) => name!("Map"), } } } diff --git a/nemo/src/io/parser/ast/tuple.rs b/nemo/src/io/parser/ast/tuple.rs index 9d771d289..a49f67012 100644 --- a/nemo/src/io/parser/ast/tuple.rs +++ b/nemo/src/io/parser/ast/tuple.rs @@ -55,7 +55,12 @@ impl AstNode for Tuple<'_> { } fn name(&self) -> String { - String::from("Tuple") + format!( + "Tuple \x1b[34m@{}:{} \x1b[92m{:?}\x1b[0m", + self.span.location_line(), + self.span.get_utf8_column(), + self.span.fragment() + ) } } impl std::fmt::Display for Tuple<'_> { From 895d46a9ce4424b56b51bdc5baf261e118ebed66 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Wed, 24 Apr 2024 17:37:53 +0200 Subject: [PATCH 091/214] Add error reporting with VerboseError and ContextError --- nemo/src/io/lexer.rs | 102 ++-- nemo/src/io/parser.rs | 1196 +++++++++++++++++++++++++---------------- 2 files changed, 808 insertions(+), 490 deletions(-) diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs index 9082f98bf..4dd5f0ded 100644 --- a/nemo/src/io/lexer.rs +++ b/nemo/src/io/lexer.rs @@ -4,7 +4,8 @@ use nom::{ branch::alt, bytes::complete::{is_not, tag, take}, character::complete::{alpha1, alphanumeric1, digit1, line_ending, multispace1}, - combinator::{all_consuming, map, recognize}, + combinator::{all_consuming, cut, map, recognize}, + error::{ContextError, ParseError}, multi::{many0, many1}, sequence::{delimited, pair, tuple}, IResult, @@ -227,7 +228,9 @@ impl<'a> crate::io::parser::ast::AstNode for Token<'a> { macro_rules! syntax { ($func_name: ident, $tag_string: literal, $token: expr) => { - pub(crate) fn $func_name<'a>(input: Span) -> IResult { + pub(crate) fn $func_name<'a, E: ParseError>>( + input: Span<'a>, + ) -> IResult, Token, E> { map(tag($tag_string), |span| Token::new($token, span))(input) } }; @@ -251,7 +254,9 @@ syntax!(hash, "#", TokenKind::Hash); syntax!(underscore, "_", TokenKind::Underscore); syntax!(at, "@", TokenKind::At); -pub(crate) fn lex_punctuations(input: Span) -> IResult { +pub(crate) fn lex_punctuations<'a, E: ParseError>>( + input: Span<'a>, +) -> IResult, Token, E> { alt(( arrow, open_paren, @@ -284,7 +289,9 @@ syntax!(minus, "-", TokenKind::Minus); syntax!(star, "*", TokenKind::Star); syntax!(slash, "/", TokenKind::Slash); -pub(crate) fn lex_operators(input: Span) -> IResult { +pub(crate) fn lex_operators<'a, E: ParseError>>( + input: Span<'a>, +) -> IResult, Token, E> { alt(( less_equal, greater_equal, @@ -299,11 +306,15 @@ pub(crate) fn lex_operators(input: Span) -> IResult { ))(input) } -pub(crate) fn lex_unary_prefix_operators(input: Span) -> IResult { +pub(crate) fn lex_unary_prefix_operators<'a, E: ParseError>>( + input: Span<'a>, +) -> IResult, Token<'a>, E> { alt((plus, minus))(input) } -pub(crate) fn lex_ident(input: Span) -> IResult { +pub(crate) fn lex_ident<'a, E: ParseError>>( + input: Span<'a>, +) -> IResult, Token, E> { let (rest, result) = recognize(pair( alpha1, many0(alt((alphanumeric1, tag("_"), tag("-")))), @@ -319,48 +330,66 @@ pub(crate) fn lex_ident(input: Span) -> IResult { Ok((rest, token)) } -pub(crate) fn lex_iri(input: Span) -> IResult { - recognize(delimited(tag("<"), is_not("> \n"), tag(">")))(input) +pub(crate) fn lex_iri<'a, E: ParseError>>(input: Span<'a>) -> IResult, Token, E> { + recognize(delimited(tag("<"), is_not("> \n"), cut(tag(">"))))(input) .map(|(rest, result)| (rest, Token::new(TokenKind::Iri, result))) } -pub(crate) fn lex_number(input: 
Span) -> IResult { +pub(crate) fn lex_number<'a, E: ParseError>>( + input: Span<'a>, +) -> IResult, Token, E> { digit1(input).map(|(rest, result)| (rest, Token::new(TokenKind::Number, result))) } -pub(crate) fn lex_string(input: Span) -> IResult { - recognize(delimited(tag("\""), is_not("\""), tag("\"")))(input) +pub(crate) fn lex_string<'a, E: ParseError>>( + input: Span<'a>, +) -> IResult, Token, E> { + recognize(delimited(tag("\""), is_not("\""), cut(tag("\""))))(input) .map(|(rest, result)| (rest, Token::new(TokenKind::String, result))) } -pub(crate) fn lex_comment(input: Span) -> IResult { +pub(crate) fn lex_comment<'a, E: ParseError> + ContextError>>( + input: Span<'a>, +) -> IResult, Token, E> { recognize(tuple((tag("%"), many0(is_not("\n")), line_ending)))(input) .map(|(rest, result)| (rest, Token::new(TokenKind::Comment, result))) } -pub(crate) fn lex_doc_comment(input: Span) -> IResult { +pub(crate) fn lex_doc_comment<'a, E: ParseError> + ContextError>>( + input: Span<'a>, +) -> IResult, Token, E> { recognize(many1(tuple((tag("%%"), many0(is_not("\n")), line_ending))))(input) .map(|(rest, result)| (rest, Token::new(TokenKind::DocComment, result))) } -pub(crate) fn lex_toplevel_doc_comment(input: Span) -> IResult { +pub(crate) fn lex_toplevel_doc_comment<'a, E: ParseError> + ContextError>>( + input: Span<'a>, +) -> IResult, Token, E> { recognize(many1(tuple((tag("%!"), many0(is_not("\n")), line_ending))))(input) .map(|(rest, result)| (rest, Token::new(TokenKind::TlDocComment, result))) } -pub(crate) fn lex_comments(input: Span) -> IResult { +pub(crate) fn lex_comments<'a, E: ParseError> + ContextError>>( + input: Span<'a>, +) -> IResult, Token, E> { alt((lex_toplevel_doc_comment, lex_doc_comment, lex_comment))(input) } -pub(crate) fn lex_whitespace(input: Span) -> IResult { +pub(crate) fn lex_whitespace<'a, E: ParseError>>( + input: Span<'a>, +) -> IResult, Token, E> { multispace1(input).map(|(rest, result)| (rest, Token::new(TokenKind::Whitespace, result))) } -pub(crate) fn lex_illegal(input: Span) -> IResult { +pub(crate) fn lex_illegal<'a, E: ParseError>>( + input: Span<'a>, +) -> IResult, Token, E> { take(1usize)(input).map(|(rest, result)| (rest, Token::new(TokenKind::Illegal, result))) } -pub(crate) fn lex_tokens(input: Span) -> IResult> { +pub(crate) fn lex_tokens<'a, E: ParseError> + ContextError>>( + input: Span<'a>, +) -> IResult, Vec, E> { all_consuming(many0(alt(( lex_iri, lex_operators, @@ -394,14 +423,17 @@ mod test { #[test] fn empty_input() { let input = Span::new(""); - assert_eq!(lex_tokens(input).unwrap().1, vec![T!(Eof, 0, 1, "")]) + assert_eq!( + lex_tokens::>(input).unwrap().1, + vec![T!(Eof, 0, 1, "")] + ) } #[test] fn base() { let input = Span::new("@base"); assert_eq!( - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![T!(At, 0, 1, "@"), T!(Base, 1, 1, "base"), T!(Eof, 5, 1, ""),] ) } @@ -410,7 +442,7 @@ mod test { fn prefix() { let input = Span::new("@prefix"); assert_eq!( - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(At, 0, 1, "@"), T!(Prefix, 1, 1, "prefix"), @@ -423,7 +455,7 @@ mod test { fn output() { let input = Span::new("@output"); assert_eq!( - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(At, 0, 1, "@"), T!(Output, 1, 1, "output"), @@ -436,7 +468,7 @@ mod test { fn import() { let input = Span::new("@import"); assert_eq!( - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(At, 0, 1, "@"), T!(Import, 1, 1, "import"), @@ -449,7 +481,7 @@ mod test { 
fn export() { let input = Span::new("@export"); assert_eq!( - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(At, 0, 1, "@"), T!(Export, 1, 1, "export"), @@ -462,7 +494,7 @@ mod test { fn idents_with_keyword_prefix() { let input = Span::new("@baseA, @prefixB, @importC, @exportD, @outputE."); assert_eq!( - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(At, 0, 1, "@"), T!(Ident, 1, 1, "baseA"), @@ -492,7 +524,7 @@ mod test { fn tokenize() { let input = Span::new("P(?X) :- A(?X).\t\n A(Human)."); assert_eq!( - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Ident, 0, 1, "P"), T!(OpenParen, 1, 1, "("), @@ -523,7 +555,7 @@ mod test { fn comment() { let input = Span::new(" % Some Comment\n"); assert_eq!( - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Whitespace, 0, 1, " "), T!(Comment, 4, 1, "% Some Comment\n"), @@ -538,7 +570,7 @@ mod test { fn ident() { let input = Span::new("some_Ident(Alice). %comment at the end of a line\n"); assert_eq!( - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Ident, 0, 1, "some_Ident"), T!(OpenParen, 10, 1, "("), @@ -556,7 +588,7 @@ mod test { fn forbidden_ident() { let input = Span::new("_someIdent(Alice). %comment at the end of a line\n"); assert_eq!( - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Underscore, 0, 1, "_"), T!(Ident, 1, 1, "someIdent"), @@ -575,7 +607,7 @@ mod test { fn iri() { let input = Span::new(""); assert_eq!( - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Iri, 0, 1, ""), T!(Eof, 31, 1, ""), @@ -587,7 +619,7 @@ mod test { fn iri_pct_enc() { let input = Span::new("\n"); assert_eq!( - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Iri, 0, 1, ""), T!(Whitespace, 37, 1, "\n"), @@ -602,7 +634,7 @@ mod test { fn constraints() { let input = Span::new("A(?X):-B(?X),?X<42,?X>3."); assert_eq!( - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Ident, 0, 1, "A"), T!(OpenParen, 1, 1, "("), @@ -635,7 +667,7 @@ mod test { fn pct_enc_comment() { let input = Span::new("%d4 this should be a comment,\n% but the lexer can't distinguish a percent encoded value\n% in an iri from a comment :(\n"); assert_eq!( - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Comment, 0, 1, "%d4 this should be a comment,\n"), T!( @@ -654,7 +686,7 @@ mod test { fn fact() { let input = Span::new("somePred(term1, term2)."); assert_eq!( - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Ident, 0, 1, "somePred"), T!(OpenParen, 8, 1, "("), @@ -673,7 +705,7 @@ mod test { fn whitespace() { let input = Span::new(" \t \n\n\t \n"); assert_eq!( - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Whitespace, 0, 1, " \t \n\n\t \n"), T!(Eof, 12, 4, ""), diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index 235218c9b..e027612ad 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -2434,14 +2434,15 @@ mod new { atom::*, directive::*, map::*, program::*, statement::*, term::*, tuple::*, List, }; use crate::io::lexer::{ - arrow, at, close_brace, close_paren, colon, comma, dot, equal, exclamation_mark, greater, - greater_equal, hash, less, less_equal, lex_comment, lex_doc_comment, lex_ident, lex_iri, - lex_number, lex_operators, lex_string, lex_toplevel_doc_comment, + arrow, at, caret, close_brace, close_paren, colon, comma, dot, equal, exclamation_mark, + 
+        greater, greater_equal, hash, less, less_equal, lex_comment, lex_doc_comment, lex_ident,
+        lex_iri, lex_number, lex_operators, lex_string, lex_toplevel_doc_comment,
         lex_unary_prefix_operators, lex_whitespace, minus, open_brace, open_paren, plus,
         question_mark, slash, star, tilde, unequal, Span, Token, TokenKind,
     };
     use crate::io::parser::ast::AstNode;
-    use nom::combinator::{all_consuming, opt, recognize};
+    use nom::combinator::{all_consuming, cut, map, opt, recognize};
+    use nom::error::{context, ContextError, ParseError};
     use nom::sequence::{delimited, pair};
     use nom::Parser;
     use nom::{
@@ -2465,12 +2466,11 @@ mod new {
         }
     }

-    fn ignore_ws_and_comments<'a, F, O>(
+    fn ignore_ws_and_comments<'a, F, O, E: ParseError<Span<'a>> + ContextError<Span<'a>>>(
         inner: F,
-    ) -> impl FnMut(Span<'a>) -> IResult<Span<'a>, O, nom::error::Error<Span<'a>>>
+    ) -> impl FnMut(Span<'a>) -> IResult<Span<'a>, O, E>
     where
-        F: Parser<Span<'a>, O, nom::error::Error<Span<'a>>>
-            + FnMut(Span<'a>) -> IResult<Span<'a>, O, nom::error::Error<Span<'a>>>,
+        F: Parser<Span<'a>, O, E> + FnMut(Span<'a>) -> IResult<Span<'a>, O, E>,
     {
         delimited(
             many0(alt((lex_whitespace, lex_comment))),
@@ -2480,45 +2480,63 @@ mod new {
     }

     /// Parse a full program consisting of directives, facts, rules and comments.
-    fn parse_program<'a>(input: Span<'a>) -> Program<'a> {
-        // let span = input.clone();
-        let (_, (tl_doc_comment, statements)) = all_consuming(pair(
-            opt(lex_toplevel_doc_comment),
-            many1(alt((
-                parse_fact,
-                parse_rule,
-                parse_whitespace,
-                parse_directive,
-                parse_comment,
-            ))),
-        ))(input)
-        .expect("Expect EOF");
-        Program {
-            span: input,
-            tl_doc_comment,
-            statements,
-        }
+    fn parse_program<'a, E: ParseError<Span<'a>> + ContextError<Span<'a>>>(
+        input: Span<'a>,
+    ) -> IResult<Span<'a>, Program<'a>, E> {
+        context(
+            "parse program",
+            all_consuming(pair(
+                opt(lex_toplevel_doc_comment),
+                many1(alt((
+                    parse_rule,
+                    parse_fact,
+                    parse_whitespace,
+                    parse_directive,
+                    parse_comment,
+                ))),
+            )),
+        )(input)
+        .map(|(rest_input, (tl_doc_comment, statements))| {
+            (
+                rest_input,
+                Program {
+                    span: input,
+                    tl_doc_comment,
+                    statements,
+                },
+            )
+        })
     }

     /// Parse whitespace that is between directives, facts, rules and comments.
-    fn parse_whitespace<'a>(input: Span<'a>) -> IResult<Span<'a>, Statement<'a>> {
-        lex_whitespace(input).map(|(rest, ws)| (rest, Statement::Whitespace(ws)))
+    fn parse_whitespace<'a, E: ParseError<Span<'a>> + ContextError<Span<'a>>>(
+        input: Span<'a>,
+    ) -> IResult<Span<'a>, Statement<'a>, E> {
+        context("parse whitespace", lex_whitespace)(input)
+            .map(|(rest, ws)| (rest, Statement::Whitespace(ws)))
     }

     /// Parse normal comments that start with a `%` and end at the line ending.
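// A minimal, hedged sketch (not part of this patch; plain &str stands in for
// Span) of the error-generic pattern this commit threads through every parser:
// combinators stay polymorphic over `E`, so callers pick the cheap default
// error for normal runs and `VerboseError` when they want a context trail.
use nom::{
    bytes::complete::tag,
    error::{context, ContextError, Error, ParseError, VerboseError},
    IResult,
};

fn at_sign<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
    input: &'a str,
) -> IResult<&'a str, &'a str, E> {
    context("at sign", tag("@"))(input)
}

fn main() {
    // Fast path: `Error` records only an error code and location.
    assert!(at_sign::<Error<&str>>("@base").is_ok());
    // Diagnostic path: `VerboseError` additionally keeps every context label.
    assert!(at_sign::<VerboseError<&str>>("base").is_err());
}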
- fn parse_comment<'a>(input: Span<'a>) -> IResult> { - lex_comment(input).map(|(rest, comment)| (rest, Statement::Comment(comment))) + fn parse_comment<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult, E> { + context("parse comment", lex_comment)(input) + .map(|(rest, comment)| (rest, Statement::Comment(comment))) } /// Parse a fact of the form `predicateName(term1, term2, …).` - fn parse_fact<'a>(input: Span<'a>) -> IResult> { - // let input_span = input; - tuple(( - opt(lex_doc_comment), - parse_normal_atom, - opt(lex_whitespace), - dot, - ))(input) + fn parse_fact<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult, E> { + context( + "parse fact", + tuple(( + opt(lex_doc_comment), + parse_normal_atom, + opt(lex_whitespace), + cut(dot), + )), + )(input) .map(|(rest_input, (doc_comment, atom, ws, dot))| { ( rest_input, @@ -2534,18 +2552,22 @@ mod new { } /// Parse a rule of the form `headPredicate1(term1, term2, …), headPredicate2(term1, term2, …) :- bodyPredicate(term1, …), term1 >= (term2 + term3) * function(term1, …) .` - fn parse_rule<'a>(input: Span<'a>) -> IResult> { - // let input_span = input; - tuple(( - opt(lex_doc_comment), - parse_head, - opt(lex_whitespace), - arrow, - opt(lex_whitespace), - parse_body, - opt(lex_whitespace), - dot, - ))(input) + fn parse_rule<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult, E> { + context( + "parse rule", + tuple(( + opt(lex_doc_comment), + parse_head, + opt(lex_whitespace), + arrow, + opt(lex_whitespace), + parse_body, + opt(lex_whitespace), + cut(dot), + )), + )(input) .map( |(rest_input, (doc_comment, head, ws1, arrow, ws2, body, ws3, dot))| { ( @@ -2567,46 +2589,59 @@ mod new { } /// Parse the head atoms of a rule. - fn parse_head<'a>(input: Span<'a>) -> IResult>> { - parse_atom_list(input, parse_head_atoms) + fn parse_head<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult>, E> { + context("parse head", parse_atom_list(parse_head_atoms))(input) } /// Parse the body atoms of a rule. - fn parse_body<'a>(input: Span<'a>) -> IResult>> { - parse_atom_list(input, parse_body_atoms) + fn parse_body<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult>, E> { + context("parse body", parse_atom_list(parse_body_atoms))(input) } /// Parse the directives (@base, @prefix, @import, @export, @output). - fn parse_directive<'a>(input: Span<'a>) -> IResult> { - alt(( - parse_base_directive, - parse_prefix_directive, - parse_import_directive, - parse_export_directive, - parse_output_directive, - ))(input) + fn parse_directive<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult, E> { + context( + "parse directive", + alt(( + parse_base_directive, + parse_prefix_directive, + parse_import_directive, + parse_export_directive, + parse_output_directive, + )), + )(input) .map(|(rest, directive)| (rest, Statement::Directive(directive))) } /// Parse the base directive. 
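// A hedged sketch (not from this patch) of why the new code wraps the final
// dot in `cut`: once a fact's atom has been consumed, a missing dot becomes a
// hard `Err::Failure`, so the `alt` in parse_program reports it instead of
// silently backtracking into the next alternative.
use nom::{branch::alt, bytes::complete::tag, combinator::cut, sequence::pair, IResult};

fn statement(input: &str) -> IResult<&str, (&str, &str)> {
    alt((
        pair(tag("fact"), cut(tag("."))),
        pair(tag("rule"), tag(".")),
    ))(input)
}

// statement("fact,") now fails with the missing-dot error rather than
// reporting that the input is not a "rule".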
- fn parse_base_directive<'a>(input: Span<'a>) -> IResult> { - let input_span = input.clone(); - tuple(( - opt(lex_doc_comment), - recognize(pair( - at, - verify(lex_ident, |token| token.kind == TokenKind::Base), + fn parse_base_directive<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult, E> { + context( + "parse base directive", + tuple(( + opt(lex_doc_comment), + recognize(pair( + at, + verify(lex_ident, |token| token.kind == TokenKind::Base), + )), + opt(lex_whitespace), + lex_iri, + opt(lex_whitespace), + cut(dot), )), - opt(lex_whitespace), - lex_iri, - opt(lex_whitespace), - dot, - ))(input) + )(input) .map(|(rest_input, (doc_comment, kw, ws1, base_iri, ws2, dot))| { ( rest_input, Directive::Base { - span: outer_span(input_span, rest_input), + span: outer_span(input, rest_input), doc_comment, kw: Token { kind: TokenKind::Base, @@ -2622,27 +2657,31 @@ mod new { } /// Parse the prefix directive. - fn parse_prefix_directive<'a>(input: Span<'a>) -> IResult> { - let input_span = input.clone(); - tuple(( - opt(lex_doc_comment), - recognize(pair( - at, - verify(lex_ident, |token| token.kind == TokenKind::Prefix), + fn parse_prefix_directive<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult, E> { + context( + "parse prefix directive", + tuple(( + opt(lex_doc_comment), + recognize(pair( + at, + verify(lex_ident, |token| token.kind == TokenKind::Prefix), + )), + opt(lex_whitespace), + recognize(pair(lex_ident, colon)), + opt(lex_whitespace), + lex_iri, + opt(lex_whitespace), + cut(dot), )), - opt(lex_whitespace), - recognize(pair(lex_ident, colon)), - opt(lex_whitespace), - lex_iri, - opt(lex_whitespace), - dot, - ))(input) + )(input) .map( |(rest_input, (doc_comment, kw, ws1, prefix, ws2, prefix_iri, ws3, dot))| { ( rest_input, Directive::Prefix { - span: outer_span(input_span, rest_input), + span: outer_span(input, rest_input), doc_comment, kw: Token { kind: TokenKind::Prefix, @@ -2664,29 +2703,33 @@ mod new { } /// Parse the import directive. - fn parse_import_directive<'a>(input: Span<'a>) -> IResult> { - let input_span = input.clone(); - tuple(( - opt(lex_doc_comment), - recognize(pair( - at, - verify(lex_ident, |token| token.kind == TokenKind::Import), + fn parse_import_directive<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult, E> { + context( + "parse import directive", + tuple(( + opt(lex_doc_comment), + recognize(pair( + at, + verify(lex_ident, |token| token.kind == TokenKind::Import), + )), + lex_whitespace, + lex_ident, + opt(lex_whitespace), + arrow, + opt(lex_whitespace), + parse_map, + opt(lex_whitespace), + cut(dot), )), - lex_whitespace, - lex_ident, - opt(lex_whitespace), - arrow, - opt(lex_whitespace), - parse_map, - opt(lex_whitespace), - dot, - ))(input) + )(input) .map( |(rest_input, (doc_comment, kw, ws1, predicate, ws2, arrow, ws3, map, ws4, dot))| { ( rest_input, Directive::Import { - span: outer_span(input_span, rest_input), + span: outer_span(input, rest_input), doc_comment, kw: Token { kind: TokenKind::Import, @@ -2707,29 +2750,33 @@ mod new { } /// Parse the export directive. 
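// The directive parsers above pair `at` with `verify` to accept the keyword
// identifier only when its token kind matches, and `recognize` re-spans the
// whole `@keyword` as one slice. A simplified sketch of that shape:
use nom::{
    bytes::complete::tag,
    character::complete::alpha1,
    combinator::{recognize, verify},
    sequence::pair,
    IResult,
};

fn base_keyword(input: &str) -> IResult<&str, &str> {
    recognize(pair(tag("@"), verify(alpha1, |word: &str| word == "base")))(input)
}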
- fn parse_export_directive<'a>(input: Span<'a>) -> IResult> { - let input_span = input.clone(); - tuple(( - opt(lex_doc_comment), - recognize(pair( - at, - verify(lex_ident, |token| token.kind == TokenKind::Export), + fn parse_export_directive<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult, E> { + context( + "parse export directive", + tuple(( + opt(lex_doc_comment), + recognize(pair( + at, + verify(lex_ident, |token| token.kind == TokenKind::Export), + )), + lex_whitespace, + lex_ident, + opt(lex_whitespace), + arrow, + opt(lex_whitespace), + parse_map, + opt(lex_whitespace), + cut(dot), )), - lex_whitespace, - lex_ident, - opt(lex_whitespace), - arrow, - opt(lex_whitespace), - parse_map, - opt(lex_whitespace), - dot, - ))(input) + )(input) .map( |(rest_input, (doc_comment, kw, ws1, predicate, ws2, arrow, ws3, map, ws4, dot))| { ( rest_input, Directive::Export { - span: outer_span(input_span, rest_input), + span: outer_span(input, rest_input), doc_comment, kw: Token { kind: TokenKind::Export, @@ -2750,25 +2797,29 @@ mod new { } /// Parse the output directive. - fn parse_output_directive<'a>(input: Span<'a>) -> IResult> { - let input_span = input.clone(); - tuple(( - opt(lex_doc_comment), - recognize(pair( - at, - verify(lex_ident, |token| token.kind == TokenKind::Output), + fn parse_output_directive<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult, E> { + context( + "parse output directive", + tuple(( + opt(lex_doc_comment), + recognize(pair( + at, + verify(lex_ident, |token| token.kind == TokenKind::Output), + )), + lex_whitespace, + opt(parse_identifier_list), + opt(lex_whitespace), + cut(dot), )), - lex_whitespace, - opt(parse_identifier_list), - opt(lex_whitespace), - dot, - ))(input) + )(input) .map( |(rest_input, (doc_comment, kw, ws1, predicates, ws2, dot))| { ( rest_input, Directive::Output { - span: outer_span(input_span, rest_input), + span: outer_span(input, rest_input), doc_comment, kw: Token { kind: TokenKind::Output, @@ -2785,22 +2836,26 @@ mod new { } /// Parse a list of `ident1, ident2, …` - fn parse_identifier_list<'a>(input: Span<'a>) -> IResult>> { - let input_span = input.clone(); - pair( - lex_ident, - many0(tuple(( - opt(lex_whitespace), - comma, - opt(lex_whitespace), + fn parse_identifier_list<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult>, E> { + context( + "parse identifier list", + pair( lex_ident, - ))), + many0(tuple(( + opt(lex_whitespace), + comma, + opt(lex_whitespace), + lex_ident, + ))), + ), )(input) .map(|(rest_input, (first, rest))| { ( rest_input, List { - span: outer_span(input_span, rest_input), + span: outer_span(input, rest_input), first, rest: if rest.is_empty() { None } else { Some(rest) }, }, @@ -2808,85 +2863,106 @@ mod new { }) } - /// Parse a list of atoms, like `atom1(…), atom2(…), infix = atom, …` - fn parse_atom_list<'a>( - input: Span<'a>, - parse_atom: fn(Span<'a>) -> IResult>, - ) -> IResult>> { - let input_span = input.clone(); - pair( - parse_atom, - many0(tuple(( - opt(lex_whitespace), - comma, - opt(lex_whitespace), - parse_atom, - ))), - )(input) - .map(|(rest_input, (first, rest))| { - ( - rest_input, - List { - span: outer_span(input_span, rest_input), - first, - rest: if rest.is_empty() { None } else { Some(rest) }, - }, - ) - }) + fn parse_atom_list<'a, E: ParseError> + ContextError>>( + parse_atom: fn(Span<'a>) -> IResult, E>, + ) -> impl Fn(Span<'a>) -> IResult, List<'a, Atom<'a>>, E> { + move |input| { + context( + "parse atom list", + pair( + 
+                    parse_atom,
+                    many0(tuple((
+                        opt(lex_whitespace),
+                        comma,
+                        opt(lex_whitespace),
+                        parse_atom,
+                    ))),
+                ),
+            )(input)
+            .map(|(rest_input, (first, rest))| {
+                (
+                    rest_input,
+                    List {
+                        span: outer_span(input, rest_input),
+                        first,
+                        rest: if rest.is_empty() { None } else { Some(rest) },
+                    },
+                )
+            })
+        }
     }

     /// Parse the head atoms. The same as the body atoms except for disallowing negated atoms.
-    fn parse_head_atoms<'a>(input: Span<'a>) -> IResult<Span<'a>, Atom<'a>> {
-        alt((parse_normal_atom, parse_infix_atom, parse_map_atom))(input)
+    fn parse_head_atoms<'a, E: ParseError<Span<'a>> + ContextError<Span<'a>>>(
+        input: Span<'a>,
+    ) -> IResult<Span<'a>, Atom<'a>, E> {
+        context(
+            "parse head atoms",
+            alt((parse_normal_atom, parse_infix_atom, parse_map_atom)),
+        )(input)
     }

     /// Parse the body atoms. The same as the head atoms except for allowing negated atoms.
-    fn parse_body_atoms<'a>(input: Span<'a>) -> IResult<Span<'a>, Atom<'a>> {
-        alt((
-            parse_normal_atom,
-            parse_negative_atom,
-            parse_infix_atom,
-            parse_map_atom,
-        ))(input)
+    fn parse_body_atoms<'a, E: ParseError<Span<'a>> + ContextError<Span<'a>>>(
+        input: Span<'a>,
+    ) -> IResult<Span<'a>, Atom<'a>, E> {
+        context(
+            "parse body atoms",
+            alt((
+                parse_normal_atom,
+                parse_negative_atom,
+                parse_infix_atom,
+                parse_map_atom,
+            )),
+        )(input)
     }

     /// Parse an atom of the form `predicateName(term1, term2, …)`.
-    fn parse_normal_atom<'a>(input: Span<'a>) -> IResult<Span<'a>, Atom<'a>> {
-        parse_named_tuple(input)
+    fn parse_normal_atom<'a, E: ParseError<Span<'a>> + ContextError<Span<'a>>>(
+        input: Span<'a>,
+    ) -> IResult<Span<'a>, Atom<'a>, E> {
+        context("parse normal atom", parse_named_tuple)(input)
             .map(|(rest_input, named_tuple)| (rest_input, Atom::Positive(named_tuple)))
     }

     /// Parse an atom of the form `~predicateName(term1, term2, …)`.
-    fn parse_negative_atom<'a>(input: Span<'a>) -> IResult<Span<'a>, Atom<'a>> {
-        let input_span = input.clone();
-        pair(tilde, parse_named_tuple)(input).map(|(rest_input, (tilde, named_tuple))| {
-            (
-                rest_input,
-                Atom::Negative {
-                    span: outer_span(input_span, rest_input),
-                    neg: tilde,
-                    atom: named_tuple,
-                },
-            )
-        })
+    fn parse_negative_atom<'a, E: ParseError<Span<'a>> + ContextError<Span<'a>>>(
+        input: Span<'a>,
+    ) -> IResult<Span<'a>, Atom<'a>, E> {
+        context("parse negative atom", pair(tilde, parse_named_tuple))(input).map(
+            |(rest_input, (tilde, named_tuple))| {
+                (
+                    rest_input,
+                    Atom::Negative {
+                        span: outer_span(input, rest_input),
+                        neg: tilde,
+                        atom: named_tuple,
+                    },
+                )
+            },
+        )
     }

     /// Parse an "infix atom" of the form `term1 <operation> term2`.
     /// The supported infix operations are `<`, `<=`, `=`, `>=`, `>` and `!=`.
-    fn parse_infix_atom<'a>(input: Span<'a>) -> IResult<Span<'a>, Atom<'a>> {
-        let input_span = input.clone();
-        tuple((
-            parse_term,
-            opt(lex_whitespace),
-            parse_operation_token,
-            opt(lex_whitespace),
-            parse_term,
-        ))(input)
+    fn parse_infix_atom<'a, E: ParseError<Span<'a>> + ContextError<Span<'a>>>(
+        input: Span<'a>,
+    ) -> IResult<Span<'a>, Atom<'a>, E> {
+        context(
+            "parse infix atom",
+            tuple((
+                parse_term,
+                opt(lex_whitespace),
+                parse_operation_token,
+                opt(lex_whitespace),
+                parse_term,
+            )),
+        )(input)
         .map(|(rest_input, (lhs, ws1, operation, ws2, rhs))| {
             (
                 rest_input,
                 Atom::InfixAtom {
-                    span: outer_span(input_span, rest_input),
+                    span: outer_span(input, rest_input),
                     lhs,
                     ws1,
                     operation,
@@ -2899,23 +2975,27 @@ mod new {

     /// Parse a tuple with an optional name, like `ident(term1, term2)`
     /// or just `(int, int, skip)`.
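// parse_atom_list above was changed from a function that takes the input
// directly into one that *returns* a parser closure; a minimal sketch of that
// higher-order shape, with hypothetical names:
use nom::{bytes::complete::tag, multi::separated_list1, IResult};

fn list_of<'a>(
    item: fn(&'a str) -> IResult<&'a str, &'a str>,
) -> impl Fn(&'a str) -> IResult<&'a str, Vec<&'a str>> {
    // `move` captures the item parser; the returned closure is the parser.
    move |input| separated_list1(tag(","), item)(input)
}

// Usage: list_of(ident)("a,b,c"), mirroring parse_atom_list(parse_head_atoms).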
-    fn parse_tuple<'a>(input: Span<'a>) -> IResult<Span<'a>, Tuple<'a>> {
-        let input_span = input.clone();
-        tuple((
-            opt(lex_ident),
-            opt(lex_whitespace),
-            open_paren,
-            opt(lex_whitespace),
-            opt(parse_term_list),
-            opt(lex_whitespace),
-            close_paren,
-        ))(input)
+    fn parse_tuple<'a, E: ParseError<Span<'a>> + ContextError<Span<'a>>>(
+        input: Span<'a>,
+    ) -> IResult<Span<'a>, Tuple<'a>, E> {
+        context(
+            "parse tuple",
+            tuple((
+                opt(lex_ident),
+                opt(lex_whitespace),
+                open_paren,
+                opt(lex_whitespace),
+                opt(parse_term_list),
+                opt(lex_whitespace),
+                cut(close_paren),
+            )),
+        )(input)
         .map(
             |(rest_input, (identifier, ws1, open_paren, ws2, terms, ws3, close_paren))| {
                 (
                     rest_input,
                     Tuple {
-                        span: outer_span(input_span, rest_input),
+                        span: outer_span(input, rest_input),
                         identifier,
                         ws1,
                         open_paren,
@@ -2931,23 +3011,27 @@ mod new {

     /// Parse a named tuple. This function is like `parse_tuple` with the difference
     /// that it enforces the existence of an identifier for the tuple.
-    fn parse_named_tuple<'a>(input: Span<'a>) -> IResult<Span<'a>, Tuple<'a>> {
-        let input_span = input.clone();
-        tuple((
-            lex_ident,
-            opt(lex_whitespace),
-            open_paren,
-            opt(lex_whitespace),
-            opt(parse_term_list),
-            opt(lex_whitespace),
-            close_paren,
-        ))(input)
+    fn parse_named_tuple<'a, E: ParseError<Span<'a>> + ContextError<Span<'a>>>(
+        input: Span<'a>,
+    ) -> IResult<Span<'a>, Tuple<'a>, E> {
+        context(
+            "parse named tuple",
+            tuple((
+                lex_ident,
+                opt(lex_whitespace),
+                open_paren,
+                opt(lex_whitespace),
+                opt(parse_term_list),
+                opt(lex_whitespace),
+                cut(close_paren),
+            )),
+        )(input)
         .map(
             |(rest_input, (identifier, ws1, open_paren, ws2, terms, ws3, close_paren))| {
                 (
                     rest_input,
                     Tuple {
-                        span: outer_span(input_span, rest_input),
+                        span: outer_span(input, rest_input),
                         identifier: Some(identifier),
                         ws1,
                         open_paren,
@@ -2963,23 +3047,27 @@ mod new {

     /// Parse a map. Maps are denoted with `{…}` and can have an optional name, e.g. `csv {…}`.
     /// Inside the curly braces is a list of pairs.
-    fn parse_map<'a>(input: Span<'a>) -> IResult<Span<'a>, Map<'a>> {
-        let input_span = input.clone();
-        tuple((
-            opt(lex_ident),
-            opt(lex_whitespace),
-            open_brace,
-            opt(lex_whitespace),
-            parse_pair_list,
-            opt(lex_whitespace),
-            close_brace,
-        ))(input)
+    fn parse_map<'a, E: ParseError<Span<'a>> + ContextError<Span<'a>>>(
+        input: Span<'a>,
+    ) -> IResult<Span<'a>, Map<'a>, E> {
+        context(
+            "parse map",
+            tuple((
+                opt(lex_ident),
+                opt(lex_whitespace),
+                open_brace,
+                opt(lex_whitespace),
+                parse_pair_list,
+                opt(lex_whitespace),
+                cut(close_brace),
+            )),
+        )(input)
         .map(
             |(rest_input, (identifier, ws1, open_brace, ws2, pairs, ws3, close_brace))| {
                 (
                     rest_input,
                     Map {
-                        span: outer_span(input_span, rest_input),
+                        span: outer_span(input, rest_input),
                         identifier,
                         ws1,
                         open_brace,
@@ -2994,30 +3082,35 @@ mod new {
     }

     /// Parse a map in an atom position.
-    fn parse_map_atom<'a>(input: Span<'a>) -> IResult<Span<'a>, Atom<'a>> {
-        parse_map(input).map(|(rest_input, map)| (rest_input, Atom::Map(map)))
+    fn parse_map_atom<'a, E: ParseError<Span<'a>> + ContextError<Span<'a>>>(
+        input: Span<'a>,
+    ) -> IResult<Span<'a>, Atom<'a>, E> {
+        context("parse map atom", parse_map)(input)
+            .map(|(rest_input, map)| (rest_input, Atom::Map(map)))
     }

     /// Parse a pair list of the form `key1 = value1, key2 = value2, …`.
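// A hedged sketch of the `key = value, …` shape that the pair list accepts
// inside map braces (simplified tokens, whitespace handling omitted):
use nom::{
    bytes::complete::tag,
    character::complete::alpha1,
    multi::separated_list0,
    sequence::separated_pair,
    IResult,
};

fn pair_list(input: &str) -> IResult<&str, Vec<(&str, &str)>> {
    // separated_list0 also accepts the empty list, like the opt(…) around
    // the real pair list.
    separated_list0(tag(","), separated_pair(alpha1, tag("="), alpha1))(input)
}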
-    fn parse_pair_list<'a>(
+    fn parse_pair_list<'a, E: ParseError<Span<'a>> + ContextError<Span<'a>>>(
         input: Span<'a>,
-    ) -> IResult<Span<'a>, Option<List<'a, Pair<'a, Term<'a>, Term<'a>>>>> {
-        let input_span = input.clone();
-        opt(pair(
-            parse_pair,
-            many0(tuple((
-                opt(lex_whitespace),
-                comma,
-                opt(lex_whitespace),
+    ) -> IResult<Span<'a>, Option<List<'a, Pair<'a, Term<'a>, Term<'a>>>>, E> {
+        context(
+            "parse pair list",
+            opt(pair(
                 parse_pair,
-            ))),
-        ))(input)
+                many0(tuple((
+                    opt(lex_whitespace),
+                    comma,
+                    opt(lex_whitespace),
+                    parse_pair,
+                ))),
+            )),
+        )(input)
         .map(|(rest_input, pair_list)| {
             if let Some((first, rest)) = pair_list {
                 (
                     rest_input,
                     Some(List {
-                        span: outer_span(input_span, rest_input),
+                        span: outer_span(input, rest_input),
                         first,
                         rest: if rest.is_empty() { None } else { Some(rest) },
                     }),
@@ -3029,20 +3122,24 @@ mod new {
     }

     /// Parse a pair of the form `key = value`.
-    fn parse_pair<'a>(input: Span<'a>) -> IResult<Span<'a>, Pair<'a, Term<'a>, Term<'a>>> {
-        let input_span = input.clone();
-        tuple((
-            parse_term,
-            opt(lex_whitespace),
-            equal,
-            opt(lex_whitespace),
-            parse_term,
-        ))(input)
+    fn parse_pair<'a, E: ParseError<Span<'a>> + ContextError<Span<'a>>>(
+        input: Span<'a>,
+    ) -> IResult<Span<'a>, Pair<'a, Term<'a>, Term<'a>>, E> {
+        context(
+            "parse pair",
+            tuple((
+                parse_term,
+                opt(lex_whitespace),
+                equal,
+                opt(lex_whitespace),
+                parse_term,
+            )),
+        )(input)
         .map(|(rest_input, (key, ws1, equal, ws2, value))| {
             (
                 rest_input,
                 Pair {
-                    span: outer_span(input_span, rest_input),
+                    span: outer_span(input, rest_input),
                     key,
                     ws1,
                     equal,
@@ -3054,22 +3151,26 @@ mod new {
     }

     /// Parse a list of terms of the form `term1, term2, …`.
-    fn parse_term_list<'a>(input: Span<'a>) -> IResult<Span<'a>, List<'a, Term<'a>>> {
-        let input_span = input.clone();
-        pair(
-            parse_term,
-            many0(tuple((
-                opt(lex_whitespace),
-                comma,
-                opt(lex_whitespace),
+    fn parse_term_list<'a, E: ParseError<Span<'a>> + ContextError<Span<'a>>>(
+        input: Span<'a>,
+    ) -> IResult<Span<'a>, List<'a, Term<'a>>, E> {
+        context(
+            "parse term list",
+            pair(
                 parse_term,
-            ))),
+                many0(tuple((
+                    opt(lex_whitespace),
+                    comma,
+                    opt(lex_whitespace),
+                    parse_term,
+                ))),
+            ),
         )(input)
         .map(|(rest_input, (first, rest))| {
             (
                 rest_input,
                 List {
-                    span: outer_span(input_span, rest_input),
+                    span: outer_span(input, rest_input),
                     first,
                     rest: if rest.is_empty() { None } else { Some(rest) },
                 },
             )
         })
     }

     /// Parse a term. A term can be a primitive value (constant, number, string, …),
     /// a variable (universal or existential), a map, a function (-symbol), an arithmetic
     /// operation, an aggregation or a tuple of terms, e.g. `(term1, term2, …)`.
-    fn parse_term<'a>(input: Span<'a>) -> IResult<Span<'a>, Term<'a>> {
-        alt((
-            parse_binary_term,
-            parse_tuple_term,
-            parse_unary_prefix_term,
-            parse_map_term,
-            parse_primitive_term,
-            parse_variable,
-            parse_existential,
-            parse_aggregation_term,
-        ))(input)
+    fn parse_term<'a, E: ParseError<Span<'a>> + ContextError<Span<'a>>>(
+        input: Span<'a>,
+    ) -> IResult<Span<'a>, Term<'a>, E> {
+        context(
+            "parse term",
+            alt((
+                parse_binary_term,
+                parse_tuple_term,
+                parse_unary_prefix_term,
+                parse_map_term,
+                parse_primitive_term,
+                parse_variable,
+                parse_existential,
+                parse_aggregation_term,
+            )),
+        )(input)
     }

     /// Parse a primitive term (simple constant, iri constant, number, string).
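// parse_primitive_term below must try parse_rdf_literal before parse_string:
// `alt` commits to the first branch that succeeds, with no reordering. The
// same rule is why "<=" is tried before "<" in parse_operation_token. A tiny
// illustrative sketch:
use nom::{branch::alt, bytes::complete::tag, IResult};

fn comparison_op(input: &str) -> IResult<&str, &str> {
    // Swapping the branches would make "<=" parse as "<" plus a stray "=".
    alt((tag("<="), tag("<")))(input)
}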
- fn parse_primitive_term<'a>(input: Span<'a>) -> IResult> { - alt((lex_ident, lex_iri, lex_number, lex_string))(input) - .map(|(rest_input, term)| (rest_input, Term::Primitive(term))) + fn parse_primitive_term<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult, E> { + context( + "parse primitive term", + alt(( + parse_rdf_literal, + parse_ident, + parse_iri, + parse_number, + parse_string, + )), + )(input) + .map(|(rest_input, term)| (rest_input, Term::Primitive(term))) } - /// Parse an unary term. - fn parse_unary_prefix_term<'a>(input: Span<'a>) -> IResult> { - let input_span = input.clone(); - pair(lex_unary_prefix_operators, parse_term)(input).map( - |(rest_input, (operation, term))| { - ( - rest_input, - Term::UnaryPrefix { - span: outer_span(input_span, rest_input), - operation, - term: Box::new(term), + /// Parse a rdf literal e.g. "2023-06-19"^^ + fn parse_rdf_literal<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult, Primitive<'a>, E> { + context( + "parse rdf literal", + tuple((lex_string, recognize(pair(caret, caret)), lex_iri)), + )(input) + .map(|(rest_input, (string, carets, iri))| { + ( + rest_input, + Primitive::RdfLiteral { + span: outer_span(input, rest_input), + string, + carets: Token { + kind: TokenKind::Caret, + span: carets, }, - ) - }, - ) + iri, + }, + ) + }) + } + + fn parse_ident<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult, Primitive<'a>, E> { + context("parse identifier", lex_ident)(input) + .map(|(rest_input, ident)| (rest_input, Primitive::Constant(ident))) + } + + fn parse_iri<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult, Primitive<'a>, E> { + context("parse iri", lex_iri)(input) + .map(|(rest_input, iri)| (rest_input, Primitive::Iri(iri))) + } + + fn parse_number<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult, Primitive<'a>, E> { + context("parse number", lex_number)(input) + .map(|(rest_input, number)| (rest_input, Primitive::Number(number))) + } + + fn parse_string<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult, Primitive<'a>, E> { + context("parse string", lex_string)(input) + .map(|(rest_input, string)| (rest_input, Primitive::String(string))) + } + + /// Parse an unary term. + fn parse_unary_prefix_term<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult, E> { + context( + "parse unary prefix term", + pair(lex_unary_prefix_operators, parse_term), + )(input) + .map(|(rest_input, (operation, term))| { + ( + rest_input, + Term::UnaryPrefix { + span: outer_span(input, rest_input), + operation, + term: Box::new(term), + }, + ) + }) } /// Parse a binary infix operation of the form `term1 term2`. - fn parse_binary_term<'a>(input: Span<'a>) -> IResult> { - pair( - parse_arithmetic_product, - opt(tuple(( - opt(lex_whitespace), - alt((plus, minus)), - opt(lex_whitespace), - parse_binary_term, - ))), + fn parse_binary_term<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult, E> { + context( + "parse binary term", + pair( + parse_arithmetic_product, + opt(tuple(( + opt(lex_whitespace), + alt((plus, minus)), + opt(lex_whitespace), + parse_binary_term, + ))), + ), )(input) .map(|(rest_input, (lhs, opt))| { ( @@ -3148,15 +3325,20 @@ mod new { /// Parse an arithmetic product, i.e. an expression involving /// only `*` and `/` over subexpressions. 
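// A hedged sketch of the `"value"^^<datatype-iri>` shape that
// parse_rdf_literal above recognizes, using simplified stand-in lexers
// instead of lex_string and lex_iri:
use nom::{
    bytes::complete::{is_not, tag},
    sequence::{delimited, tuple},
    IResult,
};

fn rdf_literal(input: &str) -> IResult<&str, (&str, &str, &str)> {
    tuple((
        delimited(tag("\""), is_not("\""), tag("\"")),
        tag("^^"),
        delimited(tag("<"), is_not("> \n"), tag(">")),
    ))(input)
}

// rdf_literal(r#""2023-06-19"^^<http://www.w3.org/2001/XMLSchema#date>"#)
// yields the lexical value, the carets, and the datatype iri.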
- fn parse_arithmetic_product<'a>(input: Span<'a>) -> IResult> { - pair( - parse_arithmetic_factor, - opt(tuple(( - opt(lex_whitespace), - alt((star, slash)), - opt(lex_whitespace), - parse_arithmetic_product, - ))), + fn parse_arithmetic_product<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult, E> { + context( + "parse arithmetic product", + pair( + parse_arithmetic_factor, + opt(tuple(( + opt(lex_whitespace), + alt((star, slash)), + opt(lex_whitespace), + parse_arithmetic_product, + ))), + ), )(input) .map(|(rest_input, (lhs, opt))| { ( @@ -3177,47 +3359,57 @@ mod new { }) } - fn parse_arithmetic_factor<'a>(input: Span<'a>) -> IResult> { - alt(( - parse_tuple_term, - parse_aggregation_term, - parse_primitive_term, - parse_variable, - parse_existential, - ))(input) - } - - fn fold_arithmetic_expression<'a>( - initial: Term<'a>, - sequence: Vec<(Option>, Token<'a>, Option>, Term<'a>)>, - span_vec: Vec>, - ) -> Term<'a> { - sequence - .into_iter() - .enumerate() - .fold(initial, |acc, (i, pair)| { - let (ws1, operation, ws2, expression) = pair; - Term::Binary { - span: span_vec[i], - lhs: Box::new(acc), - ws1, - operation, - ws2, - rhs: Box::new(expression), - } - }) + fn parse_arithmetic_factor<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult, E> { + context( + "parse arithmetic factor", + alt(( + parse_tuple_term, + parse_aggregation_term, + parse_primitive_term, + parse_variable, + parse_existential, + )), + )(input) } + // fn fold_arithmetic_expression<'a>( + // initial: Term<'a>, + // sequence: Vec<(Option>, Token<'a>, Option>, Term<'a>)>, + // span_vec: Vec>, + // ) -> Term<'a> { + // sequence + // .into_iter() + // .enumerate() + // .fold(initial, |acc, (i, pair)| { + // let (ws1, operation, ws2, expression) = pair; + // Term::Binary { + // span: span_vec[i], + // lhs: Box::new(acc), + // ws1, + // operation, + // ws2, + // rhs: Box::new(expression), + // } + // }) + // } + /// Parse an aggregation term of the form `#sum(…)`. - fn parse_aggregation_term<'a>(input: Span<'a>) -> IResult> { - tuple(( - recognize(pair(hash, lex_ident)), - open_paren, - opt(lex_whitespace), - parse_term_list, - opt(lex_whitespace), - close_paren, - ))(input) + fn parse_aggregation_term<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult, E> { + context( + "parse aggregation term", + tuple(( + recognize(pair(hash, lex_ident)), + open_paren, + opt(lex_whitespace), + parse_term_list, + opt(lex_whitespace), + close_paren, + )), + )(input) .map( |(rest_input, (operation, open_paren, ws1, terms, ws2, close_paren))| { ( @@ -3241,32 +3433,47 @@ mod new { /// Parse a tuple term, either with a name (function symbol) or as a term (-list) with /// parenthesis. - fn parse_tuple_term<'a>(input: Span<'a>) -> IResult> { - parse_tuple(input) + fn parse_tuple_term<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult, E> { + context("parse tuple term", parse_tuple)(input) .map(|(rest_input, named_tuple)| (rest_input, Term::Tuple(Box::new(named_tuple)))) } /// Parse a map as a term. - fn parse_map_term<'a>(input: Span<'a>) -> IResult> { - parse_map(input).map(|(rest_input, map)| (rest_input, Term::Map(Box::new(map)))) + fn parse_map_term<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult, E> { + context("parse map term", parse_map)(input) + .map(|(rest_input, map)| (rest_input, Term::Map(Box::new(map)))) } /// Parse a variable. 
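// The layering above is what encodes operator precedence: binary terms call
// parse_arithmetic_product, products call parse_arithmetic_factor, so `*`/`/`
// bind tighter than `+`/`-`, and both chains nest to the right. The same idea
// in a hedged, evaluating sketch (hypothetical helpers, not part of the AST):
use nom::{
    branch::alt,
    character::complete::{char, digit1},
    combinator::map_res,
    sequence::{delimited, preceded},
    IResult,
};

fn expr(input: &str) -> IResult<&str, i64> {
    let (rest, lhs) = product(input)?;
    match preceded(char('+'), expr)(rest) {
        Ok((rest, rhs)) => Ok((rest, lhs + rhs)),
        Err(_) => Ok((rest, lhs)),
    }
}

fn product(input: &str) -> IResult<&str, i64> {
    let (rest, lhs) = factor(input)?;
    match preceded(char('*'), product)(rest) {
        Ok((rest, rhs)) => Ok((rest, lhs * rhs)),
        Err(_) => Ok((rest, lhs)),
    }
}

fn factor(input: &str) -> IResult<&str, i64> {
    // A factor is a bare number or a parenthesized expression.
    alt((
        map_res(digit1, str::parse),
        delimited(char('('), expr, char(')')),
    ))(input)
}

// expr("1+2*3") evaluates to 7, expr("(1+2)*3") to 9.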
- fn parse_variable<'a>(input: Span<'a>) -> IResult> { - recognize(pair(question_mark, lex_ident))(input).map(|(rest_input, var)| { - ( - rest_input, - Term::Variable(Token { - kind: TokenKind::Variable, - span: var, - }), - ) - }) + fn parse_variable<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult, E> { + context("parse variable", recognize(pair(question_mark, lex_ident)))(input).map( + |(rest_input, var)| { + ( + rest_input, + Term::Variable(Token { + kind: TokenKind::Variable, + span: var, + }), + ) + }, + ) } /// Parse an existential variable. - fn parse_existential<'a>(input: Span<'a>) -> IResult> { - recognize(pair(exclamation_mark, lex_ident))(input).map(|(rest_input, existential)| { + fn parse_existential<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult, E> { + context( + "parse existential", + recognize(pair(exclamation_mark, lex_ident)), + )(input) + .map(|(rest_input, existential)| { ( rest_input, Term::Existential(Token { @@ -3279,12 +3486,19 @@ mod new { // Order of functions is important, because of ordered choice and no backtracking /// Parse the operator for an infix atom. - fn parse_operation_token<'a>(input: Span<'a>) -> IResult> { - alt((less_equal, greater_equal, equal, unequal, less, greater))(input) + fn parse_operation_token<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult, E> { + context( + "parse operation token", + alt((less_equal, greater_equal, equal, unequal, less, greater)), + )(input) } #[cfg(test)] mod tests { + use nom::error::{convert_error, VerboseError}; + use super::*; use crate::io::{ lexer::*, @@ -3300,6 +3514,19 @@ mod new { }; } + fn convert_located_span_error<'a>(input: Span<'a>, err: VerboseError>) -> String { + convert_error( + *(input.fragment()), + VerboseError { + errors: err + .errors + .into_iter() + .map(|(span, tag)| (*(span.fragment()), tag)) + .collect(), + }, + ) + } + #[test] fn fact() { // let input = Tokens { @@ -3307,7 +3534,7 @@ mod new { // }; let input = Span::new("a(B,C)."); assert_eq!( - parse_program(input), + parse_program::>(input).unwrap().1, Program { span: input, tl_doc_comment: None, @@ -3328,10 +3555,10 @@ mod new { ws2: None, terms: Some(List { span: S!(2, 1, "B,C"), - first: Term::Primitive(Token { + first: Term::Primitive(Primitive::Constant(Token { kind: TokenKind::Ident, span: S!(2, 1, "B"), - }), + })), rest: Some(vec![( None, Token { @@ -3339,10 +3566,10 @@ mod new { span: S!(3, 1, ",") }, None, - Term::Primitive(Token { + Term::Primitive(Primitive::Constant(Token { kind: TokenKind::Ident, span: S!(4, 1, "C"), - }), + })), )]), }), ws3: None, @@ -3367,7 +3594,7 @@ mod new { r#"@base .@prefix rdfs:.@import sourceA:-csv{resource="sources/dataA.csv"}.@export a:-csv{}.@output a, b, c."#, ); assert_eq!( - parse_program(input), + parse_program::>(input).unwrap().1, Program { tl_doc_comment: None, span: input, @@ -3464,20 +3691,20 @@ mod new { span: S!(106, 1, "resource=\"sources/dataA.csv\""), first: Pair { span: S!(106, 1, "resource=\"sources/dataA.csv\""), - key: Term::Primitive(Token { + key: Term::Primitive(Primitive::Constant(Token { kind: TokenKind::Ident, span: S!(106, 1, "resource"), - }), + })), ws1: None, equal: Token { kind: TokenKind::Equal, span: S!(114, 1, "="), }, ws2: None, - value: Term::Primitive(Token { + value: Term::Primitive(Primitive::String(Token { kind: TokenKind::String, span: S!(115, 1, "\"sources/dataA.csv\""), - }) + })), }, rest: None, }), @@ -3604,7 +3831,7 @@ mod new { fn ignore_ws_and_comments() { let input = Span::new(" 
Hi %cool comment\n"); assert_eq!( - super::ignore_ws_and_comments(lex_ident)(input), + super::ignore_ws_and_comments(lex_ident::>)(input), Ok(( S!(22, 2, ""), Token { @@ -3619,7 +3846,7 @@ mod new { fn fact_with_ws() { let input = Span::new("some(Fact, with, whitespace) . % and a super useful comment\n"); assert_eq!( - parse_program(input), + parse_program::>(input).unwrap().1, Program { span: input, tl_doc_comment: None, @@ -3641,10 +3868,10 @@ mod new { ws2: None, terms: Some(List { span: S!(5, 1, "Fact, with, whitespace"), - first: Term::Primitive(Token { + first: Term::Primitive(Primitive::Constant(Token { kind: TokenKind::Ident, span: S!(5, 1, "Fact"), - }), + })), rest: Some(vec![ ( None, @@ -3656,10 +3883,10 @@ mod new { kind: TokenKind::Whitespace, span: S!(10, 1, " "), }), - Term::Primitive(Token { + Term::Primitive(Primitive::Constant(Token { kind: TokenKind::Ident, span: S!(11, 1, "with") - }), + })), ), ( None, @@ -3671,10 +3898,10 @@ mod new { kind: TokenKind::Whitespace, span: S!(16, 1, " "), }), - Term::Primitive(Token { + Term::Primitive(Primitive::Constant(Token { kind: TokenKind::Ident, span: S!(17, 1, "whitespace") - }), + })), ), ]), }), @@ -3717,7 +3944,7 @@ mod new { % find old trees. It can be modified to use a different species or genus of % plant, and by changing the required age. -@import tree :- csv{format=(string, string, int, int), resource="https://raw.githubusercontent.com/knowsys/nemo-examples/main/examples/lime-trees/dresden-trees-ages-heights.csv"} . % location URL, species, age, height in m +@import tree :- csv{format=(string, string, string, int, int), resource="https://raw.githubusercontent.com/knowsys/nemo-examples/main/examples/lime-trees/dresden-trees-ages-heights.csv"} . % location URL, species, age, height in m @import taxon :- csv{format=(string, string, string), resource="https://raw.githubusercontent.com/knowsys/nemo-examples/main/examples/lime-trees/wikidata-taxon-name-parent.csv.gz"} . % location URL, species, age, height in m limeSpecies(?X, "Tilia") :- taxon(?X, "Tilia", ?P). @@ -3725,27 +3952,47 @@ limeSpecies(?X, ?Name) :- taxon(?X, ?Name, ?Y), limeSpecies(?Y, ?N). 
oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters), ?age > 200, limeSpecies(?id,?species) ."#, ); - let ast = parse_program(input); - println!("{}", ast); - assert_eq!( - { - let mut result = String::new(); - for token in get_all_tokens(&ast) { - result.push_str(token.span().fragment()); - } - println!("{}", result); - result - }, - *input.fragment(), - ); + let ast = parse_program::>(input); + match &ast { + Ok((rest_input, ast)) => { + println!("Rest Input:\n{:#?}\n\n{}", rest_input, ast); + assert_eq!( + { + let mut string_from_tokens = String::new(); + for token in get_all_tokens(ast) { + string_from_tokens.push_str(token.span().fragment()); + } + println!("String from Tokens:\n"); + println!("{}\n", string_from_tokens); + string_from_tokens + }, + *input.fragment(), + ); + } + Err(nom::Err::Error(err)) | Err(nom::Err::Failure(err)) => { + println!( + "PRINT ERROR:\n\n{}", + convert_located_span_error(input, err.clone()) + ); + } + Err(err) => panic!("{}", err), + } + assert!(ast.is_ok()); } #[test] fn parser_test() { let str = std::fs::read_to_string("../testfile.rls").expect("testfile not found"); let input = Span::new(str.as_str()); - println!("{}", parse_program(input)); - // assert!(false); + let result = parse_program::>(input); + match result { + Ok(ast) => println!("{}", ast.1), + Err(nom::Err::Error(err)) | Err(nom::Err::Failure(err)) => { + println!("{}", convert_located_span_error(input, err)) + } + Err(_) => (), + } + assert!(false); } #[test] @@ -3766,120 +4013,143 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters assert_eq!( { - let result = parse_term(Span::new("42")); + let result = parse_term::>(Span::new("42")); result.unwrap().1 }, - Term::Primitive(T! {Number, 0, 1, "42"}), + Term::Primitive(Primitive::Number(T! {Number, 0, 1, "42"})), ); assert_eq!( { - let result = parse_term(Span::new("35+7")); + let result = parse_term::>(Span::new("35+7")); result.unwrap().1 }, Term::Binary { span: s!(0, 1, "35+7"), - lhs: Box::new(Term::Primitive(T! {Number, 0, 1, "35"})), + lhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 0, 1, "35"}))), ws1: None, operation: T! {Plus, 2, 1, "+"}, ws2: None, - rhs: Box::new(Term::Primitive(T! {Number, 3, 1, "7"})) + rhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 3, 1, "7"}))), } ); assert_eq!( { - let result = parse_term(Span::new("6*7")); + let result = parse_term::>(Span::new("6*7")); result.unwrap().1 }, Term::Binary { span: s!(0, 1, "6*7"), - lhs: Box::new(Term::Primitive(T! {Number, 0,1,"6"})), + lhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 0,1,"6"}))), ws1: None, operation: T! {Star, 1,1,"*"}, ws2: None, - rhs: Box::new(Term::Primitive(T! {Number, 2,1,"7"})), + rhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 2,1,"7"}))), } ); assert_eq!( { - let result = parse_term(Span::new("49-7")); + let result = parse_term::>(Span::new("49-7")); result.unwrap().1 }, Term::Binary { span: s!(0, 1, "49-7"), - lhs: Box::new(Term::Primitive(T! {Number, 0, 1, "49"})), + lhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 0, 1, "49"}))), ws1: None, operation: T! {Minus, 2, 1, "-"}, ws2: None, - rhs: Box::new(Term::Primitive(T! {Number, 3, 1, "7"})) + rhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 3, 1, "7"}))), } ); assert_eq!( { - let result = parse_term(Span::new("84/2")); + let result = parse_term::>(Span::new("84/2")); result.unwrap().1 }, Term::Binary { span: s!(0, 1, "84/2"), - lhs: Box::new(Term::Primitive(T! 
{Number, 0, 1, "84"})), + lhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 0, 1, "84"}))), ws1: None, operation: T! {Slash, 2, 1, "/"}, ws2: None, - rhs: Box::new(Term::Primitive(T! {Number, 3, 1, "2"})) + rhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 3, 1, "2"}))), } ); assert_eq!( { - let result = parse_term(Span::new("5*7+7")); + let result = parse_term::>(Span::new("5*7+7")); result.unwrap().1 }, Term::Binary { span: s!(0, 1, "5*7+7"), lhs: Box::new(Term::Binary { span: s!(0, 1, "5*7"), - lhs: Box::new(Term::Primitive(T! {Number, 0,1,"5"})), + lhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 0,1,"5"}))), ws1: None, operation: T! {Star, 1,1,"*"}, ws2: None, - rhs: Box::new(Term::Primitive(T! {Number, 2,1,"7"})) + rhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 2,1,"7"}))), }), ws1: None, operation: T! {Plus, 3,1,"+"}, ws2: None, - rhs: Box::new(Term::Primitive(T! {Number, 4,1,"7"})), + rhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 4,1,"7"}))), } ); assert_eq!( { - let result = parse_term(Span::new("7+5*7")); + let result = parse_term::>(Span::new("7+5*7")); result.unwrap().1 }, Term::Binary { span: s!(0, 1, "7+5*7"), - lhs: Box::new(Term::Primitive(T! {Number, 0,1,"7"})), + lhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 0,1,"7"}))), ws1: None, operation: T! {Plus, 1,1,"+"}, ws2: None, rhs: Box::new(Term::Binary { span: s!(2, 1, "5*7"), - lhs: Box::new(Term::Primitive(T! {Number, 2,1,"5"})), + lhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 2,1,"5"}))), ws1: None, operation: T! {Star, 3,1,"*"}, ws2: None, - rhs: Box::new(Term::Primitive(T! {Number, 4,1,"7"})) + rhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 4,1,"7"}))), }), } ); assert_eq!( { - let result = parse_term(Span::new("(15+3*2-(7+35)*8)/3")); - result.unwrap().1 + let input = Span::new("(15+3*2-(7+35)*8)/3"); + let result = parse_term::>(input); + // let result = parse_term::>(Span::new("(15+3*2-(7+35)*8)/3")); + match result { + Ok(ast) => { + println!("{}", ast.1); + ast.1 + } + Err(nom::Err::Error(err)) | Err(nom::Err::Failure(err)) => { + panic!( + "{}", + convert_error( + *(input.fragment()), + VerboseError { + errors: err + .errors + .into_iter() + .map(|(span, tag)| { (*(span.fragment()), tag) }) + .collect() + } + ) + ) + } + Err(nom::Err::Incomplete(err)) => panic!("{:#?}", err), + } }, Term::Binary { span: s!(0, 1, "(15+3*2-(7+35)*8)/3"), @@ -3893,7 +4163,9 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters span: s!(1, 1, "15+3*2-(7+35)*8"), first: Term::Binary { span: s!(1, 1, "15+3*2-(7+35)*8"), - lhs: Box::new(Term::Primitive(T! {Number, 1,1,"15"})), + lhs: Box::new(Term::Primitive(Primitive::Number( + T! {Number, 1,1,"15"} + ))), ws1: None, operation: T! {Plus, 3,1,"+"}, ws2: None, @@ -3901,11 +4173,15 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters span: s!(4, 1, "3*2-(7+35)*8"), lhs: Box::new(Term::Binary { span: s!(4, 1, "3*2"), - lhs: Box::new(Term::Primitive(T! {Number, 4,1,"3"})), + lhs: Box::new(Term::Primitive(Primitive::Number( + T! {Number, 4,1,"3"} + ))), ws1: None, operation: T! {Star, 5,1,"*"}, ws2: None, - rhs: Box::new(Term::Primitive(T! {Number, 6,1,"2"})), + rhs: Box::new(Term::Primitive(Primitive::Number( + T! {Number, 6,1,"2"} + ))), }), ws1: None, operation: T! 
{Minus, 7,1,"-"}, @@ -3923,13 +4199,13 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters first: Term::Binary { span: s!(9, 1, "7+35"), lhs: Box::new(Term::Primitive( - T! {Number, 9,1,"7"} + Primitive::Number(T! {Number, 9,1,"7"}) )), ws1: None, operation: T! {Plus, 10,1,"+"}, ws2: None, rhs: Box::new(Term::Primitive( - T! {Number, 11,1,"35"} + Primitive::Number(T! {Number, 11,1,"35"}) )), }, rest: None @@ -3940,7 +4216,9 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters ws1: None, operation: T! {Star, 14,1,"*"}, ws2: None, - rhs: Box::new(Term::Primitive(T! {Number, 15,1,"8"})), + rhs: Box::new(Term::Primitive(Primitive::Number( + T! {Number, 15,1,"8"} + ))), }), }), }, @@ -3952,7 +4230,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters ws1: None, operation: T! {Slash, 17,1,"/"}, ws2: None, - rhs: Box::new(Term::Primitive(T! {Number, 18,1,"3"})), + rhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 18,1,"3"}))), } ); // Term::Binary { @@ -3966,12 +4244,12 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters assert_eq!( { - let result = parse_term(Span::new("15+3*2-(7+35)*8/3")); + let result = parse_term::>(Span::new("15+3*2-(7+35)*8/3")); result.unwrap().1 }, Term::Binary { span: s!(0, 1, "15+3*2-(7+35)*8/3"), - lhs: Box::new(Term::Primitive(T! {Number, 0,1,"15"})), + lhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 0,1,"15"}))), ws1: None, operation: T! {Plus, 2,1,"+"}, ws2: None, @@ -3979,11 +4257,11 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters span: s!(3, 1, "3*2-(7+35)*8/3"), lhs: Box::new(Term::Binary { span: s!(3, 1, "3*2"), - lhs: Box::new(Term::Primitive(T! {Number, 3,1,"3"})), + lhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 3,1,"3"}))), ws1: None, operation: T! {Star, 4,1,"*"}, ws2: None, - rhs: Box::new(Term::Primitive(T! {Number, 5,1,"2"})), + rhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 5,1,"2"}))), }), ws1: None, operation: T! {Minus, 6,1,"-"}, @@ -4000,11 +4278,15 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters span: s!(8, 1, "7+35"), first: Term::Binary { span: s!(8, 1, "7+35"), - lhs: Box::new(Term::Primitive(T! {Number, 8,1,"7"})), + lhs: Box::new(Term::Primitive(Primitive::Number( + T! {Number, 8,1,"7"} + ))), ws1: None, operation: T! {Plus, 9,1,"+"}, ws2: None, - rhs: Box::new(Term::Primitive(T! {Number, 10,1,"35"})), + rhs: Box::new(Term::Primitive(Primitive::Number( + T! {Number, 10,1,"35"} + ))), }, rest: None, }), @@ -4016,11 +4298,15 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters ws2: None, rhs: Box::new(Term::Binary { span: s!(14, 1, "8/3"), - lhs: Box::new(Term::Primitive(T! {Number, 14,1,"8"})), + lhs: Box::new(Term::Primitive(Primitive::Number( + T! {Number, 14,1,"8"} + ))), ws1: None, operation: T! {Slash, 15, 1, "/"}, ws2: None, - rhs: Box::new(Term::Primitive(T! {Number, 16,1,"3"})), + rhs: Box::new(Term::Primitive(Primitive::Number( + T! 
{Number, 16,1,"3"} + ))), }), }), }), @@ -4028,27 +4314,27 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters ); // assert_eq!({ - // let result = parse_term(Span::new("1*2*3*4*5")); + // let result = parse_term::>(Span::new("1*2*3*4*5")); // result.unwrap().1 // },); // assert_eq!({ - // let result = parse_term(Span::new("(5+3)")); + // let result = parse_term::>(Span::new("(5+3)")); // result.unwrap().1 // },); // assert_eq!({ - // let result = parse_term(Span::new("( int , int , string , skip )")); + // let result = parse_term::>(Span::new("( int , int , string , skip )")); // result.unwrap().1 // },); // assert_eq!({ - // let result = parse_term(Span::new("(14+4)+3")); + // let result = parse_term::>(Span::new("(14+4)+3")); // result.unwrap().1 // },); // assert_eq!({ - // let result = parse_term(Span::new( + // let result = parse_term::>(Span::new( // "(3 + #sum(?X, ?Y)) * (LENGTH(\"Hello, World!\") + 3)", // )); // result.unwrap().1 From 6d56414ae9dfd9e6421d95e79e73b40cd0fee810 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Wed, 24 Apr 2024 17:38:24 +0200 Subject: [PATCH 092/214] Add enum Primitive for primitives composed of more than one token --- nemo/src/io/parser/ast.rs | 14 +++--- nemo/src/io/parser/ast/term.rs | 83 +++++++++++++++++++++++++++++++++- 2 files changed, 89 insertions(+), 8 deletions(-) diff --git a/nemo/src/io/parser/ast.rs b/nemo/src/io/parser/ast.rs index 1c838a026..722aff57f 100644 --- a/nemo/src/io/parser/ast.rs +++ b/nemo/src/io/parser/ast.rs @@ -109,7 +109,7 @@ pub(crate) fn ast_to_ascii_tree(node: &dyn AstNode) -> Tree { mod test { use super::*; - use super::{atom::Atom, directive::Directive, tuple::Tuple, program::Program, statement::Statement, term::Term}; + use super::{atom::Atom, directive::Directive, tuple::Tuple, program::Program, statement::Statement, term::Term, term::Primitive}; use crate::io::lexer::TokenKind; macro_rules! s { @@ -201,10 +201,10 @@ mod test { ws2:None , terms: Some(List { span: s!(236, 8, "ConstA, ConstB"), - first: Term::Primitive(Token { + first: Term::Primitive(Primitive::Constant(Token { kind: TokenKind::Ident, span: s!(236, 8, "ConstA"), - }), + })), rest: Some(vec![( None, Token { @@ -215,10 +215,10 @@ mod test { kind: TokenKind::Whitespace, span: s!(243, 8, " "), }), - Term::Primitive(Token { + Term::Primitive(Primitive::Constant(Token { kind: TokenKind::Ident, span: s!(244, 8, "ConstB"), - }), + })), )]), }), ws3: None , @@ -298,10 +298,10 @@ mod test { kind: TokenKind::Whitespace, span: s!(334, 12, " "), }), - Term::Primitive(Token { + Term::Primitive(Primitive::Constant(Token { kind: TokenKind::Ident, span: s!(335, 12, "ConstB"), - }), + })), )]), }), ws3: None, diff --git a/nemo/src/io/parser/ast/term.rs b/nemo/src/io/parser/ast/term.rs index a058e401b..e81ccf09e 100644 --- a/nemo/src/io/parser/ast/term.rs +++ b/nemo/src/io/parser/ast/term.rs @@ -6,7 +6,7 @@ use ascii_tree::write_tree; #[derive(Debug, Clone, PartialEq)] pub(crate) enum Term<'a> { - Primitive(Token<'a>), + Primitive(Primitive<'a>), Variable(Token<'a>), Existential(Token<'a>), // TODO: Is whitespace needed? 
Figure out how unary terms look @@ -156,3 +156,84 @@ impl std::fmt::Display for Term<'_> { write!(f, "{output}") } } + +#[derive(Debug, Clone, PartialEq)] +pub(crate) enum Primitive<'a> { + Constant(Token<'a>), + Number(Token<'a>), + String(Token<'a>), + Iri(Token<'a>), + RdfLiteral { + span: Span<'a>, + string: Token<'a>, + carets: Token<'a>, + iri: Token<'a>, + }, +} +impl AstNode for Primitive<'_> { + fn children(&self) -> Option> { + match self { + Primitive::Constant(token) => Some(vec![token]), + Primitive::Number(token) => Some(vec![token]), + Primitive::String(token) => Some(vec![token]), + Primitive::Iri(token) => Some(vec![token]), + Primitive::RdfLiteral { + string, + carets, + iri, + .. + } => Some(vec![string, carets, iri]), + } + } + + fn span(&self) -> Span { + match self { + Primitive::Constant(token) => token.span, + Primitive::Number(token) => token.span, + Primitive::String(token) => token.span, + Primitive::Iri(token) => token.span, + Primitive::RdfLiteral { span, .. } => *span, + } + } + + fn position(&self) -> Position { + let span = self.span(); + Position { + offset: span.location_offset(), + line: span.location_line(), + column: span.get_utf8_column() as u32, + } + } + + fn is_token(&self) -> bool { + false + } + + fn name(&self) -> String { + macro_rules! name { + ($name:literal) => { + format!( + "{} \x1b[34m@{}:{} \x1b[92m{:?}\x1b[0m", + $name, + self.span().location_line(), + self.span().get_utf8_column(), + self.span().fragment() + ) + }; + } + match self { + Primitive::Constant(_) => name!("Constant"), + Primitive::Number(_) => name!("Number"), + Primitive::String(_) => name!("String"), + Primitive::Iri(_) => name!("Iri"), + Primitive::RdfLiteral { .. } => name!("RDF Literal"), + } + } +} +impl std::fmt::Display for Primitive<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut output = String::new(); + write_tree(&mut output, &ast_to_ascii_tree(self))?; + write!(f, "{output}") + } +} From 1fadcd2f936891bfe5c39ef45699dab49e1ce0e9 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Thu, 25 Apr 2024 16:57:31 +0200 Subject: [PATCH 093/214] Add decimal number parsing --- nemo/src/io/lexer.rs | 9 + nemo/src/io/parser.rs | 533 ++++++++++++++++++++++++--------- nemo/src/io/parser/ast/term.rs | 81 ++++- 3 files changed, 480 insertions(+), 143 deletions(-) diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs index 4dd5f0ded..ebb608d10 100644 --- a/nemo/src/io/lexer.rs +++ b/nemo/src/io/lexer.rs @@ -74,6 +74,8 @@ pub(crate) enum TokenKind { Star, /// '/' Slash, + /// 'e' or 'E' + Exponent, // Multi-char tokens: /// Identifier for keywords and names Ident, @@ -144,6 +146,7 @@ impl std::fmt::Display for TokenKind { TokenKind::Minus => write!(f, "Minus"), TokenKind::Star => write!(f, "Star"), TokenKind::Slash => write!(f, "Slash"), + TokenKind::Exponent => write!(f, "Exponent"), TokenKind::Ident => write!(f, "Ident"), TokenKind::Variable => write!(f, "Variable"), TokenKind::Existential => write!(f, "Existential"), @@ -253,6 +256,12 @@ syntax!(caret, "^", TokenKind::Caret); syntax!(hash, "#", TokenKind::Hash); syntax!(underscore, "_", TokenKind::Underscore); syntax!(at, "@", TokenKind::At); +syntax!(exp_lower, "e", TokenKind::Exponent); +syntax!(exp_upper, "E", TokenKind::Exponent); + +pub(crate) fn exp<'a, E: ParseError>>(input: Span<'a>) -> IResult, Token, E> { + alt((exp_lower, exp_upper))(input) +} pub(crate) fn lex_punctuations<'a, E: ParseError>>( input: Span<'a>, diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index 
e027612ad..5f87b2fcf 100644
--- a/nemo/src/io/parser.rs
+++ b/nemo/src/io/parser.rs
@@ -2435,8 +2435,8 @@ mod new {
     };
     use crate::io::lexer::{
         arrow, at, caret, close_brace, close_paren, colon, comma, dot, equal, exclamation_mark,
-        greater, greater_equal, hash, less, less_equal, lex_comment, lex_doc_comment, lex_ident,
-        lex_iri, lex_number, lex_operators, lex_string, lex_toplevel_doc_comment,
+        exp, greater, greater_equal, hash, less, less_equal, lex_comment, lex_doc_comment,
+        lex_ident, lex_iri, lex_number, lex_operators, lex_string, lex_toplevel_doc_comment,
         lex_unary_prefix_operators, lex_whitespace, minus, open_brace, open_paren, plus,
         question_mark, slash, star, tilde, unequal, Span, Token, TokenKind,
     };
@@ -3257,8 +3257,63 @@ mod new {
     fn parse_number<'a, E: ParseError<Span<'a>> + ContextError<Span<'a>>>(
         input: Span<'a>,
     ) -> IResult<Span<'a>, Primitive<'a>, E> {
-        context("parse number", lex_number)(input)
-            .map(|(rest_input, number)| (rest_input, Primitive::Number(number)))
+        context("parse number", alt((parse_decimal, parse_integer)))(input)
+    }
+
+    fn parse_decimal<'a, E: ParseError<Span<'a>> + ContextError<Span<'a>>>(
+        input: Span<'a>,
+    ) -> IResult<Span<'a>, Primitive<'a>, E> {
+        context(
+            "parse decimal",
+            tuple((
+                opt(alt((plus, minus))),
+                opt(lex_number),
+                dot,
+                lex_number,
+                opt(parse_exponent),
+            )),
+        )(input)
+        .map(|(rest_input, (sign, before, dot, after, exponent))| {
+            dbg!(&sign, &before, &dot, &after, &exponent);
+            (
+                rest_input,
+                Primitive::Number {
+                    span: outer_span(input, rest_input),
+                    sign,
+                    before,
+                    dot: Some(dot),
+                    after,
+                    exponent,
+                },
+            )
+        })
+    }
+
+    fn parse_integer<'a, E: ParseError<Span<'a>> + ContextError<Span<'a>>>(
+        input: Span<'a>,
+    ) -> IResult<Span<'a>, Primitive<'a>, E> {
+        context("parse integer", pair(opt(alt((plus, minus))), lex_number))(input).map(
+            |(rest_input, (sign, number))| {
+                (
+                    rest_input,
+                    Primitive::Number {
+                        span: outer_span(input, rest_input),
+                        sign,
+                        before: None,
+                        dot: None,
+                        after: number,
+                        exponent: None,
+                    },
+                )
+            },
+        )
+    }
+
+    fn parse_exponent<'a, E: ParseError<Span<'a>> + ContextError<Span<'a>>>(
+        input: Span<'a>,
+    ) -> IResult<Span<'a>, Exponent<'a>, E> {
+        tuple((exp, opt(alt((plus, minus))), lex_number))(input)
+            .map(|(rest_input, (e, sign, number))| (rest_input, Exponent { e, sign, number }))
     }
 
     fn parse_string<'a, E: ParseError<Span<'a>> + ContextError<Span<'a>>>(
@@ -3508,7 +3563,14 @@ mod new {
         // },
     };
 
-    macro_rules! S {
+    macro_rules! T {
+        ($tok_kind: expr, $offset: literal, $line: literal, $str: literal) => {
+            Token::new($tok_kind, unsafe {
+                Span::new_from_raw_offset($offset, $line, $str, ())
+            })
+        };
+    }
+    macro_rules!
s { ($offset:literal,$line:literal,$str:literal) => { unsafe { Span::new_from_raw_offset($offset, $line, $str, ()) } }; @@ -3539,49 +3601,49 @@ mod new { span: input, tl_doc_comment: None, statements: vec![Statement::Fact { - span: S!(0, 1, "a(B,C)."), + span: s!(0, 1, "a(B,C)."), doc_comment: None, atom: Atom::Positive(Tuple { - span: S!(0, 1, "a(B,C)"), + span: s!(0, 1, "a(B,C)"), identifier: Some(Token { kind: TokenKind::Ident, - span: S!(0, 1, "a"), + span: s!(0, 1, "a"), }), ws1: None, open_paren: Token { kind: TokenKind::OpenParen, - span: S!(1, 1, "("), + span: s!(1, 1, "("), }, ws2: None, terms: Some(List { - span: S!(2, 1, "B,C"), + span: s!(2, 1, "B,C"), first: Term::Primitive(Primitive::Constant(Token { kind: TokenKind::Ident, - span: S!(2, 1, "B"), + span: s!(2, 1, "B"), })), rest: Some(vec![( None, Token { kind: TokenKind::Comma, - span: S!(3, 1, ",") + span: s!(3, 1, ",") }, None, Term::Primitive(Primitive::Constant(Token { kind: TokenKind::Ident, - span: S!(4, 1, "C"), + span: s!(4, 1, "C"), })), )]), }), ws3: None, close_paren: Token { kind: TokenKind::CloseParen, - span: S!(5, 1, ")"), + span: s!(5, 1, ")"), }, }), ws: None, dot: Token { kind: TokenKind::Dot, - span: S!(6, 1, ".") + span: s!(6, 1, ".") } }], } @@ -3600,28 +3662,28 @@ mod new { span: input, statements: vec![ Statement::Directive(Directive::Base { - span: S!(0, 1, "@base ."), + span: s!(0, 1, "@base ."), doc_comment: None, kw: Token { kind: TokenKind::Base, - span: S!(0, 1, "@base"), + span: s!(0, 1, "@base"), }, ws1: Some(Token { kind: TokenKind::Whitespace, - span: S!(5, 1, " ") + span: s!(5, 1, " ") }), base_iri: Token { kind: TokenKind::Iri, - span: S!(6, 1, "") + span: s!(6, 1, "") }, ws2: None, dot: Token { kind: TokenKind::Dot, - span: S!(31, 1, ".") + span: s!(31, 1, ".") }, }), Statement::Directive(Directive::Prefix { - span: S!( + span: s!( 32, 1, "@prefix rdfs:." 
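// A quick illustration of the two test helpers introduced in this patch and
// used throughout these expectations (going only by the macro definitions
// shown above): the shorthand
//
//     T! {Number, 0, 1, "42"}
//
// expands, in effect, to
//
//     Token::new(TokenKind::Number, unsafe {
//         Span::new_from_raw_offset(0, 1, "42", ())
//     })
//
// where 0 is the byte offset into the parsed input, 1 the line number, and
// "42" the covered fragment; `s!` builds just the raw `Span` the same way.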
@@ -3629,29 +3691,29 @@ mod new { doc_comment: None, kw: Token { kind: TokenKind::Prefix, - span: S!(32, 1, "@prefix"), + span: s!(32, 1, "@prefix"), }, ws1: Some(Token { kind: TokenKind::Whitespace, - span: S!(39, 1, " ") + span: s!(39, 1, " ") }), prefix: Token { kind: TokenKind::Ident, - span: S!(40, 1, "rdfs:"), + span: s!(40, 1, "rdfs:"), }, ws2: None, prefix_iri: Token { kind: TokenKind::Iri, - span: S!(45, 1, ""), + span: s!(45, 1, ""), }, ws3: None, dot: Token { kind: TokenKind::Dot, - span: S!(84, 1, ".") + span: s!(84, 1, ".") } }), Statement::Directive(Directive::Import { - span: S!( + span: s!( 85, 1, r#"@import sourceA:-csv{resource="sources/dataA.csv"}."# @@ -3659,51 +3721,51 @@ mod new { doc_comment: None, kw: Token { kind: TokenKind::Import, - span: S!(85, 1, "@import"), + span: s!(85, 1, "@import"), }, ws1: Token { kind: TokenKind::Whitespace, - span: S!(92, 1, " "), + span: s!(92, 1, " "), }, predicate: Token { kind: TokenKind::Ident, - span: S!(93, 1, "sourceA"), + span: s!(93, 1, "sourceA"), }, ws2: None, arrow: Token { kind: TokenKind::Arrow, - span: S!(100, 1, ":-"), + span: s!(100, 1, ":-"), }, ws3: None, map: Map { - span: S!(102, 1, r#"csv{resource="sources/dataA.csv"}"#), + span: s!(102, 1, r#"csv{resource="sources/dataA.csv"}"#), identifier: Some(Token { kind: TokenKind::Ident, - span: S!(102, 1, "csv") + span: s!(102, 1, "csv") }), ws1: None, open_brace: Token { kind: TokenKind::OpenBrace, - span: S!(105, 1, "{") + span: s!(105, 1, "{") }, ws2: None, pairs: Some(List { - span: S!(106, 1, "resource=\"sources/dataA.csv\""), + span: s!(106, 1, "resource=\"sources/dataA.csv\""), first: Pair { - span: S!(106, 1, "resource=\"sources/dataA.csv\""), + span: s!(106, 1, "resource=\"sources/dataA.csv\""), key: Term::Primitive(Primitive::Constant(Token { kind: TokenKind::Ident, - span: S!(106, 1, "resource"), + span: s!(106, 1, "resource"), })), ws1: None, equal: Token { kind: TokenKind::Equal, - span: S!(114, 1, "="), + span: s!(114, 1, "="), }, ws2: None, value: Term::Primitive(Primitive::String(Token { kind: TokenKind::String, - span: S!(115, 1, "\"sources/dataA.csv\""), + span: s!(115, 1, "\"sources/dataA.csv\""), })), }, rest: None, @@ -3711,107 +3773,107 @@ mod new { ws3: None, close_brace: Token { kind: TokenKind::CloseBrace, - span: S!(134, 1, "}") + span: s!(134, 1, "}") }, }, ws4: None, dot: Token { kind: TokenKind::Dot, - span: S!(135, 1, ".") + span: s!(135, 1, ".") } }), Statement::Directive(Directive::Export { - span: S!(136, 1, "@export a:-csv{}."), + span: s!(136, 1, "@export a:-csv{}."), doc_comment: None, kw: Token { kind: TokenKind::Export, - span: S!(136, 1, "@export"), + span: s!(136, 1, "@export"), }, ws1: Token { kind: TokenKind::Whitespace, - span: S!(143, 1, " "), + span: s!(143, 1, " "), }, predicate: Token { kind: TokenKind::Ident, - span: S!(144, 1, "a"), + span: s!(144, 1, "a"), }, ws2: None, arrow: Token { kind: TokenKind::Arrow, - span: S!(145, 1, ":-"), + span: s!(145, 1, ":-"), }, ws3: None, map: Map { - span: S!(147, 1, "csv{}"), + span: s!(147, 1, "csv{}"), identifier: Some(Token { kind: TokenKind::Ident, - span: S!(147, 1, "csv"), + span: s!(147, 1, "csv"), }), ws1: None, open_brace: Token { kind: TokenKind::OpenBrace, - span: S!(150, 1, "{"), + span: s!(150, 1, "{"), }, ws2: None, pairs: None, ws3: None, close_brace: Token { kind: TokenKind::CloseBrace, - span: S!(151, 1, "}"), + span: s!(151, 1, "}"), }, }, ws4: None, dot: Token { kind: TokenKind::Dot, - span: S!(152, 1, "."), + span: s!(152, 1, "."), }, }), 
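                // The spans in these expectations are plain byte offsets into
                // the test input: `@export a:-csv{}.` above starts at offset
                // 136 with its closing dot at 152, so the `@output a, b, c.`
                // directive below starts at offset 153 and ends at 168.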
Statement::Directive(Directive::Output { - span: S!(153, 1, "@output a, b, c."), + span: s!(153, 1, "@output a, b, c."), doc_comment: None, kw: Token { kind: TokenKind::Output, - span: S!(153, 1, "@output") + span: s!(153, 1, "@output") }, ws1: Token { kind: TokenKind::Whitespace, - span: S!(160, 1, " "), + span: s!(160, 1, " "), }, predicates: Some(List { - span: S!(161, 1, "a, b, c"), + span: s!(161, 1, "a, b, c"), first: Token { kind: TokenKind::Ident, - span: S!(161, 1, "a"), + span: s!(161, 1, "a"), }, rest: Some(vec![ ( None, Token { kind: TokenKind::Comma, - span: S!(162, 1, ","), + span: s!(162, 1, ","), }, Some(Token { kind: TokenKind::Whitespace, - span: S!(163, 1, " "), + span: s!(163, 1, " "), }), Token { kind: TokenKind::Ident, - span: S!(164, 1, "b"), + span: s!(164, 1, "b"), }, ), ( None, Token { kind: TokenKind::Comma, - span: S!(165, 1, ","), + span: s!(165, 1, ","), }, Some(Token { kind: TokenKind::Whitespace, - span: S!(166, 1, " "), + span: s!(166, 1, " "), }), Token { kind: TokenKind::Ident, - span: S!(167, 1, "c"), + span: s!(167, 1, "c"), }, ), ]), @@ -3819,7 +3881,7 @@ mod new { ws2: None, dot: Token { kind: TokenKind::Dot, - span: S!(168, 1, "."), + span: s!(168, 1, "."), } }), ], @@ -3833,10 +3895,10 @@ mod new { assert_eq!( super::ignore_ws_and_comments(lex_ident::>)(input), Ok(( - S!(22, 2, ""), + s!(22, 2, ""), Token { kind: TokenKind::Ident, - span: S!(3, 1, "Hi") + span: s!(3, 1, "Hi") } )) ) @@ -3852,55 +3914,55 @@ mod new { tl_doc_comment: None, statements: vec![ Statement::Fact { - span: S!(0, 1, "some(Fact, with, whitespace) ."), + span: s!(0, 1, "some(Fact, with, whitespace) ."), doc_comment: None, atom: Atom::Positive(Tuple { - span: S!(0, 1, "some(Fact, with, whitespace)"), + span: s!(0, 1, "some(Fact, with, whitespace)"), identifier: Some(Token { kind: TokenKind::Ident, - span: S!(0, 1, "some"), + span: s!(0, 1, "some"), }), ws1: None, open_paren: Token { kind: TokenKind::OpenParen, - span: S!(4, 1, "(") + span: s!(4, 1, "(") }, ws2: None, terms: Some(List { - span: S!(5, 1, "Fact, with, whitespace"), + span: s!(5, 1, "Fact, with, whitespace"), first: Term::Primitive(Primitive::Constant(Token { kind: TokenKind::Ident, - span: S!(5, 1, "Fact"), + span: s!(5, 1, "Fact"), })), rest: Some(vec![ ( None, Token { kind: TokenKind::Comma, - span: S!(9, 1, ","), + span: s!(9, 1, ","), }, Some(Token { kind: TokenKind::Whitespace, - span: S!(10, 1, " "), + span: s!(10, 1, " "), }), Term::Primitive(Primitive::Constant(Token { kind: TokenKind::Ident, - span: S!(11, 1, "with") + span: s!(11, 1, "with") })), ), ( None, Token { kind: TokenKind::Comma, - span: S!(15, 1, ","), + span: s!(15, 1, ","), }, Some(Token { kind: TokenKind::Whitespace, - span: S!(16, 1, " "), + span: s!(16, 1, " "), }), Term::Primitive(Primitive::Constant(Token { kind: TokenKind::Ident, - span: S!(17, 1, "whitespace") + span: s!(17, 1, "whitespace") })), ), ]), @@ -3908,25 +3970,25 @@ mod new { ws3: None, close_paren: Token { kind: TokenKind::CloseParen, - span: S!(27, 1, ")") + span: s!(27, 1, ")") }, }), ws: Some(Token { kind: TokenKind::Whitespace, - span: S!(28, 1, " "), + span: s!(28, 1, " "), }), dot: Token { kind: TokenKind::Dot, - span: S!(29, 1, "."), + span: s!(29, 1, "."), }, }, Statement::Whitespace(Token { kind: TokenKind::Whitespace, - span: S!(30, 1, " ") + span: s!(30, 1, " ") }), Statement::Comment(Token { kind: TokenKind::Comment, - span: S!(31, 1, "% and a super useful comment\n") + span: s!(31, 1, "% and a super useful comment\n") }) ], } @@ -3998,25 +4060,20 @@ 
oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters #[test] fn arithmetic_expressions() { use TokenKind::*; - macro_rules! T { - ($tok_kind: expr, $offset: literal, $line: literal, $str: literal) => { - Token::new($tok_kind, unsafe { - Span::new_from_raw_offset($offset, $line, $str, ()) - }) - }; - } - macro_rules! s { - ($offset:literal,$line:literal,$str:literal) => { - unsafe { Span::new_from_raw_offset($offset, $line, $str, ()) } - }; - } assert_eq!( { let result = parse_term::>(Span::new("42")); result.unwrap().1 }, - Term::Primitive(Primitive::Number(T! {Number, 0, 1, "42"})), + Term::Primitive(Primitive::Number { + span: s!(0, 1, "42"), + sign: None, + before: None, + dot: None, + after: T! {Number, 0, 1, "42"}, + exponent: None, + }), ); assert_eq!( @@ -4026,11 +4083,25 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters }, Term::Binary { span: s!(0, 1, "35+7"), - lhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 0, 1, "35"}))), + lhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(0, 1, "35"), + sign: None, + before: None, + dot: None, + after: T! {Number, 0, 1, "35"}, + exponent: None, + })), ws1: None, operation: T! {Plus, 2, 1, "+"}, ws2: None, - rhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 3, 1, "7"}))), + rhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(3, 1, "7"), + sign: None, + before: None, + dot: None, + after: T! {Number, 3, 1, "7"}, + exponent: None, + })), } ); @@ -4041,11 +4112,25 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters }, Term::Binary { span: s!(0, 1, "6*7"), - lhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 0,1,"6"}))), + lhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(0, 1, "6"), + sign: None, + before: None, + dot: None, + after: T! {Number, 0,1,"6"}, + exponent: None, + })), ws1: None, operation: T! {Star, 1,1,"*"}, ws2: None, - rhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 2,1,"7"}))), + rhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(2, 1, "7"), + sign: None, + before: None, + dot: None, + after: T! {Number, 2,1,"7"}, + exponent: None, + })), } ); @@ -4056,11 +4141,25 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters }, Term::Binary { span: s!(0, 1, "49-7"), - lhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 0, 1, "49"}))), + lhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(0, 1, "49"), + sign: None, + before: None, + dot: None, + after: T! {Number, 0, 1, "49"}, + exponent: None, + })), ws1: None, operation: T! {Minus, 2, 1, "-"}, ws2: None, - rhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 3, 1, "7"}))), + rhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(3, 1, "7"), + sign: None, + before: None, + dot: None, + after: T! {Number, 3, 1, "7"}, + exponent: None, + })), } ); @@ -4071,11 +4170,25 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters }, Term::Binary { span: s!(0, 1, "84/2"), - lhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 0, 1, "84"}))), + lhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(0, 1, "84"), + sign: None, + before: None, + dot: None, + after: T! {Number, 0, 1, "84"}, + exponent: None, + })), ws1: None, operation: T! {Slash, 2, 1, "/"}, ws2: None, - rhs: Box::new(Term::Primitive(Primitive::Number(T! 
{Number, 3, 1, "2"}))), + rhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(3, 1, "2"), + sign: None, + before: None, + dot: None, + after: T! {Number, 3, 1, "2"}, + exponent: None, + })), } ); @@ -4088,16 +4201,37 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters span: s!(0, 1, "5*7+7"), lhs: Box::new(Term::Binary { span: s!(0, 1, "5*7"), - lhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 0,1,"5"}))), + lhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(0, 1, "5"), + sign: None, + before: None, + dot: None, + after: T! {Number, 0,1,"5"}, + exponent: None, + })), ws1: None, operation: T! {Star, 1,1,"*"}, ws2: None, - rhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 2,1,"7"}))), + rhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(2, 1, "7"), + sign: None, + before: None, + dot: None, + after: T! {Number, 2,1,"7"}, + exponent: None, + })), }), ws1: None, operation: T! {Plus, 3,1,"+"}, ws2: None, - rhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 4,1,"7"}))), + rhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(4, 1, "7"), + sign: None, + before: None, + dot: None, + after: T! {Number, 4,1,"7"}, + exponent: None, + })), } ); @@ -4108,17 +4242,38 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters }, Term::Binary { span: s!(0, 1, "7+5*7"), - lhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 0,1,"7"}))), + lhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(0, 1, "7"), + sign: None, + before: None, + dot: None, + after: T! {Number, 0,1,"7"}, + exponent: None + })), ws1: None, operation: T! {Plus, 1,1,"+"}, ws2: None, rhs: Box::new(Term::Binary { span: s!(2, 1, "5*7"), - lhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 2,1,"5"}))), + lhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(2, 1, "5"), + sign: None, + before: None, + dot: None, + after: T! {Number, 2,1,"5"}, + exponent: None + })), ws1: None, operation: T! {Star, 3,1,"*"}, ws2: None, - rhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 4,1,"7"}))), + rhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(4, 1, "7"), + sign: None, + before: None, + dot: None, + after: T! {Number, 4,1,"7"}, + exponent: None + })), }), } ); @@ -4163,9 +4318,14 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters span: s!(1, 1, "15+3*2-(7+35)*8"), first: Term::Binary { span: s!(1, 1, "15+3*2-(7+35)*8"), - lhs: Box::new(Term::Primitive(Primitive::Number( - T! {Number, 1,1,"15"} - ))), + lhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(1, 1, "15"), + sign: None, + before: None, + dot: None, + after: T! {Number, 1,1,"15"}, + exponent: None, + })), ws1: None, operation: T! {Plus, 3,1,"+"}, ws2: None, @@ -4173,15 +4333,25 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters span: s!(4, 1, "3*2-(7+35)*8"), lhs: Box::new(Term::Binary { span: s!(4, 1, "3*2"), - lhs: Box::new(Term::Primitive(Primitive::Number( - T! {Number, 4,1,"3"} - ))), + lhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(4, 1, "3"), + sign: None, + before: None, + dot: None, + after: T! {Number, 4,1,"3"}, + exponent: None, + })), ws1: None, operation: T! {Star, 5,1,"*"}, ws2: None, - rhs: Box::new(Term::Primitive(Primitive::Number( - T! {Number, 6,1,"2"} - ))), + rhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(6, 1, "2"), + sign: None, + before: None, + dot: None, + after: T! 
{Number, 6,1,"2"}, + exponent: None, + })), }), ws1: None, operation: T! {Minus, 7,1,"-"}, @@ -4199,13 +4369,27 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters first: Term::Binary { span: s!(9, 1, "7+35"), lhs: Box::new(Term::Primitive( - Primitive::Number(T! {Number, 9,1,"7"}) + Primitive::Number { + span: s!(9, 1, "7"), + sign: None, + before: None, + dot: None, + after: T! {Number, 9,1,"7"}, + exponent: None, + } )), ws1: None, operation: T! {Plus, 10,1,"+"}, ws2: None, rhs: Box::new(Term::Primitive( - Primitive::Number(T! {Number, 11,1,"35"}) + Primitive::Number { + span: s!(11, 1, "35"), + sign: None, + before: None, + dot: None, + after: T! {Number, 11,1,"35"}, + exponent: None, + } )), }, rest: None @@ -4216,9 +4400,14 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters ws1: None, operation: T! {Star, 14,1,"*"}, ws2: None, - rhs: Box::new(Term::Primitive(Primitive::Number( - T! {Number, 15,1,"8"} - ))), + rhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(15, 1, "8"), + sign: None, + before: None, + dot: None, + after: T! {Number, 15,1,"8"}, + exponent: None, + })), }), }), }, @@ -4230,7 +4419,14 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters ws1: None, operation: T! {Slash, 17,1,"/"}, ws2: None, - rhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 18,1,"3"}))), + rhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(18, 1, "3"), + sign: None, + before: None, + dot: None, + after: T! {Number, 18,1,"3"}, + exponent: None, + })), } ); // Term::Binary { @@ -4249,7 +4445,14 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters }, Term::Binary { span: s!(0, 1, "15+3*2-(7+35)*8/3"), - lhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 0,1,"15"}))), + lhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(0, 1, "15"), + sign: None, + before: None, + dot: None, + after: T! {Number, 0,1,"15"}, + exponent: None, + })), ws1: None, operation: T! {Plus, 2,1,"+"}, ws2: None, @@ -4257,11 +4460,25 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters span: s!(3, 1, "3*2-(7+35)*8/3"), lhs: Box::new(Term::Binary { span: s!(3, 1, "3*2"), - lhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 3,1,"3"}))), + lhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(3, 1, "3"), + sign: None, + before: None, + dot: None, + after: T! {Number, 3,1,"3"}, + exponent: None, + })), ws1: None, operation: T! {Star, 4,1,"*"}, ws2: None, - rhs: Box::new(Term::Primitive(Primitive::Number(T! {Number, 5,1,"2"}))), + rhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(5, 1, "2"), + sign: None, + before: None, + dot: None, + after: T! {Number, 5,1,"2"}, + exponent: None, + })), }), ws1: None, operation: T! {Minus, 6,1,"-"}, @@ -4278,15 +4495,25 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters span: s!(8, 1, "7+35"), first: Term::Binary { span: s!(8, 1, "7+35"), - lhs: Box::new(Term::Primitive(Primitive::Number( - T! {Number, 8,1,"7"} - ))), + lhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(8, 1, "7"), + sign: None, + before: None, + dot: None, + after: T! {Number, 8,1,"7"}, + exponent: None, + })), ws1: None, operation: T! {Plus, 9,1,"+"}, ws2: None, - rhs: Box::new(Term::Primitive(Primitive::Number( - T! 
{Number, 10,1,"35"} - ))), + rhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(10, 1, "35"), + sign: None, + before: None, + dot: None, + after: T! {Number, 10,1,"35"}, + exponent: None, + })), }, rest: None, }), @@ -4298,15 +4525,25 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters ws2: None, rhs: Box::new(Term::Binary { span: s!(14, 1, "8/3"), - lhs: Box::new(Term::Primitive(Primitive::Number( - T! {Number, 14,1,"8"} - ))), + lhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(14, 1, "8"), + sign: None, + before: None, + dot: None, + after: T! {Number, 14,1,"8"}, + exponent: None, + })), ws1: None, operation: T! {Slash, 15, 1, "/"}, ws2: None, - rhs: Box::new(Term::Primitive(Primitive::Number( - T! {Number, 16,1,"3"} - ))), + rhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(16, 1, "3"), + sign: None, + before: None, + dot: None, + after: T! {Number, 16,1,"3"}, + exponent: None, + })), }), }), }), @@ -4340,5 +4577,23 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters // result.unwrap().1 // },); } + + #[test] + fn number_exp() { + assert_eq!( + { + let input = Span::new("e42"); + parse_exponent::>(input) + }, + Ok(( + s!(3, 1, ""), + Exponent { + e: T! {TokenKind::Exponent, 0,1,"e"}, + sign: None, + number: T! {TokenKind::Number, 1,1,"42"} + } + )) + ) + } } } diff --git a/nemo/src/io/parser/ast/term.rs b/nemo/src/io/parser/ast/term.rs index e81ccf09e..c8f734481 100644 --- a/nemo/src/io/parser/ast/term.rs +++ b/nemo/src/io/parser/ast/term.rs @@ -160,7 +160,14 @@ impl std::fmt::Display for Term<'_> { #[derive(Debug, Clone, PartialEq)] pub(crate) enum Primitive<'a> { Constant(Token<'a>), - Number(Token<'a>), + Number { + span: Span<'a>, + sign: Option>, + before: Option>, + dot: Option>, + after: Token<'a>, + exponent: Option>, + }, String(Token<'a>), Iri(Token<'a>), RdfLiteral { @@ -174,7 +181,33 @@ impl AstNode for Primitive<'_> { fn children(&self) -> Option> { match self { Primitive::Constant(token) => Some(vec![token]), - Primitive::Number(token) => Some(vec![token]), + Primitive::Number { + sign, + before, + dot, + after, + exponent, + .. + } => { + let mut vec = Vec::new(); + #[allow(trivial_casts)] + if let Some(s) = sign { + vec.push(s as &dyn AstNode); + } + if let Some(b) = before { + vec.push(b); + } + if let Some(d) = dot { + vec.push(d); + } + vec.push(after); + if let Some(exp) = exponent { + if let Some(mut children) = exp.children() { + vec.append(&mut children); + } + } + Some(vec) + } Primitive::String(token) => Some(vec![token]), Primitive::Iri(token) => Some(vec![token]), Primitive::RdfLiteral { @@ -189,7 +222,7 @@ impl AstNode for Primitive<'_> { fn span(&self) -> Span { match self { Primitive::Constant(token) => token.span, - Primitive::Number(token) => token.span, + Primitive::Number { span, .. } => *span, Primitive::String(token) => token.span, Primitive::Iri(token) => token.span, Primitive::RdfLiteral { span, .. } => *span, @@ -223,7 +256,7 @@ impl AstNode for Primitive<'_> { } match self { Primitive::Constant(_) => name!("Constant"), - Primitive::Number(_) => name!("Number"), + Primitive::Number { .. } => name!("Number"), Primitive::String(_) => name!("String"), Primitive::Iri(_) => name!("Iri"), Primitive::RdfLiteral { .. 
} => name!("RDF Literal"), @@ -237,3 +270,43 @@ impl std::fmt::Display for Primitive<'_> { write!(f, "{output}") } } + +#[derive(Debug, Clone, PartialEq)] +pub(crate) struct Exponent<'a> { + pub(crate) e: Token<'a>, + pub(crate) sign: Option>, + pub(crate) number: Token<'a>, +} +impl AstNode for Exponent<'_> { + fn children(&self) -> Option> { + let mut vec = Vec::new(); + #[allow(trivial_casts)] + vec.push(&self.e as &dyn AstNode); + if let Some(s) = &self.sign { + vec.push(s); + }; + vec.push(&self.number); + Some(vec) + } + + fn span(&self) -> Span { + todo!() + } + + fn position(&self) -> Position { + todo!() + } + + fn is_token(&self) -> bool { + todo!() + } + + fn name(&self) -> String { + todo!() + } +} +impl std::fmt::Display for Exponent<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + todo!() + } +} From 5972d7dd6c923a2d5b9a6c4612982001526438e9 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Thu, 25 Apr 2024 17:13:19 +0200 Subject: [PATCH 094/214] Add parsing of blanks --- nemo/src/io/parser.rs | 11 ++++++++++- nemo/src/io/parser/ast/term.rs | 5 +++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index 5f87b2fcf..236c6e313 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -2438,7 +2438,7 @@ mod new { exp, greater, greater_equal, hash, less, less_equal, lex_comment, lex_doc_comment, lex_ident, lex_iri, lex_number, lex_operators, lex_string, lex_toplevel_doc_comment, lex_unary_prefix_operators, lex_whitespace, minus, open_brace, open_paren, plus, - question_mark, slash, star, tilde, unequal, Span, Token, TokenKind, + question_mark, slash, star, tilde, underscore, unequal, Span, Token, TokenKind, }; use crate::io::parser::ast::AstNode; use nom::combinator::{all_consuming, cut, map, opt, recognize}; @@ -3195,6 +3195,7 @@ mod new { parse_variable, parse_existential, parse_aggregation_term, + parse_blank, )), )(input) } @@ -3486,6 +3487,14 @@ mod new { ) } + /// Parse a `_` + fn parse_blank<'a, E: ParseError> + ContextError>>( + input: Span<'a>, + ) -> IResult, E> { + context("parse blank", underscore)(input) + .map(|(rest_input, underscore)| (rest_input, Term::Blank(underscore))) + } + /// Parse a tuple term, either with a name (function symbol) or as a term (-list) with /// parenthesis. fn parse_tuple_term<'a, E: ParseError> + ContextError>>( diff --git a/nemo/src/io/parser/ast/term.rs b/nemo/src/io/parser/ast/term.rs index c8f734481..8be1e79e7 100644 --- a/nemo/src/io/parser/ast/term.rs +++ b/nemo/src/io/parser/ast/term.rs @@ -34,6 +34,7 @@ pub(crate) enum Term<'a> { }, Tuple(Box>), Map(Box>), + Blank(Token<'a>), } impl AstNode for Term<'_> { fn children(&self) -> Option> { @@ -88,8 +89,10 @@ impl AstNode for Term<'_> { vec.push(close_paren); Some(vec) } + // TODO: check whether directly the children or Some(vec![named_tuple]) should get returned (for fidelity in ast) Term::Tuple(named_tuple) => named_tuple.children(), Term::Map(map) => map.children(), + Term::Blank(token) => Some(vec![token]), } } @@ -103,6 +106,7 @@ impl AstNode for Term<'_> { Term::Aggregation { span, .. 
} => *span, Term::Tuple(named_tuple) => named_tuple.span(), Term::Map(map) => map.span(), + Term::Blank(t) => t.span(), } } @@ -146,6 +150,7 @@ impl AstNode for Term<'_> { } } Term::Map(_) => name!("Map"), + Term::Blank(_) => name!("Blank"), } } } From 8b5cbf709f817c16ff8a3041286dcda0d6ed21a1 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Wed, 15 May 2024 07:40:07 +0200 Subject: [PATCH 095/214] Add error reporting and recovery --- nemo/src/io/lexer.rs | 451 +++++-- nemo/src/io/parser.rs | 1734 ++++++++++++++++----------- nemo/src/io/parser/ast.rs | 29 + nemo/src/io/parser/ast/program.rs | 21 +- nemo/src/io/parser/ast/statement.rs | 4 + nemo/src/io/parser/types.rs | 265 +++- testfile.rls | 17 + testfile2.rls | 11 + 8 files changed, 1704 insertions(+), 828 deletions(-) create mode 100644 testfile2.rls diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs index ebb608d10..555e9e74f 100644 --- a/nemo/src/io/lexer.rs +++ b/nemo/src/io/lexer.rs @@ -1,21 +1,75 @@ //! Lexical tokenization of rulewerk-style rules. +use std::{cell::RefCell, ops::Range}; + use nom::{ branch::alt, - bytes::complete::{is_not, tag, take}, + bytes::complete::{is_not, tag, take, take_till, take_while}, character::complete::{alpha1, alphanumeric1, digit1, line_ending, multispace1}, combinator::{all_consuming, cut, map, recognize}, error::{ContextError, ParseError}, multi::{many0, many1}, sequence::{delimited, pair, tuple}, - IResult, }; use nom_locate::LocatedSpan; -use super::parser::ast::Position; +#[derive(Debug)] +pub(crate) enum NewParseError { + MissingWhitespace, + Rule, + Fact, + Directive, + Comment, + SyntaxError(String), + MissingTlDocComment, +} +impl nom::error::ParseError> for NewParseError { + fn from_error_kind(input: Input, kind: nom::error::ErrorKind) -> Self { + NewParseError::SyntaxError(kind.description().to_string()) + } + + fn append(_: Input, _: nom::error::ErrorKind, other: Self) -> Self { + other + } +} + +pub(crate) type IResult = nom::IResult; + +use super::parser::{ + ast::Position, + types::{Input, Label, ToRange}, +}; + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub(crate) struct Error(pub(crate) Position, pub(crate) String); + +#[derive(Debug, Clone, Copy, PartialEq)] +pub(crate) struct ParserState<'a> { + pub(crate) errors: &'a RefCell>, + pub(crate) labels: &'a RefCell>, +} +impl ParserState<'_> { + pub fn report_error(&self, error: Error) { + self.errors.borrow_mut().push(error); + } +} pub(crate) type Span<'a> = LocatedSpan<&'a str>; +impl ToRange for Span<'_> { + fn to_range(&self) -> Range { + let start = self.location_offset(); + let end = start + self.fragment().len(); + start..end + } +} + +pub(crate) fn to_range<'a>(span: Span<'a>) -> Range { + let start = span.location_offset(); + let end = start + span.fragment().len(); + start..end +} + /// All the tokens the input gets parsed into. 
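// A minimal usage sketch for the shared parser state above (all names as
// defined in this file; `Input` is the span-plus-state wrapper from
// `parser::types`): the caller owns the two `RefCell`ed vectors and hands out
// shared references, so any combinator holding an `Input` can record an
// `Error` without threading `&mut` state through nom.
//
//     let errors = RefCell::new(Vec::new());
//     let labels = RefCell::new(Vec::new());
//     let state = ParserState { errors: &errors, labels: &labels };
//     let input = Input { input: Span::new("a(B,C)."), parser_state: state };
//     let (_rest, tokens) = lex_tokens(input).unwrap();
//     // anything reported during lexing is now in `errors.borrow()`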
#[derive(Debug, PartialEq, Copy, Clone)] pub(crate) enum TokenKind { @@ -115,6 +169,8 @@ pub(crate) enum TokenKind { Illegal, /// signals end of file Eof, + /// signals an error + Error, } impl std::fmt::Display for TokenKind { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { @@ -166,6 +222,7 @@ impl std::fmt::Display for TokenKind { TokenKind::PrefixIdent => write!(f, "PrefixIdent"), TokenKind::Illegal => write!(f, "Illegal"), TokenKind::Eof => write!(f, "Eof"), + TokenKind::Error => write!(f, "\x1b[1;31mError\x1b[0m"), } } } @@ -229,12 +286,27 @@ impl<'a> crate::io::parser::ast::AstNode for Token<'a> { } } +pub(crate) fn map_err<'a, 'e, O, E: ParseError>>( + mut f: impl nom::Parser, O, E>, + mut op: impl FnMut(E) -> NewParseError, +) -> impl FnMut(Input<'a, 'e>) -> IResult, O> { + move |input| { + f.parse(input).map_err(|e| match e { + nom::Err::Incomplete(err) => nom::Err::Incomplete(err), + nom::Err::Error(err) => nom::Err::Error(op(err)), + nom::Err::Failure(err) => nom::Err::Error(op(err)), + }) + } +} + macro_rules! syntax { ($func_name: ident, $tag_string: literal, $token: expr) => { - pub(crate) fn $func_name<'a, E: ParseError>>( - input: Span<'a>, - ) -> IResult, Token, E> { - map(tag($tag_string), |span| Token::new($token, span))(input) + pub(crate) fn $func_name<'a, 'e>( + input: Input<'a, 'e>, + ) -> nom::IResult, Token<'a>> { + map(tag($tag_string), |span: Input| { + Token::new($token, span.input) + })(input) } }; } @@ -259,13 +331,13 @@ syntax!(at, "@", TokenKind::At); syntax!(exp_lower, "e", TokenKind::Exponent); syntax!(exp_upper, "E", TokenKind::Exponent); -pub(crate) fn exp<'a, E: ParseError>>(input: Span<'a>) -> IResult, Token, E> { +pub(crate) fn exp<'a, 'e>(input: Input<'a, 'e>) -> nom::IResult, Token<'a>> { alt((exp_lower, exp_upper))(input) } -pub(crate) fn lex_punctuations<'a, E: ParseError>>( - input: Span<'a>, -) -> IResult, Token, E> { +pub(crate) fn lex_punctuations<'a, 'e>( + input: Input<'a, 'e>, +) -> nom::IResult, Token<'a>> { alt(( arrow, open_paren, @@ -298,9 +370,9 @@ syntax!(minus, "-", TokenKind::Minus); syntax!(star, "*", TokenKind::Star); syntax!(slash, "/", TokenKind::Slash); -pub(crate) fn lex_operators<'a, E: ParseError>>( - input: Span<'a>, -) -> IResult, Token, E> { +pub(crate) fn lex_operators<'a, 'e>( + input: Input<'a, 'e>, +) -> nom::IResult, Token<'a>> { alt(( less_equal, greater_equal, @@ -315,90 +387,79 @@ pub(crate) fn lex_operators<'a, E: ParseError>>( ))(input) } -pub(crate) fn lex_unary_prefix_operators<'a, E: ParseError>>( - input: Span<'a>, -) -> IResult, Token<'a>, E> { - alt((plus, minus))(input) -} +// pub(crate) fn lex_unary_prefix_operators<'a, 'e>( +// input: Input<'a, 'e>, +// ) -> IResult, Token<'a>> { +// alt((plus, minus))(input) +// } -pub(crate) fn lex_ident<'a, E: ParseError>>( - input: Span<'a>, -) -> IResult, Token, E> { - let (rest, result) = recognize(pair( +pub(crate) fn lex_ident<'a, 'e>(input: Input<'a, 'e>) -> nom::IResult, Token<'a>> { + let (rest_input, ident) = recognize(pair( alpha1, many0(alt((alphanumeric1, tag("_"), tag("-")))), ))(input)?; - let token = match *result.fragment() { - "base" => Token::new(TokenKind::Base, result), - "prefix" => Token::new(TokenKind::Prefix, result), - "import" => Token::new(TokenKind::Import, result), - "export" => Token::new(TokenKind::Export, result), - "output" => Token::new(TokenKind::Output, result), - _ => Token::new(TokenKind::Ident, result), + let token = match *ident.input.fragment() { + "base" => Token::new(TokenKind::Base, ident.input), + 
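        // Keyword recognition happens after the fact here: the identifier is
        // lexed as a whole first, and only an exact match is promoted to a
        // keyword kind, so `baseA` stays a plain `Ident` while `base` becomes
        // `TokenKind::Base` (cf. the `idents_with_keyword_prefix` test below).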
"prefix" => Token::new(TokenKind::Prefix, ident.input), + "import" => Token::new(TokenKind::Import, ident.input), + "export" => Token::new(TokenKind::Export, ident.input), + "output" => Token::new(TokenKind::Output, ident.input), + _ => Token::new(TokenKind::Ident, ident.input), }; - Ok((rest, token)) + Ok((rest_input, token)) } -pub(crate) fn lex_iri<'a, E: ParseError>>(input: Span<'a>) -> IResult, Token, E> { +pub(crate) fn lex_iri<'a, 'e>(input: Input<'a, 'e>) -> nom::IResult, Token<'a>> { recognize(delimited(tag("<"), is_not("> \n"), cut(tag(">"))))(input) - .map(|(rest, result)| (rest, Token::new(TokenKind::Iri, result))) + .map(|(rest, result)| (rest, Token::new(TokenKind::Iri, result.input))) } -pub(crate) fn lex_number<'a, E: ParseError>>( - input: Span<'a>, -) -> IResult, Token, E> { - digit1(input).map(|(rest, result)| (rest, Token::new(TokenKind::Number, result))) +pub(crate) fn lex_number<'a, 'e>(input: Input<'a, 'e>) -> nom::IResult, Token<'a>> { + digit1(input) + .map(|(rest_input, result)| (rest_input, Token::new(TokenKind::Number, result.input))) } -pub(crate) fn lex_string<'a, E: ParseError>>( - input: Span<'a>, -) -> IResult, Token, E> { +pub(crate) fn lex_string<'a, 'e>(input: Input<'a, 'e>) -> nom::IResult, Token<'a>> { recognize(delimited(tag("\""), is_not("\""), cut(tag("\""))))(input) - .map(|(rest, result)| (rest, Token::new(TokenKind::String, result))) + .map(|(rest, result)| (rest, Token::new(TokenKind::String, result.input))) } -pub(crate) fn lex_comment<'a, E: ParseError> + ContextError>>( - input: Span<'a>, -) -> IResult, Token, E> { +pub(crate) fn lex_comment<'a, 'e>(input: Input<'a, 'e>) -> nom::IResult, Token<'a>> { recognize(tuple((tag("%"), many0(is_not("\n")), line_ending)))(input) - .map(|(rest, result)| (rest, Token::new(TokenKind::Comment, result))) + .map(|(rest, result)| (rest, Token::new(TokenKind::Comment, result.input))) } -pub(crate) fn lex_doc_comment<'a, E: ParseError> + ContextError>>( - input: Span<'a>, -) -> IResult, Token, E> { +pub(crate) fn lex_doc_comment<'a, 'e>( + input: Input<'a, 'e>, +) -> nom::IResult, Token<'a>> { recognize(many1(tuple((tag("%%"), many0(is_not("\n")), line_ending))))(input) - .map(|(rest, result)| (rest, Token::new(TokenKind::DocComment, result))) + .map(|(rest, result)| (rest, Token::new(TokenKind::DocComment, result.input))) } -pub(crate) fn lex_toplevel_doc_comment<'a, E: ParseError> + ContextError>>( - input: Span<'a>, -) -> IResult, Token, E> { +pub(crate) fn lex_toplevel_doc_comment<'a, 'e>( + input: Input<'a, 'e>, +) -> nom::IResult, Token<'a>> { recognize(many1(tuple((tag("%!"), many0(is_not("\n")), line_ending))))(input) - .map(|(rest, result)| (rest, Token::new(TokenKind::TlDocComment, result))) + .map(|(rest, result)| (rest, Token::new(TokenKind::TlDocComment, result.input))) } -pub(crate) fn lex_comments<'a, E: ParseError> + ContextError>>( - input: Span<'a>, -) -> IResult, Token, E> { +pub(crate) fn lex_comments<'a, 'e>(input: Input<'a, 'e>) -> nom::IResult, Token<'a>> { alt((lex_toplevel_doc_comment, lex_doc_comment, lex_comment))(input) } -pub(crate) fn lex_whitespace<'a, E: ParseError>>( - input: Span<'a>, -) -> IResult, Token, E> { - multispace1(input).map(|(rest, result)| (rest, Token::new(TokenKind::Whitespace, result))) +pub(crate) fn lex_whitespace<'a, 'e>( + input: Input<'a, 'e>, +) -> nom::IResult, Token<'a>> { + multispace1(input).map(|(rest, result)| (rest, Token::new(TokenKind::Whitespace, result.input))) } -pub(crate) fn lex_illegal<'a, E: ParseError>>( - input: Span<'a>, -) -> IResult, 
Token, E> { - take(1usize)(input).map(|(rest, result)| (rest, Token::new(TokenKind::Illegal, result))) +pub(crate) fn lex_illegal<'a, 'e>(input: Input<'a, 'e>) -> nom::IResult, Token<'a>> { + take(1usize)(input).map(|(rest, result)| (rest, Token::new(TokenKind::Illegal, result.input))) } -pub(crate) fn lex_tokens<'a, E: ParseError> + ContextError>>( - input: Span<'a>, -) -> IResult, Vec, E> { +pub(crate) fn lex_tokens<'a, 'e>( + input: Input<'a, 'e>, +) -> nom::IResult, Vec>> { all_consuming(many0(alt(( lex_iri, lex_operators, @@ -411,13 +472,28 @@ pub(crate) fn lex_tokens<'a, E: ParseError> + ContextError>>( lex_illegal, ))))(input) .map(|(span, mut vec)| { - vec.append(&mut vec![Token::new(TokenKind::Eof, span)]); + vec.append(&mut vec![Token::new(TokenKind::Eof, span.input)]); (span, vec) }) } +pub(crate) fn skip_to_dot<'a, 'e>(input: Input<'a, 'e>) -> (Input<'a, 'e>, Token<'a>) { + let (rest_input, error_input) = recognize(pair( + take_till::<_, Input<'_, '_>, nom::error::Error<_>>(|c| c == '.'), + tag("."), + ))(input) + .expect("Skipping to the next dot should not fail!"); + ( + rest_input, + Token { + kind: TokenKind::Error, + span: error_input.input, + }, + ) +} + #[cfg(test)] -mod test { +mod tests { use super::TokenKind::*; use super::*; @@ -432,8 +508,19 @@ mod test { #[test] fn empty_input() { let input = Span::new(""); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let errors = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state: errors, + }; assert_eq!( - lex_tokens::>(input).unwrap().1, + // lex_tokens::>(input).unwrap().1, + lex_tokens(input).unwrap().1, vec![T!(Eof, 0, 1, "")] ) } @@ -441,8 +528,19 @@ mod test { #[test] fn base() { let input = Span::new("@base"); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let errors = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state: errors, + }; assert_eq!( - lex_tokens::>(input).unwrap().1, + // lex_tokens::>(input).unwrap().1, + lex_tokens(input).unwrap().1, vec![T!(At, 0, 1, "@"), T!(Base, 1, 1, "base"), T!(Eof, 5, 1, ""),] ) } @@ -450,8 +548,19 @@ mod test { #[test] fn prefix() { let input = Span::new("@prefix"); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let errors = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state: errors, + }; assert_eq!( - lex_tokens::>(input).unwrap().1, + // lex_tokens::>(input).unwrap().1, + lex_tokens(input).unwrap().1, vec![ T!(At, 0, 1, "@"), T!(Prefix, 1, 1, "prefix"), @@ -463,8 +572,19 @@ mod test { #[test] fn output() { let input = Span::new("@output"); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let errors = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state: errors, + }; assert_eq!( - lex_tokens::>(input).unwrap().1, + // lex_tokens::>(input).unwrap().1, + lex_tokens(input).unwrap().1, vec![ T!(At, 0, 1, "@"), T!(Output, 1, 1, "output"), @@ -476,8 +596,19 @@ mod test { #[test] fn import() { let input = Span::new("@import"); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let errors = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state: errors, + }; assert_eq!( - lex_tokens::>(input).unwrap().1, + // lex_tokens::>(input).unwrap().1, + 
lex_tokens(input).unwrap().1, vec![ T!(At, 0, 1, "@"), T!(Import, 1, 1, "import"), @@ -489,8 +620,19 @@ mod test { #[test] fn export() { let input = Span::new("@export"); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let errors = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state: errors, + }; assert_eq!( - lex_tokens::>(input).unwrap().1, + // lex_tokens::>(input).unwrap().1, + lex_tokens(input).unwrap().1, vec![ T!(At, 0, 1, "@"), T!(Export, 1, 1, "export"), @@ -502,8 +644,19 @@ mod test { #[test] fn idents_with_keyword_prefix() { let input = Span::new("@baseA, @prefixB, @importC, @exportD, @outputE."); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let errors = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state: errors, + }; assert_eq!( - lex_tokens::>(input).unwrap().1, + // lex_tokens::>(input).unwrap().1, + lex_tokens(input).unwrap().1, vec![ T!(At, 0, 1, "@"), T!(Ident, 1, 1, "baseA"), @@ -532,8 +685,19 @@ mod test { #[test] fn tokenize() { let input = Span::new("P(?X) :- A(?X).\t\n A(Human)."); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let errors = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state: errors, + }; assert_eq!( - lex_tokens::>(input).unwrap().1, + // lex_tokens::>(input).unwrap().1, + lex_tokens(input).unwrap().1, vec![ T!(Ident, 0, 1, "P"), T!(OpenParen, 1, 1, "("), @@ -563,8 +727,19 @@ mod test { #[test] fn comment() { let input = Span::new(" % Some Comment\n"); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let errors = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state: errors, + }; assert_eq!( - lex_tokens::>(input).unwrap().1, + // lex_tokens::>(input).unwrap().1, + lex_tokens(input).unwrap().1, vec![ T!(Whitespace, 0, 1, " "), T!(Comment, 4, 1, "% Some Comment\n"), @@ -578,8 +753,19 @@ mod test { #[test] fn ident() { let input = Span::new("some_Ident(Alice). %comment at the end of a line\n"); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let errors = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state: errors, + }; assert_eq!( - lex_tokens::>(input).unwrap().1, + // lex_tokens::>(input).unwrap().1, + lex_tokens(input).unwrap().1, vec![ T!(Ident, 0, 1, "some_Ident"), T!(OpenParen, 10, 1, "("), @@ -596,8 +782,19 @@ mod test { #[test] fn forbidden_ident() { let input = Span::new("_someIdent(Alice). 
%comment at the end of a line\n"); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let errors = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state: errors, + }; assert_eq!( - lex_tokens::>(input).unwrap().1, + // lex_tokens::>(input).unwrap().1, + lex_tokens(input).unwrap().1, vec![ T!(Underscore, 0, 1, "_"), T!(Ident, 1, 1, "someIdent"), @@ -615,8 +812,19 @@ mod test { #[test] fn iri() { let input = Span::new(""); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let errors = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state: errors, + }; assert_eq!( - lex_tokens::>(input).unwrap().1, + // lex_tokens::>(input).unwrap().1, + lex_tokens(input).unwrap().1, vec![ T!(Iri, 0, 1, ""), T!(Eof, 31, 1, ""), @@ -627,8 +835,19 @@ mod test { #[test] fn iri_pct_enc() { let input = Span::new("\n"); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let errors = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state: errors, + }; assert_eq!( - lex_tokens::>(input).unwrap().1, + // lex_tokens::>(input).unwrap().1, + lex_tokens(input).unwrap().1, vec![ T!(Iri, 0, 1, ""), T!(Whitespace, 37, 1, "\n"), @@ -642,8 +861,19 @@ mod test { #[test] fn constraints() { let input = Span::new("A(?X):-B(?X),?X<42,?X>3."); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let errors = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state: errors, + }; assert_eq!( - lex_tokens::>(input).unwrap().1, + // lex_tokens::>(input).unwrap().1, + lex_tokens(input).unwrap().1, vec![ T!(Ident, 0, 1, "A"), T!(OpenParen, 1, 1, "("), @@ -675,8 +905,19 @@ mod test { #[test] fn pct_enc_comment() { let input = Span::new("%d4 this should be a comment,\n% but the lexer can't distinguish a percent encoded value\n% in an iri from a comment :(\n"); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let errors = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state: errors, + }; assert_eq!( - lex_tokens::>(input).unwrap().1, + // lex_tokens::>(input).unwrap().1, + lex_tokens(input).unwrap().1, vec![ T!(Comment, 0, 1, "%d4 this should be a comment,\n"), T!( @@ -694,8 +935,19 @@ mod test { #[test] fn fact() { let input = Span::new("somePred(term1, term2)."); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let errors = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state: errors, + }; assert_eq!( - lex_tokens::>(input).unwrap().1, + // lex_tokens::>(input).unwrap().1, + lex_tokens(input).unwrap().1, vec![ T!(Ident, 0, 1, "somePred"), T!(OpenParen, 8, 1, "("), @@ -713,12 +965,39 @@ mod test { #[test] fn whitespace() { let input = Span::new(" \t \n\n\t \n"); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let errors = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state: errors, + }; assert_eq!( - lex_tokens::>(input).unwrap().1, + // lex_tokens::>(input).unwrap().1, + lex_tokens(input).unwrap().1, vec![ T!(Whitespace, 0, 1, " \t \n\n\t \n"), T!(Eof, 12, 4, ""), ] ) } + + #[test] + fn skip_to_dot() { + let input = Span::new("some ?broken :- rule). 
A(Fact)."); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let errors = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state: errors, + }; + dbg!(super::skip_to_dot(input)); + } } diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index 236c6e313..758bb9061 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -23,7 +23,7 @@ use nom::{ use macros::traced; pub(crate) mod ast; -mod types; +pub(crate) mod types; use types::{ConstraintOperator, IntermediateResult, Span}; pub(crate) mod iri; @@ -2430,15 +2430,19 @@ mod test { /// NEW PARSER mod new { + use std::cell::RefCell; + use super::ast::{ - atom::*, directive::*, map::*, program::*, statement::*, term::*, tuple::*, List, + atom::*, directive::*, map::*, program::*, statement::*, term::*, tuple::*, List, Position, }; + use super::types::{Input, Label, ParserLabel, ToRange}; use crate::io::lexer::{ arrow, at, caret, close_brace, close_paren, colon, comma, dot, equal, exclamation_mark, exp, greater, greater_equal, hash, less, less_equal, lex_comment, lex_doc_comment, lex_ident, lex_iri, lex_number, lex_operators, lex_string, lex_toplevel_doc_comment, - lex_unary_prefix_operators, lex_whitespace, minus, open_brace, open_paren, plus, - question_mark, slash, star, tilde, underscore, unequal, Span, Token, TokenKind, + lex_whitespace, map_err, minus, open_brace, open_paren, plus, question_mark, skip_to_dot, + slash, star, tilde, underscore, unequal, Error, NewParseError, ParserState, Span, Token, + TokenKind, }; use crate::io::parser::ast::AstNode; use nom::combinator::{all_consuming, cut, map, opt, recognize}; @@ -2466,82 +2470,244 @@ mod new { } } - fn ignore_ws_and_comments<'a, F, O, E: ParseError> + ContextError>>( - inner: F, - ) -> impl FnMut(Span<'a>) -> IResult, O, E> - where - F: Parser, O, E> + FnMut(Span<'a>) -> IResult, O, E>, - { - delimited( - many0(alt((lex_whitespace, lex_comment))), - inner, - many0(alt((lex_whitespace, lex_comment))), - ) + // fn ignore_ws_and_comments<'a, F, O, E: ParseError> + ContextError>>( + // inner: F, + // ) -> impl FnMut(Span<'a>) -> IResult, O, E> + // where + // F: Parser, O, E> + FnMut(Span<'a>) -> IResult, O, E>, + // { + // delimited( + // many0(alt((lex_whitespace, lex_comment))), + // inner, + // many0(alt((lex_whitespace, lex_comment))), + // ) + // } + + // fn expect<'a, F, E, T>( + // parser: F, + // error_msg: E, + // state: Errors, + // ) -> impl Fn(Span<'a>) -> IResult, T> + // where + // F: Fn(Span<'a>) -> IResult, T>, + // E: ToString, + // { + // move |input| match parser(input) { + // Ok((rest_input, output)) => Ok((rest_input, output)), + // Err(nom::Err::Error(nom::error::Error { input, .. })) + // | Err(nom::Err::Failure(nom::error::Error { input, .. 
})) => { + // let err = crate::io::lexer::Error(to_range(input), error_msg.to_string()); + // state.report_error(err); + // Ok(( + // input, + // Token { + // kind: TokenKind::Error, + // span: outer_span(input, rest_input), + // }, + // )) + // } + // Err(err) => Err(err), + // } + // } + + fn expect< + 'a, + 'e, + O: Copy, + E: ParseError>, + F: nom::Parser, O, E>, + >( + mut parser: F, + error_msg: impl ToString, + error_output: O, + errors: ParserState<'e>, + ) -> impl FnMut(Input<'a, 'e>) -> IResult, O, E> { + move |input| match parser.parse(input.clone()) { + Ok(result) => Ok(result), + Err(nom::Err::Error(_)) | Err(nom::Err::Failure(_)) => { + let err = Error( + Position { + offset: input.input.location_offset(), + line: input.input.location_line(), + column: input.input.get_utf8_column() as u32, + }, + error_msg.to_string(), + ); + errors.report_error(err); + Ok((input, error_output)) + } + Err(err) => Err(err), + } + } + + fn recover<'a, 'e, E>( + mut parser: impl nom::Parser, Statement<'a>, E>, + error_msg: impl ToString, + errors: ParserState<'e>, + ) -> impl FnMut(Input<'a, 'e>) -> IResult, Statement<'a>, E> { + move |input: Input<'a, 'e>| match parser.parse(input) { + Ok(result) => Ok(result), + Err(err) if input.input.is_empty() => Err(err), + Err(nom::Err::Error(e)) | Err(nom::Err::Failure(e)) => { + let err = Error( + Position { + offset: input.input.location_offset(), + line: input.input.location_line(), + column: input.input.get_utf8_column() as u32, + }, + error_msg.to_string(), + ); + errors.report_error(err); + let (rest_input, token) = skip_to_dot(input); + Ok((rest_input, Statement::Error(token))) + } + Err(err) => Err(err), + } + } + + fn report_label<'a, 's, O, E>( + mut parser: impl nom::Parser, O, E>, + label: ParserLabel, + ) -> impl FnMut(Input<'a, 's>) -> IResult, O, E> { + move |input| match parser.parse(input) { + Ok(result) => Ok(result), + Err(err) => { + match err { + nom::Err::Incomplete(_) => (), + nom::Err::Error(_) | nom::Err::Failure(_) => { + if !input.input.is_empty() { + input.parser_state.labels.borrow_mut().push(Label { + label, + pos: Position { + offset: input.input.location_offset(), + line: input.input.location_line(), + column: input.input.get_utf8_column() as u32, + }, + }) + }; + } + }; + Err(err) + } + } + } + + fn report_error<'a, 's, O, E>( + mut parser: impl nom::Parser, O, E>, + ) -> impl FnMut(Input<'a, 's>) -> IResult, O, E> { + move |input| match parser.parse(input) { + Ok(result) => { + input.parser_state.labels.borrow_mut().clear(); + Ok(result) + } + Err(err) => { + match err { + nom::Err::Incomplete(_) => (), + nom::Err::Error(_) | nom::Err::Failure(_) => { + // println!("LABELS BEFORE REPORT!!!!: {:#?}", input.parser_state.labels); + let mut furthest_errors: Vec = Vec::new(); + let labels = + as Clone>::clone(&input.parser_state.labels.borrow()) + .into_iter(); + for label in labels { + if let Some(last) = furthest_errors.last() { + if label.pos.offset >= (*last).0.offset { + let err = + Error(label.pos, format!("expected {:?}", label.label)); + furthest_errors.push(err); + } + } else { + let err = Error(label.pos, format!("expected {:?}", label.label)); + furthest_errors.push(err); + }; + } + for err in furthest_errors { + input.parser_state.report_error(err) + } + // for label in furthest_errors { + // println!( + // "Syntax error: Parser got stuck at line {} column {}, expected {:?}", + // label.position.line, label.position.column, label.label + // ); + // println!( + // "\n{}", + // input + // .parser_state + // .source 
+ // .fragment() + // .lines() + // .collect::>() + // .get((label.position.line - 1) as usize) + // .unwrap() + // ); + // println!("{1:>0$}", label.position.column, "^"); + // } + } + }; + Err(err) + } + } } /// Parse a full program consisting of directives, facts, rules and comments. - fn parse_program<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, Program<'a>, E> { - context( - "parse program", - all_consuming(pair( - opt(lex_toplevel_doc_comment), - many1(alt(( - parse_rule, - parse_fact, + fn parse_program<'a, 'e>(input: Input<'a, 'e>) -> (Program<'a>, Vec) { + let (rest_input, (tl_doc_comment, statements)) = all_consuming(pair( + opt(lex_toplevel_doc_comment), + many1(recover( + report_error(alt(( + // TODO: Discuss wether directives should only get parsed at the beginning of the source file + report_label(parse_rule, ParserLabel::Rule), + report_label(parse_fact, ParserLabel::Fact), parse_whitespace, - parse_directive, + report_label(parse_directive, ParserLabel::Directive), parse_comment, ))), + "failed to parse a statement", + input.parser_state, )), - )(input) - .map(|(rest_input, (tl_doc_comment, statements))| { - ( - rest_input, - Program { - span: input, - tl_doc_comment, - statements, - }, - ) - }) + ))(input) + .expect("Parser can't fail. If it fails it's a bug! Pleas report it. Got"); + ( + Program { + span: input.input, + tl_doc_comment, + statements, + }, + rest_input.parser_state.errors.take(), + ) } /// Parse whitespace that is between directives, facts, rules and comments. - fn parse_whitespace<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context("parse whitespace", lex_whitespace)(input) - .map(|(rest, ws)| (rest, Statement::Whitespace(ws))) + fn parse_whitespace<'a, 'e>(input: Input<'a, 'e>) -> IResult, Statement<'a>> { + lex_whitespace(input).map(|(rest_input, ws)| (rest_input, Statement::Whitespace(ws))) } /// Parse normal comments that start with a `%` and ends at the line ending. - fn parse_comment<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context("parse comment", lex_comment)(input) - .map(|(rest, comment)| (rest, Statement::Comment(comment))) + fn parse_comment<'a, 'e>(input: Input<'a, 'e>) -> IResult, Statement<'a>> { + lex_comment(input).map(|(rest_input, comment)| (rest_input, Statement::Comment(comment))) } /// Parse a fact of the form `predicateName(term1, term2, …).` - fn parse_fact<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context( - "parse fact", - tuple(( - opt(lex_doc_comment), - parse_normal_atom, - opt(lex_whitespace), - cut(dot), - )), - )(input) + fn parse_fact<'a, 'e>(input: Input<'a, 'e>) -> IResult, Statement<'a>> { + tuple(( + opt(lex_doc_comment), + parse_normal_atom, + opt(lex_whitespace), + expect( + report_label(dot, ParserLabel::Dot), + "missing `.`", + Token { + kind: TokenKind::Error, + span: Span::new("ERROR! 
missing `.`"), + }, + input.parser_state, + ), + ))(input) .map(|(rest_input, (doc_comment, atom, ws, dot))| { ( rest_input, Statement::Fact { - span: outer_span(input, rest_input), + span: outer_span(input.input, rest_input.input), doc_comment, atom, ws, @@ -2552,28 +2718,31 @@ mod new { } /// Parse a rule of the form `headPredicate1(term1, term2, …), headPredicate2(term1, term2, …) :- bodyPredicate(term1, …), term1 >= (term2 + term3) * function(term1, …) .` - fn parse_rule<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context( - "parse rule", - tuple(( - opt(lex_doc_comment), - parse_head, - opt(lex_whitespace), - arrow, - opt(lex_whitespace), - parse_body, - opt(lex_whitespace), - cut(dot), - )), - )(input) + fn parse_rule<'a, 'e>(input: Input<'a, 'e>) -> IResult, Statement<'a>> { + tuple(( + opt(lex_doc_comment), + parse_head, + opt(lex_whitespace), + report_label(arrow, ParserLabel::Arrow), + opt(lex_whitespace), + parse_body, + opt(lex_whitespace), + expect( + report_label(dot, ParserLabel::Dot), + "missing `.`", + Token { + kind: TokenKind::Error, + span: Span::new("ERROR! missing `.`"), + }, + input.parser_state, + ), + ))(input) .map( |(rest_input, (doc_comment, head, ws1, arrow, ws2, body, ws3, dot))| { ( rest_input, Statement::Rule { - span: outer_span(input, rest_input), + span: outer_span(input.input, rest_input.input), doc_comment, head, ws1, @@ -2589,63 +2758,57 @@ mod new { } /// Parse the head atoms of a rule. - fn parse_head<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult>, E> { - context("parse head", parse_atom_list(parse_head_atoms))(input) + fn parse_head<'a, 'e>(input: Input<'a, 'e>) -> IResult, List<'a, Atom<'a>>> { + parse_list(parse_head_atoms)(input) } /// Parse the body atoms of a rule. - fn parse_body<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult>, E> { - context("parse body", parse_atom_list(parse_body_atoms))(input) + fn parse_body<'a, 'e>(input: Input<'a, 'e>) -> IResult, List<'a, Atom<'a>>> { + parse_list(parse_body_atoms)(input) } /// Parse the directives (@base, @prefix, @import, @export, @output). - fn parse_directive<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context( - "parse directive", - alt(( - parse_base_directive, - parse_prefix_directive, - parse_import_directive, - parse_export_directive, - parse_output_directive, - )), - )(input) + fn parse_directive<'a, 'e>(input: Input<'a, 'e>) -> IResult, Statement<'a>> { + alt(( + parse_base_directive, + parse_prefix_directive, + parse_import_directive, + parse_export_directive, + parse_output_directive, + ))(input) .map(|(rest, directive)| (rest, Statement::Directive(directive))) } /// Parse the base directive. - fn parse_base_directive<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context( - "parse base directive", - tuple(( - opt(lex_doc_comment), - recognize(pair( - at, - verify(lex_ident, |token| token.kind == TokenKind::Base), - )), - opt(lex_whitespace), - lex_iri, - opt(lex_whitespace), - cut(dot), + fn parse_base_directive<'a, 'e>(input: Input<'a, 'e>) -> IResult, Directive<'a>> { + tuple(( + opt(lex_doc_comment), + recognize(pair( + at, + verify(lex_ident, |token| token.kind == TokenKind::Base), )), - )(input) + opt(lex_whitespace), + report_label(lex_iri, ParserLabel::Iri), + opt(lex_whitespace), + expect( + report_label(dot, ParserLabel::Dot), + "missing `.`", + Token { + kind: TokenKind::Error, + span: Span::new("ERROR! 
missing `.`"), + }, + input.parser_state, + ), + ))(input) .map(|(rest_input, (doc_comment, kw, ws1, base_iri, ws2, dot))| { ( rest_input, Directive::Base { - span: outer_span(input, rest_input), + span: outer_span(input.input, rest_input.input), doc_comment, kw: Token { kind: TokenKind::Base, - span: kw, + span: kw.input, }, ws1, base_iri, @@ -2657,40 +2820,45 @@ mod new { } /// Parse the prefix directive. - fn parse_prefix_directive<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context( - "parse prefix directive", - tuple(( - opt(lex_doc_comment), - recognize(pair( - at, - verify(lex_ident, |token| token.kind == TokenKind::Prefix), - )), - opt(lex_whitespace), - recognize(pair(lex_ident, colon)), - opt(lex_whitespace), - lex_iri, - opt(lex_whitespace), - cut(dot), + fn parse_prefix_directive<'a, 'e>( + input: Input<'a, 'e>, + ) -> IResult, Directive<'a>> { + tuple(( + opt(lex_doc_comment), + recognize(pair( + at, + verify(lex_ident, |token| token.kind == TokenKind::Prefix), )), - )(input) + opt(lex_whitespace), + report_label(recognize(pair(lex_ident, colon)), ParserLabel::Prefix), + opt(lex_whitespace), + report_label(lex_iri, ParserLabel::Iri), + opt(lex_whitespace), + expect( + report_label(dot, ParserLabel::Dot), + "missing `.`", + Token { + kind: TokenKind::Error, + span: Span::new("ERROR! missing `.`"), + }, + input.parser_state, + ), + ))(input) .map( |(rest_input, (doc_comment, kw, ws1, prefix, ws2, prefix_iri, ws3, dot))| { ( rest_input, Directive::Prefix { - span: outer_span(input, rest_input), + span: outer_span(input.input, rest_input.input), doc_comment, kw: Token { kind: TokenKind::Prefix, - span: kw, + span: kw.input, }, ws1, prefix: Token { kind: TokenKind::Ident, - span: prefix, + span: prefix.input, }, ws2, prefix_iri, @@ -2703,37 +2871,42 @@ mod new { } /// Parse the import directive. - fn parse_import_directive<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context( - "parse import directive", - tuple(( - opt(lex_doc_comment), - recognize(pair( - at, - verify(lex_ident, |token| token.kind == TokenKind::Import), - )), - lex_whitespace, - lex_ident, - opt(lex_whitespace), - arrow, - opt(lex_whitespace), - parse_map, - opt(lex_whitespace), - cut(dot), + fn parse_import_directive<'a, 'e>( + input: Input<'a, 'e>, + ) -> IResult, Directive<'a>> { + tuple(( + opt(lex_doc_comment), + recognize(pair( + at, + verify(lex_ident, |token| token.kind == TokenKind::Import), )), - )(input) + lex_whitespace, + report_label(lex_ident, ParserLabel::Identifier), + opt(lex_whitespace), + report_label(arrow, ParserLabel::Arrow), + opt(lex_whitespace), + parse_map, + opt(lex_whitespace), + expect( + report_label(dot, ParserLabel::Dot), + "missing `.`", + Token { + kind: TokenKind::Error, + span: Span::new("ERROR! missing `.`"), + }, + input.parser_state, + ), + ))(input) .map( |(rest_input, (doc_comment, kw, ws1, predicate, ws2, arrow, ws3, map, ws4, dot))| { ( rest_input, Directive::Import { - span: outer_span(input, rest_input), + span: outer_span(input.input, rest_input.input), doc_comment, kw: Token { kind: TokenKind::Import, - span: kw, + span: kw.input, }, ws1, predicate, @@ -2750,37 +2923,42 @@ mod new { } /// Parse the export directive. 
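+    /// For example, `@export a:-csv{}.` (see the directive test below); a missing
+    /// final `.` is reported through `expect` instead of aborting the whole parse.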
- fn parse_export_directive<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context( - "parse export directive", - tuple(( - opt(lex_doc_comment), - recognize(pair( - at, - verify(lex_ident, |token| token.kind == TokenKind::Export), - )), - lex_whitespace, - lex_ident, - opt(lex_whitespace), - arrow, - opt(lex_whitespace), - parse_map, - opt(lex_whitespace), - cut(dot), + fn parse_export_directive<'a, 'e>( + input: Input<'a, 'e>, + ) -> IResult, Directive<'a>> { + tuple(( + opt(lex_doc_comment), + recognize(pair( + at, + verify(lex_ident, |token| token.kind == TokenKind::Export), )), - )(input) + lex_whitespace, + report_label(lex_ident, ParserLabel::Identifier), + opt(lex_whitespace), + report_label(arrow, ParserLabel::Arrow), + opt(lex_whitespace), + parse_map, + opt(lex_whitespace), + expect( + report_label(dot, ParserLabel::Dot), + "missing `.`", + Token { + kind: TokenKind::Error, + span: Span::new("ERROR! missing `.`"), + }, + input.parser_state, + ), + ))(input) .map( |(rest_input, (doc_comment, kw, ws1, predicate, ws2, arrow, ws3, map, ws4, dot))| { ( rest_input, Directive::Export { - span: outer_span(input, rest_input), + span: outer_span(input.input, rest_input.input), doc_comment, kw: Token { kind: TokenKind::Export, - span: kw, + span: kw.input, }, ws1, predicate, @@ -2797,33 +2975,38 @@ mod new { } /// Parse the output directive. - fn parse_output_directive<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context( - "parse output directive", - tuple(( - opt(lex_doc_comment), - recognize(pair( - at, - verify(lex_ident, |token| token.kind == TokenKind::Output), - )), - lex_whitespace, - opt(parse_identifier_list), - opt(lex_whitespace), - cut(dot), + fn parse_output_directive<'a, 'e>( + input: Input<'a, 'e>, + ) -> IResult, Directive<'a>> { + tuple(( + opt(lex_doc_comment), + recognize(pair( + at, + verify(lex_ident, |token| token.kind == TokenKind::Output), )), - )(input) + lex_whitespace, + opt(parse_list(lex_ident)), + opt(lex_whitespace), + expect( + report_label(dot, ParserLabel::Dot), + "missing `.`", + Token { + kind: TokenKind::Error, + span: Span::new("ERROR! 
missing `.`"), + }, + input.parser_state, + ), + ))(input) .map( |(rest_input, (doc_comment, kw, ws1, predicates, ws2, dot))| { ( rest_input, Directive::Output { - span: outer_span(input, rest_input), + span: outer_span(input.input, rest_input.input), doc_comment, kw: Token { kind: TokenKind::Output, - span: kw, + span: kw.input, }, ws1, predicates, @@ -2835,55 +3018,49 @@ mod new { ) } - /// Parse a list of `ident1, ident2, …` - fn parse_identifier_list<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult>, E> { - context( - "parse identifier list", + // /// Parse a list of `ident1, ident2, …` + // fn parse_identifier_list<'a, 'e>( + // input: Input<'a, 'e>, + // ) -> IResult, List<'a, Token<'a>>> { + // pair( + // lex_ident, + // many0(tuple(( + // opt(lex_whitespace), + // comma, + // opt(lex_whitespace), + // lex_ident, + // ))), + // )(input) + // .map(|(rest_input, (first, rest))| { + // ( + // rest_input, + // List { + // span: outer_span(input.input, rest_input.input), + // first, + // rest: if rest.is_empty() { None } else { Some(rest) }, + // }, + // ) + // }) + // } + + fn parse_list<'a, 'e, T>( + parse_t: fn(Input<'a, 'e>) -> IResult, T>, + ) -> impl Fn(Input<'a, 'e>) -> IResult, List<'a, T>> { + move |input: Input<'a, 'e>| { pair( - lex_ident, + parse_t, many0(tuple(( opt(lex_whitespace), comma, opt(lex_whitespace), - lex_ident, + parse_t, ))), - ), - )(input) - .map(|(rest_input, (first, rest))| { - ( - rest_input, - List { - span: outer_span(input, rest_input), - first, - rest: if rest.is_empty() { None } else { Some(rest) }, - }, - ) - }) - } - - fn parse_atom_list<'a, E: ParseError> + ContextError>>( - parse_atom: fn(Span<'a>) -> IResult, E>, - ) -> impl Fn(Span<'a>) -> IResult, List<'a, Atom<'a>>, E> { - move |input| { - context( - "parse atom list", - pair( - parse_atom, - many0(tuple(( - opt(lex_whitespace), - comma, - opt(lex_whitespace), - parse_atom, - ))), - ), )(input) .map(|(rest_input, (first, rest))| { ( rest_input, List { - span: outer_span(input, rest_input), + span: outer_span(input.input, rest_input.input), first, rest: if rest.is_empty() { None } else { Some(rest) }, }, @@ -2893,76 +3070,55 @@ mod new { } /// Parse the head atoms. The same as the body atoms except for disallowing negated atoms. - fn parse_head_atoms<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context( - "harse head atoms", - alt((parse_normal_atom, parse_infix_atom, parse_map_atom)), - )(input) + fn parse_head_atoms<'a, 'e>(input: Input<'a, 'e>) -> IResult, Atom<'a>> { + alt((parse_normal_atom, parse_infix_atom, parse_map_atom))(input) } /// Parse the body atoms. The same as the head atoms except for allowing negated atoms. - fn parse_body_atoms<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context( - "parse body atoms", - alt(( - parse_normal_atom, - parse_negative_atom, - parse_infix_atom, - parse_map_atom, - )), - )(input) + fn parse_body_atoms<'a, 'e>(input: Input<'a, 'e>) -> IResult, Atom<'a>> { + alt(( + parse_normal_atom, + parse_negative_atom, + parse_infix_atom, + parse_map_atom, + ))(input) } /// Parse an atom of the form `predicateName(term1, term2, …)`. 
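+    // A sketch of the generic list combinator used for the argument lists below
+    // (hypothetical usage, mirroring the existing tests):
+    //     let (_rest, terms) = parse_list(parse_term)(input)?;
+    // For the input `t1, t2, t3`, `terms.to_vec()` has length 3, and `terms.rest`
+    // keeps every `(ws, comma, ws, term)` group, so printing stays lossless.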
-    fn parse_normal_atom<'a, E: ParseError<Span<'a>> + ContextError<Span<'a>>>(
-        input: Span<'a>,
-    ) -> IResult<Span<'a>, Atom<'a>, E> {
-        context("parse normal atom", parse_named_tuple)(input)
+    fn parse_normal_atom<'a, 'e>(input: Input<'a, 'e>) -> IResult<Input<'a, 'e>, Atom<'a>> {
+        parse_named_tuple(input)
             .map(|(rest_input, named_tuple)| (rest_input, Atom::Positive(named_tuple)))
     }

     /// Parse an atom of the form `~predicateName(term1, term2, …)`.
-    fn parse_negative_atom<'a, E: ParseError<Span<'a>> + ContextError<Span<'a>>>(
-        input: Span<'a>,
-    ) -> IResult<Span<'a>, Atom<'a>, E> {
-        context("parse negative atom", pair(tilde, parse_named_tuple))(input).map(
-            |(rest_input, (tilde, named_tuple))| {
-                (
-                    rest_input,
-                    Atom::Negative {
-                        span: outer_span(input, rest_input),
-                        neg: tilde,
-                        atom: named_tuple,
-                    },
-                )
-            },
-        )
+    fn parse_negative_atom<'a, 'e>(input: Input<'a, 'e>) -> IResult<Input<'a, 'e>, Atom<'a>> {
+        pair(tilde, parse_named_tuple)(input).map(|(rest_input, (tilde, named_tuple))| {
+            (
+                rest_input,
+                Atom::Negative {
+                    span: outer_span(input.input, rest_input.input),
+                    neg: tilde,
+                    atom: named_tuple,
+                },
+            )
+        })
     }

     /// Parse an "infix atom" of the form `term1 <operation> term2`.
     /// The supported infix operations are `<`, `<=`, `=`, `>=`, `>` and `!=`.
-    fn parse_infix_atom<'a, E: ParseError<Span<'a>> + ContextError<Span<'a>>>(
-        input: Span<'a>,
-    ) -> IResult<Span<'a>, Atom<'a>, E> {
-        context(
-            "parse infix atom",
-            tuple((
-                parse_term,
-                opt(lex_whitespace),
-                parse_operation_token,
-                opt(lex_whitespace),
-                parse_term,
-            )),
-        )(input)
+    fn parse_infix_atom<'a, 'e>(input: Input<'a, 'e>) -> IResult<Input<'a, 'e>, Atom<'a>> {
+        tuple((
+            parse_term,
+            opt(lex_whitespace),
+            parse_operation_token,
+            opt(lex_whitespace),
+            parse_term,
+        ))(input)
         .map(|(rest_input, (lhs, ws1, operation, ws2, rhs))| {
             (
                 rest_input,
                 Atom::InfixAtom {
-                    span: outer_span(input, rest_input),
+                    span: outer_span(input.input, rest_input.input),
                     lhs,
                     ws1,
                     operation,
                     ws2,
                     rhs,
                 },
             )
         })
     }

     /// Parse a tuple with an optional name, like `ident(term1, term2)`
     /// or just `(int, int, skip)`.
-    fn parse_tuple<'a, E: ParseError<Span<'a>> + ContextError<Span<'a>>>(
-        input: Span<'a>,
-    ) -> IResult<Span<'a>, Tuple<'a>, E> {
-        context(
-            "parse tuple",
-            tuple((
-                opt(lex_ident),
-                opt(lex_whitespace),
-                open_paren,
-                opt(lex_whitespace),
-                opt(parse_term_list),
-                opt(lex_whitespace),
-                cut(close_paren),
-            )),
-        )(input)
+    fn parse_tuple<'a, 'e>(input: Input<'a, 'e>) -> IResult<Input<'a, 'e>, Tuple<'a>> {
+        tuple((
+            opt(lex_ident),
+            opt(lex_whitespace),
+            open_paren,
+            opt(lex_whitespace),
+            opt(parse_list(parse_term)),
+            opt(lex_whitespace),
+            report_label(close_paren, ParserLabel::CloseParen),
+        ))(input)
         .map(
             |(rest_input, (identifier, ws1, open_paren, ws2, terms, ws3, close_paren))| {
                 (
                     rest_input,
                     Tuple {
-                        span: outer_span(input, rest_input),
+                        span: outer_span(input.input, rest_input.input),
                         identifier,
                         ws1,
                         open_paren,
                         ws2,
                         terms,
                         ws3,
                         close_paren,
                     },
                 )
             },
         )
     }

     /// Parse a named tuple. This function is like `parse_tuple` with the difference
     /// that it enforces the existence of an identifier for the tuple.
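+    /// For example, `somePred(term1, term2)` from the lexer tests parses here, while
+    /// an anonymous tuple such as `(int, int, skip)` is only accepted by `parse_tuple`.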
-    fn parse_named_tuple<'a, E: ParseError<Span<'a>> + ContextError<Span<'a>>>(
-        input: Span<'a>,
-    ) -> IResult<Span<'a>, Tuple<'a>, E> {
-        context(
-            "parse named tuple",
-            tuple((
-                lex_ident,
-                opt(lex_whitespace),
-                open_paren,
-                opt(lex_whitespace),
-                opt(parse_term_list),
-                opt(lex_whitespace),
-                cut(close_paren),
-            )),
-        )(input)
+    fn parse_named_tuple<'a, 'e>(input: Input<'a, 'e>) -> IResult<Input<'a, 'e>, Tuple<'a>> {
+        tuple((
+            lex_ident,
+            opt(lex_whitespace),
+            report_label(open_paren, ParserLabel::OpenParen),
+            opt(lex_whitespace),
+            opt(parse_list(parse_term)),
+            opt(lex_whitespace),
+            report_label(close_paren, ParserLabel::CloseParen),
+        ))(input)
         .map(
             |(rest_input, (identifier, ws1, open_paren, ws2, terms, ws3, close_paren))| {
                 (
                     rest_input,
                     Tuple {
-                        span: outer_span(input, rest_input),
+                        span: outer_span(input.input, rest_input.input),
                         identifier: Some(identifier),
                         ws1,
                         open_paren,
                         ws2,
                         terms,
                         ws3,
                         close_paren,
                     },
                 )
             },
         )
     }

     /// Parse a map. Maps are denoted with `{…}` and can have an optional name, e.g. `csv {…}`.
     /// Inside the curly braces is a list of pairs.
-    fn parse_map<'a, E: ParseError<Span<'a>> + ContextError<Span<'a>>>(
-        input: Span<'a>,
-    ) -> IResult<Span<'a>, Map<'a>, E> {
-        context(
-            "parse map",
-            tuple((
-                opt(lex_ident),
-                opt(lex_whitespace),
-                open_brace,
-                opt(lex_whitespace),
-                parse_pair_list,
-                opt(lex_whitespace),
-                cut(close_brace),
-            )),
-        )(input)
+    fn parse_map<'a, 'e>(input: Input<'a, 'e>) -> IResult<Input<'a, 'e>, Map<'a>> {
+        tuple((
+            opt(lex_ident),
+            opt(lex_whitespace),
+            open_brace,
+            opt(lex_whitespace),
+            opt(parse_list(parse_pair)),
+            opt(lex_whitespace),
+            report_label(close_brace, ParserLabel::CloseBrace),
+        ))(input)
         .map(
             |(rest_input, (identifier, ws1, open_brace, ws2, pairs, ws3, close_brace))| {
                 (
                     rest_input,
                     Map {
-                        span: outer_span(input, rest_input),
+                        span: outer_span(input.input, rest_input.input),
                         identifier,
                         ws1,
                         open_brace,
                         ws2,
                         pairs,
                         ws3,
                         close_brace,
                     },
                 )
             },
         )
     }

     /// Parse a map in an atom position.
-    fn parse_map_atom<'a, E: ParseError<Span<'a>> + ContextError<Span<'a>>>(
-        input: Span<'a>,
-    ) -> IResult<Span<'a>, Atom<'a>, E> {
-        context("parse map atom", parse_map)(input)
-            .map(|(rest_input, map)| (rest_input, Atom::Map(map)))
-    }
-
-    /// Parse a pair list of the form `key1 = value1, key2 = value2, …`.
-    fn parse_pair_list<'a, E: ParseError<Span<'a>> + ContextError<Span<'a>>>(
-        input: Span<'a>,
-    ) -> IResult<Span<'a>, Option<List<'a, Pair<'a, Term<'a>, Term<'a>>>>, E> {
-        context(
-            "parse pair list",
-            opt(pair(
-                parse_pair,
-                many0(tuple((
-                    opt(lex_whitespace),
-                    comma,
-                    opt(lex_whitespace),
-                    parse_pair,
-                ))),
-            )),
-        )(input)
-        .map(|(rest_input, pair_list)| {
-            if let Some((first, rest)) = pair_list {
-                (
-                    rest_input,
-                    Some(List {
-                        span: outer_span(input, rest_input),
-                        first,
-                        rest: if rest.is_empty() { None } else { Some(rest) },
-                    }),
-                )
-            } else {
-                (rest_input, None)
-            }
-        })
-    }
+    fn parse_map_atom<'a, 'e>(input: Input<'a, 'e>) -> IResult<Input<'a, 'e>, Atom<'a>> {
+        parse_map(input).map(|(rest_input, map)| (rest_input, Atom::Map(map)))
+    }
+
+    // /// Parse a pair list of the form `key1 = value1, key2 = value2, …`.
+    // fn parse_pair_list<'a, 'e, E: ParseError<Input<'a, 'e>> + ContextError<Input<'a, 'e>>>(
+    //     input: Input<'a, 'e>,
+    //     state: Errors,
+    // ) -> IResult<Input<'a, 'e>, Option<List<'a, Pair<'a, Term<'a>, Term<'a>>>>, E> {
+    //     context(
+    //         "parse pair list",
+    //         opt(pair(
+    //             parse_pair,
+    //             many0(tuple((
+    //                 opt(lex_whitespace),
+    //                 comma,
+    //                 opt(lex_whitespace),
+    //                 parse_pair,
+    //             ))),
+    //         )),
+    //     )(input)
+    //     .map(|(rest_input, pair_list)| {
+    //         if let Some((first, rest)) = pair_list {
+    //             (
+    //                 rest_input,
+    //                 Some(List {
+    //                     span: outer_span(input, rest_input),
+    //                     first,
+    //                     rest: if rest.is_empty() { None } else { Some(rest) },
+    //                 }),
+    //             )
+    //         } else {
+    //             (rest_input, None)
+    //         }
+    //     })
+    // }

     /// Parse a pair of the form `key = value`.
-    fn parse_pair<'a, E: ParseError<Span<'a>> + ContextError<Span<'a>>>(
-        input: Span<'a>,
-    ) -> IResult<Span<'a>, Pair<'a, Term<'a>, Term<'a>>, E> {
-        context(
-            "parse pair",
-            tuple((
-                parse_term,
-                opt(lex_whitespace),
-                equal,
-                opt(lex_whitespace),
-                parse_term,
-            )),
-        )(input)
+    fn parse_pair<'a, 'e>(
+        input: Input<'a, 'e>,
+    ) -> IResult<Input<'a, 'e>, Pair<'a, Term<'a>, Term<'a>>> {
+        tuple((
+            parse_term,
+            opt(lex_whitespace),
+            report_label(equal, ParserLabel::Equal),
+            opt(lex_whitespace),
+            parse_term,
+        ))(input)
         .map(|(rest_input, (key, ws1, equal, ws2, value))| {
             (
                 rest_input,
                 Pair {
-                    span: outer_span(input, rest_input),
+                    span: outer_span(input.input, rest_input.input),
                     key,
                     ws1,
                     equal,
                     ws2,
                     value,
                 },
             )
         })
     }

-    /// Parse a list of terms of the form `term1, term2, …`.
-    fn parse_term_list<'a, E: ParseError<Span<'a>> + ContextError<Span<'a>>>(
-        input: Span<'a>,
-    ) -> IResult<Span<'a>, List<'a, Term<'a>>, E> {
-        context(
-            "parse term list",
-            pair(
-                parse_term,
-                many0(tuple((
-                    opt(lex_whitespace),
-                    comma,
-                    opt(lex_whitespace),
-                    parse_term,
-                ))),
-            ),
-        )(input)
-        .map(|(rest_input, (first, rest))| {
-            (
-                rest_input,
-                List {
-                    span: outer_span(input, rest_input),
-                    first,
-                    rest: if rest.is_empty() { None } else { Some(rest) },
-                },
-            )
-        })
-    }
+    // /// Parse a list of terms of the form `term1, term2, …`.
+    // fn parse_term_list<'a, 'e, E: ParseError<Input<'a, 'e>> + ContextError<Input<'a, 'e>>>(
+    //     input: Input<'a, 'e>,
+    //     state: Errors,
+    // ) -> IResult<Input<'a, 'e>, List<'a, Term<'a>>, E> {
+    //     context(
+    //         "parse term list",
+    //         pair(
+    //             parse_term,
+    //             many0(tuple((
+    //                 opt(lex_whitespace),
+    //                 comma,
+    //                 opt(lex_whitespace),
+    //                 parse_term,
+    //             ))),
+    //         ),
+    //     )(input)
+    //     .map(|(rest_input, (first, rest))| {
+    //         (
+    //             rest_input,
+    //             List {
+    //                 span: outer_span(input, rest_input),
+    //                 first,
+    //                 rest: if rest.is_empty() { None } else { Some(rest) },
+    //             },
+    //         )
+    //     })
+    // }

     /// Parse a term. A term can be a primitive value (constant, number, string, …),
     /// a variable (universal or existential), a map, a function (-symbol), an arithmetic
     /// operation, an aggregation or a tuple of terms, e.g. `(term1, term2, …)`.
-    fn parse_term<'a, E: ParseError<Span<'a>> + ContextError<Span<'a>>>(
-        input: Span<'a>,
-    ) -> IResult<Span<'a>, Term<'a>, E> {
-        context(
-            "parse term",
-            alt((
-                parse_binary_term,
-                parse_tuple_term,
-                parse_unary_prefix_term,
-                parse_map_term,
-                parse_primitive_term,
-                parse_variable,
-                parse_existential,
-                parse_aggregation_term,
-                parse_blank,
-            )),
-        )(input)
+    fn parse_term<'a, 'e>(input: Input<'a, 'e>) -> IResult<Input<'a, 'e>, Term<'a>> {
+        report_error(alt((
+            parse_binary_term,
+            parse_tuple_term,
+            // parse_unary_prefix_term,
+            parse_map_term,
+            parse_primitive_term,
+            parse_variable,
+            parse_existential,
+            parse_aggregation_term,
+            parse_blank,
+        )))(input)
     }

     /// Parse a primitive term (simple constant, iri constant, number, string).
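+    /// The order of the `alt` branches below matters: `parse_rdf_literal` is tried
+    /// before `parse_string`, since an RDF literal (a string followed by `^^` and a
+    /// datatype IRI) would otherwise be cut short after its string part.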
- fn parse_primitive_term<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context( - "parse primitive term", - alt(( - parse_rdf_literal, - parse_ident, - parse_iri, - parse_number, - parse_string, - )), - )(input) + fn parse_primitive_term<'a, 'e>(input: Input<'a, 'e>) -> IResult, Term<'a>> { + report_error(alt(( + parse_rdf_literal, + parse_ident, + parse_iri, + parse_number, + parse_string, + )))(input) .map(|(rest_input, term)| (rest_input, Term::Primitive(term))) } /// Parse a rdf literal e.g. "2023-06-19"^^ - fn parse_rdf_literal<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, Primitive<'a>, E> { - context( - "parse rdf literal", - tuple((lex_string, recognize(pair(caret, caret)), lex_iri)), - )(input) + fn parse_rdf_literal<'a, 'e>(input: Input<'a, 'e>) -> IResult, Primitive<'a>> { + tuple(( + lex_string, + recognize(pair(caret, caret)), + report_label(lex_iri, ParserLabel::Iri), + ))(input) .map(|(rest_input, (string, carets, iri))| { ( rest_input, Primitive::RdfLiteral { - span: outer_span(input, rest_input), + span: outer_span(input.input, rest_input.input), string, carets: Token { kind: TokenKind::Caret, - span: carets, + span: carets.input, }, iri, }, @@ -3241,45 +3367,31 @@ mod new { }) } - fn parse_ident<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, Primitive<'a>, E> { - context("parse identifier", lex_ident)(input) - .map(|(rest_input, ident)| (rest_input, Primitive::Constant(ident))) + fn parse_ident<'a, 'e>(input: Input<'a, 'e>) -> IResult, Primitive<'a>> { + lex_ident(input).map(|(rest_input, ident)| (rest_input, Primitive::Constant(ident))) } - fn parse_iri<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, Primitive<'a>, E> { - context("parse iri", lex_iri)(input) - .map(|(rest_input, iri)| (rest_input, Primitive::Iri(iri))) + fn parse_iri<'a, 'e>(input: Input<'a, 'e>) -> IResult, Primitive<'a>> { + lex_iri(input).map(|(rest_input, iri)| (rest_input, Primitive::Iri(iri))) } - fn parse_number<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, Primitive<'a>, E> { - context("parse number", alt((parse_decimal, parse_integer)))(input) + fn parse_number<'a, 'e>(input: Input<'a, 'e>) -> IResult, Primitive<'a>> { + report_error(alt((parse_decimal, parse_integer)))(input) } - fn parse_decimal<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, Primitive<'a>, E> { - context( - "parse decimal", - tuple(( - opt(alt((plus, minus))), - opt(lex_number), - dot, - lex_number, - opt(parse_exponent), - )), - )(input) + fn parse_decimal<'a, 'e>(input: Input<'a, 'e>) -> IResult, Primitive<'a>> { + tuple(( + opt(alt((plus, minus))), + opt(lex_number), + dot, + lex_number, + opt(parse_exponent), + ))(input) .map(|(rest_input, (sign, before, dot, after, exponent))| { - dbg!(&sign, &before, &dot, &after, &exponent); ( rest_input, Primitive::Number { - span: outer_span(input, rest_input), + span: outer_span(input.input, rest_input.input), sign, before, dot: Some(dot), @@ -3290,82 +3402,68 @@ mod new { }) } - fn parse_integer<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, Primitive<'a>, E> { - context("parse integer", pair(opt(alt((plus, minus))), lex_number))(input).map( - |(rest_input, (sign, number))| { - ( - rest_input, - Primitive::Number { - span: outer_span(input, rest_input), - sign, - before: None, - dot: None, - after: number, - exponent: None, - }, - ) - }, - ) - } - - fn parse_exponent<'a, E: ParseError> + 
ContextError>>( - input: Span<'a>, - ) -> IResult, Exponent<'a>, E> { - tuple((exp, opt(alt((plus, minus))), lex_number))(input) - .map(|(rest_input, (e, sign, number))| (rest_input, Exponent { e, sign, number })) - } - - fn parse_string<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, Primitive<'a>, E> { - context("parse string", lex_string)(input) - .map(|(rest_input, string)| (rest_input, Primitive::String(string))) - } - - /// Parse an unary term. - fn parse_unary_prefix_term<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context( - "parse unary prefix term", - pair(lex_unary_prefix_operators, parse_term), - )(input) - .map(|(rest_input, (operation, term))| { + fn parse_integer<'a, 'e>(input: Input<'a, 'e>) -> IResult, Primitive<'a>> { + pair(opt(alt((plus, minus))), lex_number)(input).map(|(rest_input, (sign, number))| { ( rest_input, - Term::UnaryPrefix { - span: outer_span(input, rest_input), - operation, - term: Box::new(term), + Primitive::Number { + span: outer_span(input.input, rest_input.input), + sign, + before: None, + dot: None, + after: number, + exponent: None, }, ) }) } + fn parse_exponent<'a, 'e>(input: Input<'a, 'e>) -> IResult, Exponent<'a>> { + tuple(( + exp, + opt(alt((plus, minus))), + report_label(lex_number, ParserLabel::Number), + ))(input) + .map(|(rest_input, (e, sign, number))| (rest_input, Exponent { e, sign, number })) + } + + fn parse_string<'a, 'e>(input: Input<'a, 'e>) -> IResult, Primitive<'a>> { + lex_string(input).map(|(rest_input, string)| (rest_input, Primitive::String(string))) + } + + // /// Parse an unary term. + // fn parse_unary_prefix_term<'a, 'e>(input: Input<'a, 'e>) -> IResult, Term<'a>> { + // pair(lex_unary_prefix_operators, parse_term)(input).map( + // |(rest_input, (operation, term))| { + // ( + // rest_input, + // Term::UnaryPrefix { + // span: outer_span(input.input, rest_input.input), + // operation, + // term: Box::new(term), + // }, + // ) + // }, + // ) + // } + /// Parse a binary infix operation of the form `term1 term2`. - fn parse_binary_term<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context( - "parse binary term", - pair( - parse_arithmetic_product, - opt(tuple(( - opt(lex_whitespace), - alt((plus, minus)), - opt(lex_whitespace), - parse_binary_term, - ))), - ), + fn parse_binary_term<'a, 'e>(input: Input<'a, 'e>) -> IResult, Term<'a>> { + pair( + parse_arithmetic_product, + opt(tuple(( + opt(lex_whitespace), + alt((plus, minus)), + opt(lex_whitespace), + parse_binary_term, + ))), )(input) .map(|(rest_input, (lhs, opt))| { ( rest_input, if let Some((ws1, operation, ws2, rhs)) = opt { Term::Binary { - span: outer_span(input, rest_input), + span: outer_span(input.input, rest_input.input), lhs: Box::new(lhs), ws1, operation, @@ -3381,27 +3479,22 @@ mod new { /// Parse an arithmetic product, i.e. an expression involving /// only `*` and `/` over subexpressions. 
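+    /// This two-level split gives `*` and `/` a higher precedence than `+` and `-`:
+    /// in the `7+5*7` test below, `5*7` becomes the right operand of the `+`.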
- fn parse_arithmetic_product<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context( - "parse arithmetic product", - pair( - parse_arithmetic_factor, - opt(tuple(( - opt(lex_whitespace), - alt((star, slash)), - opt(lex_whitespace), - parse_arithmetic_product, - ))), - ), + fn parse_arithmetic_product<'a, 'e>(input: Input<'a, 'e>) -> IResult, Term<'a>> { + pair( + parse_arithmetic_factor, + opt(tuple(( + opt(lex_whitespace), + alt((star, slash)), + opt(lex_whitespace), + parse_arithmetic_product, + ))), )(input) .map(|(rest_input, (lhs, opt))| { ( rest_input, if let Some((ws1, operation, ws2, rhs)) = opt { Term::Binary { - span: outer_span(input, rest_input), + span: outer_span(input.input, rest_input.input), lhs: Box::new(lhs), ws1, operation, @@ -3415,19 +3508,14 @@ mod new { }) } - fn parse_arithmetic_factor<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context( - "parse arithmetic factor", - alt(( - parse_tuple_term, - parse_aggregation_term, - parse_primitive_term, - parse_variable, - parse_existential, - )), - )(input) + fn parse_arithmetic_factor<'a, 'e>(input: Input<'a, 'e>) -> IResult, Term<'a>> { + report_error(alt(( + parse_tuple_term, + parse_aggregation_term, + parse_primitive_term, + parse_variable, + parse_existential, + )))(input) } // fn fold_arithmetic_expression<'a>( @@ -3452,29 +3540,24 @@ mod new { // } /// Parse an aggregation term of the form `#sum(…)`. - fn parse_aggregation_term<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context( - "parse aggregation term", - tuple(( - recognize(pair(hash, lex_ident)), - open_paren, - opt(lex_whitespace), - parse_term_list, - opt(lex_whitespace), - close_paren, - )), - )(input) + fn parse_aggregation_term<'a, 'e>(input: Input<'a, 'e>) -> IResult, Term<'a>> { + tuple(( + recognize(pair(hash, lex_ident)), + report_label(open_paren, ParserLabel::OpenParen), + opt(lex_whitespace), + parse_list(parse_term), + opt(lex_whitespace), + report_label(close_paren, ParserLabel::CloseParen), + ))(input) .map( |(rest_input, (operation, open_paren, ws1, terms, ws2, close_paren))| { ( rest_input, Term::Aggregation { - span: outer_span(input, rest_input), + span: outer_span(input.input, rest_input.input), operation: Token { kind: TokenKind::Aggregate, - span: operation, + span: operation.input, }, open_paren, ws1, @@ -3488,61 +3571,51 @@ mod new { } /// Parse a `_` - fn parse_blank<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context("parse blank", underscore)(input) - .map(|(rest_input, underscore)| (rest_input, Term::Blank(underscore))) + fn parse_blank<'a, 'e>(input: Input<'a, 'e>) -> IResult, Term<'a>> { + underscore(input).map(|(rest_input, underscore)| (rest_input, Term::Blank(underscore))) } /// Parse a tuple term, either with a name (function symbol) or as a term (-list) with /// parenthesis. - fn parse_tuple_term<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context("parse tuple term", parse_tuple)(input) + fn parse_tuple_term<'a, 'e>(input: Input<'a, 'e>) -> IResult, Term<'a>> { + parse_tuple(input) .map(|(rest_input, named_tuple)| (rest_input, Term::Tuple(Box::new(named_tuple)))) } /// Parse a map as a term. 
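+    /// A map literal such as `csv {resource="sources/dataA.csv"}` (see the `@import`
+    /// test) is parsed by `parse_map`; in term position it is wrapped as `Term::Map`.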
- fn parse_map_term<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context("parse map term", parse_map)(input) - .map(|(rest_input, map)| (rest_input, Term::Map(Box::new(map)))) + fn parse_map_term<'a, 'e>(input: Input<'a, 'e>) -> IResult, Term<'a>> { + parse_map(input).map(|(rest_input, map)| (rest_input, Term::Map(Box::new(map)))) } /// Parse a variable. - fn parse_variable<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context("parse variable", recognize(pair(question_mark, lex_ident)))(input).map( - |(rest_input, var)| { - ( - rest_input, - Term::Variable(Token { - kind: TokenKind::Variable, - span: var, - }), - ) - }, - ) + fn parse_variable<'a, 'e>(input: Input<'a, 'e>) -> IResult, Term<'a>> { + recognize(pair( + question_mark, + report_label(lex_ident, ParserLabel::Identifier), + ))(input) + .map(|(rest_input, var)| { + ( + rest_input, + Term::Variable(Token { + kind: TokenKind::Variable, + span: var.input, + }), + ) + }) } /// Parse an existential variable. - fn parse_existential<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context( - "parse existential", - recognize(pair(exclamation_mark, lex_ident)), - )(input) + fn parse_existential<'a, 'e>(input: Input<'a, 'e>) -> IResult, Term<'a>> { + recognize(pair( + exclamation_mark, + report_label(lex_ident, ParserLabel::Identifier), + ))(input) .map(|(rest_input, existential)| { ( rest_input, Term::Existential(Token { kind: TokenKind::Existential, - span: existential, + span: existential.input, }), ) }) @@ -3550,17 +3623,24 @@ mod new { // Order of functions is important, because of ordered choice and no backtracking /// Parse the operator for an infix atom. - fn parse_operation_token<'a, E: ParseError> + ContextError>>( - input: Span<'a>, - ) -> IResult, E> { - context( - "parse operation token", - alt((less_equal, greater_equal, equal, unequal, less, greater)), - )(input) + fn parse_operation_token<'a, 'e>(input: Input<'a, 'e>) -> IResult, Token<'a>> { + report_error(alt(( + less_equal, + greater_equal, + equal, + unequal, + less, + greater, + )))(input) } #[cfg(test)] mod tests { + use std::{ + cell::RefCell, + collections::{HashMap, HashSet}, + }; + use nom::error::{convert_error, VerboseError}; use super::*; @@ -3585,14 +3665,17 @@ mod new { }; } - fn convert_located_span_error<'a>(input: Span<'a>, err: VerboseError>) -> String { + fn convert_located_span_error<'a, 'e>( + input: Span<'a>, + err: VerboseError>, + ) -> String { convert_error( *(input.fragment()), VerboseError { errors: err .errors .into_iter() - .map(|(span, tag)| (*(span.fragment()), tag)) + .map(|(span, tag)| (*(span.input.fragment()), tag)) .collect(), }, ) @@ -3604,10 +3687,21 @@ mod new { // tok: &lex_tokens(Span::new("a(B,C).")).unwrap().1, // }; let input = Span::new("a(B,C)."); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let errors = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state: errors, + }; assert_eq!( - parse_program::>(input).unwrap().1, + // parse_program::>(input).unwrap().1, + parse_program(input).0, Program { - span: input, + span: input.input, tl_doc_comment: None, statements: vec![Statement::Fact { span: s!(0, 1, "a(B,C)."), @@ -3664,11 +3758,22 @@ mod new { let input = Span::new( r#"@base .@prefix rdfs:.@import sourceA:-csv{resource="sources/dataA.csv"}.@export a:-csv{}.@output a, b, c."#, ); + let refcell = RefCell::new(Vec::new()); + let labels = 
RefCell::new(Vec::new()); + let errors = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state: errors, + }; assert_eq!( - parse_program::>(input).unwrap().1, + // parse_program::>(input).unwrap().1, + parse_program(input).0, Program { tl_doc_comment: None, - span: input, + span: input.input, statements: vec![ Statement::Directive(Directive::Base { span: s!(0, 1, "@base ."), @@ -3898,28 +4003,39 @@ mod new { ) } - #[test] - fn ignore_ws_and_comments() { - let input = Span::new(" Hi %cool comment\n"); - assert_eq!( - super::ignore_ws_and_comments(lex_ident::>)(input), - Ok(( - s!(22, 2, ""), - Token { - kind: TokenKind::Ident, - span: s!(3, 1, "Hi") - } - )) - ) - } + // #[test] + // fn ignore_ws_and_comments() { + // let input = Span::new(" Hi %cool comment\n"); + // assert_eq!( + // super::ignore_ws_and_comments(lex_ident::>)(input), + // Ok(( + // s!(22, 2, ""), + // Token { + // kind: TokenKind::Ident, + // span: s!(3, 1, "Hi") + // } + // )) + // ) + // } #[test] fn fact_with_ws() { let input = Span::new("some(Fact, with, whitespace) . % and a super useful comment\n"); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let errors = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state: errors, + }; assert_eq!( - parse_program::>(input).unwrap().1, + // parse_program::>(input).unwrap().1, + parse_program(input).0, Program { - span: input, + span: input.input, tl_doc_comment: None, statements: vec![ Statement::Fact { @@ -4023,47 +4139,71 @@ limeSpecies(?X, ?Name) :- taxon(?X, ?Name, ?Y), limeSpecies(?Y, ?N). oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters), ?age > 200, limeSpecies(?id,?species) ."#, ); - let ast = parse_program::>(input); - match &ast { - Ok((rest_input, ast)) => { - println!("Rest Input:\n{:#?}\n\n{}", rest_input, ast); - assert_eq!( - { - let mut string_from_tokens = String::new(); - for token in get_all_tokens(ast) { - string_from_tokens.push_str(token.span().fragment()); - } - println!("String from Tokens:\n"); - println!("{}\n", string_from_tokens); - string_from_tokens - }, - *input.fragment(), - ); - } - Err(nom::Err::Error(err)) | Err(nom::Err::Failure(err)) => { - println!( - "PRINT ERROR:\n\n{}", - convert_located_span_error(input, err.clone()) - ); - } - Err(err) => panic!("{}", err), - } - assert!(ast.is_ok()); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let errors = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state: errors, + }; + // let ast = parse_program::>(input); + let (ast, _) = parse_program(input); + println!("{}", ast); + assert_eq!( + { + let mut string_from_tokens = String::new(); + for token in get_all_tokens(&ast) { + string_from_tokens.push_str(token.span().fragment()); + } + println!("String from Tokens:\n"); + println!("{}\n", string_from_tokens); + string_from_tokens + }, + *input.input.fragment(), + ); } #[test] fn parser_test() { - let str = std::fs::read_to_string("../testfile.rls").expect("testfile not found"); + let file = "../testfile2.rls"; + let str = std::fs::read_to_string(file).expect("testfile not found"); let input = Span::new(str.as_str()); - let result = parse_program::>(input); - match result { - Ok(ast) => println!("{}", ast.1), - Err(nom::Err::Error(err)) | Err(nom::Err::Failure(err)) => { - println!("{}", convert_located_span_error(input, err)) - } - Err(_) => (), + 
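+        // Every parse in these tests starts from the same boilerplate: two RefCells
+        // collect errors and expectation labels, ParserState borrows them, and Input
+        // couples that state with the source span for all combinators.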
let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let parser_state = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state, + }; + // let result = parse_program::>(input); + let (ast, errors) = parse_program(input); + println!("{}\n\n{:#?}", ast, errors); + let mut error_map: HashMap> = HashMap::new(); + for error in errors { + if let Some(set) = error_map.get_mut(&error.0) { + set.insert(error.1); + } else { + let mut set = HashSet::new(); + set.insert(error.1); + error_map.insert(error.0, set); + }; + } + dbg!(&error_map); + // assert!(false); + let lines: Vec<_> = str.lines().collect(); + for (pos, str) in error_map { + // println!("{pos:?}, {str:?}"); + println!("error: {str:?}"); + println!("--> {}:{}:{}", file, pos.line, pos.column); + println!("{}", lines.get((pos.line - 1) as usize).unwrap()); + println!("{0:>1$}\n", "^", pos.column as usize) } - assert!(false); } #[test] @@ -4072,7 +4212,19 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters assert_eq!( { - let result = parse_term::>(Span::new("42")); + let input = Span::new("42"); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let parser_state = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state, + }; + // let result = parse_term::>(input); + let result = parse_term(input); result.unwrap().1 }, Term::Primitive(Primitive::Number { @@ -4087,7 +4239,19 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters assert_eq!( { - let result = parse_term::>(Span::new("35+7")); + let input = Span::new("35+7"); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let parser_state = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state, + }; + // let result = parse_term::>(input); + let result = parse_term(input); result.unwrap().1 }, Term::Binary { @@ -4116,7 +4280,19 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters assert_eq!( { - let result = parse_term::>(Span::new("6*7")); + let input = Span::new("6*7"); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let parser_state = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state, + }; + // let result = parse_term::>(input); + let result = parse_term(input); result.unwrap().1 }, Term::Binary { @@ -4145,7 +4321,19 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters assert_eq!( { - let result = parse_term::>(Span::new("49-7")); + let input = Span::new("49-7"); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let parser_state = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state, + }; + // let result = parse_term::>(input); + let result = parse_term(input); result.unwrap().1 }, Term::Binary { @@ -4174,7 +4362,19 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters assert_eq!( { - let result = parse_term::>(Span::new("84/2")); + let input = Span::new("84/2"); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let parser_state = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state, + }; + // let result = parse_term::>(input); + let result 
= parse_term(input); result.unwrap().1 }, Term::Binary { @@ -4203,7 +4403,19 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters assert_eq!( { - let result = parse_term::>(Span::new("5*7+7")); + let input = Span::new("5*7+7"); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let parser_state = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state, + }; + // let result = parse_term::>(input); + let result = parse_term(input); result.unwrap().1 }, Term::Binary { @@ -4246,7 +4458,19 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters assert_eq!( { - let result = parse_term::>(Span::new("7+5*7")); + let input = Span::new("7+5*7"); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let parser_state = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state, + }; + // let result = parse_term::>(input); + let result = parse_term(input); result.unwrap().1 }, Term::Binary { @@ -4290,30 +4514,42 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters assert_eq!( { let input = Span::new("(15+3*2-(7+35)*8)/3"); - let result = parse_term::>(input); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let parser_state = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state, + }; + // let result = parse_term::>(input); + let result = parse_term(input); // let result = parse_term::>(Span::new("(15+3*2-(7+35)*8)/3")); - match result { - Ok(ast) => { - println!("{}", ast.1); - ast.1 - } - Err(nom::Err::Error(err)) | Err(nom::Err::Failure(err)) => { - panic!( - "{}", - convert_error( - *(input.fragment()), - VerboseError { - errors: err - .errors - .into_iter() - .map(|(span, tag)| { (*(span.fragment()), tag) }) - .collect() - } - ) - ) - } - Err(nom::Err::Incomplete(err)) => panic!("{:#?}", err), - } + // match result { + // Ok(ast) => { + // println!("{}", ast.1); + // ast.1 + // } + // Err(nom::Err::Error(err)) | Err(nom::Err::Failure(err)) => { + // panic!( + // "{}", + // convert_error( + // *(input.input.fragment()), + // VerboseError { + // errors: err + // .errors + // .into_iter() + // .map(|(span, tag)| { (*(span.fragment()), tag) }) + // .collect() + // } + // ) + // ) + // } + // Err(nom::Err::Incomplete(err)) => panic!("{:#?}", err), + // } + result.unwrap().1 }, Term::Binary { span: s!(0, 1, "(15+3*2-(7+35)*8)/3"), @@ -4449,7 +4685,19 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters assert_eq!( { - let result = parse_term::>(Span::new("15+3*2-(7+35)*8/3")); + let input = Span::new("15+3*2-(7+35)*8/3"); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let parser_state = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state, + }; + // let result = parse_term::>(input); + let result = parse_term(input); result.unwrap().1 }, Term::Binary { @@ -4592,17 +4840,43 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters assert_eq!( { let input = Span::new("e42"); - parse_exponent::>(input) + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let parser_state = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state, + }; + // 
parse_exponent::>(input) + parse_exponent(input).unwrap().1 }, - Ok(( - s!(3, 1, ""), - Exponent { - e: T! {TokenKind::Exponent, 0,1,"e"}, - sign: None, - number: T! {TokenKind::Number, 1,1,"42"} - } - )) + Exponent { + e: T! {TokenKind::Exponent, 0,1,"e"}, + sign: None, + number: T! {TokenKind::Number, 1,1,"42"} + } ) } + + #[test] + fn missing_dot() { + let input = Span::new("some(Fact\nSome other, Fact.\nthird(fact)."); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let parser_state = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state, + }; + let result = parse_program(input); + println!("{}\n\n{:#?}", result.0, result.1); + // assert!(false); + } } } diff --git a/nemo/src/io/parser/ast.rs b/nemo/src/io/parser/ast.rs index 722aff57f..b9bec1e4e 100644 --- a/nemo/src/io/parser/ast.rs +++ b/nemo/src/io/parser/ast.rs @@ -20,6 +20,7 @@ pub(crate) trait AstNode: std::fmt::Debug + Display { fn name(&self) -> String; } +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub(crate) struct Position { pub(crate) offset: usize, pub(crate) line: u32, @@ -33,6 +34,34 @@ pub(crate) struct List<'a, T> { // ([ws]?[,][ws]?[T])* pub(crate) rest: Option>, Token<'a>, Option>, T)>>, } +impl List<'_, T> { + pub fn to_vec(&self) -> Vec { + let mut vec = Vec::new(); + vec.push(self.first.clone()); + if let Some(rest) = &self.rest { + for (_, _, _, item) in rest { + vec.push(item.clone()); + } + } + vec + } +} +impl std::iter::IntoIterator for List<'_, T> { + type Item = T; + + type IntoIter = std::vec::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + let mut vec = Vec::new(); + vec.push(self.first); + if let Some(rest) = self.rest { + for (_, _, _, item) in rest { + vec.push(item); + } + } + vec.into_iter() + } +} impl AstNode for List<'_, T> { fn children(&self) -> Option> { let mut vec = Vec::new(); diff --git a/nemo/src/io/parser/ast/program.rs b/nemo/src/io/parser/ast/program.rs index a7397f7ec..334e1ee4a 100644 --- a/nemo/src/io/parser/ast/program.rs +++ b/nemo/src/io/parser/ast/program.rs @@ -43,12 +43,21 @@ impl AstNode for Program<'_> { } fn name(&self) -> String { - format!( - "Program \x1b[34m@{}:{} \x1b[92m\"{}…\"\x1b[0m", - self.span.location_line(), - self.span.get_utf8_column(), - &self.span.fragment()[..60], - ) + if self.span.fragment().len() < 60 { + format!( + "Program \x1b[34m@{}:{} \x1b[92m{:?}\x1b[0m", + self.span.location_line(), + self.span.get_utf8_column(), + &self.span.fragment(), + ) + } else { + format!( + "Program \x1b[34m@{}:{} \x1b[92m{:?}[…]\x1b[0m", + self.span.location_line(), + self.span.get_utf8_column(), + &self.span.fragment()[..60], + ) + } } } impl std::fmt::Display for Program<'_> { diff --git a/nemo/src/io/parser/ast/statement.rs b/nemo/src/io/parser/ast/statement.rs index e26ec39dd..a2b3a1c9d 100644 --- a/nemo/src/io/parser/ast/statement.rs +++ b/nemo/src/io/parser/ast/statement.rs @@ -27,6 +27,7 @@ pub(crate) enum Statement<'a> { }, Whitespace(Token<'a>), Comment(Token<'a>), + Error(Token<'a>), } impl AstNode for Statement<'_> { fn children(&self) -> Option> { @@ -84,6 +85,7 @@ impl AstNode for Statement<'_> { } Statement::Whitespace(ws) => Some(vec![ws]), Statement::Comment(c) => Some(vec![c]), + Statement::Error(t) => Some(vec![t]), } } @@ -94,6 +96,7 @@ impl AstNode for Statement<'_> { Statement::Rule { span, .. 
} => *span, Statement::Whitespace(ws) => ws.span(), Statement::Comment(c) => c.span(), + Statement::Error(t) => t.span, } } @@ -128,6 +131,7 @@ impl AstNode for Statement<'_> { Statement::Rule { .. } => name!("Rule"), Statement::Whitespace(_) => name!("Whitespace"), Statement::Comment(_) => name!("Comment"), + Statement::Error(_) => name!("ERROR"), } } } diff --git a/nemo/src/io/parser/types.rs b/nemo/src/io/parser/types.rs index a65730809..6850a69c2 100644 --- a/nemo/src/io/parser/types.rs +++ b/nemo/src/io/parser/types.rs @@ -1,19 +1,24 @@ -use std::num::{ParseFloatError, ParseIntError}; +use std::{ + num::{ParseFloatError, ParseIntError}, + ops::Range, + str::{CharIndices, Chars}, +}; use nemo_physical::datavalues::DataValueCreationError; use nom::{ error::{ErrorKind, FromExternalError}, - IResult, + AsBytes, IResult, InputIter, InputLength, InputTake, InputTakeAtPosition, }; use nom_locate::LocatedSpan; use thiserror::Error; use crate::{ io::formats::import_export::ImportExportError, + io::lexer::ParserState, model::rule_model::{Aggregate, Constraint, Literal, Term}, }; -use super::Variable; +use super::{ast::Position, Variable}; /// A [LocatedSpan] over the input. pub(super) type Span<'a> = LocatedSpan<&'a str>; @@ -434,7 +439,7 @@ impl<'a> Tokens<'a> { Tokens { tok: vec } } } -impl<'a> nom::AsBytes for Tokens<'a> { +impl<'a> AsBytes for Tokens<'a> { fn as_bytes(&self) -> &[u8] { todo!() } @@ -471,7 +476,7 @@ impl<'a, T> nom::FindToken for Tokens<'a> { todo!() } } -impl<'a> nom::InputIter for Tokens<'a> { +impl<'a> InputIter for Tokens<'a> { type Item = &'a Token<'a>; type Iter = std::iter::Enumerate<::std::slice::Iter<'a, Token<'a>>>; @@ -506,7 +511,7 @@ impl<'a> nom::InputLength for Tokens<'a> { self.tok.len() } } -impl<'a> nom::InputTake for Tokens<'a> { +impl<'a> InputTake for Tokens<'a> { fn take(&self, count: usize) -> Self { Tokens { tok: &self.tok[0..count], @@ -584,3 +589,251 @@ impl<'a, R> nom::Slice for Tokens<'a> { todo!() } } + +#[derive(Debug, Clone, Copy)] +pub(crate) struct Input<'a, 's> { + pub(crate) input: crate::io::lexer::Span<'a>, + pub(crate) parser_state: ParserState<'s>, +} +impl<'a, 's> Input<'a, 's> { + fn new(input: &'a str, errors: ParserState<'s>) -> Input<'a, 's> { + Input { + input: Span::new(input), + parser_state: errors, + } + } +} +impl ToRange for Input<'_, '_> { + fn to_range(&self) -> Range { + self.input.to_range() + } +} + +impl AsBytes for Input<'_, '_> { + fn as_bytes(&self) -> &[u8] { + self.input.fragment().as_bytes() + } +} + +impl<'a, 's> nom::Compare> for Input<'a, 's> { + fn compare(&self, t: Input) -> nom::CompareResult { + self.input.compare(t.as_bytes()) + } + + fn compare_no_case(&self, t: Input) -> nom::CompareResult { + self.input.compare_no_case(t.as_bytes()) + } +} +impl nom::Compare<&str> for Input<'_, '_> { + fn compare(&self, t: &str) -> nom::CompareResult { + self.input.compare(t) + } + + fn compare_no_case(&self, t: &str) -> nom::CompareResult { + self.input.compare_no_case(t) + } +} + +impl nom::ExtendInto for Input<'_, '_> { + type Item = char; + + type Extender = String; + + fn new_builder(&self) -> Self::Extender { + self.input.new_builder() + } + + fn extend_into(&self, acc: &mut Self::Extender) { + self.input.extend_into(acc) + } +} + +impl nom::FindSubstring<&str> for Input<'_, '_> { + fn find_substring(&self, substr: &str) -> Option { + self.input.find_substring(substr) + } +} + +impl<'a, 'e, T> nom::FindToken for Input<'a, 'e> +where + &'a str: nom::FindToken, +{ + fn find_token(&self, token: T) -> bool { + 
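+        // These nom trait impls delegate to the wrapped LocatedSpan where needed;
+        // `Input` itself only contributes the shared ParserState.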
self.input.find_token(token) + } +} + +impl<'a, 's> InputIter for Input<'a, 's> { + type Item = char; + + type Iter = CharIndices<'a>; + + type IterElem = Chars<'a>; + + fn iter_indices(&self) -> Self::Iter { + todo!() + } + + fn iter_elements(&self) -> Self::IterElem { + todo!() + } + + fn position
<P>
(&self, predicate: P) -> Option + where + P: Fn(Self::Item) -> bool, + { + todo!() + } + + fn slice_index(&self, count: usize) -> Result { + self.input.slice_index(count) + } +} + +impl nom::InputLength for Input<'_, '_> { + fn input_len(&self) -> usize { + self.input.input_len() + } +} + +impl InputTake for Input<'_, '_> { + fn take(&self, count: usize) -> Self { + Input { + input: self.input.take(count), + parser_state: self.parser_state, + } + } + + fn take_split(&self, count: usize) -> (Self, Self) { + let (first, second) = self.input.take_split(count); + ( + Input { + input: first, + parser_state: self.parser_state, + }, + Input { + input: second, + parser_state: self.parser_state, + }, + ) + } +} + +impl nom::InputTakeAtPosition for Input<'_, '_> { + type Item = char; + + fn split_at_position>( + &self, + predicate: P, + ) -> nom::IResult + where + P: Fn(Self::Item) -> bool, + { + match self.input.position(predicate) { + Some(n) => Ok(self.take_split(n)), + None => Err(nom::Err::Incomplete(nom::Needed::new(1))), + } + } + + fn split_at_position1>( + &self, + predicate: P, + e: nom::error::ErrorKind, + ) -> nom::IResult + where + P: Fn(Self::Item) -> bool, + { + todo!() + } + + fn split_at_position_complete>( + &self, + predicate: P, + ) -> nom::IResult + where + P: Fn(Self::Item) -> bool, + { + match self.split_at_position(predicate) { + Err(nom::Err::Incomplete(_)) => Ok(self.take_split(self.input_len())), + res => res, + } + } + + fn split_at_position1_complete>( + &self, + predicate: P, + e: nom::error::ErrorKind, + ) -> nom::IResult + where + P: Fn(Self::Item) -> bool, + { + match self.input.fragment().position(predicate) { + Some(0) => Err(nom::Err::Error(E::from_error_kind(*self, e))), + Some(n) => Ok(self.take_split(n)), + None => { + if self.input.fragment().input_len() == 0 { + Err(nom::Err::Error(E::from_error_kind(*self, e))) + } else { + Ok(self.take_split(self.input_len())) + } + } + } + } +} + +impl nom::Offset for Input<'_, '_> { + fn offset(&self, second: &Self) -> usize { + self.input.offset(&second.input) + } +} + +impl nom::ParseTo for Input<'_, '_> { + fn parse_to(&self) -> Option { + todo!() + } +} + +impl<'a, 'e, R> nom::Slice for Input<'a, 'e> +where + &'a str: nom::Slice, +{ + fn slice(&self, range: R) -> Self { + Input { + input: self.input.slice(range), + parser_state: self.parser_state, + } + } +} + +pub(crate) trait ToRange { + fn to_range(&self) -> Range; +} + +#[derive(Debug, Clone, Copy, PartialEq)] +pub(crate) enum ParserLabel { + Rule, + Fact, + Directive, + Dot, + Arrow, + // Head, + // Body, + Comma, + Iri, + Prefix, + Identifier, + OpenParen, + CloseParen, + OpenBrace, + CloseBrace, + OpenBracket, + ClosePracket, + Equal, + Number, +} + +#[derive(Debug, Clone, Copy, PartialEq)] +pub(crate) struct Label { + pub(crate) label: ParserLabel, + pub(crate) pos: Position, +} diff --git a/testfile.rls b/testfile.rls index b5f5d6db9..07e551c67 100644 --- a/testfile.rls +++ b/testfile.rls @@ -9,6 +9,10 @@ father(alice, bob). mother(bob, carla). father(bob, darius). mother(alice, carla). +father(, ). +mother(, ). +father(, ). +mother(, ). % Rules: parent(?X, ?Y) :- mother(?X, ?Y). @@ -16,3 +20,16 @@ parent(?X, ?Y) :- father(?X, ?Y). parent( ?X , ?Y ) :- ~sibling( ?X , ?Y ) . a(?x) :- b(?x, B) . s(4) :- s(3). + +ancestor(?X,?Y) :- parent(?X, ?Y) . +ancestor(?X,?Z) :- ancestor(?X, ?Y), parent(?Y, ?Z) . +ancestorOfAlice(?X) :- ancestor(alice,?X). + +mydata(a,b) . +mydata("hello", 42) . +mydata(3.14, "2023-06-19"^^) . + +resulta(?N + 10) :- mydata(_, ?N) . 
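+% The rules below combine arithmetic terms (?N + 10 above) with builtin
+% operation terms (SQRT, DATATYPE) to exercise the new term parsers.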
+resultB(?R) :- mydata(?X, ?Y), ?R = SQRT(?X) . +result(?D) :- mydata(?X, _), ?D = DATATYPE(?X) . + diff --git a/testfile2.rls b/testfile2.rls new file mode 100644 index 000000000..32ee5c5af --- /dev/null +++ b/testfile2.rls @@ -0,0 +1,11 @@ +% Facts: +father(alice, bob). +mother(bob, carla). +father(bob, darius). +mother(alice, carla). + +% Rules: +parent(?X, ?Y) :- mother(?X, ?Y). +parent(?X, ?Y) :- father(?X, ?Y). +parent( ?X , ?Y ) :- ~sibling( ?X , ?Y ) . + From fb3eb036bec5a50e31e9ac6f330348b1a1ddcc9b Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Thu, 16 May 2024 09:07:00 +0200 Subject: [PATCH 096/214] Refactor to use type annotation on vecs instead of trivial cast on the first element --- nemo/src/io/parser/ast.rs | 5 ++--- nemo/src/io/parser/ast/atom.rs | 5 ++--- nemo/src/io/parser/ast/directive.rs | 25 ++++++++++--------------- nemo/src/io/parser/ast/map.rs | 10 ++++------ nemo/src/io/parser/ast/program.rs | 5 ++--- nemo/src/io/parser/ast/statement.rs | 10 ++++------ nemo/src/io/parser/ast/term.rs | 20 ++++++++------------ nemo/src/io/parser/ast/tuple.rs | 5 ++--- 8 files changed, 34 insertions(+), 51 deletions(-) diff --git a/nemo/src/io/parser/ast.rs b/nemo/src/io/parser/ast.rs index b9bec1e4e..539d991ce 100644 --- a/nemo/src/io/parser/ast.rs +++ b/nemo/src/io/parser/ast.rs @@ -64,9 +64,8 @@ impl std::iter::IntoIterator for List<'_, T> { } impl AstNode for List<'_, T> { fn children(&self) -> Option> { - let mut vec = Vec::new(); - #[allow(trivial_casts)] - vec.push(&self.first as &dyn AstNode); + let mut vec: Vec<&dyn AstNode> = Vec::new(); + vec.push(&self.first); if let Some(rest) = &self.rest { for (ws1, delim, ws2, item) in rest { if let Some(ws) = ws1 { diff --git a/nemo/src/io/parser/ast/atom.rs b/nemo/src/io/parser/ast/atom.rs index 52e8d5383..75a9d4d4d 100644 --- a/nemo/src/io/parser/ast/atom.rs +++ b/nemo/src/io/parser/ast/atom.rs @@ -36,9 +36,8 @@ impl AstNode for Atom<'_> { rhs, .. } => { - let mut vec = Vec::new(); - #[allow(trivial_casts)] - vec.push(lhs as &dyn AstNode); + let mut vec: Vec<&dyn AstNode> = Vec::new(); + vec.push(lhs); if let Some(ws) = ws1 { vec.push(ws); }; diff --git a/nemo/src/io/parser/ast/directive.rs b/nemo/src/io/parser/ast/directive.rs index c75351256..fd13de1d8 100644 --- a/nemo/src/io/parser/ast/directive.rs +++ b/nemo/src/io/parser/ast/directive.rs @@ -78,10 +78,9 @@ impl AstNode for Directive<'_> { dot, .. } => { - let mut vec = Vec::new(); + let mut vec: Vec<&dyn AstNode> = Vec::new(); if let Some(dc) = doc_comment { - #[allow(trivial_casts)] - vec.push(dc as &dyn AstNode); + vec.push(dc); }; vec.push(kw); if let Some(ws) = ws1 { @@ -105,10 +104,9 @@ impl AstNode for Directive<'_> { dot, .. } => { - let mut vec = Vec::new(); + let mut vec: Vec<&dyn AstNode> = Vec::new(); if let Some(dc) = doc_comment { - #[allow(trivial_casts)] - vec.push(dc as &dyn AstNode); + vec.push(dc); }; vec.push(kw); if let Some(ws) = ws1 { @@ -138,10 +136,9 @@ impl AstNode for Directive<'_> { dot, .. } => { - let mut vec = Vec::new(); + let mut vec: Vec<&dyn AstNode> = Vec::new(); if let Some(dc) = doc_comment { - #[allow(trivial_casts)] - vec.push(dc as &dyn AstNode); + vec.push(dc); }; vec.push(kw); vec.push(ws1); @@ -173,10 +170,9 @@ impl AstNode for Directive<'_> { dot, .. 
} => { - let mut vec = Vec::new(); + let mut vec: Vec<&dyn AstNode> = Vec::new(); if let Some(dc) = doc_comment { - #[allow(trivial_casts)] - vec.push(dc as &dyn AstNode); + vec.push(dc); }; vec.push(kw); vec.push(ws1); @@ -204,10 +200,9 @@ impl AstNode for Directive<'_> { ws2, dot, } => { - let mut vec = Vec::new(); + let mut vec: Vec<&dyn AstNode> = Vec::new(); if let Some(dc) = doc_comment { - #[allow(trivial_casts)] - vec.push(dc as &dyn AstNode); + vec.push(dc); }; vec.push(kw); vec.push(ws1); diff --git a/nemo/src/io/parser/ast/map.rs b/nemo/src/io/parser/ast/map.rs index 669963ae8..626d93aa6 100644 --- a/nemo/src/io/parser/ast/map.rs +++ b/nemo/src/io/parser/ast/map.rs @@ -17,10 +17,9 @@ pub(crate) struct Map<'a> { } impl AstNode for Map<'_> { fn children(&self) -> Option> { - let mut vec = Vec::new(); + let mut vec: Vec<&dyn AstNode> = Vec::new(); if let Some(identifier) = &self.identifier { - #[allow(trivial_casts)] - vec.push(identifier as &dyn AstNode); + vec.push(identifier); }; if let Some(ws) = &self.ws1 { vec.push(ws); @@ -78,9 +77,8 @@ pub(crate) struct Pair<'a, K, V> { } impl AstNode for Pair<'_, K, V> { fn children(&self) -> Option> { - let mut vec = Vec::new(); - #[allow(trivial_casts)] - vec.push(&self.key as &dyn AstNode); + let mut vec: Vec<&dyn AstNode> = Vec::new(); + vec.push(&self.key); if let Some(ws) = &self.ws1 { vec.push(ws); } diff --git a/nemo/src/io/parser/ast/program.rs b/nemo/src/io/parser/ast/program.rs index 334e1ee4a..466be7577 100644 --- a/nemo/src/io/parser/ast/program.rs +++ b/nemo/src/io/parser/ast/program.rs @@ -12,10 +12,9 @@ pub(crate) struct Program<'a> { } impl AstNode for Program<'_> { fn children(&self) -> Option> { - let mut vec = Vec::new(); + let mut vec: Vec<&dyn AstNode> = Vec::new(); if let Some(dc) = &self.tl_doc_comment { - #[allow(trivial_casts)] - vec.push(dc as &dyn AstNode); + vec.push(dc); }; // NOTE: The current implementation puts the doc comment and all the // statements in the same vec, so there is no need to implement AstNode diff --git a/nemo/src/io/parser/ast/statement.rs b/nemo/src/io/parser/ast/statement.rs index a2b3a1c9d..383b58272 100644 --- a/nemo/src/io/parser/ast/statement.rs +++ b/nemo/src/io/parser/ast/statement.rs @@ -40,10 +40,9 @@ impl AstNode for Statement<'_> { dot, .. } => { - let mut vec = Vec::new(); + let mut vec: Vec<&dyn AstNode> = Vec::new(); if let Some(dc) = doc_comment { - #[allow(trivial_casts)] - vec.push(dc as &dyn AstNode); + vec.push(dc); }; vec.push(atom); if let Some(ws) = ws { @@ -63,10 +62,9 @@ impl AstNode for Statement<'_> { dot, .. } => { - let mut vec = Vec::new(); + let mut vec: Vec<&dyn AstNode> = Vec::new(); if let Some(dc) = doc_comment { - #[allow(trivial_casts)] - vec.push(dc as &dyn AstNode); + vec.push(dc); }; vec.push(head); if let Some(ws) = ws1 { diff --git a/nemo/src/io/parser/ast/term.rs b/nemo/src/io/parser/ast/term.rs index 8be1e79e7..17eddee16 100644 --- a/nemo/src/io/parser/ast/term.rs +++ b/nemo/src/io/parser/ast/term.rs @@ -53,9 +53,8 @@ impl AstNode for Term<'_> { rhs, .. } => { - let mut vec = Vec::new(); - #[allow(trivial_casts)] - vec.push(&**lhs as &dyn AstNode); + let mut vec: Vec<&dyn AstNode> = Vec::new(); + vec.push(&**lhs); if let Some(ws) = ws1 { vec.push(ws); }; @@ -75,9 +74,8 @@ impl AstNode for Term<'_> { close_paren, .. 
} => { - let mut vec = Vec::new(); - #[allow(trivial_casts)] - vec.push(operation as &dyn AstNode); + let mut vec: Vec<&dyn AstNode> = Vec::new(); + vec.push(operation); vec.push(open_paren); if let Some(ws) = ws1 { vec.push(ws); @@ -194,10 +192,9 @@ impl AstNode for Primitive<'_> { exponent, .. } => { - let mut vec = Vec::new(); - #[allow(trivial_casts)] + let mut vec: Vec<&dyn AstNode> = Vec::new(); if let Some(s) = sign { - vec.push(s as &dyn AstNode); + vec.push(s); } if let Some(b) = before { vec.push(b); @@ -284,9 +281,8 @@ pub(crate) struct Exponent<'a> { } impl AstNode for Exponent<'_> { fn children(&self) -> Option> { - let mut vec = Vec::new(); - #[allow(trivial_casts)] - vec.push(&self.e as &dyn AstNode); + let mut vec: Vec<&dyn AstNode> = Vec::new(); + vec.push(&self.e); if let Some(s) = &self.sign { vec.push(s); }; diff --git a/nemo/src/io/parser/ast/tuple.rs b/nemo/src/io/parser/ast/tuple.rs index a49f67012..4b9e9da9f 100644 --- a/nemo/src/io/parser/ast/tuple.rs +++ b/nemo/src/io/parser/ast/tuple.rs @@ -16,10 +16,9 @@ pub(crate) struct Tuple<'a> { } impl AstNode for Tuple<'_> { fn children(&self) -> Option> { - let mut vec = Vec::new(); - #[allow(trivial_casts)] + let mut vec: Vec<&dyn AstNode> = Vec::new(); if let Some(identifier) = &self.identifier { - vec.push(identifier as &dyn AstNode); + vec.push(identifier); } if let Some(ws) = &self.ws1 { vec.push(ws); From 23781b6f7f5e9073f90b83ce538736531a6e11ea Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Thu, 16 May 2024 11:31:25 +0200 Subject: [PATCH 097/214] Fix whitespace parsing within statements. Now also comments are allowed and do not produce an error --- nemo/src/io/parser.rs | 264 +++++++++++++++++----------- nemo/src/io/parser/ast.rs | 67 ++++--- nemo/src/io/parser/ast/atom.rs | 6 +- nemo/src/io/parser/ast/directive.rs | 32 ++-- nemo/src/io/parser/ast/map.rs | 12 +- nemo/src/io/parser/ast/statement.rs | 10 +- nemo/src/io/parser/ast/term.rs | 10 +- nemo/src/io/parser/ast/tuple.rs | 8 +- 8 files changed, 248 insertions(+), 161 deletions(-) diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index 758bb9061..3a1394708 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -2434,6 +2434,7 @@ mod new { use super::ast::{ atom::*, directive::*, map::*, program::*, statement::*, term::*, tuple::*, List, Position, + Wsoc, }; use super::types::{Input, Label, ParserLabel, ToRange}; use crate::io::lexer::{ @@ -2649,6 +2650,34 @@ mod new { } } + fn wsoc0<'a, 's>(input: Input<'a, 's>) -> IResult, Option>> { + many0(alt((lex_whitespace, lex_comment)))(input).map(|(rest_input, vec)| { + if vec.is_empty() { + (rest_input, None) + } else { + ( + rest_input, + Some(Wsoc { + span: outer_span(input.input, rest_input.input), + token: vec, + }), + ) + } + }) + } + + fn wsoc1<'a, 's>(input: Input<'a, 's>) -> IResult, Wsoc<'a>> { + many1(alt((lex_whitespace, lex_comment)))(input).map(|(rest_input, vec)| { + ( + rest_input, + Wsoc { + span: outer_span(input.input, rest_input.input), + token: vec, + }, + ) + }) + } + /// Parse a full program consisting of directives, facts, rules and comments. 
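+    /// Errors are not propagated as parse failures: the statement parsers
+    /// record diagnostics in the shared `ParserState` and fall back to
+    /// `Statement::Error`, so a `Program` is always returned together with
+    /// the accumulated error list.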
fn parse_program<'a, 'e>(input: Input<'a, 'e>) -> (Program<'a>, Vec) { let (rest_input, (tl_doc_comment, statements)) = all_consuming(pair( @@ -2689,10 +2718,11 @@ mod new { /// Parse a fact of the form `predicateName(term1, term2, …).` fn parse_fact<'a, 'e>(input: Input<'a, 'e>) -> IResult, Statement<'a>> { + dbg!(&input.parser_state.labels); tuple(( opt(lex_doc_comment), parse_normal_atom, - opt(lex_whitespace), + wsoc0, expect( report_label(dot, ParserLabel::Dot), "missing `.`", @@ -2722,11 +2752,11 @@ mod new { tuple(( opt(lex_doc_comment), parse_head, - opt(lex_whitespace), + wsoc0, report_label(arrow, ParserLabel::Arrow), - opt(lex_whitespace), + wsoc0, parse_body, - opt(lex_whitespace), + wsoc0, expect( report_label(dot, ParserLabel::Dot), "missing `.`", @@ -2787,9 +2817,9 @@ mod new { at, verify(lex_ident, |token| token.kind == TokenKind::Base), )), - opt(lex_whitespace), + wsoc0, report_label(lex_iri, ParserLabel::Iri), - opt(lex_whitespace), + wsoc0, expect( report_label(dot, ParserLabel::Dot), "missing `.`", @@ -2829,11 +2859,11 @@ mod new { at, verify(lex_ident, |token| token.kind == TokenKind::Prefix), )), - opt(lex_whitespace), + wsoc0, report_label(recognize(pair(lex_ident, colon)), ParserLabel::Prefix), - opt(lex_whitespace), + wsoc0, report_label(lex_iri, ParserLabel::Iri), - opt(lex_whitespace), + wsoc0, expect( report_label(dot, ParserLabel::Dot), "missing `.`", @@ -2880,13 +2910,13 @@ mod new { at, verify(lex_ident, |token| token.kind == TokenKind::Import), )), - lex_whitespace, + wsoc1, report_label(lex_ident, ParserLabel::Identifier), - opt(lex_whitespace), + wsoc0, report_label(arrow, ParserLabel::Arrow), - opt(lex_whitespace), + wsoc0, parse_map, - opt(lex_whitespace), + wsoc0, expect( report_label(dot, ParserLabel::Dot), "missing `.`", @@ -2932,13 +2962,13 @@ mod new { at, verify(lex_ident, |token| token.kind == TokenKind::Export), )), - lex_whitespace, + wsoc1, report_label(lex_ident, ParserLabel::Identifier), - opt(lex_whitespace), + wsoc0, report_label(arrow, ParserLabel::Arrow), - opt(lex_whitespace), + wsoc0, parse_map, - opt(lex_whitespace), + wsoc0, expect( report_label(dot, ParserLabel::Dot), "missing `.`", @@ -2984,9 +3014,9 @@ mod new { at, verify(lex_ident, |token| token.kind == TokenKind::Output), )), - lex_whitespace, + wsoc1, opt(parse_list(lex_ident)), - opt(lex_whitespace), + wsoc0, expect( report_label(dot, ParserLabel::Dot), "missing `.`", @@ -3047,25 +3077,18 @@ mod new { parse_t: fn(Input<'a, 'e>) -> IResult, T>, ) -> impl Fn(Input<'a, 'e>) -> IResult, List<'a, T>> { move |input: Input<'a, 'e>| { - pair( - parse_t, - many0(tuple(( - opt(lex_whitespace), - comma, - opt(lex_whitespace), - parse_t, - ))), - )(input) - .map(|(rest_input, (first, rest))| { - ( - rest_input, - List { - span: outer_span(input.input, rest_input.input), - first, - rest: if rest.is_empty() { None } else { Some(rest) }, - }, - ) - }) + pair(parse_t, many0(tuple((wsoc0, comma, wsoc0, parse_t))))(input).map( + |(rest_input, (first, rest))| { + ( + rest_input, + List { + span: outer_span(input.input, rest_input.input), + first, + rest: if rest.is_empty() { None } else { Some(rest) }, + }, + ) + }, + ) } } @@ -3107,26 +3130,21 @@ mod new { /// Parse an "infix atom" of the form `term1 term2`. /// The supported infix operations are `<`, `<=`, `=`, `>=`, `>` and `!=`. 
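+    /// That is, two terms separated by one of these operators, with optional
+    /// whitespace or comments (`wsoc0`) allowed around the operator.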
fn parse_infix_atom<'a, 'e>(input: Input<'a, 'e>) -> IResult, Atom<'a>> { - tuple(( - parse_term, - opt(lex_whitespace), - parse_operation_token, - opt(lex_whitespace), - parse_term, - ))(input) - .map(|(rest_input, (lhs, ws1, operation, ws2, rhs))| { - ( - rest_input, - Atom::InfixAtom { - span: outer_span(input.input, rest_input.input), - lhs, - ws1, - operation, - ws2, - rhs, - }, - ) - }) + tuple((parse_term, wsoc0, parse_operation_token, wsoc0, parse_term))(input).map( + |(rest_input, (lhs, ws1, operation, ws2, rhs))| { + ( + rest_input, + Atom::InfixAtom { + span: outer_span(input.input, rest_input.input), + lhs, + ws1, + operation, + ws2, + rhs, + }, + ) + }, + ) } /// Parse a tuple with an optional name, like `ident(term1, term2)` @@ -3134,11 +3152,11 @@ mod new { fn parse_tuple<'a, 'e>(input: Input<'a, 'e>) -> IResult, Tuple<'a>> { tuple(( opt(lex_ident), - opt(lex_whitespace), + wsoc0, open_paren, - opt(lex_whitespace), + wsoc0, opt(parse_list(parse_term)), - opt(lex_whitespace), + wsoc0, report_label(close_paren, ParserLabel::CloseParen), ))(input) .map( @@ -3165,11 +3183,11 @@ mod new { fn parse_named_tuple<'a, 'e>(input: Input<'a, 'e>) -> IResult, Tuple<'a>> { tuple(( lex_ident, - opt(lex_whitespace), + wsoc0, report_label(open_paren, ParserLabel::OpenParen), - opt(lex_whitespace), + wsoc0, opt(parse_list(parse_term)), - opt(lex_whitespace), + wsoc0, report_label(close_paren, ParserLabel::CloseParen), ))(input) .map( @@ -3196,11 +3214,11 @@ mod new { fn parse_map<'a, 'e>(input: Input<'a, 'e>) -> IResult, Map<'a>> { tuple(( opt(lex_ident), - opt(lex_whitespace), + wsoc0, open_brace, - opt(lex_whitespace), + wsoc0, opt(parse_list(parse_pair)), - opt(lex_whitespace), + wsoc0, report_label(close_brace, ParserLabel::CloseBrace), ))(input) .map( @@ -3266,9 +3284,9 @@ mod new { ) -> IResult, Pair<'a, Term<'a>, Term<'a>>> { tuple(( parse_term, - opt(lex_whitespace), + wsoc0, report_label(equal, ParserLabel::Equal), - opt(lex_whitespace), + wsoc0, parse_term, ))(input) .map(|(rest_input, (key, ws1, equal, ws2, value))| { @@ -3451,12 +3469,7 @@ mod new { fn parse_binary_term<'a, 'e>(input: Input<'a, 'e>) -> IResult, Term<'a>> { pair( parse_arithmetic_product, - opt(tuple(( - opt(lex_whitespace), - alt((plus, minus)), - opt(lex_whitespace), - parse_binary_term, - ))), + opt(tuple((wsoc0, alt((plus, minus)), wsoc0, parse_binary_term))), )(input) .map(|(rest_input, (lhs, opt))| { ( @@ -3483,9 +3496,9 @@ mod new { pair( parse_arithmetic_factor, opt(tuple(( - opt(lex_whitespace), + wsoc0, alt((star, slash)), - opt(lex_whitespace), + wsoc0, parse_arithmetic_product, ))), )(input) @@ -3544,9 +3557,9 @@ mod new { tuple(( recognize(pair(hash, lex_ident)), report_label(open_paren, ParserLabel::OpenParen), - opt(lex_whitespace), + wsoc0, parse_list(parse_term), - opt(lex_whitespace), + wsoc0, report_label(close_paren, ParserLabel::CloseParen), ))(input) .map( @@ -3782,9 +3795,12 @@ mod new { kind: TokenKind::Base, span: s!(0, 1, "@base"), }, - ws1: Some(Token { - kind: TokenKind::Whitespace, - span: s!(5, 1, " ") + ws1: Some(Wsoc { + span: s!(5, 1, " "), + token: vec![Token { + kind: TokenKind::Whitespace, + span: s!(5, 1, " ") + }] }), base_iri: Token { kind: TokenKind::Iri, @@ -3807,9 +3823,12 @@ mod new { kind: TokenKind::Prefix, span: s!(32, 1, "@prefix"), }, - ws1: Some(Token { - kind: TokenKind::Whitespace, - span: s!(39, 1, " ") + ws1: Some(Wsoc { + span: s!(39, 1, " "), + token: vec![Token { + kind: TokenKind::Whitespace, + span: s!(39, 1, " ") + }] }), prefix: Token { kind: 
TokenKind::Ident, @@ -3837,9 +3856,12 @@ mod new { kind: TokenKind::Import, span: s!(85, 1, "@import"), }, - ws1: Token { - kind: TokenKind::Whitespace, - span: s!(92, 1, " "), + ws1: Wsoc { + span: s!(91, 1, " "), + token: vec![Token { + kind: TokenKind::Whitespace, + span: s!(92, 1, " "), + }] }, predicate: Token { kind: TokenKind::Ident, @@ -3903,9 +3925,12 @@ mod new { kind: TokenKind::Export, span: s!(136, 1, "@export"), }, - ws1: Token { - kind: TokenKind::Whitespace, + ws1: Wsoc { span: s!(143, 1, " "), + token: vec![Token { + kind: TokenKind::Whitespace, + span: s!(143, 1, " "), + }] }, predicate: Token { kind: TokenKind::Ident, @@ -3949,9 +3974,12 @@ mod new { kind: TokenKind::Output, span: s!(153, 1, "@output") }, - ws1: Token { - kind: TokenKind::Whitespace, + ws1: Wsoc { span: s!(160, 1, " "), + token: vec![Token { + kind: TokenKind::Whitespace, + span: s!(160, 1, " "), + }] }, predicates: Some(List { span: s!(161, 1, "a, b, c"), @@ -3966,9 +3994,12 @@ mod new { kind: TokenKind::Comma, span: s!(162, 1, ","), }, - Some(Token { - kind: TokenKind::Whitespace, + Some(Wsoc { span: s!(163, 1, " "), + token: vec![Token { + kind: TokenKind::Whitespace, + span: s!(163, 1, " "), + }] }), Token { kind: TokenKind::Ident, @@ -3981,9 +4012,12 @@ mod new { kind: TokenKind::Comma, span: s!(165, 1, ","), }, - Some(Token { - kind: TokenKind::Whitespace, + Some(Wsoc { span: s!(166, 1, " "), + token: vec![Token { + kind: TokenKind::Whitespace, + span: s!(166, 1, " "), + }] }), Token { kind: TokenKind::Ident, @@ -4066,9 +4100,12 @@ mod new { kind: TokenKind::Comma, span: s!(9, 1, ","), }, - Some(Token { - kind: TokenKind::Whitespace, + Some(Wsoc { span: s!(10, 1, " "), + token: vec![Token { + kind: TokenKind::Whitespace, + span: s!(10, 1, " "), + }] }), Term::Primitive(Primitive::Constant(Token { kind: TokenKind::Ident, @@ -4081,9 +4118,12 @@ mod new { kind: TokenKind::Comma, span: s!(15, 1, ","), }, - Some(Token { - kind: TokenKind::Whitespace, + Some(Wsoc { span: s!(16, 1, " "), + token: vec![Token { + kind: TokenKind::Whitespace, + span: s!(16, 1, " "), + }] }), Term::Primitive(Primitive::Constant(Token { kind: TokenKind::Ident, @@ -4098,9 +4138,12 @@ mod new { span: s!(27, 1, ")") }, }), - ws: Some(Token { - kind: TokenKind::Whitespace, + ws: Some(Wsoc { span: s!(28, 1, " "), + token: vec![Token { + kind: TokenKind::Whitespace, + span: s!(28, 1, " "), + }] }), dot: Token { kind: TokenKind::Dot, @@ -4878,5 +4921,22 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters println!("{}\n\n{:#?}", result.0, result.1); // assert!(false); } + + #[test] + fn wsoc() { + let input = Span::new(" \t\n % first comment\n % second comment\n"); + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let parser_state = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input, + parser_state, + }; + dbg!(wsoc0(input)); + dbg!(wsoc1(input)); + } } } diff --git a/nemo/src/io/parser/ast.rs b/nemo/src/io/parser/ast.rs index 539d991ce..3ed70e89d 100644 --- a/nemo/src/io/parser/ast.rs +++ b/nemo/src/io/parser/ast.rs @@ -27,12 +27,51 @@ pub(crate) struct Position { pub(crate) column: u32, } +/// Whitespace or Comment token +#[derive(Debug, Clone, PartialEq)] +pub(crate) struct Wsoc<'a> { + pub(crate) span: Span<'a>, + pub(crate) token: Vec> +} +impl AstNode for Wsoc<'_> { + fn children(&self) -> Option> { + if self.token.is_empty() { + None + } else { + #[allow(trivial_casts)] + Some(self.token.iter().map(|t| t as &dyn 
AstNode).collect()) + } + } + + fn span(&self) -> Span { + self.span + } + + fn position(&self) -> Position { + Position { offset: self.span.location_offset(), line: self.span.location_line(), column: self.span.get_utf8_column() as u32 } + } + + fn is_token(&self) -> bool { + false + } + + fn name(&self) -> String { + format!("Wsoc \x1b[34m@{}:{} \x1b[92m{:?}\x1b[0m", self.span.location_line(), self.span.get_utf8_column(), self.span.fragment()) + } +} +impl Display for Wsoc<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + todo!() + } +} + + #[derive(Debug, Clone, PartialEq)] pub(crate) struct List<'a, T> { pub(crate) span: Span<'a>, pub(crate) first: T, // ([ws]?[,][ws]?[T])* - pub(crate) rest: Option>, Token<'a>, Option>, T)>>, + pub(crate) rest: Option>, Token<'a>, Option>, T)>>, } impl List<'_, T> { pub fn to_vec(&self) -> Vec { @@ -178,19 +217,13 @@ mod test { kw: Token{ kind:TokenKind::Prefix, span:s!(125,4,"@prefix") - } , - ws1:Some(Token{ - kind:TokenKind::Whitespace, - span:s!(132,4," ") - }) , + }, + ws1:Some(Wsoc {span: s!(132, 4, " "), token: vec![Token{kind:TokenKind::Whitespace,span:s!(132,4," ")}] }), prefix: Token { kind: TokenKind::PrefixIdent, span: s!(133, 4, "xsd:"), }, - ws2: Some(Token{ - kind:TokenKind::Whitespace, - span:s!(137,4," ") - }), + ws2: Some(Wsoc {span: s!(137, 4, " "), token: vec![Token{kind:TokenKind::Whitespace,span:s!(137,4," ")}] }), prefix_iri: Token { kind: TokenKind::Iri, span: s!(138, 4, ""), @@ -239,10 +272,7 @@ mod test { kind: TokenKind::Comma, span: s!(242, 8, ","), }, - Some(Token { - kind: TokenKind::Whitespace, - span: s!(243, 8, " "), - }), + Some(Wsoc {span: s!(243, 8, " "), token: vec![Token{kind:TokenKind::Whitespace,span:s!(243,8," "),}] }), Term::Primitive(Primitive::Constant(Token { kind: TokenKind::Ident, span: s!(244, 8, "ConstB"), @@ -296,9 +326,9 @@ mod test { }), rest: None, }, - ws1: Some(Token{kind:TokenKind::Whitespace,span:s!(310,12," ")}), + ws1: Some(Wsoc {span: s!(310, 12, " "), token: vec![Token{kind:TokenKind::Whitespace,span:s!(310,12," ")}] }), arrow: Token{kind:TokenKind::Arrow, span:s!(311,12,":-")}, - ws2: Some(Token{kind:TokenKind::Whitespace,span:s!(313,12," ")}), + ws2: Some(Wsoc {span: s!(313, 12, " "), token: vec![Token{kind:TokenKind::Whitespace,span:s!(313,12," ")}] }), body: List { span: s!(314, 12, "somePredicate(?VarA, ConstB)"), first: Atom::Positive(Tuple { @@ -322,10 +352,7 @@ mod test { kind: TokenKind::Comma, span: s!(333, 12, ","), }, - Some(Token { - kind: TokenKind::Whitespace, - span: s!(334, 12, " "), - }), + Some(Wsoc {span: s!(334, 12, " "), token: vec![Token{kind:TokenKind::Whitespace,span:s!(334,12," "),}] }), Term::Primitive(Primitive::Constant(Token { kind: TokenKind::Ident, span: s!(335, 12, "ConstB"), diff --git a/nemo/src/io/parser/ast/atom.rs b/nemo/src/io/parser/ast/atom.rs index 75a9d4d4d..46dbf0ac4 100644 --- a/nemo/src/io/parser/ast/atom.rs +++ b/nemo/src/io/parser/ast/atom.rs @@ -1,6 +1,6 @@ use super::term::Term; use super::tuple::Tuple; -use super::{ast_to_ascii_tree, AstNode}; +use super::{ast_to_ascii_tree, AstNode, Wsoc}; use super::{map::Map, Position}; use crate::io::lexer::{Span, Token}; use ascii_tree::write_tree; @@ -16,9 +16,9 @@ pub(crate) enum Atom<'a> { InfixAtom { span: Span<'a>, lhs: Term<'a>, - ws1: Option>, + ws1: Option>, operation: Token<'a>, - ws2: Option>, + ws2: Option>, rhs: Term<'a>, }, Map(Map<'a>), diff --git a/nemo/src/io/parser/ast/directive.rs b/nemo/src/io/parser/ast/directive.rs index fd13de1d8..8b04c3c91 100644 --- 
a/nemo/src/io/parser/ast/directive.rs +++ b/nemo/src/io/parser/ast/directive.rs @@ -1,5 +1,5 @@ use super::map::Map; -use super::{ast_to_ascii_tree, AstNode, List, Position}; +use super::{ast_to_ascii_tree, AstNode, List, Position, Wsoc}; use crate::io::lexer::{Span, Token}; use ascii_tree::write_tree; @@ -10,9 +10,9 @@ pub(crate) enum Directive<'a> { span: Span<'a>, doc_comment: Option>, kw: Token<'a>, - ws1: Option>, + ws1: Option>, base_iri: Token<'a>, - ws2: Option>, + ws2: Option>, dot: Token<'a>, }, // "@prefix wikidata: ." @@ -20,11 +20,11 @@ pub(crate) enum Directive<'a> { span: Span<'a>, doc_comment: Option>, kw: Token<'a>, - ws1: Option>, + ws1: Option>, prefix: Token<'a>, - ws2: Option>, + ws2: Option>, prefix_iri: Token<'a>, - ws3: Option>, + ws3: Option>, dot: Token<'a>, }, // "@import table :- csv{resource="path/to/file.csv"} ." @@ -32,13 +32,13 @@ pub(crate) enum Directive<'a> { span: Span<'a>, doc_comment: Option>, kw: Token<'a>, - ws1: Token<'a>, + ws1: Wsoc<'a>, predicate: Token<'a>, - ws2: Option>, + ws2: Option>, arrow: Token<'a>, - ws3: Option>, + ws3: Option>, map: Map<'a>, - ws4: Option>, + ws4: Option>, dot: Token<'a>, }, // "@export result :- turtle{resource="out.ttl"} ." @@ -46,13 +46,13 @@ pub(crate) enum Directive<'a> { span: Span<'a>, doc_comment: Option>, kw: Token<'a>, - ws1: Token<'a>, + ws1: Wsoc<'a>, predicate: Token<'a>, - ws2: Option>, + ws2: Option>, arrow: Token<'a>, - ws3: Option>, + ws3: Option>, map: Map<'a>, - ws4: Option>, + ws4: Option>, dot: Token<'a>, }, // "@output A, B, C." @@ -60,9 +60,9 @@ pub(crate) enum Directive<'a> { span: Span<'a>, doc_comment: Option>, kw: Token<'a>, - ws1: Token<'a>, + ws1: Wsoc<'a>, predicates: Option>>, - ws2: Option>, + ws2: Option>, dot: Token<'a>, }, } diff --git a/nemo/src/io/parser/ast/map.rs b/nemo/src/io/parser/ast/map.rs index 626d93aa6..473d01e92 100644 --- a/nemo/src/io/parser/ast/map.rs +++ b/nemo/src/io/parser/ast/map.rs @@ -1,5 +1,5 @@ use super::term::Term; -use super::{ast_to_ascii_tree, AstNode, List, Position}; +use super::{ast_to_ascii_tree, AstNode, List, Position, Wsoc}; use crate::io::lexer::{Span, Token}; use ascii_tree::write_tree; use std::fmt::Debug; @@ -8,11 +8,11 @@ use std::fmt::Debug; pub(crate) struct Map<'a> { pub(crate) span: Span<'a>, pub(crate) identifier: Option>, - pub(crate) ws1: Option>, + pub(crate) ws1: Option>, pub(crate) open_brace: Token<'a>, - pub(crate) ws2: Option>, + pub(crate) ws2: Option>, pub(crate) pairs: Option, Term<'a>>>>, - pub(crate) ws3: Option>, + pub(crate) ws3: Option>, pub(crate) close_brace: Token<'a>, } impl AstNode for Map<'_> { @@ -70,9 +70,9 @@ impl std::fmt::Display for Map<'_> { pub(crate) struct Pair<'a, K, V> { pub(crate) span: Span<'a>, pub(crate) key: K, - pub(crate) ws1: Option>, + pub(crate) ws1: Option>, pub(crate) equal: Token<'a>, - pub(crate) ws2: Option>, + pub(crate) ws2: Option>, pub(crate) value: V, } impl AstNode for Pair<'_, K, V> { diff --git a/nemo/src/io/parser/ast/statement.rs b/nemo/src/io/parser/ast/statement.rs index 383b58272..a0762d0c8 100644 --- a/nemo/src/io/parser/ast/statement.rs +++ b/nemo/src/io/parser/ast/statement.rs @@ -1,6 +1,6 @@ use super::atom::Atom; use super::directive::Directive; -use super::{ast_to_ascii_tree, AstNode, List, Position}; +use super::{ast_to_ascii_tree, AstNode, List, Position, Wsoc}; use crate::io::lexer::{Span, Token}; use ascii_tree::write_tree; @@ -11,18 +11,18 @@ pub(crate) enum Statement<'a> { span: Span<'a>, doc_comment: Option>, atom: Atom<'a>, - ws: Option>, + ws: Option>, dot: 
Token<'a>, }, Rule { span: Span<'a>, doc_comment: Option>, head: List<'a, Atom<'a>>, - ws1: Option>, + ws1: Option>, arrow: Token<'a>, - ws2: Option>, + ws2: Option>, body: List<'a, Atom<'a>>, - ws3: Option>, + ws3: Option>, dot: Token<'a>, }, Whitespace(Token<'a>), diff --git a/nemo/src/io/parser/ast/term.rs b/nemo/src/io/parser/ast/term.rs index 17eddee16..060269687 100644 --- a/nemo/src/io/parser/ast/term.rs +++ b/nemo/src/io/parser/ast/term.rs @@ -1,6 +1,6 @@ use super::map::Map; use super::tuple::Tuple; -use super::{ast_to_ascii_tree, AstNode, List, Position}; +use super::{ast_to_ascii_tree, AstNode, List, Position, Wsoc}; use crate::io::lexer::{Span, Token}; use ascii_tree::write_tree; @@ -18,18 +18,18 @@ pub(crate) enum Term<'a> { Binary { span: Span<'a>, lhs: Box>, - ws1: Option>, + ws1: Option>, operation: Token<'a>, - ws2: Option>, + ws2: Option>, rhs: Box>, }, Aggregation { span: Span<'a>, operation: Token<'a>, open_paren: Token<'a>, - ws1: Option>, + ws1: Option>, terms: Box>>, - ws2: Option>, + ws2: Option>, close_paren: Token<'a>, }, Tuple(Box>), diff --git a/nemo/src/io/parser/ast/tuple.rs b/nemo/src/io/parser/ast/tuple.rs index 4b9e9da9f..f074eb6a1 100644 --- a/nemo/src/io/parser/ast/tuple.rs +++ b/nemo/src/io/parser/ast/tuple.rs @@ -1,5 +1,5 @@ use super::term::Term; -use super::{ast_to_ascii_tree, AstNode, List, Position}; +use super::{ast_to_ascii_tree, AstNode, List, Position, Wsoc}; use crate::io::lexer::{Span, Token}; use ascii_tree::write_tree; @@ -7,11 +7,11 @@ use ascii_tree::write_tree; pub(crate) struct Tuple<'a> { pub(crate) span: Span<'a>, pub(crate) identifier: Option>, - pub(crate) ws1: Option>, + pub(crate) ws1: Option>, pub(crate) open_paren: Token<'a>, - pub(crate) ws2: Option>, + pub(crate) ws2: Option>, pub(crate) terms: Option>>, - pub(crate) ws3: Option>, + pub(crate) ws3: Option>, pub(crate) close_paren: Token<'a>, } impl AstNode for Tuple<'_> { From fb8b25ceface1719889bb2df0b0a35549d5d33b3 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Wed, 29 May 2024 14:08:47 +0200 Subject: [PATCH 098/214] Fix test --- nemo/src/io/parser.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index 3a1394708..0bb7945a0 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -3857,7 +3857,7 @@ mod new { span: s!(85, 1, "@import"), }, ws1: Wsoc { - span: s!(91, 1, " "), + span: s!(92, 1, " "), token: vec![Token { kind: TokenKind::Whitespace, span: s!(92, 1, " "), From 57efb99f3f6f5e8b73958a08ecd38e3faa79bac7 Mon Sep 17 00:00:00 2001 From: logicallangs <> Date: Thu, 23 May 2024 11:44:08 +0100 Subject: [PATCH 099/214] feat: add language server --- Cargo.lock | 201 +++++-- Cargo.toml | 3 + nemo-language-server/Cargo.toml | 30 + nemo-language-server/README.md | 0 nemo-language-server/src/language_server.rs | 544 ++++++++++++++++++ .../src/language_server/nemo_position.rs | 69 +++ nemo-language-server/src/lib.rs | 19 + nemo-language-server/src/main.rs | 15 + nemo-wasm/Cargo.toml | 3 + nemo-wasm/README.md | 50 +- nemo-wasm/src/language_server.rs | 85 +++ nemo-wasm/src/lib.rs | 2 + nemo/Cargo.toml | 1 + nemo/src/io/lexer.rs | 31 +- nemo/src/io/parser.rs | 64 ++- nemo/src/io/parser/ast.rs | 102 +++- nemo/src/io/parser/ast/atom.rs | 41 +- nemo/src/io/parser/ast/directive.rs | 17 +- nemo/src/io/parser/ast/map.rs | 59 +- nemo/src/io/parser/ast/program.rs | 27 +- nemo/src/io/parser/ast/statement.rs | 27 +- nemo/src/io/parser/ast/term.rs | 98 +++- nemo/src/io/parser/ast/tuple.rs | 33 +- 
nemo/src/io/parser/types.rs | 34 +- 24 files changed, 1361 insertions(+), 194 deletions(-) create mode 100644 nemo-language-server/Cargo.toml create mode 100644 nemo-language-server/README.md create mode 100644 nemo-language-server/src/language_server.rs create mode 100644 nemo-language-server/src/language_server/nemo_position.rs create mode 100644 nemo-language-server/src/lib.rs create mode 100644 nemo-language-server/src/main.rs create mode 100644 nemo-wasm/src/language_server.rs diff --git a/Cargo.lock b/Cargo.lock index 46134e7f8..31fb2db39 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -147,6 +147,17 @@ dependencies = [ "tempfile", ] +[[package]] +name = "async-trait" +version = "0.1.79" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a507401cad91ec6a857ed5513a2073c82a9b9048762b885bb98655b306964681" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + [[package]] name = "atomic-waker" version = "1.1.2" @@ -164,6 +175,17 @@ dependencies = [ "winapi", ] +[[package]] +name = "auto_impl" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c87f3f15e7794432337fc718554eaa4dc8f04c9677a950ffe366f20a162ae42" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + [[package]] name = "autocfg" version = "1.3.0" @@ -376,16 +398,6 @@ dependencies = [ "windows-sys 0.48.0", ] -[[package]] -name = "console_error_panic_hook" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a06aeb73f470f66dcdbf7223caeebb85984942f22f1adb2a088cf9668146bbbc" -dependencies = [ - "cfg-if", - "wasm-bindgen", -] - [[package]] name = "core-foundation" version = "0.9.4" @@ -457,6 +469,19 @@ dependencies = [ "memchr", ] +[[package]] +name = "dashmap" +version = "5.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" +dependencies = [ + "cfg-if", + "hashbrown", + "lock_api", + "once_cell", + "parking_lot_core", +] + [[package]] name = "delegate" version = "0.12.0" @@ -679,6 +704,20 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" +[[package]] +name = "futures" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "645c6916888f6cb6350d2550b80fb63e734897a8498abe35cfb732b6487804b0" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + [[package]] name = "futures-channel" version = "0.3.30" @@ -686,6 +725,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" dependencies = [ "futures-core", + "futures-sink", ] [[package]] @@ -694,6 +734,23 @@ version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" +[[package]] +name = "futures-io" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" + +[[package]] +name = "futures-macro" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + 
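+# `futures` (and the tower-lsp stack further down) enters the lock file as a
+# dependency of the new nemo-language-server crate.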
[[package]] name = "futures-sink" version = "0.3.30" @@ -712,10 +769,16 @@ version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" dependencies = [ + "futures-channel", "futures-core", + "futures-io", + "futures-macro", + "futures-sink", "futures-task", + "memchr", "pin-project-lite", "pin-utils", + "slab", ] [[package]] @@ -1068,6 +1131,19 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "lsp-types" +version = "0.94.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c66bfd44a06ae10647fe3f8214762e9369fd4248df1350924b4ef9e770a85ea1" +dependencies = [ + "bitflags 1.3.2", + "serde", + "serde_json", + "serde_repr", + "url", +] + [[package]] name = "macros" version = "0.0.1" @@ -1186,6 +1262,7 @@ dependencies = [ "test-log", "thiserror", "tokio", + "tower-lsp", "unicode-ident", ] @@ -1245,21 +1322,6 @@ dependencies = [ "pyo3", ] -[[package]] -name = "nemo-wasm" -version = "0.5.2-dev" -dependencies = [ - "console_error_panic_hook", - "js-sys", - "nemo", - "nemo-physical", - "thiserror", - "wasm-bindgen", - "wasm-bindgen-futures", - "wasm-bindgen-test", - "web-sys", -] - [[package]] name = "nom" version = "5.1.3" @@ -1953,12 +2015,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "scoped-tls" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" - [[package]] name = "scopeguard" version = "1.2.0" @@ -2019,6 +2075,17 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_repr" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c64451ba24fc7a6a2d60fc75dd9c83c90903b19028d4eff35e88fc1e86564e9" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + [[package]] name = "serde_urlencoded" version = "0.7.1" @@ -2312,6 +2379,40 @@ version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c20c8dbed6283a09604c3e69b4b7eeb54e298b8a600d4d5ecb5ad39de609f1d0" +[[package]] +name = "tower-lsp" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4ba052b54a6627628d9b3c34c176e7eda8359b7da9acd497b9f20998d118508" +dependencies = [ + "async-trait", + "auto_impl", + "bytes", + "dashmap", + "futures", + "httparse", + "lsp-types", + "memchr", + "serde", + "serde_json", + "tokio", + "tokio-util", + "tower", + "tower-lsp-macros", + "tracing", +] + +[[package]] +name = "tower-lsp-macros" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84fd902d4e0b9a4b27f2f440108dc034e1758628a9b702f8ec61ad66355422fa" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + [[package]] name = "tower-service" version = "0.3.2" @@ -2325,9 +2426,21 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" dependencies = [ "pin-project-lite", + "tracing-attributes", "tracing-core", ] +[[package]] +name = "tracing-attributes" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + [[package]] name = "tracing-core" version = "0.1.32" @@ -2420,6 +2533,7 @@ dependencies = [ "form_urlencoded", "idna", "percent-encoding", + 
"serde", ] [[package]] @@ -2552,31 +2666,6 @@ version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" -[[package]] -name = "wasm-bindgen-test" -version = "0.3.42" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9bf62a58e0780af3e852044583deee40983e5886da43a271dd772379987667b" -dependencies = [ - "console_error_panic_hook", - "js-sys", - "scoped-tls", - "wasm-bindgen", - "wasm-bindgen-futures", - "wasm-bindgen-test-macro", -] - -[[package]] -name = "wasm-bindgen-test-macro" -version = "0.3.42" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7f89739351a2e03cb94beb799d47fb2cac01759b40ec441f7de39b00cbf7ef0" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.66", -] - [[package]] name = "web-sys" version = "0.3.69" diff --git a/Cargo.toml b/Cargo.toml index 03c3bb78d..cde9888d4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,6 +5,9 @@ members = [ "nemo-cli", "nemo-physical", "nemo-python", +] +exclude = [ + "nemo-language-server", "nemo-wasm", ] diff --git a/nemo-language-server/Cargo.toml b/nemo-language-server/Cargo.toml new file mode 100644 index 000000000..6e16e33f3 --- /dev/null +++ b/nemo-language-server/Cargo.toml @@ -0,0 +1,30 @@ +[package] +name = "nemo-language-server" +description.workspace = true +version.workspace = true +authors.workspace = true +edition.workspace = true +homepage.workspace = true +license.workspace = true +readme = "README.md" +repository.workspace = true +default-run = "nemo-language-server" + +[[bin]] +name = "nemo-language-server" +path = "src/main.rs" +required-features = ["tokio"] + +[features] +default = ["tokio"] +# Allows building for web assembly environments +js = [] +tokio = ["dep:tokio"] + +[dependencies] +line-index = "0.1.1" +nemo = { path = "../nemo", default-features = false } +futures = "0.3.21" +tokio = { version = "1.27.0", features = ["full"], optional = true } +tower-lsp = { version = "0.20.0", default-features = false } +tower-service = "0.3.2" diff --git a/nemo-language-server/README.md b/nemo-language-server/README.md new file mode 100644 index 000000000..e69de29bb diff --git a/nemo-language-server/src/language_server.rs b/nemo-language-server/src/language_server.rs new file mode 100644 index 000000000..35acce8c6 --- /dev/null +++ b/nemo-language-server/src/language_server.rs @@ -0,0 +1,544 @@ +use std::collections::HashMap; +use std::vec; + +use futures::lock::Mutex; +use line_index::{LineCol, LineIndex, WideEncoding}; +use nemo::io::parser::ast::program::Program; +use nemo::io::parser::ast::{AstNode, Position}; +use nemo::io::parser::new::parse_program_str; +use nemo_position::{ + lsp_position_to_nemo_position, nemo_position_to_lsp_position, PositionConversionError, +}; +use tower_lsp::lsp_types::{ + CompletionOptions, Diagnostic, DidChangeTextDocumentParams, DidOpenTextDocumentParams, + DocumentChangeOperation, DocumentChanges, DocumentSymbol, DocumentSymbolOptions, + DocumentSymbolParams, DocumentSymbolResponse, InitializeParams, InitializeResult, + InitializedParams, Location, MessageType, OneOf, OptionalVersionedTextDocumentIdentifier, + PrepareRenameResponse, Range, ReferenceParams, RenameOptions, RenameParams, ServerCapabilities, + TextDocumentEdit, TextDocumentPositionParams, TextDocumentSyncCapability, TextDocumentSyncKind, + TextEdit, Url, VersionedTextDocumentIdentifier, WorkDoneProgressOptions, WorkspaceEdit, +}; +use tower_lsp::{Client, LanguageServer}; + 
+mod nemo_position; + +#[derive(Debug)] +pub struct Backend { + client: Client, + state: Mutex, // TODO: Replace with RwLock, see https://github.com/rust-lang/futures-rs/pull/2082 +} + +#[derive(Debug)] +pub(crate) struct BackendState { + text_document_store: HashMap, +} + +#[derive(Debug, Clone)] +struct TextDocumentInfo { + /// Content of the text document + text: String, + // Version information so that the language client can check if the server operated on the up to date version + version: i32, +} + +/// Converts a source position to a LSP position +pub(crate) fn line_col_to_position( + line_index: &LineIndex, + line_col: LineCol, +) -> Result { + let wide_line_col = line_index + .to_wide(WideEncoding::Utf16, line_col) + .ok_or(())?; + + Ok(tower_lsp::lsp_types::Position { + line: wide_line_col.line, + character: wide_line_col.col, + }) +} + +impl Backend { + pub fn new(client: Client) -> Self { + Self { + client, + state: Mutex::new(BackendState { + text_document_store: HashMap::new(), + }), + } + } + + async fn handle_change(&self, text_document: VersionedTextDocumentIdentifier, text: &str) { + self.state.lock().await.text_document_store.insert( + text_document.uri.clone(), + TextDocumentInfo { + text: text.to_string(), + version: text_document.version, + }, + ); + + let line_index = LineIndex::new(text); + + let (_program, errors) = parse_program_str(text); + + let diagnostics = errors + .into_iter() + .map(|error| Diagnostic { + message: error.1, + range: Range::new( + line_col_to_position( + &line_index, + LineCol { + line: error.0.line - 1, + col: error.0.column - 1, + }, + ) + .unwrap(), + line_col_to_position( + &line_index, + LineCol { + line: error.0.line - 1, + col: error.0.column - 1 + 1, + }, + ) + .unwrap(), + ), + ..Default::default() + }) + .collect(); + + self.client + .publish_diagnostics( + text_document.uri.clone(), + diagnostics, + Some(text_document.version), + ) + .await; + } + + async fn read_text_document_info(&self, uri: &Url) -> Option { + if let Some(info) = self.state.lock().await.text_document_store.get(uri) { + let a = info.clone(); + Some(a) + } else { + self.client + .log_message( + MessageType::ERROR, + "could not find text document with URI {uri}", + ) + .await; + None + } + } +} + +#[tower_lsp::async_trait] +impl LanguageServer for Backend { + async fn initialize( + &self, + _: InitializeParams, + ) -> tower_lsp::jsonrpc::Result { + Ok(InitializeResult { + capabilities: ServerCapabilities { + text_document_sync: Some(TextDocumentSyncCapability::Kind( + TextDocumentSyncKind::FULL, + )), + references_provider: Some(OneOf::Left(true)), + document_symbol_provider: Some(OneOf::Right(DocumentSymbolOptions { + label: Some("Nemo".to_string()), + work_done_progress_options: WorkDoneProgressOptions { + ..Default::default() + }, + })), + rename_provider: Some(OneOf::Right(RenameOptions { + prepare_provider: Some(true), + work_done_progress_options: WorkDoneProgressOptions { + ..Default::default() + }, + })), + completion_provider: Some(CompletionOptions { + work_done_progress_options: WorkDoneProgressOptions { + ..Default::default() + }, + ..Default::default() + }), + + ..Default::default() + }, + ..Default::default() + }) + } + + async fn initialized(&self, _: InitializedParams) { + self.client + .log_message(MessageType::INFO, "server initialized") + .await; + } + + async fn did_open(&self, params: DidOpenTextDocumentParams) { + self.handle_change( + VersionedTextDocumentIdentifier { + uri: params.text_document.uri, + version: params.text_document.version, + 
}, + ¶ms.text_document.text, + ) + .await; + } + + async fn did_change(&self, params: DidChangeTextDocumentParams) { + self.handle_change(params.text_document, ¶ms.content_changes[0].text) + .await; + } + + async fn references( + &self, + params: ReferenceParams, + ) -> tower_lsp::jsonrpc::Result>> { + let info = self + .read_text_document_info(¶ms.text_document_position.text_document.uri) + .await; + + match info { + Some(info) => { + let text = info.text; + let line_index = LineIndex::new(&text); + let position = lsp_position_to_nemo_position( + &line_index, + params.text_document_position.position, + ) + .unwrap(); // TODO handle unwrap + + let program = parse_program_str(&text); + let program = program.0; + + let node_path = find_in_ast(&program, position); + + // Get most identifier most specific to the position + let indentified_node = node_path_deepest_identifier(&node_path); + let indentified_node = match indentified_node { + Some(indentified_node) => indentified_node, + None => return Ok(None), + }; + + // Find other AST nodes with the same global identifier + let referenced_nodes = + find_by_identifier(indentified_node.scoping_node, &indentified_node.identifier); + + let locations = referenced_nodes + .iter() + .map(|node| Location { + uri: params.text_document_position.text_document.uri.clone(), + range: node_to_range_lsp(&line_index, *node), + }) + .collect(); + + Ok(Some(locations)) + } + None => Ok(None), // TODO: Handle error + } + } + + async fn document_symbol( + &self, + params: DocumentSymbolParams, + ) -> tower_lsp::jsonrpc::Result> { + let info = self + .read_text_document_info(¶ms.text_document.uri) + .await; + + match info { + Some(info) => { + let text = info.text; + let line_index = LineIndex::new(&text); + + let program = parse_program_str(&text); + let program = program.0; + + let document_symbol = ast_node_to_document_symbol(&line_index, &program); + + if let Ok(document_symbol) = document_symbol { + return Ok(document_symbol.map(|document_symbol| { + DocumentSymbolResponse::Nested(document_symbol.children.unwrap()) + })); + } + + Ok(None) + } + None => Ok(None), // TODO: Handle error + } + } + + /// Finds references to symbol that was renamed and sends edit operations to language client + async fn rename( + &self, + params: RenameParams, + ) -> tower_lsp::jsonrpc::Result> { + let info = self + .read_text_document_info(¶ms.text_document_position.text_document.uri) + .await; + + let info = match info { + Some(info) => info, + None => return Ok(None), + }; + + let text = info.text; + let line_index = LineIndex::new(&text); + let position = + lsp_position_to_nemo_position(&line_index, params.text_document_position.position) + .unwrap(); + + let program = parse_program_str(&text); + let program = program.0; + + let node_path = find_in_ast(&program, position); + + // Get most identifier most specific to the position + let indentified_node = node_path_deepest_identifier(&node_path); + let indentified_node = match indentified_node { + Some(indentified_node) => indentified_node, + None => return Ok(None), + }; + + // Find other AST nodes with the same global identifier + let referenced_nodes = + find_by_identifier(indentified_node.scoping_node, &indentified_node.identifier); + + let edit = TextDocumentEdit { + text_document: OptionalVersionedTextDocumentIdentifier { + uri: params.text_document_position.text_document.uri, + version: Some(info.version), + }, + edits: referenced_nodes + .into_iter() + .filter_map(|node| { + node.lsp_sub_node_to_rename().map(|renamed_node| 
{ + OneOf::Left(TextEdit { + range: node_to_range_lsp(&line_index, renamed_node), + new_text: params.new_name.clone(), + }) + }) + }) + .collect(), + }; + + Ok(Some(WorkspaceEdit { + document_changes: Some(DocumentChanges::Operations(vec![ + DocumentChangeOperation::Edit(edit), + ])), + ..Default::default() + })) + } + + /// Tells the language client the range of the token that will be renamed + async fn prepare_rename( + &self, + params: TextDocumentPositionParams, + ) -> tower_lsp::jsonrpc::Result> { + let info = self + .read_text_document_info(¶ms.text_document.uri) + .await; + + let info = match info { + Some(info) => info, + None => return Ok(None), + }; + + let text = info.text; + let line_index = LineIndex::new(&text); + let position = lsp_position_to_nemo_position(&line_index, params.position).unwrap(); + + let program = parse_program_str(&text); + let program = program.0; + + let node_path = find_in_ast(&program, position); + + // Get identifier most specific to the position + let indentified_node = node_path_deepest_identifier(&node_path); + + match indentified_node { + Some(indentified_node) => { + Ok(indentified_node + .node + .lsp_sub_node_to_rename() + .map(|renamed_node| { + PrepareRenameResponse::Range(node_to_range_lsp(&line_index, renamed_node)) + })) + } + None => Ok(None), + } + } + + async fn shutdown(&self) -> tower_lsp::jsonrpc::Result<()> { + Ok(()) + } +} + +struct IdentifiedNode<'a> { + node: &'a dyn AstNode, + identifier: String, + scoping_node: &'a dyn AstNode, +} + +struct PariallyIdentifiedNode<'a> { + node: &'a dyn AstNode, + identifier: String, + identifier_scope: String, +} + +/// Get identifier most specific to the position of the node path +fn node_path_deepest_identifier<'a>(node_path: &[&'a dyn AstNode]) -> Option> { + let mut info = None; + + for node in node_path.iter().rev() { + match info { + None => { + if let Some((identifier, identifier_scope)) = node.lsp_identifier() { + info = Some(PariallyIdentifiedNode { + node: *node, + identifier, + identifier_scope, + }); + } + } + Some(ref info) => { + if let Some(parent_identifier) = node.lsp_identifier() + && parent_identifier.0.starts_with(&info.identifier_scope) + { + return Some(IdentifiedNode { + node: info.node, + identifier: info.identifier.clone(), + scoping_node: *node, + }); + } + } + } + } + + return info.map(|info| IdentifiedNode { + node: info.node, + identifier: info.identifier, + scoping_node: *node_path.first().unwrap(), + }); +} + +fn find_by_identifier<'a>(node: &'a dyn AstNode, identifier: &str) -> Vec<&'a dyn AstNode> { + let mut references = Vec::new(); + + find_by_identifier_recurse(node, identifier, &mut references); + + references +} + +fn find_by_identifier_recurse<'a>( + node: &'a dyn AstNode, + identifier: &str, + references: &mut Vec<&'a dyn AstNode>, +) { + if node + .lsp_identifier() + .map(|(i, _)| i == identifier) + .unwrap_or(false) + { + references.push(node); + } + + if let Some(children) = node.children() { + for child in children { + find_by_identifier_recurse(child, identifier, references); + } + }; +} + +fn find_in_ast<'a>(node: &'a Program<'a>, position: Position) -> Vec<&'a dyn AstNode> { + let mut path = Vec::new(); + + find_in_ast_recurse(node, position, &mut path); + + path +} + +fn find_in_ast_recurse<'a>( + node: &'a dyn AstNode, + position: Position, + path: &mut Vec<&'a dyn AstNode>, +) { + path.push(node); + + if let Some(children) = node.children() { + for (child, next_child) in children.iter().zip(children.iter().skip(1)) { + if next_child.position() > 
position { + find_in_ast_recurse(*child, position, path); + return; + } + } + if let Some(child) = children.last() { + find_in_ast_recurse(*child, position, path); + } + }; +} + +fn node_to_range_lsp(line_index: &LineIndex, node: &dyn AstNode) -> Range { + Range { + start: nemo_position_to_lsp_position(line_index, node.position()).unwrap(), // TODO: Improve error handling + end: nemo_position_to_lsp_position( + line_index, + Position { + offset: node.position().offset + node.span().len(), + line: node.position().line + node.span().fragment().lines().count() as u32 - 1, + column: if node.span().fragment().lines().count() > 1 { + 1 + node.span().fragment().lines().last().unwrap().len() // TODO: Check if length is in correct encoding + as u32 + } else { + node.position().column + node.span().fragment().len() as u32 + // TODO: Check if length is in correct encoding + }, + }, + ) + .unwrap(), + } +} + +fn ast_node_to_document_symbol( + line_index: &LineIndex, + node: &dyn AstNode, +) -> Result, PositionConversionError> { + let range = node_to_range_lsp(line_index, node); + + let selection_range = range; + + if let Some((name, kind)) = node.lsp_symbol_info() { + let children_results: Vec<_> = node + .children() + .into_iter() + .flatten() + .map(|child| ast_node_to_document_symbol(line_index, child)) + .collect(); + let mut children = Vec::with_capacity(children_results.len()); + for child_result in children_results { + child_result? + .into_iter() + .for_each(|symbol| children.push(symbol)) + } + let children = if children.is_empty() { + None + } else { + Some(children) + }; + + Ok(Some( + #[allow(deprecated)] + DocumentSymbol { + children, + detail: None, + kind, + name, + range, + selection_range, + tags: None, + deprecated: None, + }, + )) + } else { + Ok(None) + } +} diff --git a/nemo-language-server/src/language_server/nemo_position.rs b/nemo-language-server/src/language_server/nemo_position.rs new file mode 100644 index 000000000..4e155166e --- /dev/null +++ b/nemo-language-server/src/language_server/nemo_position.rs @@ -0,0 +1,69 @@ +//! LSP position: +//! +//! * line: u32 index of the line, first line gets index 0 +//! * offset: u32 index of the UTF-16 code point within the line, first column gets index 0 +//! +//! Nemo position: +//! +//! * line: u32 index of the line, first line gets index 1 +//! 
* offset: u32 index of the UTF-8 code point (byte) within the line, first column gets index 1
+
+use line_index::{LineCol, LineIndex, WideEncoding, WideLineCol};
+
+#[derive(Debug)]
+pub enum PositionConversionError {
+    NemoPosition(nemo::io::parser::ast::Position),
+    LspPosition(tower_lsp::lsp_types::Position),
+}
+
+fn line_col_to_nemo_position(
+    line_index: &LineIndex,
+    line_col: LineCol,
+) -> Result<nemo::io::parser::ast::Position, ()> {
+    Ok(nemo::io::parser::ast::Position {
+        line: line_col.line + 1,
+        column: line_col.col + 1, // Nemo lines and columns are 1-based
+        offset: line_index.offset(line_col).ok_or(())?.into(),
+    })
+}
+
+/// Converts an LSP position to a Nemo parser position
+pub fn lsp_position_to_nemo_position(
+    line_index: &LineIndex,
+    position: tower_lsp::lsp_types::Position,
+) -> Result<nemo::io::parser::ast::Position, PositionConversionError> {
+    let line_col = line_index
+        .to_utf8(
+            WideEncoding::Utf16,
+            WideLineCol {
+                line: position.line,
+                col: position.character,
+            },
+        )
+        .ok_or(PositionConversionError::LspPosition(position))?;
+
+    Ok(line_col_to_nemo_position(line_index, line_col).unwrap())
+}
+
+fn nemo_position_to_line_col(position: nemo::io::parser::ast::Position) -> LineCol {
+    LineCol {
+        line: position.line - 1,
+        col: position.column - 1,
+    }
+}
+
+/// Converts a source position to an LSP position
+pub fn nemo_position_to_lsp_position(
+    line_index: &LineIndex,
+    position: nemo::io::parser::ast::Position,
+) -> Result<tower_lsp::lsp_types::Position, PositionConversionError> {
+    // TODO: Find out what UTF encoding nemo parser uses
+    let wide_line_col = line_index
+        .to_wide(WideEncoding::Utf16, nemo_position_to_line_col(position))
+        .ok_or(PositionConversionError::NemoPosition(position))?;
+
+    Ok(tower_lsp::lsp_types::Position {
+        line: wide_line_col.line,
+        character: wide_line_col.col,
+    })
+}
diff --git a/nemo-language-server/src/lib.rs b/nemo-language-server/src/lib.rs
new file mode 100644
index 000000000..de15066cd
--- /dev/null
+++ b/nemo-language-server/src/lib.rs
@@ -0,0 +1,19 @@
+#![feature(let_chains)]
+
+pub use language_server::Backend;
+
+pub use tower_lsp::{
+    jsonrpc::{Request, Response},
+    ClientSocket, ExitedError, LspService,
+};
+pub use tower_service::Service;
+
+mod language_server;
+
+pub fn create_language_server() -> (LspService<Backend>, ClientSocket) {
+    LspService::new(Backend::new)
+}
+
+// See https://doc.rust-lang.org/cargo/reference/features.html#mutually-exclusive-features
+#[cfg(all(feature = "js", feature = "tokio"))]
+compile_error!("feature \"js\" and feature \"tokio\" cannot be enabled at the same time");
diff --git a/nemo-language-server/src/main.rs b/nemo-language-server/src/main.rs
new file mode 100644
index 000000000..6a000d4af
--- /dev/null
+++ b/nemo-language-server/src/main.rs
@@ -0,0 +1,15 @@
+#![feature(let_chains)]
+
+use language_server::Backend;
+use tower_lsp::{LspService, Server};
+
+mod language_server;
+
+#[tokio::main]
+async fn main() {
+    let stdin = tokio::io::stdin();
+    let stdout = tokio::io::stdout();
+
+    let (service, socket) = LspService::new(Backend::new);
+    Server::new(stdin, stdout, socket).serve(service).await;
+}
diff --git a/nemo-wasm/Cargo.toml b/nemo-wasm/Cargo.toml
index 5054e1b31..432406182 100644
--- a/nemo-wasm/Cargo.toml
+++ b/nemo-wasm/Cargo.toml
@@ -21,6 +21,9 @@ console_error_panic_hook = "0.1.7"
 js-sys = "0.3.63"
 nemo = { path = "../nemo", features = [ "js" ], default-features = false }
 nemo-physical = { path = "../nemo-physical", default-features = false }
+nemo-language-server = { path = "../nemo-language-server", features = [ "js" ], default-features = false}
+futures = "0.3.21"
+gloo-utils = { version = "0.1", features = ["serde"] }
 thiserror = "1.0"
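(Editorial aside, not part of the patch: the conversion helpers in `nemo_position.rs` above are easy to get wrong at the encoding boundary, which is exactly what the `TODO` there worries about. Below is a minimal, self-contained sketch of the UTF-16 vs. UTF-8 column mismatch, using the same `line-index` crate; the rule text is made up for illustration.)

```rust
use line_index::{LineIndex, WideEncoding, WideLineCol};

fn main() {
    // 'ä' is two bytes in UTF-8 but a single UTF-16 code unit.
    let text = "ä(?X) :- b(?X).";
    let index = LineIndex::new(text);

    // An LSP client pointing at the '(' sends UTF-16 column 1.
    let lsp_col = WideLineCol { line: 0, col: 1 };
    let utf8 = index.to_utf8(WideEncoding::Utf16, lsp_col).unwrap();

    // The UTF-8 column is a byte offset, so it is 2, not 1. Forgetting this
    // conversion shifts every position after the first non-ASCII character.
    assert_eq!(utf8.col, 2);
}
```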
wasm-bindgen = "0.2" wasm-bindgen-futures = "0.4.37" diff --git a/nemo-wasm/README.md b/nemo-wasm/README.md index b979fbf03..f33d4b7fe 100644 --- a/nemo-wasm/README.md +++ b/nemo-wasm/README.md @@ -7,17 +7,17 @@ This crate provides a Web Assembly build and JavaScript/TypeScript bindings for ## Building -- Install [wasm-pack](https://rustwasm.github.io/wasm-pack/book/prerequisites/index.html) -- Build the library: +- Install [wasm-pack](https://rustwasm.github.io/wasm-pack/book/prerequisites/index.html) +- Build the library: ``` -wasm-pack build --target bundler --weak-refs -wasm-pack build --target bundler --weak-refs --release +wasm-pack build --out-dir nemoWASMBundler --target bundler --weak-refs --release +wasm-pack build --out-dir nemoWASMWeb --target web --weak-refs --release ``` -- In order to use the `FileSystemSyncAccessHandle` APIs, the `web_sys_unstable_apis` `cfg` flag needs to be set - - See https://rustwasm.github.io/docs/wasm-bindgen/web-sys/unstable-apis.html - - See https://rustwasm.github.io/wasm-bindgen/api/web_sys/struct.FileSystemSyncAccessHandle.html +- In order to use the `FileSystemSyncAccessHandle` APIs, the `web_sys_unstable_apis` `cfg` flag needs to be set + - See https://rustwasm.github.io/docs/wasm-bindgen/web-sys/unstable-apis.html + - See https://rustwasm.github.io/wasm-bindgen/api/web_sys/struct.FileSystemSyncAccessHandle.html ## Example usage @@ -31,21 +31,21 @@ const engine = new NemoEngine(program); engine.reason(); for (const predicate of program.getOutputPredicates()) { - const rows = new NemoResultsIterable(engine.getResult(predicate)); + const rows = new NemoResultsIterable(engine.getResult(predicate)); - for (const row of rows) { - console.log(row); - } + for (const row of rows) { + console.log(row); + } } // See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Iterators_and_Generators // Iterables are not directly supported yet, see https://github.com/rustwasm/wasm-bindgen/issues/1478 class NemoResultsIterable { - public constructor(private iterator: NemoResults) {} + public constructor(private iterator: NemoResults) {} - public [Symbol.iterator]() { - return this.iterator; - } + public [Symbol.iterator]() { + return this.iterator; + } } ``` @@ -59,22 +59,22 @@ const engine = new NemoEngine(program); engine.reason(); for (const predicate of program.getOutputPredicates()) { - const rows = new NemoResultsIterable(engine.getResult(predicate)); + const rows = new NemoResultsIterable(engine.getResult(predicate)); - for (const row of rows) { - console.log(row); - } + for (const row of rows) { + console.log(row); + } } // See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Iterators_and_Generators // Iterables are not directly supported yet, see https://github.com/rustwasm/wasm-bindgen/issues/1478 class NemoResultsIterable { - constructor(iterator) { - this.iterator = iterator; - } + constructor(iterator) { + this.iterator = iterator; + } - [Symbol.iterator]() { - return this.iterator; - } + [Symbol.iterator]() { + return this.iterator; + } } ``` diff --git a/nemo-wasm/src/language_server.rs b/nemo-wasm/src/language_server.rs new file mode 100644 index 000000000..046bb0114 --- /dev/null +++ b/nemo-wasm/src/language_server.rs @@ -0,0 +1,85 @@ +use std::pin::Pin; + +use futures::{FutureExt, SinkExt, StreamExt}; +use futures::{Sink, Stream}; +use gloo_utils::format::JsValueSerdeExt; +use js_sys::{Array, Promise}; +use nemo_language_server::{ + create_language_server, Backend, ExitedError, LspService, Request, Response, Service, +}; 
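(Editorial aside, not part of the patch: the module below splits the LSP socket so that server-initiated requests can be awaited while client-initiated traffic is handled concurrently. A minimal sketch of the underlying idea, using only the `futures` crate; the channel and message names are illustrative, not taken from the patch.)

```rust
use futures::{channel::mpsc, executor::block_on, join, SinkExt, StreamExt};

fn main() {
    // A channel hands out a Sink half (tx) and a Stream half (rx), so two
    // independent tasks can own them: one sends while the other awaits.
    let (mut tx, mut rx) = mpsc::channel::<String>(8);

    let send = async move {
        tx.send("server-bound request".to_string()).await.unwrap();
        // Dropping tx here closes the stream, ending the receive loop.
    };
    let receive = async move {
        while let Some(msg) = rx.next().await {
            println!("got: {msg}");
        }
    };

    block_on(async { join!(send, receive) });
}
```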
+use wasm_bindgen::prelude::wasm_bindgen;
+use wasm_bindgen::JsValue;
+use wasm_bindgen_futures::future_to_promise;
+
+/// Creates a Nemo language server
+/// The server is split up into multiple parts to allow concurrent sending/waiting for server/client-bound requests/responses.
+/// To enable this with `wasm_bindgen`, multiple structs are required to ensure exclusive access, see https://stackoverflow.com/questions/75712197/rust-wasm-bindgen-recursive-use-of-an-object-detected-which-would-lead-to-unsaf#77013978 .
+#[wasm_bindgen(js_name = "createNemoLanguageServer")]
+pub fn create_nemo_language_server() -> JsValue {
+    let (service, socket) = create_language_server();
+
+    // Note: futures' `StreamExt::split` yields the sink half first.
+    let (responses_sink, request_stream) = socket.split();
+
+    let (a, b, c) = (
+        NemoLspChannelClientInitiated(service),
+        NemoLspRequestsServerInitiated(Box::pin(request_stream)),
+        NemoLspResponsesServerInitiated(Box::pin(responses_sink)),
+    );
+
+    let (a, b, c): (JsValue, JsValue, JsValue) = (a.into(), b.into(), c.into());
+
+    let array = Array::new();
+
+    array.push(&a);
+    array.push(&b);
+    array.push(&c);
+
+    array.into()
+}
+
+/// Handles requests initiated by the server
+#[wasm_bindgen]
+pub struct NemoLspRequestsServerInitiated(Pin<Box<dyn Stream<Item = Request>>>);
+
+/// Handles responses corresponding to requests initiated by the server
+#[wasm_bindgen]
+pub struct NemoLspResponsesServerInitiated(Pin<Box<dyn Sink<Response, Error = ExitedError>>>);
+
+#[wasm_bindgen]
+impl NemoLspRequestsServerInitiated {
+    #[wasm_bindgen(js_name = "getNextRequest")]
+    pub async fn next_request(&mut self) -> JsValue {
+        let request = self.0.next().await;
+
+        JsValue::from_serde(&request).unwrap()
+    }
+}
+
+#[wasm_bindgen]
+impl NemoLspResponsesServerInitiated {
+    /// Only one response may be sent at a time; wait for the promise to resolve before sending the next response
+    #[wasm_bindgen(js_name = "sendResponse")]
+    pub async fn send_response(&mut self, response_json_object: JsValue) {
+        let response = response_json_object.into_serde().unwrap();
+
+        self.0.send(response).await.unwrap();
+    }
+}
+
+/// Handles requests initiated by the client and the corresponding responses
+#[wasm_bindgen]
+pub struct NemoLspChannelClientInitiated(LspService<Backend>);
+
+#[wasm_bindgen]
+impl NemoLspChannelClientInitiated {
+    #[wasm_bindgen(js_name = "sendRequest")]
+    pub fn send_request(&mut self, request_json_object: JsValue) -> Promise {
+        let request = request_json_object.into_serde().unwrap();
+
+        future_to_promise(
+            self.0
+                .call(request)
+                .map(|response| Result::Ok(JsValue::from_serde(&response.unwrap()).unwrap())),
+        )
+    }
+}
diff --git a/nemo-wasm/src/lib.rs b/nemo-wasm/src/lib.rs
index d3be7676e..b5d7e8301 100644
--- a/nemo-wasm/src/lib.rs
+++ b/nemo-wasm/src/lib.rs
@@ -34,6 +34,8 @@ use wasm_bindgen::JsValue;
 use web_sys::Blob;
 use web_sys::FileReaderSync;
 
+mod language_server;
+
 #[wasm_bindgen]
 #[derive(Clone)]
 pub struct NemoProgram(nemo::model::Program);
diff --git a/nemo/Cargo.toml b/nemo/Cargo.toml
index 173a3de22..ea6dfc52e 100644
--- a/nemo/Cargo.toml
+++ b/nemo/Cargo.toml
@@ -44,6 +44,7 @@ bytesize = "1.2"
 ascii_tree = "0.1.1"
 serde_json = "1.0.108"
 serde = {version = "1.0.138", features = ["derive"] }
+tower-lsp = "0.20.0"
 dyn-clone = "1.0.16"
 unicode-ident = "1.0.12"
diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs
index 555e9e74f..c0e1c90b4 100644
--- a/nemo/src/io/lexer.rs
+++ b/nemo/src/io/lexer.rs
@@ -4,14 +4,15 @@ use std::{cell::RefCell, ops::Range};
 
 use nom::{
     branch::alt,
-    bytes::complete::{is_not, tag, take, take_till, take_while},
+    bytes::complete::{is_not, tag, take, take_till},
character::complete::{alpha1, alphanumeric1, digit1, line_ending, multispace1}, combinator::{all_consuming, cut, map, recognize}, - error::{ContextError, ParseError}, + error::{ParseError}, multi::{many0, many1}, sequence::{delimited, pair, tuple}, }; use nom_locate::LocatedSpan; +use tower_lsp::lsp_types::SymbolKind; #[derive(Debug)] pub(crate) enum NewParseError { @@ -23,7 +24,7 @@ pub(crate) enum NewParseError { SyntaxError(String), MissingTlDocComment, } -impl nom::error::ParseError> for NewParseError { +impl ParseError> for NewParseError { fn from_error_kind(input: Input, kind: nom::error::ErrorKind) -> Self { NewParseError::SyntaxError(kind.description().to_string()) } @@ -36,12 +37,12 @@ impl nom::error::ParseError> for NewParseError { pub(crate) type IResult = nom::IResult; use super::parser::{ - ast::Position, + ast::{AstNode, Position}, types::{Input, Label, ToRange}, }; #[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub(crate) struct Error(pub(crate) Position, pub(crate) String); +pub struct Error(pub Position, pub String); #[derive(Debug, Clone, Copy, PartialEq)] pub(crate) struct ParserState<'a> { @@ -64,7 +65,7 @@ impl ToRange for Span<'_> { } } -pub(crate) fn to_range<'a>(span: Span<'a>) -> Range { +pub(crate) fn to_range(span: Span<'_>) -> Range { let start = span.location_offset(); let end = start + span.fragment().len(); start..end @@ -228,7 +229,7 @@ impl std::fmt::Display for TokenKind { } #[derive(Debug, Copy, Clone, PartialEq)] -pub(crate) struct Token<'a> { +pub struct Token<'a> { pub(crate) kind: TokenKind, pub(crate) span: Span<'a>, } @@ -260,8 +261,8 @@ impl std::fmt::Display for Token<'_> { } } } -impl<'a> crate::io::parser::ast::AstNode for Token<'a> { - fn children(&self) -> Option> { +impl<'a> AstNode for Token<'a> { + fn children(&self) -> Option> { None::> } @@ -281,6 +282,18 @@ impl<'a> crate::io::parser::ast::AstNode for Token<'a> { true } + fn lsp_identifier(&self) -> Option<(String, String)> { + None + } + + fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> { + None + } + + fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> { + None + } + fn name(&self) -> String { String::from("Token") } diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index 0bb7945a0..b06986d68 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -22,7 +22,7 @@ use nom::{ use macros::traced; -pub(crate) mod ast; +pub mod ast; pub(crate) mod types; use types::{ConstraintOperator, IntermediateResult, Span}; @@ -2429,45 +2429,45 @@ mod test { } /// NEW PARSER -mod new { +pub mod new { use std::cell::RefCell; use super::ast::{ atom::*, directive::*, map::*, program::*, statement::*, term::*, tuple::*, List, Position, Wsoc, }; - use super::types::{Input, Label, ParserLabel, ToRange}; + use super::types::{Input, Label, ParserLabel}; use crate::io::lexer::{ arrow, at, caret, close_brace, close_paren, colon, comma, dot, equal, exclamation_mark, exp, greater, greater_equal, hash, less, less_equal, lex_comment, lex_doc_comment, - lex_ident, lex_iri, lex_number, lex_operators, lex_string, lex_toplevel_doc_comment, - lex_whitespace, map_err, minus, open_brace, open_paren, plus, question_mark, skip_to_dot, - slash, star, tilde, underscore, unequal, Error, NewParseError, ParserState, Span, Token, + lex_ident, lex_iri, lex_number, lex_string, lex_toplevel_doc_comment, + lex_whitespace, minus, open_brace, open_paren, plus, question_mark, skip_to_dot, + slash, star, tilde, underscore, unequal, Error, ParserState, Span, Token, TokenKind, }; - use 
crate::io::parser::ast::AstNode; - use nom::combinator::{all_consuming, cut, map, opt, recognize}; - use nom::error::{context, ContextError, ParseError}; - use nom::sequence::{delimited, pair}; + + use nom::combinator::{all_consuming, opt, recognize}; + use nom::error::{ParseError}; + use nom::sequence::{pair}; use nom::Parser; use nom::{ branch::alt, combinator::verify, - multi::{many0, many1, separated_list0}, + multi::{many0, many1}, sequence::tuple, IResult, }; fn outer_span<'a>(input: Span<'a>, rest_input: Span<'a>) -> Span<'a> { unsafe { - let span = Span::new_from_raw_offset( + + // dbg!(&input, &span, &rest_input); + Span::new_from_raw_offset( input.location_offset(), input.location_line(), &input[..(rest_input.location_offset() - input.location_offset())], (), - ); - // dbg!(&input, &span, &rest_input); - span + ) } } @@ -2516,14 +2516,14 @@ mod new { 'e, O: Copy, E: ParseError>, - F: nom::Parser, O, E>, + F: Parser, O, E>, >( mut parser: F, error_msg: impl ToString, error_output: O, errors: ParserState<'e>, ) -> impl FnMut(Input<'a, 'e>) -> IResult, O, E> { - move |input| match parser.parse(input.clone()) { + move |input| match parser.parse(input) { Ok(result) => Ok(result), Err(nom::Err::Error(_)) | Err(nom::Err::Failure(_)) => { let err = Error( @@ -2542,7 +2542,7 @@ mod new { } fn recover<'a, 'e, E>( - mut parser: impl nom::Parser, Statement<'a>, E>, + mut parser: impl Parser, Statement<'a>, E>, error_msg: impl ToString, errors: ParserState<'e>, ) -> impl FnMut(Input<'a, 'e>) -> IResult, Statement<'a>, E> { @@ -2567,7 +2567,7 @@ mod new { } fn report_label<'a, 's, O, E>( - mut parser: impl nom::Parser, O, E>, + mut parser: impl Parser, O, E>, label: ParserLabel, ) -> impl FnMut(Input<'a, 's>) -> IResult, O, E> { move |input| match parser.parse(input) { @@ -2594,7 +2594,7 @@ mod new { } fn report_error<'a, 's, O, E>( - mut parser: impl nom::Parser, O, E>, + mut parser: impl Parser, O, E>, ) -> impl FnMut(Input<'a, 's>) -> IResult, O, E> { move |input| match parser.parse(input) { Ok(result) => { @@ -2612,7 +2612,7 @@ mod new { .into_iter(); for label in labels { if let Some(last) = furthest_errors.last() { - if label.pos.offset >= (*last).0.offset { + if label.pos.offset >= last.0.offset { let err = Error(label.pos, format!("expected {:?}", label.label)); furthest_errors.push(err); @@ -2679,7 +2679,7 @@ mod new { } /// Parse a full program consisting of directives, facts, rules and comments. - fn parse_program<'a, 'e>(input: Input<'a, 'e>) -> (Program<'a>, Vec) { + fn parse_program<'a>(input: Input<'a, '_>) -> (Program<'a>, Vec) { let (rest_input, (tl_doc_comment, statements)) = all_consuming(pair( opt(lex_toplevel_doc_comment), many1(recover( @@ -2706,6 +2706,20 @@ mod new { ) } + pub fn parse_program_str(input: &str) -> (Program<'_>, Vec) { + let refcell = RefCell::new(Vec::new()); + let labels = RefCell::new(Vec::new()); + let parser_state = ParserState { + errors: &refcell, + labels: &labels, + }; + let input = Input { + input: Span::new(input), + parser_state, + }; + parse_program(input) + } + /// Parse whitespace that is between directives, facts, rules and comments. 
 fn parse_whitespace<'a, 'e>(input: Input<'a, 'e>) -> IResult<Input<'a, 'e>, Statement<'a>> {
     lex_whitespace(input).map(|(rest_input, ws)| (rest_input, Statement::Whitespace(ws)))
@@ -3763,7 +3777,7 @@ mod new {
                 }
             }],
         }
-        )
+        );
     }
 
     #[test]
@@ -4034,7 +4048,7 @@ mod new {
                 }),
             ],
         }
-        )
+        );
     }
 
     // #[test]
@@ -4160,7 +4174,7 @@ mod new {
                 })
             ],
         }
-        )
+        );
     }
 
     #[test]
diff --git a/nemo/src/io/parser/ast.rs b/nemo/src/io/parser/ast.rs
index 3ed70e89d..30d3a8aa6 100644
--- a/nemo/src/io/parser/ast.rs
+++ b/nemo/src/io/parser/ast.rs
@@ -1,37 +1,50 @@
 use nom::Offset;
+use tower_lsp::lsp_types::SymbolKind;
 
 use crate::io::lexer::{Span, Token};
+use ascii_tree::{write_tree, Tree};
 use std::fmt::Display;
-use ascii_tree::{Tree, write_tree};
 
 pub(crate) mod atom;
 pub(crate) mod directive;
 pub(crate) mod map;
-pub(crate) mod tuple;
-pub(crate) mod program;
+pub mod program;
 pub(crate) mod statement;
 pub(crate) mod term;
+pub(crate) mod tuple;
 
-pub(crate) trait AstNode: std::fmt::Debug + Display {
+pub trait AstNode: std::fmt::Debug + Display + Sync {
     fn children(&self) -> Option<Vec<&dyn AstNode>>;
     fn span(&self) -> Span;
     fn position(&self) -> Position;
     fn is_token(&self) -> bool;
+
     fn name(&self) -> String;
+
+    /// Returns an optional pair of the identifier and the identifier scope.
+    ///
+    /// The identifier scope will scope this identifier up to any [`AstNode`]
+    /// that has an identifier that has this node's identifier scope as a prefix.
+    ///
+    /// This can be used to restrict rename operations to be local, e.g. for variable identifiers inside of rules.
+    fn lsp_identifier(&self) -> Option<(String, String)>;
+    fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)>;
+    fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode>;
 }
 
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
-pub(crate) struct Position {
-    pub(crate) offset: usize,
-    pub(crate) line: u32,
-    pub(crate) column: u32,
+// TODO: tidy up PartialOrd and Ord implementation
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct Position {
+    pub offset: usize,
+    pub line: u32,
+    pub column: u32,
 }
 
 /// Whitespace or Comment token
 #[derive(Debug, Clone, PartialEq)]
-pub(crate) struct Wsoc<'a> {
-    pub(crate) span: Span<'a>,
-    pub(crate) token: Vec<Token<'a>>
+pub struct Wsoc<'a> {
+    pub span: Span<'a>,
+    pub token: Vec<Token<'a>>,
 }
 impl AstNode for Wsoc<'_> {
     fn children(&self) -> Option<Vec<&dyn AstNode>> {
@@ -48,7 +61,11 @@ impl AstNode for Wsoc<'_> {
     }
 
     fn position(&self) -> Position {
-        Position { offset: self.span.location_offset(), line: self.span.location_line(), column: self.span.get_utf8_column() as u32 }
+        Position {
+            offset: self.span.location_offset(),
+            line: self.span.location_line(),
+            column: self.span.get_utf8_column() as u32,
+        }
     }
 
     fn is_token(&self) -> bool {
@@ -56,22 +73,39 @@ impl AstNode for Wsoc<'_> {
     }
 
     fn name(&self) -> String {
-        format!("Wsoc \x1b[34m@{}:{} \x1b[92m{:?}\x1b[0m", self.span.location_line(), self.span.get_utf8_column(), self.span.fragment())
+        format!(
+            "Wsoc \x1b[34m@{}:{} \x1b[92m{:?}\x1b[0m",
+            self.span.location_line(),
+            self.span.get_utf8_column(),
+            self.span.fragment()
+        )
+    }
+
+    fn lsp_identifier(&self) -> Option<(String, String)> {
+        None
+    }
+
+    fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> {
+        None
+    }
+
+    fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> {
+        None
     }
 }
+
 impl Display for Wsoc<'_> {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         todo!()
     }
 }
 
-
 #[derive(Debug, Clone, PartialEq)]
-pub(crate) struct List<'a, T> {
-    pub(crate) span: Span<'a>,
-    pub(crate) first: T,
+pub struct List<'a, T> {
+ pub span: Span<'a>, + pub first: T, // ([ws]?[,][ws]?[T])* - pub(crate) rest: Option>, Token<'a>, Option>, T)>>, + pub rest: Option>, Token<'a>, Option>, T)>>, } impl List<'_, T> { pub fn to_vec(&self) -> Vec { @@ -85,7 +119,7 @@ impl List<'_, T> { vec } } -impl std::iter::IntoIterator for List<'_, T> { +impl IntoIterator for List<'_, T> { type Item = T; type IntoIter = std::vec::IntoIter; @@ -137,9 +171,27 @@ impl AstNode for List<'_, T> { } fn name(&self) -> String { - format!("List \x1b[34m@{}:{} \x1b[92m{:?}\x1b[0m", self.span.location_line(), self.span.get_utf8_column(), self.span.fragment()) + format!( + "List \x1b[34m@{}:{} \x1b[92m{:?}\x1b[0m", + self.span.location_line(), + self.span.get_utf8_column(), + self.span.fragment() + ) + } + + fn lsp_identifier(&self) -> Option<(String, String)> { + None + } + + fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> { + None + } + + fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> { + Some((String::from("List"), SymbolKind::ARRAY)) } } + impl Display for List<'_, T> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let mut output = String::new(); @@ -175,9 +227,9 @@ pub(crate) fn ast_to_ascii_tree(node: &dyn AstNode) -> Tree { } mod test { - use super::*; - use super::{atom::Atom, directive::Directive, tuple::Tuple, program::Program, statement::Statement, term::Term, term::Primitive}; - use crate::io::lexer::TokenKind; + + + macro_rules! s { ($offset:literal,$line:literal,$str:literal) => { @@ -203,7 +255,7 @@ mod test { let span = Span::new(input); let ast = Program { span, - tl_doc_comment: Some(Token { + tl_doc_comment: Some(Token { kind: TokenKind::TlDocComment, span: s!(0, 1, "%! This is just a test file.\n%! So the documentation of the rules is not important.\n") }), diff --git a/nemo/src/io/parser/ast/atom.rs b/nemo/src/io/parser/ast/atom.rs index 46dbf0ac4..5410ae9d1 100644 --- a/nemo/src/io/parser/ast/atom.rs +++ b/nemo/src/io/parser/ast/atom.rs @@ -1,3 +1,5 @@ +use tower_lsp::lsp_types::SymbolKind; + use super::term::Term; use super::tuple::Tuple; use super::{ast_to_ascii_tree, AstNode, Wsoc}; @@ -6,7 +8,7 @@ use crate::io::lexer::{Span, Token}; use ascii_tree::write_tree; #[derive(Debug, Clone, PartialEq)] -pub(crate) enum Atom<'a> { +pub enum Atom<'a> { Positive(Tuple<'a>), Negative { span: Span<'a>, @@ -23,6 +25,17 @@ pub(crate) enum Atom<'a> { }, Map(Map<'a>), } + +impl Atom<'_> { + fn tuple(&self) -> Option<&Tuple<'_>> { + match &self { + Atom::Positive(tuple) => Some(tuple), + Atom::Negative { atom, .. 
} => Some(atom), + _ => None, + } + } +} + impl AstNode for Atom<'_> { fn children(&self) -> Option> { match self { @@ -93,6 +106,32 @@ impl AstNode for Atom<'_> { Atom::Map(_) => name!("Map Atom"), } } + + fn lsp_identifier(&self) -> Option<(String, String)> { + self.tuple().map(|tuple| ( + format!("atom/{}", tuple.identifier.unwrap().span().fragment()), + "file".to_string(), + )) + } + + fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> { + None + // TODO: + // match self.tuple() { + // Some(tuple) => Some(&tuple.identifier.unwrap()), + // None => None, + // } + } + + fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> { + match self.tuple() { + Some(tuple) => Some(( + format!("Atom: {}", tuple.identifier.unwrap().span.fragment()), + SymbolKind::FUNCTION, + )), + None => Some((String::from("Atom"), SymbolKind::FUNCTION)), + } + } } impl std::fmt::Display for Atom<'_> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { diff --git a/nemo/src/io/parser/ast/directive.rs b/nemo/src/io/parser/ast/directive.rs index 8b04c3c91..415b584d4 100644 --- a/nemo/src/io/parser/ast/directive.rs +++ b/nemo/src/io/parser/ast/directive.rs @@ -1,10 +1,12 @@ +use tower_lsp::lsp_types::SymbolKind; + use super::map::Map; use super::{ast_to_ascii_tree, AstNode, List, Position, Wsoc}; use crate::io::lexer::{Span, Token}; use ascii_tree::write_tree; #[derive(Debug, Clone, PartialEq)] -pub(crate) enum Directive<'a> { +pub enum Directive<'a> { // "@base ." Base { span: Span<'a>, @@ -261,7 +263,20 @@ impl AstNode for Directive<'_> { Directive::Output { .. } => name!("Output Directive"), } } + + fn lsp_identifier(&self) -> Option<(String, String)> { + None + } + + fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> { + None + } + + fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> { + Some((String::from("Directive"), SymbolKind::FUNCTION)) + } } + impl std::fmt::Display for Directive<'_> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let mut output = String::new(); diff --git a/nemo/src/io/parser/ast/map.rs b/nemo/src/io/parser/ast/map.rs index 473d01e92..cdafbd7f9 100644 --- a/nemo/src/io/parser/ast/map.rs +++ b/nemo/src/io/parser/ast/map.rs @@ -1,3 +1,5 @@ +use tower_lsp::lsp_types::SymbolKind; + use super::term::Term; use super::{ast_to_ascii_tree, AstNode, List, Position, Wsoc}; use crate::io::lexer::{Span, Token}; @@ -5,15 +7,15 @@ use ascii_tree::write_tree; use std::fmt::Debug; #[derive(Debug, Clone, PartialEq)] -pub(crate) struct Map<'a> { - pub(crate) span: Span<'a>, - pub(crate) identifier: Option>, - pub(crate) ws1: Option>, - pub(crate) open_brace: Token<'a>, - pub(crate) ws2: Option>, - pub(crate) pairs: Option, Term<'a>>>>, - pub(crate) ws3: Option>, - pub(crate) close_brace: Token<'a>, +pub struct Map<'a> { + pub span: Span<'a>, + pub identifier: Option>, + pub ws1: Option>, + pub open_brace: Token<'a>, + pub ws2: Option>, + pub pairs: Option, Term<'a>>>>, + pub ws3: Option>, + pub close_brace: Token<'a>, } impl AstNode for Map<'_> { fn children(&self) -> Option> { @@ -57,7 +59,20 @@ impl AstNode for Map<'_> { fn name(&self) -> String { String::from("Map") } + + fn lsp_identifier(&self) -> Option<(String, String)> { + None + } + + fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> { + None + } + + fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> { + Some((String::from("Map"), SymbolKind::STRUCT)) + } } + impl std::fmt::Display for Map<'_> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let mut 
output = String::new(); @@ -67,13 +82,13 @@ impl std::fmt::Display for Map<'_> { } #[derive(Debug, Clone, PartialEq)] -pub(crate) struct Pair<'a, K, V> { - pub(crate) span: Span<'a>, - pub(crate) key: K, - pub(crate) ws1: Option>, - pub(crate) equal: Token<'a>, - pub(crate) ws2: Option>, - pub(crate) value: V, +pub struct Pair<'a, K, V> { + pub span: Span<'a>, + pub key: K, + pub ws1: Option>, + pub equal: Token<'a>, + pub ws2: Option>, + pub value: V, } impl AstNode for Pair<'_, K, V> { fn children(&self) -> Option> { @@ -114,6 +129,18 @@ impl AstNode for Pair<'_, K, V> { self.span.fragment() ) } + + fn lsp_identifier(&self) -> Option<(String, String)> { + None + } + + fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> { + None + } + + fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> { + Some((String::from("Pair"), SymbolKind::ARRAY)) + } } impl std::fmt::Display for Pair<'_, K, V> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { diff --git a/nemo/src/io/parser/ast/program.rs b/nemo/src/io/parser/ast/program.rs index 466be7577..9331e59d5 100644 --- a/nemo/src/io/parser/ast/program.rs +++ b/nemo/src/io/parser/ast/program.rs @@ -1,14 +1,14 @@ -use ascii_tree::write_tree; +use tower_lsp::lsp_types::SymbolKind; -use super::statement::Statement; -use super::{ast_to_ascii_tree, AstNode, Position}; +use super::{ast_to_ascii_tree, statement::Statement, AstNode, Position}; use crate::io::lexer::{Span, Token}; +use ascii_tree::write_tree; #[derive(Debug, Clone, PartialEq)] -pub(crate) struct Program<'a> { - pub(crate) span: Span<'a>, - pub(crate) tl_doc_comment: Option>, - pub(crate) statements: Vec>, +pub struct Program<'a> { + pub span: Span<'a>, + pub tl_doc_comment: Option>, + pub statements: Vec>, } impl AstNode for Program<'_> { fn children(&self) -> Option> { @@ -58,7 +58,20 @@ impl AstNode for Program<'_> { ) } } + + fn lsp_identifier(&self) -> Option<(String, String)> { + None + } + + fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> { + None + } + + fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> { + Some(("File".to_string(), SymbolKind::FILE)) + } } + impl std::fmt::Display for Program<'_> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let mut output = String::new(); diff --git a/nemo/src/io/parser/ast/statement.rs b/nemo/src/io/parser/ast/statement.rs index a0762d0c8..f641dbd8e 100644 --- a/nemo/src/io/parser/ast/statement.rs +++ b/nemo/src/io/parser/ast/statement.rs @@ -1,3 +1,5 @@ +use tower_lsp::lsp_types::SymbolKind; + use super::atom::Atom; use super::directive::Directive; use super::{ast_to_ascii_tree, AstNode, List, Position, Wsoc}; @@ -5,7 +7,7 @@ use crate::io::lexer::{Span, Token}; use ascii_tree::write_tree; #[derive(Debug, Clone, PartialEq)] -pub(crate) enum Statement<'a> { +pub enum Statement<'a> { Directive(Directive<'a>), Fact { span: Span<'a>, @@ -123,6 +125,7 @@ impl AstNode for Statement<'_> { ) }; } + match self { Statement::Directive(_) => name!("Directive"), Statement::Fact { .. } => name!("Fact"), @@ -132,7 +135,29 @@ impl AstNode for Statement<'_> { Statement::Error(_) => name!("ERROR"), } } + + fn lsp_identifier(&self) -> Option<(String, String)> { + Some(("statement".to_string(), "statement".to_string())) + } + + fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> { + None + } + + fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> { + let name = match self { + Statement::Directive(_) => "Directive", + Statement::Fact { .. } => "Fact", + Statement::Rule { .. 
} => "Rule", + Statement::Whitespace(_ws) => return None, + Statement::Comment(_) => return None, + Statement::Error(_) => "Invalid", + }; + + Some((String::from(name), SymbolKind::CLASS)) + } } + impl std::fmt::Display for Statement<'_> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let mut output = String::new(); diff --git a/nemo/src/io/parser/ast/term.rs b/nemo/src/io/parser/ast/term.rs index 060269687..81ed9d47f 100644 --- a/nemo/src/io/parser/ast/term.rs +++ b/nemo/src/io/parser/ast/term.rs @@ -1,3 +1,5 @@ +use tower_lsp::lsp_types::SymbolKind; + use super::map::Map; use super::tuple::Tuple; use super::{ast_to_ascii_tree, AstNode, List, Position, Wsoc}; @@ -5,7 +7,7 @@ use crate::io::lexer::{Span, Token}; use ascii_tree::write_tree; #[derive(Debug, Clone, PartialEq)] -pub(crate) enum Term<'a> { +pub enum Term<'a> { Primitive(Primitive<'a>), Variable(Token<'a>), Existential(Token<'a>), @@ -36,6 +38,7 @@ pub(crate) enum Term<'a> { Map(Box>), Blank(Token<'a>), } + impl AstNode for Term<'_> { fn children(&self) -> Option> { match self { @@ -141,7 +144,7 @@ impl AstNode for Term<'_> { Term::Binary { .. } => name!("Binary Term"), Term::Aggregation { .. } => name!("Aggregation"), Term::Tuple(f) => { - if let Some(_) = f.identifier { + if f.identifier.is_some() { name!("Function Symbol") } else { name!("Tuple") @@ -151,6 +154,70 @@ impl AstNode for Term<'_> { Term::Blank(_) => name!("Blank"), } } + + fn lsp_identifier(&self) -> Option<(String, String)> { + match self { + Term::Variable(t) => Some(( + format!("variable/{}", t.span().fragment()), + "statement".to_string(), + )), + Term::Aggregation { operation, .. } => Some(( + format!("aggregation/{}", operation.span().fragment()), + "file".to_string(), + )), + Term::Tuple(tuple) => { + tuple.identifier.map(|identifier| ( + format!("function/{}", identifier.span().fragment()), + "file".to_string(), + )) + } + _ => None, + } + } + + fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> { + None + // TODO: + // match self { + // Term::Variable(t) => Some(t), + // Term::Aggregation { operation, .. } => Some(operation), + // Term::Tuple(tuple) => { + // if let Some(identifier) = tuple.identifier { + // Some(identifier) + // } else { + // None + // } + // } + // // Term::Function(named_tuple) => Some(&named_tuple.identifier), + // _ => None, + // } + } + + fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> { + match self { + Term::Primitive(_) => Some((String::from("Primitive term"), SymbolKind::CONSTANT)), + Term::Variable(t) => Some((format!("Variable: {}", t.span()), SymbolKind::VARIABLE)), + Term::UnaryPrefix { .. } => Some((String::from("Unary prefix"), SymbolKind::OPERATOR)), + Term::Blank { .. } => Some((String::from("Unary prefix"), SymbolKind::VARIABLE)), + Term::Existential { .. } => Some((String::from("Unary prefix"), SymbolKind::VARIABLE)), + Term::Binary { .. } => Some((String::from("Binary term"), SymbolKind::OPERATOR)), + Term::Aggregation { operation, .. 
} => Some(( + format!("Aggregation: {}", operation.span.fragment()), + SymbolKind::OPERATOR, + )), + Term::Tuple(tuple) => { + if let Some(identifier) = tuple.identifier { + Some(( + format!("Function: {}", identifier.span.fragment()), + SymbolKind::OPERATOR, + )) + } else { + Some((String::from("Tuple"), SymbolKind::ARRAY)) + } + } + Term::Map(map) => Some((String::from("Map"), SymbolKind::ARRAY)), + } + } } impl std::fmt::Display for Term<'_> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { @@ -180,6 +247,7 @@ pub(crate) enum Primitive<'a> { iri: Token<'a>, }, } + impl AstNode for Primitive<'_> { fn children(&self) -> Option> { match self { @@ -264,6 +332,18 @@ impl AstNode for Primitive<'_> { Primitive::RdfLiteral { .. } => name!("RDF Literal"), } } + + fn lsp_identifier(&self) -> Option<(String, String)> { + None + } + + fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> { + None + } + + fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> { + None + } } impl std::fmt::Display for Primitive<'_> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { @@ -279,6 +359,7 @@ pub(crate) struct Exponent<'a> { pub(crate) sign: Option>, pub(crate) number: Token<'a>, } + impl AstNode for Exponent<'_> { fn children(&self) -> Option> { let mut vec: Vec<&dyn AstNode> = Vec::new(); @@ -305,7 +386,20 @@ impl AstNode for Exponent<'_> { fn name(&self) -> String { todo!() } + + fn lsp_identifier(&self) -> Option<(String, String)> { + None + } + + fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> { + None + } + + fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> { + None + } } + impl std::fmt::Display for Exponent<'_> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { todo!() diff --git a/nemo/src/io/parser/ast/tuple.rs b/nemo/src/io/parser/ast/tuple.rs index f074eb6a1..459d22962 100644 --- a/nemo/src/io/parser/ast/tuple.rs +++ b/nemo/src/io/parser/ast/tuple.rs @@ -1,19 +1,22 @@ +use tower_lsp::lsp_types::SymbolKind; + use super::term::Term; use super::{ast_to_ascii_tree, AstNode, List, Position, Wsoc}; use crate::io::lexer::{Span, Token}; use ascii_tree::write_tree; #[derive(Debug, Clone, PartialEq)] -pub(crate) struct Tuple<'a> { - pub(crate) span: Span<'a>, - pub(crate) identifier: Option>, - pub(crate) ws1: Option>, - pub(crate) open_paren: Token<'a>, - pub(crate) ws2: Option>, - pub(crate) terms: Option>>, - pub(crate) ws3: Option>, - pub(crate) close_paren: Token<'a>, +pub struct Tuple<'a> { + pub span: Span<'a>, + pub identifier: Option>, + pub ws1: Option>, + pub open_paren: Token<'a>, + pub ws2: Option>, + pub terms: Option>>, + pub ws3: Option>, + pub close_paren: Token<'a>, } + impl AstNode for Tuple<'_> { fn children(&self) -> Option> { let mut vec: Vec<&dyn AstNode> = Vec::new(); @@ -61,6 +64,18 @@ impl AstNode for Tuple<'_> { self.span.fragment() ) } + + fn lsp_identifier(&self) -> Option<(String, String)> { + None + } + + fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> { + None + } + + fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> { + None + } } impl std::fmt::Display for Tuple<'_> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { diff --git a/nemo/src/io/parser/types.rs b/nemo/src/io/parser/types.rs index 6850a69c2..6dea14af0 100644 --- a/nemo/src/io/parser/types.rs +++ b/nemo/src/io/parser/types.rs @@ -39,11 +39,11 @@ pub type ParseResult<'a, T> = Result; #[error("Parse error on line {}, column {}: {}\nat {}{}", .line, .column, .source, .fragment, 
format_parse_error_context(.context))] pub struct LocatedParseError { #[source] - pub(super) source: ParseError, - pub(super) line: u32, - pub(super) column: usize, - pub(super) fragment: String, - pub(super) context: Vec, + pub source: ParseError, + pub line: u32, + pub column: usize, + pub fragment: String, + pub context: Vec, } impl LocatedParseError { @@ -479,9 +479,9 @@ impl<'a, T> nom::FindToken for Tokens<'a> { impl<'a> InputIter for Tokens<'a> { type Item = &'a Token<'a>; - type Iter = std::iter::Enumerate<::std::slice::Iter<'a, Token<'a>>>; + type Iter = std::iter::Enumerate>>; - type IterElem = ::std::slice::Iter<'a, Token<'a>>; + type IterElem = std::slice::Iter<'a, Token<'a>>; fn iter_indices(&self) -> Self::Iter { self.tok.iter().enumerate() @@ -506,7 +506,7 @@ impl<'a> InputIter for Tokens<'a> { } } } -impl<'a> nom::InputLength for Tokens<'a> { +impl<'a> InputLength for Tokens<'a> { fn input_len(&self) -> usize { self.tok.len() } @@ -529,7 +529,7 @@ impl<'a> InputTake for Tokens<'a> { ) } } -impl<'a> nom::InputTakeAtPosition for Tokens<'a> { +impl<'a> InputTakeAtPosition for Tokens<'a> { type Item = &'a Token<'a>; fn split_at_position>( @@ -690,7 +690,7 @@ impl<'a, 's> InputIter for Input<'a, 's> { } } -impl nom::InputLength for Input<'_, '_> { +impl InputLength for Input<'_, '_> { fn input_len(&self) -> usize { self.input.input_len() } @@ -719,13 +719,13 @@ impl InputTake for Input<'_, '_> { } } -impl nom::InputTakeAtPosition for Input<'_, '_> { +impl InputTakeAtPosition for Input<'_, '_> { type Item = char; fn split_at_position>( &self, predicate: P, - ) -> nom::IResult + ) -> IResult where P: Fn(Self::Item) -> bool, { @@ -738,8 +738,8 @@ impl nom::InputTakeAtPosition for Input<'_, '_> { fn split_at_position1>( &self, predicate: P, - e: nom::error::ErrorKind, - ) -> nom::IResult + e: ErrorKind, + ) -> IResult where P: Fn(Self::Item) -> bool, { @@ -749,7 +749,7 @@ impl nom::InputTakeAtPosition for Input<'_, '_> { fn split_at_position_complete>( &self, predicate: P, - ) -> nom::IResult + ) -> IResult where P: Fn(Self::Item) -> bool, { @@ -762,8 +762,8 @@ impl nom::InputTakeAtPosition for Input<'_, '_> { fn split_at_position1_complete>( &self, predicate: P, - e: nom::error::ErrorKind, - ) -> nom::IResult + e: ErrorKind, + ) -> IResult where P: Fn(Self::Item) -> bool, { From 8d4be5d9b9192c85f618a7b5b1c53ea93bc415a5 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Wed, 29 May 2024 14:56:40 +0200 Subject: [PATCH 100/214] Refactor error reporting to use nom_supreme::error::ErrorTree --- Cargo.lock | 52 ++ nemo/Cargo.toml | 2 + nemo/src/io/lexer.rs | 420 +++++----- nemo/src/io/parser.rs | 1465 ++++++++++++++++++++--------------- nemo/src/io/parser/ast.rs | 41 +- nemo/src/io/parser/types.rs | 46 +- testfile2.rls | 15 +- 7 files changed, 1154 insertions(+), 887 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 31fb2db39..847bd5322 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -111,6 +111,12 @@ dependencies = [ "derive_arbitrary", ] +[[package]] +name = "arrayvec" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" + [[package]] name = "ascii_tree" version = "0.1.1" @@ -260,6 +266,15 @@ dependencies = [ "wyz", ] +[[package]] +name = "brownstone" +version = "3.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5839ee4f953e811bfdcf223f509cb2c6a3e1447959b0bff459405575bc17f22" +dependencies = [ + "arrayvec", +] + [[package]] name = 
"bstr" version = "1.9.1" @@ -1023,6 +1038,12 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "indent_write" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cfe9645a18782869361d9c8732246be7b410ad4e919d3609ebabdac00ba12c3" + [[package]] name = "indexmap" version = "2.2.6" @@ -1057,6 +1078,12 @@ version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" +[[package]] +name = "joinery" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72167d68f5fce3b8655487b8038691a3c9984ee769590f93f2a631f4ad64e4f5" + [[package]] name = "js-sys" version = "0.3.69" @@ -1243,6 +1270,8 @@ dependencies = [ "macros", "nemo-physical", "nom 7.1.3", + "nom-greedyerror", + "nom-supreme", "nom_locate", "num", "oxiri", @@ -1342,6 +1371,29 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "nom-greedyerror" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f359007d505b20cd6e4974ff0d5c8e4565f0f9e15823937238221ccb74b516" +dependencies = [ + "nom 7.1.3", + "nom_locate", +] + +[[package]] +name = "nom-supreme" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bd3ae6c901f1959588759ff51c95d24b491ecb9ff91aa9c2ef4acc5b1dcab27" +dependencies = [ + "brownstone", + "indent_write", + "joinery", + "memchr", + "nom 7.1.3", +] + [[package]] name = "nom_locate" version = "4.2.0" diff --git a/nemo/Cargo.toml b/nemo/Cargo.toml index ea6dfc52e..ab6f43231 100644 --- a/nemo/Cargo.toml +++ b/nemo/Cargo.toml @@ -47,6 +47,8 @@ serde = {version = "1.0.138", features = ["derive"] } tower-lsp = "0.20.0" dyn-clone = "1.0.16" unicode-ident = "1.0.12" +nom-greedyerror = "0.5.0" +nom-supreme = "0.8.0" [dev-dependencies] env_logger = "*" diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs index c0e1c90b4..cd2aa1cd9 100644 --- a/nemo/src/io/lexer.rs +++ b/nemo/src/io/lexer.rs @@ -6,11 +6,13 @@ use nom::{ branch::alt, bytes::complete::{is_not, tag, take, take_till}, character::complete::{alpha1, alphanumeric1, digit1, line_ending, multispace1}, - combinator::{all_consuming, cut, map, recognize}, - error::{ParseError}, + combinator::{all_consuming, cut, map, opt, recognize}, + error::{context, ContextError, ErrorKind, ParseError}, multi::{many0, many1}, sequence::{delimited, pair, tuple}, + IResult, }; +use nom_greedyerror::GreedyError; use nom_locate::LocatedSpan; use tower_lsp::lsp_types::SymbolKind; @@ -34,20 +36,21 @@ impl ParseError> for NewParseError { } } -pub(crate) type IResult = nom::IResult; - use super::parser::{ ast::{AstNode, Position}, - types::{Input, Label, ToRange}, + types::{Input, ToRange}, }; #[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct Error(pub Position, pub String); +pub struct Error { + pub pos: Position, + pub msg: String, + pub context: Vec<&'static str>, +} #[derive(Debug, Clone, Copy, PartialEq)] pub(crate) struct ParserState<'a> { pub(crate) errors: &'a RefCell>, - pub(crate) labels: &'a RefCell>, } impl ParserState<'_> { pub fn report_error(&self, error: Error) { @@ -299,25 +302,29 @@ impl<'a> AstNode for Token<'a> { } } -pub(crate) fn map_err<'a, 'e, O, E: ParseError>>( - mut f: impl nom::Parser, O, E>, - mut op: impl FnMut(E) -> NewParseError, -) -> impl FnMut(Input<'a, 'e>) -> IResult, O> { - move |input| { - f.parse(input).map_err(|e| match e { - nom::Err::Incomplete(err) => 
nom::Err::Incomplete(err), - nom::Err::Error(err) => nom::Err::Error(op(err)), - nom::Err::Failure(err) => nom::Err::Error(op(err)), - }) - } -} +// pub(crate) fn map_err<'a, 's, O, E: ParseError>>( +// mut f: impl nom::Parser, O, E>, +// mut op: impl FnMut(E) -> NewParseError, +// ) -> impl FnMut(Input<'a, 's>) -> IResult, O> { +// move |input| { +// f.parse(input).map_err(|e| match e { +// nom::Err::Incomplete(err) => nom::Err::Incomplete(err), +// nom::Err::Error(err) => nom::Err::Error(op(err)), +// nom::Err::Failure(err) => nom::Err::Error(op(err)), +// }) +// } +// } macro_rules! syntax { - ($func_name: ident, $tag_string: literal, $token: expr) => { - pub(crate) fn $func_name<'a, 'e>( - input: Input<'a, 'e>, - ) -> nom::IResult, Token<'a>> { - map(tag($tag_string), |span: Input| { + ($func_name: ident, $tag_str: literal, $token: expr) => { + pub(crate) fn $func_name< + 'a, + 's, + E: ParseError> + ContextError>, + >( + input: Input<'a, 's>, + ) -> IResult, Token<'a>, E> { + map(context($tag_str, tag($tag_str)), |span: Input| { Token::new($token, span.input) })(input) } @@ -344,32 +351,41 @@ syntax!(at, "@", TokenKind::At); syntax!(exp_lower, "e", TokenKind::Exponent); syntax!(exp_upper, "E", TokenKind::Exponent); -pub(crate) fn exp<'a, 'e>(input: Input<'a, 'e>) -> nom::IResult, Token<'a>> { - alt((exp_lower, exp_upper))(input) +pub(crate) fn exp<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, +) -> IResult, Token<'a>, E> { + context("lex exponent", alt((exp_lower, exp_upper)))(input) } -pub(crate) fn lex_punctuations<'a, 'e>( - input: Input<'a, 'e>, -) -> nom::IResult, Token<'a>> { - alt(( - arrow, - open_paren, - close_paren, - open_bracket, - close_bracket, - open_brace, - close_brace, - dot, - comma, - colon, - question_mark, - exclamation_mark, - tilde, - caret, - hash, - underscore, - at, - ))(input) +pub(crate) fn lex_punctuations< + 'a, + 's, + E: ParseError> + ContextError>, +>( + input: Input<'a, 's>, +) -> IResult, Token<'a>, E> { + context( + "lex punctuations", + alt(( + arrow, + open_paren, + close_paren, + open_bracket, + close_bracket, + open_brace, + close_brace, + dot, + comma, + colon, + question_mark, + exclamation_mark, + tilde, + caret, + hash, + underscore, + at, + )), + )(input) } syntax!(less, "<", TokenKind::Less); @@ -383,34 +399,42 @@ syntax!(minus, "-", TokenKind::Minus); syntax!(star, "*", TokenKind::Star); syntax!(slash, "/", TokenKind::Slash); -pub(crate) fn lex_operators<'a, 'e>( - input: Input<'a, 'e>, -) -> nom::IResult, Token<'a>> { - alt(( - less_equal, - greater_equal, - unequal, - less, - equal, - greater, - plus, - minus, - star, - slash, - ))(input) +pub(crate) fn lex_operators<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, +) -> IResult, Token<'a>, E> { + context( + "lex operators", + alt(( + less_equal, + greater_equal, + unequal, + less, + equal, + greater, + plus, + minus, + star, + slash, + )), + )(input) } -// pub(crate) fn lex_unary_prefix_operators<'a, 'e>( -// input: Input<'a, 'e>, -// ) -> IResult, Token<'a>> { +// pub(crate) fn lex_unary_prefix_operators<'a, 's>( +// input: Input<'a, 's>, +// ) -> IResult, Token<'a>> { // alt((plus, minus))(input) // } -pub(crate) fn lex_ident<'a, 'e>(input: Input<'a, 'e>) -> nom::IResult, Token<'a>> { - let (rest_input, ident) = recognize(pair( - alpha1, - many0(alt((alphanumeric1, tag("_"), tag("-")))), - ))(input)?; +pub(crate) fn lex_ident<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, +) -> IResult, Token<'a>, E> { + let (rest_input, ident) = 
context( + "lex identifier", + recognize(pair( + alpha1, + many0(alt((alphanumeric1, tag("_"), tag("-")))), + )), + )(input)?; let token = match *ident.input.fragment() { "base" => Token::new(TokenKind::Base, ident.input), "prefix" => Token::new(TokenKind::Prefix, ident.input), @@ -422,57 +446,97 @@ pub(crate) fn lex_ident<'a, 'e>(input: Input<'a, 'e>) -> nom::IResult(input: Input<'a, 'e>) -> nom::IResult, Token<'a>> { - recognize(delimited(tag("<"), is_not("> \n"), cut(tag(">"))))(input) - .map(|(rest, result)| (rest, Token::new(TokenKind::Iri, result.input))) +pub(crate) fn lex_iri<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, +) -> IResult, Token<'a>, E> { + context( + "lex iri", + recognize(delimited(tag("<"), is_not("> \n"), cut(tag(">")))), + )(input) + .map(|(rest, result)| (rest, Token::new(TokenKind::Iri, result.input))) } -pub(crate) fn lex_number<'a, 'e>(input: Input<'a, 'e>) -> nom::IResult, Token<'a>> { - digit1(input) +pub(crate) fn lex_number<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, +) -> IResult, Token<'a>, E> { + context("lex number", digit1)(input) .map(|(rest_input, result)| (rest_input, Token::new(TokenKind::Number, result.input))) } -pub(crate) fn lex_string<'a, 'e>(input: Input<'a, 'e>) -> nom::IResult, Token<'a>> { - recognize(delimited(tag("\""), is_not("\""), cut(tag("\""))))(input) - .map(|(rest, result)| (rest, Token::new(TokenKind::String, result.input))) +pub(crate) fn lex_string<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, +) -> IResult, Token<'a>, E> { + context( + "lex string", + recognize(delimited(tag("\""), is_not("\""), cut(tag("\"")))), + )(input) + .map(|(rest, result)| (rest, Token::new(TokenKind::String, result.input))) } -pub(crate) fn lex_comment<'a, 'e>(input: Input<'a, 'e>) -> nom::IResult, Token<'a>> { - recognize(tuple((tag("%"), many0(is_not("\n")), line_ending)))(input) - .map(|(rest, result)| (rest, Token::new(TokenKind::Comment, result.input))) +pub(crate) fn lex_comment<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, +) -> IResult, Token<'a>, E> { + context( + "comment", + recognize(tuple((tag("%"), many0(is_not("\n")), line_ending))), + )(input) + .map(|(rest, result)| (rest, Token::new(TokenKind::Comment, result.input))) } -pub(crate) fn lex_doc_comment<'a, 'e>( - input: Input<'a, 'e>, -) -> nom::IResult, Token<'a>> { - recognize(many1(tuple((tag("%%"), many0(is_not("\n")), line_ending))))(input) - .map(|(rest, result)| (rest, Token::new(TokenKind::DocComment, result.input))) +pub(crate) fn lex_doc_comment< + 'a, + 's, + E: ParseError> + ContextError>, +>( + input: Input<'a, 's>, +) -> IResult, Token<'a>, E> { + context( + "documentation comment", + recognize(many1(tuple((tag("%%"), many0(is_not("\n")), line_ending)))), + )(input) + .map(|(rest, result)| (rest, Token::new(TokenKind::DocComment, result.input))) } -pub(crate) fn lex_toplevel_doc_comment<'a, 'e>( - input: Input<'a, 'e>, -) -> nom::IResult, Token<'a>> { - recognize(many1(tuple((tag("%!"), many0(is_not("\n")), line_ending))))(input) - .map(|(rest, result)| (rest, Token::new(TokenKind::TlDocComment, result.input))) +pub(crate) fn lex_toplevel_doc_comment< + 'a, + 's, + E: ParseError> + ContextError>, +>( + input: Input<'a, 's>, +) -> IResult, Token<'a>, E> { + context( + "top level documentation comment", + recognize(many1(tuple((tag("%!"), many0(is_not("\n")), line_ending)))), + )(input) + .map(|(rest, result)| (rest, Token::new(TokenKind::TlDocComment, result.input))) } -pub(crate) fn 
lex_comments<'a, 'e>(input: Input<'a, 'e>) -> nom::IResult, Token<'a>> { - alt((lex_toplevel_doc_comment, lex_doc_comment, lex_comment))(input) +pub(crate) fn lex_comments<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, +) -> IResult, Token<'a>, E> { + context( + "comments", + alt((lex_toplevel_doc_comment, lex_doc_comment, lex_comment)), + )(input) } -pub(crate) fn lex_whitespace<'a, 'e>( - input: Input<'a, 'e>, -) -> nom::IResult, Token<'a>> { - multispace1(input).map(|(rest, result)| (rest, Token::new(TokenKind::Whitespace, result.input))) +pub(crate) fn lex_whitespace<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, +) -> IResult, Token<'a>, E> { + context("whitespace", multispace1)(input) + .map(|(rest, result)| (rest, Token::new(TokenKind::Whitespace, result.input))) } -pub(crate) fn lex_illegal<'a, 'e>(input: Input<'a, 'e>) -> nom::IResult, Token<'a>> { - take(1usize)(input).map(|(rest, result)| (rest, Token::new(TokenKind::Illegal, result.input))) +pub(crate) fn lex_illegal<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, +) -> IResult, Token<'a>, E> { + context("illegal character", take(1usize))(input) + .map(|(rest, result)| (rest, Token::new(TokenKind::Illegal, result.input))) } -pub(crate) fn lex_tokens<'a, 'e>( - input: Input<'a, 'e>, -) -> nom::IResult, Vec>> { +pub(crate) fn lex_tokens<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, +) -> IResult, Vec>, E> { all_consuming(many0(alt(( lex_iri, lex_operators, @@ -490,10 +554,12 @@ pub(crate) fn lex_tokens<'a, 'e>( }) } -pub(crate) fn skip_to_dot<'a, 'e>(input: Input<'a, 'e>) -> (Input<'a, 'e>, Token<'a>) { +pub(crate) fn skip_to_dot<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, +) -> (Input<'a, 's>, Token<'a>) { let (rest_input, error_input) = recognize(pair( take_till::<_, Input<'_, '_>, nom::error::Error<_>>(|c| c == '.'), - tag("."), + opt(tag(".")), ))(input) .expect("Skipping to the next dot should not fail!"); ( @@ -522,18 +588,14 @@ mod tests { fn empty_input() { let input = Span::new(""); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let errors = ParserState { - errors: &refcell, - labels: &labels, - }; + let errors = ParserState { errors: &refcell }; let input = Input { input, parser_state: errors, }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![T!(Eof, 0, 1, "")] ) } @@ -542,18 +604,14 @@ mod tests { fn base() { let input = Span::new("@base"); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let errors = ParserState { - errors: &refcell, - labels: &labels, - }; + let errors = ParserState { errors: &refcell }; let input = Input { input, parser_state: errors, }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![T!(At, 0, 1, "@"), T!(Base, 1, 1, "base"), T!(Eof, 5, 1, ""),] ) } @@ -562,18 +620,14 @@ mod tests { fn prefix() { let input = Span::new("@prefix"); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let errors = ParserState { - errors: &refcell, - labels: &labels, - }; + let errors = ParserState { errors: &refcell }; let input = Input { input, parser_state: errors, }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(At, 0, 1, "@"), T!(Prefix, 1, 1, "prefix"), @@ -586,18 +640,14 @@ mod tests { fn output() { let 
input = Span::new("@output"); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let errors = ParserState { - errors: &refcell, - labels: &labels, - }; + let errors = ParserState { errors: &refcell }; let input = Input { input, parser_state: errors, }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(At, 0, 1, "@"), T!(Output, 1, 1, "output"), @@ -610,18 +660,14 @@ mod tests { fn import() { let input = Span::new("@import"); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let errors = ParserState { - errors: &refcell, - labels: &labels, - }; + let errors = ParserState { errors: &refcell }; let input = Input { input, parser_state: errors, }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(At, 0, 1, "@"), T!(Import, 1, 1, "import"), @@ -634,18 +680,14 @@ mod tests { fn export() { let input = Span::new("@export"); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let errors = ParserState { - errors: &refcell, - labels: &labels, - }; + let errors = ParserState { errors: &refcell }; let input = Input { input, parser_state: errors, }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(At, 0, 1, "@"), T!(Export, 1, 1, "export"), @@ -658,18 +700,14 @@ mod tests { fn idents_with_keyword_prefix() { let input = Span::new("@baseA, @prefixB, @importC, @exportD, @outputE."); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let errors = ParserState { - errors: &refcell, - labels: &labels, - }; + let errors = ParserState { errors: &refcell }; let input = Input { input, parser_state: errors, }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(At, 0, 1, "@"), T!(Ident, 1, 1, "baseA"), @@ -699,18 +737,14 @@ mod tests { fn tokenize() { let input = Span::new("P(?X) :- A(?X).\t\n A(Human)."); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let errors = ParserState { - errors: &refcell, - labels: &labels, - }; + let errors = ParserState { errors: &refcell }; let input = Input { input, parser_state: errors, }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Ident, 0, 1, "P"), T!(OpenParen, 1, 1, "("), @@ -741,18 +775,14 @@ mod tests { fn comment() { let input = Span::new(" % Some Comment\n"); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let errors = ParserState { - errors: &refcell, - labels: &labels, - }; + let errors = ParserState { errors: &refcell }; let input = Input { input, parser_state: errors, }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Whitespace, 0, 1, " "), T!(Comment, 4, 1, "% Some Comment\n"), @@ -767,18 +797,14 @@ mod tests { fn ident() { let input = Span::new("some_Ident(Alice). 
%comment at the end of a line\n"); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let errors = ParserState { - errors: &refcell, - labels: &labels, - }; + let errors = ParserState { errors: &refcell }; let input = Input { input, parser_state: errors, }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Ident, 0, 1, "some_Ident"), T!(OpenParen, 10, 1, "("), @@ -796,18 +822,14 @@ mod tests { fn forbidden_ident() { let input = Span::new("_someIdent(Alice). %comment at the end of a line\n"); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let errors = ParserState { - errors: &refcell, - labels: &labels, - }; + let errors = ParserState { errors: &refcell }; let input = Input { input, parser_state: errors, }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Underscore, 0, 1, "_"), T!(Ident, 1, 1, "someIdent"), @@ -826,18 +848,14 @@ mod tests { fn iri() { let input = Span::new(""); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let errors = ParserState { - errors: &refcell, - labels: &labels, - }; + let errors = ParserState { errors: &refcell }; let input = Input { input, parser_state: errors, }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Iri, 0, 1, ""), T!(Eof, 31, 1, ""), @@ -849,18 +867,14 @@ mod tests { fn iri_pct_enc() { let input = Span::new("\n"); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let errors = ParserState { - errors: &refcell, - labels: &labels, - }; + let errors = ParserState { errors: &refcell }; let input = Input { input, parser_state: errors, }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Iri, 0, 1, ""), T!(Whitespace, 37, 1, "\n"), @@ -875,18 +889,14 @@ mod tests { fn constraints() { let input = Span::new("A(?X):-B(?X),?X<42,?X>3."); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let errors = ParserState { - errors: &refcell, - labels: &labels, - }; + let errors = ParserState { errors: &refcell }; let input = Input { input, parser_state: errors, }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Ident, 0, 1, "A"), T!(OpenParen, 1, 1, "("), @@ -919,18 +929,14 @@ mod tests { fn pct_enc_comment() { let input = Span::new("%d4 this should be a comment,\n% but the lexer can't distinguish a percent encoded value\n% in an iri from a comment :(\n"); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let errors = ParserState { - errors: &refcell, - labels: &labels, - }; + let errors = ParserState { errors: &refcell }; let input = Input { input, parser_state: errors, }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Comment, 0, 1, "%d4 this should be a comment,\n"), T!( @@ -949,18 +955,14 @@ mod tests { fn fact() { let input = Span::new("somePred(term1, term2)."); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let errors = ParserState { - errors: &refcell, - labels: &labels, - }; + let errors = ParserState { errors: &refcell }; let input = Input { input, parser_state: errors, }; 
assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Ident, 0, 1, "somePred"), T!(OpenParen, 8, 1, "("), @@ -979,18 +981,14 @@ mod tests { fn whitespace() { let input = Span::new(" \t \n\n\t \n"); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let errors = ParserState { - errors: &refcell, - labels: &labels, - }; + let errors = ParserState { errors: &refcell }; let input = Input { input, parser_state: errors, }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Whitespace, 0, 1, " \t \n\n\t \n"), T!(Eof, 12, 4, ""), @@ -1002,15 +1000,11 @@ mod tests { fn skip_to_dot() { let input = Span::new("some ?broken :- rule). A(Fact)."); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let errors = ParserState { - errors: &refcell, - labels: &labels, - }; + let errors = ParserState { errors: &refcell }; let input = Input { input, parser_state: errors, }; - dbg!(super::skip_to_dot(input)); + dbg!(super::skip_to_dot::>(input)); } } diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index b06986d68..a8d9f82e8 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -2430,25 +2430,26 @@ mod test { /// NEW PARSER pub mod new { + use std::borrow::BorrowMut; use std::cell::RefCell; use super::ast::{ atom::*, directive::*, map::*, program::*, statement::*, term::*, tuple::*, List, Position, Wsoc, }; - use super::types::{Input, Label, ParserLabel}; + use super::types::{Input, ToRange}; use crate::io::lexer::{ arrow, at, caret, close_brace, close_paren, colon, comma, dot, equal, exclamation_mark, exp, greater, greater_equal, hash, less, less_equal, lex_comment, lex_doc_comment, - lex_ident, lex_iri, lex_number, lex_string, lex_toplevel_doc_comment, - lex_whitespace, minus, open_brace, open_paren, plus, question_mark, skip_to_dot, - slash, star, tilde, underscore, unequal, Error, ParserState, Span, Token, + lex_ident, lex_iri, lex_number, lex_operators, lex_string, lex_toplevel_doc_comment, + lex_whitespace, minus, open_brace, open_paren, plus, question_mark, skip_to_dot, slash, + star, tilde, underscore, unequal, Error, NewParseError, ParserState, Span, Token, TokenKind, }; - - use nom::combinator::{all_consuming, opt, recognize}; - use nom::error::{ParseError}; - use nom::sequence::{pair}; + use crate::io::parser::ast::AstNode; + use nom::combinator::{all_consuming, cut, map, opt, recognize}; + use nom::error::{context, ContextError, ErrorKind, ParseError}; + use nom::sequence::{delimited, pair}; use nom::Parser; use nom::{ branch::alt, @@ -2457,10 +2458,11 @@ pub mod new { sequence::tuple, IResult, }; + use nom_greedyerror::GreedyError; + use nom_supreme::error::{ErrorTree, StackContext}; fn outer_span<'a>(input: Span<'a>, rest_input: Span<'a>) -> Span<'a> { unsafe { - // dbg!(&input, &span, &rest_input); Span::new_from_raw_offset( input.location_offset(), @@ -2511,29 +2513,45 @@ pub mod new { // } // } - fn expect< - 'a, - 'e, - O: Copy, - E: ParseError>, - F: Parser, O, E>, - >( + // fn context<'a, 's, O, E>( + // mut f: impl FnMut(Input<'a, 's>) -> IResult, O, E>, + // context: ParserContext, + // ) -> impl FnMut(Input<'a, 's>) -> IResult, O, E> { + // move |input| { + // let mut labels = *input.parser_state.labels.borrow_mut(); + // if let None = labels { + // labels = Some(Context { + // context: context.clone(), + // label: None, + // inner: vec![], + // }); + // labels + // 
} else { + // dbg!(&labels); + // labels + // }; + // f(input) + // } + // } + + fn expect<'a, 's, O: Copy, E: ParseError>, F: Parser, O, E>>( mut parser: F, error_msg: impl ToString, error_output: O, - errors: ParserState<'e>, - ) -> impl FnMut(Input<'a, 'e>) -> IResult, O, E> { + errors: ParserState<'s>, + ) -> impl FnMut(Input<'a, 's>) -> IResult, O, E> { move |input| match parser.parse(input) { Ok(result) => Ok(result), Err(nom::Err::Error(_)) | Err(nom::Err::Failure(_)) => { - let err = Error( - Position { + let err = Error { + pos: Position { offset: input.input.location_offset(), line: input.input.location_line(), column: input.input.get_utf8_column() as u32, }, - error_msg.to_string(), - ); + msg: error_msg.to_string(), + context: vec![], + }; errors.report_error(err); Ok((input, error_output)) } @@ -2541,116 +2559,231 @@ pub mod new { } } - fn recover<'a, 'e, E>( - mut parser: impl Parser, Statement<'a>, E>, + fn recover<'a, 's, E>( + mut parser: impl Parser, Statement<'a>, E>, error_msg: impl ToString, - errors: ParserState<'e>, - ) -> impl FnMut(Input<'a, 'e>) -> IResult, Statement<'a>, E> { - move |input: Input<'a, 'e>| match parser.parse(input) { + context: &'static str, + errors: ParserState<'s>, + ) -> impl FnMut(Input<'a, 's>) -> IResult, Statement<'a>, E> { + move |input: Input<'a, 's>| match parser.parse(input) { Ok(result) => Ok(result), Err(err) if input.input.is_empty() => Err(err), Err(nom::Err::Error(e)) | Err(nom::Err::Failure(e)) => { - let err = Error( - Position { + let err = Error { + pos: Position { offset: input.input.location_offset(), line: input.input.location_line(), column: input.input.get_utf8_column() as u32, }, - error_msg.to_string(), - ); + msg: error_msg.to_string(), + context: vec![context], + }; errors.report_error(err); - let (rest_input, token) = skip_to_dot(input); + let (rest_input, token) = skip_to_dot::>>(input); Ok((rest_input, Statement::Error(token))) } Err(err) => Err(err), } } - fn report_label<'a, 's, O, E>( - mut parser: impl Parser, O, E>, - label: ParserLabel, - ) -> impl FnMut(Input<'a, 's>) -> IResult, O, E> { + // fn report_label<'a, 's, O, E>( + // mut parser: impl nom::Parser, O, E>, + // label: ParserLabel, + // ) -> impl FnMut(Input<'a, 's>) -> IResult, O, E> { + // move |input| match parser.parse(input) { + // Ok(result) => Ok(result), + // Err(err) => { + // match err { + // nom::Err::Incomplete(_) => (), + // nom::Err::Error(_) | nom::Err::Failure(_) => { + // if !input.input.is_empty() { + // input.parser_state.labels.borrow_mut().push(Label { + // label, + // pos: Position { + // offset: input.input.location_offset(), + // line: input.input.location_line(), + // column: input.input.get_utf8_column() as u32, + // }, + // }) + // }; + // } + // }; + // Err(err) + // } + // } + // } + + // fn report_error<'a, 's, O, E>( + // mut parser: impl nom::Parser, O, E>, + // ) -> impl FnMut(Input<'a, 's>) -> IResult, O, E> { + // move |input| match parser.parse(input) { + // Ok(result) => { + // input.parser_state.labels.borrow_mut().inner.clear(); + // Ok(result) + // } + // Err(err) => { + // match err { + // nom::Err::Incomplete(_) => (), + // nom::Err::Error(_) | nom::Err::Failure(_) => { + // // println!("LABELS BEFORE REPORT!!!!: {:#?}", input.parser_state.labels); + // let mut furthest_errors: Vec = Vec::new(); + // let labels = + // as Clone>::clone(&input.parser_state.labels.borrow()) + // .into_iter(); + // for label in labels { + // if let Some(last) = furthest_errors.last() { + // if label.pos.offset >= (*last).0.offset 
{ + // let err = + // Error(label.pos, format!("expected {:?}", label.label)); + // furthest_errors.push(err); + // } + // } else { + // let err = Error(label.pos, format!("expected {:?}", label.label)); + // furthest_errors.push(err); + // }; + // } + // for err in furthest_errors { + // input.parser_state.report_error(err) + // } + // // for label in furthest_errors { + // // println!( + // // "Syntax error: Parser got stuck at line {} column {}, expected {:?}", + // // label.position.line, label.position.column, label.label + // // ); + // // println!( + // // "\n{}", + // // input + // // .parser_state + // // .source + // // .fragment() + // // .lines() + // // .collect::>() + // // .get((label.position.line - 1) as usize) + // // .unwrap() + // // ); + // // println!("{1:>0$}", label.position.column, "^"); + // // } + // } + // }; + // Err(err) + // } + // } + // } + fn report_error<'a, 's, O>( + mut parser: impl nom::Parser, O, ErrorTree>>, + ) -> impl FnMut(Input<'a, 's>) -> IResult, O, ErrorTree>> { move |input| match parser.parse(input) { Ok(result) => Ok(result), - Err(err) => { - match err { + Err(e) => { + if input.input.is_empty() { + return Err(e); + }; + match &e { nom::Err::Incomplete(_) => (), - nom::Err::Error(_) | nom::Err::Failure(_) => { - if !input.input.is_empty() { - input.parser_state.labels.borrow_mut().push(Label { - label, - pos: Position { - offset: input.input.location_offset(), - line: input.input.location_line(), - column: input.input.get_utf8_column() as u32, - }, - }) - }; + nom::Err::Error(err) | nom::Err::Failure(err) => { + let (deepest_pos, errors) = get_deepest_errors(err); + for error in errors { + input.parser_state.report_error(error); + } + // let error = Error(deepest_pos, format!("")); + // // input.parser_state.report_error(error) } }; - Err(err) + Err(e) } } } - fn report_error<'a, 's, O, E>( - mut parser: impl Parser, O, E>, - ) -> impl FnMut(Input<'a, 's>) -> IResult, O, E> { - move |input| match parser.parse(input) { - Ok(result) => { - input.parser_state.labels.borrow_mut().clear(); - Ok(result) + fn get_deepest_errors<'a, 's>(e: &'a ErrorTree>) -> (Position, Vec) { + match e { + ErrorTree::Base { location, kind } => { + let span = location.input; + let err_pos = Position { + offset: span.location_offset(), + line: span.location_line(), + column: span.get_utf8_column() as u32, + }; + ( + err_pos, + vec![Error { + pos: err_pos, + msg: format!("{}", e), + context: Vec::new(), + }], + ) } - Err(err) => { - match err { - nom::Err::Incomplete(_) => (), - nom::Err::Error(_) | nom::Err::Failure(_) => { - // println!("LABELS BEFORE REPORT!!!!: {:#?}", input.parser_state.labels); - let mut furthest_errors: Vec = Vec::new(); - let labels = - as Clone>::clone(&input.parser_state.labels.borrow()) - .into_iter(); - for label in labels { - if let Some(last) = furthest_errors.last() { - if label.pos.offset >= last.0.offset { - let err = - Error(label.pos, format!("expected {:?}", label.label)); - furthest_errors.push(err); - } - } else { - let err = Error(label.pos, format!("expected {:?}", label.label)); - furthest_errors.push(err); - }; + ErrorTree::Stack { base, contexts } => { + // let mut err_pos = Position::default(); + match &**base { + ErrorTree::Base { location, kind } => { + let span = location.input; + let err_pos = Position { + offset: span.location_offset(), + line: span.location_line(), + column: span.get_utf8_column() as u32, + }; + ( + err_pos, + vec![Error { + pos: err_pos, + msg: format!("{}", base), + context: context_strs(contexts), 
+ }], + ) + } + ErrorTree::Stack { base, contexts } => { + let (pos, mut deepest_errors) = get_deepest_errors(base); + let contexts = context_strs(contexts); + dbg!(&deepest_errors); + for mut error in &mut deepest_errors { + error.context.append(&mut contexts.clone()); } - for err in furthest_errors { - input.parser_state.report_error(err) + dbg!(&deepest_errors); + (pos, deepest_errors) + } + ErrorTree::Alt(error_tree) => { + let (pos, mut deepest_errors) = get_deepest_errors(base); + let contexts = context_strs(contexts); + for mut error in &mut deepest_errors { + error.context.append(&mut contexts.clone()); } - // for label in furthest_errors { - // println!( - // "Syntax error: Parser got stuck at line {} column {}, expected {:?}", - // label.position.line, label.position.column, label.label - // ); - // println!( - // "\n{}", - // input - // .parser_state - // .source - // .fragment() - // .lines() - // .collect::>() - // .get((label.position.line - 1) as usize) - // .unwrap() - // ); - // println!("{1:>0$}", label.position.column, "^"); - // } + (pos, deepest_errors) } - }; - Err(err) + } + } + ErrorTree::Alt(vec) => { + let mut return_vec: Vec = Vec::new(); + let mut deepest_pos = Position::default(); + for error in vec { + let (pos, mut deepest_errors) = get_deepest_errors(error); + if pos > deepest_pos { + deepest_pos = pos; + return_vec.clear(); + return_vec.append(&mut deepest_errors); + } else if pos == deepest_pos { + return_vec.append(&mut deepest_errors); + } + } + (deepest_pos, return_vec) } } } - fn wsoc0<'a, 's>(input: Input<'a, 's>) -> IResult, Option>> { + fn context_strs( + contexts: &Vec<(Input<'_, '_>, StackContext<&'static str>)>, + ) -> Vec<&'static str> { + contexts + .iter() + .map(|(_, c)| match c { + StackContext::Kind(k) => todo!(), + StackContext::Context(str) => *str, + }) + .collect() + } + + fn wsoc0<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Option>, E> { many0(alt((lex_whitespace, lex_comment)))(input).map(|(rest_input, vec)| { if vec.is_empty() { (rest_input, None) @@ -2666,7 +2799,9 @@ pub mod new { }) } - fn wsoc1<'a, 's>(input: Input<'a, 's>) -> IResult, Wsoc<'a>> { + fn wsoc1<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Wsoc<'a>, E> { many1(alt((lex_whitespace, lex_comment)))(input).map(|(rest_input, vec)| { ( rest_input, @@ -2679,74 +2814,82 @@ pub mod new { } /// Parse a full program consisting of directives, facts, rules and comments. - fn parse_program<'a>(input: Input<'a, '_>) -> (Program<'a>, Vec) { - let (rest_input, (tl_doc_comment, statements)) = all_consuming(pair( - opt(lex_toplevel_doc_comment), - many1(recover( - report_error(alt(( - // TODO: Discuss wether directives should only get parsed at the beginning of the source file - report_label(parse_rule, ParserLabel::Rule), - report_label(parse_fact, ParserLabel::Fact), - parse_whitespace, - report_label(parse_directive, ParserLabel::Directive), - parse_comment, - ))), - "failed to parse a statement", - input.parser_state, - )), - ))(input) - .expect("Parser can't fail. If it fails it's a bug! Pleas report it. 
Got"); - ( - Program { - span: input.input, - tl_doc_comment, - statements, - }, - rest_input.parser_state.errors.take(), - ) + fn parse_program<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> (Program<'a>, Vec) { + let result = context( + "program", + pair( + opt(lex_toplevel_doc_comment::>>), + many0(recover( + report_error(alt(( + // TODO: Discuss wether directives should only get parsed at the beginning of the source file + parse_rule, + parse_fact, + parse_whitespace, + parse_directive, + parse_comment, + ))), + "failed to parse statement", + "program", + input.parser_state, + )), + ), + )(input); + match result { + Ok((rest_input, (tl_doc_comment, statements))) => { + if !rest_input.input.is_empty() { + panic!("Parser did not consume all input. This is considered a bug. Please report it. Unparsed input is: {:?}", rest_input); + }; + ( + Program { + span: input.input, + tl_doc_comment, + statements, + }, + rest_input.parser_state.errors.take(), + ) + } + Err(e) => panic!( + "Parser can't fail. If it fails it's a bug! Please report it. Got: {:?}", + e + ), + } } pub fn parse_program_str(input: &str) -> (Program<'_>, Vec) { let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let parser_state = ParserState { - errors: &refcell, - labels: &labels, - }; + let parser_state = ParserState { errors: &refcell }; let input = Input { input: Span::new(input), parser_state, }; - parse_program(input) + parse_program::>>(input) } /// Parse whitespace that is between directives, facts, rules and comments. - fn parse_whitespace<'a, 'e>(input: Input<'a, 'e>) -> IResult, Statement<'a>> { + fn parse_whitespace<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Statement<'a>, E> { lex_whitespace(input).map(|(rest_input, ws)| (rest_input, Statement::Whitespace(ws))) } /// Parse normal comments that start with a `%` and ends at the line ending. - fn parse_comment<'a, 'e>(input: Input<'a, 'e>) -> IResult, Statement<'a>> { + fn parse_comment<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Statement<'a>, E> { lex_comment(input).map(|(rest_input, comment)| (rest_input, Statement::Comment(comment))) } /// Parse a fact of the form `predicateName(term1, term2, …).` - fn parse_fact<'a, 'e>(input: Input<'a, 'e>) -> IResult, Statement<'a>> { - dbg!(&input.parser_state.labels); - tuple(( - opt(lex_doc_comment), - parse_normal_atom, - wsoc0, - expect( - report_label(dot, ParserLabel::Dot), - "missing `.`", - Token { - kind: TokenKind::Error, - span: Span::new("ERROR! missing `.`"), - }, - input.parser_state, - ), - ))(input) + fn parse_fact<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Statement<'a>, E> { + // dbg!(&input.parser_state.labels); + context( + "fact", + tuple((opt(lex_doc_comment), parse_normal_atom, wsoc0, dot)), + )(input) .map(|(rest_input, (doc_comment, atom, ws, dot))| { ( rest_input, @@ -2762,25 +2905,22 @@ pub mod new { } /// Parse a rule of the form `headPredicate1(term1, term2, …), headPredicate2(term1, term2, …) :- bodyPredicate(term1, …), term1 >= (term2 + term3) * function(term1, …) .` - fn parse_rule<'a, 'e>(input: Input<'a, 'e>) -> IResult, Statement<'a>> { - tuple(( - opt(lex_doc_comment), - parse_head, - wsoc0, - report_label(arrow, ParserLabel::Arrow), - wsoc0, - parse_body, - wsoc0, - expect( - report_label(dot, ParserLabel::Dot), - "missing `.`", - Token { - kind: TokenKind::Error, - span: Span::new("ERROR! 
missing `.`"), - }, - input.parser_state, - ), - ))(input) + fn parse_rule<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Statement<'a>, E> { + context( + "rule", + tuple(( + opt(lex_doc_comment), + parse_head, + wsoc0, + arrow, + wsoc0, + parse_body, + wsoc0, + dot, + )), + )(input) .map( |(rest_input, (doc_comment, head, ws1, arrow, ws2, body, ws3, dot))| { ( @@ -2802,48 +2942,54 @@ pub mod new { } /// Parse the head atoms of a rule. - fn parse_head<'a, 'e>(input: Input<'a, 'e>) -> IResult, List<'a, Atom<'a>>> { - parse_list(parse_head_atoms)(input) + fn parse_head<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, List<'a, Atom<'a>>, E> { + context("rule head", parse_list(parse_head_atoms))(input) } /// Parse the body atoms of a rule. - fn parse_body<'a, 'e>(input: Input<'a, 'e>) -> IResult, List<'a, Atom<'a>>> { - parse_list(parse_body_atoms)(input) + fn parse_body<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, List<'a, Atom<'a>>, E> { + context("rule body", parse_list(parse_body_atoms))(input) } /// Parse the directives (@base, @prefix, @import, @export, @output). - fn parse_directive<'a, 'e>(input: Input<'a, 'e>) -> IResult, Statement<'a>> { - alt(( - parse_base_directive, - parse_prefix_directive, - parse_import_directive, - parse_export_directive, - parse_output_directive, - ))(input) + fn parse_directive<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Statement<'a>, E> { + context( + "directive", + alt(( + parse_base_directive, + parse_prefix_directive, + parse_import_directive, + parse_export_directive, + parse_output_directive, + )), + )(input) .map(|(rest, directive)| (rest, Statement::Directive(directive))) } /// Parse the base directive. - fn parse_base_directive<'a, 'e>(input: Input<'a, 'e>) -> IResult, Directive<'a>> { - tuple(( - opt(lex_doc_comment), - recognize(pair( - at, - verify(lex_ident, |token| token.kind == TokenKind::Base), + fn parse_base_directive<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Directive<'a>, E> { + context( + "base directive", + tuple(( + opt(lex_doc_comment), + recognize(pair( + at, + verify(lex_ident, |token| token.kind == TokenKind::Base), + )), + wsoc0, + lex_iri, + wsoc0, + dot, )), - wsoc0, - report_label(lex_iri, ParserLabel::Iri), - wsoc0, - expect( - report_label(dot, ParserLabel::Dot), - "missing `.`", - Token { - kind: TokenKind::Error, - span: Span::new("ERROR! missing `.`"), - }, - input.parser_state, - ), - ))(input) + )(input) .map(|(rest_input, (doc_comment, kw, ws1, base_iri, ws2, dot))| { ( rest_input, @@ -2864,30 +3010,29 @@ pub mod new { } /// Parse the prefix directive. 
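    /// A sketch of the accepted shape (the prefix name `p` and the IRI are
    /// illustrative placeholders, not taken from the test data):
    /// `@prefix p: <iri> .` yields a `Directive::Prefix` node.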
- fn parse_prefix_directive<'a, 'e>( - input: Input<'a, 'e>, - ) -> IResult, Directive<'a>> { - tuple(( - opt(lex_doc_comment), - recognize(pair( - at, - verify(lex_ident, |token| token.kind == TokenKind::Prefix), + fn parse_prefix_directive< + 'a, + 's, + E: ParseError> + ContextError>, + >( + input: Input<'a, 's>, + ) -> IResult, Directive<'a>, E> { + context( + "prefix directive", + tuple(( + opt(lex_doc_comment), + recognize(pair( + at, + verify(lex_ident, |token| token.kind == TokenKind::Prefix), + )), + wsoc0, + recognize(pair(opt(lex_ident), colon)), + wsoc0, + lex_iri, + wsoc0, + dot, )), - wsoc0, - report_label(recognize(pair(lex_ident, colon)), ParserLabel::Prefix), - wsoc0, - report_label(lex_iri, ParserLabel::Iri), - wsoc0, - expect( - report_label(dot, ParserLabel::Dot), - "missing `.`", - Token { - kind: TokenKind::Error, - span: Span::new("ERROR! missing `.`"), - }, - input.parser_state, - ), - ))(input) + )(input) .map( |(rest_input, (doc_comment, kw, ws1, prefix, ws2, prefix_iri, ws3, dot))| { ( @@ -2915,32 +3060,31 @@ pub mod new { } /// Parse the import directive. - fn parse_import_directive<'a, 'e>( - input: Input<'a, 'e>, - ) -> IResult, Directive<'a>> { - tuple(( - opt(lex_doc_comment), - recognize(pair( - at, - verify(lex_ident, |token| token.kind == TokenKind::Import), + fn parse_import_directive< + 'a, + 's, + E: ParseError> + ContextError>, + >( + input: Input<'a, 's>, + ) -> IResult, Directive<'a>, E> { + context( + "import directive", + tuple(( + opt(lex_doc_comment), + recognize(pair( + at, + verify(lex_ident, |token| token.kind == TokenKind::Import), + )), + wsoc1, + lex_ident, + wsoc0, + arrow, + wsoc0, + parse_map, + wsoc0, + dot, )), - wsoc1, - report_label(lex_ident, ParserLabel::Identifier), - wsoc0, - report_label(arrow, ParserLabel::Arrow), - wsoc0, - parse_map, - wsoc0, - expect( - report_label(dot, ParserLabel::Dot), - "missing `.`", - Token { - kind: TokenKind::Error, - span: Span::new("ERROR! missing `.`"), - }, - input.parser_state, - ), - ))(input) + )(input) .map( |(rest_input, (doc_comment, kw, ws1, predicate, ws2, arrow, ws3, map, ws4, dot))| { ( @@ -2967,32 +3111,31 @@ pub mod new { } /// Parse the export directive. - fn parse_export_directive<'a, 'e>( - input: Input<'a, 'e>, - ) -> IResult, Directive<'a>> { - tuple(( - opt(lex_doc_comment), - recognize(pair( - at, - verify(lex_ident, |token| token.kind == TokenKind::Export), + fn parse_export_directive< + 'a, + 's, + E: ParseError> + ContextError>, + >( + input: Input<'a, 's>, + ) -> IResult, Directive<'a>, E> { + context( + "export directive", + tuple(( + opt(lex_doc_comment), + recognize(pair( + at, + verify(lex_ident, |token| token.kind == TokenKind::Export), + )), + wsoc1, + lex_ident, + wsoc0, + arrow, + wsoc0, + parse_map, + wsoc0, + dot, )), - wsoc1, - report_label(lex_ident, ParserLabel::Identifier), - wsoc0, - report_label(arrow, ParserLabel::Arrow), - wsoc0, - parse_map, - wsoc0, - expect( - report_label(dot, ParserLabel::Dot), - "missing `.`", - Token { - kind: TokenKind::Error, - span: Span::new("ERROR! missing `.`"), - }, - input.parser_state, - ), - ))(input) + )(input) .map( |(rest_input, (doc_comment, kw, ws1, predicate, ws2, arrow, ws3, map, ws4, dot))| { ( @@ -3019,28 +3162,27 @@ pub mod new { } /// Parse the output directive. 
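    /// e.g. `@output a, b, c.` as in the directive-parsing test below. The
    /// predicate list is optional (`opt(parse_list(lex_ident))`), so a bare
    /// `@output .` should be accepted as well.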
- fn parse_output_directive<'a, 'e>( - input: Input<'a, 'e>, - ) -> IResult, Directive<'a>> { - tuple(( - opt(lex_doc_comment), - recognize(pair( - at, - verify(lex_ident, |token| token.kind == TokenKind::Output), + fn parse_output_directive< + 'a, + 's, + E: ParseError> + ContextError>, + >( + input: Input<'a, 's>, + ) -> IResult, Directive<'a>, E> { + context( + "output directive", + tuple(( + opt(lex_doc_comment), + recognize(pair( + at, + verify(lex_ident, |token| token.kind == TokenKind::Output), + )), + wsoc1, + opt(parse_list(lex_ident)), + wsoc0, + dot, )), - wsoc1, - opt(parse_list(lex_ident)), - wsoc0, - expect( - report_label(dot, ParserLabel::Dot), - "missing `.`", - Token { - kind: TokenKind::Error, - span: Span::new("ERROR! missing `.`"), - }, - input.parser_state, - ), - ))(input) + )(input) .map( |(rest_input, (doc_comment, kw, ws1, predicates, ws2, dot))| { ( @@ -3063,9 +3205,9 @@ pub mod new { } // /// Parse a list of `ident1, ident2, …` - // fn parse_identifier_list<'a, 'e>( - // input: Input<'a, 'e>, - // ) -> IResult, List<'a, Token<'a>>> { + // fn parse_identifier_list<'a, 's, E: ParseError> + ContextError>>( + // input: Input<'a, 's>, + // ) -> IResult, List<'a, Token<'a>>, E> { // pair( // lex_ident, // many0(tuple(( @@ -3087,92 +3229,119 @@ pub mod new { // }) // } - fn parse_list<'a, 'e, T>( - parse_t: fn(Input<'a, 'e>) -> IResult, T>, - ) -> impl Fn(Input<'a, 'e>) -> IResult, List<'a, T>> { - move |input: Input<'a, 'e>| { - pair(parse_t, many0(tuple((wsoc0, comma, wsoc0, parse_t))))(input).map( - |(rest_input, (first, rest))| { - ( - rest_input, - List { - span: outer_span(input.input, rest_input.input), - first, - rest: if rest.is_empty() { None } else { Some(rest) }, - }, - ) - }, - ) + fn parse_list<'a, 's, T, E: ParseError> + ContextError>>( + parse_t: fn(Input<'a, 's>) -> IResult, T, E>, + ) -> impl Fn(Input<'a, 's>) -> IResult, List<'a, T>, E> { + move |input: Input<'a, 's>| { + context( + "list", + pair(parse_t, many0(tuple((wsoc0, comma, wsoc0, parse_t)))), + )(input) + .map(|(rest_input, (first, rest))| { + ( + rest_input, + List { + span: outer_span(input.input, rest_input.input), + first, + rest: if rest.is_empty() { None } else { Some(rest) }, + }, + ) + }) } } /// Parse the head atoms. The same as the body atoms except for disallowing negated atoms. - fn parse_head_atoms<'a, 'e>(input: Input<'a, 'e>) -> IResult, Atom<'a>> { - alt((parse_normal_atom, parse_infix_atom, parse_map_atom))(input) + fn parse_head_atoms<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Atom<'a>, E> { + context( + "rule head atoms", + alt((parse_normal_atom, parse_infix_atom, parse_map_atom)), + )(input) } /// Parse the body atoms. The same as the head atoms except for allowing negated atoms. - fn parse_body_atoms<'a, 'e>(input: Input<'a, 'e>) -> IResult, Atom<'a>> { - alt(( - parse_normal_atom, - parse_negative_atom, - parse_infix_atom, - parse_map_atom, - ))(input) + fn parse_body_atoms<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Atom<'a>, E> { + context( + "rule body atoms", + alt(( + parse_normal_atom, + parse_negative_atom, + parse_infix_atom, + parse_map_atom, + )), + )(input) } /// Parse an atom of the form `predicateName(term1, term2, …)`. 
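    /// e.g. `somePred(term1, term2)`, the atom of the lexer's `fact` test;
    /// the named tuple is wrapped as `Atom::Positive` below.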
- fn parse_normal_atom<'a, 'e>(input: Input<'a, 'e>) -> IResult, Atom<'a>> { - parse_named_tuple(input) + fn parse_normal_atom<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Atom<'a>, E> { + context("positive atom", parse_named_tuple)(input) .map(|(rest_input, named_tuple)| (rest_input, Atom::Positive(named_tuple))) } /// Parse an atom of the form `~predicateName(term1, term2, …)`. - fn parse_negative_atom<'a, 'e>(input: Input<'a, 'e>) -> IResult, Atom<'a>> { - pair(tilde, parse_named_tuple)(input).map(|(rest_input, (tilde, named_tuple))| { - ( - rest_input, - Atom::Negative { - span: outer_span(input.input, rest_input.input), - neg: tilde, - atom: named_tuple, - }, - ) - }) - } - - /// Parse an "infix atom" of the form `term1 term2`. - /// The supported infix operations are `<`, `<=`, `=`, `>=`, `>` and `!=`. - fn parse_infix_atom<'a, 'e>(input: Input<'a, 'e>) -> IResult, Atom<'a>> { - tuple((parse_term, wsoc0, parse_operation_token, wsoc0, parse_term))(input).map( - |(rest_input, (lhs, ws1, operation, ws2, rhs))| { + fn parse_negative_atom<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Atom<'a>, E> { + context("negative atom", pair(tilde, parse_named_tuple))(input).map( + |(rest_input, (tilde, named_tuple))| { ( rest_input, - Atom::InfixAtom { + Atom::Negative { span: outer_span(input.input, rest_input.input), - lhs, - ws1, - operation, - ws2, - rhs, + neg: tilde, + atom: named_tuple, }, ) }, ) } + /// Parse an "infix atom" of the form `term1 term2`. + /// The supported infix operations are `<`, `<=`, `=`, `>=`, `>` and `!=`. + fn parse_infix_atom<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Atom<'a>, E> { + context( + "infix atom", + tuple((parse_term, wsoc0, parse_operation_token, wsoc0, parse_term)), + )(input) + .map(|(rest_input, (lhs, ws1, operation, ws2, rhs))| { + ( + rest_input, + Atom::InfixAtom { + span: outer_span(input.input, rest_input.input), + lhs, + ws1, + operation, + ws2, + rhs, + }, + ) + }) + } + /// Parse a tuple with an optional name, like `ident(term1, term2)` /// or just `(int, int, skip)`. - fn parse_tuple<'a, 'e>(input: Input<'a, 'e>) -> IResult, Tuple<'a>> { - tuple(( - opt(lex_ident), - wsoc0, - open_paren, - wsoc0, - opt(parse_list(parse_term)), - wsoc0, - report_label(close_paren, ParserLabel::CloseParen), - ))(input) + fn parse_tuple<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Tuple<'a>, E> { + context( + "tuple", + tuple(( + opt(lex_ident), + wsoc0, + open_paren, + wsoc0, + opt(parse_list(parse_term)), + wsoc0, + close_paren, + )), + )(input) .map( |(rest_input, (identifier, ws1, open_paren, ws2, terms, ws3, close_paren))| { ( @@ -3194,16 +3363,21 @@ pub mod new { /// Parse a named tuple. This function is like `parse_tuple` with the difference, /// that is enforces the existence of an identifier for the tuple. 
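    /// A hypothetical example: `add(1, 2)` is accepted here, while the
    /// anonymous tuple `(1, 2)` is only accepted by `parse_tuple` above.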
- fn parse_named_tuple<'a, 'e>(input: Input<'a, 'e>) -> IResult, Tuple<'a>> { - tuple(( - lex_ident, - wsoc0, - report_label(open_paren, ParserLabel::OpenParen), - wsoc0, - opt(parse_list(parse_term)), - wsoc0, - report_label(close_paren, ParserLabel::CloseParen), - ))(input) + fn parse_named_tuple<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Tuple<'a>, E> { + context( + "named tuple", + tuple(( + lex_ident, + wsoc0, + open_paren, + wsoc0, + opt(parse_list(parse_term)), + wsoc0, + close_paren, + )), + )(input) .map( |(rest_input, (identifier, ws1, open_paren, ws2, terms, ws3, close_paren))| { ( @@ -3225,16 +3399,21 @@ pub mod new { /// Parse a map. Maps are denoted with `{…}` and can haven an optional name, e.g. `csv {…}`. /// Inside the curly braces ist a list of pairs. - fn parse_map<'a, 'e>(input: Input<'a, 'e>) -> IResult, Map<'a>> { - tuple(( - opt(lex_ident), - wsoc0, - open_brace, - wsoc0, - opt(parse_list(parse_pair)), - wsoc0, - report_label(close_brace, ParserLabel::CloseBrace), - ))(input) + fn parse_map<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Map<'a>, E> { + context( + "map", + tuple(( + opt(lex_ident), + wsoc0, + open_brace, + wsoc0, + opt(parse_list(parse_pair)), + wsoc0, + close_brace, + )), + )(input) .map( |(rest_input, (identifier, ws1, open_brace, ws2, pairs, ws3, close_brace))| { ( @@ -3255,15 +3434,17 @@ pub mod new { } /// Parse a map in an atom position. - fn parse_map_atom<'a, 'e>(input: Input<'a, 'e>) -> IResult, Atom<'a>> { + fn parse_map_atom<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Atom<'a>, E> { parse_map(input).map(|(rest_input, map)| (rest_input, Atom::Map(map))) } // /// Parse a pair list of the form `key1 = value1, key2 = value2, …`. - // fn parse_pair_list<'a, 'e, E: ParseError> + ContextError>>( - // input: Input<'a, 'e>, + // fn parse_pair_list<'a, 's, E: ParseError> + ContextError>>( + // input: Input<'a, 's>, // state: Errors, - // ) -> IResult, Option, Term<'a>>>>, E> { + // ) -> IResult, Option, Term<'a>>>>, E> { // context( // "parse pair list", // opt(pair( @@ -3293,36 +3474,31 @@ pub mod new { // } /// Parse a pair of the form `key = value`. - fn parse_pair<'a, 'e>( - input: Input<'a, 'e>, - ) -> IResult, Pair<'a, Term<'a>, Term<'a>>> { - tuple(( - parse_term, - wsoc0, - report_label(equal, ParserLabel::Equal), - wsoc0, - parse_term, - ))(input) - .map(|(rest_input, (key, ws1, equal, ws2, value))| { - ( - rest_input, - Pair { - span: outer_span(input.input, rest_input.input), - key, - ws1, - equal, - ws2, - value, - }, - ) - }) + fn parse_pair<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Pair<'a, Term<'a>, Term<'a>>, E> { + context("pair", tuple((parse_term, wsoc0, equal, wsoc0, parse_term)))(input).map( + |(rest_input, (key, ws1, equal, ws2, value))| { + ( + rest_input, + Pair { + span: outer_span(input.input, rest_input.input), + key, + ws1, + equal, + ws2, + value, + }, + ) + }, + ) } // /// Parse a list of terms of the form `term1, term2, …`. - // fn parse_term_list<'a, 'e, E: ParseError> + ContextError>>( - // input: Input<'a, 'e>, + // fn parse_term_list<'a, 's, E: ParseError> + ContextError>>( + // input: Input<'a, 's>, // state: Errors, - // ) -> IResult, List<'a, Term<'a>>, E> { + // ) -> IResult, List<'a, Term<'a>>, E> { // context( // "parse term list", // pair( @@ -3350,39 +3526,50 @@ pub mod new { /// Parse a term. 
A term can be a primitive value (constant, number, string, …), /// a variable (universal or existential), a map, a function (-symbol), an arithmetic /// operation, an aggregation or an tuple of terms, e.g. `(term1, term2, …)`. - fn parse_term<'a, 'e>(input: Input<'a, 'e>) -> IResult, Term<'a>> { - report_error(alt(( - parse_binary_term, - parse_tuple_term, - // parse_unary_prefix_term, - parse_map_term, - parse_primitive_term, - parse_variable, - parse_existential, - parse_aggregation_term, - parse_blank, - )))(input) + fn parse_term<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Term<'a>, E> { + context( + "term", + alt(( + parse_binary_term, + parse_tuple_term, + // parse_unary_prefix_term, + parse_map_term, + parse_primitive_term, + parse_variable, + parse_existential, + parse_aggregation_term, + parse_blank, + )), + )(input) } /// Parse a primitive term (simple constant, iri constant, number, string). - fn parse_primitive_term<'a, 'e>(input: Input<'a, 'e>) -> IResult, Term<'a>> { - report_error(alt(( - parse_rdf_literal, - parse_ident, - parse_iri, - parse_number, - parse_string, - )))(input) + fn parse_primitive_term<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Term<'a>, E> { + context( + "primitive term", + alt(( + parse_rdf_literal, + parse_ident, + parse_iri, + parse_number, + parse_string, + )), + )(input) .map(|(rest_input, term)| (rest_input, Term::Primitive(term))) } /// Parse a rdf literal e.g. "2023-06-19"^^ - fn parse_rdf_literal<'a, 'e>(input: Input<'a, 'e>) -> IResult, Primitive<'a>> { - tuple(( - lex_string, - recognize(pair(caret, caret)), - report_label(lex_iri, ParserLabel::Iri), - ))(input) + fn parse_rdf_literal<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Primitive<'a>, E> { + context( + "RDF Literal", + tuple((lex_string, recognize(pair(caret, caret)), lex_iri)), + )(input) .map(|(rest_input, (string, carets, iri))| { ( rest_input, @@ -3399,26 +3586,37 @@ pub mod new { }) } - fn parse_ident<'a, 'e>(input: Input<'a, 'e>) -> IResult, Primitive<'a>> { + fn parse_ident<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Primitive<'a>, E> { lex_ident(input).map(|(rest_input, ident)| (rest_input, Primitive::Constant(ident))) } - fn parse_iri<'a, 'e>(input: Input<'a, 'e>) -> IResult, Primitive<'a>> { + fn parse_iri<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Primitive<'a>, E> { lex_iri(input).map(|(rest_input, iri)| (rest_input, Primitive::Iri(iri))) } - fn parse_number<'a, 'e>(input: Input<'a, 'e>) -> IResult, Primitive<'a>> { - report_error(alt((parse_decimal, parse_integer)))(input) + fn parse_number<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Primitive<'a>, E> { + context("number", alt((parse_decimal, parse_integer)))(input) } - fn parse_decimal<'a, 'e>(input: Input<'a, 'e>) -> IResult, Primitive<'a>> { - tuple(( - opt(alt((plus, minus))), - opt(lex_number), - dot, - lex_number, - opt(parse_exponent), - ))(input) + fn parse_decimal<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Primitive<'a>, E> { + context( + "decimal", + tuple(( + opt(alt((plus, minus))), + opt(lex_number), + dot, + lex_number, + opt(parse_exponent), + )), + )(input) .map(|(rest_input, (sign, before, dot, after, exponent))| { ( rest_input, @@ -3434,37 +3632,44 @@ pub mod new { }) } - fn parse_integer<'a, 'e>(input: Input<'a, 'e>) -> IResult, Primitive<'a>> { - 
pair(opt(alt((plus, minus))), lex_number)(input).map(|(rest_input, (sign, number))| { - ( - rest_input, - Primitive::Number { - span: outer_span(input.input, rest_input.input), - sign, - before: None, - dot: None, - after: number, - exponent: None, - }, - ) - }) + fn parse_integer<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Primitive<'a>, E> { + context("integer", pair(opt(alt((plus, minus))), lex_number))(input).map( + |(rest_input, (sign, number))| { + ( + rest_input, + Primitive::Number { + span: outer_span(input.input, rest_input.input), + sign, + before: None, + dot: None, + after: number, + exponent: None, + }, + ) + }, + ) } - fn parse_exponent<'a, 'e>(input: Input<'a, 'e>) -> IResult, Exponent<'a>> { - tuple(( - exp, - opt(alt((plus, minus))), - report_label(lex_number, ParserLabel::Number), - ))(input) + fn parse_exponent<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Exponent<'a>, E> { + context( + "exponent", + tuple((exp, opt(alt((plus, minus))), lex_number)), + )(input) .map(|(rest_input, (e, sign, number))| (rest_input, Exponent { e, sign, number })) } - fn parse_string<'a, 'e>(input: Input<'a, 'e>) -> IResult, Primitive<'a>> { + fn parse_string<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Primitive<'a>, E> { lex_string(input).map(|(rest_input, string)| (rest_input, Primitive::String(string))) } // /// Parse an unary term. - // fn parse_unary_prefix_term<'a, 'e>(input: Input<'a, 'e>) -> IResult, Term<'a>> { + // fn parse_unary_prefix_term<'a, 's, E: ParseError> + ContextError>>(input: Input<'a, 's>) -> IResult, Term<'a>, E> { // pair(lex_unary_prefix_operators, parse_term)(input).map( // |(rest_input, (operation, term))| { // ( @@ -3480,10 +3685,15 @@ pub mod new { // } /// Parse a binary infix operation of the form `term1 term2`. - fn parse_binary_term<'a, 'e>(input: Input<'a, 'e>) -> IResult, Term<'a>> { - pair( - parse_arithmetic_product, - opt(tuple((wsoc0, alt((plus, minus)), wsoc0, parse_binary_term))), + fn parse_binary_term<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Term<'a>, E> { + context( + "binary term", + pair( + parse_arithmetic_product, + opt(tuple((wsoc0, alt((plus, minus)), wsoc0, parse_binary_term))), + ), )(input) .map(|(rest_input, (lhs, opt))| { ( @@ -3506,15 +3716,24 @@ pub mod new { /// Parse an arithmetic product, i.e. an expression involving /// only `*` and `/` over subexpressions. 
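    /// This split is what gives `*` and `/` precedence over `+` and `-`:
    /// `parse_binary_term` above only ever combines whole products, so an
    /// input like `5*7+7` (cf. the arithmetic tests below) groups as `(5*7)+7`.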
- fn parse_arithmetic_product<'a, 'e>(input: Input<'a, 'e>) -> IResult, Term<'a>> { - pair( - parse_arithmetic_factor, - opt(tuple(( - wsoc0, - alt((star, slash)), - wsoc0, - parse_arithmetic_product, - ))), + fn parse_arithmetic_product< + 'a, + 's, + E: ParseError> + ContextError>, + >( + input: Input<'a, 's>, + ) -> IResult, Term<'a>, E> { + context( + "arithmetic product", + pair( + parse_arithmetic_factor, + opt(tuple(( + wsoc0, + alt((star, slash)), + wsoc0, + parse_arithmetic_product, + ))), + ), )(input) .map(|(rest_input, (lhs, opt))| { ( @@ -3535,14 +3754,23 @@ pub mod new { }) } - fn parse_arithmetic_factor<'a, 'e>(input: Input<'a, 'e>) -> IResult, Term<'a>> { - report_error(alt(( - parse_tuple_term, - parse_aggregation_term, - parse_primitive_term, - parse_variable, - parse_existential, - )))(input) + fn parse_arithmetic_factor< + 'a, + 's, + E: ParseError> + ContextError>, + >( + input: Input<'a, 's>, + ) -> IResult, Term<'a>, E> { + context( + "arithmetic factor", + alt(( + parse_tuple_term, + parse_aggregation_term, + parse_primitive_term, + parse_variable, + parse_existential, + )), + )(input) } // fn fold_arithmetic_expression<'a>( @@ -3567,15 +3795,24 @@ pub mod new { // } /// Parse an aggregation term of the form `#sum(…)`. - fn parse_aggregation_term<'a, 'e>(input: Input<'a, 'e>) -> IResult, Term<'a>> { - tuple(( - recognize(pair(hash, lex_ident)), - report_label(open_paren, ParserLabel::OpenParen), - wsoc0, - parse_list(parse_term), - wsoc0, - report_label(close_paren, ParserLabel::CloseParen), - ))(input) + fn parse_aggregation_term< + 'a, + 's, + E: ParseError> + ContextError>, + >( + input: Input<'a, 's>, + ) -> IResult, Term<'a>, E> { + context( + "aggregation term", + tuple(( + recognize(pair(hash, lex_ident)), + open_paren, + wsoc0, + parse_list(parse_term), + wsoc0, + close_paren, + )), + )(input) .map( |(rest_input, (operation, open_paren, ws1, terms, ws2, close_paren))| { ( @@ -3598,28 +3835,38 @@ pub mod new { } /// Parse a `_` - fn parse_blank<'a, 'e>(input: Input<'a, 'e>) -> IResult, Term<'a>> { - underscore(input).map(|(rest_input, underscore)| (rest_input, Term::Blank(underscore))) + fn parse_blank<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Term<'a>, E> { + context("blank", underscore)(input) + .map(|(rest_input, underscore)| (rest_input, Term::Blank(underscore))) } /// Parse a tuple term, either with a name (function symbol) or as a term (-list) with /// parenthesis. - fn parse_tuple_term<'a, 'e>(input: Input<'a, 'e>) -> IResult, Term<'a>> { - parse_tuple(input) + fn parse_tuple_term<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Term<'a>, E> { + context("tuple term", parse_tuple)(input) .map(|(rest_input, named_tuple)| (rest_input, Term::Tuple(Box::new(named_tuple)))) } /// Parse a map as a term. - fn parse_map_term<'a, 'e>(input: Input<'a, 'e>) -> IResult, Term<'a>> { - parse_map(input).map(|(rest_input, map)| (rest_input, Term::Map(Box::new(map)))) + fn parse_map_term<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Term<'a>, E> { + context("map term", parse_map)(input) + .map(|(rest_input, map)| (rest_input, Term::Map(Box::new(map)))) } /// Parse a variable. 
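    /// i.e. a universal variable such as `?X` in the rule tests: a `?`
    /// immediately followed by an identifier, captured as a single span via
    /// `recognize`.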
- fn parse_variable<'a, 'e>(input: Input<'a, 'e>) -> IResult, Term<'a>> { - recognize(pair( - question_mark, - report_label(lex_ident, ParserLabel::Identifier), - ))(input) + fn parse_variable<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Term<'a>, E> { + context( + "universal variable", + recognize(pair(question_mark, lex_ident)), + )(input) .map(|(rest_input, var)| { ( rest_input, @@ -3632,11 +3879,13 @@ pub mod new { } /// Parse an existential variable. - fn parse_existential<'a, 'e>(input: Input<'a, 'e>) -> IResult, Term<'a>> { - recognize(pair( - exclamation_mark, - report_label(lex_ident, ParserLabel::Identifier), - ))(input) + fn parse_existential<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Term<'a>, E> { + context( + "existential variable", + recognize(pair(exclamation_mark, lex_ident)), + )(input) .map(|(rest_input, existential)| { ( rest_input, @@ -3648,27 +3897,26 @@ pub mod new { }) } - // Order of functions is important, because of ordered choice and no backtracking + // Order of parser compinator is important, because of ordered choice and no backtracking /// Parse the operator for an infix atom. - fn parse_operation_token<'a, 'e>(input: Input<'a, 'e>) -> IResult, Token<'a>> { - report_error(alt(( - less_equal, - greater_equal, - equal, - unequal, - less, - greater, - )))(input) + fn parse_operation_token<'a, 's, E: ParseError> + ContextError>>( + input: Input<'a, 's>, + ) -> IResult, Token<'a>, E> { + context( + "operators", + alt((less_equal, greater_equal, equal, unequal, less, greater)), + )(input) } #[cfg(test)] mod tests { use std::{ cell::RefCell, - collections::{HashMap, HashSet}, + collections::{BTreeMap, HashMap, HashSet}, }; use nom::error::{convert_error, VerboseError}; + use nom_supreme::error::ErrorTree; use super::*; use crate::io::{ @@ -3692,9 +3940,9 @@ pub mod new { }; } - fn convert_located_span_error<'a, 'e>( + fn convert_located_span_error<'a, 's>( input: Span<'a>, - err: VerboseError>, + err: VerboseError>, ) -> String { convert_error( *(input.fragment()), @@ -3715,18 +3963,14 @@ pub mod new { // }; let input = Span::new("a(B,C)."); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let errors = ParserState { - errors: &refcell, - labels: &labels, - }; + let errors = ParserState { errors: &refcell }; let input = Input { input, parser_state: errors, }; assert_eq!( // parse_program::>(input).unwrap().1, - parse_program(input).0, + parse_program::>(input).0, Program { span: input.input, tl_doc_comment: None, @@ -3786,18 +4030,14 @@ pub mod new { r#"@base .@prefix rdfs:.@import sourceA:-csv{resource="sources/dataA.csv"}.@export a:-csv{}.@output a, b, c."#, ); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let errors = ParserState { - errors: &refcell, - labels: &labels, - }; + let errors = ParserState { errors: &refcell }; let input = Input { input, parser_state: errors, }; assert_eq!( // parse_program::>(input).unwrap().1, - parse_program(input).0, + parse_program::>(input).0, Program { tl_doc_comment: None, span: input.input, @@ -4070,18 +4310,14 @@ pub mod new { fn fact_with_ws() { let input = Span::new("some(Fact, with, whitespace) . 
% and a super useful comment\n"); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let errors = ParserState { - errors: &refcell, - labels: &labels, - }; + let errors = ParserState { errors: &refcell }; let input = Input { input, parser_state: errors, }; assert_eq!( // parse_program::>(input).unwrap().1, - parse_program(input).0, + parse_program::>(input).0, Program { span: input.input, tl_doc_comment: None, @@ -4197,17 +4433,13 @@ limeSpecies(?X, ?Name) :- taxon(?X, ?Name, ?Y), limeSpecies(?Y, ?N). oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters), ?age > 200, limeSpecies(?id,?species) ."#, ); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let errors = ParserState { - errors: &refcell, - labels: &labels, - }; + let errors = ParserState { errors: &refcell }; let input = Input { input, parser_state: errors, }; // let ast = parse_program::>(input); - let (ast, _) = parse_program(input); + let (ast, _) = parse_program::>(input); println!("{}", ast); assert_eq!( { @@ -4229,29 +4461,26 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters let str = std::fs::read_to_string(file).expect("testfile not found"); let input = Span::new(str.as_str()); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let parser_state = ParserState { - errors: &refcell, - labels: &labels, - }; + let parser_state = ParserState { errors: &refcell }; let input = Input { input, parser_state, }; // let result = parse_program::>(input); - let (ast, errors) = parse_program(input); + let (ast, errors) = parse_program::>>(input); println!("{}\n\n{:#?}", ast, errors); - let mut error_map: HashMap> = HashMap::new(); + let mut error_map: BTreeMap> = BTreeMap::new(); for error in errors { - if let Some(set) = error_map.get_mut(&error.0) { - set.insert(error.1); + if let Some(set) = error_map.get_mut(&error.pos) { + set.insert(error.msg); } else { let mut set = HashSet::new(); - set.insert(error.1); - error_map.insert(error.0, set); + set.insert(error.msg); + error_map.insert(error.pos, set); }; } dbg!(&error_map); + println!("\n\n"); // assert!(false); let lines: Vec<_> = str.lines().collect(); for (pos, str) in error_map { @@ -4271,17 +4500,13 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters { let input = Span::new("42"); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let parser_state = ParserState { - errors: &refcell, - labels: &labels, - }; + let parser_state = ParserState { errors: &refcell }; let input = Input { input, parser_state, }; // let result = parse_term::>(input); - let result = parse_term(input); + let result = parse_term::>(input); result.unwrap().1 }, Term::Primitive(Primitive::Number { @@ -4298,17 +4523,13 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters { let input = Span::new("35+7"); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let parser_state = ParserState { - errors: &refcell, - labels: &labels, - }; + let parser_state = ParserState { errors: &refcell }; let input = Input { input, parser_state, }; // let result = parse_term::>(input); - let result = parse_term(input); + let result = parse_term::>(input); result.unwrap().1 }, Term::Binary { @@ -4339,17 +4560,13 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters { let input = Span::new("6*7"); let refcell = 
RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let parser_state = ParserState { - errors: &refcell, - labels: &labels, - }; + let parser_state = ParserState { errors: &refcell }; let input = Input { input, parser_state, }; // let result = parse_term::>(input); - let result = parse_term(input); + let result = parse_term::>(input); result.unwrap().1 }, Term::Binary { @@ -4380,17 +4597,13 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters { let input = Span::new("49-7"); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let parser_state = ParserState { - errors: &refcell, - labels: &labels, - }; + let parser_state = ParserState { errors: &refcell }; let input = Input { input, parser_state, }; // let result = parse_term::>(input); - let result = parse_term(input); + let result = parse_term::>(input); result.unwrap().1 }, Term::Binary { @@ -4421,17 +4634,13 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters { let input = Span::new("84/2"); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let parser_state = ParserState { - errors: &refcell, - labels: &labels, - }; + let parser_state = ParserState { errors: &refcell }; let input = Input { input, parser_state, }; // let result = parse_term::>(input); - let result = parse_term(input); + let result = parse_term::>(input); result.unwrap().1 }, Term::Binary { @@ -4462,17 +4671,13 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters { let input = Span::new("5*7+7"); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let parser_state = ParserState { - errors: &refcell, - labels: &labels, - }; + let parser_state = ParserState { errors: &refcell }; let input = Input { input, parser_state, }; // let result = parse_term::>(input); - let result = parse_term(input); + let result = parse_term::>(input); result.unwrap().1 }, Term::Binary { @@ -4517,17 +4722,13 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters { let input = Span::new("7+5*7"); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let parser_state = ParserState { - errors: &refcell, - labels: &labels, - }; + let parser_state = ParserState { errors: &refcell }; let input = Input { input, parser_state, }; // let result = parse_term::>(input); - let result = parse_term(input); + let result = parse_term::>(input); result.unwrap().1 }, Term::Binary { @@ -4572,17 +4773,13 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters { let input = Span::new("(15+3*2-(7+35)*8)/3"); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let parser_state = ParserState { - errors: &refcell, - labels: &labels, - }; + let parser_state = ParserState { errors: &refcell }; let input = Input { input, parser_state, }; // let result = parse_term::>(input); - let result = parse_term(input); + let result = parse_term::>(input); // let result = parse_term::>(Span::new("(15+3*2-(7+35)*8)/3")); // match result { // Ok(ast) => { @@ -4744,17 +4941,13 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters { let input = Span::new("15+3*2-(7+35)*8/3"); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let parser_state = ParserState { - errors: &refcell, - labels: &labels, - }; + let parser_state = ParserState { errors: &refcell }; let 
input = Input { input, parser_state, }; // let result = parse_term::>(input); - let result = parse_term(input); + let result = parse_term::>(input); result.unwrap().1 }, Term::Binary { @@ -4898,17 +5091,15 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters { let input = Span::new("e42"); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let parser_state = ParserState { - errors: &refcell, - labels: &labels, - }; + let parser_state = ParserState { errors: &refcell }; let input = Input { input, parser_state, }; // parse_exponent::>(input) - parse_exponent(input).unwrap().1 + parse_exponent::>(input) + .unwrap() + .1 }, Exponent { e: T! {TokenKind::Exponent, 0,1,"e"}, @@ -4922,16 +5113,12 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters fn missing_dot() { let input = Span::new("some(Fact\nSome other, Fact.\nthird(fact)."); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let parser_state = ParserState { - errors: &refcell, - labels: &labels, - }; + let parser_state = ParserState { errors: &refcell }; let input = Input { input, parser_state, }; - let result = parse_program(input); + let result = parse_program::>(input); println!("{}\n\n{:#?}", result.0, result.1); // assert!(false); } @@ -4940,17 +5127,27 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters fn wsoc() { let input = Span::new(" \t\n % first comment\n % second comment\n"); let refcell = RefCell::new(Vec::new()); - let labels = RefCell::new(Vec::new()); - let parser_state = ParserState { - errors: &refcell, - labels: &labels, + let parser_state = ParserState { errors: &refcell }; + let input = Input { + input, + parser_state, }; + dbg!(wsoc0::>(input)); + dbg!(wsoc1::>(input)); + } + + #[test] + fn debug_test() { + let str = "asd"; + let input = Span::new(str); + let refcell = RefCell::new(Vec::new()); + let parser_state = ParserState { errors: &refcell }; let input = Input { input, parser_state, }; - dbg!(wsoc0(input)); - dbg!(wsoc1(input)); + let result = parse_program::>>(input); + dbg!(&result); } } } diff --git a/nemo/src/io/parser/ast.rs b/nemo/src/io/parser/ast.rs index 30d3a8aa6..41c767ec7 100644 --- a/nemo/src/io/parser/ast.rs +++ b/nemo/src/io/parser/ast.rs @@ -32,13 +32,37 @@ pub trait AstNode: std::fmt::Debug + Display + Sync { fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode>; } -// TODO: tidy up PartialOrd and Ord implementation -#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[derive(Debug, Clone, Copy, Hash)] pub struct Position { pub offset: usize, pub line: u32, pub column: u32, } +impl PartialEq for Position { + fn eq(&self, other: &Self) -> bool { + self.offset.eq(&other.offset) + } +} +impl Eq for Position {} +impl PartialOrd for Position { + fn partial_cmp(&self, other: &Self) -> Option { + self.offset.partial_cmp(&other.offset) + } +} +impl Ord for Position { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.offset.cmp(&other.offset) + } +} +impl Default for Position { + fn default() -> Self { + Position { + offset: 0, + line: 1, + column: 1, + } + } +} /// Whitespace or Comment token #[derive(Debug, Clone, PartialEq)] @@ -227,9 +251,16 @@ pub(crate) fn ast_to_ascii_tree(node: &dyn AstNode) -> Tree { } mod test { - - - + use super::*; + use super::{ + atom::Atom, + directive::Directive, + program::Program, + statement::Statement, + term::{Primitive, Term}, + tuple::Tuple, + }; + use 
crate::io::lexer::{Span, TokenKind}; macro_rules! s { ($offset:literal,$line:literal,$str:literal) => { diff --git a/nemo/src/io/parser/types.rs b/nemo/src/io/parser/types.rs index 6dea14af0..7031f1d26 100644 --- a/nemo/src/io/parser/types.rs +++ b/nemo/src/io/parser/types.rs @@ -805,35 +805,23 @@ where } } -pub(crate) trait ToRange { - fn to_range(&self) -> Range; +impl nom_greedyerror::Position for Input<'_, '_> { + fn position(&self) -> usize { + nom_greedyerror::Position::position(&self.input) + } } -#[derive(Debug, Clone, Copy, PartialEq)] -pub(crate) enum ParserLabel { - Rule, - Fact, - Directive, - Dot, - Arrow, - // Head, - // Body, - Comma, - Iri, - Prefix, - Identifier, - OpenParen, - CloseParen, - OpenBrace, - CloseBrace, - OpenBracket, - ClosePracket, - Equal, - Number, -} - -#[derive(Debug, Clone, Copy, PartialEq)] -pub(crate) struct Label { - pub(crate) label: ParserLabel, - pub(crate) pos: Position, +impl std::fmt::Display for Input<'_, '_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "line {}, column {}", + self.input.location_line(), + self.input.get_utf8_column() + ) + } +} + +pub(crate) trait ToRange { + fn to_range(&self) -> Range; } diff --git a/testfile2.rls b/testfile2.rls index 32ee5c5af..95963d67b 100644 --- a/testfile2.rls +++ b/testfile2.rls @@ -1,11 +1,14 @@ % Facts: -father(alice, bob). -mother(bob, carla). -father(bob, darius). -mother(alice, carla). +father( % father predicate means, that 'alice has father bob' + alice, + bob). +mother bob, carla). +father(bob darius). +mother(alice, carla . % Rules: -parent(?X, ?Y) :- mother(?X, ?Y). +parent(?X, ?Y) :- mother(?X, ?Y) parent(?X, ?Y) :- father(?X, ?Y). -parent( ?X , ?Y ) :- ~sibling( ?X , ?Y ) . +parent( ?X , ?Y :- ~sibling( ?X , ?Y ) . 
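+% The facts and rules above are intentionally malformed (missing parentheses,
+% commas and dots), so that the parser's error recovery can be exercised.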
+

From b671014870cf0e43ae6220e0d38787510b40f1cc Mon Sep 17 00:00:00 2001
From: Jakob Steinberg
Date: Wed, 29 May 2024 14:57:56 +0200
Subject: [PATCH 101/214] Fix errors caused by Error struct change

---
 nemo-language-server/src/language_server.rs | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/nemo-language-server/src/language_server.rs b/nemo-language-server/src/language_server.rs
index 35acce8c6..e39252745 100644
--- a/nemo-language-server/src/language_server.rs
+++ b/nemo-language-server/src/language_server.rs
@@ -82,21 +82,21 @@ impl Backend {
         let diagnostics = errors
             .into_iter()
             .map(|error| Diagnostic {
-                message: error.1,
+                message: error.msg,
                 range: Range::new(
                     line_col_to_position(
                         &line_index,
                         LineCol {
-                            line: error.0.line - 1,
-                            col: error.0.column - 1,
+                            line: error.pos.line - 1,
+                            col: error.pos.column - 1,
                         },
                     )
                     .unwrap(),
                     line_col_to_position(
                         &line_index,
                         LineCol {
-                            line: error.0.line - 1,
-                            col: error.0.column - 1 + 1,
+                            line: error.pos.line - 1,
+                            col: error.pos.column - 1 + 1,
                         },
                     )
                     .unwrap(),

From d42fab042603ca5b11e7090035279d2925fb5c2f Mon Sep 17 00:00:00 2001
From: Jakob Steinberg
Date: Fri, 31 May 2024 08:53:35 +0200
Subject: [PATCH 102/214] Make context parser combinator input generic and add
 Context enum

---
 nemo/src/io/lexer.rs        | 325 ++++++++++++++-------
 nemo/src/io/parser.rs       | 553 ++++++++++++++++++------------------
 nemo/src/io/parser/types.rs |   6 +
 3 files changed, 492 insertions(+), 392 deletions(-)

diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs
index cd2aa1cd9..f976edb49 100644
--- a/nemo/src/io/lexer.rs
+++ b/nemo/src/io/lexer.rs
@@ -2,40 +2,132 @@

 use std::{cell::RefCell, ops::Range};

+use super::parser::new::context;
 use nom::{
     branch::alt,
     bytes::complete::{is_not, tag, take, take_till},
     character::complete::{alpha1, alphanumeric1, digit1, line_ending, multispace1},
     combinator::{all_consuming, cut, map, opt, recognize},
-    error::{context, ContextError, ErrorKind, ParseError},
+    error::ParseError,
     multi::{many0, many1},
     sequence::{delimited, pair, tuple},
     IResult,
 };
 use nom_locate::LocatedSpan;
+use nom_supreme::{context::ContextError, error::GenericErrorTree};
 use tower_lsp::lsp_types::SymbolKind;

-#[derive(Debug)]
-pub(crate) enum NewParseError {
-    MissingWhitespace,
-    Rule,
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub(crate) enum Context {
+    Tag(&'static str),
+    Exponent,
+    Punctuations,
+    Operators,
+    Identifier,
+    Iri,
+    Number,
+    String,
+    Comment,
+    DocComment,
+    TlDocComment,
+    Comments,
+    Whitespace,
+    Illegal,
+    Program,
     Fact,
+    Rule,
+    RuleHead,
+    RuleBody,
     Directive,
-    Comment,
-    SyntaxError(String),
-    MissingTlDocComment,
+    DirectiveBase,
+    DirectivePrefix,
+    DirectiveImport,
+    DirectiveExport,
+    DirectiveOutput,
+    List,
+    HeadAtoms,
+    BodyAtoms,
+    PositiveAtom,
+    NegativeAtom,
+    InfixAtom,
+    Tuple,
+    NamedTuple,
+    Map,
+    Pair,
+    Term,
+    TermPrivimitive,
+    TermBinary,
+    TermAggregation,
+    TermTuple,
+    TermMap,
+    RdfLiteral,
+    Decimal,
+    Integer,
+    ArithmeticProduct,
+    ArithmeticFactor,
+    Blank,
+    UniversalVariable,
+    ExistentialVariable,
 }

-impl ParseError> for NewParseError {
-    fn from_error_kind(input: Input, kind: nom::error::ErrorKind) -> Self {
-        NewParseError::SyntaxError(kind.description().to_string())
-    }
-
-    fn append(_: Input, _: nom::error::ErrorKind, other: Self) -> Self {
-        other
+impl std::fmt::Display for Context {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Context::Tag(c) => write!(f, "{}", c),
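+            // These names are what `context_strs` collects and what eventually
+            // shows up in the LSP diagnostics as "expected '…'".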
+            Context::Exponent => write!(f, "exponent"),
+            Context::Punctuations => write!(f, "punctuations"),
+            Context::Operators => write!(f, "operators"),
+            Context::Identifier => write!(f, "identifier"),
+            Context::Iri => write!(f, "lex iri"),
+            Context::Number => write!(f, "lex number"),
+            Context::String => write!(f, "lex string"),
+            Context::Comment => write!(f, "lex comment"),
+            Context::DocComment => write!(f, "lex documentation comment"),
+            Context::TlDocComment => write!(f, "lex top level documentation comment"),
+            Context::Comments => write!(f, "comments"),
+            Context::Whitespace => write!(f, "lex whitespace"),
+            Context::Illegal => write!(f, "lex illegal character"),
+            Context::Program => write!(f, "program"),
+            Context::Fact => write!(f, "fact"),
+            Context::Rule => write!(f, "rule"),
+            Context::RuleHead => write!(f, "rule head"),
+            Context::RuleBody => write!(f, "rule body"),
+            Context::Directive => write!(f, "directive"),
+            Context::DirectiveBase => write!(f, "base directive"),
+            Context::DirectivePrefix => write!(f, "prefix directive"),
+            Context::DirectiveImport => write!(f, "import directive"),
+            Context::DirectiveExport => write!(f, "export directive"),
+            Context::DirectiveOutput => write!(f, "output directive"),
+            Context::List => write!(f, "list"),
+            Context::HeadAtoms => write!(f, "head atoms"),
+            Context::BodyAtoms => write!(f, "body atoms"),
+            Context::PositiveAtom => write!(f, "positive atom"),
+            Context::NegativeAtom => write!(f, "negative atom"),
+            Context::InfixAtom => write!(f, "infix atom"),
+            Context::Tuple => write!(f, "tuple"),
+            Context::NamedTuple => write!(f, "named tuple"),
+            Context::Map => write!(f, "map"),
+            Context::Pair => write!(f, "pair"),
+            Context::Term => write!(f, "term"),
+            Context::TermPrivimitive => write!(f, "primitive term"),
+            Context::TermBinary => write!(f, "binary term"),
+            Context::TermAggregation => write!(f, "aggregation term"),
+            Context::TermTuple => write!(f, "tuple term"),
+            Context::TermMap => write!(f, "map term"),
+            Context::RdfLiteral => write!(f, "rdf literal"),
+            Context::Decimal => write!(f, "decimal"),
+            Context::Integer => write!(f, "integer"),
+            Context::ArithmeticProduct => write!(f, "arithmetic product"),
+            Context::ArithmeticFactor => write!(f, "arithmetic factor"),
+            Context::Blank => write!(f, "blank"),
+            Context::UniversalVariable => write!(f, "universal variable"),
+            Context::ExistentialVariable => write!(f, "existential variable"),
+        }
+    }
+}
+
+pub(crate) type ErrorTree<I> =
+    GenericErrorTree<I, &'static str, Context, Box<dyn std::error::Error + Send + Sync>>;
+
 use super::parser::{
     ast::{AstNode, Position},
     types::{Input, ToRange},
 };
@@ -45,7 +137,7 @@ use super::parser::{
 pub struct Error {
     pub pos: Position,
     pub msg: String,
-    pub context: Vec<&'static str>,
+    pub context: Vec<Context>,
 }

 #[derive(Debug, Clone, Copy, PartialEq)]
@@ -317,16 +409,16 @@ impl<'a> AstNode for Token<'a> {
 macro_rules! 
syntax { ($func_name: ident, $tag_str: literal, $token: expr) => { - pub(crate) fn $func_name< - 'a, - 's, - E: ParseError> + ContextError>, - >( + pub(crate) fn $func_name<'a, 's, E>( input: Input<'a, 's>, - ) -> IResult, Token<'a>, E> { - map(context($tag_str, tag($tag_str)), |span: Input| { - Token::new($token, span.input) - })(input) + ) -> IResult, Token<'a>, E> + where + E: ParseError> + ContextError, Context>, + { + map( + context(Context::Tag($tag_str), tag($tag_str)), + |span: Input| Token::new($token, span.input), + )(input) } }; } @@ -351,21 +443,21 @@ syntax!(at, "@", TokenKind::At); syntax!(exp_lower, "e", TokenKind::Exponent); syntax!(exp_upper, "E", TokenKind::Exponent); -pub(crate) fn exp<'a, 's, E: ParseError> + ContextError>>( - input: Input<'a, 's>, -) -> IResult, Token<'a>, E> { - context("lex exponent", alt((exp_lower, exp_upper)))(input) +pub(crate) fn exp<'a, 's, E>(input: Input<'a, 's>) -> IResult, Token<'a>, E> +where + E: ParseError> + ContextError, Context>, +{ + context(Context::Exponent, alt((exp_lower, exp_upper)))(input) } -pub(crate) fn lex_punctuations< - 'a, - 's, - E: ParseError> + ContextError>, ->( +pub(crate) fn lex_punctuations<'a, 's, E>( input: Input<'a, 's>, -) -> IResult, Token<'a>, E> { +) -> IResult, Token<'a>, E> +where + E: ParseError> + ContextError, Context>, +{ context( - "lex punctuations", + Context::Punctuations, alt(( arrow, open_paren, @@ -399,11 +491,12 @@ syntax!(minus, "-", TokenKind::Minus); syntax!(star, "*", TokenKind::Star); syntax!(slash, "/", TokenKind::Slash); -pub(crate) fn lex_operators<'a, 's, E: ParseError> + ContextError>>( - input: Input<'a, 's>, -) -> IResult, Token<'a>, E> { +pub(crate) fn lex_operators<'a, 's, E>(input: Input<'a, 's>) -> IResult, Token<'a>, E> +where + E: ParseError> + ContextError, Context>, +{ context( - "lex operators", + Context::Operators, alt(( less_equal, greater_equal, @@ -425,11 +518,12 @@ pub(crate) fn lex_operators<'a, 's, E: ParseError> + ContextError< // alt((plus, minus))(input) // } -pub(crate) fn lex_ident<'a, 's, E: ParseError> + ContextError>>( - input: Input<'a, 's>, -) -> IResult, Token<'a>, E> { +pub(crate) fn lex_ident<'a, 's, E>(input: Input<'a, 's>) -> IResult, Token<'a>, E> +where + E: ParseError> + ContextError, Context>, +{ let (rest_input, ident) = context( - "lex identifier", + Context::Identifier, recognize(pair( alpha1, many0(alt((alphanumeric1, tag("_"), tag("-")))), @@ -446,97 +540,107 @@ pub(crate) fn lex_ident<'a, 's, E: ParseError> + ContextError> + ContextError>>( - input: Input<'a, 's>, -) -> IResult, Token<'a>, E> { +pub(crate) fn lex_iri<'a, 's, E>(input: Input<'a, 's>) -> IResult, Token<'a>, E> +where + E: ParseError> + ContextError, Context>, +{ context( - "lex iri", + Context::Iri, recognize(delimited(tag("<"), is_not("> \n"), cut(tag(">")))), )(input) .map(|(rest, result)| (rest, Token::new(TokenKind::Iri, result.input))) } -pub(crate) fn lex_number<'a, 's, E: ParseError> + ContextError>>( - input: Input<'a, 's>, -) -> IResult, Token<'a>, E> { - context("lex number", digit1)(input) +pub(crate) fn lex_number<'a, 's, E>(input: Input<'a, 's>) -> IResult, Token<'a>, E> +where + E: ParseError> + ContextError, Context>, +{ + context(Context::Number, digit1)(input) .map(|(rest_input, result)| (rest_input, Token::new(TokenKind::Number, result.input))) } -pub(crate) fn lex_string<'a, 's, E: ParseError> + ContextError>>( - input: Input<'a, 's>, -) -> IResult, Token<'a>, E> { +pub(crate) fn lex_string<'a, 's, E>(input: Input<'a, 's>) -> IResult, Token<'a>, E> +where + 
E: ParseError> + ContextError, Context>, +{ context( - "lex string", + Context::String, recognize(delimited(tag("\""), is_not("\""), cut(tag("\"")))), )(input) .map(|(rest, result)| (rest, Token::new(TokenKind::String, result.input))) } -pub(crate) fn lex_comment<'a, 's, E: ParseError> + ContextError>>( - input: Input<'a, 's>, -) -> IResult, Token<'a>, E> { +pub(crate) fn lex_comment<'a, 's, E>(input: Input<'a, 's>) -> IResult, Token<'a>, E> +where + E: ParseError> + ContextError, Context>, +{ context( - "comment", + Context::Comment, recognize(tuple((tag("%"), many0(is_not("\n")), line_ending))), )(input) .map(|(rest, result)| (rest, Token::new(TokenKind::Comment, result.input))) } -pub(crate) fn lex_doc_comment< - 'a, - 's, - E: ParseError> + ContextError>, ->( +pub(crate) fn lex_doc_comment<'a, 's, E>( input: Input<'a, 's>, -) -> IResult, Token<'a>, E> { +) -> IResult, Token<'a>, E> +where + E: ParseError> + ContextError, Context>, +{ context( - "documentation comment", + Context::DocComment, recognize(many1(tuple((tag("%%"), many0(is_not("\n")), line_ending)))), )(input) .map(|(rest, result)| (rest, Token::new(TokenKind::DocComment, result.input))) } -pub(crate) fn lex_toplevel_doc_comment< - 'a, - 's, - E: ParseError> + ContextError>, ->( +pub(crate) fn lex_toplevel_doc_comment<'a, 's, E>( input: Input<'a, 's>, -) -> IResult, Token<'a>, E> { +) -> IResult, Token<'a>, E> +where + E: ParseError> + ContextError, Context>, +{ context( - "top level documentation comment", + Context::TlDocComment, recognize(many1(tuple((tag("%!"), many0(is_not("\n")), line_ending)))), )(input) .map(|(rest, result)| (rest, Token::new(TokenKind::TlDocComment, result.input))) } -pub(crate) fn lex_comments<'a, 's, E: ParseError> + ContextError>>( - input: Input<'a, 's>, -) -> IResult, Token<'a>, E> { +pub(crate) fn lex_comments<'a, 's, E>(input: Input<'a, 's>) -> IResult, Token<'a>, E> +where + E: ParseError> + ContextError, Context>, +{ context( - "comments", + Context::Comments, alt((lex_toplevel_doc_comment, lex_doc_comment, lex_comment)), )(input) } -pub(crate) fn lex_whitespace<'a, 's, E: ParseError> + ContextError>>( +pub(crate) fn lex_whitespace<'a, 's, E>( input: Input<'a, 's>, -) -> IResult, Token<'a>, E> { - context("whitespace", multispace1)(input) +) -> IResult, Token<'a>, E> +where + E: ParseError> + ContextError, Context>, +{ + context(Context::Whitespace, multispace1)(input) .map(|(rest, result)| (rest, Token::new(TokenKind::Whitespace, result.input))) } -pub(crate) fn lex_illegal<'a, 's, E: ParseError> + ContextError>>( - input: Input<'a, 's>, -) -> IResult, Token<'a>, E> { - context("illegal character", take(1usize))(input) +pub(crate) fn lex_illegal<'a, 's, E>(input: Input<'a, 's>) -> IResult, Token<'a>, E> +where + E: ParseError> + ContextError, Context>, +{ + context(Context::Illegal, take(1usize))(input) .map(|(rest, result)| (rest, Token::new(TokenKind::Illegal, result.input))) } -pub(crate) fn lex_tokens<'a, 's, E: ParseError> + ContextError>>( +pub(crate) fn lex_tokens<'a, 's, E>( input: Input<'a, 's>, -) -> IResult, Vec>, E> { +) -> IResult, Vec>, E> +where + E: ParseError> + ContextError, Context>, +{ all_consuming(many0(alt(( lex_iri, lex_operators, @@ -554,9 +658,10 @@ pub(crate) fn lex_tokens<'a, 's, E: ParseError> + ContextError> + ContextError>>( - input: Input<'a, 's>, -) -> (Input<'a, 's>, Token<'a>) { +pub(crate) fn skip_to_dot<'a, 's, E>(input: Input<'a, 's>) -> (Input<'a, 's>, Token<'a>) +where + E: ParseError> + ContextError, Context>, +{ let (rest_input, error_input) = 
recognize(pair( take_till::<_, Input<'_, '_>, nom::error::Error<_>>(|c| c == '.'), opt(tag(".")), @@ -573,6 +678,8 @@ pub(crate) fn skip_to_dot<'a, 's, E: ParseError> + ContextError>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![T!(Eof, 0, 1, "")] ) } @@ -611,7 +718,7 @@ mod tests { }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![T!(At, 0, 1, "@"), T!(Base, 1, 1, "base"), T!(Eof, 5, 1, ""),] ) } @@ -627,7 +734,7 @@ mod tests { }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(At, 0, 1, "@"), T!(Prefix, 1, 1, "prefix"), @@ -647,7 +754,7 @@ mod tests { }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(At, 0, 1, "@"), T!(Output, 1, 1, "output"), @@ -667,7 +774,7 @@ mod tests { }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(At, 0, 1, "@"), T!(Import, 1, 1, "import"), @@ -687,7 +794,7 @@ mod tests { }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(At, 0, 1, "@"), T!(Export, 1, 1, "export"), @@ -707,7 +814,7 @@ mod tests { }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(At, 0, 1, "@"), T!(Ident, 1, 1, "baseA"), @@ -744,7 +851,7 @@ mod tests { }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Ident, 0, 1, "P"), T!(OpenParen, 1, 1, "("), @@ -782,7 +889,7 @@ mod tests { }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Whitespace, 0, 1, " "), T!(Comment, 4, 1, "% Some Comment\n"), @@ -804,7 +911,7 @@ mod tests { }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Ident, 0, 1, "some_Ident"), T!(OpenParen, 10, 1, "("), @@ -829,7 +936,7 @@ mod tests { }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Underscore, 0, 1, "_"), T!(Ident, 1, 1, "someIdent"), @@ -855,7 +962,7 @@ mod tests { }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Iri, 0, 1, ""), T!(Eof, 31, 1, ""), @@ -874,7 +981,7 @@ mod tests { }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Iri, 0, 1, ""), T!(Whitespace, 37, 1, "\n"), @@ -896,7 +1003,7 @@ mod tests { }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Ident, 0, 1, "A"), T!(OpenParen, 1, 1, "("), @@ -936,7 +1043,7 @@ mod tests { }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Comment, 0, 1, "%d4 this should be a comment,\n"), T!( @@ -962,7 +1069,7 @@ mod tests { }; assert_eq!( // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Ident, 0, 1, "somePred"), T!(OpenParen, 8, 1, "("), @@ -988,7 +1095,7 @@ mod tests { }; assert_eq!( // lex_tokens::>(input).unwrap().1, - 
lex_tokens::>(input).unwrap().1, + lex_tokens::>(input).unwrap().1, vec![ T!(Whitespace, 0, 1, " \t \n\n\t \n"), T!(Eof, 12, 4, ""), @@ -1005,6 +1112,6 @@ mod tests { input, parser_state: errors, }; - dbg!(super::skip_to_dot::>(input)); + dbg!(super::skip_to_dot::>(input)); } } diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index a8d9f82e8..c6406aa2d 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -2443,12 +2443,12 @@ pub mod new { exp, greater, greater_equal, hash, less, less_equal, lex_comment, lex_doc_comment, lex_ident, lex_iri, lex_number, lex_operators, lex_string, lex_toplevel_doc_comment, lex_whitespace, minus, open_brace, open_paren, plus, question_mark, skip_to_dot, slash, - star, tilde, underscore, unequal, Error, NewParseError, ParserState, Span, Token, + star, tilde, underscore, unequal, Context, Error, ErrorTree, ParserState, Span, Token, TokenKind, }; use crate::io::parser::ast::AstNode; use nom::combinator::{all_consuming, cut, map, opt, recognize}; - use nom::error::{context, ContextError, ErrorKind, ParseError}; + use nom::error::{ErrorKind, ParseError}; use nom::sequence::{delimited, pair}; use nom::Parser; use nom::{ @@ -2458,8 +2458,8 @@ pub mod new { sequence::tuple, IResult, }; - use nom_greedyerror::GreedyError; - use nom_supreme::error::{ErrorTree, StackContext}; + use nom_supreme::{context::ContextError, error::StackContext}; + use sanitise_file_name::Stringy; fn outer_span<'a>(input: Span<'a>, rest_input: Span<'a>) -> Span<'a> { unsafe { @@ -2473,67 +2473,6 @@ pub mod new { } } - // fn ignore_ws_and_comments<'a, F, O, E: ParseError> + ContextError>>( - // inner: F, - // ) -> impl FnMut(Span<'a>) -> IResult, O, E> - // where - // F: Parser, O, E> + FnMut(Span<'a>) -> IResult, O, E>, - // { - // delimited( - // many0(alt((lex_whitespace, lex_comment))), - // inner, - // many0(alt((lex_whitespace, lex_comment))), - // ) - // } - - // fn expect<'a, F, E, T>( - // parser: F, - // error_msg: E, - // state: Errors, - // ) -> impl Fn(Span<'a>) -> IResult, T> - // where - // F: Fn(Span<'a>) -> IResult, T>, - // E: ToString, - // { - // move |input| match parser(input) { - // Ok((rest_input, output)) => Ok((rest_input, output)), - // Err(nom::Err::Error(nom::error::Error { input, .. })) - // | Err(nom::Err::Failure(nom::error::Error { input, .. 
})) => { - // let err = crate::io::lexer::Error(to_range(input), error_msg.to_string()); - // state.report_error(err); - // Ok(( - // input, - // Token { - // kind: TokenKind::Error, - // span: outer_span(input, rest_input), - // }, - // )) - // } - // Err(err) => Err(err), - // } - // } - - // fn context<'a, 's, O, E>( - // mut f: impl FnMut(Input<'a, 's>) -> IResult, O, E>, - // context: ParserContext, - // ) -> impl FnMut(Input<'a, 's>) -> IResult, O, E> { - // move |input| { - // let mut labels = *input.parser_state.labels.borrow_mut(); - // if let None = labels { - // labels = Some(Context { - // context: context.clone(), - // label: None, - // inner: vec![], - // }); - // labels - // } else { - // dbg!(&labels); - // labels - // }; - // f(input) - // } - // } - fn expect<'a, 's, O: Copy, E: ParseError>, F: Parser, O, E>>( mut parser: F, error_msg: impl ToString, @@ -2562,7 +2501,7 @@ pub mod new { fn recover<'a, 's, E>( mut parser: impl Parser, Statement<'a>, E>, error_msg: impl ToString, - context: &'static str, + context: Context, errors: ParserState<'s>, ) -> impl FnMut(Input<'a, 's>) -> IResult, Statement<'a>, E> { move |input: Input<'a, 's>| match parser.parse(input) { @@ -2578,7 +2517,7 @@ pub mod new { msg: error_msg.to_string(), context: vec![context], }; - errors.report_error(err); + // errors.report_error(err); let (rest_input, token) = skip_to_dot::>>(input); Ok((rest_input, Statement::Error(token))) } @@ -2586,91 +2525,8 @@ pub mod new { } } - // fn report_label<'a, 's, O, E>( - // mut parser: impl nom::Parser, O, E>, - // label: ParserLabel, - // ) -> impl FnMut(Input<'a, 's>) -> IResult, O, E> { - // move |input| match parser.parse(input) { - // Ok(result) => Ok(result), - // Err(err) => { - // match err { - // nom::Err::Incomplete(_) => (), - // nom::Err::Error(_) | nom::Err::Failure(_) => { - // if !input.input.is_empty() { - // input.parser_state.labels.borrow_mut().push(Label { - // label, - // pos: Position { - // offset: input.input.location_offset(), - // line: input.input.location_line(), - // column: input.input.get_utf8_column() as u32, - // }, - // }) - // }; - // } - // }; - // Err(err) - // } - // } - // } - - // fn report_error<'a, 's, O, E>( - // mut parser: impl nom::Parser, O, E>, - // ) -> impl FnMut(Input<'a, 's>) -> IResult, O, E> { - // move |input| match parser.parse(input) { - // Ok(result) => { - // input.parser_state.labels.borrow_mut().inner.clear(); - // Ok(result) - // } - // Err(err) => { - // match err { - // nom::Err::Incomplete(_) => (), - // nom::Err::Error(_) | nom::Err::Failure(_) => { - // // println!("LABELS BEFORE REPORT!!!!: {:#?}", input.parser_state.labels); - // let mut furthest_errors: Vec = Vec::new(); - // let labels = - // as Clone>::clone(&input.parser_state.labels.borrow()) - // .into_iter(); - // for label in labels { - // if let Some(last) = furthest_errors.last() { - // if label.pos.offset >= (*last).0.offset { - // let err = - // Error(label.pos, format!("expected {:?}", label.label)); - // furthest_errors.push(err); - // } - // } else { - // let err = Error(label.pos, format!("expected {:?}", label.label)); - // furthest_errors.push(err); - // }; - // } - // for err in furthest_errors { - // input.parser_state.report_error(err) - // } - // // for label in furthest_errors { - // // println!( - // // "Syntax error: Parser got stuck at line {} column {}, expected {:?}", - // // label.position.line, label.position.column, label.label - // // ); - // // println!( - // // "\n{}", - // // input - // // .parser_state - // // 
.source - // // .fragment() - // // .lines() - // // .collect::>() - // // .get((label.position.line - 1) as usize) - // // .unwrap() - // // ); - // // println!("{1:>0$}", label.position.column, "^"); - // // } - // } - // }; - // Err(err) - // } - // } - // } fn report_error<'a, 's, O>( - mut parser: impl nom::Parser, O, ErrorTree>>, + mut parser: impl Parser, O, ErrorTree>>, ) -> impl FnMut(Input<'a, 's>) -> IResult, O, ErrorTree>> { move |input| match parser.parse(input) { Ok(result) => Ok(result), @@ -2681,8 +2537,9 @@ pub mod new { match &e { nom::Err::Incomplete(_) => (), nom::Err::Error(err) | nom::Err::Failure(err) => { - let (deepest_pos, errors) = get_deepest_errors(err); + let (_deepest_pos, errors) = get_deepest_errors(err); for error in errors { + dbg!(&error); input.parser_state.report_error(error); } // let error = Error(deepest_pos, format!("")); @@ -2695,8 +2552,10 @@ pub mod new { } fn get_deepest_errors<'a, 's>(e: &'a ErrorTree>) -> (Position, Vec) { + dbg!(&e); match e { ErrorTree::Base { location, kind } => { + dbg!(&kind); let span = location.input; let err_pos = Position { offset: span.location_offset(), @@ -2707,7 +2566,7 @@ pub mod new { err_pos, vec![Error { pos: err_pos, - msg: format!("{}", e), + msg: "".to_string(), context: Vec::new(), }], ) @@ -2716,17 +2575,30 @@ pub mod new { // let mut err_pos = Position::default(); match &**base { ErrorTree::Base { location, kind } => { + dbg!(&kind); let span = location.input; let err_pos = Position { offset: span.location_offset(), line: span.location_line(), column: span.get_utf8_column() as u32, }; + let mut msg = String::from(""); + for (_, context) in contexts { + match context { + StackContext::Kind(_) => todo!(), + StackContext::Context(c) => match c { + Context::Tag(t) => { + msg.push_str(t); + } + _ => (), + }, + } + } ( err_pos, vec![Error { pos: err_pos, - msg: format!("{}", base), + msg, context: context_strs(contexts), }], ) @@ -2769,21 +2641,39 @@ pub mod new { } } - fn context_strs( - contexts: &Vec<(Input<'_, '_>, StackContext<&'static str>)>, - ) -> Vec<&'static str> { + fn context_strs(contexts: &Vec<(Input<'_, '_>, StackContext)>) -> Vec { contexts .iter() .map(|(_, c)| match c { StackContext::Kind(k) => todo!(), - StackContext::Context(str) => *str, + StackContext::Context(c) => *c, }) .collect() } - fn wsoc0<'a, 's, E: ParseError> + ContextError>>( - input: Input<'a, 's>, - ) -> IResult, Option>, E> { + pub(crate) fn context<'a, 's, P, E, F, O>( + context: P, + mut f: F, + ) -> impl FnMut(Input<'a, 's>) -> IResult, O, E> + where + P: Clone, + F: Parser, O, E>, + E: ContextError, P>, + { + move |i| match f.parse(i.clone()) { + Ok(o) => Ok(o), + Err(nom::Err::Incomplete(i)) => Err(nom::Err::Incomplete(i)), + Err(nom::Err::Error(e)) => Err(nom::Err::Error(E::add_context(i, context.clone(), e))), + Err(nom::Err::Failure(e)) => { + Err(nom::Err::Failure(E::add_context(i, context.clone(), e))) + } + } + } + + fn wsoc0<'a, 's, E>(input: Input<'a, 's>) -> IResult, Option>, E> + where + E: ParseError> + ContextError, Context>, + { many0(alt((lex_whitespace, lex_comment)))(input).map(|(rest_input, vec)| { if vec.is_empty() { (rest_input, None) @@ -2799,7 +2689,7 @@ pub mod new { }) } - fn wsoc1<'a, 's, E: ParseError> + ContextError>>( + fn wsoc1<'a, 's, E: ParseError> + ContextError, Context>>( input: Input<'a, 's>, ) -> IResult, Wsoc<'a>, E> { many1(alt((lex_whitespace, lex_comment)))(input).map(|(rest_input, vec)| { @@ -2814,11 +2704,15 @@ pub mod new { } /// Parse a full program consisting of directives, 
facts, rules and comments. - fn parse_program<'a, 's, E: ParseError> + ContextError>>( + fn parse_program< + 'a, + 's, + E: ParseError> + ContextError, Context>, + >( input: Input<'a, 's>, ) -> (Program<'a>, Vec) { let result = context( - "program", + Context::Program, pair( opt(lex_toplevel_doc_comment::>>), many0(recover( @@ -2831,7 +2725,7 @@ pub mod new { parse_comment, ))), "failed to parse statement", - "program", + Context::Program, input.parser_state, )), ), @@ -2868,26 +2762,34 @@ pub mod new { } /// Parse whitespace that is between directives, facts, rules and comments. - fn parse_whitespace<'a, 's, E: ParseError> + ContextError>>( + fn parse_whitespace< + 'a, + 's, + E: ParseError> + ContextError, Context>, + >( input: Input<'a, 's>, ) -> IResult, Statement<'a>, E> { lex_whitespace(input).map(|(rest_input, ws)| (rest_input, Statement::Whitespace(ws))) } /// Parse normal comments that start with a `%` and ends at the line ending. - fn parse_comment<'a, 's, E: ParseError> + ContextError>>( + fn parse_comment< + 'a, + 's, + E: ParseError> + ContextError, Context>, + >( input: Input<'a, 's>, ) -> IResult, Statement<'a>, E> { lex_comment(input).map(|(rest_input, comment)| (rest_input, Statement::Comment(comment))) } /// Parse a fact of the form `predicateName(term1, term2, …).` - fn parse_fact<'a, 's, E: ParseError> + ContextError>>( + fn parse_fact<'a, 's, E: ParseError> + ContextError, Context>>( input: Input<'a, 's>, ) -> IResult, Statement<'a>, E> { // dbg!(&input.parser_state.labels); context( - "fact", + Context::Fact, tuple((opt(lex_doc_comment), parse_normal_atom, wsoc0, dot)), )(input) .map(|(rest_input, (doc_comment, atom, ws, dot))| { @@ -2905,11 +2807,11 @@ pub mod new { } /// Parse a rule of the form `headPredicate1(term1, term2, …), headPredicate2(term1, term2, …) :- bodyPredicate(term1, …), term1 >= (term2 + term3) * function(term1, …) .` - fn parse_rule<'a, 's, E: ParseError> + ContextError>>( + fn parse_rule<'a, 's, E: ParseError> + ContextError, Context>>( input: Input<'a, 's>, ) -> IResult, Statement<'a>, E> { context( - "rule", + Context::Rule, tuple(( opt(lex_doc_comment), parse_head, @@ -2942,25 +2844,29 @@ pub mod new { } /// Parse the head atoms of a rule. - fn parse_head<'a, 's, E: ParseError> + ContextError>>( + fn parse_head<'a, 's, E: ParseError> + ContextError, Context>>( input: Input<'a, 's>, ) -> IResult, List<'a, Atom<'a>>, E> { - context("rule head", parse_list(parse_head_atoms))(input) + context(Context::RuleHead, parse_list(parse_head_atoms))(input) } /// Parse the body atoms of a rule. - fn parse_body<'a, 's, E: ParseError> + ContextError>>( + fn parse_body<'a, 's, E: ParseError> + ContextError, Context>>( input: Input<'a, 's>, ) -> IResult, List<'a, Atom<'a>>, E> { - context("rule body", parse_list(parse_body_atoms))(input) + context(Context::RuleBody, parse_list(parse_body_atoms))(input) } /// Parse the directives (@base, @prefix, @import, @export, @output). - fn parse_directive<'a, 's, E: ParseError> + ContextError>>( + fn parse_directive< + 'a, + 's, + E: ParseError> + ContextError, Context>, + >( input: Input<'a, 's>, ) -> IResult, Statement<'a>, E> { context( - "directive", + Context::Directive, alt(( parse_base_directive, parse_prefix_directive, @@ -2973,11 +2879,15 @@ pub mod new { } /// Parse the base directive. 
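    /// A base directive has the form `@base <iri> .` and declares the base IRI
    /// for the program.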
- fn parse_base_directive<'a, 's, E: ParseError> + ContextError>>( + fn parse_base_directive< + 'a, + 's, + E: ParseError> + ContextError, Context>, + >( input: Input<'a, 's>, ) -> IResult, Directive<'a>, E> { context( - "base directive", + Context::DirectiveBase, tuple(( opt(lex_doc_comment), recognize(pair( @@ -3013,12 +2923,12 @@ pub mod new { fn parse_prefix_directive< 'a, 's, - E: ParseError> + ContextError>, + E: ParseError> + ContextError, Context>, >( input: Input<'a, 's>, ) -> IResult, Directive<'a>, E> { context( - "prefix directive", + Context::DirectivePrefix, tuple(( opt(lex_doc_comment), recognize(pair( @@ -3063,12 +2973,12 @@ pub mod new { fn parse_import_directive< 'a, 's, - E: ParseError> + ContextError>, + E: ParseError> + ContextError, Context>, >( input: Input<'a, 's>, ) -> IResult, Directive<'a>, E> { context( - "import directive", + Context::DirectiveImport, tuple(( opt(lex_doc_comment), recognize(pair( @@ -3114,12 +3024,12 @@ pub mod new { fn parse_export_directive< 'a, 's, - E: ParseError> + ContextError>, + E: ParseError> + ContextError, Context>, >( input: Input<'a, 's>, ) -> IResult, Directive<'a>, E> { context( - "export directive", + Context::DirectiveExport, tuple(( opt(lex_doc_comment), recognize(pair( @@ -3165,12 +3075,12 @@ pub mod new { fn parse_output_directive< 'a, 's, - E: ParseError> + ContextError>, + E: ParseError> + ContextError, Context>, >( input: Input<'a, 's>, ) -> IResult, Directive<'a>, E> { context( - "output directive", + Context::DirectiveOutput, tuple(( opt(lex_doc_comment), recognize(pair( @@ -3205,7 +3115,7 @@ pub mod new { } // /// Parse a list of `ident1, ident2, …` - // fn parse_identifier_list<'a, 's, E: ParseError> + ContextError>>( + // fn parse_identifier_list<'a, 's, E: ParseError> + ContextError, Context>>( // input: Input<'a, 's>, // ) -> IResult, List<'a, Token<'a>>, E> { // pair( @@ -3229,12 +3139,17 @@ pub mod new { // }) // } - fn parse_list<'a, 's, T, E: ParseError> + ContextError>>( + fn parse_list< + 'a, + 's, + T, + E: ParseError> + ContextError, Context>, + >( parse_t: fn(Input<'a, 's>) -> IResult, T, E>, ) -> impl Fn(Input<'a, 's>) -> IResult, List<'a, T>, E> { move |input: Input<'a, 's>| { context( - "list", + Context::List, pair(parse_t, many0(tuple((wsoc0, comma, wsoc0, parse_t)))), )(input) .map(|(rest_input, (first, rest))| { @@ -3251,21 +3166,29 @@ pub mod new { } /// Parse the head atoms. The same as the body atoms except for disallowing negated atoms. - fn parse_head_atoms<'a, 's, E: ParseError> + ContextError>>( + fn parse_head_atoms< + 'a, + 's, + E: ParseError> + ContextError, Context>, + >( input: Input<'a, 's>, ) -> IResult, Atom<'a>, E> { context( - "rule head atoms", + Context::HeadAtoms, alt((parse_normal_atom, parse_infix_atom, parse_map_atom)), )(input) } /// Parse the body atoms. The same as the head atoms except for allowing negated atoms. - fn parse_body_atoms<'a, 's, E: ParseError> + ContextError>>( + fn parse_body_atoms< + 'a, + 's, + E: ParseError> + ContextError, Context>, + >( input: Input<'a, 's>, ) -> IResult, Atom<'a>, E> { context( - "rule body atoms", + Context::BodyAtoms, alt(( parse_normal_atom, parse_negative_atom, @@ -3276,18 +3199,26 @@ pub mod new { } /// Parse an atom of the form `predicateName(term1, term2, …)`. 
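    /// This is just a named tuple wrapped as a positive atom.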
-    fn parse_normal_atom<'a, 's, E: ParseError> + ContextError>>(
+    fn parse_normal_atom<
+        'a,
+        's,
+        E: ParseError> + ContextError, Context>,
+    >(
        input: Input<'a, 's>,
    ) -> IResult, Atom<'a>, E> {
-        context("positive atom", parse_named_tuple)(input)
+        context(Context::PositiveAtom, parse_named_tuple)(input)
            .map(|(rest_input, named_tuple)| (rest_input, Atom::Positive(named_tuple)))
    }

    /// Parse an atom of the form `~predicateName(term1, term2, …)`.
-    fn parse_negative_atom<'a, 's, E: ParseError> + ContextError>>(
+    fn parse_negative_atom<
+        'a,
+        's,
+        E: ParseError> + ContextError, Context>,
+    >(
        input: Input<'a, 's>,
    ) -> IResult, Atom<'a>, E> {
-        context("negative atom", pair(tilde, parse_named_tuple))(input).map(
+        context(Context::NegativeAtom, pair(tilde, parse_named_tuple))(input).map(
            |(rest_input, (tilde, named_tuple))| {
                (
                    rest_input,
                    Atom::Negative {
                        span: outer_span(input.input, rest_input.input),
                        neg: tilde,
                        atom: named_tuple,
                    },
                )
            },
        )
    }

    /// Parse an "infix atom" of the form `term1 <operation> term2`.
    /// The supported infix operations are `<`, `<=`, `=`, `>=`, `>` and `!=`.
-    fn parse_infix_atom<'a, 's, E: ParseError> + ContextError>>(
+    fn parse_infix_atom<
+        'a,
+        's,
+        E: ParseError> + ContextError, Context>,
+    >(
        input: Input<'a, 's>,
    ) -> IResult, Atom<'a>, E> {
        context(
-            "infix atom",
+            Context::InfixAtom,
            tuple((parse_term, wsoc0, parse_operation_token, wsoc0, parse_term)),
        )(input)
        .map(|(rest_input, (lhs, ws1, operation, ws2, rhs))| {
            (
                rest_input,
                Atom::InfixAtom {
                    span: outer_span(input.input, rest_input.input),
                    lhs,
                    ws1,
                    operation,
                    ws2,
                    rhs,
                },
            )
        })
    }

    /// Parse a tuple with an optional name, like `ident(term1, term2)`
    /// or just `(int, int, skip)`.
-    fn parse_tuple<'a, 's, E: ParseError> + ContextError>>(
+    fn parse_tuple<'a, 's, E: ParseError> + ContextError, Context>>(
        input: Input<'a, 's>,
    ) -> IResult, Tuple<'a>, E> {
        context(
-            "tuple",
+            Context::Tuple,
            tuple((
                opt(lex_ident),
                wsoc0,
                open_paren,
                wsoc0,
                opt(parse_list(parse_term)),
                wsoc0,
                close_paren,
            )),
        )(input)
        .map(
            |(rest_input, (identifier, ws1, open_paren, ws2, terms, ws3, close_paren))| {
                (
                    rest_input,
                    Tuple {
                        span: outer_span(input.input, rest_input.input),
                        identifier,
                        ws1,
                        open_paren,
                        ws2,
                        terms,
                        ws3,
                        close_paren,
                    },
                )
            },
        )
    }

    /// Parse a named tuple. This function is like `parse_tuple` with the difference,
    /// that it enforces the existence of an identifier for the tuple.
-    fn parse_named_tuple<'a, 's, E: ParseError> + ContextError>>(
+    fn parse_named_tuple<
+        'a,
+        's,
+        E: ParseError> + ContextError, Context>,
+    >(
        input: Input<'a, 's>,
    ) -> IResult, Tuple<'a>, E> {
        context(
-            "named tuple",
+            Context::NamedTuple,
            tuple((
                lex_ident,
                wsoc0,
                open_paren,
                wsoc0,
                opt(parse_list(parse_term)),
                wsoc0,
                close_paren,
            )),
        )(input)
        .map(
            |(rest_input, (identifier, ws1, open_paren, ws2, terms, ws3, close_paren))| {
                (
                    rest_input,
                    Tuple {
                        span: outer_span(input.input, rest_input.input),
                        identifier: Some(identifier),
                        ws1,
                        open_paren,
                        ws2,
                        terms,
                        ws3,
                        close_paren,
                    },
                )
            },
        )
    }

    /// Parse a map. Maps are denoted with `{…}` and can have an optional name, e.g. `csv {…}`.
    /// Inside the curly braces is a list of pairs.
-    fn parse_map<'a, 's, E: ParseError> + ContextError>>(
+    fn parse_map<'a, 's, E: ParseError> + ContextError, Context>>(
        input: Input<'a, 's>,
    ) -> IResult, Map<'a>, E> {
        context(
-            "map",
+            Context::Map,
            tuple((
                opt(lex_ident),
                wsoc0,
                open_brace,
                wsoc0,
                opt(parse_list(parse_pair)),
                wsoc0,
                close_brace,
            )),
        )(input)
        .map(
            |(rest_input, (identifier, ws1, open_brace, ws2, pairs, ws3, close_brace))| {
                (
                    rest_input,
                    Map {
                        span: outer_span(input.input, rest_input.input),
                        identifier,
                        ws1,
                        open_brace,
                        ws2,
                        pairs,
                        ws3,
                        close_brace,
                    },
                )
            },
        )
    }

    /// Parse a map in an atom position.
-    fn parse_map_atom<'a, 's, E: ParseError> + ContextError>>(
+    fn parse_map_atom<
+        'a,
+        's,
+        E: ParseError> + ContextError, Context>,
+    >(
        input: Input<'a, 's>,
    ) -> IResult, Atom<'a>, E> {
        parse_map(input).map(|(rest_input, map)| (rest_input, Atom::Map(map)))
    }

    // /// Parse a pair list of the form `key1 = value1, key2 = value2, …`.
-    // fn parse_pair_list<'a, 's, E: ParseError> + ContextError>>(
+    // fn parse_pair_list<'a, 's, E: ParseError> + ContextError, Context>>(
    //     input: Input<'a, 's>,
    //     state: Errors,
    // ) -> IResult, Option, Term<'a>>>>, E> {
    //     opt(pair(
    //         parse_pair,
    //         many0(tuple((
    //             opt(lex_whitespace),
    //             comma,
    //             opt(lex_whitespace),
    //             parse_pair,
    //         ))),
    //     ))(input)
    //     .map(|(rest_input, pair_list)| {
    //         (
    //             rest_input,
    //             pair_list.map(|(first, rest)| List {
    //                 span: outer_span(input.input, rest_input.input),
    //                 first,
    //                 rest: if rest.is_empty() { None } else { Some(rest) },
    //             }),
    //         )
    //     })
    // }

    /// Parse a pair of the form `key = value`.
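    /// Pairs occur as the entries of maps, i.e. between the curly braces of `csv {…}`.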
-    fn parse_pair<'a, 's, E: ParseError> + ContextError>>(
+    fn parse_pair<'a, 's, E: ParseError> + ContextError, Context>>(
        input: Input<'a, 's>,
    ) -> IResult, Pair<'a, Term<'a>, Term<'a>>, E> {
-        context("pair", tuple((parse_term, wsoc0, equal, wsoc0, parse_term)))(input).map(
-            |(rest_input, (key, ws1, equal, ws2, value))| {
-                (
-                    rest_input,
-                    Pair {
-                        span: outer_span(input.input, rest_input.input),
-                        key,
-                        ws1,
-                        equal,
-                        ws2,
-                        value,
-                    },
-                )
-            },
-        )
+        context(
+            Context::Pair,
+            tuple((parse_term, wsoc0, equal, wsoc0, parse_term)),
+        )(input)
+        .map(|(rest_input, (key, ws1, equal, ws2, value))| {
+            (
+                rest_input,
+                Pair {
+                    span: outer_span(input.input, rest_input.input),
+                    key,
+                    ws1,
+                    equal,
+                    ws2,
+                    value,
+                },
+            )
+        })
    }

    // /// Parse a list of terms of the form `term1, term2, …`.
-    // fn parse_term_list<'a, 's, E: ParseError> + ContextError>>(
+    // fn parse_term_list<'a, 's, E: ParseError> + ContextError, Context>>(
    //     input: Input<'a, 's>,
    //     state: Errors,
    // ) -> IResult, List<'a, Term<'a>>, E> {
    //     pair(
    //         parse_term,
    //         many0(tuple((
    //             opt(lex_whitespace),
    //             comma,
    //             opt(lex_whitespace),
    //             parse_term,
    //         ))),
    //     )(input)
    //     .map(|(rest_input, (first, rest))| {
    //         (
    //             rest_input,
    //             List {
    //                 span: outer_span(input.input, rest_input.input),
    //                 first,
    //                 rest: if rest.is_empty() { None } else { Some(rest) },
    //             },
    //         )
    //     })
    // }

    /// Parse a term. A term can be a primitive value (constant, number, string, …),
    /// a variable (universal or existential), a map, a function (-symbol), an arithmetic
    /// operation, an aggregation or a tuple of terms, e.g. `(term1, term2, …)`.
-    fn parse_term<'a, 's, E: ParseError> + ContextError>>(
+    fn parse_term<'a, 's, E: ParseError> + ContextError, Context>>(
        input: Input<'a, 's>,
    ) -> IResult, Term<'a>, E> {
        context(
-            "term",
+            Context::Term,
            alt((
                parse_binary_term,
                parse_tuple_term,
                // parse_unary_prefix_term,
                parse_map_term,
                parse_primitive_term,
                parse_variable,
                parse_existential,
                parse_aggregation_term,
                parse_blank,
            )),
        )(input)
    }

    /// Parse a primitive term (simple constant, iri constant, number, string).
-    fn parse_primitive_term<'a, 's, E: ParseError> + ContextError>>(
+    fn parse_primitive_term<
+        'a,
+        's,
+        E: ParseError> + ContextError, Context>,
+    >(
        input: Input<'a, 's>,
    ) -> IResult, Term<'a>, E> {
        context(
-            "primitive term",
+            Context::TermPrivimitive,
            alt((
                parse_rdf_literal,
                parse_ident,
                parse_iri,
                parse_number,
                parse_string,
            )),
        )(input)
        .map(|(rest_input, term)| (rest_input, Term::Primitive(term)))
    }

    /// Parse an RDF literal, e.g. 
"2023-06-19"^^ - fn parse_rdf_literal<'a, 's, E: ParseError> + ContextError>>( + fn parse_rdf_literal< + 'a, + 's, + E: ParseError> + ContextError, Context>, + >( input: Input<'a, 's>, ) -> IResult, Primitive<'a>, E> { context( - "RDF Literal", + Context::RdfLiteral, tuple((lex_string, recognize(pair(caret, caret)), lex_iri)), )(input) .map(|(rest_input, (string, carets, iri))| { @@ -3586,29 +3539,33 @@ pub mod new { }) } - fn parse_ident<'a, 's, E: ParseError> + ContextError>>( + fn parse_ident<'a, 's, E: ParseError> + ContextError, Context>>( input: Input<'a, 's>, ) -> IResult, Primitive<'a>, E> { lex_ident(input).map(|(rest_input, ident)| (rest_input, Primitive::Constant(ident))) } - fn parse_iri<'a, 's, E: ParseError> + ContextError>>( + fn parse_iri<'a, 's, E: ParseError> + ContextError, Context>>( input: Input<'a, 's>, ) -> IResult, Primitive<'a>, E> { lex_iri(input).map(|(rest_input, iri)| (rest_input, Primitive::Iri(iri))) } - fn parse_number<'a, 's, E: ParseError> + ContextError>>( + fn parse_number<'a, 's, E: ParseError> + ContextError, Context>>( input: Input<'a, 's>, ) -> IResult, Primitive<'a>, E> { - context("number", alt((parse_decimal, parse_integer)))(input) + context(Context::Number, alt((parse_decimal, parse_integer)))(input) } - fn parse_decimal<'a, 's, E: ParseError> + ContextError>>( + fn parse_decimal< + 'a, + 's, + E: ParseError> + ContextError, Context>, + >( input: Input<'a, 's>, ) -> IResult, Primitive<'a>, E> { context( - "decimal", + Context::Decimal, tuple(( opt(alt((plus, minus))), opt(lex_number), @@ -3632,10 +3589,14 @@ pub mod new { }) } - fn parse_integer<'a, 's, E: ParseError> + ContextError>>( + fn parse_integer< + 'a, + 's, + E: ParseError> + ContextError, Context>, + >( input: Input<'a, 's>, ) -> IResult, Primitive<'a>, E> { - context("integer", pair(opt(alt((plus, minus))), lex_number))(input).map( + context(Context::Integer, pair(opt(alt((plus, minus))), lex_number))(input).map( |(rest_input, (sign, number))| { ( rest_input, @@ -3652,24 +3613,28 @@ pub mod new { ) } - fn parse_exponent<'a, 's, E: ParseError> + ContextError>>( + fn parse_exponent< + 'a, + 's, + E: ParseError> + ContextError, Context>, + >( input: Input<'a, 's>, ) -> IResult, Exponent<'a>, E> { context( - "exponent", + Context::Exponent, tuple((exp, opt(alt((plus, minus))), lex_number)), )(input) .map(|(rest_input, (e, sign, number))| (rest_input, Exponent { e, sign, number })) } - fn parse_string<'a, 's, E: ParseError> + ContextError>>( + fn parse_string<'a, 's, E: ParseError> + ContextError, Context>>( input: Input<'a, 's>, ) -> IResult, Primitive<'a>, E> { lex_string(input).map(|(rest_input, string)| (rest_input, Primitive::String(string))) } // /// Parse an unary term. - // fn parse_unary_prefix_term<'a, 's, E: ParseError> + ContextError>>(input: Input<'a, 's>) -> IResult, Term<'a>, E> { + // fn parse_unary_prefix_term<'a, 's, E: ParseError> + ContextError, Context>>(input: Input<'a, 's>) -> IResult, Term<'a>, E> { // pair(lex_unary_prefix_operators, parse_term)(input).map( // |(rest_input, (operation, term))| { // ( @@ -3685,11 +3650,15 @@ pub mod new { // } /// Parse a binary infix operation of the form `term1 term2`. 
- fn parse_binary_term<'a, 's, E: ParseError> + ContextError>>( + fn parse_binary_term< + 'a, + 's, + E: ParseError> + ContextError, Context>, + >( input: Input<'a, 's>, ) -> IResult, Term<'a>, E> { context( - "binary term", + Context::TermBinary, pair( parse_arithmetic_product, opt(tuple((wsoc0, alt((plus, minus)), wsoc0, parse_binary_term))), @@ -3719,12 +3688,12 @@ pub mod new { fn parse_arithmetic_product< 'a, 's, - E: ParseError> + ContextError>, + E: ParseError> + ContextError, Context>, >( input: Input<'a, 's>, ) -> IResult, Term<'a>, E> { context( - "arithmetic product", + Context::ArithmeticProduct, pair( parse_arithmetic_factor, opt(tuple(( @@ -3757,12 +3726,12 @@ pub mod new { fn parse_arithmetic_factor< 'a, 's, - E: ParseError> + ContextError>, + E: ParseError> + ContextError, Context>, >( input: Input<'a, 's>, ) -> IResult, Term<'a>, E> { context( - "arithmetic factor", + Context::ArithmeticFactor, alt(( parse_tuple_term, parse_aggregation_term, @@ -3798,12 +3767,12 @@ pub mod new { fn parse_aggregation_term< 'a, 's, - E: ParseError> + ContextError>, + E: ParseError> + ContextError, Context>, >( input: Input<'a, 's>, ) -> IResult, Term<'a>, E> { context( - "aggregation term", + Context::TermAggregation, tuple(( recognize(pair(hash, lex_ident)), open_paren, @@ -3835,36 +3804,48 @@ pub mod new { } /// Parse a `_` - fn parse_blank<'a, 's, E: ParseError> + ContextError>>( + fn parse_blank<'a, 's, E: ParseError> + ContextError, Context>>( input: Input<'a, 's>, ) -> IResult, Term<'a>, E> { - context("blank", underscore)(input) + context(Context::Blank, underscore)(input) .map(|(rest_input, underscore)| (rest_input, Term::Blank(underscore))) } /// Parse a tuple term, either with a name (function symbol) or as a term (-list) with /// parenthesis. - fn parse_tuple_term<'a, 's, E: ParseError> + ContextError>>( + fn parse_tuple_term< + 'a, + 's, + E: ParseError> + ContextError, Context>, + >( input: Input<'a, 's>, ) -> IResult, Term<'a>, E> { - context("tuple term", parse_tuple)(input) + context(Context::TermTuple, parse_tuple)(input) .map(|(rest_input, named_tuple)| (rest_input, Term::Tuple(Box::new(named_tuple)))) } /// Parse a map as a term. - fn parse_map_term<'a, 's, E: ParseError> + ContextError>>( + fn parse_map_term< + 'a, + 's, + E: ParseError> + ContextError, Context>, + >( input: Input<'a, 's>, ) -> IResult, Term<'a>, E> { - context("map term", parse_map)(input) + context(Context::TermMap, parse_map)(input) .map(|(rest_input, map)| (rest_input, Term::Map(Box::new(map)))) } /// Parse a variable. - fn parse_variable<'a, 's, E: ParseError> + ContextError>>( + fn parse_variable< + 'a, + 's, + E: ParseError> + ContextError, Context>, + >( input: Input<'a, 's>, ) -> IResult, Term<'a>, E> { context( - "universal variable", + Context::UniversalVariable, recognize(pair(question_mark, lex_ident)), )(input) .map(|(rest_input, var)| { @@ -3879,11 +3860,15 @@ pub mod new { } /// Parse an existential variable. - fn parse_existential<'a, 's, E: ParseError> + ContextError>>( + fn parse_existential< + 'a, + 's, + E: ParseError> + ContextError, Context>, + >( input: Input<'a, 's>, ) -> IResult, Term<'a>, E> { context( - "existential variable", + Context::ExistentialVariable, recognize(pair(exclamation_mark, lex_ident)), )(input) .map(|(rest_input, existential)| { @@ -3899,11 +3884,15 @@ pub mod new { // Order of parser compinator is important, because of ordered choice and no backtracking /// Parse the operator for an infix atom. 
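    /// The alternatives are ordered so that the longer operators are tried first,
    /// e.g. `<=` before `<`: `alt` commits to the first branch that matches and
    /// does not backtrack.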
- fn parse_operation_token<'a, 's, E: ParseError> + ContextError>>( + fn parse_operation_token< + 'a, + 's, + E: ParseError> + ContextError, Context>, + >( input: Input<'a, 's>, ) -> IResult, Token<'a>, E> { context( - "operators", + Context::Operators, alt((less_equal, greater_equal, equal, unequal, less, greater)), )(input) } @@ -3916,7 +3905,6 @@ pub mod new { }; use nom::error::{convert_error, VerboseError}; - use nom_supreme::error::ErrorTree; use super::*; use crate::io::{ @@ -3970,7 +3958,7 @@ pub mod new { }; assert_eq!( // parse_program::>(input).unwrap().1, - parse_program::>(input).0, + parse_program::>(input).0, Program { span: input.input, tl_doc_comment: None, @@ -4037,7 +4025,7 @@ pub mod new { }; assert_eq!( // parse_program::>(input).unwrap().1, - parse_program::>(input).0, + parse_program::>(input).0, Program { tl_doc_comment: None, span: input.input, @@ -4317,7 +4305,7 @@ pub mod new { }; assert_eq!( // parse_program::>(input).unwrap().1, - parse_program::>(input).0, + parse_program::>(input).0, Program { span: input.input, tl_doc_comment: None, @@ -4439,7 +4427,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters parser_state: errors, }; // let ast = parse_program::>(input); - let (ast, _) = parse_program::>(input); + let (ast, _) = parse_program::>(input); println!("{}", ast); assert_eq!( { @@ -4506,7 +4494,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters parser_state, }; // let result = parse_term::>(input); - let result = parse_term::>(input); + let result = parse_term::>(input); result.unwrap().1 }, Term::Primitive(Primitive::Number { @@ -4529,7 +4517,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters parser_state, }; // let result = parse_term::>(input); - let result = parse_term::>(input); + let result = parse_term::>(input); result.unwrap().1 }, Term::Binary { @@ -4566,7 +4554,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters parser_state, }; // let result = parse_term::>(input); - let result = parse_term::>(input); + let result = parse_term::>(input); result.unwrap().1 }, Term::Binary { @@ -4603,7 +4591,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters parser_state, }; // let result = parse_term::>(input); - let result = parse_term::>(input); + let result = parse_term::>(input); result.unwrap().1 }, Term::Binary { @@ -4640,7 +4628,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters parser_state, }; // let result = parse_term::>(input); - let result = parse_term::>(input); + let result = parse_term::>(input); result.unwrap().1 }, Term::Binary { @@ -4677,7 +4665,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters parser_state, }; // let result = parse_term::>(input); - let result = parse_term::>(input); + let result = parse_term::>(input); result.unwrap().1 }, Term::Binary { @@ -4728,7 +4716,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters parser_state, }; // let result = parse_term::>(input); - let result = parse_term::>(input); + let result = parse_term::>(input); result.unwrap().1 }, Term::Binary { @@ -4779,7 +4767,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters parser_state, }; // let result = parse_term::>(input); - let result = parse_term::>(input); + let result = parse_term::>(input); // let result = 
parse_term::>(Span::new("(15+3*2-(7+35)*8)/3")); // match result { // Ok(ast) => { @@ -4947,7 +4935,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters parser_state, }; // let result = parse_term::>(input); - let result = parse_term::>(input); + let result = parse_term::>(input); result.unwrap().1 }, Term::Binary { @@ -5097,9 +5085,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters parser_state, }; // parse_exponent::>(input) - parse_exponent::>(input) - .unwrap() - .1 + parse_exponent::>(input).unwrap().1 }, Exponent { e: T! {TokenKind::Exponent, 0,1,"e"}, @@ -5118,7 +5104,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters input, parser_state, }; - let result = parse_program::>(input); + let result = parse_program::>(input); println!("{}\n\n{:#?}", result.0, result.1); // assert!(false); } @@ -5132,8 +5118,8 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters input, parser_state, }; - dbg!(wsoc0::>(input)); - dbg!(wsoc1::>(input)); + dbg!(wsoc0::>(input)); + dbg!(wsoc1::>(input)); } #[test] @@ -5148,6 +5134,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters }; let result = parse_program::>>(input); dbg!(&result); + println!("{}", result.0); } } } diff --git a/nemo/src/io/parser/types.rs b/nemo/src/io/parser/types.rs index 7031f1d26..2b0832655 100644 --- a/nemo/src/io/parser/types.rs +++ b/nemo/src/io/parser/types.rs @@ -822,6 +822,12 @@ impl std::fmt::Display for Input<'_, '_> { } } +impl nom_supreme::context::ContextError for Input<'_, '_> { + fn add_context(location: I, ctx: C, other: Self) -> Self { + todo!() + } +} + pub(crate) trait ToRange { fn to_range(&self) -> Range; } From 2090189924a2e38f054e5859524951fe9d23e66f Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Fri, 31 May 2024 08:55:21 +0200 Subject: [PATCH 103/214] Make error reporting message a bit nicer --- nemo-language-server/src/language_server.rs | 36 +++++++++++++++++---- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/nemo-language-server/src/language_server.rs b/nemo-language-server/src/language_server.rs index e39252745..428633b54 100644 --- a/nemo-language-server/src/language_server.rs +++ b/nemo-language-server/src/language_server.rs @@ -79,24 +79,46 @@ impl Backend { let (_program, errors) = parse_program_str(text); - let diagnostics = errors + use std::collections::{BTreeMap, HashSet}; + let mut error_map: BTreeMap> = BTreeMap::new(); + for error in &errors { + if let Some(set) = error_map.get_mut(&error.pos) { + set.insert(error.msg.clone()); + } else { + let mut set = HashSet::new(); + set.insert(error.msg.clone()); + error_map.insert(error.pos, set); + }; + } + + let diagnostics = error_map .into_iter() - .map(|error| Diagnostic { - message: error.msg, + .map(|(pos, error_set)| Diagnostic { + message: /*error.msg*/ { + format!("expected{}", { + let mut string = String::new(); + for s in error_set { + string.push_str(" '"); + string.push_str(s.as_str()); + string.push_str("',"); + } + string + }) + }, range: Range::new( line_col_to_position( &line_index, LineCol { - line: error.pos.line - 1, - col: error.pos.column - 1, + line: pos.line - 1, + col: pos.column - 1, }, ) .unwrap(), line_col_to_position( &line_index, LineCol { - line: error.pos.line - 1, - col: error.pos.column - 1 + 1, + line: pos.line - 1, + col: pos.column - 1 + 1, }, ) .unwrap(), From 0063dabaaf4ab97fc9e738c0f39d206d0cbcc459 Mon Sep 17 00:00:00 2001 From: 
Jakob Steinberg Date: Wed, 5 Jun 2024 14:37:35 +0200 Subject: [PATCH 104/214] Remove debug printing and add prefixed name parsing --- nemo/src/io/lexer.rs | 26 +++++++++++++++++- nemo/src/io/parser.rs | 42 ++++++++++++++++++++--------- nemo/src/io/parser/ast/statement.rs | 2 +- nemo/src/io/parser/ast/term.rs | 34 ++++++++++++++++++----- 4 files changed, 84 insertions(+), 20 deletions(-) diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs index f976edb49..e50a4d652 100644 --- a/nemo/src/io/lexer.rs +++ b/nemo/src/io/lexer.rs @@ -61,6 +61,7 @@ pub(crate) enum Context { TermTuple, TermMap, RdfLiteral, + PrefixedConstant, Decimal, Integer, ArithmeticProduct, @@ -114,6 +115,7 @@ impl std::fmt::Display for Context { Context::TermTuple => write!(f, "tuple term"), Context::TermMap => write!(f, "map term"), Context::RdfLiteral => write!(f, "rdf literal"), + Context::PrefixedConstant => write!(f, "prefixed constant"), Context::Decimal => write!(f, "decimal"), Context::Integer => write!(f, "integer"), Context::ArithmeticProduct => write!(f, "arithmetic product"), @@ -229,6 +231,8 @@ pub(crate) enum TokenKind { // Multi-char tokens: /// Identifier for keywords and names Ident, + /// Identifier with a prefix, like `xsd:decimal` + PrefixedIdent, /// Variable like `?var` Variable, /// Existential Variable like `!var` @@ -300,6 +304,7 @@ impl std::fmt::Display for TokenKind { TokenKind::Slash => write!(f, "Slash"), TokenKind::Exponent => write!(f, "Exponent"), TokenKind::Ident => write!(f, "Ident"), + TokenKind::PrefixedIdent => write!(f, "Prefixed Ident"), TokenKind::Variable => write!(f, "Variable"), TokenKind::Existential => write!(f, "Existential"), TokenKind::Aggregate => write!(f, "Aggregate"), @@ -540,6 +545,25 @@ where Ok((rest_input, token)) } +pub(crate) fn lex_prefixed_ident<'a, 's, E>( + input: Input<'a, 's>, +) -> IResult, Token<'a>, E> +where + E: ParseError> + ContextError, Context>, +{ + recognize(tuple((opt(lex_ident), colon, lex_ident)))(input).map( + |(rest_input, prefixed_ident)| { + ( + rest_input, + Token { + kind: TokenKind::PrefixedIdent, + span: prefixed_ident.input, + }, + ) + }, + ) +} + pub(crate) fn lex_iri<'a, 's, E>(input: Input<'a, 's>) -> IResult, Token<'a>, E> where E: ParseError> + ContextError, Context>, @@ -602,7 +626,7 @@ where { context( Context::TlDocComment, - recognize(many1(tuple((tag("%!"), many0(is_not("\n")), line_ending)))), + recognize(many1(tuple((tag("%%%"), many0(is_not("\n")), line_ending)))), )(input) .map(|(rest, result)| (rest, Token::new(TokenKind::TlDocComment, result.input))) } diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index c6406aa2d..9b60b1382 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -2441,10 +2441,10 @@ pub mod new { use crate::io::lexer::{ arrow, at, caret, close_brace, close_paren, colon, comma, dot, equal, exclamation_mark, exp, greater, greater_equal, hash, less, less_equal, lex_comment, lex_doc_comment, - lex_ident, lex_iri, lex_number, lex_operators, lex_string, lex_toplevel_doc_comment, - lex_whitespace, minus, open_brace, open_paren, plus, question_mark, skip_to_dot, slash, - star, tilde, underscore, unequal, Context, Error, ErrorTree, ParserState, Span, Token, - TokenKind, + lex_ident, lex_iri, lex_number, lex_operators, lex_prefixed_ident, lex_string, + lex_toplevel_doc_comment, lex_whitespace, minus, open_brace, open_paren, plus, + question_mark, skip_to_dot, slash, star, tilde, underscore, unequal, Context, Error, + ErrorTree, ParserState, Span, Token, TokenKind, }; use 
crate::io::parser::ast::AstNode; use nom::combinator::{all_consuming, cut, map, opt, recognize}; @@ -2539,7 +2539,6 @@ pub mod new { nom::Err::Error(err) | nom::Err::Failure(err) => { let (_deepest_pos, errors) = get_deepest_errors(err); for error in errors { - dbg!(&error); input.parser_state.report_error(error); } // let error = Error(deepest_pos, format!("")); @@ -2552,10 +2551,8 @@ pub mod new { } fn get_deepest_errors<'a, 's>(e: &'a ErrorTree>) -> (Position, Vec) { - dbg!(&e); match e { ErrorTree::Base { location, kind } => { - dbg!(&kind); let span = location.input; let err_pos = Position { offset: span.location_offset(), @@ -2575,7 +2572,6 @@ pub mod new { // let mut err_pos = Position::default(); match &**base { ErrorTree::Base { location, kind } => { - dbg!(&kind); let span = location.input; let err_pos = Position { offset: span.location_offset(), @@ -2606,11 +2602,9 @@ pub mod new { ErrorTree::Stack { base, contexts } => { let (pos, mut deepest_errors) = get_deepest_errors(base); let contexts = context_strs(contexts); - dbg!(&deepest_errors); for mut error in &mut deepest_errors { error.context.append(&mut contexts.clone()); } - dbg!(&deepest_errors); (pos, deepest_errors) } ErrorTree::Alt(error_tree) => { @@ -3308,7 +3302,7 @@ pub mod new { context( Context::NamedTuple, tuple(( - lex_ident, + alt((lex_prefixed_ident, lex_ident)), wsoc0, open_paren, wsoc0, @@ -3502,6 +3496,7 @@ pub mod new { Context::TermPrivimitive, alt(( parse_rdf_literal, + parse_prefixed_ident, parse_ident, parse_iri, parse_number, @@ -3539,6 +3534,29 @@ pub mod new { }) } + fn parse_prefixed_ident<'a, 's, E>( + input: Input<'a, 's>, + ) -> IResult, Primitive<'a>, E> + where + E: ParseError> + ContextError, Context>, + { + context( + Context::PrefixedConstant, + tuple((opt(lex_ident), colon, lex_ident)), + )(input) + .map(|(rest_input, (prefix, colon, constant))| { + ( + rest_input, + Primitive::PrefixedConstant { + span: outer_span(input.input, rest_input.input), + prefix, + colon, + constant, + }, + ) + }) + } + fn parse_ident<'a, 's, E: ParseError> + ContextError, Context>>( input: Input<'a, 's>, ) -> IResult, Primitive<'a>, E> { @@ -4467,7 +4485,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters error_map.insert(error.pos, set); }; } - dbg!(&error_map); + // dbg!(&error_map); println!("\n\n"); // assert!(false); let lines: Vec<_> = str.lines().collect(); diff --git a/nemo/src/io/parser/ast/statement.rs b/nemo/src/io/parser/ast/statement.rs index f641dbd8e..d9a201500 100644 --- a/nemo/src/io/parser/ast/statement.rs +++ b/nemo/src/io/parser/ast/statement.rs @@ -132,7 +132,7 @@ impl AstNode for Statement<'_> { Statement::Rule { .. 
} => name!("Rule"), Statement::Whitespace(_) => name!("Whitespace"), Statement::Comment(_) => name!("Comment"), - Statement::Error(_) => name!("ERROR"), + Statement::Error(_) => name!("\x1b[1;31mERROR\x1b[0m"), } } diff --git a/nemo/src/io/parser/ast/term.rs b/nemo/src/io/parser/ast/term.rs index 81ed9d47f..7b33a95d8 100644 --- a/nemo/src/io/parser/ast/term.rs +++ b/nemo/src/io/parser/ast/term.rs @@ -165,12 +165,12 @@ impl AstNode for Term<'_> { format!("aggregation/{}", operation.span().fragment()), "file".to_string(), )), - Term::Tuple(tuple) => { - tuple.identifier.map(|identifier| ( - format!("function/{}", identifier.span().fragment()), - "file".to_string(), - )) - } + Term::Tuple(tuple) => tuple.identifier.map(|identifier| { + ( + format!("function/{}", identifier.span().fragment()), + "file".to_string(), + ) + }), _ => None, } } @@ -230,6 +230,12 @@ impl std::fmt::Display for Term<'_> { #[derive(Debug, Clone, PartialEq)] pub(crate) enum Primitive<'a> { Constant(Token<'a>), + PrefixedConstant { + span: Span<'a>, + prefix: Option>, + colon: Token<'a>, + constant: Token<'a>, + }, Number { span: Span<'a>, sign: Option>, @@ -252,6 +258,20 @@ impl AstNode for Primitive<'_> { fn children(&self) -> Option> { match self { Primitive::Constant(token) => Some(vec![token]), + Primitive::PrefixedConstant { + prefix, + colon, + constant, + .. + } => { + let mut vec: Vec<&dyn AstNode> = Vec::new(); + if let Some(prefix) = prefix { + vec.push(prefix); + } + vec.push(colon); + vec.push(constant); + Some(vec) + } Primitive::Number { sign, before, @@ -292,6 +312,7 @@ impl AstNode for Primitive<'_> { fn span(&self) -> Span { match self { Primitive::Constant(token) => token.span, + Primitive::PrefixedConstant { span, .. } => *span, Primitive::Number { span, .. } => *span, Primitive::String(token) => token.span, Primitive::Iri(token) => token.span, @@ -326,6 +347,7 @@ impl AstNode for Primitive<'_> { } match self { Primitive::Constant(_) => name!("Constant"), + Primitive::PrefixedConstant { .. } => name!("Prefixed Constant"), Primitive::Number { .. 
} => name!("Number"), Primitive::String(_) => name!("String"), Primitive::Iri(_) => name!("Iri"), From 93a06ef80d0e2aed310817990f39abafd441faa6 Mon Sep 17 00:00:00 2001 From: logicallangs <> Date: Wed, 12 Jun 2024 12:58:35 +0000 Subject: [PATCH 105/214] update language server --- Cargo.lock | 517 +++++++++++++++--- Cargo.toml | 8 +- nemo-language-server/Cargo.toml | 3 +- nemo-language-server/src/language_server.rs | 389 +++++++------ .../src/language_server/nemo_position.rs | 28 +- nemo-language-server/src/lib.rs | 2 +- nemo/src/io/lexer.rs | 10 +- nemo/src/io/parser.rs | 4 +- nemo/src/io/parser/ast.rs | 61 ++- nemo/src/io/parser/ast/atom.rs | 30 +- nemo/src/io/parser/ast/directive.rs | 13 +- nemo/src/io/parser/ast/map.rs | 22 +- nemo/src/io/parser/ast/program.rs | 12 +- nemo/src/io/parser/ast/statement.rs | 13 +- nemo/src/io/parser/ast/term.rs | 84 ++- nemo/src/io/parser/ast/tuple.rs | 12 +- nemo/src/model/rule_model/syntax.rs | 1 - 17 files changed, 769 insertions(+), 440 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 847bd5322..36187cf68 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -102,6 +102,12 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "anyhow" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" + [[package]] name = "arbitrary" version = "1.3.2" @@ -155,9 +161,9 @@ dependencies = [ [[package]] name = "async-trait" -version = "0.1.79" +version = "0.1.80" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a507401cad91ec6a857ed5513a2073c82a9b9048762b885bb98655b306964681" +checksum = "c6fa2087f2753a7da8cc1c0dbfcf89579dd57458e36769de5ac750b4671737ca" dependencies = [ "proc-macro2", "quote", @@ -176,7 +182,7 @@ version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" dependencies = [ - "hermit-abi", + "hermit-abi 0.1.19", "libc", "winapi", ] @@ -200,9 +206,9 @@ checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" [[package]] name = "backtrace" -version = "0.3.72" +version = "0.3.73" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17c6a35df3749d2e8bb1b7b21a976d82b15548788d2735b9d82f329268f71a11" +checksum = "5cc23269a4f8976d0a4d2e7109211a419fe30e8d88d677cd60b6bc79c5732e0a" dependencies = [ "addr2line", "cc", @@ -282,7 +288,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "05efc5cfd9110c8416e471df0e96702d58690178e206e61b7173706673c93706" dependencies = [ "memchr", - "regex-automata 0.4.6", + "regex-automata 0.4.7", "serde", ] @@ -312,9 +318,9 @@ checksum = "a3e368af43e418a04d52505cf3dbc23dda4e3407ae2fa99fd0e4f308ce546acc" [[package]] name = "cc" -version = "1.0.98" +version = "1.0.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41c270e7540d725e65ac7f1b212ac8ce349719624d7bcff99f8e2e488e8cf03f" +checksum = "96c51067fd44124faa7f870b4b1c969379ad32b2ba805aa959430ceaa384f695" [[package]] name = "cexpr" @@ -359,9 +365,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.4" +version = "4.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90bc066a67923782aa8515dbaea16946c5bcc5addbd668bb80af688e53e548a0" +checksum = "5db83dced34638ad474f39f250d7fea9598bdd239eaced1bdf45d597da0f433f" dependencies = [ "clap_builder", "clap_derive", @@ -369,9 +375,9 @@ dependencies = [ 
[[package]] name = "clap_builder" -version = "4.5.2" +version = "4.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae129e2e766ae0ec03484e609954119f123cc1fe650337e155d03b022f24f7b4" +checksum = "f7e204572485eb3fbf28f871612191521df159bc3e15a9f5064c66dba3a8c05f" dependencies = [ "anstream", "anstyle", @@ -381,9 +387,9 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.4" +version = "4.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "528131438037fd55894f62d6e9f068b8f45ac57ffa77517819645d10aed04f64" +checksum = "c780290ccf4fb26629baa7a1081e68ced113f1d3ec302fa5948f1c381ebf06c6" dependencies = [ "heck 0.5.0", "proc-macro2", @@ -393,9 +399,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.7.0" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce" +checksum = "4b82cf0babdbd58558212896d1a4272303a57bdb245c2bf1147185fb45640e70" [[package]] name = "colorchoice" @@ -413,6 +419,16 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "console_error_panic_hook" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06aeb73f470f66dcdbf7223caeebb85984942f22f1adb2a088cf9668146bbbc" +dependencies = [ + "cfg-if", + "wasm-bindgen", +] + [[package]] name = "core-foundation" version = "0.9.4" @@ -546,6 +562,17 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "displaydoc" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "487585f4d0c6655fe74905e2504d8ad6908e4db67f744eb140876906c2f3175d" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + [[package]] name = "doc-comment" version = "0.3.3" @@ -727,6 +754,7 @@ checksum = "645c6916888f6cb6350d2550b80fb63e734897a8498abe35cfb732b6487804b0" dependencies = [ "futures-channel", "futures-core", + "futures-executor", "futures-io", "futures-sink", "futures-task", @@ -749,6 +777,17 @@ version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" +[[package]] +name = "futures-executor" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a576fc72ae164fca6b9db127eaa9a9dda0d61316034f33a0a0d4eda41f02b01d" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + [[package]] name = "futures-io" version = "0.3.30" @@ -830,8 +869,8 @@ dependencies = [ "aho-corasick", "bstr", "log", - "regex-automata 0.4.6", - "regex-syntax 0.8.3", + "regex-automata 0.4.7", + "regex-syntax 0.8.4", ] [[package]] @@ -845,6 +884,19 @@ dependencies = [ "walkdir", ] +[[package]] +name = "gloo-utils" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "037fcb07216cb3a30f7292bd0176b050b7b9a052ba830ef7d5d65f6dc64ba58e" +dependencies = [ + "js-sys", + "serde", + "serde_json", + "wasm-bindgen", + "web-sys", +] + [[package]] name = "h2" version = "0.4.5" @@ -895,6 +947,12 @@ dependencies = [ "libc", ] +[[package]] +name = "hermit-abi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" + [[package]] name = "howlong" version = "0.1.7" @@ -933,12 +991,12 @@ dependencies = [ [[package]] name = "http-body-util" -version = "0.1.1" +version = "0.1.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "0475f8b2ac86659c21b64320d5d653f9efe42acd2a4e560073ec61a155a34f1d" +checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f" dependencies = [ "bytes", - "futures-core", + "futures-util", "http", "http-body", "pin-project-lite", @@ -946,9 +1004,9 @@ dependencies = [ [[package]] name = "httparse" -version = "1.8.0" +version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904" +checksum = "d0e7a4dd27b9476dc40cb050d3632d3bba3a70ddbff012285f7f8559a1e7e545" [[package]] name = "humantime" @@ -1012,14 +1070,134 @@ dependencies = [ "tracing", ] +[[package]] +name = "icu_collections" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locid" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_locid_transform" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_locid_transform_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_locid_transform_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" + +[[package]] +name = "icu_normalizer" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "write16", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" + +[[package]] +name = "icu_properties" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f8ac670d7422d7f76b32e17a5db556510825b29ec9154f235977c9caba61036" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locid_transform", + "icu_properties_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" + +[[package]] +name = "icu_provider" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_provider_macros", + "stable_deref_trait", + "tinystr", + "writeable", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_provider_macros" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + [[package]] name = "idna" -version = "0.5.0" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" +checksum = "4716a3a0933a1d01c2f72450e89596eb51dd34ef3c211ccd875acdf1f8fe47ed" dependencies = [ - "unicode-bidi", - "unicode-normalization", + "icu_normalizer", + "icu_properties", + "smallvec", + "utf8_iter", ] [[package]] @@ -1032,7 +1210,7 @@ dependencies = [ "globset", "log", "memchr", - "regex-automata 0.4.6", + "regex-automata 0.4.7", "same-file", "walkdir", "winapi-util", @@ -1121,6 +1299,16 @@ dependencies = [ "windows-targets 0.52.5", ] +[[package]] +name = "line-index" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67d61795376ae2683928c218fda7d7d7db136fd38c06b7552904667f0d55580a" +dependencies = [ + "nohash-hasher", + "text-size", +] + [[package]] name = "linked-hash-map" version = "0.5.6" @@ -1133,6 +1321,12 @@ version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" +[[package]] +name = "litemap" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "643cb0b8d4fcc284004d5fd0d67ccf61dfffadb7f75e1e71bc420f4688a3a704" + [[package]] name = "lock_api" version = "0.4.12" @@ -1301,7 +1495,7 @@ version = "0.5.2-dev" dependencies = [ "assert_cmd", "assert_fs", - "clap 4.5.4", + "clap 4.5.7", "colored", "dir-test", "env_logger 0.11.3", @@ -1312,6 +1506,19 @@ dependencies = [ "test-log", ] +[[package]] +name = "nemo-language-server" +version = "0.5.2-dev" +dependencies = [ + "anyhow", + "futures", + "line-index", + "nemo", + "tokio", + "tower-lsp", + "tower-service", +] + [[package]] name = "nemo-physical" version = "0.5.2-dev" @@ -1351,6 +1558,30 @@ dependencies = [ "pyo3", ] +[[package]] +name = "nemo-wasm" +version = "0.5.2-dev" +dependencies = [ + "console_error_panic_hook", + "futures", + "gloo-utils", + "js-sys", + "nemo", + "nemo-language-server", + "nemo-physical", + "thiserror", + "wasm-bindgen", + "wasm-bindgen-futures", + "wasm-bindgen-test", + "web-sys", +] + +[[package]] +name = "nohash-hasher" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bf50223579dc7cdcfb3bfcacf7069ff68243f8c363f62ffa99cf000a6b9c451" + [[package]] name = "nom" version = "5.1.3" @@ -1494,11 +1725,21 @@ dependencies = [ "autocfg", ] +[[package]] +name = "num_cpus" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" +dependencies = [ + "hermit-abi 0.3.9", + "libc", +] + [[package]] name = "object" -version = "0.35.0" +version = "0.36.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8ec7ab813848ba4522158d5517a6093db1ded27575b070f4177b8d12b41db5e" +checksum = "576dfe1fc8f9df304abb159d767a29d0476f7750fbf8aa7ad07816004a207434" dependencies = [ "memchr", ] @@ -1883,14 +2124,14 @@ dependencies = [ [[package]] name = "regex" -version = "1.10.4" +version = "1.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" +checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" 
dependencies = [ "aho-corasick", "memchr", - "regex-automata 0.4.6", - "regex-syntax 0.8.3", + "regex-automata 0.4.7", + "regex-syntax 0.8.4", ] [[package]] @@ -1904,13 +2145,13 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.6" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" +checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" dependencies = [ "aho-corasick", "memchr", - "regex-syntax 0.8.3", + "regex-syntax 0.8.4", ] [[package]] @@ -1921,9 +2162,9 @@ checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" [[package]] name = "regex-syntax" -version = "0.8.3" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" +checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" [[package]] name = "reqwest" @@ -2067,6 +2308,12 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "scoped-tls" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" + [[package]] name = "scopeguard" version = "1.2.0" @@ -2190,6 +2437,12 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + [[package]] name = "streaming-iterator" version = "0.1.9" @@ -2236,6 +2489,17 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" +[[package]] +name = "synstructure" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + [[package]] name = "system-configuration" version = "0.5.1" @@ -2318,6 +2582,12 @@ dependencies = [ "syn 2.0.66", ] +[[package]] +name = "text-size" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f18aa187839b2bdb1ad2fa35ead8c4c2976b64e4363c386d45ac0f7ee85c9233" + [[package]] name = "textwrap" version = "0.11.0" @@ -2358,20 +2628,15 @@ dependencies = [ ] [[package]] -name = "tinyvec" -version = "1.6.0" +name = "tinystr" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" dependencies = [ - "tinyvec_macros", + "displaydoc", + "zerovec", ] -[[package]] -name = "tinyvec_macros" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" - [[package]] name = "tokio" version = "1.38.0" @@ -2382,11 +2647,24 @@ dependencies = [ "bytes", "libc", "mio", + "num_cpus", "pin-project-lite", "socket2", + "tokio-macros", "windows-sys 0.48.0", ] +[[package]] +name = "tokio-macros" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a" +dependencies = [ + "proc-macro2", + "quote", + 
"syn 2.0.66", +] + [[package]] name = "tokio-native-tls" version = "0.3.1" @@ -2537,27 +2815,12 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" -[[package]] -name = "unicode-bidi" -version = "0.3.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" - [[package]] name = "unicode-ident" version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" -[[package]] -name = "unicode-normalization" -version = "0.1.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5" -dependencies = [ - "tinyvec", -] - [[package]] name = "unicode-segmentation" version = "1.11.0" @@ -2578,9 +2841,9 @@ checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" [[package]] name = "url" -version = "2.5.0" +version = "2.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633" +checksum = "f7c25da092f0a868cdf09e8674cd3b7ef3a7d92a24253e663a2fb85e2496de56" dependencies = [ "form_urlencoded", "idna", @@ -2588,11 +2851,23 @@ dependencies = [ "serde", ] +[[package]] +name = "utf16_iter" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + [[package]] name = "utf8parse" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "valuable" @@ -2718,6 +2993,31 @@ version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" +[[package]] +name = "wasm-bindgen-test" +version = "0.3.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9bf62a58e0780af3e852044583deee40983e5886da43a271dd772379987667b" +dependencies = [ + "console_error_panic_hook", + "js-sys", + "scoped-tls", + "wasm-bindgen", + "wasm-bindgen-futures", + "wasm-bindgen-test-macro", +] + +[[package]] +name = "wasm-bindgen-test-macro" +version = "0.3.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7f89739351a2e03cb94beb799d47fb2cac01759b40ec441f7de39b00cbf7ef0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + [[package]] name = "web-sys" version = "0.3.69" @@ -2917,6 +3217,18 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "write16" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" + +[[package]] +name = "writeable" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" + [[package]] name = "wyz" version = "0.5.1" @@ 
-2932,6 +3244,30 @@ version = "0.8.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "791978798f0597cfc70478424c2b4fdc2b7a8024aaff78497ef00f24ef674193" +[[package]] +name = "yoke" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c5b1314b079b0930c31e3af543d8ee1757b1951ae1e1565ec704403a7240ca5" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28cc31741b18cb6f1d5ff12f5b7523e3d6eb0852bbbad19d73905511d9849b95" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", + "synstructure", +] + [[package]] name = "zerocopy" version = "0.7.34" @@ -2951,3 +3287,46 @@ dependencies = [ "quote", "syn 2.0.66", ] + +[[package]] +name = "zerofrom" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91ec111ce797d0e0784a1116d0ddcdbea84322cd79e5d5ad173daeba4f93ab55" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ea7b4a3637ea8669cedf0f1fd5c286a17f3de97b8dd5a70a6c167a1730e63a5" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", + "synstructure", +] + +[[package]] +name = "zerovec" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb2cc8827d6c0994478a15c53f374f46fbd41bea663d809b14744bc42e6b109c" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97cf56601ee5052b4417d90c8755c6683473c926039908196cf35d99f893ebe7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] diff --git a/Cargo.toml b/Cargo.toml index cde9888d4..2b98272e6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,12 +1,16 @@ [workspace] resolver = "2" +default-members = [ + "nemo", + "nemo-cli", + "nemo-physical", + "nemo-python", +] members = [ "nemo", "nemo-cli", "nemo-physical", "nemo-python", -] -exclude = [ "nemo-language-server", "nemo-wasm", ] diff --git a/nemo-language-server/Cargo.toml b/nemo-language-server/Cargo.toml index 6e16e33f3..6f0c258dd 100644 --- a/nemo-language-server/Cargo.toml +++ b/nemo-language-server/Cargo.toml @@ -22,9 +22,10 @@ js = [] tokio = ["dep:tokio"] [dependencies] +anyhow = "1.0" line-index = "0.1.1" nemo = { path = "../nemo", default-features = false } futures = "0.3.21" -tokio = { version = "1.27.0", features = ["full"], optional = true } +tokio = { version = "1.27.0", features = ["macros", "io-util", "rt-multi-thread"], optional = true } tower-lsp = { version = "0.20.0", default-features = false } tower-service = "0.3.2" diff --git a/nemo-language-server/src/language_server.rs b/nemo-language-server/src/language_server.rs index 428633b54..2a179937d 100644 --- a/nemo-language-server/src/language_server.rs +++ b/nemo-language-server/src/language_server.rs @@ -1,25 +1,26 @@ -use std::collections::HashMap; +use std::collections::{BTreeMap, BTreeSet, HashMap}; use std::vec; +use anyhow::anyhow; use futures::lock::Mutex; use line_index::{LineCol, LineIndex, WideEncoding}; use nemo::io::parser::ast::program::Program; use nemo::io::parser::ast::{AstNode, Position}; use nemo::io::parser::new::parse_program_str; -use nemo_position::{ - lsp_position_to_nemo_position, 
nemo_position_to_lsp_position, PositionConversionError, -}; +use nemo_position::{lsp_position_to_nemo_position, PositionConversionError}; use tower_lsp::lsp_types::{ - CompletionOptions, Diagnostic, DidChangeTextDocumentParams, DidOpenTextDocumentParams, - DocumentChangeOperation, DocumentChanges, DocumentSymbol, DocumentSymbolOptions, - DocumentSymbolParams, DocumentSymbolResponse, InitializeParams, InitializeResult, - InitializedParams, Location, MessageType, OneOf, OptionalVersionedTextDocumentIdentifier, - PrepareRenameResponse, Range, ReferenceParams, RenameOptions, RenameParams, ServerCapabilities, - TextDocumentEdit, TextDocumentPositionParams, TextDocumentSyncCapability, TextDocumentSyncKind, - TextEdit, Url, VersionedTextDocumentIdentifier, WorkDoneProgressOptions, WorkspaceEdit, + Diagnostic, DidChangeTextDocumentParams, DidOpenTextDocumentParams, DocumentChangeOperation, + DocumentChanges, DocumentSymbol, DocumentSymbolOptions, DocumentSymbolParams, + DocumentSymbolResponse, InitializeParams, InitializeResult, InitializedParams, Location, + MessageType, OneOf, OptionalVersionedTextDocumentIdentifier, PrepareRenameResponse, Range, + ReferenceParams, RenameOptions, RenameParams, ServerCapabilities, TextDocumentEdit, + TextDocumentPositionParams, TextDocumentSyncCapability, TextDocumentSyncKind, TextEdit, Url, + VersionedTextDocumentIdentifier, WorkDoneProgressOptions, WorkspaceEdit, }; use tower_lsp::{Client, LanguageServer}; +use self::nemo_position::nemo_range_to_lsp_range; + mod nemo_position; #[derive(Debug)] @@ -42,13 +43,13 @@ struct TextDocumentInfo { } /// Converts a source position to a LSP position -pub(crate) fn line_col_to_position( +pub(crate) fn line_col_to_lsp_position( line_index: &LineIndex, line_col: LineCol, -) -> Result<tower_lsp::lsp_types::Position, ()> { +) -> Result<tower_lsp::lsp_types::Position, PositionConversionError> { let wide_line_col = line_index .to_wide(WideEncoding::Utf16, line_col) - .ok_or(())?; + .ok_or(PositionConversionError::LspLineCol(line_col))?; Ok(tower_lsp::lsp_types::Position { line: wide_line_col.line, @@ -56,6 +57,14 @@ pub(crate) fn line_col_to_position( }) } +fn jsonrpc_error(error: anyhow::Error) -> tower_lsp::jsonrpc::Error { + tower_lsp::jsonrpc::Error { + code: tower_lsp::jsonrpc::ErrorCode::ServerError(1), + message: error.to_string().into(), + data: None, + } +} + impl Backend { pub fn new(client: Client) -> Self { Self { @@ -66,7 +75,11 @@ impl Backend { } } - async fn handle_change(&self, text_document: VersionedTextDocumentIdentifier, text: &str) { + async fn handle_change( + &self, + text_document: VersionedTextDocumentIdentifier, + text: &str, + ) -> anyhow::Result<()> { self.state.lock().await.text_document_store.insert( text_document.uri.clone(), TextDocumentInfo { @@ -79,52 +92,50 @@ impl Backend { let (_program, errors) = parse_program_str(text); - use std::collections::{BTreeMap, HashSet}; - let mut error_map: BTreeMap<Position, HashSet<String>> = BTreeMap::new(); - for error in &errors { - if let Some(set) = error_map.get_mut(&error.pos) { + // Group errors by position and deduplicate error messages + let mut errors_by_position: BTreeMap<Position, BTreeSet<String>> = BTreeMap::new(); + for error in errors { + if let Some(set) = errors_by_position.get_mut(&error.pos) { set.insert(error.msg.clone()); } else { - let mut set = HashSet::new(); - set.insert(error.msg.clone()); - error_map.insert(error.pos, set); + errors_by_position.insert(error.pos, std::iter::once(error.msg.clone()).collect()); }; } - let diagnostics = error_map + let diagnostics = errors_by_position .into_iter() - .map(|(pos, error_set)| Diagnostic { - message: /*error.msg*/ { - format!("expected{}", {
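// The removed closure body below assembled the message by pushing " 'msg'," fragments onto a
// String, which left a trailing comma ("expected 'a', 'b',"); the added code that replaces it
// further down formats each deduplicated message as 'msg' and joins them with ", " instead.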
- let mut string = String::new(); - for s in error_set { - string.push_str(" '"); - string.push_str(s.as_str()); - string.push_str("',"); - } - string - }) - }, - range: Range::new( - line_col_to_position( - &line_index, - LineCol { - line: pos.line - 1, - col: pos.column - 1, - }, - ) - .unwrap(), - line_col_to_position( - &line_index, - LineCol { - line: pos.line - 1, - col: pos.column - 1 + 1, - }, - ) - .unwrap(), - ), - ..Default::default() + .map(|(pos, error_set)| { + Ok(Diagnostic { + message: format!( + "expected {}", + error_set + .iter() + .map(|s| format!("'{s}'")) + .collect::>() + .join(", ") + ), + range: Range::new( + line_col_to_lsp_position( + &line_index, + LineCol { + line: pos.line - 1, + col: pos.column - 1, + }, + ) + .unwrap(), + line_col_to_lsp_position( + &line_index, + LineCol { + line: pos.line - 1, + col: pos.column - 1 + 1, + }, + ) + .unwrap(), + ), + ..Default::default() + }) }) + .filter_map(|result: Result<_, PositionConversionError>| result.ok()) .collect(); self.client @@ -134,20 +145,15 @@ impl Backend { Some(text_document.version), ) .await; + + Ok(()) } - async fn read_text_document_info(&self, uri: &Url) -> Option { + async fn read_text_document_info(&self, uri: &Url) -> anyhow::Result { if let Some(info) = self.state.lock().await.text_document_store.get(uri) { - let a = info.clone(); - Some(a) + Ok(info.clone()) } else { - self.client - .log_message( - MessageType::ERROR, - "could not find text document with URI {uri}", - ) - .await; - None + Err(anyhow!("could not find text document with URI {uri}")) } } } @@ -176,13 +182,6 @@ impl LanguageServer for Backend { ..Default::default() }, })), - completion_provider: Some(CompletionOptions { - work_done_progress_options: WorkDoneProgressOptions { - ..Default::default() - }, - ..Default::default() - }), - ..Default::default() }, ..Default::default() @@ -196,19 +195,43 @@ impl LanguageServer for Backend { } async fn did_open(&self, params: DidOpenTextDocumentParams) { - self.handle_change( - VersionedTextDocumentIdentifier { - uri: params.text_document.uri, - version: params.text_document.version, - }, - ¶ms.text_document.text, - ) - .await; + if let Err(error) = self + .handle_change( + VersionedTextDocumentIdentifier { + uri: params.text_document.uri, + version: params.text_document.version, + }, + ¶ms.text_document.text, + ) + .await + { + self.client + .log_message( + MessageType::ERROR, + format!("error while handling textDocument/didOpen request: {error}"), + ) + .await; + } } async fn did_change(&self, params: DidChangeTextDocumentParams) { - self.handle_change(params.text_document, ¶ms.content_changes[0].text) - .await; + if let Err(error) = self + .handle_change( + VersionedTextDocumentIdentifier { + uri: params.text_document.uri, + version: params.text_document.version, + }, + ¶ms.content_changes[0].text, + ) + .await + { + self.client + .log_message( + MessageType::ERROR, + format!("error while handling textDocument/didChange request: {error}"), + ) + .await; + } } async fn references( @@ -217,46 +240,44 @@ impl LanguageServer for Backend { ) -> tower_lsp::jsonrpc::Result>> { let info = self .read_text_document_info(¶ms.text_document_position.text_document.uri) - .await; + .await + .map_err(jsonrpc_error)?; - match info { - Some(info) => { - let text = info.text; - let line_index = LineIndex::new(&text); - let position = lsp_position_to_nemo_position( - &line_index, - params.text_document_position.position, - ) - .unwrap(); // TODO handle unwrap + let text = info.text; + let line_index = 
LineIndex::new(&text); + let position = + lsp_position_to_nemo_position(&line_index, params.text_document_position.position) + .map_err(Into::into) + .map_err(jsonrpc_error)?; - let program = parse_program_str(&text); - let program = program.0; + let (program, _) = parse_program_str(&text); - let node_path = find_in_ast(&program, position); + let node_path = find_in_ast(&program, position); - // Get most identifier most specific to the position - let indentified_node = node_path_deepest_identifier(&node_path); - let indentified_node = match indentified_node { - Some(indentified_node) => indentified_node, - None => return Ok(None), - }; + // Get most identifier most specific to the position + let indentified_node = node_path_deepest_identifier(&node_path); + let indentified_node = match indentified_node { + Some(indentified_node) => indentified_node, + None => return Ok(None), + }; - // Find other AST nodes with the same global identifier - let referenced_nodes = - find_by_identifier(indentified_node.scoping_node, &indentified_node.identifier); + // Find other AST nodes with the same global identifier + let referenced_nodes = + find_by_identifier(indentified_node.scoping_node, &indentified_node.identifier); - let locations = referenced_nodes - .iter() - .map(|node| Location { - uri: params.text_document_position.text_document.uri.clone(), - range: node_to_range_lsp(&line_index, *node), - }) - .collect(); + let locations = referenced_nodes + .iter() + .filter_map(|node| node_with_range(&line_index, *node)) + .map(|(_node, range)| { + Ok(Location { + uri: params.text_document_position.text_document.uri.clone(), + range, + }) + }) + .filter_map(|result: Result<_, ()>| result.ok()) + .collect(); - Ok(Some(locations)) - } - None => Ok(None), // TODO: Handle error - } + Ok(Some(locations)) } async fn document_symbol( @@ -265,28 +286,23 @@ impl LanguageServer for Backend { ) -> tower_lsp::jsonrpc::Result> { let info = self .read_text_document_info(¶ms.text_document.uri) - .await; - - match info { - Some(info) => { - let text = info.text; - let line_index = LineIndex::new(&text); + .await + .map_err(jsonrpc_error)?; - let program = parse_program_str(&text); - let program = program.0; + let text = info.text; + let line_index = LineIndex::new(&text); - let document_symbol = ast_node_to_document_symbol(&line_index, &program); + let (program, _) = parse_program_str(&text); - if let Ok(document_symbol) = document_symbol { - return Ok(document_symbol.map(|document_symbol| { - DocumentSymbolResponse::Nested(document_symbol.children.unwrap()) - })); - } + let document_symbol = ast_node_to_document_symbol(&line_index, &program) + .map_err(Into::into) + .map_err(jsonrpc_error)? 
+ .ok_or(anyhow!("program has no document symbol")) + .map_err(jsonrpc_error)?; - Ok(None) - } - None => Ok(None), // TODO: Handle error - } + Ok(Some(DocumentSymbolResponse::Nested( + document_symbol.children.unwrap_or(vec![]), + ))) } /// Finds references to symbol that was renamed and sends edit operations to language client @@ -296,21 +312,17 @@ impl LanguageServer for Backend { ) -> tower_lsp::jsonrpc::Result> { let info = self .read_text_document_info(¶ms.text_document_position.text_document.uri) - .await; - - let info = match info { - Some(info) => info, - None => return Ok(None), - }; + .await + .map_err(jsonrpc_error)?; let text = info.text; let line_index = LineIndex::new(&text); let position = lsp_position_to_nemo_position(&line_index, params.text_document_position.position) - .unwrap(); + .map_err(Into::into) + .map_err(jsonrpc_error)?; - let program = parse_program_str(&text); - let program = program.0; + let (program, _) = parse_program_str(&text); let node_path = find_in_ast(&program, position); @@ -333,13 +345,17 @@ impl LanguageServer for Backend { edits: referenced_nodes .into_iter() .filter_map(|node| { - node.lsp_sub_node_to_rename().map(|renamed_node| { - OneOf::Left(TextEdit { - range: node_to_range_lsp(&line_index, renamed_node), - new_text: params.new_name.clone(), + node.lsp_range_to_rename().map(|renamed_node_range| { + Ok({ + OneOf::Left(TextEdit { + range: nemo_range_to_lsp_range(&line_index, renamed_node_range) + .map_err(|_error| ())?, // TODO: Print error, + new_text: params.new_name.clone(), + }) }) }) }) + .filter_map(|result: Result<_, ()>| result.ok()) .collect(), }; @@ -358,19 +374,16 @@ impl LanguageServer for Backend { ) -> tower_lsp::jsonrpc::Result> { let info = self .read_text_document_info(¶ms.text_document.uri) - .await; - - let info = match info { - Some(info) => info, - None => return Ok(None), - }; + .await + .map_err(jsonrpc_error)?; let text = info.text; let line_index = LineIndex::new(&text); - let position = lsp_position_to_nemo_position(&line_index, params.position).unwrap(); + let position = lsp_position_to_nemo_position(&line_index, params.position) + .map_err(Into::into) + .map_err(jsonrpc_error)?; - let program = parse_program_str(&text); - let program = program.0; + let (program, _) = parse_program_str(&text); let node_path = find_in_ast(&program, position); @@ -378,14 +391,18 @@ impl LanguageServer for Backend { let indentified_node = node_path_deepest_identifier(&node_path); match indentified_node { - Some(indentified_node) => { - Ok(indentified_node - .node - .lsp_sub_node_to_rename() - .map(|renamed_node| { - PrepareRenameResponse::Range(node_to_range_lsp(&line_index, renamed_node)) - })) - } + Some(indentified_node) => Ok(Some(PrepareRenameResponse::Range( + nemo_range_to_lsp_range( + &line_index, + indentified_node + .node + .lsp_range_to_rename() + .ok_or_else(|| anyhow!("identified node can not be renamed")) + .map_err(jsonrpc_error)?, + ) + .map_err(Into::into) + .map_err(jsonrpc_error)?, + ))), None => Ok(None), } } @@ -395,6 +412,15 @@ impl LanguageServer for Backend { } } +fn node_with_range<'a>( + line_index: &LineIndex, + node: &'a dyn AstNode, +) -> Option<(&'a dyn AstNode, Range)> { + nemo_range_to_lsp_range(line_index, node.range()) + .map(|range| (node, range)) // TODO: Print error, + .ok() +} + struct IdentifiedNode<'a> { node: &'a dyn AstNode, identifier: String, @@ -486,37 +512,12 @@ fn find_in_ast_recurse<'a>( ) { path.push(node); - if let Some(children) = node.children() { - for (child, next_child) in 
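// The deleted loop below located the last child starting before the position by peeking at each
// next sibling; its replacement descends into the child whose half-open range [start, end)
// contains the position, assuming sibling ranges never overlap.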
children.iter().zip(children.iter().skip(1)) { - if next_child.position() > position { - find_in_ast_recurse(*child, position, path); - return; - } - } - if let Some(child) = children.last() { + for child in node.children().iter().flatten() { + let range = child.range(); + if range.start <= position && position < range.end { find_in_ast_recurse(*child, position, path); + break; // Assume no nodes overlap } - }; -} - -fn node_to_range_lsp(line_index: &LineIndex, node: &dyn AstNode) -> Range { - Range { - start: nemo_position_to_lsp_position(line_index, node.position()).unwrap(), // TODO: Improve error handling - end: nemo_position_to_lsp_position( - line_index, - Position { - offset: node.position().offset + node.span().len(), - line: node.position().line + node.span().fragment().lines().count() as u32 - 1, - column: if node.span().fragment().lines().count() > 1 { - 1 + node.span().fragment().lines().last().unwrap().len() // TODO: Check if length is in correct encoding - as u32 - } else { - node.position().column + node.span().fragment().len() as u32 - // TODO: Check if length is in correct encoding - }, - }, - ) - .unwrap(), } } @@ -524,9 +525,7 @@ fn ast_node_to_document_symbol( line_index: &LineIndex, node: &dyn AstNode, ) -> Result, PositionConversionError> { - let range = node_to_range_lsp(line_index, node); - - let selection_range = range; + let range = nemo_range_to_lsp_range(line_index, node.range())?; if let Some((name, kind)) = node.lsp_symbol_info() { let children_results: Vec<_> = node @@ -555,7 +554,7 @@ fn ast_node_to_document_symbol( kind, name, range, - selection_range, + selection_range: range, tags: None, deprecated: None, }, diff --git a/nemo-language-server/src/language_server/nemo_position.rs b/nemo-language-server/src/language_server/nemo_position.rs index 4e155166e..782016991 100644 --- a/nemo-language-server/src/language_server/nemo_position.rs +++ b/nemo-language-server/src/language_server/nemo_position.rs @@ -8,22 +8,33 @@ //! * line: u32 index of the line, first line gets index 1 //! * offset: u32 index of the UTF-8 code point (byte) within the line, first column gets index 0 +use anyhow::anyhow; use line_index::{LineCol, LineIndex, WideEncoding, WideLineCol}; #[derive(Debug)] pub enum PositionConversionError { NemoPosition(nemo::io::parser::ast::Position), LspPosition(tower_lsp::lsp_types::Position), + LspLineCol(LineCol), +} + +impl From for anyhow::Error { + fn from(val: PositionConversionError) -> Self { + anyhow!("could not convert source code position: {:#?}", val) + } } fn line_col_to_nemo_position( line_index: &LineIndex, line_col: LineCol, -) -> Result { +) -> Result { Ok(nemo::io::parser::ast::Position { line: line_col.line + 1, column: line_col.col, - offset: line_index.offset(line_col).ok_or(())?.into(), + offset: line_index + .offset(line_col) + .ok_or(PositionConversionError::LspLineCol(line_col))? 
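// line_index.offset() resolves the LineCol back to a byte offset (a TextSize from the line-index
// crate); the .into() that follows converts it into the usize offset stored on the Nemo position.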
+ .into(), }) } @@ -42,7 +53,7 @@ pub fn lsp_position_to_nemo_position( ) .ok_or(PositionConversionError::LspPosition(position))?; - Ok(line_col_to_nemo_position(line_index, line_col).unwrap()) + line_col_to_nemo_position(line_index, line_col) } fn nemo_position_to_line_col(position: nemo::io::parser::ast::Position) -> LineCol { @@ -67,3 +78,14 @@ pub fn nemo_position_to_lsp_position( character: wide_line_col.col, }) } + +/// Converts a Nemo range to a LSP range +pub fn nemo_range_to_lsp_range( + line_index: &LineIndex, + range: nemo::io::parser::ast::Range, +) -> Result { + Ok(tower_lsp::lsp_types::Range { + start: nemo_position_to_lsp_position(line_index, range.start)?, + end: nemo_position_to_lsp_position(line_index, range.end)?, + }) +} diff --git a/nemo-language-server/src/lib.rs b/nemo-language-server/src/lib.rs index de15066cd..646a4a250 100644 --- a/nemo-language-server/src/lib.rs +++ b/nemo-language-server/src/lib.rs @@ -14,6 +14,6 @@ pub fn create_language_server() -> (LspService, ClientSocket) { LspService::new(Backend::new) } -// // See https://doc.rust-lang.org/cargo/reference/features.html#mutually-exclusive-features +// See https://doc.rust-lang.org/cargo/reference/features.html#mutually-exclusive-features #[cfg(all(feature = "js", feature = "tokio"))] compile_error!("feature \"js\" and feature \"tokio\" cannot be enabled at the same time"); diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs index e50a4d652..6072a126c 100644 --- a/nemo/src/io/lexer.rs +++ b/nemo/src/io/lexer.rs @@ -370,14 +370,6 @@ impl<'a> AstNode for Token<'a> { self.span } - fn position(&self) -> Position { - Position { - offset: self.span.location_offset(), - line: self.span.location_line(), - column: self.span.get_utf8_column() as u32, - } - } - fn is_token(&self) -> bool { true } @@ -386,7 +378,7 @@ impl<'a> AstNode for Token<'a> { None } - fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> { + fn lsp_range_to_rename(&self) -> Option { None } diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index 9b60b1382..0fe2f64e8 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -3869,7 +3869,7 @@ pub mod new { .map(|(rest_input, var)| { ( rest_input, - Term::Variable(Token { + Term::UniversalVariable(Token { kind: TokenKind::Variable, span: var.input, }), @@ -3892,7 +3892,7 @@ pub mod new { .map(|(rest_input, existential)| { ( rest_input, - Term::Existential(Token { + Term::ExistentialVariable(Token { kind: TokenKind::Existential, span: existential.input, }), diff --git a/nemo/src/io/parser/ast.rs b/nemo/src/io/parser/ast.rs index 41c767ec7..9acc56a59 100644 --- a/nemo/src/io/parser/ast.rs +++ b/nemo/src/io/parser/ast.rs @@ -1,4 +1,3 @@ -use nom::Offset; use tower_lsp::lsp_types::SymbolKind; use crate::io::lexer::{Span, Token}; @@ -16,7 +15,32 @@ pub(crate) mod tuple; pub trait AstNode: std::fmt::Debug + Display + Sync { fn children(&self) -> Option>; fn span(&self) -> Span; - fn position(&self) -> Position; + + fn range(&self) -> Range { + let span = self.span(); + + let start_position = Position { + offset: self.span().location_offset(), + line: self.span().location_line(), + column: self.span().get_utf8_column() as u32, + }; + + let end_position = Position { + offset: start_position.offset + span.len(), + line: start_position.line + span.fragment().lines().count() as u32 - 1, + column: if span.fragment().lines().count() > 1 { + 1 + span.fragment().lines().last().unwrap().len() as u32 // Column is on new line + } else { + start_position.column + span.fragment().len() as u32 
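// Both branches count UTF-8 bytes (fragment().len(), lines().last().len()), so the end column
// computed here is byte-based; nemo_position_to_lsp_position re-encodes columns into UTF-16
// code units before they are sent to the client.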
// Column is on same line + }, + }; + + Range { + start: start_position, + end: end_position, + } + } + fn is_token(&self) -> bool; fn name(&self) -> String; @@ -29,7 +53,8 @@ pub trait AstNode: std::fmt::Debug + Display + Sync { /// This can be used to restict rename operations to be local, e.g. for variable idenfiers inside of rules. fn lsp_identifier(&self) -> Option<(String, String)>; fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)>; - fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode>; + /// Range of the part of the node that should be renamed or [`None`] if the node can not be renamed + fn lsp_range_to_rename(&self) -> Option; } #[derive(Debug, Clone, Copy, Hash)] @@ -64,6 +89,12 @@ impl Default for Position { } } +#[derive(Debug, Clone, Copy, Hash)] +pub struct Range { + pub start: Position, + pub end: Position, +} + /// Whitespace or Comment token #[derive(Debug, Clone, PartialEq)] pub struct Wsoc<'a> { @@ -84,14 +115,6 @@ impl AstNode for Wsoc<'_> { self.span } - fn position(&self) -> Position { - Position { - offset: self.span.location_offset(), - line: self.span.location_line(), - column: self.span.get_utf8_column() as u32, - } - } - fn is_token(&self) -> bool { false } @@ -109,7 +132,7 @@ impl AstNode for Wsoc<'_> { None } - fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> { + fn lsp_range_to_rename(&self) -> Option { None } @@ -182,14 +205,6 @@ impl AstNode for List<'_, T> { self.span } - fn position(&self) -> Position { - Position { - offset: self.span.location_offset(), - line: self.span.location_line(), - column: self.span.get_utf8_column() as u32, - } - } - fn is_token(&self) -> bool { false } @@ -207,7 +222,7 @@ impl AstNode for List<'_, T> { None } - fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> { + fn lsp_range_to_rename(&self) -> Option { None } @@ -398,7 +413,7 @@ mod test { ws2: None, terms: Some(List { span: s!(304, 12, "?VarA"), - first: Term::Variable(Token { + first: Term::UniversalVariable(Token { kind: TokenKind::Variable, span: s!(304, 12, "?VarA"), }), @@ -425,7 +440,7 @@ mod test { ws2: None, terms: Some(List { span: s!(328, 12, "?Var, ConstB"), - first: Term::Variable(Token { + first: Term::UniversalVariable(Token { kind: TokenKind::Variable, span: s!(328, 12, "?VarA"), }), diff --git a/nemo/src/io/parser/ast/atom.rs b/nemo/src/io/parser/ast/atom.rs index 5410ae9d1..bbf6e88b4 100644 --- a/nemo/src/io/parser/ast/atom.rs +++ b/nemo/src/io/parser/ast/atom.rs @@ -1,9 +1,9 @@ use tower_lsp::lsp_types::SymbolKind; +use super::map::Map; use super::term::Term; use super::tuple::Tuple; -use super::{ast_to_ascii_tree, AstNode, Wsoc}; -use super::{map::Map, Position}; +use super::{ast_to_ascii_tree, AstNode, Range, Wsoc}; use crate::io::lexer::{Span, Token}; use ascii_tree::write_tree; @@ -74,15 +74,6 @@ impl AstNode for Atom<'_> { } } - fn position(&self) -> Position { - let span = self.span(); - Position { - offset: span.location_offset(), - line: span.location_line(), - column: span.get_utf8_column() as u32, - } - } - fn is_token(&self) -> bool { false } @@ -108,19 +99,18 @@ impl AstNode for Atom<'_> { } fn lsp_identifier(&self) -> Option<(String, String)> { - self.tuple().map(|tuple| ( + self.tuple().map(|tuple| { + ( format!("atom/{}", tuple.identifier.unwrap().span().fragment()), "file".to_string(), - )) + ) + }) } - fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> { - None - // TODO: - // match self.tuple() { - // Some(tuple) => Some(&tuple.identifier.unwrap()), - // None => None, - // } + fn lsp_range_to_rename(&self) 
-> Option { + self.tuple() + .and_then(|tuple| tuple.identifier) + .map(|identifier| identifier.range()) } fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> { diff --git a/nemo/src/io/parser/ast/directive.rs b/nemo/src/io/parser/ast/directive.rs index 415b584d4..86c50a315 100644 --- a/nemo/src/io/parser/ast/directive.rs +++ b/nemo/src/io/parser/ast/directive.rs @@ -1,7 +1,7 @@ use tower_lsp::lsp_types::SymbolKind; use super::map::Map; -use super::{ast_to_ascii_tree, AstNode, List, Position, Wsoc}; +use super::{ast_to_ascii_tree, AstNode, List, Range, Wsoc}; use crate::io::lexer::{Span, Token}; use ascii_tree::write_tree; @@ -230,15 +230,6 @@ impl AstNode for Directive<'_> { } } - fn position(&self) -> Position { - let span = self.span(); - Position { - offset: span.location_offset(), - line: span.location_line(), - column: span.get_utf8_column() as u32, - } - } - fn is_token(&self) -> bool { false } @@ -268,7 +259,7 @@ impl AstNode for Directive<'_> { None } - fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> { + fn lsp_range_to_rename(&self) -> Option { None } diff --git a/nemo/src/io/parser/ast/map.rs b/nemo/src/io/parser/ast/map.rs index cdafbd7f9..061231e13 100644 --- a/nemo/src/io/parser/ast/map.rs +++ b/nemo/src/io/parser/ast/map.rs @@ -1,7 +1,7 @@ use tower_lsp::lsp_types::SymbolKind; use super::term::Term; -use super::{ast_to_ascii_tree, AstNode, List, Position, Wsoc}; +use super::{ast_to_ascii_tree, AstNode, List, Position, Range, Wsoc}; use crate::io::lexer::{Span, Token}; use ascii_tree::write_tree; use std::fmt::Debug; @@ -44,14 +44,6 @@ impl AstNode for Map<'_> { self.span } - fn position(&self) -> Position { - Position { - offset: self.span.location_offset(), - line: self.span.location_line(), - column: self.span.get_utf8_column() as u32, - } - } - fn is_token(&self) -> bool { false } @@ -64,7 +56,7 @@ impl AstNode for Map<'_> { None } - fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> { + fn lsp_range_to_rename(&self) -> Option { None } @@ -109,14 +101,6 @@ impl AstNode for Pair<'_, K, V> { self.span } - fn position(&self) -> Position { - Position { - offset: self.span.location_offset(), - line: self.span.location_line(), - column: self.span.get_utf8_column() as u32, - } - } - fn is_token(&self) -> bool { false } @@ -134,7 +118,7 @@ impl AstNode for Pair<'_, K, V> { None } - fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> { + fn lsp_range_to_rename(&self) -> Option { None } diff --git a/nemo/src/io/parser/ast/program.rs b/nemo/src/io/parser/ast/program.rs index 9331e59d5..f2f111461 100644 --- a/nemo/src/io/parser/ast/program.rs +++ b/nemo/src/io/parser/ast/program.rs @@ -1,6 +1,6 @@ use tower_lsp::lsp_types::SymbolKind; -use super::{ast_to_ascii_tree, statement::Statement, AstNode, Position}; +use super::{ast_to_ascii_tree, statement::Statement, AstNode, Position, Range}; use crate::io::lexer::{Span, Token}; use ascii_tree::write_tree; @@ -29,14 +29,6 @@ impl AstNode for Program<'_> { self.span } - fn position(&self) -> Position { - Position { - offset: self.span.location_offset(), - line: self.span.location_line(), - column: self.span.get_utf8_column() as u32, - } - } - fn is_token(&self) -> bool { false } @@ -63,7 +55,7 @@ impl AstNode for Program<'_> { None } - fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> { + fn lsp_range_to_rename(&self) -> Option { None } diff --git a/nemo/src/io/parser/ast/statement.rs b/nemo/src/io/parser/ast/statement.rs index d9a201500..63014a6d3 100644 --- a/nemo/src/io/parser/ast/statement.rs +++ 
b/nemo/src/io/parser/ast/statement.rs @@ -2,7 +2,7 @@ use tower_lsp::lsp_types::SymbolKind; use super::atom::Atom; use super::directive::Directive; -use super::{ast_to_ascii_tree, AstNode, List, Position, Wsoc}; +use super::{ast_to_ascii_tree, AstNode, List, Position, Range, Wsoc}; use crate::io::lexer::{Span, Token}; use ascii_tree::write_tree; @@ -100,15 +100,6 @@ impl AstNode for Statement<'_> { } } - fn position(&self) -> Position { - let span = self.span(); - Position { - offset: span.location_offset(), - line: span.location_line(), - column: span.get_utf8_column() as u32, - } - } - fn is_token(&self) -> bool { false } @@ -140,7 +131,7 @@ impl AstNode for Statement<'_> { Some(("statement".to_string(), "statement".to_string())) } - fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> { + fn lsp_range_to_rename(&self) -> Option { None } diff --git a/nemo/src/io/parser/ast/term.rs b/nemo/src/io/parser/ast/term.rs index 7b33a95d8..beda1993d 100644 --- a/nemo/src/io/parser/ast/term.rs +++ b/nemo/src/io/parser/ast/term.rs @@ -2,15 +2,15 @@ use tower_lsp::lsp_types::SymbolKind; use super::map::Map; use super::tuple::Tuple; -use super::{ast_to_ascii_tree, AstNode, List, Position, Wsoc}; +use super::{ast_to_ascii_tree, AstNode, List, Range, Wsoc}; use crate::io::lexer::{Span, Token}; use ascii_tree::write_tree; #[derive(Debug, Clone, PartialEq)] pub enum Term<'a> { Primitive(Primitive<'a>), - Variable(Token<'a>), - Existential(Token<'a>), + UniversalVariable(Token<'a>), + ExistentialVariable(Token<'a>), // TODO: Is whitespace needed? Figure out how unary terms look UnaryPrefix { span: Span<'a>, @@ -43,8 +43,8 @@ impl AstNode for Term<'_> { fn children(&self) -> Option> { match self { Term::Primitive(token) => Some(vec![token]), - Term::Variable(token) => Some(vec![token]), - Term::Existential(token) => Some(vec![token]), + Term::UniversalVariable(token) => Some(vec![token]), + Term::ExistentialVariable(token) => Some(vec![token]), Term::UnaryPrefix { operation, term, .. } => Some(vec![operation, &**term]), @@ -100,8 +100,8 @@ impl AstNode for Term<'_> { fn span(&self) -> Span { match self { Term::Primitive(t) => t.span(), - Term::Variable(t) => t.span(), - Term::Existential(t) => t.span(), + Term::UniversalVariable(t) => t.span(), + Term::ExistentialVariable(t) => t.span(), Term::UnaryPrefix { span, .. } => *span, Term::Binary { span, .. } => *span, Term::Aggregation { span, .. } => *span, @@ -111,15 +111,6 @@ impl AstNode for Term<'_> { } } - fn position(&self) -> Position { - let span = self.span(); - Position { - offset: span.location_offset(), - line: span.location_line(), - column: span.get_utf8_column() as u32, - } - } - fn is_token(&self) -> bool { false } @@ -138,8 +129,8 @@ impl AstNode for Term<'_> { } match self { Term::Primitive(_) => name!("Primitive"), - Term::Variable(_) => name!("Variable"), - Term::Existential(_) => name!("Existential Variable"), + Term::UniversalVariable(_) => name!("Variable"), + Term::ExistentialVariable(_) => name!("Existential Variable"), Term::UnaryPrefix { .. } => name!("Unary Term"), Term::Binary { .. } => name!("Binary Term"), Term::Aggregation { .. 
} => name!("Aggregation"), @@ -157,7 +148,7 @@ impl AstNode for Term<'_> { fn lsp_identifier(&self) -> Option<(String, String)> { match self { - Term::Variable(t) => Some(( + Term::UniversalVariable(t) => Some(( format!("variable/{}", t.span().fragment()), "statement".to_string(), )), @@ -175,31 +166,31 @@ impl AstNode for Term<'_> { } } - fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> { - None - // TODO: - // match self { - // Term::Variable(t) => Some(t), - // Term::Aggregation { operation, .. } => Some(operation), - // Term::Tuple(tuple) => { - // if let Some(identifier) = tuple.identifier { - // Some(identifier) - // } else { - // None - // } - // } - // // Term::Function(named_tuple) => Some(&named_tuple.identifier), - // _ => None, - // } + fn lsp_range_to_rename(&self) -> Option { + match self { + Term::Primitive(_) => None, + Term::UniversalVariable(t) => Some(t.range()), + Term::UnaryPrefix { .. } => None, + Term::Blank { .. } => None, + Term::ExistentialVariable(t) => Some(t.range()), + Term::Binary { .. } => None, + Term::Aggregation { operation, .. } => Some(operation.range()), + Term::Tuple(tuple) => tuple.identifier.map(|identifier| identifier.range()), + Term::Map(_map) => None, + } } fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> { match self { Term::Primitive(_) => Some((String::from("Primitive term"), SymbolKind::CONSTANT)), - Term::Variable(t) => Some((format!("Variable: {}", t.span()), SymbolKind::VARIABLE)), + Term::UniversalVariable(t) => { + Some((format!("Variable: {}", t.span()), SymbolKind::VARIABLE)) + } Term::UnaryPrefix { .. } => Some((String::from("Unary prefix"), SymbolKind::OPERATOR)), Term::Blank { .. } => Some((String::from("Unary prefix"), SymbolKind::VARIABLE)), - Term::Existential { .. } => Some((String::from("Unary prefix"), SymbolKind::VARIABLE)), + Term::ExistentialVariable { .. } => { + Some((String::from("Existential"), SymbolKind::VARIABLE)) + } Term::Binary { .. } => Some((String::from("Binary term"), SymbolKind::OPERATOR)), Term::Aggregation { operation, .. 
} => Some(( format!("Aggregation: {}", operation.span.fragment()), @@ -215,7 +206,7 @@ impl AstNode for Term<'_> { Some((String::from("Tuple"), SymbolKind::ARRAY)) } } - Term::Map(map) => Some((String::from("Map"), SymbolKind::ARRAY)), + Term::Map(_map) => Some((String::from("Map"), SymbolKind::ARRAY)), } } } @@ -320,15 +311,6 @@ impl AstNode for Primitive<'_> { } } - fn position(&self) -> Position { - let span = self.span(); - Position { - offset: span.location_offset(), - line: span.location_line(), - column: span.get_utf8_column() as u32, - } - } - fn is_token(&self) -> bool { false } @@ -359,7 +341,7 @@ impl AstNode for Primitive<'_> { None } - fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> { + fn lsp_range_to_rename(&self) -> Option { None } @@ -397,10 +379,6 @@ impl AstNode for Exponent<'_> { todo!() } - fn position(&self) -> Position { - todo!() - } - fn is_token(&self) -> bool { todo!() } @@ -413,7 +391,7 @@ impl AstNode for Exponent<'_> { None } - fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> { + fn lsp_range_to_rename(&self) -> Option { None } diff --git a/nemo/src/io/parser/ast/tuple.rs b/nemo/src/io/parser/ast/tuple.rs index 459d22962..b6e5a0bca 100644 --- a/nemo/src/io/parser/ast/tuple.rs +++ b/nemo/src/io/parser/ast/tuple.rs @@ -1,7 +1,7 @@ use tower_lsp::lsp_types::SymbolKind; use super::term::Term; -use super::{ast_to_ascii_tree, AstNode, List, Position, Wsoc}; +use super::{ast_to_ascii_tree, AstNode, List, Position, Range, Wsoc}; use crate::io::lexer::{Span, Token}; use ascii_tree::write_tree; @@ -44,14 +44,6 @@ impl AstNode for Tuple<'_> { self.span } - fn position(&self) -> Position { - Position { - offset: self.span.location_offset(), - line: self.span.location_line(), - column: self.span.get_utf8_column() as u32, - } - } - fn is_token(&self) -> bool { false } @@ -69,7 +61,7 @@ impl AstNode for Tuple<'_> { None } - fn lsp_sub_node_to_rename(&self) -> Option<&dyn AstNode> { + fn lsp_range_to_rename(&self) -> Option { None } diff --git a/nemo/src/model/rule_model/syntax.rs b/nemo/src/model/rule_model/syntax.rs index 7f2f38326..e3b9cad78 100644 --- a/nemo/src/model/rule_model/syntax.rs +++ b/nemo/src/model/rule_model/syntax.rs @@ -1,7 +1,6 @@ //! Constants for strings that are relevant to the syntax of rules. //! These are kept in one location, since they are required in various //! places related to parsing and display. -use nemo_physical::datavalues::syntax; /// The "predicate name" used for the CSV format in import/export directives. 
pub(crate) const FILE_FORMAT_CSV: &str = "csv";

From 2ad32a453f86730d8dd47eaef0f5a76f9ae0f704 Mon Sep 17 00:00:00 2001
From: Alex Ivliev
Date: Fri, 28 Jun 2024 13:13:08 +0200
Subject: [PATCH 106/214] Add test cases that highlight discrepancies between
 old and new parser

---
 nemo/src/io/lexer.rs  |   1 +
 nemo/src/io/parser.rs | 126 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 127 insertions(+)

diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs
index 6072a126c..8338c8f1c 100644
--- a/nemo/src/io/lexer.rs
+++ b/nemo/src/io/lexer.rs
@@ -1008,6 +1008,7 @@ mod tests {
 
     // FIXME: change the name of this test according to the correct name for `?X > 3`
     // (Constraints are Rules with an empty Head)
+    #[ignore]
     #[test]
     fn constraints() {
         let input = Span::new("A(?X):-B(?X),?X<42,?X>3.");
diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs
index 0fe2f64e8..2db963fb5 100644
--- a/nemo/src/io/parser.rs
+++ b/nemo/src/io/parser.rs
@@ -5154,5 +5154,131 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters
             dbg!(&result);
             println!("{}", result.0);
         }
+
+        // TODO: Instead of just checking for errors, this should compare the created AST
+        #[test]
+        fn parse_language_tag() {
+            let test_string = "fact(\"テスト\"@ja).";
+            let input = Span::new(&test_string);
+            let refcell = RefCell::new(Vec::new());
+            let parser_state = ParserState { errors: &refcell };
+            let input = Input {
+                input,
+                parser_state,
+            };
+            let result = parse_program::<ErrorTree<Input<'_, '_>>>(input);
+            assert!(result.1.is_empty());
+        }
+
+        // TODO: Instead of just checking for errors, this should compare the created AST
+        #[test]
+        fn parse_rdf_literal() {
+            let test_string = "fact(\"2023\"^^xsd:gYear).";
+            let input = Span::new(&test_string);
+            let refcell = RefCell::new(Vec::new());
+            let parser_state = ParserState { errors: &refcell };
+            let input = Input {
+                input,
+                parser_state,
+            };
+            let result = parse_program::<ErrorTree<Input<'_, '_>>>(input);
+            assert!(result.1.is_empty());
+        }
+
+        // TODO: Instead of just checking for errors, this should compare the created AST
+        #[test]
+        fn parse_floating_point_numbers() {
+            // https://regex101.com/r/ObowxD/5
+
+            let valid_numbers = vec![
+                "0.2",
+                "4534.34534345",
+                ".456456",
+                "1.",
+                "1e545",
+                "1.1e435",
+                ".1e232",
+                "1.e343",
+                "112E+12",
+                "12312.1231",
+                ".1231",
+                "1231",
+                "-1e+0",
+                "1e-1",
+            ];
+
+            let invalid_numbers = vec!["3", "E9", ".e3", "7E"];
+
+            for valid in valid_numbers {
+                let input = Span::new(valid);
+                let refcell = RefCell::new(Vec::new());
+                let parser_state = ParserState { errors: &refcell };
+                let input = Input {
+                    input,
+                    parser_state,
+                };
+
+                let result = parse_decimal::<ErrorTree<Input<'_, '_>>>(input);
+                assert!(result.is_ok())
+            }
+
+            for invalid in invalid_numbers {
+                let input = Span::new(invalid);
+                let refcell = RefCell::new(Vec::new());
+                let parser_state = ParserState { errors: &refcell };
+                let input = Input {
+                    input,
+                    parser_state,
+                };
+
+                let result = parse_decimal::<ErrorTree<Input<'_, '_>>>(input);
+                assert!(result.is_err())
+            }
+        }
+
+        // TODO: Instead of just checking for errors, this should compare the created AST
+        #[test]
+        fn parse_complex_comparison() {
+            let test_string = "complex(?X, ?Y) :- data(?X, ?Y), ABS(?X - ?Y) >= ?X * ?X.";
+            let input = Span::new(&test_string);
+            let refcell = RefCell::new(Vec::new());
+            let parser_state = ParserState { errors: &refcell };
+            let input = Input {
+                input,
+                parser_state,
+            };
+            let result = parse_program::<ErrorTree<Input<'_, '_>>>(input);
+            assert!(result.1.is_empty());
+        }
+
+        // TODO: Instead of just checking for errors, this should compare the created AST
+
#[test]
+        fn parse_negation() {
+            let test_string = "R(?x, ?y, ?z) :- S(?x, ?y, ?z), ~T(?x, ?y), ~ T(a, ?z)."; // should allow for spaces
+            let input = Span::new(&test_string);
+            let refcell = RefCell::new(Vec::new());
+            let parser_state = ParserState { errors: &refcell };
+            let input = Input {
+                input,
+                parser_state,
+            };
+            let result = parse_program::<ErrorTree<Input<'_, '_>>>(input);
+            assert!(result.1.is_empty());
+        }
+
+        // TODO: Instead of just checking for errors, this should compare the created AST
+        #[test]
+        fn parse_trailing_comma() {
+            let test_string = "head(?X) :- body( (2,), (3, 4, ), ?X) ."; // should allow for spaces
+            let input = Span::new(&test_string);
+            let refcell = RefCell::new(Vec::new());
+            let parser_state = ParserState { errors: &refcell };
+            let input = Input {
+                input,
+                parser_state,
+            };
+            let result = parse_program::<ErrorTree<Input<'_, '_>>>(input);
+            assert!(result.1.is_empty());
+        }
     }
 }

From 1319fba6faafb4d386e10997674c46f2bcaa4633 Mon Sep 17 00:00:00 2001
From: Alex Ivliev
Date: Fri, 28 Jun 2024 13:10:18 +0200
Subject: [PATCH 107/214] Update integration tests to reflect new input syntax

---
 resources/testcases/basic/join.rls                         | 6 +++---
 resources/testcases/basic/negation.rls                     | 8 ++++----
 resources/testcases/basic/projection.rls                   | 2 +-
 resources/testcases/basic/union.rls                        | 6 +++---
 .../regression/planning_engine/repeated_variables/run.rls  | 6 +++---
 .../regression/stratification/binary_negated.rls           | 4 ++--
 .../regression/symmetric_transitive_closure/run.rls        | 4 ++--
 resources/testcases/regression/wildcards/run.rls           | 2 +-
 8 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/resources/testcases/basic/join.rls b/resources/testcases/basic/join.rls
index 9b3492c4f..996a4c204 100644
--- a/resources/testcases/basic/join.rls
+++ b/resources/testcases/basic/join.rls
@@ -1,6 +1,6 @@
-@source sourceA[3]: load-csv("sources/dataA.csv").
-@source sourceB[3]: load-csv("sources/dataB.csv").
-@source sourceC[3]: load-csv("sources/dataC.csv").
+@import sourceA :- csv { resource = "sources/dataA.csv" } .
+@import sourceB :- csv { resource = "sources/dataB.csv" } .
+@import sourceC :- csv { resource = "sources/dataC.csv" } .
 
 J1(?X, ?Y, ?Z) :- sourceA(?X, ?Z, ?Y), sourceB(?X, ?Y, ?T) .
 J2(?X, ?Y, ?Z) :- sourceA(?Z, ?Y, ?X), sourceC(?X, ?Y, ?T) .
diff --git a/resources/testcases/basic/negation.rls b/resources/testcases/basic/negation.rls
index 713b14fd6..0b0ee5862 100644
--- a/resources/testcases/basic/negation.rls
+++ b/resources/testcases/basic/negation.rls
@@ -1,7 +1,7 @@
-@source main[3]: load-csv("sources/main.csv").
-@source s1[1]: load-csv("sources/S1.csv").
-@source s2[2]: load-csv("sources/S2.csv").
-@source s3[3]: load-csv("sources/S3.csv").
+@import main :- csv { resource = "sources/main.csv" }.
+@import s1 :- csv { resource = "sources/S1.csv" }.
+@import s2 :- csv { resource = "sources/S2.csv" }.
+@import s3 :- csv { resource = "sources/S3.csv" }.
 
 singlePositionX(?X, ?Y, ?Z) :- main(?X, ?Y, ?Z), ~s1(?X) .
 singlePositionY(?X, ?Y, ?Z) :- main(?X, ?Y, ?Z), ~s1(?Y) .
diff --git a/resources/testcases/basic/projection.rls b/resources/testcases/basic/projection.rls
index a3c7b92d3..57da6db6f 100644
--- a/resources/testcases/basic/projection.rls
+++ b/resources/testcases/basic/projection.rls
@@ -1,4 +1,4 @@
-@source data[3]: load-csv("sources/data.csv").
+@import data :- csv { resource = "sources/data.csv" }.
 
 A(?X, ?Z) :- data(?X, ?Y, ?Z) .
 B(?Y, ?X) :- A(?X, ?Y) .
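The remaining files in this patch apply the same mechanical rewrite, so one before/after pair summarizes it: the legacy @source directive, which fixed the predicate arity in square brackets and named a loader function, becomes an @import directive whose right-hand side is a format map. The predicate and file names below are placeholders, not taken from the test suite:

    % old syntax: arity is explicit, the format is implied by the loader
    @source edge[2]: load-csv("sources/edges.csv").
    % new syntax: the format tag and its parameters form a map
    @import edge :- csv { resource = "sources/edges.csv" } .

Note that the explicit arity annotation has no counterpart in the new form; the diffs that follow repeat this pattern verbatim.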
diff --git a/resources/testcases/basic/union.rls b/resources/testcases/basic/union.rls index 1402addc5..4258d3e1c 100644 --- a/resources/testcases/basic/union.rls +++ b/resources/testcases/basic/union.rls @@ -1,6 +1,6 @@ -@source sourceA[3]: load-csv("sources/dataA.csv"). -@source sourceB[3]: load-csv("sources/dataB.csv"). -@source sourceC[3]: load-csv("sources/dataC.csv"). +@import sourceA :- csv { resource = "sources/dataA.csv" }. +@import sourceB :- csv { resource = "sources/dataB.csv" }. +@import sourceC :- csv { resource = "sources/dataC.csv" }. ABC(?X, ?Y, ?Z) :- sourceA(?X, ?Y, ?Z) . ABC(?X, ?Y, ?Z) :- sourceB(?X, ?Y, ?Z) . diff --git a/resources/testcases/regression/planning_engine/repeated_variables/run.rls b/resources/testcases/regression/planning_engine/repeated_variables/run.rls index 49cb4ece7..b4d830ec7 100644 --- a/resources/testcases/regression/planning_engine/repeated_variables/run.rls +++ b/resources/testcases/regression/planning_engine/repeated_variables/run.rls @@ -1,6 +1,6 @@ -@source sourceA[3]: load-csv("sources/dataA.csv"). -@source sourceB[3]: load-csv("sources/dataB.csv"). -@source sourceC[3]: load-csv("sources/dataC.csv"). +@import sourceA :- csv { resource = "sources/dataA.csv" }. +@import sourceB :- csv { resource = "sources/dataB.csv" }. +@import sourceC :- csv { resource = "sources/dataC.csv" }. RepeatBody(?R, ?S) :- sourceA(?X, ?X, ?R), sourceB(?S, ?Y, ?Y) . RepeatHead(?X, ?Y, ?X, ?Y, ?Z, ?Z, ?X) :- sourceA(?X, ?Z, ?Y), sourceB(?X, ?Y, ?T) . diff --git a/resources/testcases/regression/stratification/binary_negated.rls b/resources/testcases/regression/stratification/binary_negated.rls index 0393f4ead..82ee56981 100644 --- a/resources/testcases/regression/stratification/binary_negated.rls +++ b/resources/testcases/regression/stratification/binary_negated.rls @@ -1,5 +1,5 @@ -@source equal[2]: load-csv("sources/equal.csv"). -@source data[2]: load-csv("sources/data.csv"). +@import equal :- csv { resource = "sources/equal.csv" }. +@import data :- csv { resource = "sources/data.csv" }. equal(b, b) . equal(c, c) . diff --git a/resources/testcases/regression/symmetric_transitive_closure/run.rls b/resources/testcases/regression/symmetric_transitive_closure/run.rls index 9eedc37ab..a1b24c09b 100644 --- a/resources/testcases/regression/symmetric_transitive_closure/run.rls +++ b/resources/testcases/regression/symmetric_transitive_closure/run.rls @@ -1,5 +1,5 @@ -@source city[1]: load-csv("city.csv"). -@source conn[2]: load-csv("conn.csv"). +@import city :- csv { resource = "city.csv" }. +@import conn :- csv { resource = "conn.csv" }. connected(?X,?Y) :- city(?X), city(?Y), conn(?X,?Y). conn(?X,?Y) :- conn(?Y,?X). diff --git a/resources/testcases/regression/wildcards/run.rls b/resources/testcases/regression/wildcards/run.rls index c323f7bfd..64c1ef223 100644 --- a/resources/testcases/regression/wildcards/run.rls +++ b/resources/testcases/regression/wildcards/run.rls @@ -1,4 +1,4 @@ -@source input[3]: load-csv("sources/main.csv"). +@import input :- csv { resource = "sources/main.csv" }. result(?x) :- input(?x, _, _). 
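The format maps introduced above are worth a closer look, since the parser patches later in this series revolve around them. The following stand-alone nom sketch recognizes a format { key = value, ... } body like the ones in these test cases. It is an illustration only, not the parser from this series: the flat &str input, the helper names key_value and format_map, and the simplified grammar (bare identifier keys, quoted-string or identifier values, no nesting, no spans, no error recovery) are all assumptions of the sketch.

    use nom::{
        branch::alt,
        bytes::complete::{is_not, tag},
        character::complete::{alphanumeric1, multispace0},
        combinator::recognize,
        multi::separated_list0,
        sequence::{delimited, separated_pair, tuple},
        IResult,
    };

    // One `key = value` pair; values are quoted strings or bare identifiers.
    fn key_value(input: &str) -> IResult<&str, (&str, &str)> {
        separated_pair(
            alphanumeric1,
            delimited(multispace0, tag("="), multispace0),
            alt((
                recognize(tuple((tag("\""), is_not("\""), tag("\"")))),
                alphanumeric1,
            )),
        )(input)
    }

    // A `csv { resource = "sources/dataA.csv" }`-style body: a format tag
    // followed by a brace-delimited, comma-separated list of pairs.
    fn format_map(input: &str) -> IResult<&str, (&str, Vec<(&str, &str)>)> {
        tuple((
            alphanumeric1,
            delimited(
                delimited(multispace0, tag("{"), multispace0),
                separated_list0(delimited(multispace0, tag(","), multispace0), key_value),
                delimited(multispace0, tag("}"), multispace0),
            ),
        ))(input)
    }

    fn main() {
        let (rest, (format, pairs)) =
            format_map(r#"csv { resource = "sources/dataA.csv" }"#).unwrap();
        assert_eq!(format, "csv");
        assert_eq!(pairs, vec![("resource", r#""sources/dataA.csv""#)]);
        assert!(rest.is_empty());
    }

The real implementation additionally tracks a source span for every token and records recoverable errors, which is why its signatures are generic over a ParseError/ContextError pair rather than fixed to nom's default error type.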
From 1a9fc668b32b4fc8dff91b7d7bb69d15bd9cbcda Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Mon, 1 Jul 2024 11:21:44 +0200 Subject: [PATCH 108/214] Remove Whitespace in AST Nodes --- nemo/src/io/lexer.rs | 11 +- nemo/src/io/parser.rs | 377 ++++++---------------------- nemo/src/io/parser/ast.rs | 68 ++--- nemo/src/io/parser/ast/atom.rs | 10 - nemo/src/io/parser/ast/directive.rs | 84 ------- nemo/src/io/parser/ast/map.rs | 20 -- nemo/src/io/parser/ast/statement.rs | 25 -- nemo/src/io/parser/ast/term.rs | 20 -- nemo/src/io/parser/ast/tuple.rs | 12 - 9 files changed, 101 insertions(+), 526 deletions(-) diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs index 8338c8f1c..6b8811654 100644 --- a/nemo/src/io/lexer.rs +++ b/nemo/src/io/lexer.rs @@ -6,7 +6,7 @@ use super::parser::new::context; use nom::{ branch::alt, bytes::complete::{is_not, tag, take, take_till}, - character::complete::{alpha1, alphanumeric1, digit1, line_ending, multispace1}, + character::complete::{alpha1, alphanumeric1, digit1, line_ending, multispace0, multispace1}, combinator::{all_consuming, cut, map, opt, recognize}, error::ParseError, multi::{many0, many1}, @@ -674,14 +674,15 @@ where }) } -pub(crate) fn skip_to_dot<'a, 's, E>(input: Input<'a, 's>) -> (Input<'a, 's>, Token<'a>) +pub(crate) fn skip_to_statement_end<'a, 's, E>(input: Input<'a, 's>) -> (Input<'a, 's>, Token<'a>) where E: ParseError> + ContextError, Context>, { - let (rest_input, error_input) = recognize(pair( + let (rest_input, error_input) = recognize(tuple(( take_till::<_, Input<'_, '_>, nom::error::Error<_>>(|c| c == '.'), opt(tag(".")), - ))(input) + multispace0, + )))(input) .expect("Skipping to the next dot should not fail!"); ( rest_input, @@ -1129,6 +1130,6 @@ mod tests { input, parser_state: errors, }; - dbg!(super::skip_to_dot::>(input)); + dbg!(super::skip_to_statement_end::>(input)); } } diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index 2db963fb5..1f2e4e270 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -2443,10 +2443,11 @@ pub mod new { exp, greater, greater_equal, hash, less, less_equal, lex_comment, lex_doc_comment, lex_ident, lex_iri, lex_number, lex_operators, lex_prefixed_ident, lex_string, lex_toplevel_doc_comment, lex_whitespace, minus, open_brace, open_paren, plus, - question_mark, skip_to_dot, slash, star, tilde, underscore, unequal, Context, Error, - ErrorTree, ParserState, Span, Token, TokenKind, + question_mark, skip_to_statement_end, slash, star, tilde, underscore, unequal, Context, + Error, ErrorTree, ParserState, Span, Token, TokenKind, }; use crate::io::parser::ast::AstNode; + use nom::character::complete::multispace0; use nom::combinator::{all_consuming, cut, map, opt, recognize}; use nom::error::{ErrorKind, ParseError}; use nom::sequence::{delimited, pair}; @@ -2518,7 +2519,7 @@ pub mod new { context: vec![context], }; // errors.report_error(err); - let (rest_input, token) = skip_to_dot::>>(input); + let (rest_input, token) = skip_to_statement_end::>>(input); Ok((rest_input, Statement::Error(token))) } Err(err) => Err(err), @@ -2709,19 +2710,26 @@ pub mod new { Context::Program, pair( opt(lex_toplevel_doc_comment::>>), - many0(recover( - report_error(alt(( - // TODO: Discuss wether directives should only get parsed at the beginning of the source file - parse_rule, - parse_fact, - parse_whitespace, - parse_directive, - parse_comment, - ))), - "failed to parse statement", - Context::Program, - input.parser_state, - )), + delimited( + multispace0, + many0(recover( + 
report_error(delimited(
+                        multispace0,
+                        alt((
+                            // TODO: Discuss whether directives should only get parsed at the beginning of the source file
+                            parse_rule,
+                            parse_fact,
+                            parse_directive,
+                            parse_comment,
+                        )),
+                        multispace0,
+                    )),
                     "failed to parse statement",
                     Context::Program,
                     input.parser_state,
                 )),
+                multispace0,
+            ),
         ),
     )(input);
     match result {
@@ -2755,17 +2763,6 @@ pub mod new {
         parse_program::<ErrorTree<Input<'_, '_>>>(input)
     }
 
-    /// Parse whitespace that is between directives, facts, rules and comments.
-    fn parse_whitespace<
-        'a,
-        's,
-        E: ParseError<Input<'a, 's>> + ContextError<Input<'a, 's>, Context>,
-    >(
-        input: Input<'a, 's>,
-    ) -> IResult<Input<'a, 's>, Statement<'a>, E> {
-        lex_whitespace(input).map(|(rest_input, ws)| (rest_input, Statement::Whitespace(ws)))
-    }
-
     /// Parse normal comments that start with a `%` and ends at the line ending.
     fn parse_comment<
         'a,
         's,
         E: ParseError<Input<'a, 's>> + ContextError<Input<'a, 's>, Context>,
@@ -2786,14 +2783,13 @@ pub mod new {
             Context::Fact,
             tuple((opt(lex_doc_comment), parse_normal_atom, wsoc0, dot)),
         )(input)
-        .map(|(rest_input, (doc_comment, atom, ws, dot))| {
+        .map(|(rest_input, (doc_comment, atom, _ws, dot))| {
             (
                 rest_input,
                 Statement::Fact {
                     span: outer_span(input.input, rest_input.input),
                     doc_comment,
                     atom,
-                    ws,
                     dot,
                 },
             )
@@ -2818,18 +2814,15 @@ pub mod new {
             )),
         )(input)
         .map(
-            |(rest_input, (doc_comment, head, ws1, arrow, ws2, body, ws3, dot))| {
+            |(rest_input, (doc_comment, head, _ws1, arrow, _ws2, body, _ws3, dot))| {
                 (
                     rest_input,
                     Statement::Rule {
                         span: outer_span(input.input, rest_input.input),
                         doc_comment,
                         head,
-                        ws1,
                         arrow,
-                        ws2,
                         body,
-                        ws3,
                         dot,
                     },
                 )
@@ -2894,23 +2887,19 @@ pub mod new {
                 dot,
             )),
         )(input)
-        .map(|(rest_input, (doc_comment, kw, ws1, base_iri, ws2, dot))| {
-            (
-                rest_input,
-                Directive::Base {
-                    span: outer_span(input.input, rest_input.input),
-                    doc_comment,
-                    kw: Token {
-                        kind: TokenKind::Base,
-                        span: kw.input,
+        .map(
+            |(rest_input, (doc_comment, _kw, _ws1, base_iri, _ws2, dot))| {
+                (
+                    rest_input,
+                    Directive::Base {
+                        span: outer_span(input.input, rest_input.input),
+                        doc_comment,
+                        base_iri,
+                        dot,
                     },
-                    ws1,
-                    base_iri,
-                    ws2,
-                    dot,
-                },
-            )
-        })
+                )
+            },
+        )
     }
 
     /// Parse the prefix directive.
@@ -2938,24 +2927,17 @@ pub mod new { )), )(input) .map( - |(rest_input, (doc_comment, kw, ws1, prefix, ws2, prefix_iri, ws3, dot))| { + |(rest_input, (doc_comment, _kw, _ws1, prefix, _ws2, prefix_iri, _ws3, dot))| { ( rest_input, Directive::Prefix { span: outer_span(input.input, rest_input.input), doc_comment, - kw: Token { - kind: TokenKind::Prefix, - span: kw.input, - }, - ws1, prefix: Token { kind: TokenKind::Ident, span: prefix.input, }, - ws2, prefix_iri, - ws3, dot, }, ) @@ -2990,23 +2972,18 @@ pub mod new { )), )(input) .map( - |(rest_input, (doc_comment, kw, ws1, predicate, ws2, arrow, ws3, map, ws4, dot))| { + |( + rest_input, + (doc_comment, _kw, _ws1, predicate, _ws2, arrow, _ws3, map, _ws4, dot), + )| { ( rest_input, Directive::Import { span: outer_span(input.input, rest_input.input), doc_comment, - kw: Token { - kind: TokenKind::Import, - span: kw.input, - }, - ws1, predicate, - ws2, arrow, - ws3, map, - ws4, dot, }, ) @@ -3041,23 +3018,18 @@ pub mod new { )), )(input) .map( - |(rest_input, (doc_comment, kw, ws1, predicate, ws2, arrow, ws3, map, ws4, dot))| { + |( + rest_input, + (doc_comment, _kw, _ws1, predicate, _ws2, arrow, _ws3, map, _ws4, dot), + )| { ( rest_input, Directive::Export { span: outer_span(input.input, rest_input.input), doc_comment, - kw: Token { - kind: TokenKind::Export, - span: kw.input, - }, - ws1, predicate, - ws2, arrow, - ws3, map, - ws4, dot, }, ) @@ -3088,19 +3060,13 @@ pub mod new { )), )(input) .map( - |(rest_input, (doc_comment, kw, ws1, predicates, ws2, dot))| { + |(rest_input, (doc_comment, _kw, _ws1, predicates, _ws2, dot))| { ( rest_input, Directive::Output { span: outer_span(input.input, rest_input.input), doc_comment, - kw: Token { - kind: TokenKind::Output, - span: kw.input, - }, - ws1, predicates, - ws2, dot, }, ) @@ -3152,7 +3118,15 @@ pub mod new { List { span: outer_span(input.input, rest_input.input), first, - rest: if rest.is_empty() { None } else { Some(rest) }, + rest: if rest.is_empty() { + None + } else { + Some( + rest.into_iter() + .map(|(_ws1, comma, _ws2, t)| (comma, t)) + .collect(), + ) + }, }, ) }) @@ -3239,15 +3213,13 @@ pub mod new { Context::InfixAtom, tuple((parse_term, wsoc0, parse_operation_token, wsoc0, parse_term)), )(input) - .map(|(rest_input, (lhs, ws1, operation, ws2, rhs))| { + .map(|(rest_input, (lhs, _ws1, operation, _ws2, rhs))| { ( rest_input, Atom::InfixAtom { span: outer_span(input.input, rest_input.input), lhs, - ws1, operation, - ws2, rhs, }, ) @@ -3272,17 +3244,14 @@ pub mod new { )), )(input) .map( - |(rest_input, (identifier, ws1, open_paren, ws2, terms, ws3, close_paren))| { + |(rest_input, (identifier, _ws1, open_paren, _ws2, terms, _ws3, close_paren))| { ( rest_input, Tuple { span: outer_span(input.input, rest_input.input), identifier, - ws1, open_paren, - ws2, terms, - ws3, close_paren, }, ) @@ -3312,17 +3281,14 @@ pub mod new { )), )(input) .map( - |(rest_input, (identifier, ws1, open_paren, ws2, terms, ws3, close_paren))| { + |(rest_input, (identifier, _ws1, open_paren, _ws2, terms, _ws3, close_paren))| { ( rest_input, Tuple { span: outer_span(input.input, rest_input.input), identifier: Some(identifier), - ws1, open_paren, - ws2, terms, - ws3, close_paren, }, ) @@ -3348,17 +3314,14 @@ pub mod new { )), )(input) .map( - |(rest_input, (identifier, ws1, open_brace, ws2, pairs, ws3, close_brace))| { + |(rest_input, (identifier, _ws1, open_brace, _ws2, pairs, _ws3, close_brace))| { ( rest_input, Map { span: outer_span(input.input, rest_input.input), identifier, - ws1, open_brace, - ws2, pairs, - 
ws3, close_brace, }, ) @@ -3418,15 +3381,13 @@ pub mod new { Context::Pair, tuple((parse_term, wsoc0, equal, wsoc0, parse_term)), )(input) - .map(|(rest_input, (key, ws1, equal, ws2, value))| { + .map(|(rest_input, (key, _ws1, equal, _ws2, value))| { ( rest_input, Pair { span: outer_span(input.input, rest_input.input), key, - ws1, equal, - ws2, value, }, ) @@ -3685,13 +3646,11 @@ pub mod new { .map(|(rest_input, (lhs, opt))| { ( rest_input, - if let Some((ws1, operation, ws2, rhs)) = opt { + if let Some((_ws1, operation, _ws2, rhs)) = opt { Term::Binary { span: outer_span(input.input, rest_input.input), lhs: Box::new(lhs), - ws1, operation, - ws2, rhs: Box::new(rhs), } } else { @@ -3725,13 +3684,11 @@ pub mod new { .map(|(rest_input, (lhs, opt))| { ( rest_input, - if let Some((ws1, operation, ws2, rhs)) = opt { + if let Some((_ws1, operation, _ws2, rhs)) = opt { Term::Binary { span: outer_span(input.input, rest_input.input), lhs: Box::new(lhs), - ws1, operation, - ws2, rhs: Box::new(rhs), } } else { @@ -3801,7 +3758,7 @@ pub mod new { )), )(input) .map( - |(rest_input, (operation, open_paren, ws1, terms, ws2, close_paren))| { + |(rest_input, (operation, open_paren, _ws1, terms, _ws2, close_paren))| { ( rest_input, Term::Aggregation { @@ -3811,9 +3768,7 @@ pub mod new { span: operation.input, }, open_paren, - ws1, terms: Box::new(terms), - ws2, close_paren, }, ) @@ -3989,12 +3944,10 @@ pub mod new { kind: TokenKind::Ident, span: s!(0, 1, "a"), }), - ws1: None, open_paren: Token { kind: TokenKind::OpenParen, span: s!(1, 1, "("), }, - ws2: None, terms: Some(List { span: s!(2, 1, "B,C"), first: Term::Primitive(Primitive::Constant(Token { @@ -4002,25 +3955,21 @@ pub mod new { span: s!(2, 1, "B"), })), rest: Some(vec![( - None, Token { kind: TokenKind::Comma, span: s!(3, 1, ",") }, - None, Term::Primitive(Primitive::Constant(Token { kind: TokenKind::Ident, span: s!(4, 1, "C"), })), )]), }), - ws3: None, close_paren: Token { kind: TokenKind::CloseParen, span: s!(5, 1, ")"), }, }), - ws: None, dot: Token { kind: TokenKind::Dot, span: s!(6, 1, ".") @@ -4051,22 +4000,10 @@ pub mod new { Statement::Directive(Directive::Base { span: s!(0, 1, "@base ."), doc_comment: None, - kw: Token { - kind: TokenKind::Base, - span: s!(0, 1, "@base"), - }, - ws1: Some(Wsoc { - span: s!(5, 1, " "), - token: vec![Token { - kind: TokenKind::Whitespace, - span: s!(5, 1, " ") - }] - }), base_iri: Token { kind: TokenKind::Iri, span: s!(6, 1, "") }, - ws2: None, dot: Token { kind: TokenKind::Dot, span: s!(31, 1, ".") @@ -4079,27 +4016,14 @@ pub mod new { "@prefix rdfs:." 
), doc_comment: None, - kw: Token { - kind: TokenKind::Prefix, - span: s!(32, 1, "@prefix"), - }, - ws1: Some(Wsoc { - span: s!(39, 1, " "), - token: vec![Token { - kind: TokenKind::Whitespace, - span: s!(39, 1, " ") - }] - }), prefix: Token { kind: TokenKind::Ident, span: s!(40, 1, "rdfs:"), }, - ws2: None, prefix_iri: Token { kind: TokenKind::Iri, span: s!(45, 1, ""), }, - ws3: None, dot: Token { kind: TokenKind::Dot, span: s!(84, 1, ".") @@ -4112,39 +4036,24 @@ pub mod new { r#"@import sourceA:-csv{resource="sources/dataA.csv"}."# ), doc_comment: None, - kw: Token { - kind: TokenKind::Import, - span: s!(85, 1, "@import"), - }, - ws1: Wsoc { - span: s!(92, 1, " "), - token: vec![Token { - kind: TokenKind::Whitespace, - span: s!(92, 1, " "), - }] - }, predicate: Token { kind: TokenKind::Ident, span: s!(93, 1, "sourceA"), }, - ws2: None, arrow: Token { kind: TokenKind::Arrow, span: s!(100, 1, ":-"), }, - ws3: None, map: Map { span: s!(102, 1, r#"csv{resource="sources/dataA.csv"}"#), identifier: Some(Token { kind: TokenKind::Ident, span: s!(102, 1, "csv") }), - ws1: None, open_brace: Token { kind: TokenKind::OpenBrace, span: s!(105, 1, "{") }, - ws2: None, pairs: Some(List { span: s!(106, 1, "resource=\"sources/dataA.csv\""), first: Pair { @@ -4153,12 +4062,10 @@ pub mod new { kind: TokenKind::Ident, span: s!(106, 1, "resource"), })), - ws1: None, equal: Token { kind: TokenKind::Equal, span: s!(114, 1, "="), }, - ws2: None, value: Term::Primitive(Primitive::String(Token { kind: TokenKind::String, span: s!(115, 1, "\"sources/dataA.csv\""), @@ -4166,13 +4073,11 @@ pub mod new { }, rest: None, }), - ws3: None, close_brace: Token { kind: TokenKind::CloseBrace, span: s!(134, 1, "}") }, }, - ws4: None, dot: Token { kind: TokenKind::Dot, span: s!(135, 1, ".") @@ -4181,47 +4086,30 @@ pub mod new { Statement::Directive(Directive::Export { span: s!(136, 1, "@export a:-csv{}."), doc_comment: None, - kw: Token { - kind: TokenKind::Export, - span: s!(136, 1, "@export"), - }, - ws1: Wsoc { - span: s!(143, 1, " "), - token: vec![Token { - kind: TokenKind::Whitespace, - span: s!(143, 1, " "), - }] - }, predicate: Token { kind: TokenKind::Ident, span: s!(144, 1, "a"), }, - ws2: None, arrow: Token { kind: TokenKind::Arrow, span: s!(145, 1, ":-"), }, - ws3: None, map: Map { span: s!(147, 1, "csv{}"), identifier: Some(Token { kind: TokenKind::Ident, span: s!(147, 1, "csv"), }), - ws1: None, open_brace: Token { kind: TokenKind::OpenBrace, span: s!(150, 1, "{"), }, - ws2: None, pairs: None, - ws3: None, close_brace: Token { kind: TokenKind::CloseBrace, span: s!(151, 1, "}"), }, }, - ws4: None, dot: Token { kind: TokenKind::Dot, span: s!(152, 1, "."), @@ -4230,17 +4118,6 @@ pub mod new { Statement::Directive(Directive::Output { span: s!(153, 1, "@output a, b, c."), doc_comment: None, - kw: Token { - kind: TokenKind::Output, - span: s!(153, 1, "@output") - }, - ws1: Wsoc { - span: s!(160, 1, " "), - token: vec![Token { - kind: TokenKind::Whitespace, - span: s!(160, 1, " "), - }] - }, predicates: Some(List { span: s!(161, 1, "a, b, c"), first: Token { @@ -4249,36 +4126,20 @@ pub mod new { }, rest: Some(vec![ ( - None, Token { kind: TokenKind::Comma, span: s!(162, 1, ","), }, - Some(Wsoc { - span: s!(163, 1, " "), - token: vec![Token { - kind: TokenKind::Whitespace, - span: s!(163, 1, " "), - }] - }), Token { kind: TokenKind::Ident, span: s!(164, 1, "b"), }, ), ( - None, Token { kind: TokenKind::Comma, span: s!(165, 1, ","), }, - Some(Wsoc { - span: s!(166, 1, " "), - token: vec![Token { - kind: 
TokenKind::Whitespace, - span: s!(166, 1, " "), - }] - }), Token { kind: TokenKind::Ident, span: s!(167, 1, "c"), @@ -4286,7 +4147,6 @@ pub mod new { ), ]), }), - ws2: None, dot: Token { kind: TokenKind::Dot, span: s!(168, 1, "."), @@ -4337,12 +4197,10 @@ pub mod new { kind: TokenKind::Ident, span: s!(0, 1, "some"), }), - ws1: None, open_paren: Token { kind: TokenKind::OpenParen, span: s!(4, 1, "(") }, - ws2: None, terms: Some(List { span: s!(5, 1, "Fact, with, whitespace"), first: Term::Primitive(Primitive::Constant(Token { @@ -4351,36 +4209,20 @@ pub mod new { })), rest: Some(vec![ ( - None, Token { kind: TokenKind::Comma, span: s!(9, 1, ","), }, - Some(Wsoc { - span: s!(10, 1, " "), - token: vec![Token { - kind: TokenKind::Whitespace, - span: s!(10, 1, " "), - }] - }), Term::Primitive(Primitive::Constant(Token { kind: TokenKind::Ident, span: s!(11, 1, "with") })), ), ( - None, Token { kind: TokenKind::Comma, span: s!(15, 1, ","), }, - Some(Wsoc { - span: s!(16, 1, " "), - token: vec![Token { - kind: TokenKind::Whitespace, - span: s!(16, 1, " "), - }] - }), Term::Primitive(Primitive::Constant(Token { kind: TokenKind::Ident, span: s!(17, 1, "whitespace") @@ -4388,28 +4230,16 @@ pub mod new { ), ]), }), - ws3: None, close_paren: Token { kind: TokenKind::CloseParen, span: s!(27, 1, ")") }, }), - ws: Some(Wsoc { - span: s!(28, 1, " "), - token: vec![Token { - kind: TokenKind::Whitespace, - span: s!(28, 1, " "), - }] - }), dot: Token { kind: TokenKind::Dot, span: s!(29, 1, "."), }, }, - Statement::Whitespace(Token { - kind: TokenKind::Whitespace, - span: s!(30, 1, " ") - }), Statement::Comment(Token { kind: TokenKind::Comment, span: s!(31, 1, "% and a super useful comment\n") @@ -4447,18 +4277,19 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters // let ast = parse_program::>(input); let (ast, _) = parse_program::>(input); println!("{}", ast); - assert_eq!( - { - let mut string_from_tokens = String::new(); - for token in get_all_tokens(&ast) { - string_from_tokens.push_str(token.span().fragment()); - } - println!("String from Tokens:\n"); - println!("{}\n", string_from_tokens); - string_from_tokens - }, - *input.input.fragment(), - ); + // With the removal of whitespace in the AST this does not work anymore. + // assert_eq!( + // { + // let mut string_from_tokens = String::new(); + // for token in get_all_tokens(&ast) { + // string_from_tokens.push_str(token.span().fragment()); + // } + // println!("String from Tokens:\n"); + // println!("{}\n", string_from_tokens); + // string_from_tokens + // }, + // *input.input.fragment(), + // ); } #[test] @@ -4548,9 +4379,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters after: T! {Number, 0, 1, "35"}, exponent: None, })), - ws1: None, operation: T! {Plus, 2, 1, "+"}, - ws2: None, rhs: Box::new(Term::Primitive(Primitive::Number { span: s!(3, 1, "7"), sign: None, @@ -4585,9 +4414,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters after: T! {Number, 0,1,"6"}, exponent: None, })), - ws1: None, operation: T! {Star, 1,1,"*"}, - ws2: None, rhs: Box::new(Term::Primitive(Primitive::Number { span: s!(2, 1, "7"), sign: None, @@ -4622,9 +4449,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters after: T! {Number, 0, 1, "49"}, exponent: None, })), - ws1: None, operation: T! 
{Minus, 2, 1, "-"}, - ws2: None, rhs: Box::new(Term::Primitive(Primitive::Number { span: s!(3, 1, "7"), sign: None, @@ -4659,9 +4484,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters after: T! {Number, 0, 1, "84"}, exponent: None, })), - ws1: None, operation: T! {Slash, 2, 1, "/"}, - ws2: None, rhs: Box::new(Term::Primitive(Primitive::Number { span: s!(3, 1, "2"), sign: None, @@ -4698,9 +4521,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters after: T! {Number, 0,1,"5"}, exponent: None, })), - ws1: None, operation: T! {Star, 1,1,"*"}, - ws2: None, rhs: Box::new(Term::Primitive(Primitive::Number { span: s!(2, 1, "7"), sign: None, @@ -4710,9 +4531,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters exponent: None, })), }), - ws1: None, operation: T! {Plus, 3,1,"+"}, - ws2: None, rhs: Box::new(Term::Primitive(Primitive::Number { span: s!(4, 1, "7"), sign: None, @@ -4747,9 +4566,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters after: T! {Number, 0,1,"7"}, exponent: None })), - ws1: None, operation: T! {Plus, 1,1,"+"}, - ws2: None, rhs: Box::new(Term::Binary { span: s!(2, 1, "5*7"), lhs: Box::new(Term::Primitive(Primitive::Number { @@ -4760,9 +4577,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters after: T! {Number, 2,1,"5"}, exponent: None })), - ws1: None, operation: T! {Star, 3,1,"*"}, - ws2: None, rhs: Box::new(Term::Primitive(Primitive::Number { span: s!(4, 1, "7"), sign: None, @@ -4816,9 +4631,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters lhs: Box::new(Term::Tuple(Box::new(Tuple { span: s!(0, 1, "(15+3*2-(7+35)*8)"), identifier: None, - ws1: None, open_paren: T!(OpenParen, 0, 1, "("), - ws2: None, terms: Some(List { span: s!(1, 1, "15+3*2-(7+35)*8"), first: Term::Binary { @@ -4831,9 +4644,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters after: T! {Number, 1,1,"15"}, exponent: None, })), - ws1: None, operation: T! {Plus, 3,1,"+"}, - ws2: None, rhs: Box::new(Term::Binary { span: s!(4, 1, "3*2-(7+35)*8"), lhs: Box::new(Term::Binary { @@ -4846,9 +4657,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters after: T! {Number, 4,1,"3"}, exponent: None, })), - ws1: None, operation: T! {Star, 5,1,"*"}, - ws2: None, rhs: Box::new(Term::Primitive(Primitive::Number { span: s!(6, 1, "2"), sign: None, @@ -4858,17 +4667,13 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters exponent: None, })), }), - ws1: None, operation: T! {Minus, 7,1,"-"}, - ws2: None, rhs: Box::new(Term::Binary { span: s!(8, 1, "(7+35)*8"), lhs: Box::new(Term::Tuple(Box::new(Tuple { span: s!(8, 1, "(7+35)"), identifier: None, - ws1: None, open_paren: T! {OpenParen, 8, 1, "("}, - ws2: None, terms: Some(List { span: s!(9, 1, "7+35"), first: Term::Binary { @@ -4883,9 +4688,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters exponent: None, } )), - ws1: None, operation: T! {Plus, 10,1,"+"}, - ws2: None, rhs: Box::new(Term::Primitive( Primitive::Number { span: s!(11, 1, "35"), @@ -4899,12 +4702,9 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters }, rest: None }), - ws3: None, close_paren: T! {CloseParen, 13,1,")"}, }))), - ws1: None, operation: T! 
{Star, 14,1,"*"}, - ws2: None, rhs: Box::new(Term::Primitive(Primitive::Number { span: s!(15, 1, "8"), sign: None, @@ -4918,12 +4718,9 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters }, rest: None }), - ws3: None, close_paren: T!(CloseParen, 16, 1, ")") }))), - ws1: None, operation: T! {Slash, 17,1,"/"}, - ws2: None, rhs: Box::new(Term::Primitive(Primitive::Number { span: s!(18, 1, "3"), sign: None, @@ -4966,9 +4763,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters after: T! {Number, 0,1,"15"}, exponent: None, })), - ws1: None, operation: T! {Plus, 2,1,"+"}, - ws2: None, rhs: Box::new(Term::Binary { span: s!(3, 1, "3*2-(7+35)*8/3"), lhs: Box::new(Term::Binary { @@ -4981,9 +4776,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters after: T! {Number, 3,1,"3"}, exponent: None, })), - ws1: None, operation: T! {Star, 4,1,"*"}, - ws2: None, rhs: Box::new(Term::Primitive(Primitive::Number { span: s!(5, 1, "2"), sign: None, @@ -4993,17 +4786,13 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters exponent: None, })), }), - ws1: None, operation: T! {Minus, 6,1,"-"}, - ws2: None, rhs: Box::new(Term::Binary { span: s!(7, 1, "(7+35)*8/3"), lhs: Box::new(Term::Tuple(Box::new(Tuple { span: s!(7, 1, "(7+35)"), identifier: None, - ws1: None, open_paren: T! {OpenParen, 7,1,"("}, - ws2: None, terms: Some(List { span: s!(8, 1, "7+35"), first: Term::Binary { @@ -5016,9 +4805,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters after: T! {Number, 8,1,"7"}, exponent: None, })), - ws1: None, operation: T! {Plus, 9,1,"+"}, - ws2: None, rhs: Box::new(Term::Primitive(Primitive::Number { span: s!(10, 1, "35"), sign: None, @@ -5030,12 +4817,9 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters }, rest: None, }), - ws3: None, close_paren: T! {CloseParen, 12,1,")"}, }))), - ws1: None, operation: T! {Star, 13,1,"*"}, - ws2: None, rhs: Box::new(Term::Binary { span: s!(14, 1, "8/3"), lhs: Box::new(Term::Primitive(Primitive::Number { @@ -5046,9 +4830,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters after: T! {Number, 14,1,"8"}, exponent: None, })), - ws1: None, operation: T! 
{Slash, 15, 1, "/"}, - ws2: None, rhs: Box::new(Term::Primitive(Primitive::Number { span: s!(16, 1, "3"), sign: None, @@ -5219,6 +5001,8 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters }; let result = parse_decimal::>>(input); + // dbg!(&input); + // dbg!(&result); assert!(result.is_ok()) } @@ -5248,6 +5032,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters parser_state, }; let result = parse_program::>>(input); + // dbg!(&result); assert!(result.1.is_empty()); } diff --git a/nemo/src/io/parser/ast.rs b/nemo/src/io/parser/ast.rs index 9acc56a59..84c7bcbea 100644 --- a/nemo/src/io/parser/ast.rs +++ b/nemo/src/io/parser/ast.rs @@ -151,15 +151,15 @@ impl Display for Wsoc<'_> { pub struct List<'a, T> { pub span: Span<'a>, pub first: T, - // ([ws]?[,][ws]?[T])* - pub rest: Option>, Token<'a>, Option>, T)>>, + // (,T)* + pub rest: Option, T)>>, } impl List<'_, T> { pub fn to_vec(&self) -> Vec { let mut vec = Vec::new(); vec.push(self.first.clone()); if let Some(rest) = &self.rest { - for (_, _, _, item) in rest { + for (_, item) in rest { vec.push(item.clone()); } } @@ -175,7 +175,7 @@ impl IntoIterator for List<'_, T> { let mut vec = Vec::new(); vec.push(self.first); if let Some(rest) = self.rest { - for (_, _, _, item) in rest { + for (_, item) in rest { vec.push(item); } } @@ -187,14 +187,8 @@ impl AstNode for List<'_, T> { let mut vec: Vec<&dyn AstNode> = Vec::new(); vec.push(&self.first); if let Some(rest) = &self.rest { - for (ws1, delim, ws2, item) in rest { - if let Some(ws) = ws1 { - vec.push(ws); - }; + for (delim, item) in rest { vec.push(delim); - if let Some(ws) = ws2 { - vec.push(ws); - }; vec.push(item); } }; @@ -312,30 +306,19 @@ mod test { kind:TokenKind::DocComment, span:s!(84,3,"%% This is the prefix used for datatypes\n") }), - kw: Token{ - kind:TokenKind::Prefix, - span:s!(125,4,"@prefix") - }, - ws1:Some(Wsoc {span: s!(132, 4, " "), token: vec![Token{kind:TokenKind::Whitespace,span:s!(132,4," ")}] }), prefix: Token { kind: TokenKind::PrefixIdent, span: s!(133, 4, "xsd:"), }, - ws2: Some(Wsoc {span: s!(137, 4, " "), token: vec![Token{kind:TokenKind::Whitespace,span:s!(137,4," ")}] }), prefix_iri: Token { kind: TokenKind::Iri, span: s!(138, 4, ""), }, - ws3: None, dot: Token{ kind:TokenKind::Dot, span:s!(173,4,".") } }), - Statement::Whitespace(Token { - kind: TokenKind::Whitespace, - span: s!(174, 4, "\n\n"), - }), Statement::Comment(Token { kind: TokenKind::Comment, span: s!(176, 6, "% Facts\n"), @@ -352,12 +335,10 @@ mod test { kind: TokenKind::Ident, span: s!(222, 8, "somePredicate"), }), - ws1:None , open_paren:Token{ kind:TokenKind::OpenParen, span:s!(235,8,"(") } , - ws2:None , terms: Some(List { span: s!(236, 8, "ConstA, ConstB"), first: Term::Primitive(Primitive::Constant(Token { @@ -365,34 +346,26 @@ mod test { span: s!(236, 8, "ConstA"), })), rest: Some(vec![( - None, Token { kind: TokenKind::Comma, span: s!(242, 8, ","), }, - Some(Wsoc {span: s!(243, 8, " "), token: vec![Token{kind:TokenKind::Whitespace,span:s!(243,8," "),}] }), Term::Primitive(Primitive::Constant(Token { kind: TokenKind::Ident, span: s!(244, 8, "ConstB"), })), )]), }), - ws3: None , close_paren:Token { kind: TokenKind::CloseParen, span:s!(250,8,")") } }), - ws: None, dot: Token { kind: TokenKind::Dot, span: s!(251,8,".") } }, - Statement::Whitespace(Token { - kind: TokenKind::Whitespace, - span: s!(252, 8, "\n\n"), - }), Statement::Comment(Token { kind: TokenKind::Comment, span: s!(254, 10, "% Rules\n"), @@ -408,9 +381,7 @@ 
mod test {
                             kind: TokenKind::Ident,
                             span: s!(295, 12, "someHead"),
                         }),
-                        ws1: None,
                         open_paren: Token { kind: TokenKind::OpenParen, span: s!(303,12,"(") },
-                        ws2: None,
                         terms: Some(List {
                             span: s!(304, 12, "?VarA"),
                             first: Term::UniversalVariable(Token {
                                 kind: TokenKind::Universal,
                                 span: s!(304, 12, "?VarA"),
                             }),
                             rest: None,
                         }),
-                        ws3: None,
                         close_paren: Token { kind: TokenKind::CloseParen, span: s!(309,12,")") },
                     }),
                     rest: None,
                 },
-                ws1: Some(Wsoc {span: s!(310, 12, " "), token: vec![Token{kind:TokenKind::Whitespace,span:s!(310,12," ")}] }),
                 arrow: Token{kind:TokenKind::Arrow, span:s!(311,12,":-")},
-                ws2: Some(Wsoc {span: s!(313, 12, " "), token: vec![Token{kind:TokenKind::Whitespace,span:s!(313,12," ")}] }),
                 body: List {
                     span: s!(314, 12, "somePredicate(?VarA, ConstB)"),
                     first: Atom::Positive(Tuple {
                         span: s!(314,12,"somePredicate(?VarA, ConstB)"),
                         identifier: Some(Token {
                             kind: TokenKind::Ident,
                             span: s!(314, 12, "somePredicate"),
                         }),
-                        ws1: None,
                         open_paren: Token { kind: TokenKind::OpenParen, span: s!(327,12,"(") },
-                        ws2: None,
                         terms: Some(List {
                             span: s!(328, 12, "?Var, ConstB"),
                             first: Term::UniversalVariable(Token {
                                 kind: TokenKind::Universal,
                                 span: s!(328, 12, "?VarA"),
                             }),
                             rest: Some(vec![(
-                                None,
                                 Token {
                                     kind: TokenKind::Comma,
                                     span: s!(333, 12, ","),
                                 },
-                                Some(Wsoc {span: s!(334, 12, " "), token: vec![Token{kind:TokenKind::Whitespace,span:s!(334,12," "),}] }),
                                 Term::Primitive(Primitive::Constant(Token {
                                     kind: TokenKind::Ident,
                                     span: s!(335, 12, "ConstB"),
                                 })),
                             )]),
                         }),
-                        ws3: None,
                         close_paren: Token { kind: TokenKind::CloseParen, span: s!(341, 12,")") },
                     }),
                     rest: None,
                 },
-                ws3: None,
                 dot: Token{kind:TokenKind::Dot,span:s!(342, 12,".")},
             },
-            Statement::Whitespace(Token {
-                kind: TokenKind::Whitespace,
-                span: s!(343, 12, " "),
-            }),
             Statement::Comment(Token {
                 kind: TokenKind::Comment,
                 span: s!(346, 12, "% all constants that are in relation with ConstB\n"),
             }),
         ];
@@ -481,12 +439,14 @@ mod test {
             println!("{}", token);
         }
 
-        assert_eq!(input, {
-            let mut result = String::new();
-            for token in &tokens1 {
-                result.push_str(token.span().fragment());
-            }
-            result
-        });
+        // This doesn't work anymore, because the whitespace and keywords got removed
+        // from the AST, so you can't directly recreate the input exactly.
+        // assert_eq!(input, {
+        //     let mut result = String::new();
+        //     for token in &tokens1 {
+        //         result.push_str(token.span().fragment());
+        //     }
+        //     result
+        // });
     }
 }
diff --git a/nemo/src/io/parser/ast/atom.rs b/nemo/src/io/parser/ast/atom.rs
index bbf6e88b4..6ad2d77ed 100644
--- a/nemo/src/io/parser/ast/atom.rs
+++ b/nemo/src/io/parser/ast/atom.rs
@@ -18,9 +18,7 @@ pub enum Atom<'a> {
     InfixAtom {
         span: Span<'a>,
         lhs: Term<'a>,
-        ws1: Option<Wsoc<'a>>,
         operation: Token<'a>,
-        ws2: Option<Wsoc<'a>>,
         rhs: Term<'a>,
     },
     Map(Map<'a>),
@@ -43,21 +41,13 @@ impl AstNode for Atom<'_> {
             Atom::Negative { neg, atom, .. } => Some(vec![neg, atom]),
             Atom::InfixAtom {
                 lhs,
-                ws1,
                 operation,
-                ws2,
                 rhs,
                 ..
             } => {
                 let mut vec: Vec<&dyn AstNode> = Vec::new();
                 vec.push(lhs);
-                if let Some(ws) = ws1 {
-                    vec.push(ws);
-                };
                 vec.push(operation);
-                if let Some(ws) = ws2 {
-                    vec.push(ws);
-                };
                 vec.push(rhs);
                 Some(vec)
             }
diff --git a/nemo/src/io/parser/ast/directive.rs b/nemo/src/io/parser/ast/directive.rs
index 86c50a315..11424d7e4 100644
--- a/nemo/src/io/parser/ast/directive.rs
+++ b/nemo/src/io/parser/ast/directive.rs
@@ -11,60 +11,40 @@ pub enum Directive<'a> {
     Base {
         span: Span<'a>,
         doc_comment: Option<Token<'a>>,
-        kw: Token<'a>,
-        ws1: Option<Wsoc<'a>>,
         base_iri: Token<'a>,
-        ws2: Option<Wsoc<'a>>,
         dot: Token<'a>,
     },
     // "@prefix wikidata: ."
Prefix { span: Span<'a>, doc_comment: Option>, - kw: Token<'a>, - ws1: Option>, prefix: Token<'a>, - ws2: Option>, prefix_iri: Token<'a>, - ws3: Option>, dot: Token<'a>, }, // "@import table :- csv{resource="path/to/file.csv"} ." Import { span: Span<'a>, doc_comment: Option>, - kw: Token<'a>, - ws1: Wsoc<'a>, predicate: Token<'a>, - ws2: Option>, arrow: Token<'a>, - ws3: Option>, map: Map<'a>, - ws4: Option>, dot: Token<'a>, }, // "@export result :- turtle{resource="out.ttl"} ." Export { span: Span<'a>, doc_comment: Option>, - kw: Token<'a>, - ws1: Wsoc<'a>, predicate: Token<'a>, - ws2: Option>, arrow: Token<'a>, - ws3: Option>, map: Map<'a>, - ws4: Option>, dot: Token<'a>, }, // "@output A, B, C." Output { span: Span<'a>, doc_comment: Option>, - kw: Token<'a>, - ws1: Wsoc<'a>, predicates: Option>>, - ws2: Option>, dot: Token<'a>, }, } @@ -73,10 +53,7 @@ impl AstNode for Directive<'_> { match self { Directive::Base { doc_comment, - kw, - ws1, base_iri, - ws2, dot, .. } => { @@ -84,25 +61,14 @@ impl AstNode for Directive<'_> { if let Some(dc) = doc_comment { vec.push(dc); }; - vec.push(kw); - if let Some(ws) = ws1 { - vec.push(ws); - }; vec.push(base_iri); - if let Some(ws) = ws2 { - vec.push(ws); - }; vec.push(dot); Some(vec) } Directive::Prefix { doc_comment, - kw, - ws1, prefix, - ws2, prefix_iri, - ws3, dot, .. } => { @@ -110,31 +76,16 @@ impl AstNode for Directive<'_> { if let Some(dc) = doc_comment { vec.push(dc); }; - vec.push(kw); - if let Some(ws) = ws1 { - vec.push(ws); - }; vec.push(prefix); - if let Some(ws) = ws2 { - vec.push(ws); - }; vec.push(prefix_iri); - if let Some(ws) = ws3 { - vec.push(ws); - }; vec.push(dot); Some(vec) } Directive::Import { doc_comment, - kw, - ws1, predicate, - ws2, arrow, - ws3, map, - ws4, dot, .. } => { @@ -142,33 +93,17 @@ impl AstNode for Directive<'_> { if let Some(dc) = doc_comment { vec.push(dc); }; - vec.push(kw); - vec.push(ws1); vec.push(predicate); - if let Some(ws) = ws2 { - vec.push(ws); - }; vec.push(arrow); - if let Some(ws) = ws3 { - vec.push(ws); - }; vec.push(map); - if let Some(ws) = ws4 { - vec.push(ws); - }; vec.push(dot); Some(vec) } Directive::Export { doc_comment, - kw, - ws1, predicate, - ws2, arrow, - ws3, map, - ws4, dot, .. 
} => { @@ -176,44 +111,25 @@ impl AstNode for Directive<'_> { if let Some(dc) = doc_comment { vec.push(dc); }; - vec.push(kw); - vec.push(ws1); vec.push(predicate); - if let Some(ws) = ws2 { - vec.push(ws); - }; vec.push(arrow); - if let Some(ws) = ws3 { - vec.push(ws); - }; vec.push(map); - if let Some(ws) = ws4 { - vec.push(ws); - }; vec.push(dot); Some(vec) } Directive::Output { span, doc_comment, - kw, - ws1, predicates, - ws2, dot, } => { let mut vec: Vec<&dyn AstNode> = Vec::new(); if let Some(dc) = doc_comment { vec.push(dc); }; - vec.push(kw); - vec.push(ws1); if let Some(p) = predicates { vec.push(p); }; - if let Some(ws) = ws2 { - vec.push(ws); - }; vec.push(dot); Some(vec) } diff --git a/nemo/src/io/parser/ast/map.rs b/nemo/src/io/parser/ast/map.rs index 061231e13..509d07e2b 100644 --- a/nemo/src/io/parser/ast/map.rs +++ b/nemo/src/io/parser/ast/map.rs @@ -10,11 +10,8 @@ use std::fmt::Debug; pub struct Map<'a> { pub span: Span<'a>, pub identifier: Option>, - pub ws1: Option>, pub open_brace: Token<'a>, - pub ws2: Option>, pub pairs: Option, Term<'a>>>>, - pub ws3: Option>, pub close_brace: Token<'a>, } impl AstNode for Map<'_> { @@ -23,19 +20,10 @@ impl AstNode for Map<'_> { if let Some(identifier) = &self.identifier { vec.push(identifier); }; - if let Some(ws) = &self.ws1 { - vec.push(ws); - } vec.push(&self.open_brace); - if let Some(ws) = &self.ws2 { - vec.push(ws); - } if let Some(pairs) = &self.pairs { vec.push(pairs); }; - if let Some(ws) = &self.ws3 { - vec.push(ws); - } vec.push(&self.close_brace); Some(vec) } @@ -77,22 +65,14 @@ impl std::fmt::Display for Map<'_> { pub struct Pair<'a, K, V> { pub span: Span<'a>, pub key: K, - pub ws1: Option>, pub equal: Token<'a>, - pub ws2: Option>, pub value: V, } impl AstNode for Pair<'_, K, V> { fn children(&self) -> Option> { let mut vec: Vec<&dyn AstNode> = Vec::new(); vec.push(&self.key); - if let Some(ws) = &self.ws1 { - vec.push(ws); - } vec.push(&self.equal); - if let Some(ws) = &self.ws2 { - vec.push(ws); - } vec.push(&self.value); Some(vec) } diff --git a/nemo/src/io/parser/ast/statement.rs b/nemo/src/io/parser/ast/statement.rs index 63014a6d3..6322b0a34 100644 --- a/nemo/src/io/parser/ast/statement.rs +++ b/nemo/src/io/parser/ast/statement.rs @@ -13,21 +13,16 @@ pub enum Statement<'a> { span: Span<'a>, doc_comment: Option>, atom: Atom<'a>, - ws: Option>, dot: Token<'a>, }, Rule { span: Span<'a>, doc_comment: Option>, head: List<'a, Atom<'a>>, - ws1: Option>, arrow: Token<'a>, - ws2: Option>, body: List<'a, Atom<'a>>, - ws3: Option>, dot: Token<'a>, }, - Whitespace(Token<'a>), Comment(Token<'a>), Error(Token<'a>), } @@ -38,7 +33,6 @@ impl AstNode for Statement<'_> { Statement::Fact { doc_comment, atom, - ws, dot, .. } => { @@ -47,20 +41,14 @@ impl AstNode for Statement<'_> { vec.push(dc); }; vec.push(atom); - if let Some(ws) = ws { - vec.push(ws); - } vec.push(dot); Some(vec) } Statement::Rule { doc_comment, head, - ws1, arrow, - ws2, body, - ws3, dot, .. } => { @@ -69,21 +57,11 @@ impl AstNode for Statement<'_> { vec.push(dc); }; vec.push(head); - if let Some(ws) = ws1 { - vec.push(ws); - }; vec.push(arrow); - if let Some(ws) = ws2 { - vec.push(ws); - }; vec.push(body); - if let Some(ws) = ws3 { - vec.push(ws); - }; vec.push(dot); Some(vec) } - Statement::Whitespace(ws) => Some(vec![ws]), Statement::Comment(c) => Some(vec![c]), Statement::Error(t) => Some(vec![t]), } @@ -94,7 +72,6 @@ impl AstNode for Statement<'_> { Statement::Directive(directive) => directive.span(), Statement::Fact { span, .. 
} => *span, Statement::Rule { span, .. } => *span, - Statement::Whitespace(ws) => ws.span(), Statement::Comment(c) => c.span(), Statement::Error(t) => t.span, } @@ -121,7 +98,6 @@ impl AstNode for Statement<'_> { Statement::Directive(_) => name!("Directive"), Statement::Fact { .. } => name!("Fact"), Statement::Rule { .. } => name!("Rule"), - Statement::Whitespace(_) => name!("Whitespace"), Statement::Comment(_) => name!("Comment"), Statement::Error(_) => name!("\x1b[1;31mERROR\x1b[0m"), } @@ -140,7 +116,6 @@ impl AstNode for Statement<'_> { Statement::Directive(_) => "Directive", Statement::Fact { .. } => "Fact", Statement::Rule { .. } => "Rule", - Statement::Whitespace(_ws) => return None, Statement::Comment(_) => return None, Statement::Error(_) => "Invalid", }; diff --git a/nemo/src/io/parser/ast/term.rs b/nemo/src/io/parser/ast/term.rs index beda1993d..db51d80e7 100644 --- a/nemo/src/io/parser/ast/term.rs +++ b/nemo/src/io/parser/ast/term.rs @@ -20,18 +20,14 @@ pub enum Term<'a> { Binary { span: Span<'a>, lhs: Box>, - ws1: Option>, operation: Token<'a>, - ws2: Option>, rhs: Box>, }, Aggregation { span: Span<'a>, operation: Token<'a>, open_paren: Token<'a>, - ws1: Option>, terms: Box>>, - ws2: Option>, close_paren: Token<'a>, }, Tuple(Box>), @@ -50,43 +46,27 @@ impl AstNode for Term<'_> { } => Some(vec![operation, &**term]), Term::Binary { lhs, - ws1, operation, - ws2, rhs, .. } => { let mut vec: Vec<&dyn AstNode> = Vec::new(); vec.push(&**lhs); - if let Some(ws) = ws1 { - vec.push(ws); - }; vec.push(operation); - if let Some(ws) = ws2 { - vec.push(ws); - }; vec.push(&**rhs); Some(vec) } Term::Aggregation { operation, open_paren, - ws1, terms, - ws2, close_paren, .. } => { let mut vec: Vec<&dyn AstNode> = Vec::new(); vec.push(operation); vec.push(open_paren); - if let Some(ws) = ws1 { - vec.push(ws); - } vec.push(&**terms); - if let Some(ws) = ws2 { - vec.push(ws); - } vec.push(close_paren); Some(vec) } diff --git a/nemo/src/io/parser/ast/tuple.rs b/nemo/src/io/parser/ast/tuple.rs index b6e5a0bca..9dd84df22 100644 --- a/nemo/src/io/parser/ast/tuple.rs +++ b/nemo/src/io/parser/ast/tuple.rs @@ -9,11 +9,8 @@ use ascii_tree::write_tree; pub struct Tuple<'a> { pub span: Span<'a>, pub identifier: Option>, - pub ws1: Option>, pub open_paren: Token<'a>, - pub ws2: Option>, pub terms: Option>>, - pub ws3: Option>, pub close_paren: Token<'a>, } @@ -23,19 +20,10 @@ impl AstNode for Tuple<'_> { if let Some(identifier) = &self.identifier { vec.push(identifier); } - if let Some(ws) = &self.ws1 { - vec.push(ws); - } vec.push(&self.open_paren); - if let Some(ws) = &self.ws2 { - vec.push(ws); - } if let Some(terms) = &self.terms { vec.push(terms); } - if let Some(ws) = &self.ws3 { - vec.push(ws); - } vec.push(&self.close_paren); Some(vec) } From f0423f0cf28a23c16c4002cb2c752e2f9b285999 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Mon, 1 Jul 2024 13:25:10 +0200 Subject: [PATCH 109/214] Remove Tokens --- nemo/src/io/lexer.rs | 561 ++++------------------------ nemo/src/io/parser.rs | 286 ++++---------- nemo/src/io/parser/ast.rs | 163 ++++---- nemo/src/io/parser/ast/atom.rs | 6 +- nemo/src/io/parser/ast/directive.rs | 36 +- nemo/src/io/parser/ast/map.rs | 8 +- nemo/src/io/parser/ast/program.rs | 2 +- nemo/src/io/parser/ast/statement.rs | 16 +- nemo/src/io/parser/ast/term.rs | 58 +-- nemo/src/io/parser/ast/tuple.rs | 6 +- nemo/src/io/parser/types.rs | 162 -------- 11 files changed, 263 insertions(+), 1041 deletions(-) diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs index 6b8811654..00b91e7d9 
100644 --- a/nemo/src/io/lexer.rs +++ b/nemo/src/io/lexer.rs @@ -161,6 +161,35 @@ impl ToRange for Span<'_> { start..end } } +impl AstNode for Span<'_> { + fn children(&self) -> Option> { + None + } + + fn span(&self) -> Span { + *self + } + + fn is_token(&self) -> bool { + true + } + + fn name(&self) -> String { + self.fragment().to_string() + } + + fn lsp_identifier(&self) -> Option<(String, String)> { + todo!() + } + + fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> { + todo!() + } + + fn lsp_range_to_rename(&self) -> Option { + todo!() + } +} pub(crate) fn to_range(span: Span<'_>) -> Range { let start = span.location_offset(); @@ -408,13 +437,13 @@ macro_rules! syntax { ($func_name: ident, $tag_str: literal, $token: expr) => { pub(crate) fn $func_name<'a, 's, E>( input: Input<'a, 's>, - ) -> IResult, Token<'a>, E> + ) -> IResult, Span<'a>, E> where E: ParseError> + ContextError, Context>, { map( context(Context::Tag($tag_str), tag($tag_str)), - |span: Input| Token::new($token, span.input), + |input: Input| input.input, )(input) } }; @@ -440,7 +469,7 @@ syntax!(at, "@", TokenKind::At); syntax!(exp_lower, "e", TokenKind::Exponent); syntax!(exp_upper, "E", TokenKind::Exponent); -pub(crate) fn exp<'a, 's, E>(input: Input<'a, 's>) -> IResult, Token<'a>, E> +pub(crate) fn exp<'a, 's, E>(input: Input<'a, 's>) -> IResult, Span<'a>, E> where E: ParseError> + ContextError, Context>, { @@ -449,7 +478,7 @@ where pub(crate) fn lex_punctuations<'a, 's, E>( input: Input<'a, 's>, -) -> IResult, Token<'a>, E> +) -> IResult, Span<'a>, E> where E: ParseError> + ContextError, Context>, { @@ -488,7 +517,7 @@ syntax!(minus, "-", TokenKind::Minus); syntax!(star, "*", TokenKind::Star); syntax!(slash, "/", TokenKind::Slash); -pub(crate) fn lex_operators<'a, 's, E>(input: Input<'a, 's>) -> IResult, Token<'a>, E> +pub(crate) fn lex_operators<'a, 's, E>(input: Input<'a, 's>) -> IResult, Span<'a>, E> where E: ParseError> + ContextError, Context>, { @@ -509,13 +538,7 @@ where )(input) } -// pub(crate) fn lex_unary_prefix_operators<'a, 's>( -// input: Input<'a, 's>, -// ) -> IResult, Token<'a>> { -// alt((plus, minus))(input) -// } - -pub(crate) fn lex_ident<'a, 's, E>(input: Input<'a, 's>) -> IResult, Token<'a>, E> +pub(crate) fn lex_ident<'a, 's, E>(input: Input<'a, 's>) -> IResult, Span<'a>, E> where E: ParseError> + ContextError, Context>, { @@ -527,36 +550,27 @@ where )), )(input)?; let token = match *ident.input.fragment() { - "base" => Token::new(TokenKind::Base, ident.input), - "prefix" => Token::new(TokenKind::Prefix, ident.input), - "import" => Token::new(TokenKind::Import, ident.input), - "export" => Token::new(TokenKind::Export, ident.input), - "output" => Token::new(TokenKind::Output, ident.input), - _ => Token::new(TokenKind::Ident, ident.input), + "base" => ident.input, + "prefix" => ident.input, + "import" => ident.input, + "export" => ident.input, + "output" => ident.input, + _ => ident.input, }; Ok((rest_input, token)) } pub(crate) fn lex_prefixed_ident<'a, 's, E>( input: Input<'a, 's>, -) -> IResult, Token<'a>, E> +) -> IResult, Span<'a>, E> where E: ParseError> + ContextError, Context>, { - recognize(tuple((opt(lex_ident), colon, lex_ident)))(input).map( - |(rest_input, prefixed_ident)| { - ( - rest_input, - Token { - kind: TokenKind::PrefixedIdent, - span: prefixed_ident.input, - }, - ) - }, - ) + recognize(tuple((opt(lex_ident), colon, lex_ident)))(input) + .map(|(rest_input, prefixed_ident)| (rest_input, prefixed_ident.input)) } -pub(crate) fn lex_iri<'a, 's, E>(input: Input<'a, 's>) 
-> IResult, Token<'a>, E> +pub(crate) fn lex_iri<'a, 's, E>(input: Input<'a, 's>) -> IResult, Span<'a>, E> where E: ParseError> + ContextError, Context>, { @@ -564,18 +578,17 @@ where Context::Iri, recognize(delimited(tag("<"), is_not("> \n"), cut(tag(">")))), )(input) - .map(|(rest, result)| (rest, Token::new(TokenKind::Iri, result.input))) + .map(|(rest, result)| (rest, result.input)) } -pub(crate) fn lex_number<'a, 's, E>(input: Input<'a, 's>) -> IResult, Token<'a>, E> +pub(crate) fn lex_number<'a, 's, E>(input: Input<'a, 's>) -> IResult, Span<'a>, E> where E: ParseError> + ContextError, Context>, { - context(Context::Number, digit1)(input) - .map(|(rest_input, result)| (rest_input, Token::new(TokenKind::Number, result.input))) + context(Context::Number, digit1)(input).map(|(rest_input, result)| (rest_input, result.input)) } -pub(crate) fn lex_string<'a, 's, E>(input: Input<'a, 's>) -> IResult, Token<'a>, E> +pub(crate) fn lex_string<'a, 's, E>(input: Input<'a, 's>) -> IResult, Span<'a>, E> where E: ParseError> + ContextError, Context>, { @@ -583,10 +596,10 @@ where Context::String, recognize(delimited(tag("\""), is_not("\""), cut(tag("\"")))), )(input) - .map(|(rest, result)| (rest, Token::new(TokenKind::String, result.input))) + .map(|(rest, result)| (rest, result.input)) } -pub(crate) fn lex_comment<'a, 's, E>(input: Input<'a, 's>) -> IResult, Token<'a>, E> +pub(crate) fn lex_comment<'a, 's, E>(input: Input<'a, 's>) -> IResult, Span<'a>, E> where E: ParseError> + ContextError, Context>, { @@ -594,12 +607,12 @@ where Context::Comment, recognize(tuple((tag("%"), many0(is_not("\n")), line_ending))), )(input) - .map(|(rest, result)| (rest, Token::new(TokenKind::Comment, result.input))) + .map(|(rest, result)| (rest, result.input)) } pub(crate) fn lex_doc_comment<'a, 's, E>( input: Input<'a, 's>, -) -> IResult, Token<'a>, E> +) -> IResult, Span<'a>, E> where E: ParseError> + ContextError, Context>, { @@ -607,12 +620,12 @@ where Context::DocComment, recognize(many1(tuple((tag("%%"), many0(is_not("\n")), line_ending)))), )(input) - .map(|(rest, result)| (rest, Token::new(TokenKind::DocComment, result.input))) + .map(|(rest, result)| (rest, result.input)) } pub(crate) fn lex_toplevel_doc_comment<'a, 's, E>( input: Input<'a, 's>, -) -> IResult, Token<'a>, E> +) -> IResult, Span<'a>, E> where E: ParseError> + ContextError, Context>, { @@ -620,10 +633,10 @@ where Context::TlDocComment, recognize(many1(tuple((tag("%%%"), many0(is_not("\n")), line_ending)))), )(input) - .map(|(rest, result)| (rest, Token::new(TokenKind::TlDocComment, result.input))) + .map(|(rest, result)| (rest, result.input)) } -pub(crate) fn lex_comments<'a, 's, E>(input: Input<'a, 's>) -> IResult, Token<'a>, E> +pub(crate) fn lex_comments<'a, 's, E>(input: Input<'a, 's>) -> IResult, Span<'a>, E> where E: ParseError> + ContextError, Context>, { @@ -633,48 +646,21 @@ where )(input) } -pub(crate) fn lex_whitespace<'a, 's, E>( - input: Input<'a, 's>, -) -> IResult, Token<'a>, E> -where - E: ParseError> + ContextError, Context>, -{ - context(Context::Whitespace, multispace1)(input) - .map(|(rest, result)| (rest, Token::new(TokenKind::Whitespace, result.input))) -} - -pub(crate) fn lex_illegal<'a, 's, E>(input: Input<'a, 's>) -> IResult, Token<'a>, E> +pub(crate) fn lex_whitespace<'a, 's, E>(input: Input<'a, 's>) -> IResult, Span<'a>, E> where E: ParseError> + ContextError, Context>, { - context(Context::Illegal, take(1usize))(input) - .map(|(rest, result)| (rest, Token::new(TokenKind::Illegal, result.input))) + 
context(Context::Whitespace, multispace1)(input).map(|(rest, result)| (rest, result.input)) } -pub(crate) fn lex_tokens<'a, 's, E>( - input: Input<'a, 's>, -) -> IResult, Vec>, E> +pub(crate) fn lex_illegal<'a, 's, E>(input: Input<'a, 's>) -> IResult, Span<'a>, E> where E: ParseError> + ContextError, Context>, { - all_consuming(many0(alt(( - lex_iri, - lex_operators, - lex_punctuations, - lex_ident, - lex_number, - lex_string, - lex_comments, - lex_whitespace, - lex_illegal, - ))))(input) - .map(|(span, mut vec)| { - vec.append(&mut vec![Token::new(TokenKind::Eof, span.input)]); - (span, vec) - }) + context(Context::Illegal, take(1usize))(input).map(|(rest, result)| (rest, result.input)) } -pub(crate) fn skip_to_statement_end<'a, 's, E>(input: Input<'a, 's>) -> (Input<'a, 's>, Token<'a>) +pub(crate) fn skip_to_statement_end<'a, 's, E>(input: Input<'a, 's>) -> (Input<'a, 's>, Span<'a>) where E: ParseError> + ContextError, Context>, { @@ -684,13 +670,7 @@ where multispace0, )))(input) .expect("Skipping to the next dot should not fail!"); - ( - rest_input, - Token { - kind: TokenKind::Error, - span: error_input.input, - }, - ) + (rest_input, error_input.input) } #[cfg(test)] @@ -709,420 +689,7 @@ mod tests { } #[test] - fn empty_input() { - let input = Span::new(""); - let refcell = RefCell::new(Vec::new()); - let errors = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state: errors, - }; - assert_eq!( - // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, - vec![T!(Eof, 0, 1, "")] - ) - } - - #[test] - fn base() { - let input = Span::new("@base"); - let refcell = RefCell::new(Vec::new()); - let errors = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state: errors, - }; - assert_eq!( - // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, - vec![T!(At, 0, 1, "@"), T!(Base, 1, 1, "base"), T!(Eof, 5, 1, ""),] - ) - } - - #[test] - fn prefix() { - let input = Span::new("@prefix"); - let refcell = RefCell::new(Vec::new()); - let errors = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state: errors, - }; - assert_eq!( - // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, - vec![ - T!(At, 0, 1, "@"), - T!(Prefix, 1, 1, "prefix"), - T!(Eof, 7, 1, ""), - ] - ) - } - - #[test] - fn output() { - let input = Span::new("@output"); - let refcell = RefCell::new(Vec::new()); - let errors = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state: errors, - }; - assert_eq!( - // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, - vec![ - T!(At, 0, 1, "@"), - T!(Output, 1, 1, "output"), - T!(Eof, 7, 1, ""), - ] - ) - } - - #[test] - fn import() { - let input = Span::new("@import"); - let refcell = RefCell::new(Vec::new()); - let errors = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state: errors, - }; - assert_eq!( - // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, - vec![ - T!(At, 0, 1, "@"), - T!(Import, 1, 1, "import"), - T!(Eof, 7, 1, ""), - ] - ) - } - - #[test] - fn export() { - let input = Span::new("@export"); - let refcell = RefCell::new(Vec::new()); - let errors = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state: errors, - }; - assert_eq!( - // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, - vec![ - T!(At, 0, 1, "@"), - T!(Export, 1, 1, "export"), - T!(Eof, 7, 1, ""), - ] - ) - } - - #[test] - fn idents_with_keyword_prefix() 
{ - let input = Span::new("@baseA, @prefixB, @importC, @exportD, @outputE."); - let refcell = RefCell::new(Vec::new()); - let errors = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state: errors, - }; - assert_eq!( - // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, - vec![ - T!(At, 0, 1, "@"), - T!(Ident, 1, 1, "baseA"), - T!(Comma, 6, 1, ","), - T!(Whitespace, 7, 1, " "), - T!(At, 8, 1, "@"), - T!(Ident, 9, 1, "prefixB"), - T!(Comma, 16, 1, ","), - T!(Whitespace, 17, 1, " "), - T!(At, 18, 1, "@"), - T!(Ident, 19, 1, "importC"), - T!(Comma, 26, 1, ","), - T!(Whitespace, 27, 1, " "), - T!(At, 28, 1, "@"), - T!(Ident, 29, 1, "exportD"), - T!(Comma, 36, 1, ","), - T!(Whitespace, 37, 1, " "), - T!(At, 38, 1, "@"), - T!(Ident, 39, 1, "outputE"), - T!(Dot, 46, 1, "."), - T!(Eof, 47, 1, ""), - ] - ) - } - - #[test] - fn tokenize() { - let input = Span::new("P(?X) :- A(?X).\t\n A(Human)."); - let refcell = RefCell::new(Vec::new()); - let errors = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state: errors, - }; - assert_eq!( - // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, - vec![ - T!(Ident, 0, 1, "P"), - T!(OpenParen, 1, 1, "("), - T!(QuestionMark, 2, 1, "?"), - T!(Ident, 3, 1, "X"), - T!(CloseParen, 4, 1, ")"), - T!(Whitespace, 5, 1, " "), - T!(Arrow, 6, 1, ":-"), - T!(Whitespace, 8, 1, " "), - T!(Ident, 9, 1, "A"), - T!(OpenParen, 10, 1, "("), - T!(QuestionMark, 11, 1, "?"), - T!(Ident, 12, 1, "X"), - T!(CloseParen, 13, 1, ")"), - T!(Dot, 14, 1, "."), - T!(Whitespace, 15, 1, "\t\n "), - T!(Ident, 21, 2, "A"), - T!(OpenParen, 22, 2, "("), - T!(Ident, 23, 2, "Human"), - T!(CloseParen, 28, 2, ")"), - T!(Dot, 29, 2, "."), - T!(Eof, 30, 2, ""), - ] - ) - } - - #[test] - fn comment() { - let input = Span::new(" % Some Comment\n"); - let refcell = RefCell::new(Vec::new()); - let errors = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state: errors, - }; - assert_eq!( - // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, - vec![ - T!(Whitespace, 0, 1, " "), - T!(Comment, 4, 1, "% Some Comment\n"), - T!(Eof, 19, 2, ""), - // T!(Comment, Span::new(0, 1, "% Some Comment\n")), - // T!(Eof, Span::new(15, 2, "")) - ] - ) - } - - #[test] - fn ident() { - let input = Span::new("some_Ident(Alice). %comment at the end of a line\n"); - let refcell = RefCell::new(Vec::new()); - let errors = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state: errors, - }; - assert_eq!( - // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, - vec![ - T!(Ident, 0, 1, "some_Ident"), - T!(OpenParen, 10, 1, "("), - T!(Ident, 11, 1, "Alice"), - T!(CloseParen, 16, 1, ")"), - T!(Dot, 17, 1, "."), - T!(Whitespace, 18, 1, " "), - T!(Comment, 19, 1, "%comment at the end of a line\n"), - T!(Eof, 49, 2, ""), - ] - ) - } - - #[test] - fn forbidden_ident() { - let input = Span::new("_someIdent(Alice). 
%comment at the end of a line\n"); - let refcell = RefCell::new(Vec::new()); - let errors = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state: errors, - }; - assert_eq!( - // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, - vec![ - T!(Underscore, 0, 1, "_"), - T!(Ident, 1, 1, "someIdent"), - T!(OpenParen, 10, 1, "("), - T!(Ident, 11, 1, "Alice"), - T!(CloseParen, 16, 1, ")"), - T!(Dot, 17, 1, "."), - T!(Whitespace, 18, 1, " "), - T!(Comment, 19, 1, "%comment at the end of a line\n"), - T!(Eof, 49, 2, ""), - ] - ) - } - - #[test] - fn iri() { - let input = Span::new(""); - let refcell = RefCell::new(Vec::new()); - let errors = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state: errors, - }; - assert_eq!( - // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, - vec![ - T!(Iri, 0, 1, ""), - T!(Eof, 31, 1, ""), - ] - ) - } - - #[test] - fn iri_pct_enc() { - let input = Span::new("\n"); - let refcell = RefCell::new(Vec::new()); - let errors = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state: errors, - }; - assert_eq!( - // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, - vec![ - T!(Iri, 0, 1, ""), - T!(Whitespace, 37, 1, "\n"), - T!(Eof, 38, 2, ""), - ] - ) - } - - // FIXME: change the name of this test according to the correct name for `?X > 3` - // (Constraints are Rules with an empty Head) - #[ignore] - #[test] - fn constraints() { - let input = Span::new("A(?X):-B(?X),?X<42,?X>3."); - let refcell = RefCell::new(Vec::new()); - let errors = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state: errors, - }; - assert_eq!( - // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, - vec![ - T!(Ident, 0, 1, "A"), - T!(OpenParen, 1, 1, "("), - T!(QuestionMark, 2, 1, "?"), - T!(Ident, 3, 1, "X"), - T!(CloseParen, 4, 1, ")"), - T!(Arrow, 5, 1, ":-"), - T!(Ident, 7, 1, "B"), - T!(OpenParen, 8, 1, "("), - T!(QuestionMark, 9, 1, "?"), - T!(Ident, 10, 1, "X"), - T!(CloseParen, 11, 1, ")"), - T!(Comma, 12, 1, ","), - T!(QuestionMark, 13, 1, "?"), - T!(Ident, 14, 1, "X"), - T!(Less, 15, 1, "<"), - T!(Number, 16, 1, "42"), - T!(Comma, 18, 1, ","), - T!(QuestionMark, 19, 1, "?"), - T!(Ident, 20, 1, "X"), - T!(Greater, 21, 1, ">"), - T!(Number, 22, 1, "3"), - T!(Dot, 23, 1, "."), - T!(Eof, 24, 1, ""), - ] - ) - } - - #[test] - fn pct_enc_comment() { - let input = Span::new("%d4 this should be a comment,\n% but the lexer can't distinguish a percent encoded value\n% in an iri from a comment :(\n"); - let refcell = RefCell::new(Vec::new()); - let errors = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state: errors, - }; - assert_eq!( - // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, - vec![ - T!(Comment, 0, 1, "%d4 this should be a comment,\n"), - T!( - Comment, - 30, - 2, - "% but the lexer can't distinguish a percent encoded value\n" - ), - T!(Comment, 88, 3, "% in an iri from a comment :(\n"), - T!(Eof, 118, 4, ""), - ] - ) - } - - #[test] - fn fact() { - let input = Span::new("somePred(term1, term2)."); - let refcell = RefCell::new(Vec::new()); - let errors = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state: errors, - }; - assert_eq!( - // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, - vec![ - T!(Ident, 0, 1, "somePred"), - T!(OpenParen, 8, 1, "("), - T!(Ident, 9, 1, "term1"), - T!(Comma, 14, 1, ","), - 
T!(Whitespace, 15, 1, " "), - T!(Ident, 16, 1, "term2"), - T!(CloseParen, 21, 1, ")"), - T!(Dot, 22, 1, "."), - T!(Eof, 23, 1, ""), - ] - ) - } - - #[test] - fn whitespace() { - let input = Span::new(" \t \n\n\t \n"); - let refcell = RefCell::new(Vec::new()); - let errors = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state: errors, - }; - assert_eq!( - // lex_tokens::>(input).unwrap().1, - lex_tokens::>(input).unwrap().1, - vec![ - T!(Whitespace, 0, 1, " \t \n\n\t \n"), - T!(Eof, 12, 4, ""), - ] - ) - } - - #[test] - fn skip_to_dot() { + fn skip_to_statement_end() { let input = Span::new("some ?broken :- rule). A(Fact)."); let refcell = RefCell::new(Vec::new()); let errors = ParserState { errors: &refcell }; diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index 1f2e4e270..3e41b894a 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -2519,8 +2519,8 @@ pub mod new { context: vec![context], }; // errors.report_error(err); - let (rest_input, token) = skip_to_statement_end::>>(input); - Ok((rest_input, Statement::Error(token))) + let (rest_input, span) = skip_to_statement_end::>>(input); + Ok((rest_input, Statement::Error(span))) } Err(err) => Err(err), } @@ -2879,7 +2879,7 @@ pub mod new { opt(lex_doc_comment), recognize(pair( at, - verify(lex_ident, |token| token.kind == TokenKind::Base), + verify(lex_ident, |token| *token.fragment() == "base"), )), wsoc0, lex_iri, @@ -2916,7 +2916,7 @@ pub mod new { opt(lex_doc_comment), recognize(pair( at, - verify(lex_ident, |token| token.kind == TokenKind::Prefix), + verify(lex_ident, |token| *token.fragment() == "prefix"), )), wsoc0, recognize(pair(opt(lex_ident), colon)), @@ -2933,10 +2933,7 @@ pub mod new { Directive::Prefix { span: outer_span(input.input, rest_input.input), doc_comment, - prefix: Token { - kind: TokenKind::Ident, - span: prefix.input, - }, + prefix: prefix.input, prefix_iri, dot, }, @@ -2959,7 +2956,7 @@ pub mod new { opt(lex_doc_comment), recognize(pair( at, - verify(lex_ident, |token| token.kind == TokenKind::Import), + verify(lex_ident, |token| *token.fragment() == "import"), )), wsoc1, lex_ident, @@ -3005,7 +3002,7 @@ pub mod new { opt(lex_doc_comment), recognize(pair( at, - verify(lex_ident, |token| token.kind == TokenKind::Export), + verify(lex_ident, |token| *token.fragment() == "export"), )), wsoc1, lex_ident, @@ -3051,7 +3048,7 @@ pub mod new { opt(lex_doc_comment), recognize(pair( at, - verify(lex_ident, |token| token.kind == TokenKind::Output), + verify(lex_ident, |token| *token.fragment() == "output"), )), wsoc1, opt(parse_list(lex_ident)), @@ -3485,10 +3482,7 @@ pub mod new { Primitive::RdfLiteral { span: outer_span(input.input, rest_input.input), string, - carets: Token { - kind: TokenKind::Caret, - span: carets.input, - }, + carets: carets.input, iri, }, ) @@ -3763,10 +3757,7 @@ pub mod new { rest_input, Term::Aggregation { span: outer_span(input.input, rest_input.input), - operation: Token { - kind: TokenKind::Aggregate, - span: operation.input, - }, + operation: operation.input, open_paren, terms: Box::new(terms), close_paren, @@ -3821,15 +3812,7 @@ pub mod new { Context::UniversalVariable, recognize(pair(question_mark, lex_ident)), )(input) - .map(|(rest_input, var)| { - ( - rest_input, - Term::UniversalVariable(Token { - kind: TokenKind::Variable, - span: var.input, - }), - ) - }) + .map(|(rest_input, var)| (rest_input, Term::UniversalVariable(var.input))) } /// Parse an existential variable. 
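Both the hunk above and the one below apply the same mechanical change that runs through this whole patch: parsers hand back the matched Span<'a> instead of a Token<'a>, so anything that used to be encoded in a TokenKind has to be recovered from the source text itself. A minimal sketch of what this means for downstream code (the helper name is hypothetical and not part of the patch; `Span` is the nom_locate alias defined in lexer.rs):

    // Callers that previously checked `token.kind == TokenKind::Base` now
    // compare the matched text, exactly as the parser hunks above do with
    // `verify(lex_ident, |token| *token.fragment() == "base")`.
    fn is_directive_keyword(span: &Span<'_>) -> bool {
        matches!(
            *span.fragment(),
            "base" | "prefix" | "import" | "export" | "output"
        )
    }

The trade-off is that keyword classification is no longer fixed at lexing time; it is re-derived wherever it is needed, which is what allows the TokenKind plumbing to be deleted throughout this patch.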
@@ -3844,15 +3827,7 @@ pub mod new {
             Context::ExistentialVariable,
             recognize(pair(exclamation_mark, lex_ident)),
         )(input)
-        .map(|(rest_input, existential)| {
-            (
-                rest_input,
-                Term::ExistentialVariable(Token {
-                    kind: TokenKind::Existential,
-                    span: existential.input,
-                }),
-            )
-        })
+        .map(|(rest_input, existential)| (rest_input, Term::ExistentialVariable(existential.input)))
     }

     // Order of parser combinator is important, because of ordered choice and no backtracking
@@ -3863,7 +3838,7 @@ pub mod new {
         E: ParseError<Input<'a, 's>> + ContextError<Input<'a, 's>, Context>,
     >(
         input: Input<'a, 's>,
-    ) -> IResult<Input<'a, 's>, Token<'a>, E> {
+    ) -> IResult<Input<'a, 's>, Span<'a>, E> {
         context(
             Context::Operators,
             alt((less_equal, greater_equal, equal, unequal, less, greater)),
@@ -3890,9 +3865,7 @@
     macro_rules! T {
         ($tok_kind: expr, $offset: literal, $line: literal, $str: literal) => {
-            Token::new($tok_kind, unsafe {
-                Span::new_from_raw_offset($offset, $line, $str, ())
-            })
+            unsafe { Span::new_from_raw_offset($offset, $line, $str, ()) }
         };
     }
     macro_rules! s {
@@ -3940,40 +3913,19 @@ pub mod new {
                     doc_comment: None,
                     atom: Atom::Positive(Tuple {
                         span: s!(0, 1, "a(B,C)"),
-                        identifier: Some(Token {
-                            kind: TokenKind::Ident,
-                            span: s!(0, 1, "a"),
-                        }),
-                        open_paren: Token {
-                            kind: TokenKind::OpenParen,
-                            span: s!(1, 1, "("),
-                        },
+                        identifier: Some(s!(0, 1, "a"),),
+                        open_paren: s!(1, 1, "("),
                         terms: Some(List {
                             span: s!(2, 1, "B,C"),
-                            first: Term::Primitive(Primitive::Constant(Token {
-                                kind: TokenKind::Ident,
-                                span: s!(2, 1, "B"),
-                            })),
+                            first: Term::Primitive(Primitive::Constant(s!(2, 1, "B"),)),
                             rest: Some(vec![(
-                                Token {
-                                    kind: TokenKind::Comma,
-                                    span: s!(3, 1, ",")
-                                },
-                                Term::Primitive(Primitive::Constant(Token {
-                                    kind: TokenKind::Ident,
-                                    span: s!(4, 1, "C"),
-                                })),
+                                s!(3, 1, ","),
+                                Term::Primitive(Primitive::Constant(s!(4, 1, "C"),)),
                             )]),
                         }),
-                        close_paren: Token {
-                            kind: TokenKind::CloseParen,
-                            span: s!(5, 1, ")"),
-                        },
+                        close_paren: s!(5, 1, ")"),
                     }),
-                    dot: Token {
-                        kind: TokenKind::Dot,
-                        span: s!(6, 1, ".")
-                    }
+                    dot: s!(6, 1, ".")
                 }],
             }
         );
@@ -4000,14 +3952,8 @@ pub mod new {
                     Statement::Directive(Directive::Base {
                         span: s!(0, 1, "@base ."),
                         doc_comment: None,
-                        base_iri: Token {
-                            kind: TokenKind::Iri,
-                            span: s!(6, 1, "")
-                        },
-                        dot: Token {
-                            kind: TokenKind::Dot,
-                            span: s!(31, 1, ".")
-                        },
+                        base_iri: s!(6, 1, ""),
+                        dot: s!(31, 1, "."),
                     }),
                     Statement::Directive(Directive::Prefix {
                         span: s!(
                            32,
                            1,
                            "@prefix rdfs:."
), doc_comment: None, - prefix: Token { - kind: TokenKind::Ident, - span: s!(40, 1, "rdfs:"), - }, - prefix_iri: Token { - kind: TokenKind::Iri, - span: s!(45, 1, ""), - }, - dot: Token { - kind: TokenKind::Dot, - span: s!(84, 1, ".") - } + prefix: s!(40, 1, "rdfs:"), + prefix_iri: s!(45, 1, ""), + dot: s!(84, 1, ".") }), Statement::Directive(Directive::Import { span: s!( @@ -4036,121 +3973,59 @@ pub mod new { r#"@import sourceA:-csv{resource="sources/dataA.csv"}."# ), doc_comment: None, - predicate: Token { - kind: TokenKind::Ident, - span: s!(93, 1, "sourceA"), - }, - arrow: Token { - kind: TokenKind::Arrow, - span: s!(100, 1, ":-"), - }, + predicate: s!(93, 1, "sourceA"), + arrow: s!(100, 1, ":-"), map: Map { span: s!(102, 1, r#"csv{resource="sources/dataA.csv"}"#), - identifier: Some(Token { - kind: TokenKind::Ident, - span: s!(102, 1, "csv") - }), - open_brace: Token { - kind: TokenKind::OpenBrace, - span: s!(105, 1, "{") - }, + identifier: Some(s!(102, 1, "csv")), + open_brace: s!(105, 1, "{"), pairs: Some(List { span: s!(106, 1, "resource=\"sources/dataA.csv\""), first: Pair { span: s!(106, 1, "resource=\"sources/dataA.csv\""), - key: Term::Primitive(Primitive::Constant(Token { - kind: TokenKind::Ident, - span: s!(106, 1, "resource"), - })), - equal: Token { - kind: TokenKind::Equal, - span: s!(114, 1, "="), - }, - value: Term::Primitive(Primitive::String(Token { - kind: TokenKind::String, - span: s!(115, 1, "\"sources/dataA.csv\""), - })), + key: Term::Primitive(Primitive::Constant(s!( + 106, 1, "resource" + ),)), + equal: s!(114, 1, "="), + value: Term::Primitive(Primitive::String(s!( + 115, + 1, + "\"sources/dataA.csv\"" + ),)), }, rest: None, }), - close_brace: Token { - kind: TokenKind::CloseBrace, - span: s!(134, 1, "}") - }, + close_brace: s!(134, 1, "}"), }, - dot: Token { - kind: TokenKind::Dot, - span: s!(135, 1, ".") - } + dot: s!(135, 1, ".") }), Statement::Directive(Directive::Export { span: s!(136, 1, "@export a:-csv{}."), doc_comment: None, - predicate: Token { - kind: TokenKind::Ident, - span: s!(144, 1, "a"), - }, - arrow: Token { - kind: TokenKind::Arrow, - span: s!(145, 1, ":-"), - }, + predicate: s!(144, 1, "a"), + arrow: s!(145, 1, ":-"), map: Map { span: s!(147, 1, "csv{}"), - identifier: Some(Token { - kind: TokenKind::Ident, - span: s!(147, 1, "csv"), - }), - open_brace: Token { - kind: TokenKind::OpenBrace, - span: s!(150, 1, "{"), - }, + identifier: Some(s!(147, 1, "csv"),), + open_brace: s!(150, 1, "{"), + pairs: None, - close_brace: Token { - kind: TokenKind::CloseBrace, - span: s!(151, 1, "}"), - }, - }, - dot: Token { - kind: TokenKind::Dot, - span: s!(152, 1, "."), + close_brace: s!(151, 1, "}"), }, + dot: s!(152, 1, "."), }), Statement::Directive(Directive::Output { span: s!(153, 1, "@output a, b, c."), doc_comment: None, predicates: Some(List { span: s!(161, 1, "a, b, c"), - first: Token { - kind: TokenKind::Ident, - span: s!(161, 1, "a"), - }, + first: s!(161, 1, "a"), rest: Some(vec![ - ( - Token { - kind: TokenKind::Comma, - span: s!(162, 1, ","), - }, - Token { - kind: TokenKind::Ident, - span: s!(164, 1, "b"), - }, - ), - ( - Token { - kind: TokenKind::Comma, - span: s!(165, 1, ","), - }, - Token { - kind: TokenKind::Ident, - span: s!(167, 1, "c"), - }, - ), + (s!(162, 1, ","), s!(164, 1, "b"),), + (s!(165, 1, ","), s!(167, 1, "c"),), ]), }), - dot: Token { - kind: TokenKind::Dot, - span: s!(168, 1, "."), - } + dot: s!(168, 1, "."), }), ], } @@ -4193,57 +4068,31 @@ pub mod new { doc_comment: None, atom: Atom::Positive(Tuple { span: s!(0, 1, 
"some(Fact, with, whitespace)"), - identifier: Some(Token { - kind: TokenKind::Ident, - span: s!(0, 1, "some"), - }), - open_paren: Token { - kind: TokenKind::OpenParen, - span: s!(4, 1, "(") - }, + identifier: Some(s!(0, 1, "some"),), + open_paren: s!(4, 1, "("), terms: Some(List { span: s!(5, 1, "Fact, with, whitespace"), - first: Term::Primitive(Primitive::Constant(Token { - kind: TokenKind::Ident, - span: s!(5, 1, "Fact"), - })), + first: Term::Primitive(Primitive::Constant(s!(5, 1, "Fact"),)), rest: Some(vec![ ( - Token { - kind: TokenKind::Comma, - span: s!(9, 1, ","), - }, - Term::Primitive(Primitive::Constant(Token { - kind: TokenKind::Ident, - span: s!(11, 1, "with") - })), + s!(9, 1, ","), + Term::Primitive(Primitive::Constant(s!(11, 1, "with"))), ), ( - Token { - kind: TokenKind::Comma, - span: s!(15, 1, ","), - }, - Term::Primitive(Primitive::Constant(Token { - kind: TokenKind::Ident, - span: s!(17, 1, "whitespace") - })), + s!(15, 1, ","), + Term::Primitive(Primitive::Constant(s!( + 17, + 1, + "whitespace" + ))), ), ]), }), - close_paren: Token { - kind: TokenKind::CloseParen, - span: s!(27, 1, ")") - }, + close_paren: s!(27, 1, ")"), }), - dot: Token { - kind: TokenKind::Dot, - span: s!(29, 1, "."), - }, + dot: s!(29, 1, "."), }, - Statement::Comment(Token { - kind: TokenKind::Comment, - span: s!(31, 1, "% and a super useful comment\n") - }) + Statement::Comment(s!(31, 1, "% and a super useful comment\n")) ], } ); @@ -4305,7 +4154,8 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters }; // let result = parse_program::>(input); let (ast, errors) = parse_program::>>(input); - println!("{}\n\n{:#?}", ast, errors); + // println!("{}\n\n{:#?}", ast, errors); + println!("{}\n\n", ast); let mut error_map: BTreeMap> = BTreeMap::new(); for error in errors { if let Some(set) = error_map.get_mut(&error.pos) { diff --git a/nemo/src/io/parser/ast.rs b/nemo/src/io/parser/ast.rs index 84c7bcbea..5bfbffeb2 100644 --- a/nemo/src/io/parser/ast.rs +++ b/nemo/src/io/parser/ast.rs @@ -41,6 +41,7 @@ pub trait AstNode: std::fmt::Debug + Display + Sync { } } + // FIXME: With the removal of tokens is this method still usefull and/or should be renamed? fn is_token(&self) -> bool; fn name(&self) -> String; @@ -99,7 +100,7 @@ pub struct Range { #[derive(Debug, Clone, PartialEq)] pub struct Wsoc<'a> { pub span: Span<'a>, - pub token: Vec>, + pub token: Vec>, } impl AstNode for Wsoc<'_> { fn children(&self) -> Option> { @@ -152,7 +153,7 @@ pub struct List<'a, T> { pub span: Span<'a>, pub first: T, // (,T)* - pub rest: Option, T)>>, + pub rest: Option, T)>>, } impl List<'_, T> { pub fn to_vec(&self) -> Vec { @@ -250,7 +251,7 @@ pub(crate) fn ast_to_ascii_tree(node: &dyn AstNode) -> Tree { if let Some(children) = node.children() { for child in children { if child.is_token() { - vec.push(Tree::Leaf(vec![format!("{}", child)])); + vec.push(Tree::Leaf(vec![format!("\x1b[93m{:?}\x1b[0m", child.name())])); } else { vec.push(ast_to_ascii_tree(child)); } @@ -295,142 +296,108 @@ mod test { let span = Span::new(input); let ast = Program { span, - tl_doc_comment: Some(Token { - kind: TokenKind::TlDocComment, - span: s!(0, 1, "%! This is just a test file.\n%! So the documentation of the rules is not important.\n") - }), + tl_doc_comment: Some( + s!(0, 1, "%! This is just a test file.\n%! 
So the documentation of the rules is not important.\n") + ), statements: vec![ Statement::Directive(Directive::Prefix { span:s!(125,4,"@prefix xsd: ."), - doc_comment:Some(Token { - kind:TokenKind::DocComment, - span:s!(84,3,"%% This is the prefix used for datatypes\n") - }), - prefix: Token { - kind: TokenKind::PrefixIdent, - span: s!(133, 4, "xsd:"), - }, - prefix_iri: Token { - kind: TokenKind::Iri, - span: s!(138, 4, ""), - }, - dot: Token{ - kind:TokenKind::Dot, - span:s!(173,4,".") - } - }), - Statement::Comment(Token { - kind: TokenKind::Comment, - span: s!(176, 6, "% Facts\n"), + doc_comment:Some( + s!(84,3,"%% This is the prefix used for datatypes\n") + ), + prefix: + s!(133, 4, "xsd:"), + prefix_iri: + s!(138, 4, ""), + dot: + s!(173,4,".") }), + Statement::Comment( + s!(176, 6, "% Facts\n"), + ), Statement::Fact { span:s!(222,8,"somePredicate(ConstA, ConstB)."), - doc_comment: Some(Token { - kind: TokenKind::DocComment, - span:s!(184,7,"%% This is just an example predicate.\n") - }), + doc_comment: Some( + s!(184,7,"%% This is just an example predicate.\n") + ), atom: Atom::Positive(Tuple { span: s!(222,8,"somePredicate(ConstA, ConstB)"), - identifier: Some(Token { - kind: TokenKind::Ident, - span: s!(222, 8, "somePredicate"), - }), - open_paren:Token{ - kind:TokenKind::OpenParen, - span:s!(235,8,"(") - } , + identifier: Some( + s!(222, 8, "somePredicate"), + ), + open_paren: + s!(235,8,"(") + , terms: Some(List { span: s!(236, 8, "ConstA, ConstB"), - first: Term::Primitive(Primitive::Constant(Token { - kind: TokenKind::Ident, - span: s!(236, 8, "ConstA"), - })), + first: Term::Primitive(Primitive::Constant( s!(236, 8, "ConstA"), + )), rest: Some(vec![( - Token { - kind: TokenKind::Comma, - span: s!(242, 8, ","), - }, - Term::Primitive(Primitive::Constant(Token { - kind: TokenKind::Ident, - span: s!(244, 8, "ConstB"), - })), + s!(242, 8, ","), + Term::Primitive(Primitive::Constant( s!(244, 8, "ConstB"), + )), )]), }), - close_paren:Token { - kind: TokenKind::CloseParen, - span:s!(250,8,")") - } + close_paren: + s!(250,8,")") }), - dot: Token { - kind: TokenKind::Dot, - span: s!(251,8,".") - } + dot: + s!(251,8,".") + }, - Statement::Comment(Token { - kind: TokenKind::Comment, - span: s!(254, 10, "% Rules\n"), - }), + Statement::Comment( + s!(254, 10, "% Rules\n"), + ), Statement::Rule { span: s!(295,12,"someHead(?VarA) :- somePredicate(?VarA, ConstB)."), - doc_comment: Some(Token { kind: TokenKind::DocComment, span: s!(262,11,"%% This is just an example rule.\n") }), + doc_comment: Some(s!(262,11,"%% This is just an example rule.\n")), head: List { span: s!(295, 12, "someHead(?VarA)"), first: Atom::Positive(Tuple { span: s!(295,12,"someHead(?VarA)"), - identifier: Some(Token { - kind: TokenKind::Ident, - span: s!(295, 12, "someHead"), - }), - open_paren: Token { kind: TokenKind::OpenParen, span: s!(303,12,"(") }, + identifier: Some( + s!(295, 12, "someHead"), + ), + open_paren: s!(303,12,"(") , terms: Some(List { span: s!(304, 12, "?VarA"), - first: Term::UniversalVariable(Token { - kind: TokenKind::Variable, - span: s!(304, 12, "?VarA"), - }), + first: Term::UniversalVariable( s!(304, 12, "?VarA"), + ), rest: None, }), - close_paren: Token { kind: TokenKind::CloseParen, span: s!(309,12,")") }, + close_paren: s!(309,12,")") , }), rest: None, }, - arrow: Token{kind:TokenKind::Arrow, span:s!(311,12,":-")}, + arrow: s!(311,12,":-"), body: List { span: s!(314, 12, "somePredicate(?VarA, ConstB)"), first: Atom::Positive(Tuple { span: s!(314, 12,"somePredicate(?VarA, ConstB)"), - 
identifier: Some(Token { - kind: TokenKind::Ident, - span: s!(314, 12, "somePredicate"), - }), - open_paren: Token { kind: TokenKind::OpenParen, span: s!(327,12,"(") }, + identifier: Some( + s!(314, 12, "somePredicate"), + ), + open_paren: s!(327,12,"("), terms: Some(List { span: s!(328, 12, "?Var, ConstB"), - first: Term::UniversalVariable(Token { - kind: TokenKind::Variable, - span: s!(328, 12, "?VarA"), - }), + first: Term::UniversalVariable( s!(328, 12, "?VarA"), + ), rest: Some(vec![( - Token { - kind: TokenKind::Comma, - span: s!(333, 12, ","), - }, - Term::Primitive(Primitive::Constant(Token { - kind: TokenKind::Ident, - span: s!(335, 12, "ConstB"), - })), + s!(333, 12, ","), + + Term::Primitive(Primitive::Constant(s!(335, 12, "ConstB"), + )), )]), }), - close_paren: Token { kind: TokenKind::CloseParen, span: s!(341, 12,")") }, + close_paren: s!(341, 12,")") , }), rest: None, }, - dot: Token{kind:TokenKind::Dot,span:s!(342, 12,".")}, + dot: s!(342, 12,"."), }, - Statement::Comment(Token { - kind: TokenKind::Comment, - span: s!(346, 12, "% all constants that are in relation with ConstB\n"), - }), + Statement::Comment( + s!(346, 12, "% all constants that are in relation with ConstB\n"), + ), ], }; println!("{}", ast); diff --git a/nemo/src/io/parser/ast/atom.rs b/nemo/src/io/parser/ast/atom.rs index 6ad2d77ed..47ccc2e08 100644 --- a/nemo/src/io/parser/ast/atom.rs +++ b/nemo/src/io/parser/ast/atom.rs @@ -12,13 +12,13 @@ pub enum Atom<'a> { Positive(Tuple<'a>), Negative { span: Span<'a>, - neg: Token<'a>, + neg: Span<'a>, atom: Tuple<'a>, }, InfixAtom { span: Span<'a>, lhs: Term<'a>, - operation: Token<'a>, + operation: Span<'a>, rhs: Term<'a>, }, Map(Map<'a>), @@ -106,7 +106,7 @@ impl AstNode for Atom<'_> { fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> { match self.tuple() { Some(tuple) => Some(( - format!("Atom: {}", tuple.identifier.unwrap().span.fragment()), + format!("Atom: {}", tuple.identifier.unwrap().fragment()), SymbolKind::FUNCTION, )), None => Some((String::from("Atom"), SymbolKind::FUNCTION)), diff --git a/nemo/src/io/parser/ast/directive.rs b/nemo/src/io/parser/ast/directive.rs index 11424d7e4..4ccf406dd 100644 --- a/nemo/src/io/parser/ast/directive.rs +++ b/nemo/src/io/parser/ast/directive.rs @@ -10,42 +10,42 @@ pub enum Directive<'a> { // "@base ." Base { span: Span<'a>, - doc_comment: Option>, - base_iri: Token<'a>, - dot: Token<'a>, + doc_comment: Option>, + base_iri: Span<'a>, + dot: Span<'a>, }, // "@prefix wikidata: ." Prefix { span: Span<'a>, - doc_comment: Option>, - prefix: Token<'a>, - prefix_iri: Token<'a>, - dot: Token<'a>, + doc_comment: Option>, + prefix: Span<'a>, + prefix_iri: Span<'a>, + dot: Span<'a>, }, // "@import table :- csv{resource="path/to/file.csv"} ." Import { span: Span<'a>, - doc_comment: Option>, - predicate: Token<'a>, - arrow: Token<'a>, + doc_comment: Option>, + predicate: Span<'a>, + arrow: Span<'a>, map: Map<'a>, - dot: Token<'a>, + dot: Span<'a>, }, // "@export result :- turtle{resource="out.ttl"} ." Export { span: Span<'a>, - doc_comment: Option>, - predicate: Token<'a>, - arrow: Token<'a>, + doc_comment: Option>, + predicate: Span<'a>, + arrow: Span<'a>, map: Map<'a>, - dot: Token<'a>, + dot: Span<'a>, }, // "@output A, B, C." 
Output { span: Span<'a>, - doc_comment: Option>, - predicates: Option>>, - dot: Token<'a>, + doc_comment: Option>, + predicates: Option>>, + dot: Span<'a>, }, } impl AstNode for Directive<'_> { diff --git a/nemo/src/io/parser/ast/map.rs b/nemo/src/io/parser/ast/map.rs index 509d07e2b..143640d54 100644 --- a/nemo/src/io/parser/ast/map.rs +++ b/nemo/src/io/parser/ast/map.rs @@ -9,10 +9,10 @@ use std::fmt::Debug; #[derive(Debug, Clone, PartialEq)] pub struct Map<'a> { pub span: Span<'a>, - pub identifier: Option>, - pub open_brace: Token<'a>, + pub identifier: Option>, + pub open_brace: Span<'a>, pub pairs: Option, Term<'a>>>>, - pub close_brace: Token<'a>, + pub close_brace: Span<'a>, } impl AstNode for Map<'_> { fn children(&self) -> Option> { @@ -65,7 +65,7 @@ impl std::fmt::Display for Map<'_> { pub struct Pair<'a, K, V> { pub span: Span<'a>, pub key: K, - pub equal: Token<'a>, + pub equal: Span<'a>, pub value: V, } impl AstNode for Pair<'_, K, V> { diff --git a/nemo/src/io/parser/ast/program.rs b/nemo/src/io/parser/ast/program.rs index f2f111461..7b080bcab 100644 --- a/nemo/src/io/parser/ast/program.rs +++ b/nemo/src/io/parser/ast/program.rs @@ -7,7 +7,7 @@ use ascii_tree::write_tree; #[derive(Debug, Clone, PartialEq)] pub struct Program<'a> { pub span: Span<'a>, - pub tl_doc_comment: Option>, + pub tl_doc_comment: Option>, pub statements: Vec>, } impl AstNode for Program<'_> { diff --git a/nemo/src/io/parser/ast/statement.rs b/nemo/src/io/parser/ast/statement.rs index 6322b0a34..c9420ec06 100644 --- a/nemo/src/io/parser/ast/statement.rs +++ b/nemo/src/io/parser/ast/statement.rs @@ -11,20 +11,20 @@ pub enum Statement<'a> { Directive(Directive<'a>), Fact { span: Span<'a>, - doc_comment: Option>, + doc_comment: Option>, atom: Atom<'a>, - dot: Token<'a>, + dot: Span<'a>, }, Rule { span: Span<'a>, - doc_comment: Option>, + doc_comment: Option>, head: List<'a, Atom<'a>>, - arrow: Token<'a>, + arrow: Span<'a>, body: List<'a, Atom<'a>>, - dot: Token<'a>, + dot: Span<'a>, }, - Comment(Token<'a>), - Error(Token<'a>), + Comment(Span<'a>), + Error(Span<'a>), } impl AstNode for Statement<'_> { fn children(&self) -> Option> { @@ -73,7 +73,7 @@ impl AstNode for Statement<'_> { Statement::Fact { span, .. } => *span, Statement::Rule { span, .. } => *span, Statement::Comment(c) => c.span(), - Statement::Error(t) => t.span, + Statement::Error(t) => *t, } } diff --git a/nemo/src/io/parser/ast/term.rs b/nemo/src/io/parser/ast/term.rs index db51d80e7..32fde33ba 100644 --- a/nemo/src/io/parser/ast/term.rs +++ b/nemo/src/io/parser/ast/term.rs @@ -9,30 +9,30 @@ use ascii_tree::write_tree; #[derive(Debug, Clone, PartialEq)] pub enum Term<'a> { Primitive(Primitive<'a>), - UniversalVariable(Token<'a>), - ExistentialVariable(Token<'a>), + UniversalVariable(Span<'a>), + ExistentialVariable(Span<'a>), // TODO: Is whitespace needed? Figure out how unary terms look UnaryPrefix { span: Span<'a>, - operation: Token<'a>, + operation: Span<'a>, term: Box>, }, Binary { span: Span<'a>, lhs: Box>, - operation: Token<'a>, + operation: Span<'a>, rhs: Box>, }, Aggregation { span: Span<'a>, - operation: Token<'a>, - open_paren: Token<'a>, + operation: Span<'a>, + open_paren: Span<'a>, terms: Box>>, - close_paren: Token<'a>, + close_paren: Span<'a>, }, Tuple(Box>), Map(Box>), - Blank(Token<'a>), + Blank(Span<'a>), } impl AstNode for Term<'_> { @@ -173,13 +173,13 @@ impl AstNode for Term<'_> { } Term::Binary { .. } => Some((String::from("Binary term"), SymbolKind::OPERATOR)), Term::Aggregation { operation, .. 
} => Some(( - format!("Aggregation: {}", operation.span.fragment()), + format!("Aggregation: {}", operation.fragment()), SymbolKind::OPERATOR, )), Term::Tuple(tuple) => { if let Some(identifier) = tuple.identifier { Some(( - format!("Function: {}", identifier.span.fragment()), + format!("Function: {}", identifier.fragment()), SymbolKind::OPERATOR, )) } else { @@ -200,28 +200,28 @@ impl std::fmt::Display for Term<'_> { #[derive(Debug, Clone, PartialEq)] pub(crate) enum Primitive<'a> { - Constant(Token<'a>), + Constant(Span<'a>), PrefixedConstant { span: Span<'a>, - prefix: Option>, - colon: Token<'a>, - constant: Token<'a>, + prefix: Option>, + colon: Span<'a>, + constant: Span<'a>, }, Number { span: Span<'a>, - sign: Option>, - before: Option>, - dot: Option>, - after: Token<'a>, + sign: Option>, + before: Option>, + dot: Option>, + after: Span<'a>, exponent: Option>, }, - String(Token<'a>), - Iri(Token<'a>), + String(Span<'a>), + Iri(Span<'a>), RdfLiteral { span: Span<'a>, - string: Token<'a>, - carets: Token<'a>, - iri: Token<'a>, + string: Span<'a>, + carets: Span<'a>, + iri: Span<'a>, }, } @@ -282,11 +282,11 @@ impl AstNode for Primitive<'_> { fn span(&self) -> Span { match self { - Primitive::Constant(token) => token.span, + Primitive::Constant(span) => *span, Primitive::PrefixedConstant { span, .. } => *span, Primitive::Number { span, .. } => *span, - Primitive::String(token) => token.span, - Primitive::Iri(token) => token.span, + Primitive::String(span) => *span, + Primitive::Iri(span) => *span, Primitive::RdfLiteral { span, .. } => *span, } } @@ -339,9 +339,9 @@ impl std::fmt::Display for Primitive<'_> { #[derive(Debug, Clone, PartialEq)] pub(crate) struct Exponent<'a> { - pub(crate) e: Token<'a>, - pub(crate) sign: Option>, - pub(crate) number: Token<'a>, + pub(crate) e: Span<'a>, + pub(crate) sign: Option>, + pub(crate) number: Span<'a>, } impl AstNode for Exponent<'_> { diff --git a/nemo/src/io/parser/ast/tuple.rs b/nemo/src/io/parser/ast/tuple.rs index 9dd84df22..d3f5e7625 100644 --- a/nemo/src/io/parser/ast/tuple.rs +++ b/nemo/src/io/parser/ast/tuple.rs @@ -8,10 +8,10 @@ use ascii_tree::write_tree; #[derive(Debug, Clone, PartialEq)] pub struct Tuple<'a> { pub span: Span<'a>, - pub identifier: Option>, - pub open_paren: Token<'a>, + pub identifier: Option>, + pub open_paren: Span<'a>, pub terms: Option>>, - pub close_paren: Token<'a>, + pub close_paren: Span<'a>, } impl AstNode for Tuple<'_> { diff --git a/nemo/src/io/parser/types.rs b/nemo/src/io/parser/types.rs index 2b0832655..4ba1b045c 100644 --- a/nemo/src/io/parser/types.rs +++ b/nemo/src/io/parser/types.rs @@ -428,168 +428,6 @@ impl FromExternalError, DataValueCreationError> for LocatedParseError { } } -use crate::io::lexer::Token; - -#[derive(Debug, Copy, Clone, PartialEq)] -pub(crate) struct Tokens<'a> { - pub(crate) tok: &'a [Token<'a>], -} -impl<'a> Tokens<'a> { - fn new(vec: &'a [Token]) -> Tokens<'a> { - Tokens { tok: vec } - } -} -impl<'a> AsBytes for Tokens<'a> { - fn as_bytes(&self) -> &[u8] { - todo!() - } -} -impl<'a, T> nom::Compare for Tokens<'a> { - fn compare(&self, t: T) -> nom::CompareResult { - todo!() - } - - fn compare_no_case(&self, t: T) -> nom::CompareResult { - todo!() - } -} -// impl<'a> nom::ExtendInto for Tokens<'a> { -// type Item; - -// type Extender; - -// fn new_builder(&self) -> Self::Extender { -// todo!() -// } - -// fn extend_into(&self, acc: &mut Self::Extender) { -// todo!() -// } -// } -impl<'a, T> nom::FindSubstring for Tokens<'a> { - fn find_substring(&self, substr: T) -> Option { 
- todo!() - } -} -impl<'a, T> nom::FindToken for Tokens<'a> { - fn find_token(&self, token: T) -> bool { - todo!() - } -} -impl<'a> InputIter for Tokens<'a> { - type Item = &'a Token<'a>; - - type Iter = std::iter::Enumerate>>; - - type IterElem = std::slice::Iter<'a, Token<'a>>; - - fn iter_indices(&self) -> Self::Iter { - self.tok.iter().enumerate() - } - - fn iter_elements(&self) -> Self::IterElem { - self.tok.iter() - } - - fn position
<P>
(&self, predicate: P) -> Option - where - P: Fn(Self::Item) -> bool, - { - self.tok.iter().position(predicate) - } - - fn slice_index(&self, count: usize) -> Result { - if self.tok.len() >= count { - Ok(count) - } else { - Err(nom::Needed::Unknown) - } - } -} -impl<'a> InputLength for Tokens<'a> { - fn input_len(&self) -> usize { - self.tok.len() - } -} -impl<'a> InputTake for Tokens<'a> { - fn take(&self, count: usize) -> Self { - Tokens { - tok: &self.tok[0..count], - } - } - - fn take_split(&self, count: usize) -> (Self, Self) { - ( - Tokens { - tok: &self.tok[count..self.tok.len()], - }, - Tokens { - tok: &self.tok[0..count], - }, - ) - } -} -impl<'a> InputTakeAtPosition for Tokens<'a> { - type Item = &'a Token<'a>; - - fn split_at_position>( - &self, - predicate: P, - ) -> IResult - where - P: Fn(Self::Item) -> bool, - { - todo!() - } - - fn split_at_position1>( - &self, - predicate: P, - e: ErrorKind, - ) -> IResult - where - P: Fn(Self::Item) -> bool, - { - todo!() - } - - fn split_at_position_complete>( - &self, - predicate: P, - ) -> IResult - where - P: Fn(Self::Item) -> bool, - { - todo!() - } - - fn split_at_position1_complete>( - &self, - predicate: P, - e: ErrorKind, - ) -> IResult - where - P: Fn(Self::Item) -> bool, - { - todo!() - } -} -impl<'a> nom::Offset for Tokens<'a> { - fn offset(&self, second: &Self) -> usize { - todo!() - } -} -impl<'a, R> nom::ParseTo for Tokens<'a> { - fn parse_to(&self) -> Option { - todo!() - } -} -impl<'a, R> nom::Slice for Tokens<'a> { - fn slice(&self, range: R) -> Self { - todo!() - } -} - #[derive(Debug, Clone, Copy)] pub(crate) struct Input<'a, 's> { pub(crate) input: crate::io::lexer::Span<'a>, From 3ddb313a16d110d3ba7d768b9c923dbb992fea2b Mon Sep 17 00:00:00 2001 From: Alex Ivliev Date: Mon, 1 Jul 2024 07:18:05 +0200 Subject: [PATCH 110/214] Adjust logical model (WIP) --- nemo/src/lib.rs | 1 + nemo/src/rule_model.rs | 6 ++ nemo/src/rule_model/component.rs | 24 ++++++ nemo/src/rule_model/component/variable.rs | 96 +++++++++++++++++++++++ nemo/src/rule_model/error.rs | 9 +++ nemo/src/rule_model/origin.rs | 10 +++ 6 files changed, 146 insertions(+) create mode 100644 nemo/src/rule_model.rs create mode 100644 nemo/src/rule_model/component.rs create mode 100644 nemo/src/rule_model/component/variable.rs create mode 100644 nemo/src/rule_model/error.rs create mode 100644 nemo/src/rule_model/origin.rs diff --git a/nemo/src/lib.rs b/nemo/src/lib.rs index 246ab7f6c..89f3d962d 100644 --- a/nemo/src/lib.rs +++ b/nemo/src/lib.rs @@ -29,6 +29,7 @@ pub mod io; pub mod execution; pub mod model; +pub mod rule_model; pub mod util; mod program_analysis; diff --git a/nemo/src/rule_model.rs b/nemo/src/rule_model.rs new file mode 100644 index 000000000..b30b5c65d --- /dev/null +++ b/nemo/src/rule_model.rs @@ -0,0 +1,6 @@ +//! This module defines the representation of nemo programs + +pub(crate) mod origin; + +pub mod component; +pub mod error; diff --git a/nemo/src/rule_model/component.rs b/nemo/src/rule_model/component.rs new file mode 100644 index 000000000..803185dfb --- /dev/null +++ b/nemo/src/rule_model/component.rs @@ -0,0 +1,24 @@ +//! This module defines a logical component and ... 
+
+use std::fmt::{Debug, Display};
+
+use crate::io::parser::ast::AstNode;
+
+use super::{
+    error::ProgramConstructionError,
+    origin::{ComponentOrigin, OriginParseReference},
+};
+
+pub trait ProgramComponent: Debug + Display {
+    type Node<'a>: AstNode;
+
+    fn from_ast_node<'a>(node: Self::Node<'a>, origin: OriginParseReference) -> Self;
+
+    fn parse(string: &str) -> Result<Self, ProgramConstructionError>
+    where
+        Self: Sized;
+
+    fn origin(&self) -> &ComponentOrigin;
+}
+
+pub mod variable;
diff --git a/nemo/src/rule_model/component/variable.rs b/nemo/src/rule_model/component/variable.rs
new file mode 100644
index 000000000..02a5f202e
--- /dev/null
+++ b/nemo/src/rule_model/component/variable.rs
@@ -0,0 +1,96 @@
+use std::fmt::Display;
+
+use crate::{
+    io::parser::ast::term::Term,
+    rule_model::{
+        error::ProgramConstructionError,
+        origin::{ComponentOrigin, OriginParseReference},
+    },
+};
+
+use super::ProgramComponent;
+
+/// Name of a variable
+#[derive(Debug, Clone)]
+pub struct VariableName(String);
+
+impl VariableName {
+    fn new(name: String) -> Result<Self, ProgramConstructionError> {
+        // TODO: Validate name
+        if name.is_empty() {
+            return Err(ProgramConstructionError::InvalidVariableName(name));
+        }
+
+        Ok(Self::new_unvalidated(name))
+    }
+
+    fn new_unvalidated(name: String) -> Self {
+        Self(name)
+    }
+}
+
+impl Display for VariableName {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        self.0.fmt(f)
+    }
+}
+
+#[derive(Debug)]
+pub struct UniversalVariable {
+    origin: ComponentOrigin,
+
+    name: Option<VariableName>,
+}
+
+impl UniversalVariable {
+    fn from_term(term: Term) -> Self {
+        todo!()
+    }
+}
+
+impl Display for UniversalVariable {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match &self.name {
+            Some(name) => write!(f, "?{}", name),
+            None => write!(f, "_"),
+        }
+    }
+}
+
+impl ProgramComponent for UniversalVariable {
+    type Node<'a> = Term<'a>;
+
+    fn from_ast_node<'a>(node: Term<'a>, origin: OriginParseReference) -> Self {
+        if let Term::UniversalVariable(token) = node {
+            let string = token.span.to_string();
+        }
+
+        todo!()
+    }
+
+    fn parse(string: &str) -> Result<Self, ProgramConstructionError> {
+        todo!()
+    }
+
+    fn origin(&self) -> &ComponentOrigin {
+        &self.origin
+    }
}
+
+#[derive(Debug)]
+pub struct ExistentialVariable {
+    origin: ComponentOrigin,
+
+    name: VariableName,
+}
+
+#[derive(Debug)]
+pub enum Variable {
+    Universal(UniversalVariable),
+    Existential(ExistentialVariable),
+}
+
+mod test {
+    #[test]
+    fn create_variable() {}
+}
diff --git a/nemo/src/rule_model/error.rs b/nemo/src/rule_model/error.rs
new file mode 100644
index 000000000..8f9331064
--- /dev/null
+++ b/nemo/src/rule_model/error.rs
@@ -0,0 +1,9 @@
+use thiserror::Error;
+
+#[derive(Error, Debug)]
+pub enum ProgramConstructionError {
+    #[error("invalid variable name: {0}")]
+    InvalidVariableName(String),
+    #[error("parse error")] // TODO: Return parser error here
+    ParseError,
+}
diff --git a/nemo/src/rule_model/origin.rs b/nemo/src/rule_model/origin.rs
new file mode 100644
index 000000000..e2660002b
--- /dev/null
+++ b/nemo/src/rule_model/origin.rs
@@ -0,0 +1,10 @@
+//!
This module defines + +pub(crate) type OriginParseReference = usize; + +#[derive(Debug)] +pub enum ComponentOrigin { + Created, + Parsed(OriginParseReference), + Something(Box), +} From 322c997052ea76de4c27f1a0a445f35f02d112c8 Mon Sep 17 00:00:00 2001 From: Alex Ivliev Date: Tue, 2 Jul 2024 22:24:54 +0200 Subject: [PATCH 111/214] Rebuild logical model --- nemo/src/io/parser/ast.rs | 2 +- nemo/src/io/parser/ast/term.rs | 2 +- nemo/src/rule_model.rs | 1 + nemo/src/rule_model/component.rs | 37 ++- nemo/src/rule_model/component/atom.rs | 98 +++++++ nemo/src/rule_model/component/fact.rs | 74 ++++++ .../src/rule_model/component/import_export.rs | 243 ++++++++++++++++++ nemo/src/rule_model/component/literal.rs | 59 +++++ nemo/src/rule_model/component/rule.rs | 224 ++++++++++++++++ nemo/src/rule_model/component/term.rs | 133 ++++++++++ .../rule_model/component/term/aggregate.rs | 96 +++++++ .../src/rule_model/component/term/function.rs | 107 ++++++++ nemo/src/rule_model/component/term/map.rs | 69 +++++ .../rule_model/component/term/operation.rs | 195 ++++++++++++++ .../rule_model/component/term/primitive.rs | 97 +++++++ .../component/term/primitive/ground.rs | 134 ++++++++++ .../component/term/primitive/variable.rs | 140 ++++++++++ .../term/primitive/variable/existential.rs | 103 ++++++++ .../term/primitive/variable/universal.rs | 123 +++++++++ nemo/src/rule_model/component/term/tuple.rs | 63 +++++ nemo/src/rule_model/component/variable.rs | 96 ------- nemo/src/rule_model/error.rs | 17 +- nemo/src/rule_model/origin.rs | 18 +- nemo/src/rule_model/program.rs | 26 ++ 24 files changed, 2039 insertions(+), 118 deletions(-) create mode 100644 nemo/src/rule_model/component/atom.rs create mode 100644 nemo/src/rule_model/component/fact.rs create mode 100644 nemo/src/rule_model/component/import_export.rs create mode 100644 nemo/src/rule_model/component/literal.rs create mode 100644 nemo/src/rule_model/component/rule.rs create mode 100644 nemo/src/rule_model/component/term.rs create mode 100644 nemo/src/rule_model/component/term/aggregate.rs create mode 100644 nemo/src/rule_model/component/term/function.rs create mode 100644 nemo/src/rule_model/component/term/map.rs create mode 100644 nemo/src/rule_model/component/term/operation.rs create mode 100644 nemo/src/rule_model/component/term/primitive.rs create mode 100644 nemo/src/rule_model/component/term/primitive/ground.rs create mode 100644 nemo/src/rule_model/component/term/primitive/variable.rs create mode 100644 nemo/src/rule_model/component/term/primitive/variable/existential.rs create mode 100644 nemo/src/rule_model/component/term/primitive/variable/universal.rs create mode 100644 nemo/src/rule_model/component/term/tuple.rs delete mode 100644 nemo/src/rule_model/component/variable.rs create mode 100644 nemo/src/rule_model/program.rs diff --git a/nemo/src/io/parser/ast.rs b/nemo/src/io/parser/ast.rs index 5bfbffeb2..931813397 100644 --- a/nemo/src/io/parser/ast.rs +++ b/nemo/src/io/parser/ast.rs @@ -143,7 +143,7 @@ impl AstNode for Wsoc<'_> { } impl Display for Wsoc<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { todo!() } } diff --git a/nemo/src/io/parser/ast/term.rs b/nemo/src/io/parser/ast/term.rs index 32fde33ba..07feb03f7 100644 --- a/nemo/src/io/parser/ast/term.rs +++ b/nemo/src/io/parser/ast/term.rs @@ -381,7 +381,7 @@ impl AstNode for Exponent<'_> { } impl std::fmt::Display for Exponent<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> 
std::fmt::Result { + fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { todo!() } } diff --git a/nemo/src/rule_model.rs b/nemo/src/rule_model.rs index b30b5c65d..f3422305c 100644 --- a/nemo/src/rule_model.rs +++ b/nemo/src/rule_model.rs @@ -4,3 +4,4 @@ pub(crate) mod origin; pub mod component; pub mod error; +pub mod program; diff --git a/nemo/src/rule_model/component.rs b/nemo/src/rule_model/component.rs index 803185dfb..5a925fb77 100644 --- a/nemo/src/rule_model/component.rs +++ b/nemo/src/rule_model/component.rs @@ -1,24 +1,33 @@ -//! This module defines a logical component and ... +//! This module defines the logical components that make up a program. -use std::fmt::{Debug, Display}; +pub mod atom; +pub mod fact; +pub mod import_export; +pub mod literal; +pub mod rule; +pub mod term; -use crate::io::parser::ast::AstNode; +use std::fmt::{Debug, Display}; -use super::{ - error::ProgramConstructionError, - origin::{ComponentOrigin, OriginParseReference}, -}; +use super::{error::ProgramConstructionError, origin::Origin}; -pub trait ProgramComponent: Debug + Display { - type Node<'a>: AstNode; +/// Trait implemented by objects that are part of the logical rule model of the nemo language. +pub trait ProgramComponent: Debug + Display + Clone + PartialEq + Eq { + /// Construct this object from a string. + fn parse(_string: &str) -> Result + where + Self: Sized; - fn from_ast_node<'a>(node: Self::Node<'a>, origin: OriginParseReference) -> Self; + /// Return the [Origin] of this component. + fn origin(&self) -> &Origin; - fn parse(string: &str) -> Result + /// Set the [Origin] of this component. + fn set_origin(self, origin: Origin) -> Self where Self: Sized; - fn origin(&self) -> &ComponentOrigin; + /// Validate this component + fn validate(&self) -> Result<(), ProgramConstructionError> + where + Self: Sized; } - -pub mod variable; diff --git a/nemo/src/rule_model/component/atom.rs b/nemo/src/rule_model/component/atom.rs new file mode 100644 index 000000000..7943ed9c7 --- /dev/null +++ b/nemo/src/rule_model/component/atom.rs @@ -0,0 +1,98 @@ +//! This module defines an [Atom]. + +use std::{fmt::Display, hash::Hash}; + +use crate::rule_model::{error::ProgramConstructionError, origin::Origin}; + +use super::{ + term::{Identifier, Term}, + ProgramComponent, +}; + +/// An atom +#[derive(Debug, Clone, Eq)] +pub struct Atom { + /// Origin of this component. + origin: Origin, + + /// Predicate name associated with this atom + name: Identifier, + /// Subterms of the function + terms: Vec, +} + +impl Atom { + /// Create a new [Atom]. + pub fn new(name: &str, subterms: Vec) -> Self { + Self { + origin: Origin::Created, + name: Identifier::new(name.to_string()), + terms: subterms, + } + } + + /// Return an iterator over the subterms of this atom. + pub fn subterms(&self) -> impl Iterator { + self.terms.iter() + } + + /// Return an mutable iterator over the subterms of this atom. 
+ pub fn subterms_mut(&mut self) -> impl Iterator { + self.terms.iter_mut() + } +} + +impl Display for Atom { + fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + todo!() + } +} + +impl PartialEq for Atom { + fn eq(&self, other: &Self) -> bool { + self.origin == other.origin && self.name == other.name && self.terms == other.terms + } +} + +impl Hash for Atom { + fn hash(&self, state: &mut H) { + self.name.hash(state); + self.terms.hash(state); + } +} + +impl ProgramComponent for Atom { + fn parse(_string: &str) -> Result + where + Self: Sized, + { + todo!() + } + + fn origin(&self) -> &Origin { + &self.origin + } + + fn set_origin(mut self, origin: Origin) -> Self + where + Self: Sized, + { + self.origin = origin; + self + } + + fn validate(&self) -> Result<(), ProgramConstructionError> + where + Self: Sized, + { + if !self.name.is_valid() { + todo!() + } + + for term in self.subterms() { + term.validate()?; + } + + Ok(()) + } +} diff --git a/nemo/src/rule_model/component/fact.rs b/nemo/src/rule_model/component/fact.rs new file mode 100644 index 000000000..69afd6fdb --- /dev/null +++ b/nemo/src/rule_model/component/fact.rs @@ -0,0 +1,74 @@ +//! This module defines [Fact]. + +use std::{fmt::Display, hash::Hash}; + +use crate::rule_model::origin::Origin; + +use super::{term::Term, ProgramComponent}; + +/// A (ground) fact +#[derive(Debug, Clone, Eq)] +pub struct Fact { + /// Origin of this component + origin: Origin, + + terms: Vec, +} + +impl Fact { + /// Return an iterator over the subterms of this fact. + pub fn subterms(&self) -> impl Iterator { + self.terms.iter() + } + + /// Return an mutable iterator over the subterms of this fact. + pub fn subterms_mut(&mut self) -> impl Iterator { + self.terms.iter_mut() + } +} + +impl Display for Fact { + fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + todo!() + } +} + +impl PartialEq for Fact { + fn eq(&self, other: &Self) -> bool { + self.terms == other.terms + } +} + +impl Hash for Fact { + fn hash(&self, state: &mut H) { + self.terms.hash(state); + } +} + +impl ProgramComponent for Fact { + fn parse(_string: &str) -> Result + where + Self: Sized, + { + todo!() + } + + fn origin(&self) -> &Origin { + todo!() + } + + fn set_origin(mut self, origin: Origin) -> Self + where + Self: Sized, + { + self.origin = origin; + self + } + + fn validate(&self) -> Result<(), crate::rule_model::error::ProgramConstructionError> + where + Self: Sized, + { + todo!() + } +} diff --git a/nemo/src/rule_model/component/import_export.rs b/nemo/src/rule_model/component/import_export.rs new file mode 100644 index 000000000..37f09f3d8 --- /dev/null +++ b/nemo/src/rule_model/component/import_export.rs @@ -0,0 +1,243 @@ +//! Import and export directives are a direct representation of the syntactic information +//! given in rule files. + +use std::{fmt::Display, hash::Hash}; + +use nemo_physical::datavalues::MapDataValue; + +use crate::rule_model::origin::Origin; + +use super::{term::Identifier, ProgramComponent}; + +/// The different supported variants of the RDF format. +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)] +pub enum RdfVariant { + /// An unspecified format, using the resource name as a heuristic. 
+    #[default]
+    Unspecified,
+    /// RDF 1.1 N-Triples
+    NTriples,
+    /// RDF 1.1 N-Quads
+    NQuads,
+    /// RDF 1.1 Turtle
+    Turtle,
+    /// RDF 1.1 RDF/XML
+    RDFXML,
+    /// RDF 1.1 TriG
+    TriG,
+}
+
+impl Display for RdfVariant {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::NTriples => write!(f, "RDF N-Triples"),
+            Self::NQuads => write!(f, "RDF N-Quads"),
+            Self::Turtle => write!(f, "RDF Turtle"),
+            Self::RDFXML => write!(f, "RDF/XML"),
+            Self::TriG => write!(f, "RDF TriG"),
+            Self::Unspecified => write!(f, "RDF"),
+        }
+    }
+}
+
+/// Supported file formats.
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
+pub enum FileFormat {
+    /// Comma-separated values
+    CSV,
+    /// Delimiter-separated values
+    DSV,
+    /// Tab-separated values
+    TSV,
+    /// RDF Triples or Quads, with the given format variant.
+    RDF(RdfVariant),
+    /// JSON objects
+    JSON,
+}
+
+impl Display for FileFormat {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::DSV => write!(f, "DSV"),
+            Self::CSV => write!(f, "CSV"),
+            Self::TSV => write!(f, "TSV"),
+            Self::JSON => write!(f, "JSON"),
+            Self::RDF(variant) => write!(f, "{variant}"),
+        }
+    }
+}
+
+/// An import/export specification. This object captures all information that is typically
+/// present in an import or export directive in a Nemo program, including the main format,
+/// optional attributes that define additional parameters, and an identifier to map the data
+/// to or from (i.e., a predicate name).
+#[derive(Debug, Clone, Eq)]
+pub(crate) struct ImportExportDirective {
+    /// Origin of this component
+    origin: Origin,
+
+    /// The predicate we're handling.
+    predicate: Identifier,
+    /// The file format and resource we're using.
+    format: FileFormat,
+    /// The attributes we've been given.
+    attributes: MapDataValue,
+}
+
+impl PartialEq for ImportExportDirective {
+    fn eq(&self, other: &Self) -> bool {
+        self.predicate == other.predicate
+            && self.format == other.format
+            && self.attributes == other.attributes
+    }
+}
+
+impl Hash for ImportExportDirective {
+    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+        self.predicate.hash(state);
+        self.format.hash(state);
+        self.attributes.hash(state);
+    }
+}
+
+/// An import specification.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct ImportDirective(pub(crate) ImportExportDirective);
+
+impl ImportDirective {
+    /// Create a new [ImportDirective].
+    pub fn new(predicate: Identifier, format: FileFormat, attributes: MapDataValue) -> Self {
+        Self(ImportExportDirective {
+            origin: Origin::default(),
+            predicate,
+            format,
+            attributes,
+        })
+    }
+
+    /// Return the predicate.
+    pub fn predicate(&self) -> &Identifier {
+        &self.0.predicate
+    }
+
+    /// Return the file format.
+    pub fn file_format(&self) -> FileFormat {
+        self.0.format
+    }
+
+    /// Return the attributes.
+ pub fn attributes(&self) -> &MapDataValue { + &self.0.attributes + } +} + +impl From for ImportDirective { + fn from(value: ImportExportDirective) -> Self { + Self(value) + } +} + +impl Display for ImportDirective { + fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + todo!() + } +} + +impl ProgramComponent for ImportDirective { + fn parse(_string: &str) -> Result + where + Self: Sized, + { + todo!() + } + + fn origin(&self) -> &Origin { + &self.0.origin + } + + fn set_origin(mut self, origin: Origin) -> Self + where + Self: Sized, + { + self.0.origin = origin; + self + } + + fn validate(&self) -> Result<(), crate::rule_model::error::ProgramConstructionError> + where + Self: Sized, + { + todo!() + } +} + +/// An export specification. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct ExportDirective(pub(crate) ImportExportDirective); + +impl ExportDirective { + /// Create a new [ExportDirective]. + pub fn new(predicate: Identifier, format: FileFormat, attributes: MapDataValue) -> Self { + Self(ImportExportDirective { + origin: Origin::default(), + predicate, + format, + attributes, + }) + } + + /// Return the predicate. + pub fn predicate(&self) -> &Identifier { + &self.0.predicate + } + + /// Return the file format. + pub fn file_format(&self) -> FileFormat { + self.0.format + } + + /// Return the attributes. + pub fn attributes(&self) -> &MapDataValue { + &self.0.attributes + } +} + +impl From for ExportDirective { + fn from(value: ImportExportDirective) -> Self { + Self(value) + } +} + +impl Display for ExportDirective { + fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + todo!() + } +} + +impl ProgramComponent for ExportDirective { + fn parse(_string: &str) -> Result + where + Self: Sized, + { + todo!() + } + + fn origin(&self) -> &Origin { + &self.0.origin + } + + fn set_origin(mut self, origin: Origin) -> Self + where + Self: Sized, + { + self.0.origin = origin; + self + } + + fn validate(&self) -> Result<(), crate::rule_model::error::ProgramConstructionError> + where + Self: Sized, + { + todo!() + } +} diff --git a/nemo/src/rule_model/component/literal.rs b/nemo/src/rule_model/component/literal.rs new file mode 100644 index 000000000..3514b65b1 --- /dev/null +++ b/nemo/src/rule_model/component/literal.rs @@ -0,0 +1,59 @@ +//! 
This module defines [Literal] + +use std::{fmt::Display, hash::Hash}; + +use crate::rule_model::error::ProgramConstructionError; + +use super::{atom::Atom, term::operation::Operation, ProgramComponent}; + +/// A literal that can either be a positive or negative atom or an operation +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum Literal { + /// Positive atom + Positive(Atom), + /// Negative atom + Negative(Atom), + /// Operation + Operation(Operation), +} + +impl Display for Literal { + fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + todo!() + } +} + +impl ProgramComponent for Literal { + fn parse(_string: &str) -> Result + where + Self: Sized, + { + todo!() + } + + fn origin(&self) -> &crate::rule_model::origin::Origin { + match self { + Literal::Positive(positive) => positive.origin(), + Literal::Negative(negative) => negative.origin(), + Literal::Operation(operation) => operation.origin(), + } + } + + fn set_origin(self, origin: crate::rule_model::origin::Origin) -> Self + where + Self: Sized, + { + match self { + Literal::Positive(positive) => Literal::Positive(positive.set_origin(origin)), + Literal::Negative(negative) => Literal::Negative(negative.set_origin(origin)), + Literal::Operation(operation) => Literal::Operation(operation.set_origin(origin)), + } + } + + fn validate(&self) -> Result<(), ProgramConstructionError> + where + Self: Sized, + { + todo!() + } +} diff --git a/nemo/src/rule_model/component/rule.rs b/nemo/src/rule_model/component/rule.rs new file mode 100644 index 000000000..e06f3fef4 --- /dev/null +++ b/nemo/src/rule_model/component/rule.rs @@ -0,0 +1,224 @@ +//! This module defines [Rule] and [RuleBuilder] + +use std::{fmt::Display, hash::Hash}; + +use crate::rule_model::origin::Origin; + +use super::{atom::Atom, literal::Literal, term::operation::Operation, ProgramComponent}; + +/// A rule +#[derive(Debug, Clone, Eq)] +pub struct Rule { + /// Origin of this component + origin: Origin, + + /// Name of the rule + name: Option, + + /// Head of the rule + head: Vec, + /// Body of the rule + body: Vec, +} + +impl Rule { + /// Create a new [Rule]. + pub fn new(head: Vec, body: Vec) -> Self { + Self { + origin: Origin::Created, + name: None, + head, + body, + } + } + + /// Set the name of the rule. + pub fn set_name(mut self, name: &str) -> Self { + self.name = Some(name.to_string()); + self + } +} + +impl Display for Rule { + fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + todo!() + } +} + +impl PartialEq for Rule { + fn eq(&self, other: &Self) -> bool { + self.head == other.head && self.body == other.body + } +} + +impl Hash for Rule { + fn hash(&self, state: &mut H) { + self.head.hash(state); + self.body.hash(state); + } +} + +impl ProgramComponent for Rule { + fn parse(_string: &str) -> Result + where + Self: Sized, + { + todo!() + } + + fn origin(&self) -> &Origin { + &self.origin + } + + fn set_origin(mut self, origin: Origin) -> Self + where + Self: Sized, + { + self.origin = origin; + self + } + + fn validate(&self) -> Result<(), crate::rule_model::error::ProgramConstructionError> + where + Self: Sized, + { + todo!() + } +} + +/// Builder for a rule +#[derive(Debug, Default)] +pub struct RuleBuilder { + /// Origin of the rule + origin: Origin, + + /// Builder for the head of the rule + head: RuleHeadBuilder, + /// Builder for the body of the rule + body: RuleBodyBuilder, +} + +impl RuleBuilder { + /// Set the [Origin] of the built rule. 
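+    ///
+    /// A minimal sketch of the intended call chain (`head_atom` and `body_atom`
+    /// are placeholders):
+    /// ```ignore
+    /// let rule = RuleBuilder::default()
+    ///     .origin(Origin::External(0))
+    ///     .head().add_atom(head_atom).done()
+    ///     .body().add_positive_atom(body_atom).done()
+    ///     .finalize();
+    /// ```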
+    pub fn origin(mut self, origin: Origin) -> Self {
+        self.origin = origin;
+        self
+    }
+
+    /// Return a builder for the body of the rule.
+    pub fn body(self) -> RuleBodySubBuilder {
+        RuleBodySubBuilder { builder: self }
+    }
+
+    /// Return a builder for the head of the rule.
+    pub fn head(self) -> RuleHeadSubBuilder {
+        RuleHeadSubBuilder { builder: self }
+    }
+
+    /// Finish building and return a [Rule].
+    pub fn finalize(self) -> Rule {
+        Rule::new(self.head.finalize(), self.body.finalize()).set_origin(self.origin)
+    }
+}
+
+/// Builder for the rule body
+#[derive(Debug, Default)]
+pub struct RuleBodyBuilder {
+    /// Current list of [Literal]s
+    literals: Vec<Literal>,
+}
+
+impl RuleBodyBuilder {
+    /// Add a positive atom to the body of the rule.
+    pub fn add_positive_atom(mut self, atom: Atom) -> Self {
+        self.literals.push(Literal::Positive(atom));
+        self
+    }
+
+    /// Add a negative atom to the body of the rule.
+    pub fn add_negative_atom(mut self, atom: Atom) -> Self {
+        self.literals.push(Literal::Negative(atom));
+        self
+    }
+
+    /// Add an operation to the body of the rule.
+    pub fn add_operation(mut self, operation: Operation) -> Self {
+        self.literals.push(Literal::Operation(operation));
+        self
+    }
+
+    /// Finish building and return a list of [Literal]s.
+    pub fn finalize(self) -> Vec<Literal> {
+        self.literals
+    }
+}
+
+/// Subbuilder for building the body of a rule
+#[derive(Debug)]
+pub struct RuleBodySubBuilder {
+    builder: RuleBuilder,
+}
+
+impl RuleBodySubBuilder {
+    /// Add a positive atom to the body of the rule.
+    pub fn add_positive_atom(mut self, atom: Atom) -> Self {
+        self.builder.body = self.builder.body.add_positive_atom(atom);
+        self
+    }
+
+    /// Add a negative atom to the body of the rule.
+    pub fn add_negative_atom(mut self, atom: Atom) -> Self {
+        self.builder.body = self.builder.body.add_negative_atom(atom);
+        self
+    }
+
+    /// Add an operation to the body of the rule.
+    pub fn add_operation(mut self, operation: Operation) -> Self {
+        self.builder.body = self.builder.body.add_operation(operation);
+        self
+    }
+
+    /// Return to the [RuleBuilder]
+    pub fn done(self) -> RuleBuilder {
+        self.builder
+    }
+}
+
+/// Builder for the rule head
+#[derive(Debug, Default)]
+pub struct RuleHeadBuilder {
+    /// Current list of [Atom]s
+    atoms: Vec<Atom>,
+}
+
+impl RuleHeadBuilder {
+    /// Add another atom to the head of the rule.
+    pub fn add_atom(mut self, atom: Atom) -> Self {
+        self.atoms.push(atom);
+        self
+    }
+
+    /// Finish building and return a list of [Atom]s.
+    pub fn finalize(self) -> Vec<Atom> {
+        self.atoms
+    }
+}
+
+/// Subbuilder for building the head of a rule
+#[derive(Debug)]
+pub struct RuleHeadSubBuilder {
+    builder: RuleBuilder,
+}
+
+impl RuleHeadSubBuilder {
+    /// Add another atom to the head of the rule.
+    pub fn add_atom(mut self, atom: Atom) -> Self {
+        self.builder.head = self.builder.head.add_atom(atom);
+        self
+    }
+
+    /// Return to the [RuleBuilder]
+    pub fn done(self) -> RuleBuilder {
+        self.builder
+    }
+}
diff --git a/nemo/src/rule_model/component/term.rs b/nemo/src/rule_model/component/term.rs
new file mode 100644
index 000000000..2616ff44a
--- /dev/null
+++ b/nemo/src/rule_model/component/term.rs
@@ -0,0 +1,133 @@
+//! This module defines [Term].
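+//!
+//! A rough usage sketch (constructors as defined below):
+//! ```ignore
+//! let x = Term::universal_variable("x");
+//! let n = Term::integer(42);
+//! ```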
+
+use std::fmt::{Debug, Display};
+
+use function::FunctionTerm;
+use map::Map;
+use nemo_physical::datavalues::AnyDataValue;
+use operation::Operation;
+use primitive::{ground::GroundTerm, variable::Variable, Primitive};
+
+use crate::rule_model::{error::ProgramConstructionError, origin::Origin};
+
+use super::ProgramComponent;
+
+pub mod aggregate;
+pub mod function;
+pub mod map;
+pub mod operation;
+pub mod primitive;
+pub mod tuple;
+
+/// Name of a term
+#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
+pub struct Identifier(String);
+
+impl Identifier {
+    /// Create a new [Identifier].
+    pub fn new(name: String) -> Self {
+        Self(name)
+    }
+
+    /// Validate term name.
+    pub fn is_valid(&self) -> bool {
+        !self.0.is_empty()
+    }
+}
+
+/// A term, which is either a primitive value or variable, a function term, a map, or an operation
+#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)]
+pub enum Term {
+    /// Unstructured, primitive term
+    Primitive(Primitive),
+    /// Abstract function over a list of terms
+    FunctionTerm(FunctionTerm),
+    /// Map of terms
+    Map(Map),
+    /// Operation applied to a list of terms
+    Operation(Operation),
+}
+
+impl Term {
+    /// Create a universal variable term.
+    pub fn universal_variable(name: &str) -> Self {
+        Self::Primitive(Primitive::Variable(Variable::universal(name)))
+    }
+
+    /// Create an anonymous variable term.
+    pub fn anonymous_variable() -> Self {
+        Self::Primitive(Primitive::Variable(Variable::anonymous()))
+    }
+
+    /// Create an existential variable term.
+    pub fn existential_variable(name: &str) -> Self {
+        Self::Primitive(Primitive::Variable(Variable::existential(name)))
+    }
+
+    /// Create an integer term
+    pub fn integer(number: i64) -> Self {
+        Self::Primitive(Primitive::Ground(GroundTerm::new(
+            AnyDataValue::new_integer_from_i64(number),
+        )))
+    }
+}
+
+impl Display for Term {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Term::Primitive(primitive) => write!(f, "{}", primitive),
+            Term::FunctionTerm(function) => write!(f, "{}", function),
+            Term::Map(map) => write!(f, "{}", map),
+            Term::Operation(operation) => write!(f, "{}", operation),
+        }
+    }
+}
+
+impl ProgramComponent for Term {
+    fn parse(_string: &str) -> Result<Self, ProgramConstructionError>
+    where
+        Self: Sized,
+    {
+        todo!()
+    }
+
+    fn origin(&self) -> &Origin {
+        match self {
+            Term::Primitive(primitive) => primitive.origin(),
+            Term::FunctionTerm(function) => function.origin(),
+            Term::Map(map) => map.origin(),
+            Term::Operation(operation) => operation.origin(),
+        }
+    }
+
+    fn set_origin(self, origin: Origin) -> Self
+    where
+        Self: Sized,
+    {
+        match self {
+            Term::Primitive(primitive) => Term::Primitive(primitive.set_origin(origin)),
+            Term::FunctionTerm(function) => Term::FunctionTerm(function.set_origin(origin)),
+            Term::Map(map) => Term::Map(map.set_origin(origin)),
+            Term::Operation(operation) => Term::Operation(operation.set_origin(origin)),
+        }
+    }
+
+    fn validate(&self) -> Result<(), ProgramConstructionError>
+    where
+        Self: Sized,
+    {
+        todo!()
+    }
+}
+
+// impl ASTConstructable for Term {
+//     type Node<'a> = crate::io::parser::ast::term::Term<'a>;
+
+//     fn from_ast_node<'a>(
+//         node: Self::Node<'a>,
+//         origin: crate::rule_model::origin::ExternalReference,
+//         context: &super::ASTContext,
+//     ) -> Self {
+//         todo!()
+//     }
+// }
diff --git a/nemo/src/rule_model/component/term/aggregate.rs b/nemo/src/rule_model/component/term/aggregate.rs
new file mode 100644
index 000000000..1d6764d75
--- /dev/null
+++ b/nemo/src/rule_model/component/term/aggregate.rs
@@ -0,0 +1,96 @@
+//! This module defines [Aggregate]
+
+use std::{fmt::Display, hash::Hash};
+
+use crate::rule_model::{component::ProgramComponent, origin::Origin};
+
+use super::{primitive::variable::Variable, Term};
+
+/// Aggregate operation on logical values
+#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub enum AggregateKind {
+    /// Count of distinct values
+    CountValues,
+    /// Minimum numerical value
+    MinNumber,
+    /// Maximum numerical value
+    MaxNumber,
+    /// Sum of numerical values
+    SumOfNumbers,
+}
+
+/// An aggregate
+#[derive(Debug, Clone, Eq)]
+pub struct Aggregate {
+    /// Origin of this component
+    origin: Origin,
+
+    /// Type of aggregate operation
+    kind: AggregateKind,
+    /// Expression over which to aggregate
+    aggregate: Term,
+    /// Distinct variables
+    distinct: Vec<Variable>,
+}
+
+impl Aggregate {
+    /// Create a new [Aggregate].
+    pub fn new(kind: AggregateKind, aggregate: Term, distinct: Vec<Variable>) -> Self {
+        Self {
+            origin: Origin::default(),
+            kind,
+            aggregate,
+            distinct,
+        }
+    }
+}
+
+impl Display for Aggregate {
+    fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        todo!()
+    }
+}
+
+impl PartialEq for Aggregate {
+    fn eq(&self, other: &Self) -> bool {
+        self.kind == other.kind
+            && self.aggregate == other.aggregate
+            && self.distinct == other.distinct
+    }
+}
+
+impl Hash for Aggregate {
+    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+        self.kind.hash(state);
+        self.aggregate.hash(state);
+        self.distinct.hash(state);
+    }
+}
+
+impl ProgramComponent for Aggregate {
+    fn parse(_string: &str) -> Result<Self, crate::rule_model::error::ProgramConstructionError>
+    where
+        Self: Sized,
+    {
+        todo!()
+    }
+
+    fn origin(&self) -> &Origin {
+        &self.origin
+    }
+
+    fn set_origin(mut self, origin: Origin) -> Self
+    where
+        Self: Sized,
+    {
+        self.origin = origin;
+        self
+    }
+
+    fn validate(&self) -> Result<(), crate::rule_model::error::ProgramConstructionError>
+    where
+        Self: Sized,
+    {
+        todo!()
+    }
+}
diff --git a/nemo/src/rule_model/component/term/function.rs b/nemo/src/rule_model/component/term/function.rs
new file mode 100644
index 000000000..84942a5ab
--- /dev/null
+++ b/nemo/src/rule_model/component/term/function.rs
@@ -0,0 +1,107 @@
+//! This module defines [FunctionTerm]
+
+use std::{fmt::Display, hash::Hash};
+
+use crate::rule_model::{
+    component::ProgramComponent, error::ProgramConstructionError, origin::Origin,
+};
+
+use super::{Identifier, Term};
+
+/// Function term
+#[derive(Debug, Clone, Eq)]
+pub struct FunctionTerm {
+    /// Origin of this component
+    origin: Origin,
+
+    /// Name of the function
+    name: Identifier,
+    /// Subterms of the function
+    terms: Vec<Term>,
+}
+
+impl FunctionTerm {
+    /// Create a new [FunctionTerm].
+    pub fn new(name: &str, subterms: Vec<Term>) -> Self {
+        Self {
+            origin: Origin::Created,
+            name: Identifier::new(name.to_string()),
+            terms: subterms,
+        }
+    }
+
+    /// Return an iterator over the subterms of this function term.
+    pub fn subterms(&self) -> impl Iterator<Item = &Term> {
+        self.terms.iter()
+    }
+
+    /// Return a mutable iterator over the subterms of this function term.
+ pub fn subterms_mut(&mut self) -> impl Iterator { + self.terms.iter_mut() + } +} + +impl Display for FunctionTerm { + fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + todo!() + } +} + +impl PartialEq for FunctionTerm { + fn eq(&self, other: &Self) -> bool { + self.origin == other.origin && self.name == other.name && self.terms == other.terms + } +} + +impl PartialOrd for FunctionTerm { + fn partial_cmp(&self, other: &Self) -> Option { + match self.name.partial_cmp(&other.name) { + Some(core::cmp::Ordering::Equal) => {} + ord => return ord, + } + self.terms.partial_cmp(&other.terms) + } +} + +impl Hash for FunctionTerm { + fn hash(&self, state: &mut H) { + self.name.hash(state); + self.terms.hash(state); + } +} + +impl ProgramComponent for FunctionTerm { + fn parse(_string: &str) -> Result + where + Self: Sized, + { + todo!() + } + + fn origin(&self) -> &Origin { + &self.origin + } + + fn set_origin(mut self, origin: Origin) -> Self + where + Self: Sized, + { + self.origin = origin; + self + } + + fn validate(&self) -> Result<(), ProgramConstructionError> + where + Self: Sized, + { + if !self.name.is_valid() { + todo!() + } + + for term in self.subterms() { + term.validate()? + } + + Ok(()) + } +} diff --git a/nemo/src/rule_model/component/term/map.rs b/nemo/src/rule_model/component/term/map.rs new file mode 100644 index 000000000..a353d323b --- /dev/null +++ b/nemo/src/rule_model/component/term/map.rs @@ -0,0 +1,69 @@ +//! This module defines [Map] + +use std::{collections::BTreeMap, fmt::Display, hash::Hash}; + +use crate::rule_model::{component::ProgramComponent, origin::Origin}; + +use super::Term; + +/// Map term +#[derive(Debug, Clone, Eq)] +pub struct Map { + /// Origin of this component + origin: Origin, + + /// Map associating [Term]s with [Term]s + map: BTreeMap, +} + +impl Display for Map { + fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + todo!() + } +} + +impl PartialEq for Map { + fn eq(&self, other: &Self) -> bool { + self.map == other.map + } +} + +impl PartialOrd for Map { + fn partial_cmp(&self, other: &Self) -> Option { + self.map.partial_cmp(&other.map) + } +} + +impl Hash for Map { + fn hash(&self, state: &mut H) { + self.map.hash(state); + } +} + +impl ProgramComponent for Map { + fn parse(_string: &str) -> Result + where + Self: Sized, + { + todo!() + } + + fn origin(&self) -> &Origin { + todo!() + } + + fn set_origin(mut self, origin: Origin) -> Self + where + Self: Sized, + { + self.origin = origin; + self + } + + fn validate(&self) -> Result<(), crate::rule_model::error::ProgramConstructionError> + where + Self: Sized, + { + todo!() + } +} diff --git a/nemo/src/rule_model/component/term/operation.rs b/nemo/src/rule_model/component/term/operation.rs new file mode 100644 index 000000000..f847d8896 --- /dev/null +++ b/nemo/src/rule_model/component/term/operation.rs @@ -0,0 +1,195 @@ +//! This module defines [Operation]. 
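+//!
+//! An [Operation] applies a builtin function, e.g. numeric addition,
+//! to a list of input [Term]s; the available functions are listed in [OperationKind].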
+
+use std::{fmt::Display, hash::Hash};
+
+use crate::rule_model::{component::ProgramComponent, origin::Origin};
+
+use super::Term;
+
+/// Supported operations
+#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, PartialOrd)]
+pub enum OperationKind {
+    /// Equality
+    Equal,
+    /// Inequality
+    Unequals,
+    /// Sum of numeric values
+    NumericSum,
+    /// Subtraction between two numeric values
+    NumericSubtraction,
+    /// Product of numeric values
+    NumericProduct,
+    /// Division between two numeric values
+    NumericDivision,
+    /// Logarithm of a numeric value to some numeric base
+    NumericLogarithm,
+    /// Numeric value raised to another numeric value
+    NumericPower,
+    /// Remainder of a division between two numeric values
+    NumericRemainder,
+    /// Numeric greater than comparison
+    NumericGreaterthan,
+    /// Numeric greater than or equals comparison
+    NumericGreaterthaneq,
+    /// Numeric less than comparison
+    NumericLessthan,
+    /// Numeric less than or equals comparison
+    NumericLessthaneq,
+    /// Lexicographic comparison between strings
+    StringCompare,
+    /// Check whether string is contained in another, corresponding to SPARQL function CONTAINS.
+    StringContains,
+    /// String starting at some start position
+    StringSubstring,
+    /// First part of a string split by some other string
+    StringBefore,
+    /// Second part of a string split by some other string
+    StringAfter,
+    /// Whether string starts with a certain string
+    StringStarts,
+    /// Whether string ends with a certain string
+    StringEnds,
+    /// Boolean negation
+    BooleanNegation,
+    /// Cast to double
+    CastToDouble,
+    /// Cast to float
+    CastToFloat,
+    /// Cast to integer
+    CastToInteger,
+    /// Canonical string representation of a value
+    CanonicalString,
+    /// Check if value is an integer
+    CheckIsInteger,
+    /// Check if value is a float
+    CheckIsFloat,
+    /// Check if value is a double
+    CheckIsDouble,
+    /// Check if value is an iri
+    CheckIsIri,
+    /// Check if value is numeric
+    CheckIsNumeric,
+    /// Check if value is a null
+    CheckIsNull,
+    /// Check if value is a string
+    CheckIsString,
+    /// Get datatype of a value
+    Datatype,
+    /// Get language tag of a language-tagged string
+    LanguageTag,
+    /// Lexical value
+    LexicalValue,
+    /// Absolute value of a numeric value
+    NumericAbsolute,
+    /// Cosine of a numeric value
+    NumericCosine,
+    /// Rounding up of a numeric value
+    NumericCeil,
+    /// Rounding down of a numeric value
+    NumericFloor,
+    /// Additive inverse of a numeric value
+    NumericNegation,
+    /// Rounding of a numeric value
+    NumericRound,
+    /// Sine of a numeric value
+    NumericSine,
+    /// Square root of a numeric value
+    NumericSquareroot,
+    /// Tangent of a numeric value
+    NumericTangent,
+    /// Length of a string value
+    StringLength,
+    /// Reverse of a string value
+    StringReverse,
+    /// String converted to lowercase letters
+    StringLowercase,
+    /// String converted to uppercase letters
+    StringUppercase,
+    /// Bitwise and operation
+    BitAnd,
+    /// Bitwise or operation
+    BitOr,
+    /// Bitwise xor operation
+    BitXor,
+    /// Conjunction of boolean values
+    BooleanConjunction,
+    /// Disjunction of boolean values
+    BooleanDisjunction,
+    /// Minimum of numeric values
+    NumericMinimum,
+    /// Maximum of numeric values
+    NumericMaximum,
+    /// Lukasiewicz norm of numeric values
+    NumericLukasiewicz,
+    /// Concatenation of two string values, corresponding to SPARQL function CONCAT.
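+    /// (e.g., concatenating `"ab"` and `"c"` yields `"abc"`).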
+ StringConcatenation, +} + +/// Operation that can be applied to terms +#[derive(Debug, Clone, Eq)] +pub struct Operation { + /// Origin of this component + origin: Origin, + + /// The kind of operation + kind: OperationKind, + /// The input arguments for the operation + subterms: Vec, +} + +impl Display for Operation { + fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + todo!() + } +} + +impl PartialEq for Operation { + fn eq(&self, other: &Self) -> bool { + self.kind == other.kind && self.subterms == other.subterms + } +} + +impl PartialOrd for Operation { + fn partial_cmp(&self, other: &Self) -> Option { + match self.kind.partial_cmp(&other.kind) { + Some(core::cmp::Ordering::Equal) => {} + ord => return ord, + } + self.subterms.partial_cmp(&other.subterms) + } +} + +impl Hash for Operation { + fn hash(&self, state: &mut H) { + self.kind.hash(state); + self.subterms.hash(state); + } +} + +impl ProgramComponent for Operation { + fn parse(_string: &str) -> Result + where + Self: Sized, + { + todo!() + } + + fn origin(&self) -> &Origin { + todo!() + } + + fn set_origin(mut self, origin: Origin) -> Self + where + Self: Sized, + { + self.origin = origin; + self + } + + fn validate(&self) -> Result<(), crate::rule_model::error::ProgramConstructionError> + where + Self: Sized, + { + todo!() + } +} diff --git a/nemo/src/rule_model/component/term/primitive.rs b/nemo/src/rule_model/component/term/primitive.rs new file mode 100644 index 000000000..15d065b21 --- /dev/null +++ b/nemo/src/rule_model/component/term/primitive.rs @@ -0,0 +1,97 @@ +//! This module defines [PrimitiveTerm]. + +pub mod ground; +pub mod variable; + +use std::{fmt::Display, hash::Hash}; + +use ground::GroundTerm; +use variable::Variable; + +use crate::rule_model::{component::ProgramComponent, origin::Origin}; + +/// Primitive term +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)] +pub enum Primitive { + /// Variable + Variable(Variable), + /// Ground term + Ground(GroundTerm), +} + +impl Primitive { + /// Return `true` when this term is not a variable and `false` otherwise. 
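+    ///
+    /// A short sketch (`value` is a placeholder data value):
+    /// ```ignore
+    /// assert!(Primitive::Ground(GroundTerm::new(value)).is_ground());
+    /// assert!(!Primitive::Variable(Variable::universal("x")).is_ground());
+    /// ```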
+ pub fn is_ground(&self) -> bool { + matches!(self, Self::Ground(_)) + } +} + +impl Display for Primitive { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Primitive::Variable(variable) => variable.fmt(f), + Primitive::Ground(ground) => ground.fmt(f), + } + } +} + +impl ProgramComponent for Primitive { + fn parse(_string: &str) -> Result + where + Self: Sized, + { + todo!() + } + + fn origin(&self) -> &Origin { + match self { + Self::Variable(variable) => variable.origin(), + Self::Ground(ground) => ground.origin(), + } + } + + fn set_origin(self, origin: Origin) -> Self + where + Self: Sized, + { + match self { + Self::Variable(variable) => Self::Variable(variable.set_origin(origin)), + Self::Ground(ground) => Self::Ground(ground.set_origin(origin)), + } + } + + fn validate(&self) -> Result<(), crate::rule_model::error::ProgramConstructionError> + where + Self: Sized, + { + match self { + Primitive::Variable(variable) => variable.validate(), + Primitive::Ground(ground) => ground.validate(), + } + } +} + +// impl ASTConstructable for Primitive { +// type Node<'a> = Term<'a>; + +// fn from_ast_node<'a>( +// node: Self::Node<'a>, +// origin: ExternalReference, +// context: &ASTContext, +// ) -> Self { +// match node { +// Term::Primitive(primitive) => { +// Primitive::Ground(GroundTerm::from_ast_node(primitive, origin, context)) +// } +// Term::Blank(token) => { +// let value: AnyDataValue = todo!(); + +// Primitive::Ground(GroundTerm::create_parsed(value, origin)) +// } +// Term::UniversalVariable(_) | Term::ExistentialVariable(_) => { +// Primitive::Variable(Variable::from_ast_node(node, origin, context)) +// } +// _ => unreachable!("TODO"), +// } +// } +// } diff --git a/nemo/src/rule_model/component/term/primitive/ground.rs b/nemo/src/rule_model/component/term/primitive/ground.rs new file mode 100644 index 000000000..2cc76e577 --- /dev/null +++ b/nemo/src/rule_model/component/term/primitive/ground.rs @@ -0,0 +1,134 @@ +//! This module defines [GroundTerm]. + +use std::{fmt::Display, hash::Hash}; + +use nemo_physical::datavalues::AnyDataValue; + +use crate::rule_model::{ + component::ProgramComponent, error::ProgramConstructionError, origin::Origin, +}; + +/// Primitive ground term +#[derive(Debug, Clone, Eq)] +pub struct GroundTerm { + /// Origin of this component + origin: Origin, + /// Value of this term + value: AnyDataValue, +} + +impl GroundTerm { + /// Create a new [GroundTerm]. 
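+    ///
+    /// E.g. (sketch): `GroundTerm::new(AnyDataValue::new_integer_from_i64(42))`
+    /// creates the integer constant `42`.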
+ pub fn new(value: AnyDataValue) -> Self { + Self { + origin: Origin::Created, + value, + } + } +} + +impl Display for GroundTerm { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.value.fmt(f) + } +} + +impl PartialEq for GroundTerm { + fn eq(&self, other: &Self) -> bool { + self.value == other.value + } +} + +impl PartialOrd for GroundTerm { + fn partial_cmp(&self, other: &Self) -> Option { + self.value.partial_cmp(&other.value) + } +} + +impl Hash for GroundTerm { + fn hash(&self, state: &mut H) { + self.value.hash(state); + } +} + +impl ProgramComponent for GroundTerm { + fn parse(_string: &str) -> Result + where + Self: Sized, + { + todo!() + } + + fn origin(&self) -> &Origin { + &self.origin + } + + fn set_origin(mut self, origin: Origin) -> Self + where + Self: Sized, + { + self.origin = origin; + self + } + + fn validate(&self) -> Result<(), ProgramConstructionError> + where + Self: Sized, + { + Ok(()) + } +} + +// impl ASTConstructable for GroundTerm { +// type Node<'a> = Primitive<'a>; + +// fn from_ast_node<'a>( +// node: Self::Node<'a>, +// origin: ExternalReference, +// context: &ASTContext, +// ) -> Self { +// match node { +// Primitive::Constant(token) => { +// Self::create_parsed(AnyDataValue::new_iri(token.to_string()), origin) +// } +// Primitive::PrefixedConstant { +// prefix, constant, .. +// } => { +// let prefixed_constant = prefix +// .map(|token| { +// context +// .prefixes +// .get(&token.to_string()) +// .cloned() +// .unwrap_or(token.to_string()) // TODO: We could also panic here +// }) +// .unwrap_or(String::from("")) +// + &constant.to_string(); + +// Self::create_parsed(AnyDataValue::new_iri(prefixed_constant), origin) +// } +// Primitive::Number { +// span, +// sign, +// before, +// dot, +// after, +// exponent, +// } => { +// // TODO: Create number values +// // Self::create_parsed(AnyDataValue:: span.to_string(), origin) +// todo!() +// } +// Primitive::String(string) => { +// Self::create_parsed(AnyDataValue::new_plain_string(string.to_string()), origin) +// } +// Primitive::Iri(iri) => { +// Self::create_parsed(AnyDataValue::new_iri(iri.to_string()), origin) +// } +// Primitive::RdfLiteral { string, iri, .. } => Self::create_parsed( +// AnyDataValue::new_other(string.to_string(), iri.to_string()), +// origin, +// ), +// } +// } +// } diff --git a/nemo/src/rule_model/component/term/primitive/variable.rs b/nemo/src/rule_model/component/term/primitive/variable.rs new file mode 100644 index 000000000..d98ea3caf --- /dev/null +++ b/nemo/src/rule_model/component/term/primitive/variable.rs @@ -0,0 +1,140 @@ +//! This module defines [Variable] + +use std::fmt::Display; + +use existential::ExistentialVariable; +use universal::UniversalVariable; + +use crate::rule_model::{error::ProgramConstructionError, origin::Origin}; + +use super::ProgramComponent; + +pub mod existential; +pub mod universal; + +/// Name of a variable +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct VariableName(String); + +impl VariableName { + /// Create a new [VariableName]. + fn new(name: String) -> Self { + Self(name) + } + + /// Validate variable name. 
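+    ///
+    /// Currently this only rejects the empty name: `VariableName::new(String::new())`
+    /// is invalid, while any non-empty name passes.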
+ pub fn is_valid(&self) -> bool { + !self.0.is_empty() + } +} + +impl Display for VariableName { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } +} + +/// Variable that can be bound to a specific value +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)] +pub enum Variable { + /// Universal variable + Universal(UniversalVariable), + /// Existential variable + Existential(ExistentialVariable), +} + +impl Variable { + /// Create a new universal variable. + pub fn universal(name: &str) -> Self { + Self::Universal(UniversalVariable::new(name)) + } + + /// Create a new existential variable. + pub fn existential(name: &str) -> Self { + Self::Existential(ExistentialVariable::new(name)) + } + + /// Create a new anonymous variable. + pub fn anonymous() -> Self { + Self::Universal(UniversalVariable::new_anonymous()) + } + + /// Return the name of the variable or `None` if it is anonymous + pub fn name(&self) -> Option { + match self { + Variable::Universal(variable) => variable.name(), + Variable::Existential(variable) => Some(variable.name()), + } + } +} + +impl Display for Variable { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Variable::Universal(variable) => variable.fmt(f), + Variable::Existential(variable) => variable.fmt(f), + } + } +} + +impl ProgramComponent for Variable { + fn parse(_string: &str) -> Result + where + Self: Sized, + { + todo!() + } + + fn origin(&self) -> &Origin { + match self { + Variable::Universal(variable) => variable.origin(), + Variable::Existential(variable) => variable.origin(), + } + } + + fn set_origin(self, origin: Origin) -> Self + where + Self: Sized, + { + match self { + Variable::Universal(variable) => Self::Universal(variable.set_origin(origin)), + Variable::Existential(variable) => Self::Existential(variable.set_origin(origin)), + } + } + + fn validate(&self) -> Result<(), ProgramConstructionError> + where + Self: Sized, + { + match &self { + Variable::Universal(universal) => { + universal.validate()?; + } + Variable::Existential(existential) => { + existential.validate()?; + } + } + + Ok(()) + } +} + +// impl ASTConstructable for Variable { +// type Node<'a> = Term<'a>; + +// fn from_ast_node<'a>( +// node: Self::Node<'a>, +// origin: ExternalReference, +// context: &ASTContext, +// ) -> Self { +// match node { +// Term::UniversalVariable(_) => { +// Variable::Universal(UniversalVariable::from_ast_node(node, origin, context)) +// } +// Term::ExistentialVariable(_) => { +// Variable::Existential(ExistentialVariable::from_ast_node(node, origin, context)) +// } +// _ => unreachable!("TODO"), +// } +// } +// } diff --git a/nemo/src/rule_model/component/term/primitive/variable/existential.rs b/nemo/src/rule_model/component/term/primitive/variable/existential.rs new file mode 100644 index 000000000..f584aab57 --- /dev/null +++ b/nemo/src/rule_model/component/term/primitive/variable/existential.rs @@ -0,0 +1,103 @@ +//! This module defines [ExistentialVariable]. + +use std::{fmt::Display, hash::Hash}; + +use crate::rule_model::{ + component::ProgramComponent, error::ProgramConstructionError, origin::Origin, +}; + +use super::VariableName; + +/// Variable that allows to assert the existence of an object +#[derive(Debug, Clone, Eq)] +pub struct ExistentialVariable { + /// Origin of this component + origin: Origin, + + /// Name of the variable + name: VariableName, +} + +impl ExistentialVariable { + /// Create a new [ExistentialVariable]. 
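+    ///
+    /// E.g. (sketch): `ExistentialVariable::new("v")` displays as `!v`
+    /// (cf. the [Display] impl below).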
+ pub fn new(name: &str) -> Self { + Self { + origin: Origin::Created, + name: VariableName::new(name.to_string()), + } + } + + /// Return the name of this variable. + pub fn name(&self) -> String { + self.name.to_string() + } +} + +impl Display for ExistentialVariable { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "!{}", self.name) + } +} + +impl PartialEq for ExistentialVariable { + fn eq(&self, other: &Self) -> bool { + self.name == other.name + } +} + +impl PartialOrd for ExistentialVariable { + fn partial_cmp(&self, other: &Self) -> Option { + self.name.partial_cmp(&other.name) + } +} + +impl Hash for ExistentialVariable { + fn hash(&self, state: &mut H) { + self.name.hash(state); + } +} + +impl ProgramComponent for ExistentialVariable { + fn parse(_string: &str) -> Result + where + Self: Sized, + { + todo!() + } + + fn origin(&self) -> &Origin { + &self.origin + } + + fn set_origin(mut self, origin: Origin) -> Self + where + Self: Sized, + { + self.origin = origin; + self + } + + fn validate(&self) -> Result<(), ProgramConstructionError> + where + Self: Sized, + { + todo!() + } +} + +// impl ASTConstructable for ExistentialVariable { +// type Node<'a> = Term<'a>; + +// fn from_ast_node<'a>(node: Term<'a>, origin: ExternalReference, _context: &ASTContext) -> Self { +// if let Term::UniversalVariable(token) = node { +// let name = token.span.to_string(); + +// Self { +// origin: Origin::External(origin), +// name: VariableName::new(name), +// } +// } else { +// unreachable!("TODO") +// } +// } +// } diff --git a/nemo/src/rule_model/component/term/primitive/variable/universal.rs b/nemo/src/rule_model/component/term/primitive/variable/universal.rs new file mode 100644 index 000000000..e2dae3680 --- /dev/null +++ b/nemo/src/rule_model/component/term/primitive/variable/universal.rs @@ -0,0 +1,123 @@ +//! This module defines [UniversalVariable]. + +use std::{fmt::Display, hash::Hash}; + +use crate::rule_model::{ + component::ProgramComponent, error::ProgramConstructionError, origin::Origin, +}; + +use super::VariableName; + +/// Variable that can be bound to a specific value +/// +/// Universal variables may not have a name, +/// in which case we call them anonymous. +#[derive(Debug, Clone, Eq)] +pub struct UniversalVariable { + /// Origin of this component + origin: Origin, + + /// Name of the variable + /// + /// This can be `None` in case this is an anonymous variable. + name: Option, +} + +impl UniversalVariable { + /// Create a new named [UniversalVariable] + pub fn new(name: &str) -> Self { + Self { + origin: Origin::Created, + name: Some(VariableName::new(name.to_string())), + } + } + + /// Create a new anonymous [UniversalVariable] + pub fn new_anonymous() -> Self { + Self { + origin: Origin::Created, + name: None, + } + } + + /// Return the name of this variable, + /// or `None` if the variable is unnamed. 
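+    ///
+    /// E.g. (sketch): `UniversalVariable::new("x").name()` yields `Some("x".to_string())`,
+    /// while `UniversalVariable::new_anonymous().name()` yields `None`.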
+ pub fn name(&self) -> Option { + self.name.as_ref().map(|name| name.to_string()) + } + + /// Return `true` if this is an anonymous variable, + /// and `false` otherwise + pub fn is_anonymous(&self) -> bool { + self.name.is_none() + } +} + +impl Display for UniversalVariable { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match &self.name { + Some(name) => write!(f, "?{}", name), + None => write!(f, "_"), + } + } +} + +impl PartialEq for UniversalVariable { + fn eq(&self, other: &Self) -> bool { + self.name == other.name + } +} + +impl PartialOrd for UniversalVariable { + fn partial_cmp(&self, other: &Self) -> Option { + self.name.partial_cmp(&other.name) + } +} + +impl Hash for UniversalVariable { + fn hash(&self, state: &mut H) { + self.name.hash(state); + } +} + +impl ProgramComponent for UniversalVariable { + fn parse(_string: &str) -> Result { + todo!() + } + + fn origin(&self) -> &Origin { + &self.origin + } + + fn set_origin(mut self, origin: Origin) -> Self + where + Self: Sized, + { + self.origin = origin; + self + } + + fn validate(&self) -> Result<(), ProgramConstructionError> + where + Self: Sized, + { + todo!() + } +} + +// impl ASTConstructable for UniversalVariable { +// type Node<'a> = Term<'a>; + +// fn from_ast_node<'a>(node: Term<'a>, origin: ExternalReference, _context: &ASTContext) -> Self { +// if let Term::UniversalVariable(token) = node { +// let name = token.span.to_string(); + +// Self { +// origin: Origin::External(origin), +// name: Some(VariableName::new(name)), +// } +// } else { +// unreachable!("TODO") +// } +// } +// } diff --git a/nemo/src/rule_model/component/term/tuple.rs b/nemo/src/rule_model/component/term/tuple.rs new file mode 100644 index 000000000..ff010f14c --- /dev/null +++ b/nemo/src/rule_model/component/term/tuple.rs @@ -0,0 +1,63 @@ +//! This module defines [Tuple]. 
+ +use std::{fmt::Display, hash::Hash}; + +use crate::rule_model::{component::ProgramComponent, origin::Origin}; + +use super::Term; + +/// An ordered list of terms +#[derive(Debug, Clone, Eq)] +pub struct Tuple { + /// Origin of this component + origin: Origin, + + /// Ordered list of terms contained in this tuple + terms: Vec, +} + +impl Display for Tuple { + fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + todo!() + } +} + +impl PartialEq for Tuple { + fn eq(&self, other: &Self) -> bool { + self.terms == other.terms + } +} + +impl Hash for Tuple { + fn hash(&self, state: &mut H) { + self.terms.hash(state); + } +} + +impl ProgramComponent for Tuple { + fn parse(_string: &str) -> Result + where + Self: Sized, + { + todo!() + } + + fn origin(&self) -> &Origin { + todo!() + } + + fn set_origin(mut self, origin: Origin) -> Self + where + Self: Sized, + { + self.origin = origin; + self + } + + fn validate(&self) -> Result<(), crate::rule_model::error::ProgramConstructionError> + where + Self: Sized, + { + todo!() + } +} diff --git a/nemo/src/rule_model/component/variable.rs b/nemo/src/rule_model/component/variable.rs deleted file mode 100644 index 02a5f202e..000000000 --- a/nemo/src/rule_model/component/variable.rs +++ /dev/null @@ -1,96 +0,0 @@ -use std::fmt::Display; - -use crate::{ - io::parser::ast::term::Term, - rule_model::{ - error::ProgramConstructionError, - origin::{ComponentOrigin, OriginParseReference}, - }, -}; - -use super::ProgramComponent; - -/// Name of a variable -#[derive(Debug, Clone)] -pub struct VariableName(String); - -impl VariableName { - fn new(name: String) -> Result { - // TODO: Validate name - if name.is_empty() { - return Err(ProgramConstructionError::InvalidVariableName(name)); - } - - Ok(Self::new_unvalidated(name)) - } - - fn new_unvalidated(name: String) -> Self { - Self(name) - } -} - -impl Display for VariableName { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - self.0.fmt(f) - } -} - -#[derive(Debug)] -pub struct UniversalVariale { - origin: ComponentOrigin, - - name: Option, -} - -impl UniversalVariale { - fn from_term(term: Term) -> Self { - todo!() - } -} - -impl Display for UniversalVariale { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match &self.name { - Some(name) => write!(f, "?{}", name), - None => write!(f, "_"), - } - } -} - -impl ProgramComponent for UniversalVariale { - type Node<'a> = Term<'a>; - - fn from_ast_node<'a>(node: Term<'a>, origin: OriginParseReference) -> Self { - if let Term::UniversalVariable(token) = node { - let string = token.span.to_string(); - } - - todo!() - } - - fn parse(string: &str) -> Result { - todo!() - } - - fn origin(&self) -> &ComponentOrigin { - &self.origin - } -} - -#[derive(Debug)] -pub struct ExistentialVariable { - origin: ComponentOrigin, - - name: VariableName, -} - -#[derive(Debug)] -pub enum Variable { - Universal(UniversalVariale), - Existential(ExistentialVariable), -} - -mod test { - #[test] - fn create_variable() {} -} diff --git a/nemo/src/rule_model/error.rs b/nemo/src/rule_model/error.rs index 8f9331064..236195a5e 100644 --- a/nemo/src/rule_model/error.rs +++ b/nemo/src/rule_model/error.rs @@ -1,9 +1,24 @@ +//! 
This module defines [ProgramConstructionError]
+
 use thiserror::Error;
 
+use super::component::{
+    atom::Atom,
+    fact::Fact,
+    term::{primitive::variable::Variable, Term},
+};
+
+/// Error returned during the construction of objects from nemo's logical rule model
 #[derive(Error, Debug)]
 pub enum ProgramConstructionError {
-    #[error("invalid variable name: {0}")]
+    #[error("variable \"{0}\" has an invalid name")]
     InvalidVariableName(String),
+    #[error("term \"{0}\" has an invalid name")]
+    InvalidIdentifier(String),
+    #[error("atom \"{0}\" has an invalid name")]
+    InvalidAtomName(String),
+    #[error("fact {0} contains variables")]
+    NonGroundFact(Fact),
     #[error("parse error")] // TODO: Return parser error here
     ParseError,
 }
diff --git a/nemo/src/rule_model/origin.rs b/nemo/src/rule_model/origin.rs
index e2660002b..994ae7d52 100644
--- a/nemo/src/rule_model/origin.rs
+++ b/nemo/src/rule_model/origin.rs
@@ -1,10 +1,18 @@
 //! This module defines
 
-pub(crate) type OriginParseReference = usize;
+pub(crate) type ExternalReference = usize;
 
-#[derive(Debug)]
-pub enum ComponentOrigin {
+/// Origin of a program component
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+pub enum Origin {
+    /// Component was created via a constructor
     Created,
-    Parsed(OriginParseReference),
-    Something(Box<ComponentOrigin>),
+    /// Component was created due to translation from an external input, e.g., parsing
+    External(ExternalReference),
+}
+
+impl Default for Origin {
+    fn default() -> Self {
+        Self::Created
+    }
 }
diff --git a/nemo/src/rule_model/program.rs b/nemo/src/rule_model/program.rs
new file mode 100644
index 000000000..8f884614c
--- /dev/null
+++ b/nemo/src/rule_model/program.rs
@@ -0,0 +1,26 @@
+//! This module defines [Program].
+
+use super::component::{
+    fact::Fact,
+    import_export::{ExportDirective, ImportDirective},
+    rule::Rule,
+};
+
+/// Representation of a nemo program
+#[derive(Debug)]
+pub struct Program {
+    /// Imported resources
+    imports: Vec<ImportDirective>,
+    /// Exported resources
+    exports: Vec<ExportDirective>,
+    /// Rules
+    rules: Vec<Rule>,
+    /// Facts
+    facts: Vec<Fact>,
+}
+
+impl Program {
+    pub fn from_ast() -> Self {
+        todo!()
+    }
+}

From 453b480dbb68d02b75f6c0eac9bf4bea9e605d75 Mon Sep 17 00:00:00 2001
From: Alex Ivliev
Date: Wed, 3 Jul 2024 09:50:38 +0200
Subject: [PATCH 112/214] Translate AST model into logical model

---
 nemo/src/io/parser/ast.rs                     |   1 +
 nemo/src/io/parser/ast/term.rs                |   2 +-
 nemo/src/rule_model/component.rs              |   2 +
 nemo/src/rule_model/component/base.rs         |  30 ++
 nemo/src/rule_model/component/output.rs       |  25 ++
 nemo/src/rule_model/component/rule.rs         |  12 +
 nemo/src/rule_model/component/term.rs         |  33 ++-
 .../rule_model/component/term/operation.rs    |  81 ++++++
 nemo/src/rule_model/component/term/tuple.rs   |  16 ++
 nemo/src/rule_model/program.rs                | 269 +++++++++++++++++-
 10 files changed, 449 insertions(+), 22 deletions(-)
 create mode 100644 nemo/src/rule_model/component/base.rs
 create mode 100644 nemo/src/rule_model/component/output.rs

diff --git a/nemo/src/io/parser/ast.rs b/nemo/src/io/parser/ast.rs
index 931813397..17fc7c492 100644
--- a/nemo/src/io/parser/ast.rs
+++ b/nemo/src/io/parser/ast.rs
@@ -167,6 +167,7 @@ impl<T> List<'_, T> {
         vec
     }
 }
+
 impl<T> IntoIterator for List<'_, T> {
     type Item = T;
diff --git a/nemo/src/io/parser/ast/term.rs b/nemo/src/io/parser/ast/term.rs
index 07feb03f7..d57044ae6 100644
--- a/nemo/src/io/parser/ast/term.rs
+++ b/nemo/src/io/parser/ast/term.rs
@@ -199,7 +199,7 @@ impl std::fmt::Display for Term<'_> {
 }
 
 #[derive(Debug, Clone, PartialEq)]
-pub(crate) enum Primitive<'a> {
+pub enum Primitive<'a> {
     Constant(Span<'a>),
PrefixedConstant { span: Span<'a>, diff --git a/nemo/src/rule_model/component.rs b/nemo/src/rule_model/component.rs index 5a925fb77..b218f9097 100644 --- a/nemo/src/rule_model/component.rs +++ b/nemo/src/rule_model/component.rs @@ -1,9 +1,11 @@ //! This module defines the logical components that make up a program. pub mod atom; +pub mod base; pub mod fact; pub mod import_export; pub mod literal; +pub mod output; pub mod rule; pub mod term; diff --git a/nemo/src/rule_model/component/base.rs b/nemo/src/rule_model/component/base.rs new file mode 100644 index 000000000..a2c597ac9 --- /dev/null +++ b/nemo/src/rule_model/component/base.rs @@ -0,0 +1,30 @@ +//! This module defines [Base] + +use std::fmt::Display; + +use crate::rule_model::origin::Origin; + +/// TODO +#[derive(Debug, Clone)] +pub struct Base { + /// Origin of this component + origin: Origin, + + base: String, +} + +impl Base { + /// Create a new [Base] + pub fn new(base: String) -> Self { + Self { + origin: Origin::default(), + base, + } + } +} + +impl Display for Base { + fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + todo!() + } +} diff --git a/nemo/src/rule_model/component/output.rs b/nemo/src/rule_model/component/output.rs new file mode 100644 index 000000000..44389cb08 --- /dev/null +++ b/nemo/src/rule_model/component/output.rs @@ -0,0 +1,25 @@ +//! This module defines [Output] + +use crate::rule_model::origin::Origin; + +use super::term::Identifier; + +/// TODO +#[derive(Debug, Clone)] +pub struct Output { + /// Origin of this component + origin: Origin, + + /// + predicate: Identifier, +} + +impl Output { + /// Create a mew [Output] + pub fn new(predicate: Identifier) -> Self { + Self { + origin: Origin::default(), + predicate, + } + } +} diff --git a/nemo/src/rule_model/component/rule.rs b/nemo/src/rule_model/component/rule.rs index e06f3fef4..d5a3e934b 100644 --- a/nemo/src/rule_model/component/rule.rs +++ b/nemo/src/rule_model/component/rule.rs @@ -147,6 +147,12 @@ impl RuleBodyBuilder { self } + /// Add a literal to the body of the rule. + pub fn add_literal(mut self, literal: Literal) -> Self { + self.literals.push(literal); + self + } + /// Finish building and return a list of [Literal]s. pub fn finalize(self) -> Vec { self.literals @@ -178,6 +184,12 @@ impl RuleBodySubBuilder { self } + /// Add a literal to the body of the rule. + pub fn add_literal(mut self, literal: Literal) -> Self { + self.builder.body = self.builder.body.add_literal(literal); + self + } + /// Return to the [RuleBuilder] pub fn done(self) -> RuleBuilder { self.builder diff --git a/nemo/src/rule_model/component/term.rs b/nemo/src/rule_model/component/term.rs index 2616ff44a..3d202b85c 100644 --- a/nemo/src/rule_model/component/term.rs +++ b/nemo/src/rule_model/component/term.rs @@ -1,5 +1,12 @@ //! This module defines [Term]. 
+pub mod aggregate; +pub mod function; +pub mod map; +pub mod operation; +pub mod primitive; +pub mod tuple; + use std::fmt::{Debug, Display}; use function::FunctionTerm; @@ -7,18 +14,12 @@ use map::Map; use nemo_physical::datavalues::AnyDataValue; use operation::Operation; use primitive::{ground::GroundTerm, variable::Variable, Primitive}; +use tuple::Tuple; use crate::rule_model::{error::ProgramConstructionError, origin::Origin}; use super::ProgramComponent; -pub mod aggregate; -pub mod function; -pub mod map; -pub mod operation; -pub mod primitive; -pub mod tuple; - /// Name of a term #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct Identifier(String); @@ -46,6 +47,8 @@ pub enum Term { Map(Map), /// Operation applied to a list of terms Operation(Operation), + /// Tuple + Tuple(Tuple), } impl Term { @@ -64,6 +67,11 @@ impl Term { Self::Primitive(Primitive::Variable(Variable::existential(name))) } + /// Create a groud term. + pub fn ground(value: AnyDataValue) -> Self { + Self::Primitive(Primitive::Ground(GroundTerm::new(value))) + } + /// Create an integer term pub fn integer(number: i64) -> Self { Self::Primitive(Primitive::Ground(GroundTerm::new( @@ -73,13 +81,8 @@ impl Term { } impl Display for Term { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Term::Primitive(primitive) => write!(f, "{}", primitive), - Term::FunctionTerm(function) => write!(f, "{}", function), - Term::Map(map) => write!(f, "{}", map), - Term::Operation(operation) => write!(f, "{}", operation), - } + fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + todo!() } } @@ -97,6 +100,7 @@ impl ProgramComponent for Term { Term::FunctionTerm(function) => function.origin(), Term::Map(map) => map.origin(), Term::Operation(operation) => operation.origin(), + Term::Tuple(tuple) => tuple.origin(), } } @@ -109,6 +113,7 @@ impl ProgramComponent for Term { Term::FunctionTerm(function) => Term::FunctionTerm(function.set_origin(origin)), Term::Map(map) => Term::Map(map.set_origin(origin)), Term::Operation(operation) => Term::Operation(operation.set_origin(origin)), + Term::Tuple(tuple) => Term::Tuple(tuple.set_origin(origin)), } } diff --git a/nemo/src/rule_model/component/term/operation.rs b/nemo/src/rule_model/component/term/operation.rs index f847d8896..b447ba431 100644 --- a/nemo/src/rule_model/component/term/operation.rs +++ b/nemo/src/rule_model/component/term/operation.rs @@ -125,6 +125,71 @@ pub enum OperationKind { StringConcatenation, } +impl OperationKind { + /// Return the [OperationKind] corresponding to the given operation name or `None` if there is no such operation. 
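+    ///
+    /// E.g. (sketch): `OperationKind::from_name("SUM")` yields
+    /// `Some(OperationKind::NumericSum)`, while an unknown name yields `None`.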
+    pub fn from_name(name: &str) -> Option<Self> {
+        Some(match name {
+            "+" => Self::NumericSum,
+            "-" => Self::NumericSubtraction,
+            "/" => Self::NumericDivision,
+            "*" => Self::NumericProduct,
+            "<" => Self::NumericLessthan,
+            ">" => Self::NumericGreaterthan,
+            "<=" => Self::NumericLessthaneq,
+            ">=" => Self::NumericGreaterthaneq,
+            "isInteger" => Self::CheckIsInteger,
+            "isFloat" => Self::CheckIsFloat,
+            "isDouble" => Self::CheckIsDouble,
+            "isIri" => Self::CheckIsIri,
+            "isNumeric" => Self::CheckIsNumeric,
+            "isNull" => Self::CheckIsNull,
+            "isString" => Self::CheckIsString,
+            "ABS" => Self::NumericAbsolute,
+            "SQRT" => Self::NumericSquareroot,
+            "NOT" => Self::BooleanNegation,
+            "fullStr" => Self::CanonicalString,
+            "STR" => Self::LexicalValue,
+            "SIN" => Self::NumericSine,
+            "COS" => Self::NumericCosine,
+            "TAN" => Self::NumericTangent,
+            "STRLEN" => Self::StringLength,
+            "STRREV" => Self::StringReverse,
+            "UCASE" => Self::StringUppercase,
+            "LCASE" => Self::StringLowercase,
+            "ROUND" => Self::NumericRound,
+            "CEIL" => Self::NumericCeil,
+            "FLOOR" => Self::NumericFloor,
+            "DATATYPE" => Self::Datatype,
+            "LANG" => Self::LanguageTag,
+            "INT" => Self::CastToInteger,
+            "DOUBLE" => Self::CastToDouble,
+            "FLOAT" => Self::CastToFloat,
+            "LOG" => Self::NumericLogarithm,
+            "POW" => Self::NumericPower,
+            "COMPARE" => Self::StringCompare,
+            "CONTAINS" => Self::StringContains,
+            "SUBSTR" => Self::StringSubstring,
+            "STRSTARTS" => Self::StringStarts,
+            "STRENDS" => Self::StringEnds,
+            "STRBEFORE" => Self::StringBefore,
+            "STRAFTER" => Self::StringAfter,
+            "REM" => Self::NumericRemainder,
+            "BITAND" => Self::BitAnd,
+            "BITOR" => Self::BitOr,
+            "BITXOR" => Self::BitXor,
+            "MAX" => Self::NumericMaximum,
+            "MIN" => Self::NumericMinimum,
+            "LUKA" => Self::NumericLukasiewicz,
+            "SUM" => Self::NumericSum,
+            "PROD" => Self::NumericProduct,
+            "AND" => Self::BooleanConjunction,
+            "OR" => Self::BooleanDisjunction,
+            "CONCAT" => Self::StringConcatenation,
+            _ => return None,
+        })
+    }
+}
+
 /// Operation that can be applied to terms
 #[derive(Debug, Clone, Eq)]
 pub struct Operation {
     /// Origin of this component
     origin: Origin,
 
     /// The kind of operation
     kind: OperationKind,
     /// The input arguments for the operation
     subterms: Vec<Term>,
 }
 
+impl Operation {
+    /// Create a new [Operation]
+    pub fn new(kind: OperationKind, subterms: Vec<Term>) -> Self {
+        Self {
+            origin: Origin::default(),
+            kind,
+            subterms,
+        }
+    }
+
+    /// Create a new [Operation] giving the string name of the operation.
+    pub fn new_from_name(operation: &str, subterms: Vec<Term>) -> Option<Self> {
+        Some(Self::new(OperationKind::from_name(operation)?, subterms))
+    }
+}
+
 impl Display for Operation {
     fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         todo!()
     }
 }
diff --git a/nemo/src/rule_model/component/term/tuple.rs b/nemo/src/rule_model/component/term/tuple.rs
index ff010f14c..92a6eeca1 100644
--- a/nemo/src/rule_model/component/term/tuple.rs
+++ b/nemo/src/rule_model/component/term/tuple.rs
@@ -16,6 +16,16 @@ pub struct Tuple {
     terms: Vec<Term>,
 }
 
+impl Tuple {
+    /// Create a new [Tuple].
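+    ///
+    /// E.g. (sketch): `Tuple::new(vec![Term::integer(1), Term::integer(2)])`
+    /// builds a tuple holding the two integer terms.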
+ pub fn new(terms: Vec) -> Self { + Self { + origin: Origin::default(), + terms, + } + } +} + impl Display for Tuple { fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { todo!() @@ -34,6 +44,12 @@ impl Hash for Tuple { } } +impl PartialOrd for Tuple { + fn partial_cmp(&self, other: &Self) -> Option { + self.terms.partial_cmp(&other.terms) + } +} + impl ProgramComponent for Tuple { fn parse(_string: &str) -> Result where diff --git a/nemo/src/rule_model/program.rs b/nemo/src/rule_model/program.rs index 8f884614c..4f6ea9e17 100644 --- a/nemo/src/rule_model/program.rs +++ b/nemo/src/rule_model/program.rs @@ -1,13 +1,30 @@ //! This module defines [Program]. -use super::component::{ - fact::Fact, - import_export::{ExportDirective, ImportDirective}, - rule::Rule, +use nemo_physical::datavalues::AnyDataValue; + +use crate::{io::parser::ast, rule_model::component::term::tuple::Tuple}; + +use super::{ + component::{ + atom::Atom, + base::Base, + fact::Fact, + import_export::{ExportDirective, ImportDirective}, + literal::Literal, + output::Output, + rule::{Rule, RuleBuilder}, + term::{ + function::FunctionTerm, + operation::{Operation, OperationKind}, + Term, + }, + ProgramComponent, + }, + origin::Origin, }; /// Representation of a nemo program -#[derive(Debug)] +#[derive(Debug, Default)] pub struct Program { /// Imported resources imports: Vec, @@ -17,10 +34,248 @@ pub struct Program { rules: Vec, /// Facts facts: Vec, + /// Base + base: Option, + /// Outputs + outputs: Vec, } impl Program { - pub fn from_ast() -> Self { - todo!() + /// Build a [Program] from an [ast::program::Program]. + pub fn from_ast(ast_program: ast::program::Program) -> Self { + let mut program = Program::default(); + + for (statement_index, statement) in ast_program.statements.iter().enumerate() { + match statement { + ast::statement::Statement::Directive(directive) => { + program.ast_build_directive(directive); + } + ast::statement::Statement::Fact { + span, + doc_comment, + atom, + dot, + } => todo!(), + ast::statement::Statement::Rule { head, body, .. } => { + program.ast_build_rule(head, body); + } + ast::statement::Statement::Comment(_) => todo!(), + ast::statement::Statement::Error(_) => todo!(), + } + } + + program + } + + fn ast_build_rule( + &mut self, + head: &ast::List, + body: &ast::List, + ) { + let origin = Origin::External(self.rules.len()); + + let rule_builder = RuleBuilder::default().origin(origin); + let mut head_builder = rule_builder.head(); + + // TODO: Implement a normal iterator to avoid cloning + for (head_index, head_atom) in head.clone().into_iter().enumerate() { + let origin = Origin::External(head_index); + if let Literal::Positive(atom) = Self::ast_build_literal(origin, &head_atom) { + head_builder = head_builder.add_atom(atom); + } else { + unreachable!("head must only contain positive atoms") + } + } + + let mut body_builder = head_builder.done().body(); + + // TODO: Implement a normal iterator to avoid cloning + for (body_index, body_atom) in head.clone().into_iter().enumerate() { + let origin = Origin::External(body_index); + body_builder = body_builder.add_literal(Self::ast_build_literal(origin, &body_atom)); + } + + self.rules.push(body_builder.done().finalize()); + } + + fn ast_build_literal(origin: Origin, atom: &ast::atom::Atom) -> Literal { + match atom { + ast::atom::Atom::Positive(positive_atom) => { + Literal::Positive(Self::ast_build_atom(origin, positive_atom)) + } + ast::atom::Atom::Negative { + atom: negative_atom, + .. 
} => Literal::Negative(Self::ast_build_atom(origin, negative_atom)),
+            ast::atom::Atom::InfixAtom {
+                lhs,
+                operation,
+                rhs,
+                ..
+            } => {
+                let left = Self::ast_build_inner_term(Origin::External(0), lhs);
+                let right = Self::ast_build_inner_term(Origin::External(1), rhs);
+
+                Literal::Operation(
+                    Operation::new_from_name(&operation.to_string(), vec![left, right])
+                        .expect("unknown infix operation"),
+                )
+            }
+            ast::atom::Atom::Map(_) => {
+                // Return unsupported error
+                todo!()
+            }
+        }
+    }
+
+    fn ast_build_atom(origin: Origin, atom: &ast::tuple::Tuple) -> Atom {
+        let predicate_name = atom
+            .identifier
+            .expect("Atom must have a predicate name")
+            .to_string();
+        let subterms = match &atom.terms {
+            Some(terms) => terms.to_vec(),
+            None => vec![],
+        };
+
+        let mut translated_subterms = Vec::new();
+
+        for (term_index, subterm) in subterms.into_iter().enumerate() {
+            let origin = Origin::External(term_index);
+            translated_subterms.push(Self::ast_build_inner_term(origin, &subterm));
+        }
+
+        Atom::new(&predicate_name, translated_subterms).set_origin(origin)
+    }
+
+    fn ast_build_inner_term(origin: Origin, term: &ast::term::Term) -> Term {
+        match term {
+            ast::term::Term::Primitive(primitive) => Self::ast_build_primitive(origin, primitive),
+            ast::term::Term::UniversalVariable(name) => Term::universal_variable(&name.to_string()),
+            ast::term::Term::ExistentialVariable(name) => {
+                Term::existential_variable(&name.to_string())
+            }
+            ast::term::Term::UnaryPrefix {
+                operation, term, ..
+            } => {
+                // TODO: Currently no associated function with this
+                todo!()
+            }
+            ast::term::Term::Binary {
+                lhs,
+                operation,
+                rhs,
+                ..
+            } => {
+                let left = Self::ast_build_inner_term(Origin::External(0), lhs);
+                let right = Self::ast_build_inner_term(Origin::External(1), rhs);
+
+                Term::Operation(
+                    Operation::new_from_name(&operation.to_string(), vec![left, right])
+                        .expect("unrecognized binary operation"),
+                )
+            }
+            ast::term::Term::Aggregation {
+                operation, terms, ..
+            } => {
+                todo!()
+            }
+            ast::term::Term::Tuple(tuple) => Self::ast_build_inner_tuple(origin, tuple),
+            ast::term::Term::Map(_) => todo!(),
+            ast::term::Term::Blank(_) => todo!(),
+        }
+        .set_origin(origin)
+    }
+
+    fn ast_build_primitive(origin: Origin, primitive: &ast::term::Primitive) -> Term {
+        match primitive {
+            ast::term::Primitive::Constant(value) => {
+                Term::ground(AnyDataValue::new_iri(value.to_string()))
+            }
+            ast::term::Primitive::PrefixedConstant {
+                span,
+                prefix,
+                colon,
+                constant,
+            } => todo!(),
+            ast::term::Primitive::Number {
+                span,
+                sign,
+                before,
+                dot,
+                after,
+                exponent,
+            } => todo!(),
+            ast::term::Primitive::String(string) => {
+                Term::ground(AnyDataValue::new_plain_string(string.to_string()))
+            }
+            ast::term::Primitive::Iri(iri) => Term::ground(AnyDataValue::new_iri(iri.to_string())),
+            ast::term::Primitive::RdfLiteral { string, iri, ..
} => { + Term::ground(AnyDataValue::new_other(string.to_string(), iri.to_string())) + } + } + .set_origin(origin) + } + + fn ast_build_inner_tuple(origin: Origin, tuple: &ast::tuple::Tuple) -> Term { + let subterms = match &tuple.terms { + Some(terms) => terms.to_vec(), + None => vec![], + }; + + let mut translated_subterms = Vec::new(); + + for (term_index, subterm) in subterms.into_iter().enumerate() { + let origin = Origin::External(term_index); + translated_subterms.push(Self::ast_build_inner_term(origin, &subterm)); + } + + match tuple.identifier { + Some(name) => match OperationKind::from_name(&name.to_string()) { + Some(kind) => Term::Operation(Operation::new(kind, translated_subterms)), + None => { + Term::FunctionTerm(FunctionTerm::new(&name.to_string(), translated_subterms)) + } + }, + None => Term::Tuple(Tuple::new(translated_subterms)), + } + } + + fn ast_build_directive(&mut self, directive: &ast::directive::Directive) { + match directive { + ast::directive::Directive::Base { base_iri, .. } => { + self.base = Some(Base::new(base_iri.to_string())); + // TODO: Set origin + } + ast::directive::Directive::Prefix { + span, + doc_comment, + prefix, + prefix_iri, + dot, + } => todo!(), + ast::directive::Directive::Import { + span, + doc_comment, + predicate, + arrow, + map, + dot, + } => todo!(), + ast::directive::Directive::Export { + span, + doc_comment, + predicate, + arrow, + map, + dot, + } => todo!(), + ast::directive::Directive::Output { + span, + doc_comment, + predicates, + dot, + } => todo!(), + } } } From e5fc4c5ac110d9d8a7ae8c8e688e3427d89ac41a Mon Sep 17 00:00:00 2001 From: Alex Ivliev Date: Tue, 9 Jul 2024 16:31:47 +0200 Subject: [PATCH 113/214] Clean up logical model --- nemo/src/rule_model.rs | 4 + nemo/src/rule_model/component.rs | 33 +++ nemo/src/rule_model/component/atom.rs | 86 ++++++-- nemo/src/rule_model/component/base.rs | 51 ++++- nemo/src/rule_model/component/fact.rs | 34 ++- .../src/rule_model/component/import_export.rs | 35 ++- nemo/src/rule_model/component/literal.rs | 14 +- nemo/src/rule_model/component/output.rs | 62 +++++- nemo/src/rule_model/component/rule.rs | 201 +++++++++--------- nemo/src/rule_model/component/term.rs | 188 ++++++++++++---- .../rule_model/component/term/aggregate.rs | 91 +++++++- .../src/rule_model/component/term/function.rs | 84 ++++++-- nemo/src/rule_model/component/term/map.rs | 79 ++++++- .../rule_model/component/term/operation.rs | 192 ++++++++++++++++- .../rule_model/component/term/primitive.rs | 116 +++++++--- .../component/term/primitive/ground.rs | 101 ++++----- .../component/term/primitive/variable.rs | 37 ++-- .../term/primitive/variable/existential.rs | 21 +- .../term/primitive/variable/universal.rs | 21 +- nemo/src/rule_model/component/term/tuple.rs | 75 ++++++- nemo/src/rule_model/program.rs | 13 +- nemo/src/rule_model/syntax.rs | 63 ++++++ nemo/src/rule_model/util.rs | 32 +++ 23 files changed, 1269 insertions(+), 364 deletions(-) create mode 100644 nemo/src/rule_model/syntax.rs create mode 100644 nemo/src/rule_model/util.rs diff --git a/nemo/src/rule_model.rs b/nemo/src/rule_model.rs index f3422305c..37704a45f 100644 --- a/nemo/src/rule_model.rs +++ b/nemo/src/rule_model.rs @@ -1,6 +1,10 @@ //! 
This module defines the representation of nemo programs +#[macro_use] +pub mod util; + pub(crate) mod origin; +pub(crate) mod syntax; pub mod component; pub mod error; diff --git a/nemo/src/rule_model/component.rs b/nemo/src/rule_model/component.rs index b218f9097..0e151ba7e 100644 --- a/nemo/src/rule_model/component.rs +++ b/nemo/src/rule_model/component.rs @@ -11,8 +11,32 @@ pub mod term; use std::fmt::{Debug, Display}; +use term::primitive::variable::Variable; + use super::{error::ProgramConstructionError, origin::Origin}; +/// Name of a term +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct Tag(String); + +impl Tag { + /// Create a new [Tag]. + pub fn new(name: String) -> Self { + Self(name) + } + + /// Validate term name. + pub fn is_valid(&self) -> bool { + !self.0.is_empty() + } +} + +impl Display for Tag { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(&self.0) + } +} + /// Trait implemented by objects that are part of the logical rule model of the nemo language. pub trait ProgramComponent: Debug + Display + Clone + PartialEq + Eq { /// Construct this object from a string. @@ -33,3 +57,12 @@ pub trait ProgramComponent: Debug + Display + Clone + PartialEq + Eq { where Self: Sized; } + +/// Trait implemented by program components that allow iterating over [Variable]s +pub trait IteratableVariables { + /// Return an iterator over all [Variable]s contained within this program component. + fn variables<'a>(&'a self) -> Box + 'a>; + + /// Return a mutable iterator over all [Variable]s contained within this program component. + fn variables_mut<'a>(&'a mut self) -> Box + 'a>; +} diff --git a/nemo/src/rule_model/component/atom.rs b/nemo/src/rule_model/component/atom.rs index 7943ed9c7..5bfc30246 100644 --- a/nemo/src/rule_model/component/atom.rs +++ b/nemo/src/rule_model/component/atom.rs @@ -5,29 +5,48 @@ use std::{fmt::Display, hash::Hash}; use crate::rule_model::{error::ProgramConstructionError, origin::Origin}; use super::{ - term::{Identifier, Term}, - ProgramComponent, + term::{primitive::variable::Variable, Term}, + IteratableVariables, ProgramComponent, Tag, }; -/// An atom +/// Atom +/// +/// Tagged list of [Term]s. +/// It forms the core component of rules, +/// representing a logical proposition that can be true or false. #[derive(Debug, Clone, Eq)] pub struct Atom { /// Origin of this component. origin: Origin, /// Predicate name associated with this atom - name: Identifier, + predicate: Tag, /// Subterms of the function terms: Vec, } +/// Construct an [Atom]. +#[macro_export] +macro_rules! atom { + // Base case: no elements + ($name:tt) => { + crate::rule_model::component::atom::Atom::new($name, Vec::new()) + }; + // Recursive case: handle each term, separated by commas + ($name:tt; $($tt:tt)*) => {{ + let mut terms = Vec::new(); + term_list!(terms; $($tt)*); + crate::rule_model::component::atom::Atom::new($name, terms) + }}; +} + impl Atom { /// Create a new [Atom]. 
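
An aside on the IteratableVariables trait introduced above: a minimal sketch of a generic pass over a component's variables, assuming the trait and Variable are in scope. The function name and renaming scheme are illustrative only, not part of this patch:

    fn rename_apart<Component: IteratableVariables>(component: &mut Component) {
        // Replace every variable, in visit order, by a fresh numbered one.
        for (index, variable) in component.variables_mut().enumerate() {
            *variable = Variable::universal(&format!("v{}", index));
        }
    }
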
- pub fn new(name: &str, subterms: Vec) -> Self { + pub fn new>(predicate: &str, subterms: Terms) -> Self { Self { origin: Origin::Created, - name: Identifier::new(name.to_string()), - terms: subterms, + predicate: Tag::new(predicate.to_string()), + terms: subterms.into_iter().collect(), } } @@ -43,20 +62,32 @@ impl Atom { } impl Display for Atom { - fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - todo!() + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_fmt(format_args!("{}(", self.predicate))?; + + for (term_index, term) in self.terms.iter().enumerate() { + term.fmt(f)?; + + if term_index < self.terms.len() - 1 { + f.write_str(", ")?; + } + } + + f.write_str(")") } } impl PartialEq for Atom { fn eq(&self, other: &Self) -> bool { - self.origin == other.origin && self.name == other.name && self.terms == other.terms + self.origin == other.origin + && self.predicate == other.predicate + && self.terms == other.terms } } impl Hash for Atom { fn hash(&self, state: &mut H) { - self.name.hash(state); + self.predicate.hash(state); self.terms.hash(state); } } @@ -85,7 +116,7 @@ impl ProgramComponent for Atom { where Self: Sized, { - if !self.name.is_valid() { + if !self.predicate.is_valid() { todo!() } @@ -96,3 +127,34 @@ impl ProgramComponent for Atom { Ok(()) } } + +impl IteratableVariables for Atom { + fn variables<'a>(&'a self) -> Box + 'a> { + Box::new(self.terms.iter().flat_map(|term| term.variables())) + } + + fn variables_mut<'a>(&'a mut self) -> Box + 'a> { + Box::new(self.terms.iter_mut().flat_map(|term| term.variables_mut())) + } +} + +#[cfg(test)] +mod test { + use crate::rule_model::component::{term::primitive::variable::Variable, IteratableVariables}; + + #[test] + fn atom_basic() { + let variable = Variable::universal("u"); + let function = atom!("p"; 12, variable, !e, "abc", ?v); + + let variables = function.variables().cloned().collect::>(); + assert_eq!( + variables, + vec![ + Variable::universal("u"), + Variable::existential("e"), + Variable::universal("v") + ] + ); + } +} diff --git a/nemo/src/rule_model/component/base.rs b/nemo/src/rule_model/component/base.rs index a2c597ac9..ead9f7ac4 100644 --- a/nemo/src/rule_model/component/base.rs +++ b/nemo/src/rule_model/component/base.rs @@ -1,15 +1,18 @@ //! 
This module defines [Base] -use std::fmt::Display; +use std::{fmt::Display, hash::Hash}; use crate::rule_model::origin::Origin; -/// TODO -#[derive(Debug, Clone)] +use super::ProgramComponent; + +/// Global prefix +#[derive(Debug, Clone, Eq)] pub struct Base { /// Origin of this component origin: Origin, + /// Prefix base: String, } @@ -24,7 +27,47 @@ impl Base { } impl Display for Base { - fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "@base {} .", self.base) + } +} + +impl PartialEq for Base { + fn eq(&self, other: &Self) -> bool { + self.base == other.base + } +} + +impl Hash for Base { + fn hash(&self, state: &mut H) { + self.base.hash(state); + } +} + +impl ProgramComponent for Base { + fn parse(_string: &str) -> Result + where + Self: Sized, + { + todo!() + } + + fn origin(&self) -> &Origin { + &self.origin + } + + fn set_origin(mut self, origin: Origin) -> Self + where + Self: Sized, + { + self.origin = origin; + self + } + + fn validate(&self) -> Result<(), crate::rule_model::error::ProgramConstructionError> + where + Self: Sized, + { todo!() } } diff --git a/nemo/src/rule_model/component/fact.rs b/nemo/src/rule_model/component/fact.rs index 69afd6fdb..c01546c6a 100644 --- a/nemo/src/rule_model/component/fact.rs +++ b/nemo/src/rule_model/component/fact.rs @@ -4,7 +4,7 @@ use std::{fmt::Display, hash::Hash}; use crate::rule_model::origin::Origin; -use super::{term::Term, ProgramComponent}; +use super::{term::Term, ProgramComponent, Tag}; /// A (ground) fact #[derive(Debug, Clone, Eq)] @@ -12,10 +12,23 @@ pub struct Fact { /// Origin of this component origin: Origin, + /// Predicate of the fact + predicate: Tag, + + /// List of [Term]s terms: Vec, } impl Fact { + /// Create a new [Atom]. + pub fn new>(predicate: &str, subterms: Terms) -> Self { + Self { + origin: Origin::Created, + predicate: Tag::new(predicate.to_string()), + terms: subterms.into_iter().collect(), + } + } + /// Return an iterator over the subterms of this fact. pub fn subterms(&self) -> impl Iterator { self.terms.iter() @@ -28,19 +41,30 @@ impl Fact { } impl Display for Fact { - fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - todo!() + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_fmt(format_args!("{}(", self.predicate))?; + + for (term_index, term) in self.terms.iter().enumerate() { + term.fmt(f)?; + + if term_index < self.terms.len() - 1 { + f.write_str(", ")?; + } + } + + f.write_str(")") } } impl PartialEq for Fact { fn eq(&self, other: &Self) -> bool { - self.terms == other.terms + self.predicate == other.predicate && self.terms == other.terms } } impl Hash for Fact { fn hash(&self, state: &mut H) { + self.predicate.hash(state); self.terms.hash(state); } } @@ -54,7 +78,7 @@ impl ProgramComponent for Fact { } fn origin(&self) -> &Origin { - todo!() + &self.origin } fn set_origin(mut self, origin: Origin) -> Self diff --git a/nemo/src/rule_model/component/import_export.rs b/nemo/src/rule_model/component/import_export.rs index 37f09f3d8..432e43d52 100644 --- a/nemo/src/rule_model/component/import_export.rs +++ b/nemo/src/rule_model/component/import_export.rs @@ -7,7 +7,7 @@ use nemo_physical::datavalues::MapDataValue; use crate::rule_model::origin::Origin; -use super::{term::Identifier, ProgramComponent}; +use super::{ProgramComponent, Tag}; /// The different supported variants of the RDF format. 
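
A quick sketch of the reworked Fact constructor above; the predicate and values are made up, and the terms render through their own Display implementations:

    let fact = Fact::new("parent", vec![Term::from("alice"), Term::from("bob")]);
    // Prints in atom style, e.g. parent("alice", "bob")
    println!("{}", fact);
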
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)] @@ -77,13 +77,26 @@ pub(crate) struct ImportExportDirective { origin: Origin, /// The predicate we're handling. - predicate: Identifier, + predicate: Tag, /// The file format and resource we're using. format: FileFormat, /// The attributes we've been given. attributes: MapDataValue, } +impl ImportExportDirective { + /// Helper function for the display implementations of + /// [ImportDirective] and [ExportDirective] + /// to format the content of this object. + fn display_content(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{} :- {} {} .", + self.predicate, self.format, self.attributes + ) + } +} + impl PartialEq for ImportExportDirective { fn eq(&self, other: &Self) -> bool { self.predicate == other.predicate @@ -106,7 +119,7 @@ pub struct ImportDirective(pub(crate) ImportExportDirective); impl ImportDirective { /// Create a new [ImportDirective]. - pub fn new(predicate: Identifier, format: FileFormat, attributes: MapDataValue) -> Self { + pub fn new(predicate: Tag, format: FileFormat, attributes: MapDataValue) -> Self { Self(ImportExportDirective { origin: Origin::default(), predicate, @@ -116,7 +129,7 @@ impl ImportDirective { } /// Return the predicate. - pub fn predicate(&self) -> &Identifier { + pub fn predicate(&self) -> &Tag { &self.0.predicate } @@ -138,8 +151,9 @@ impl From for ImportDirective { } impl Display for ImportDirective { - fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - todo!() + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str("@import ")?; + self.0.display_content(f) } } @@ -177,7 +191,7 @@ pub struct ExportDirective(pub(crate) ImportExportDirective); impl ExportDirective { /// Create a new [ExportDirective]. - pub fn new(predicate: Identifier, format: FileFormat, attributes: MapDataValue) -> Self { + pub fn new(predicate: Tag, format: FileFormat, attributes: MapDataValue) -> Self { Self(ImportExportDirective { origin: Origin::default(), predicate, @@ -187,7 +201,7 @@ impl ExportDirective { } /// Return the predicate. - pub fn predicate(&self) -> &Identifier { + pub fn predicate(&self) -> &Tag { &self.0.predicate } @@ -209,8 +223,9 @@ impl From for ExportDirective { } impl Display for ExportDirective { - fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - todo!() + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str("@export ")?; + self.0.display_content(f) } } diff --git a/nemo/src/rule_model/component/literal.rs b/nemo/src/rule_model/component/literal.rs index 3514b65b1..dd94e3983 100644 --- a/nemo/src/rule_model/component/literal.rs +++ b/nemo/src/rule_model/component/literal.rs @@ -6,7 +6,11 @@ use crate::rule_model::error::ProgramConstructionError; use super::{atom::Atom, term::operation::Operation, ProgramComponent}; -/// A literal that can either be a positive or negative atom or an operation +/// Literal +/// +/// An [Atom], its negation, or an [Operation]. +/// Literals are used to represent conditions that must be satisfied +/// for a rule to be applicable. 
#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum Literal { /// Positive atom @@ -18,8 +22,12 @@ pub enum Literal { } impl Display for Literal { - fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - todo!() + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Literal::Positive(positive) => write!(f, "{}", positive), + Literal::Negative(negative) => write!(f, "~{}", negative), + Literal::Operation(operation) => write!(f, "{}", operation), + } } } diff --git a/nemo/src/rule_model/component/output.rs b/nemo/src/rule_model/component/output.rs index 44389cb08..dbb5665e5 100644 --- a/nemo/src/rule_model/component/output.rs +++ b/nemo/src/rule_model/component/output.rs @@ -1,25 +1,75 @@ //! This module defines [Output] +use std::{fmt::Display, hash::Hash}; + use crate::rule_model::origin::Origin; -use super::term::Identifier; +use super::{ProgramComponent, Tag}; -/// TODO -#[derive(Debug, Clone)] +/// Output directive +/// +/// Marks a predicate as an output predicate. +#[derive(Debug, Clone, Eq)] pub struct Output { /// Origin of this component origin: Origin, - /// - predicate: Identifier, + /// Output predicate + predicate: Tag, } impl Output { /// Create a mew [Output] - pub fn new(predicate: Identifier) -> Self { + pub fn new(predicate: Tag) -> Self { Self { origin: Origin::default(), predicate, } } } + +impl Display for Output { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "@output {} .", self.predicate) + } +} + +impl PartialEq for Output { + fn eq(&self, other: &Self) -> bool { + self.predicate == other.predicate + } +} + +impl Hash for Output { + fn hash(&self, state: &mut H) { + self.predicate.hash(state); + } +} + +impl ProgramComponent for Output { + fn parse(_string: &str) -> Result + where + Self: Sized, + { + todo!() + } + + fn origin(&self) -> &Origin { + &self.origin + } + + fn set_origin(mut self, origin: Origin) -> Self + where + Self: Sized, + { + self.origin = origin; + self + } + + fn validate(&self) -> Result<(), crate::rule_model::error::ProgramConstructionError> + where + Self: Sized, + { + todo!() + } +} diff --git a/nemo/src/rule_model/component/rule.rs b/nemo/src/rule_model/component/rule.rs index d5a3e934b..96ea1050d 100644 --- a/nemo/src/rule_model/component/rule.rs +++ b/nemo/src/rule_model/component/rule.rs @@ -6,7 +6,11 @@ use crate::rule_model::origin::Origin; use super::{atom::Atom, literal::Literal, term::operation::Operation, ProgramComponent}; -/// A rule +/// Rule +/// +/// A logical statement that defines a relationship between a head (conjunction of [Atom]s) +/// and a body (conjunction of [Literal]s). +/// It specifies how new facts can be inferred from existing ones. #[derive(Debug, Clone, Eq)] pub struct Rule { /// Origin of this component @@ -22,6 +26,11 @@ pub struct Rule { } impl Rule { + /// Return a [RuleBuilder]. + pub fn builder() -> RuleBuilder { + RuleBuilder::default() + } + /// Create a new [Rule]. pub fn new(head: Vec, body: Vec) -> Self { Self { @@ -37,11 +46,49 @@ impl Rule { self.name = Some(name.to_string()); self } + + /// Return a reference to the body of the rule. + pub fn body(&self) -> &Vec { + &self.body + } + + /// Return a mutable reference to the body of the rule. + pub fn body_mut(&mut self) -> &mut Vec { + &mut self.body + } + + /// Return a reference to the head of the rule. + pub fn head(&self) -> &Vec { + &self.head + } + + /// Return a mutable reference to the head of the rule. 
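
The Output directive defined above is the simplest of the new components; a short usage sketch (the predicate name is made up):

    let output = Output::new(Tag::new("result".to_string()));
    // The Display implementation above renders this as: @output result .
    println!("{}", output);
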
+ pub fn head_mut(&mut self) -> &mut Vec { + &mut self.head + } } impl Display for Rule { - fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - todo!() + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + for (head_index, head_atom) in self.head.iter().enumerate() { + write!(f, "{}", head_atom)?; + + if head_index < self.head.len() - 1 { + f.write_str(", ")?; + } + } + + f.write_str(" :- ")?; + + for (body_index, body_literal) in self.body.iter().enumerate() { + write!(f, "{}", body_literal)?; + + if body_index < self.body.len() - 1 { + f.write_str(", ")?; + } + } + + f.write_str(" .") } } @@ -92,145 +139,95 @@ pub struct RuleBuilder { /// Origin of the rule origin: Origin, - /// Builder for the head of the rule - head: RuleHeadBuilder, - /// Builder for the body of the rule - body: RuleBodyBuilder, + /// Name of the rule + name: Option, + + /// Head of the rule + head: Vec, + /// Body of the rule + body: Vec, } impl RuleBuilder { + /// Set the name of the built rule. + pub fn name(mut self, name: &str) -> Self { + self.name = Some(name.to_string()); + self + } + /// Set the [Origin] of the built rule. pub fn origin(mut self, origin: Origin) -> Self { self.origin = origin; self } - /// Return a builder for the body of the rule. - pub fn body(self) -> RuleBodySubBuilder { - RuleBodySubBuilder { builder: self } - } - - /// Return a builder for the head of the rule. - pub fn head(self) -> RuleHeadSubBuilder { - RuleHeadSubBuilder { builder: self } - } - - /// Finish building and return a [Rule]. - pub fn finalize(self) -> Rule { - Rule::new(self.head.finalize(), self.body.finalize()).set_origin(self.origin) - } -} - -/// Builder for the rule body -#[derive(Debug, Default)] -pub struct RuleBodyBuilder { - /// Current list of [Literal]s - literals: Vec, -} - -impl RuleBodyBuilder { /// Add a positive atom to the body of the rule. - pub fn add_positive_atom(mut self, atom: Atom) -> Self { - self.literals.push(Literal::Positive(atom)); + pub fn add_body_positive(mut self, atom: Atom) -> Self { + self.body.push(Literal::Positive(atom)); self } - /// Add a negative atom to the body of the rule. - pub fn add_negative_atom(mut self, atom: Atom) -> Self { - self.literals.push(Literal::Negative(atom)); - self - } - - /// Add an operation to the body of the rule. - pub fn add_operation(mut self, opreation: Operation) -> Self { - self.literals.push(Literal::Operation(opreation)); + /// Add a positive atom to the body of the rule. + pub fn add_body_positive_mut(&mut self, atom: Atom) -> &mut Self { + self.body.push(Literal::Positive(atom)); self } - /// Add a literal to the body of the rule. - pub fn add_literal(mut self, literal: Literal) -> Self { - self.literals.push(literal); + /// Add a negative atom to the body of the rule. + pub fn add_body_negative(mut self, atom: Atom) -> Self { + self.body.push(Literal::Negative(atom)); self } - /// Finish building and return a list of [Literal]s. - pub fn finalize(self) -> Vec { - self.literals - } -} - -/// Subbuilder for building the body of a rule -#[derive(Debug)] -pub struct RuleBodySubBuilder { - builder: RuleBuilder, -} - -impl RuleBodySubBuilder { - /// Add a positive atom to the body of the rule. - pub fn add_positive_atom(mut self, atom: Atom) -> Self { - self.builder.body = self.builder.body.add_positive_atom(atom); + /// Add a negative atom to the body of the rule. 
+ pub fn add_body_negative_mut(&mut self, atom: Atom) -> &mut Self { + self.body.push(Literal::Negative(atom)); self } - /// Add a negative atom to the body of the rule. - pub fn add_negative_atom(mut self, atom: Atom) -> Self { - self.builder.body = self.builder.body.add_negative_atom(atom); + /// Add an operation to the body of the rule. + pub fn add_body_operation(mut self, opreation: Operation) -> Self { + self.body.push(Literal::Operation(opreation)); self } /// Add an operation to the body of the rule. - pub fn add_operation(mut self, opreation: Operation) -> Self { - self.builder.body = self.builder.body.add_operation(opreation); + pub fn add_body_operation_mut(&mut self, opreation: Operation) -> &mut Self { + self.body.push(Literal::Operation(opreation)); self } /// Add a literal to the body of the rule. - pub fn add_literal(mut self, literal: Literal) -> Self { - self.builder.body = self.builder.body.add_literal(literal); + pub fn add_body_literal(mut self, literal: Literal) -> Self { + self.body.push(literal); self } - /// Return to the [RuleBuilder] - pub fn done(self) -> RuleBuilder { - self.builder - } -} - -/// Builder for the rule head -#[derive(Debug, Default)] -pub struct RuleHeadBuilder { - /// Current list of [Atom]s - atoms: Vec, -} - -impl RuleHeadBuilder { - /// Add another atom to the head of the rule. - pub fn add_atom(mut self, atom: Atom) -> Self { - self.atoms.push(atom); + /// Add a literal to the body of the rule. + pub fn add_body_literal_mut(&mut self, literal: Literal) -> &mut Self { + self.body.push(literal); self } - /// Finish building and return a list of [Atom]s. - pub fn finalize(self) -> Vec { - self.atoms + /// Add an atom to the head of the rule. + pub fn add_head_atom(mut self, atom: Atom) -> Self { + self.head.push(atom); + self } -} - -/// Subbuilder for building the head of a rule -#[derive(Debug)] -pub struct RuleHeadSubBuilder { - builder: RuleBuilder, -} -impl RuleHeadSubBuilder { - /// Add another atom to the head of the rule. - pub fn add_atom(mut self, atom: Atom) -> Self { - self.builder.head = self.builder.head.add_atom(atom); + /// Add an atom to the head of the rule. + pub fn add_head_atom_mut(&mut self, atom: Atom) -> &mut Self { + self.head.push(atom); self } - /// Return to the [RuleBuilder] - pub fn done(self) -> RuleBuilder { - self.builder + /// Finish building and return a [Rule]. 
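
Together with finalize below, the flattened RuleBuilder replaces the old sub-builder round trips; a sketch with made-up predicates, using the atom! macro from this patch series:

    let rule = RuleBuilder::default()
        .name("transitivity")
        .add_head_atom(atom!("connected"; ?x, ?z))
        .add_body_positive(atom!("connected"; ?x, ?y))
        .add_body_positive(atom!("edge"; ?y, ?z))
        .finalize();
    // Expected rendering, given the Display implementation above:
    // connected(?x, ?z) :- connected(?x, ?y), edge(?y, ?z) .
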
+ pub fn finalize(self) -> Rule { + let rule = Rule::new(self.head, self.body).set_origin(self.origin); + + match &self.name { + Some(name) => rule.set_name(name), + None => rule, + } } } diff --git a/nemo/src/rule_model/component/term.rs b/nemo/src/rule_model/component/term.rs index 3d202b85c..e228dd46e 100644 --- a/nemo/src/rule_model/component/term.rs +++ b/nemo/src/rule_model/component/term.rs @@ -5,6 +5,7 @@ pub mod function; pub mod map; pub mod operation; pub mod primitive; +#[macro_use] pub mod tuple; use std::fmt::{Debug, Display}; @@ -13,30 +14,20 @@ use function::FunctionTerm; use map::Map; use nemo_physical::datavalues::AnyDataValue; use operation::Operation; -use primitive::{ground::GroundTerm, variable::Variable, Primitive}; +use primitive::{ + ground::GroundTerm, + variable::{existential::ExistentialVariable, universal::UniversalVariable, Variable}, + Primitive, +}; use tuple::Tuple; use crate::rule_model::{error::ProgramConstructionError, origin::Origin}; -use super::ProgramComponent; +use super::{IteratableVariables, ProgramComponent}; -/// Name of a term -#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub struct Identifier(String); - -impl Identifier { - /// Create a new [Identifier]. - pub fn new(name: String) -> Self { - Self(name) - } - - /// Validate term name. - pub fn is_valid(&self) -> bool { - !self.0.is_empty() - } -} - -/// TODO +/// Term +/// +/// Basic building block for expressions like atoms or facts. #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)] pub enum Term { /// Unstructured, primitive term @@ -71,18 +62,101 @@ impl Term { pub fn ground(value: AnyDataValue) -> Self { Self::Primitive(Primitive::Ground(GroundTerm::new(value))) } +} + +impl From for Term { + fn from(value: Variable) -> Self { + Self::Primitive(Primitive::from(value)) + } +} + +impl From for Term { + fn from(value: UniversalVariable) -> Self { + Self::Primitive(Primitive::from(value)) + } +} + +impl From for Term { + fn from(value: ExistentialVariable) -> Self { + Self::Primitive(Primitive::from(value)) + } +} + +impl From for Term { + fn from(value: Primitive) -> Self { + Self::Primitive(value) + } +} + +impl From for Term { + fn from(value: AnyDataValue) -> Self { + Self::Primitive(Primitive::from(value)) + } +} + +impl From for Term { + fn from(value: i64) -> Self { + Self::Primitive(Primitive::from(value)) + } +} + +impl From for Term { + fn from(value: i32) -> Self { + Self::Primitive(Primitive::from(value)) + } +} + +impl From for Term { + fn from(value: u64) -> Self { + Self::Primitive(Primitive::from(value)) + } +} + +impl From for Term { + fn from(value: String) -> Self { + Self::Primitive(Primitive::from(value)) + } +} + +impl From<&str> for Term { + fn from(value: &str) -> Self { + Self::Primitive(Primitive::from(value)) + } +} + +impl From for Term { + fn from(value: FunctionTerm) -> Self { + Self::FunctionTerm(value) + } +} + +impl From for Term { + fn from(value: Map) -> Self { + Self::Map(value) + } +} + +impl From for Term { + fn from(value: Operation) -> Self { + Self::Operation(value) + } +} - /// Create an integer term - pub fn integer(number: i64) -> Self { - Self::Primitive(Primitive::Ground(GroundTerm::new( - AnyDataValue::new_integer_from_i64(number), - ))) +impl From for Term { + fn from(value: Tuple) -> Self { + Self::Tuple(value) } } impl Display for Term { - fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - todo!() + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + 
Term::Primitive(term) => write!(f, "{}", term), + Term::FunctionTerm(term) => write!(f, "{}", term), + Term::Map(term) => write!(f, "{}", term), + Term::Operation(term) => write!(f, "{}", term), + Term::Tuple(term) => write!(f, "{}", term), + } } } @@ -125,14 +199,56 @@ impl ProgramComponent for Term { } } -// impl ASTConstructable for Term { -// type Node<'a> = crate::io::parser::ast::term::Term<'a>; +impl IteratableVariables for Term { + fn variables<'a>(&'a self) -> Box + 'a> { + let mut iter_primitive = None; + let mut iter_function = None; + let mut iter_map = None; + let mut iter_operation = None; + let mut iter_tuple = None; -// fn from_ast_node<'a>( -// node: Self::Node<'a>, -// origin: crate::rule_model::origin::ExternalReference, -// context: &super::ASTContext, -// ) -> Self { -// todo!() -// } -// } + match self { + Term::Primitive(primitive) => iter_primitive = Some(primitive.variables()), + Term::FunctionTerm(function) => iter_function = Some(function.variables()), + Term::Map(map) => iter_map = Some(map.variables()), + Term::Operation(operation) => iter_operation = Some(operation.variables()), + Term::Tuple(tuple) => iter_tuple = Some(tuple.variables()), + } + + Box::new( + iter_primitive + .into_iter() + .flatten() + .chain(iter_function.into_iter().flatten()) + .chain(iter_map.into_iter().flatten()) + .chain(iter_operation.into_iter().flatten()) + .chain(iter_tuple.into_iter().flatten()), + ) + } + + fn variables_mut<'a>(&'a mut self) -> Box + 'a> { + let mut iter_primitive = None; + let mut iter_function = None; + let mut iter_map = None; + let mut iter_operation = None; + let mut iter_tuple = None; + + match self { + Term::Primitive(primitive) => iter_primitive = Some(primitive.variables_mut()), + Term::FunctionTerm(function) => iter_function = Some(function.variables_mut()), + Term::Map(map) => iter_map = Some(map.variables_mut()), + Term::Operation(operation) => iter_operation = Some(operation.variables_mut()), + Term::Tuple(tuple) => iter_tuple = Some(tuple.variables_mut()), + } + + Box::new( + iter_primitive + .into_iter() + .flatten() + .chain(iter_function.into_iter().flatten()) + .chain(iter_map.into_iter().flatten()) + .chain(iter_operation.into_iter().flatten()) + .chain(iter_tuple.into_iter().flatten()), + ) + } +} diff --git a/nemo/src/rule_model/component/term/aggregate.rs b/nemo/src/rule_model/component/term/aggregate.rs index 1d6764d75..56ed4cb55 100644 --- a/nemo/src/rule_model/component/term/aggregate.rs +++ b/nemo/src/rule_model/component/term/aggregate.rs @@ -2,7 +2,10 @@ use std::{fmt::Display, hash::Hash}; -use crate::rule_model::{component::ProgramComponent, origin::Origin}; +use crate::rule_model::{ + component::{IteratableVariables, ProgramComponent}, + origin::Origin, +}; use super::{primitive::variable::Variable, Term}; @@ -19,7 +22,23 @@ pub enum AggregateKind { SumOfNumbers, } -/// An aggregate +impl Display for AggregateKind { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let name = match self { + AggregateKind::CountValues => "count", + AggregateKind::MinNumber => "min", + AggregateKind::MaxNumber => "max", + AggregateKind::SumOfNumbers => "sum", + }; + + f.write_fmt(format_args!("#{}", name)) + } +} + +/// Aggregate +/// +/// Function that performs a computatin over a set of [Term]s +/// and returns a single value. #[derive(Debug, Clone, Eq)] pub struct Aggregate { /// Origin of this component @@ -35,19 +54,65 @@ pub struct Aggregate { impl Aggregate { /// Create a new [Aggregate]. 
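
With the batch of From implementations above, heterogeneous term lists can be built from plain Rust values; a sketch:

    let terms: Vec<Term> = vec![
        42_i64.into(),                   // integer ground term
        "abc".into(),                    // plain-string ground term
        Variable::universal("x").into(), // universal variable
    ];
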
- pub fn new(kind: AggregateKind, aggregate: Term, distinct: Vec) -> Self { + pub fn new>( + kind: AggregateKind, + aggregate: Term, + distinct: Variables, + ) -> Self { Self { origin: Origin::default(), kind, aggregate, - distinct, + distinct: distinct.into_iter().collect(), } } + + /// Create a new sum [Aggregate]. + pub fn sum>( + aggregate: Term, + distinct: Variables, + ) -> Self { + Self::new(AggregateKind::SumOfNumbers, aggregate, distinct) + } + + /// Create a new count [Aggregate]. + pub fn count>( + aggregate: Term, + distinct: Variables, + ) -> Self { + Self::new(AggregateKind::CountValues, aggregate, distinct) + } + + /// Create a new min [Aggregate]. + pub fn min>( + aggregate: Term, + distinct: Variables, + ) -> Self { + Self::new(AggregateKind::MinNumber, aggregate, distinct) + } + + /// Create a new sum [Aggregate]. + pub fn max>( + aggregate: Term, + distinct: Variables, + ) -> Self { + Self::new(AggregateKind::MaxNumber, aggregate, distinct) + } } impl Display for Aggregate { - fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - todo!() + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_fmt(format_args!("{}({}", self.kind, self.aggregate))?; + + for (distinct_index, variable) in self.distinct.iter().enumerate() { + variable.fmt(f)?; + + if distinct_index < self.distinct.len() - 1 { + f.write_str(", ")?; + } + } + + f.write_str(")") } } @@ -94,3 +159,17 @@ impl ProgramComponent for Aggregate { todo!() } } + +impl IteratableVariables for Aggregate { + fn variables<'a>(&'a self) -> Box + 'a> { + Box::new(self.aggregate.variables().chain(self.distinct.iter())) + } + + fn variables_mut<'a>(&'a mut self) -> Box + 'a> { + Box::new( + self.aggregate + .variables_mut() + .chain(self.distinct.iter_mut()), + ) + } +} diff --git a/nemo/src/rule_model/component/term/function.rs b/nemo/src/rule_model/component/term/function.rs index 84942a5ab..4d9df6feb 100644 --- a/nemo/src/rule_model/component/term/function.rs +++ b/nemo/src/rule_model/component/term/function.rs @@ -3,30 +3,49 @@ use std::{fmt::Display, hash::Hash}; use crate::rule_model::{ - component::ProgramComponent, error::ProgramConstructionError, origin::Origin, + component::{IteratableVariables, ProgramComponent, Tag}, + error::ProgramConstructionError, + origin::Origin, }; -use super::{Identifier, Term}; +use super::{primitive::variable::Variable, Term}; /// Function term +/// +/// List of [Term]s with a [Tag]. #[derive(Debug, Clone, Eq)] pub struct FunctionTerm { /// Origin of this component origin: Origin, /// Name of the function - name: Identifier, + tag: Tag, /// Subterms of the function terms: Vec, } +/// Construct a [FunctionTerm]. +#[macro_export] +macro_rules! function { + // Base case: no elements + ($name:tt) => { + crate::rule_model::component::term::function::FunctionTerm::new($name, Vec::new()) + }; + // Recursive case: handle each term, separated by commas + ($name:tt; $($tt:tt)*) => {{ + let mut terms = Vec::new(); + term_list!(terms; $($tt)*); + crate::rule_model::component::term::function::FunctionTerm::new($name,terms) + }}; +} + impl FunctionTerm { /// Create a new [FunctionTerm]. 
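
Returning to the aggregate constructors above: a sketch of a #sum aggregate over one variable, distinct per another (the variable names are made up):

    let aggregate = Aggregate::sum(
        Term::from(Variable::universal("price")),
        vec![Variable::universal("item")],
    );
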
- pub fn new(name: &str, subterms: Vec) -> Self { + pub fn new>(name: &str, subterms: Terms) -> Self { Self { origin: Origin::Created, - name: Identifier::new(name.to_string()), - terms: subterms, + tag: Tag::new(name.to_string()), + terms: subterms.into_iter().collect(), } } @@ -42,20 +61,30 @@ impl FunctionTerm { } impl Display for FunctionTerm { - fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - todo!() + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_fmt(format_args!("{}(", self.tag))?; + + for (term_index, term) in self.terms.iter().enumerate() { + term.fmt(f)?; + + if term_index < self.terms.len() - 1 { + f.write_str(", ")?; + } + } + + f.write_str(")") } } impl PartialEq for FunctionTerm { fn eq(&self, other: &Self) -> bool { - self.origin == other.origin && self.name == other.name && self.terms == other.terms + self.origin == other.origin && self.tag == other.tag && self.terms == other.terms } } impl PartialOrd for FunctionTerm { fn partial_cmp(&self, other: &Self) -> Option { - match self.name.partial_cmp(&other.name) { + match self.tag.partial_cmp(&other.tag) { Some(core::cmp::Ordering::Equal) => {} ord => return ord, } @@ -65,7 +94,7 @@ impl PartialOrd for FunctionTerm { impl Hash for FunctionTerm { fn hash(&self, state: &mut H) { - self.name.hash(state); + self.tag.hash(state); self.terms.hash(state); } } @@ -94,7 +123,7 @@ impl ProgramComponent for FunctionTerm { where Self: Sized, { - if !self.name.is_valid() { + if !self.tag.is_valid() { todo!() } @@ -105,3 +134,34 @@ impl ProgramComponent for FunctionTerm { Ok(()) } } + +impl IteratableVariables for FunctionTerm { + fn variables<'a>(&'a self) -> Box + 'a> { + Box::new(self.terms.iter().flat_map(|term| term.variables())) + } + + fn variables_mut<'a>(&'a mut self) -> Box + 'a> { + Box::new(self.terms.iter_mut().flat_map(|term| term.variables_mut())) + } +} + +#[cfg(test)] +mod test { + use crate::rule_model::component::{term::primitive::variable::Variable, IteratableVariables}; + + #[test] + fn function_basic() { + let variable = Variable::universal("u"); + let function = function!("f"; 12, variable, !e, "abc", ?v); + + let variables = function.variables().cloned().collect::>(); + assert_eq!( + variables, + vec![ + Variable::universal("u"), + Variable::existential("e"), + Variable::universal("v") + ] + ); + } +} diff --git a/nemo/src/rule_model/component/term/map.rs b/nemo/src/rule_model/component/term/map.rs index a353d323b..c78dd579f 100644 --- a/nemo/src/rule_model/component/term/map.rs +++ b/nemo/src/rule_model/component/term/map.rs @@ -1,30 +1,72 @@ //! This module defines [Map] -use std::{collections::BTreeMap, fmt::Display, hash::Hash}; +use std::{fmt::Display, hash::Hash}; -use crate::rule_model::{component::ProgramComponent, origin::Origin}; +use crate::rule_model::{ + component::{IteratableVariables, ProgramComponent, Tag}, + origin::Origin, +}; -use super::Term; +use super::{primitive::variable::Variable, Term}; -/// Map term +/// Map +/// +/// A collection of key-value pairs, +/// associating [Term]s with each other. #[derive(Debug, Clone, Eq)] pub struct Map { /// Origin of this component origin: Origin, - /// Map associating [Term]s with [Term]s - map: BTreeMap, + /// Name of the map + tag: Option, + + /// List of tuples associating [Term]s with [Term]s + map: Vec<(Term, Term)>, +} + +impl Map { + /// Create a new [Map]. 
+ pub fn new>(name: &str, map: Pairs) -> Self { + Self { + origin: Origin::Created, + tag: Some(Tag::new(name.to_string())), + map: map.into_iter().collect(), + } + } + + /// Create a new [Map]. + pub fn new_unnamed>(map: Pairs) -> Self { + Self { + origin: Origin::Created, + tag: None, + map: map.into_iter().collect(), + } + } } impl Display for Map { - fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - todo!() + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_fmt(format_args!( + "{}{{", + self.tag.as_ref().map_or("", |tag| &tag.0) + ))?; + + for (term_index, (key, value)) in self.map.iter().enumerate() { + f.write_fmt(format_args!("{} = {}", key, value))?; + + if term_index < self.map.len() - 1 { + f.write_str(", ")?; + } + } + + f.write_str("}") } } impl PartialEq for Map { fn eq(&self, other: &Self) -> bool { - self.map == other.map + self.tag == other.tag && self.map == other.map } } @@ -36,6 +78,7 @@ impl PartialOrd for Map { impl Hash for Map { fn hash(&self, state: &mut H) { + self.tag.hash(state); self.map.hash(state); } } @@ -67,3 +110,21 @@ impl ProgramComponent for Map { todo!() } } + +impl IteratableVariables for Map { + fn variables<'a>(&'a self) -> Box + 'a> { + Box::new( + self.map + .iter() + .flat_map(|(key, value)| key.variables().chain(value.variables())), + ) + } + + fn variables_mut<'a>(&'a mut self) -> Box + 'a> { + Box::new( + self.map + .iter_mut() + .flat_map(|(key, value)| key.variables_mut().chain(value.variables_mut())), + ) + } +} diff --git a/nemo/src/rule_model/component/term/operation.rs b/nemo/src/rule_model/component/term/operation.rs index b447ba431..ee934f74b 100644 --- a/nemo/src/rule_model/component/term/operation.rs +++ b/nemo/src/rule_model/component/term/operation.rs @@ -2,9 +2,12 @@ use std::{fmt::Display, hash::Hash}; -use crate::rule_model::{component::ProgramComponent, origin::Origin}; +use crate::rule_model::{ + component::{IteratableVariables, ProgramComponent}, + origin::Origin, +}; -use super::Term; +use super::{primitive::variable::Variable, Term}; /// Supported operations #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, PartialOrd)] @@ -188,9 +191,89 @@ impl OperationKind { _ => return None, }) } + + /// Precendence of operations for display purposes. 
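
Before moving on, a sketch of the two Map constructors defined above; keys and values render through their term Display, so string terms may appear quoted:

    let named = Map::new(
        "config",
        vec![(Term::from("resource"), Term::from("data.csv"))],
    );
    let unnamed = Map::new_unnamed(vec![(Term::from(1_i64), Term::from(2_i64))]);
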
+ pub(crate) fn precedence(&self) -> usize { + match &self { + Self::NumericSum => 1, + Self::NumericSubtraction => 1, + Self::NumericProduct => 2, + Self::NumericDivision => 2, + _ => 3, + } + } +} + +impl Display for OperationKind { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let string = match self { + OperationKind::Equal => "EQUAL", + OperationKind::Unequals => "UNEQUAL", + OperationKind::NumericSum => "SUM", + OperationKind::NumericSubtraction => "MINUS", + OperationKind::NumericProduct => "PROD", + OperationKind::NumericDivision => "DIV", + OperationKind::NumericLogarithm => "LOG", + OperationKind::NumericPower => "POW", + OperationKind::NumericRemainder => "REM", + OperationKind::NumericGreaterthan => "GT", + OperationKind::NumericGreaterthaneq => "GTE", + OperationKind::NumericLessthan => "LT", + OperationKind::NumericLessthaneq => "LTE", + OperationKind::StringCompare => "COMPARE", + OperationKind::StringContains => "CONTAINS", + OperationKind::StringSubstring => "SUBSTR", + OperationKind::StringBefore => "STRBEFORE", + OperationKind::StringAfter => "STRAFTER", + OperationKind::StringStarts => "STRSTARTS", + OperationKind::StringEnds => "STRENDS", + OperationKind::BooleanNegation => "NOT", + OperationKind::CastToDouble => "DOUBLE", + OperationKind::CastToFloat => "FLOAT", + OperationKind::CastToInteger => "INT", + OperationKind::CanonicalString => "fullStr", + OperationKind::CheckIsInteger => "isInteger", + OperationKind::CheckIsFloat => "isFloat", + OperationKind::CheckIsDouble => "isDouble", + OperationKind::CheckIsIri => "isIri", + OperationKind::CheckIsNumeric => "isNumeric", + OperationKind::CheckIsNull => "isNull", + OperationKind::CheckIsString => "isString", + OperationKind::Datatype => "DATATYPE", + OperationKind::LanguageTag => "LANG", + OperationKind::LexicalValue => "STR", + OperationKind::NumericAbsolute => "ABS", + OperationKind::NumericCosine => "COS", + OperationKind::NumericCeil => "CEIL", + OperationKind::NumericFloor => "FLOOR", + OperationKind::NumericNegation => "MINUS", + OperationKind::NumericRound => "ROUND", + OperationKind::NumericSine => "SIN", + OperationKind::NumericSquareroot => "SQRT", + OperationKind::NumericTangent => "TAN", + OperationKind::StringLength => "STRLEN", + OperationKind::StringReverse => "STRREV", + OperationKind::StringLowercase => "LCASE", + OperationKind::StringUppercase => "UCASE", + OperationKind::BitAnd => "BITAND", + OperationKind::BitOr => "BITOR", + OperationKind::BitXor => "BITXOR", + OperationKind::BooleanConjunction => "AND", + OperationKind::BooleanDisjunction => "OR", + OperationKind::NumericMinimum => "MIN", + OperationKind::NumericMaximum => "MAX", + OperationKind::NumericLukasiewicz => "LUKA", + OperationKind::StringConcatenation => "CONCAT", + }; + + write!(f, "{}", string) + } } -/// Operation that can be applied to terms +/// Operation +/// +/// An action or computation performed on [Term]s. +/// This can include for example arithmetic or string operations. #[derive(Debug, Clone, Eq)] pub struct Operation { /// Origin of this component @@ -218,9 +301,92 @@ impl Operation { } } +// Helper functions related to the display implementation +impl Operation { + /// Puts braces around `term` if it has a lower precendence than `self`. 
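
The formatting helpers that follow use these precedences to decide where parentheses are required; the intended effect, sketched:

    let sum = Operation::new(
        OperationKind::NumericSum,
        vec![Term::from(1_i64), Term::from(2_i64)],
    );
    let product = Operation::new(
        OperationKind::NumericProduct,
        vec![Term::from(sum), Term::from(3_i64)],
    );
    // NumericSum binds weaker than NumericProduct, so the inner
    // term is parenthesized: (1 + 2) * 3
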
+ fn format_braces_priority( + &self, + f: &mut std::fmt::Formatter<'_>, + term: &Term, + ) -> std::fmt::Result { + let need_braces = if let Term::Operation(other) = term { + self.kind.precedence() > other.kind.precedence() + } else { + false + }; + + if need_braces { + self.format_braces(f, term) + } else { + write!(f, "{}", term) + } + } + + /// Put braces around the input term. + fn format_braces(&self, f: &mut std::fmt::Formatter<'_>, term: &Term) -> std::fmt::Result { + write!(f, "({})", term) + } + + /// Formats the arguments of an operation as a delimiter separated list. + fn format_operation_arguments( + &self, + f: &mut std::fmt::Formatter<'_>, + terms: &[Term], + delimiter: &str, + ) -> std::fmt::Result { + for (index, term) in terms.iter().enumerate() { + self.format_braces_priority(f, term)?; + + if index < terms.len() - 1 { + f.write_str(delimiter)?; + } + } + + Ok(()) + } + + /// Returns the infix symbol corresponding to the operation + /// or `None` if this operation should never be displayed as an infix operation. + fn infix_representation(&self) -> Option<&str> { + Some(match &self.kind { + OperationKind::NumericSum => "+", + OperationKind::NumericSubtraction => "-", + OperationKind::NumericProduct => "*", + &OperationKind::NumericDivision => "/", + _ => return None, + }) + } + + /// Format operation in the usual (, , ...) style + fn format_operation(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}(", self.kind)?; + self.format_operation_arguments(f, &self.subterms, ",")?; + f.write_str(")") + } + + /// Format operation that is more naturally written in an infix style . + fn format_infix_operation( + &self, + f: &mut std::fmt::Formatter<'_>, + operation: &str, + left: &Term, + right: &Term, + ) -> std::fmt::Result { + self.format_braces_priority(f, left)?; + write!(f, " {} ", operation)?; + self.format_braces_priority(f, right) + } +} + impl Display for Operation { - fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - todo!() + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if let Some(infix) = self.infix_representation() { + if self.subterms.len() == 2 { + return self.format_infix_operation(f, infix, &self.subterms[0], &self.subterms[1]); + } + } + + self.format_operation(f) } } @@ -256,7 +422,7 @@ impl ProgramComponent for Operation { } fn origin(&self) -> &Origin { - todo!() + &self.origin } fn set_origin(mut self, origin: Origin) -> Self @@ -274,3 +440,17 @@ impl ProgramComponent for Operation { todo!() } } + +impl IteratableVariables for Operation { + fn variables<'a>(&'a self) -> Box + 'a> { + Box::new(self.subterms.iter().flat_map(|term| term.variables())) + } + + fn variables_mut<'a>(&'a mut self) -> Box + 'a> { + Box::new( + self.subterms + .iter_mut() + .flat_map(|term| term.variables_mut()), + ) + } +} diff --git a/nemo/src/rule_model/component/term/primitive.rs b/nemo/src/rule_model/component/term/primitive.rs index 15d065b21..846870004 100644 --- a/nemo/src/rule_model/component/term/primitive.rs +++ b/nemo/src/rule_model/component/term/primitive.rs @@ -6,11 +6,18 @@ pub mod variable; use std::{fmt::Display, hash::Hash}; use ground::GroundTerm; -use variable::Variable; +use nemo_physical::datavalues::AnyDataValue; +use variable::{existential::ExistentialVariable, universal::UniversalVariable, Variable}; -use crate::rule_model::{component::ProgramComponent, origin::Origin}; +use crate::rule_model::{ + component::{IteratableVariables, ProgramComponent}, + origin::Origin, +}; /// Primitive 
term +/// +/// Represents a basic, indivisble values, which can either be [GroundTerm]s or [Variable]s. +/// Such terms are the atomic values used in the construction of more complex expressions. #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)] pub enum Primitive { /// Variable @@ -26,6 +33,66 @@ impl Primitive { } } +impl From for Primitive { + fn from(value: Variable) -> Self { + Self::Variable(value) + } +} + +impl From for Primitive { + fn from(value: UniversalVariable) -> Self { + Self::from(Variable::from(value)) + } +} + +impl From for Primitive { + fn from(value: ExistentialVariable) -> Self { + Self::from(Variable::from(value)) + } +} + +impl From for Primitive { + fn from(value: GroundTerm) -> Self { + Self::Ground(value) + } +} + +impl From for Primitive { + fn from(value: AnyDataValue) -> Self { + Self::Ground(GroundTerm::from(value)) + } +} + +impl From for Primitive { + fn from(value: i64) -> Self { + Self::from(GroundTerm::from(value)) + } +} + +impl From for Primitive { + fn from(value: i32) -> Self { + Self::from(GroundTerm::from(value)) + } +} + +impl From for Primitive { + fn from(value: u64) -> Self { + Self::from(GroundTerm::from(value)) + } +} + +impl From for Primitive { + fn from(value: String) -> Self { + Self::from(GroundTerm::from(value)) + } +} + +impl From<&str> for Primitive { + fn from(value: &str) -> Self { + Self::from(GroundTerm::from(value)) + } +} + impl Display for Primitive { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { @@ -71,27 +138,24 @@ impl ProgramComponent for Primitive { } } -// impl ASTConstructable for Primitive { -// type Node<'a> = Term<'a>; - -// fn from_ast_node<'a>( -// node: Self::Node<'a>, -// origin: ExternalReference, -// context: &ASTContext, -// ) -> Self { -// match node { -// Term::Primitive(primitive) => { -// Primitive::Ground(GroundTerm::from_ast_node(primitive, origin, context)) -// } -// Term::Blank(token) => { -// let value: AnyDataValue = todo!(); - -// Primitive::Ground(GroundTerm::create_parsed(value, origin)) -// } -// Term::UniversalVariable(_) | Term::ExistentialVariable(_) => { -// Primitive::Variable(Variable::from_ast_node(node, origin, context)) -// } -// _ => unreachable!("TODO"), -// } -// } -// } +impl IteratableVariables for Primitive { + fn variables<'a>(&'a self) -> Box + 'a> { + Box::new( + match self { + Primitive::Variable(variable) => Some(variable), + Primitive::Ground(_) => None, + } + .into_iter(), + ) + } + + fn variables_mut<'a>(&'a mut self) -> Box + 'a> { + Box::new( + match self { + Primitive::Variable(variable) => Some(variable), + Primitive::Ground(_) => None, + } + .into_iter(), + ) + } +} diff --git a/nemo/src/rule_model/component/term/primitive/ground.rs b/nemo/src/rule_model/component/term/primitive/ground.rs index 2cc76e577..9dbdbebe8 100644 --- a/nemo/src/rule_model/component/term/primitive/ground.rs +++ b/nemo/src/rule_model/component/term/primitive/ground.rs @@ -2,13 +2,16 @@ use std::{fmt::Display, hash::Hash}; -use nemo_physical::datavalues::AnyDataValue; +use nemo_physical::datavalues::{AnyDataValue, IriDataValue}; use crate::rule_model::{ component::ProgramComponent, error::ProgramConstructionError, origin::Origin, }; /// Primitive ground term +/// +/// Represents a basic, indivisble constant value like integers, or strings. +/// Such terms are the atomic values used in the construction of more complex expressions. 
#[derive(Debug, Clone, Eq)] pub struct GroundTerm { /// Origin of this component @@ -27,6 +30,48 @@ impl GroundTerm { } } +impl From for GroundTerm { + fn from(value: AnyDataValue) -> Self { + Self::new(value) + } +} + +impl From for GroundTerm { + fn from(value: i32) -> Self { + Self::new(AnyDataValue::new_integer_from_i64(value.into())) + } +} + +impl From for GroundTerm { + fn from(value: i64) -> Self { + Self::new(AnyDataValue::new_integer_from_i64(value)) + } +} + +impl From for GroundTerm { + fn from(value: u64) -> Self { + Self::new(AnyDataValue::new_integer_from_u64(value)) + } +} + +impl From for GroundTerm { + fn from(value: String) -> Self { + Self::new(AnyDataValue::new_plain_string(value)) + } +} + +impl From<&str> for GroundTerm { + fn from(value: &str) -> Self { + Self::new(AnyDataValue::new_plain_string(value.to_string())) + } +} + +impl From for GroundTerm { + fn from(value: IriDataValue) -> Self { + Self::new(AnyDataValue::new_iri(value.to_string())) + } +} + impl Display for GroundTerm { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { self.value.fmt(f) @@ -78,57 +123,3 @@ impl ProgramComponent for GroundTerm { Ok(()) } } - -// impl ASTConstructable for GroundTerm { -// type Node<'a> = Primitive<'a>; - -// fn from_ast_node<'a>( -// node: Self::Node<'a>, -// origin: ExternalReference, -// context: &ASTContext, -// ) -> Self { -// match node { -// Primitive::Constant(token) => { -// Self::create_parsed(AnyDataValue::new_iri(token.to_string()), origin) -// } -// Primitive::PrefixedConstant { -// prefix, constant, .. -// } => { -// let prefixed_constant = prefix -// .map(|token| { -// context -// .prefixes -// .get(&token.to_string()) -// .cloned() -// .unwrap_or(token.to_string()) // TODO: We could also panic here -// }) -// .unwrap_or(String::from("")) -// + &constant.to_string(); - -// Self::create_parsed(AnyDataValue::new_iri(prefixed_constant), origin) -// } -// Primitive::Number { -// span, -// sign, -// before, -// dot, -// after, -// exponent, -// } => { -// // TODO: Create number values -// // Self::create_parsed(AnyDataValue:: span.to_string(), origin) -// todo!() -// } -// Primitive::String(string) => { -// Self::create_parsed(AnyDataValue::new_plain_string(string.to_string()), origin) -// } -// Primitive::Iri(iri) => { -// Self::create_parsed(AnyDataValue::new_iri(iri.to_string()), origin) -// } -// Primitive::RdfLiteral { string, iri, .. } => Self::create_parsed( -// AnyDataValue::new_other(string.to_string(), iri.to_string()), -// origin, -// ), -// } -// } -// } diff --git a/nemo/src/rule_model/component/term/primitive/variable.rs b/nemo/src/rule_model/component/term/primitive/variable.rs index d98ea3caf..7f0dcaaed 100644 --- a/nemo/src/rule_model/component/term/primitive/variable.rs +++ b/nemo/src/rule_model/component/term/primitive/variable.rs @@ -34,7 +34,10 @@ impl Display for VariableName { } } -/// Variable that can be bound to a specific value +/// Variable +/// +/// A general placeholder that can be bound to any value. +/// We distinguish [UniversalVariable] and [ExistentialVariable]. 
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)] pub enum Variable { /// Universal variable @@ -68,6 +71,18 @@ impl Variable { } } +impl From for Variable { + fn from(value: UniversalVariable) -> Self { + Self::Universal(value) + } +} + +impl From for Variable { + fn from(value: ExistentialVariable) -> Self { + Self::Existential(value) + } +} + impl Display for Variable { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { @@ -118,23 +133,3 @@ impl ProgramComponent for Variable { Ok(()) } } - -// impl ASTConstructable for Variable { -// type Node<'a> = Term<'a>; - -// fn from_ast_node<'a>( -// node: Self::Node<'a>, -// origin: ExternalReference, -// context: &ASTContext, -// ) -> Self { -// match node { -// Term::UniversalVariable(_) => { -// Variable::Universal(UniversalVariable::from_ast_node(node, origin, context)) -// } -// Term::ExistentialVariable(_) => { -// Variable::Existential(ExistentialVariable::from_ast_node(node, origin, context)) -// } -// _ => unreachable!("TODO"), -// } -// } -// } diff --git a/nemo/src/rule_model/component/term/primitive/variable/existential.rs b/nemo/src/rule_model/component/term/primitive/variable/existential.rs index f584aab57..e33f8958a 100644 --- a/nemo/src/rule_model/component/term/primitive/variable/existential.rs +++ b/nemo/src/rule_model/component/term/primitive/variable/existential.rs @@ -8,7 +8,9 @@ use crate::rule_model::{ use super::VariableName; -/// Variable that allows to assert the existence of an object +/// Existentially quantified variable +/// +/// Variable that implies the existence of a value satisfying a certain pattern. #[derive(Debug, Clone, Eq)] pub struct ExistentialVariable { /// Origin of this component @@ -84,20 +86,3 @@ impl ProgramComponent for ExistentialVariable { todo!() } } - -// impl ASTConstructable for ExistentialVariable { -// type Node<'a> = Term<'a>; - -// fn from_ast_node<'a>(node: Term<'a>, origin: ExternalReference, _context: &ASTContext) -> Self { -// if let Term::UniversalVariable(token) = node { -// let name = token.span.to_string(); - -// Self { -// origin: Origin::External(origin), -// name: VariableName::new(name), -// } -// } else { -// unreachable!("TODO") -// } -// } -// } diff --git a/nemo/src/rule_model/component/term/primitive/variable/universal.rs b/nemo/src/rule_model/component/term/primitive/variable/universal.rs index e2dae3680..90fd21f2b 100644 --- a/nemo/src/rule_model/component/term/primitive/variable/universal.rs +++ b/nemo/src/rule_model/component/term/primitive/variable/universal.rs @@ -8,7 +8,9 @@ use crate::rule_model::{ use super::VariableName; -/// Variable that can be bound to a specific value +/// Universally quantified variable +/// +/// Represents a variable that can take on any value in the domain. /// /// Universal variables may not have a name, /// in which case we call them anonymous. 
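
For quick reference, the two variable kinds side by side; the renderings in the comments are the expected ones, matching the ? and ! markers used by the term_list! macro:

    let universal = Variable::universal("x");     // rendered as ?x
    let existential = Variable::existential("y"); // rendered as !y
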
@@ -104,20 +106,3 @@ impl ProgramComponent for UniversalVariable { todo!() } } - -// impl ASTConstructable for UniversalVariable { -// type Node<'a> = Term<'a>; - -// fn from_ast_node<'a>(node: Term<'a>, origin: ExternalReference, _context: &ASTContext) -> Self { -// if let Term::UniversalVariable(token) = node { -// let name = token.span.to_string(); - -// Self { -// origin: Origin::External(origin), -// name: Some(VariableName::new(name)), -// } -// } else { -// unreachable!("TODO") -// } -// } -// } diff --git a/nemo/src/rule_model/component/term/tuple.rs b/nemo/src/rule_model/component/term/tuple.rs index 92a6eeca1..b10147483 100644 --- a/nemo/src/rule_model/component/term/tuple.rs +++ b/nemo/src/rule_model/component/term/tuple.rs @@ -2,11 +2,16 @@ use std::{fmt::Display, hash::Hash}; -use crate::rule_model::{component::ProgramComponent, origin::Origin}; +use crate::rule_model::{ + component::{IteratableVariables, ProgramComponent}, + origin::Origin, +}; -use super::Term; +use super::{primitive::variable::Variable, Term}; -/// An ordered list of terms +/// Tuple +/// +/// An ordered list of [Term]s. #[derive(Debug, Clone, Eq)] pub struct Tuple { /// Origin of this component @@ -16,19 +21,44 @@ pub struct Tuple { terms: Vec, } +/// Construct a [Tuple]. +#[macro_export] +macro_rules! tuple { + // Base case: no elements + () => { + crate::rule_model::component::term::tuple::Tuple::new(Vec::new()) + }; + // Recursive case: handle each term, separated by commas + ($($tt:tt)*) => {{ + let mut terms = Vec::new(); + term_list!(terms; $($tt)*); + crate::rule_model::component::term::tuple::Tuple::new(terms) + }}; +} + impl Tuple { /// Create a new [Tuple]. - pub fn new(terms: Vec) -> Self { + pub fn new>(terms: Terms) -> Self { Self { origin: Origin::default(), - terms, + terms: terms.into_iter().collect(), } } } impl Display for Tuple { - fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - todo!() + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str("(")?; + + for (term_index, term) in self.terms.iter().enumerate() { + term.fmt(f)?; + + if term_index < self.terms.len() - 1 { + f.write_str(", ")?; + } + } + + f.write_str(")") } } @@ -77,3 +107,34 @@ impl ProgramComponent for Tuple { todo!() } } + +impl IteratableVariables for Tuple { + fn variables<'a>(&'a self) -> Box + 'a> { + Box::new(self.terms.iter().flat_map(|term| term.variables())) + } + + fn variables_mut<'a>(&'a mut self) -> Box + 'a> { + Box::new(self.terms.iter_mut().flat_map(|term| term.variables_mut())) + } +} + +#[cfg(test)] +mod test { + use crate::rule_model::component::{term::primitive::variable::Variable, IteratableVariables}; + + #[test] + fn tuple_basic() { + let variable = Variable::universal("u"); + let tuple = tuple!(12, variable, !e, "abc", ?v); + + let variables = tuple.variables().cloned().collect::>(); + assert_eq!( + variables, + vec![ + Variable::universal("u"), + Variable::existential("e"), + Variable::universal("v") + ] + ); + } +} diff --git a/nemo/src/rule_model/program.rs b/nemo/src/rule_model/program.rs index 4f6ea9e17..09f25439a 100644 --- a/nemo/src/rule_model/program.rs +++ b/nemo/src/rule_model/program.rs @@ -74,28 +74,25 @@ impl Program { ) { let origin = Origin::External(self.rules.len()); - let rule_builder = RuleBuilder::default().origin(origin); - let mut head_builder = rule_builder.head(); + let mut rule_builder = RuleBuilder::default().origin(origin); // TODO: Implement a normal iterator to avoid cloning for (head_index, head_atom) in 
head.clone().into_iter().enumerate() { let origin = Origin::External(head_index); if let Literal::Positive(atom) = Self::ast_build_literal(origin, &head_atom) { - head_builder = head_builder.add_atom(atom); + rule_builder.add_head_atom_mut(atom); } else { unreachable!("head must only contain positive atoms") } } - let mut body_builder = head_builder.done().body(); - // TODO: Implement a normal iterator to avoid cloning - for (body_index, body_atom) in head.clone().into_iter().enumerate() { + for (body_index, body_atom) in body.clone().into_iter().enumerate() { let origin = Origin::External(body_index); - body_builder = body_builder.add_literal(Self::ast_build_literal(origin, &body_atom)); + rule_builder.add_body_literal_mut(Self::ast_build_literal(origin, &body_atom)); } - self.rules.push(body_builder.done().finalize()); + self.rules.push(rule_builder.finalize()); } fn ast_build_literal(origin: Origin, atom: &ast::atom::Atom) -> Literal { diff --git a/nemo/src/rule_model/syntax.rs b/nemo/src/rule_model/syntax.rs new file mode 100644 index 000000000..e3b9cad78 --- /dev/null +++ b/nemo/src/rule_model/syntax.rs @@ -0,0 +1,63 @@ +//! Constants for strings that are relevant to the syntax of rules. +//! These are kept in one location, since they are required in various +//! places related to parsing and display. + +/// The "predicate name" used for the CSV format in import/export directives. +pub(crate) const FILE_FORMAT_CSV: &str = "csv"; +/// The "predicate name" used for the DSV format in import/export directives. +pub(crate) const FILE_FORMAT_DSV: &str = "dsv"; +/// The "predicate name" used for the TSV format in import/export directives. +pub(crate) const FILE_FORMAT_TSV: &str = "tsv"; +/// The "predicate name" used for the generic RDF format in import/export directives. +pub(crate) const FILE_FORMAT_RDF_UNSPECIFIED: &str = "rdf"; +/// The "predicate name" used for the Ntriples format in import/export directives. +pub(crate) const FILE_FORMAT_RDF_NTRIPLES: &str = "ntriples"; +/// The "predicate name" used for the NQuads format in import/export directives. +pub(crate) const FILE_FORMAT_RDF_NQUADS: &str = "nquads"; +/// The "predicate name" used for the Turtle format in import/export directives. +pub(crate) const FILE_FORMAT_RDF_TURTLE: &str = "turtle"; +/// The "predicate name" used for the TriG format in import/export directives. +pub(crate) const FILE_FORMAT_RDF_TRIG: &str = "trig"; +/// The "predicate name" used for the RDF/XML format in import/export directives. +pub(crate) const FILE_FORMAT_RDF_XML: &str = "rdfxml"; +/// The "predicate name" used for the json format in import/export directives. +pub(crate) const FILE_FORMAT_JSON: &str = "json"; + +/// Name of the parameter for specifying the resource in import/export directives. +pub(crate) const PARAMETER_NAME_RESOURCE: &str = "resource"; +/// Name of the parameter for specifying the format in import/export directives. +pub(crate) const PARAMETER_NAME_FORMAT: &str = "format"; +/// Name of the parameter for specifying a base IRI in import/export directives. +pub(crate) const PARAMETER_NAME_BASE: &str = "base"; +/// Name of the parameter for specifying a delimiter in import/export directives for delimiter-separated values format. +pub(crate) const PARAMETER_NAME_DSV_DELIMITER: &str = "delimiter"; +/// Name of the parameter for specifying the compression in import/export directives. +pub(crate) const PARAMETER_NAME_COMPRESSION: &str = "compression"; +/// Name of the parameter for specifying the limit in import/export directives. 
+pub(crate) const PARAMETER_NAME_LIMIT: &str = "limit";
+
+/// The name of the general, best-effort value format. Importers/exporters supporting this format will usually
+/// accept "any" input value and interpret it in the most natural way. Likewise, any value should be writable
+/// in this format.
+pub(crate) const VALUE_FORMAT_ANY: &str = "any";
+/// The name of the value format that interprets all values as plain strings. Importers/exporters supporting this
+/// format will usually accept any input value and interpret it as strings in the most literal way. Only strings
+/// can be written in this format.
+pub(crate) const VALUE_FORMAT_STRING: &str = "string";
+/// The name of the value format that interprets values as integers whenever possible. Importers/exporters supporting
+/// this format will usually only accept input values that are formatted like integers. Conversely, only integer values
+/// can be written in this format.
+pub(crate) const VALUE_FORMAT_INT: &str = "int";
+/// The name of the value format that interprets values as double-precision floating point numbers whenever possible.
+/// Importers/exporters supporting this format will usually only accept input values that are formatted like decimal numbers,
+/// integers, or floating-point numbers in scientific notation. Conversely, only double values
+/// can be written in this format.
+pub(crate) const VALUE_FORMAT_DOUBLE: &str = "double";
+/// The name of the special value format that indicates that a value should be ignored altogether.
+/// The respective column/parameter will be skipped in reading/writing.
+pub(crate) const VALUE_FORMAT_SKIP: &str = "skip";
+
+/// The name of the compression format that means "no compression".
+pub(crate) const VALUE_COMPRESSION_NONE: &str = "none";
+/// The name of the compression format that means "gzip compression".
+pub(crate) const VALUE_COMPRESSION_GZIP: &str = "gzip";
diff --git a/nemo/src/rule_model/util.rs b/nemo/src/rule_model/util.rs
new file mode 100644
index 000000000..e9bb7d261
--- /dev/null
+++ b/nemo/src/rule_model/util.rs
@@ -0,0 +1,32 @@
+//! This module collects miscellaneous functionality for the rule model.
+
+/// Macro that parses individual [super::component::term::Term]s
+#[macro_export]
+macro_rules! term_list {
+ // Base case
+ () => {};
+ // Match a single universally quantified variable
+ ($terms:ident; ? $var:ident) => {
+ $terms.push(crate::rule_model::component::term::Term::universal_variable(stringify!($var)));
+ };
+ // Match universally quantified variables
+ ($terms:ident; ? $var:ident, $($others:tt)* ) => {
+ $terms.push(crate::rule_model::component::term::Term::universal_variable(stringify!($var))); term_list!($terms; $($others)*)
+ };
+ // Match a single existentially quantified variable
+ ($terms:ident; ! $var:ident) => {
+ $terms.push(crate::rule_model::component::term::Term::existential_variable(stringify!($var)));
+ };
+ // Match existentially quantified variables
+ ($terms:ident; ! 
$var:ident, $($others:tt)* ) => {
+ $terms.push(crate::rule_model::component::term::Term::existential_variable(stringify!($var))); term_list!($terms; $($others)*)
+ };
+ // Match a single occurrence of anything
+ ($terms:ident; $e:tt) => {
+ $terms.push(crate::rule_model::component::term::Term::from($e));
+ };
+ // Match a list of anything
+ ($terms:ident; $e:tt, $($others:tt)* ) => {
+ $terms.push(crate::rule_model::component::term::Term::from($e)); term_list!($terms; $($others)*)
+ };
+}
From 303b14deaaf7669c39a2881065774df8333cd039 Mon Sep 17 00:00:00 2001
From: Alex Ivliev
Date: Tue, 9 Jul 2024 16:34:11 +0200
Subject: [PATCH 114/214] Export macros

---
 nemo/src/rule_model/component.rs      | 1 +
 nemo/src/rule_model/component/term.rs | 5 +++++
 2 files changed, 6 insertions(+)

diff --git a/nemo/src/rule_model/component.rs b/nemo/src/rule_model/component.rs
index 0e151ba7e..272abf69b 100644
--- a/nemo/src/rule_model/component.rs
+++ b/nemo/src/rule_model/component.rs
@@ -1,5 +1,6 @@
 //! This module defines the logical components that make up a program.
 
+#[macro_use]
 pub mod atom;
 pub mod base;
 pub mod fact;
diff --git a/nemo/src/rule_model/component/term.rs b/nemo/src/rule_model/component/term.rs
index e228dd46e..0c06ec120 100644
--- a/nemo/src/rule_model/component/term.rs
+++ b/nemo/src/rule_model/component/term.rs
@@ -1,9 +1,14 @@
 //! This module defines [Term].
 
+#[macro_use]
 pub mod aggregate;
+#[macro_use]
 pub mod function;
+#[macro_use]
 pub mod map;
+#[macro_use]
 pub mod operation;
+#[macro_use]
 pub mod primitive;
 #[macro_use]
 pub mod tuple;
From a2e103fbe777ed70a5905741768c77f66ee52f42 Mon Sep 17 00:00:00 2001
From: Alex Ivliev
Date: Wed, 10 Jul 2024 13:09:19 +0200
Subject: [PATCH 115/214] Include error messages for program construction errors

---
 nemo/src/rule_model/error.rs | 59 +++++++++++++++++++++++++++---------
 1 file changed, 45 insertions(+), 14 deletions(-)

diff --git a/nemo/src/rule_model/error.rs b/nemo/src/rule_model/error.rs
index 236195a5e..f9a0ec9aa 100644
--- a/nemo/src/rule_model/error.rs
+++ b/nemo/src/rule_model/error.rs
@@ -2,23 +2,54 @@
 
 use thiserror::Error;
 
-use super::component::{
- atom::Atom,
- fact::Fact,
- term::{primitive::variable::Variable, Term},
-};
+use super::component::term::{aggregate::Aggregate, primitive::variable::Variable, Term};
 
 /// Error returned during the construction of objects from nemo's logical rule model
 #[derive(Error, Debug)]
 pub enum ProgramConstructionError {
- #[error("variable \"{0}\" has an invalid name")]
+ /// An existentially quantified variable occurs in the body of a rule.
+ #[error(r#"existential variable used in rule body: `{0}`"#)]
+ BodyExistential(Variable),
+ /// Unsafe variable used in the head of the rule.
+ #[error(r#"unsafe variable used in rule head: `{0}`"#)]
+ HeadUnsafe(Variable),
+ /// Anonymous variable used in the head of the rule.
+ #[error(r#"anonymous variable used in rule head"#)] + HeadAnonymous, + /// Operation with unsafe variable + #[error(r#"unsafe variable used in computation: `{0}`"#)] + OperationUnsafe(Variable), + /// Unsafe variable used in multiple negative literals + #[error(r#"unsafe variable used in multiple negative literals: `{0}`"#)] + MultipleNegativeLiteralsUnsafe(Variable), + /// Aggregate is used in body + #[error(r#"aggregate used in rule body: `{0}`"#)] + BodyAggregate(Aggregate), + /// Unsupported feature: Multiple aggregates in one rule + #[error(r#"multiple aggregates in one rule is currently unsupported"#)] + AggregateMultiple, + /// Unsupported feature: Aggregates combined with existential rules + #[error(r#"aggregates and existential variables in one rule is currently unsupported"#)] + AggregatesAndExistentials, + /// A variable is both universally and existentially quantified + #[error(r#"variable is both universal and existential: `{0}`"#)] + VariableMultipleQuantifiers(String), + /// Fact contains non-ground term + #[error(r#"non-ground term used in fact: `{0}`"#)] + FactNonGround(Term), + /// Atom used without any arguments + #[error(r#"atoms without arguments are currently unsupported"#)] + AtomNoArguments, + /// Non-primitive terms are currently unsupported + #[error(r#"complex terms are currently unsupported"#)] + ComplexTerm, + /// Invalid variable name was used + #[error(r#"variable name is invalid: `{0}`"#)] InvalidVariableName(String), - #[error("term \"{0}\" has an invalid name")] - InvalidIdentifier(String), - #[error("atom \"{0}\" has an invalid name")] - InvalidAtomName(String), - #[error("fact {0} contains ")] - NonGroundFact(Fact), - #[error("parse error")] // TODO: Return parser error here - ParseError, + /// Invalid tag was used + #[error(r#"tag is invalid: `{0}`"#)] + InvalidTermTag(String), + /// Invalid predicate name was used + #[error(r#"predicate name is invalid: `{0}"#)] + InvalidPredicateName(String), } From 401e9d075cdccffcdcc4d084b274a617b461111e Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Wed, 10 Jul 2024 15:42:11 +0200 Subject: [PATCH 116/214] Change Fact Statement and parsing of facts --- nemo/src/io/parser.rs | 111 ++++++++++------------------ nemo/src/io/parser/ast/statement.rs | 73 +++++++++++++++++- nemo/src/rule_model/program.rs | 2 +- 3 files changed, 110 insertions(+), 76 deletions(-) diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index 8fc1225b5..dfc9e0129 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -2782,7 +2782,7 @@ pub mod new { // dbg!(&input.parser_state.labels); context( Context::Fact, - tuple((opt(lex_doc_comment), parse_normal_atom, wsoc0, dot)), + tuple((opt(lex_doc_comment), parse_fact_atom, wsoc0, dot)), )(input) .map(|(rest_input, (doc_comment, atom, _ws, dot))| { ( @@ -2790,13 +2790,30 @@ pub mod new { Statement::Fact { span: outer_span(input.input, rest_input.input), doc_comment, - atom, + fact: atom, dot, }, ) }) } + fn parse_fact_atom< + 'a, + 's, + E: ParseError> + ContextError, Context>, + >( + input: Input<'a, 's>, + ) -> IResult, Fact<'a>, E> { + // TODO: Add Context + match parse_named_tuple::(input) { + Ok((rest_input, named_tuple)) => Ok((rest_input, Fact::NamedTuple(named_tuple))), + Err(_) => match parse_map::(input) { + Ok((rest_input, map)) => Ok((rest_input, Fact::Map(map))), + Err(err) => Err(err), + }, + } + } + /// Parse a rule of the form `headPredicate1(term1, term2, …), headPredicate2(term1, term2, …) :- bodyPredicate(term1, …), term1 >= (term2 + term3) * 
function(term1, …) .`
 fn parse_rule<'a, 's, E: ParseError<Input<'a, 's>> + ContextError<Input<'a, 's>, Context>>(
 input: Input<'a, 's>,
@@ -3108,9 +3125,13 @@ pub mod new {
 move |input: Input<'a, 's>| {
 context(
 Context::List,
- pair(parse_t, many0(tuple((wsoc0, comma, wsoc0, parse_t)))),
+ tuple((
+ parse_t,
+ many0(tuple((wsoc0, comma, wsoc0, parse_t))),
+ pair(wsoc0, opt(comma)),
+ )),
 )(input)
- .map(|(rest_input, (first, rest))| {
+ .map(|(rest_input, (first, rest, (_, trailing_comma)))| {
 (
 rest_input,
 List {
@@ -3125,6 +3146,7 @@ pub mod new {
 .collect(),
 )
 },
+ trailing_comma,
 },
 )
 })
@@ -3304,43 +3326,10 @@ pub mod new {
 parse_map(input).map(|(rest_input, map)| (rest_input, Atom::Map(map)))
 }
 
- // /// Parse a pair list of the form `key1 = value1, key2 = value2, …`.
- // fn parse_pair_list<'a, 's, E: ParseError<Input<'a, 's>> + ContextError<Input<'a, 's>, Context>>(
- // input: Input<'a, 's>,
- // state: Errors,
- // ) -> IResult<Input<'a, 's>, Option<List<'a, Pair<'a, Term<'a>, Term<'a>>>>, E> {
- // context(
- // "parse pair list",
- // opt(pair(
- // parse_pair,
- // many0(tuple((
- // opt(lex_whitespace),
- // comma,
- // opt(lex_whitespace),
- // parse_pair,
- // ))),
- // )),
- // )(input)
- // .map(|(rest_input, pair_list)| {
- // if let Some((first, rest)) = pair_list {
- // (
- // rest_input,
- // Some(List {
- // span: outer_span(input, rest_input),
- // first,
- // rest: if rest.is_empty() { None } else { Some(rest) },
- // }),
- // )
- // } else {
- // (rest_input, None)
- // }
- // })
- // }
-
 /// Parse a pair of the form `key = value`.
 fn parse_pair<'a, 's, E: ParseError<Input<'a, 's>> + ContextError<Input<'a, 's>, Context>>(
 input: Input<'a, 's>,
- ) -> IResult<Input<'a, 's>, Pair<'a, Term<'a>, Term<'a>>, E> {
+ ) -> IResult<Input<'a, 's>, Pair<'a>, E> {
 context(
 Context::Pair,
 tuple((parse_term, wsoc0, equal, wsoc0, parse_term)),
@@ -3358,35 +3347,6 @@ pub mod new {
 })
 }
 
- // /// Parse a list of terms of the form `term1, term2, …`.
- // fn parse_term_list<'a, 's, E: ParseError<Input<'a, 's>> + ContextError<Input<'a, 's>, Context>>(
- // input: Input<'a, 's>,
- // state: Errors,
- // ) -> IResult<Input<'a, 's>, List<'a, Term<'a>>, E> {
- // context(
- // "parse term list",
- // pair(
- // parse_term,
- // many0(tuple((
- // opt(lex_whitespace),
- // comma,
- // opt(lex_whitespace),
- // parse_term,
- // ))),
- // ),
- // )(input)
- // .map(|(rest_input, (first, rest))| {
- // (
- // rest_input,
- // List {
- // span: outer_span(input, rest_input),
- // first,
- // rest: if rest.is_empty() { None } else { Some(rest) },
- // },
- // )
- // })
- // }
-
 /// Parse a term. A term can be a primitive value (constant, number, string, …),
 /// a variable (universal or existential), a map, a function (-symbol), an arithmetic
 /// operation, an aggregation or a tuple of terms, e.g. `(term1, term2, …)`. 
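
The reworked `parse_list` above threads an extra `pair(wsoc0, opt(comma))` through the combinator so that inputs like `(3, 4, )` parse, and the optional trailing comma is recorded in the AST. A self-contained sketch of the same trailing-comma pattern on plain `&str` input with nom 7, independent of the crate's located `Input` type and `Context` machinery:

use nom::{
    character::complete::{char, digit1, multispace0},
    combinator::opt,
    multi::many0,
    sequence::{delimited, pair, preceded},
    IResult,
};

/// A comma surrounded by optional whitespace.
fn ws_comma(input: &str) -> IResult<&str, char> {
    delimited(multispace0, char(','), multispace0)(input)
}

/// Parses `1, 2, 3` and also `1, 2, 3,`: one item, any number of
/// `, item` continuations, then an optional trailing comma.
fn number_list(input: &str) -> IResult<&str, Vec<&str>> {
    let (rest, (first, mut others)) = pair(digit1, many0(preceded(ws_comma, digit1)))(input)?;
    let (rest, _trailing) = opt(ws_comma)(rest)?;
    let mut items = vec![first];
    items.append(&mut others);
    Ok((rest, items))
}

fn main() {
    assert_eq!(number_list("1, 2, 3").unwrap().1, vec!["1", "2", "3"]);
    assert_eq!(number_list("3, 4 , ").unwrap().1, vec!["3", "4"]);
}

Keeping the consumed comma instead of discarding it mirrors the `trailing_comma` field added to `List`: the AST stays lossless, which matters for tooling such as the LSP server that needs to reproduce the original spans.
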
@@ -3878,7 +3838,7 @@ pub mod new { statements: vec![Statement::Fact { span: s!(0, 1, "a(B,C)."), doc_comment: None, - atom: Atom::Positive(NamedTuple { + fact: Fact::NamedTuple(NamedTuple { span: s!(0, 1, "a(B,C)"), identifier: s!(0, 1, "a"), tuple: Tuple { @@ -3891,6 +3851,7 @@ pub mod new { s!(3, 1, ","), Term::Primitive(Primitive::Constant(s!(4, 1, "C"),)), )]), + trailing_comma: None, }), close_paren: s!(5, 1, ")"), } @@ -3964,6 +3925,7 @@ pub mod new { ),)), }, rest: None, + trailing_comma: None, }), close_brace: s!(134, 1, "}"), }, @@ -3994,6 +3956,7 @@ pub mod new { (s!(162, 1, ","), s!(164, 1, "b"),), (s!(165, 1, ","), s!(167, 1, "c"),), ]), + trailing_comma: None, }), dot: s!(168, 1, "."), }), @@ -4036,7 +3999,7 @@ pub mod new { Statement::Fact { span: s!(0, 1, "some(Fact, with, whitespace) ."), doc_comment: None, - atom: Atom::Positive(NamedTuple { + fact: Fact::NamedTuple(NamedTuple { span: s!(0, 1, "some(Fact, with, whitespace)"), identifier: s!(0, 1, "some"), tuple: Tuple { @@ -4063,6 +4026,7 @@ pub mod new { ))), ), ]), + trailing_comma: None, }), close_paren: s!(27, 1, ")"), } @@ -4525,7 +4489,8 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters } )), }, - rest: None + rest: None, + trailing_comma: None, }), close_paren: T! {CloseParen, 13,1,")"}, }))), @@ -4541,7 +4506,8 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters }), }), }, - rest: None + rest: None, + trailing_comma: None, }), close_paren: T!(CloseParen, 16, 1, ")") }))), @@ -4640,6 +4606,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters })), }, rest: None, + trailing_comma: None, }), close_paren: T! {CloseParen, 12,1,")"}, }))), @@ -4878,7 +4845,7 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters // TODO: Instead of just checking for errors, this should compare the created AST #[test] fn parse_trailing_comma() { - let test_string = "head(?X) :- body( (2,), (3, 4, ), ?X) ."; // should allow for spaces + let test_string = "head(?X) :- body( (2 ,), (3, 4 , ), ?X) ."; // should allow for spaces let input = Span::new(&test_string); let refcell = RefCell::new(Vec::new()); let parser_state = ParserState { errors: &refcell }; diff --git a/nemo/src/io/parser/ast/statement.rs b/nemo/src/io/parser/ast/statement.rs index c9420ec06..4547c2b91 100644 --- a/nemo/src/io/parser/ast/statement.rs +++ b/nemo/src/io/parser/ast/statement.rs @@ -2,6 +2,8 @@ use tower_lsp::lsp_types::SymbolKind; use super::atom::Atom; use super::directive::Directive; +use super::map::Map; +use super::named_tuple::NamedTuple; use super::{ast_to_ascii_tree, AstNode, List, Position, Range, Wsoc}; use crate::io::lexer::{Span, Token}; use ascii_tree::write_tree; @@ -12,7 +14,7 @@ pub enum Statement<'a> { Fact { span: Span<'a>, doc_comment: Option>, - atom: Atom<'a>, + fact: Fact<'a>, dot: Span<'a>, }, Rule { @@ -32,7 +34,7 @@ impl AstNode for Statement<'_> { Statement::Directive(directive) => Some(vec![directive]), Statement::Fact { doc_comment, - atom, + fact: atom, dot, .. 
} => { @@ -77,7 +79,7 @@ impl AstNode for Statement<'_> { } } - fn is_token(&self) -> bool { + fn is_leaf(&self) -> bool { false } @@ -131,3 +133,68 @@ impl std::fmt::Display for Statement<'_> { write!(f, "{output}") } } + +#[derive(Debug, Clone, PartialEq)] +pub enum Fact<'a> { + NamedTuple(NamedTuple<'a>), + Map(Map<'a>), +} +impl AstNode for Fact<'_> { + fn children(&self) -> Option> { + match self { + Fact::NamedTuple(named_tuple) => named_tuple.children(), + Fact::Map(map) => map.children(), + } + } + + fn span(&self) -> Span { + match self { + Fact::NamedTuple(named_tuple) => named_tuple.span(), + Fact::Map(map) => map.span(), + } + } + + fn is_leaf(&self) -> bool { + match self { + Fact::NamedTuple(named_tuple) => named_tuple.is_leaf(), + Fact::Map(map) => map.is_leaf(), + } + } + + fn name(&self) -> String { + match self { + Fact::NamedTuple(named_tuple) => named_tuple.name(), + Fact::Map(map) => map.name(), + } + } + + fn lsp_identifier(&self) -> Option<(String, String)> { + match self { + Fact::NamedTuple(named_tuple) => named_tuple.lsp_identifier(), + Fact::Map(map) => map.lsp_identifier(), + } + } + + fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> { + match self { + Fact::NamedTuple(named_tuple) => named_tuple.lsp_symbol_info(), + Fact::Map(map) => map.lsp_symbol_info(), + } + } + + fn lsp_range_to_rename(&self) -> Option { + match self { + Fact::NamedTuple(named_tuple) => named_tuple.lsp_range_to_rename(), + Fact::Map(map) => map.lsp_range_to_rename(), + } + } +} + +impl std::fmt::Display for Fact<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Fact::NamedTuple(named_tuple) => named_tuple.fmt(f), + Fact::Map(map) => map.fmt(f), + } + } +} diff --git a/nemo/src/rule_model/program.rs b/nemo/src/rule_model/program.rs index 2c37e4c67..e71f03ffe 100644 --- a/nemo/src/rule_model/program.rs +++ b/nemo/src/rule_model/program.rs @@ -53,7 +53,7 @@ impl Program { ast::statement::Statement::Fact { span, doc_comment, - atom, + fact: atom, dot, } => todo!(), ast::statement::Statement::Rule { head, body, .. } => { From 0a38fa72203231ca8cd25f5c0d7ffec982b5be41 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Wed, 10 Jul 2024 15:43:49 +0200 Subject: [PATCH 117/214] Small changes --- nemo/src/io/lexer.rs | 4 +-- nemo/src/io/parser/ast.rs | 40 +++++++++++++++++++++------ nemo/src/io/parser/ast/atom.rs | 2 +- nemo/src/io/parser/ast/directive.rs | 2 +- nemo/src/io/parser/ast/map.rs | 16 +++++------ nemo/src/io/parser/ast/named_tuple.rs | 2 +- nemo/src/io/parser/ast/program.rs | 4 ++- nemo/src/io/parser/ast/term.rs | 6 ++-- nemo/src/io/parser/ast/tuple.rs | 2 +- 9 files changed, 51 insertions(+), 27 deletions(-) diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs index 00b91e7d9..f8508474f 100644 --- a/nemo/src/io/lexer.rs +++ b/nemo/src/io/lexer.rs @@ -170,7 +170,7 @@ impl AstNode for Span<'_> { *self } - fn is_token(&self) -> bool { + fn is_leaf(&self) -> bool { true } @@ -399,7 +399,7 @@ impl<'a> AstNode for Token<'a> { self.span } - fn is_token(&self) -> bool { + fn is_leaf(&self) -> bool { true } diff --git a/nemo/src/io/parser/ast.rs b/nemo/src/io/parser/ast.rs index 8fd95636a..be1133f24 100644 --- a/nemo/src/io/parser/ast.rs +++ b/nemo/src/io/parser/ast.rs @@ -1,3 +1,6 @@ +//! This module contains the (abstract) syntax tree, generated from the parser. 
+
+
 use tower_lsp::lsp_types::SymbolKind;
 
 use crate::io::lexer::Span;
@@ -13,10 +16,13 @@ pub(crate) mod term;
 pub(crate) mod tuple;
 pub(crate) mod named_tuple;
 
+/// All AST nodes have to implement this trait so you can get all children recursively.
 pub trait AstNode: std::fmt::Debug + Display + Sync {
+ /// Return all children of an AST node.
 fn children(&self) -> Option<Vec<&dyn AstNode>>;
+ /// Return the `LocatedSpan` of the AST node.
 fn span(&self) -> Span;
-
+ /// Convert the `LocatedSpan` into a range of positions.
 fn range(&self) -> Range {
 let span = self.span();
@@ -42,9 +48,11 @@ pub trait AstNode: std::fmt::Debug + Display + Sync {
 }
 }
 
- // FIXME: With the removal of tokens is this method still usefull and/or should be renamed?
- fn is_token(&self) -> bool;
+ // FIXME: With the removal of tokens, is this method still useful?
+ /// Indicates whether the current AST node is a leaf and has no children.
+ fn is_leaf(&self) -> bool;
 
+ /// Return a formatted String for use in printing the AST.
 fn name(&self) -> String;
 
 /// Returns an optional pair of the identifier and identifier scope.
@@ -59,10 +67,14 @@ pub trait AstNode: std::fmt::Debug + Display + Sync {
 fn lsp_range_to_rename(&self) -> Option<Range>;
 }
 
+/// `Position` contains the offset in the source and the line and column information.
 #[derive(Debug, Clone, Copy, Hash)]
 pub struct Position {
+ /// Offset in the source.
 pub offset: usize,
+ /// Line number
 pub line: u32,
+ /// Column number
 pub column: u32,
 }
 impl PartialEq for Position {
@@ -92,16 +104,19 @@ impl Default for Position {
 }
 
 #[derive(Debug, Clone, Copy, Hash)]
+/// A Range with start and end `Position`s.
 pub struct Range {
+ /// Start position
 pub start: Position,
+ /// End position
 pub end: Position,
 }
 
 /// Whitespace or Comment token
 #[derive(Debug, Clone, PartialEq)]
 pub struct Wsoc<'a> {
- pub span: Span<'a>,
- pub token: Vec<Token<'a>>,
+ pub(crate) span: Span<'a>,
+ pub(crate) token: Vec<Token<'a>>,
 }
 impl AstNode for Wsoc<'_> {
 fn children(&self) -> Option<Vec<&dyn AstNode>> {
@@ -117,7 +132,7 @@ impl AstNode for Wsoc<'_> {
 self.span
 }
 
- fn is_token(&self) -> bool {
+ fn is_leaf(&self) -> bool {
 false
 }
 
@@ -155,6 +170,7 @@ pub struct List<'a, T> {
 pub first: T,
 // (,T)*
 pub rest: Option<Vec<(Span<'a>, T)>>,
+ pub trailing_comma: Option<Span<'a>>,
 }
 impl<'a, T> List<'a, T> {
 pub fn to_item_vec(&'a self) -> Vec<&'a T> {
@@ -202,7 +218,7 @@ impl AstNode for List<'_, T> {
 self.span
 }
 
- fn is_token(&self) -> bool {
+ fn is_leaf(&self) -> bool {
 false
 }
 
@@ -252,7 +268,7 @@ pub(crate) fn ast_to_ascii_tree(node: &dyn AstNode) -> Tree {
 let mut vec = Vec::new();
 if let Some(children) = node.children() {
 for child in children {
- if child.is_token() {
+ if child.is_leaf() {
 vec.push(Tree::Leaf(vec![format!("\x1b[93m{:?}\x1b[0m", child.name())]));
 } else {
 vec.push(ast_to_ascii_tree(child));
@@ -264,6 +280,7 @@ mod test {
 use named_tuple::NamedTuple;
+ use statement::Fact;
 
 use super::*;
 use super::{
@@ -324,7 +341,7 @@ mod test {
 doc_comment: Some(
 s!(184,7,"%% This is just an example predicate.\n")
 ),
- atom: Atom::Positive(NamedTuple {
+ fact: Fact::NamedTuple(NamedTuple {
 span: s!(222,8,"somePredicate(ConstA, ConstB)"),
 identifier: s!(222, 8, "somePredicate"),
 tuple: Tuple {
@@ -341,6 +358,7 @@ mod test {
 Term::Primitive(Primitive::Constant(
 s!(244, 8, "ConstB"),
 )),
 )]),
+ trailing_comma: None,
 }),
 close_paren: s!(250,8,")")
 }
@@ -368,11 +386,13 @@ mod test {
 first: Term::UniversalVariable(
 s!(304, 12, "?VarA"),
 ),
 rest: None,
+ trailing_comma: None,
 }),
 close_paren: s!(309,12,")")
 ,
 }
 
}), rest: None, + trailing_comma: None, }, arrow: s!(311,12,":-"), body: List { @@ -393,11 +413,13 @@ mod test { Term::Primitive(Primitive::Constant(s!(335, 12, "ConstB"), )), )]), + trailing_comma: None, }), close_paren: s!(341, 12,")") , } }), rest: None, + trailing_comma: None, }, dot: s!(342, 12,"."), }, diff --git a/nemo/src/io/parser/ast/atom.rs b/nemo/src/io/parser/ast/atom.rs index cc0217366..13a2c5922 100644 --- a/nemo/src/io/parser/ast/atom.rs +++ b/nemo/src/io/parser/ast/atom.rs @@ -64,7 +64,7 @@ impl AstNode for Atom<'_> { } } - fn is_token(&self) -> bool { + fn is_leaf(&self) -> bool { false } diff --git a/nemo/src/io/parser/ast/directive.rs b/nemo/src/io/parser/ast/directive.rs index 4ccf406dd..25512581e 100644 --- a/nemo/src/io/parser/ast/directive.rs +++ b/nemo/src/io/parser/ast/directive.rs @@ -146,7 +146,7 @@ impl AstNode for Directive<'_> { } } - fn is_token(&self) -> bool { + fn is_leaf(&self) -> bool { false } diff --git a/nemo/src/io/parser/ast/map.rs b/nemo/src/io/parser/ast/map.rs index 143640d54..6eec78e52 100644 --- a/nemo/src/io/parser/ast/map.rs +++ b/nemo/src/io/parser/ast/map.rs @@ -11,7 +11,7 @@ pub struct Map<'a> { pub span: Span<'a>, pub identifier: Option>, pub open_brace: Span<'a>, - pub pairs: Option, Term<'a>>>>, + pub pairs: Option>>, pub close_brace: Span<'a>, } impl AstNode for Map<'_> { @@ -32,7 +32,7 @@ impl AstNode for Map<'_> { self.span } - fn is_token(&self) -> bool { + fn is_leaf(&self) -> bool { false } @@ -62,13 +62,13 @@ impl std::fmt::Display for Map<'_> { } #[derive(Debug, Clone, PartialEq)] -pub struct Pair<'a, K, V> { +pub struct Pair<'a> { pub span: Span<'a>, - pub key: K, + pub key: Term<'a>, pub equal: Span<'a>, - pub value: V, + pub value: Term<'a>, } -impl AstNode for Pair<'_, K, V> { +impl AstNode for Pair<'_> { fn children(&self) -> Option> { let mut vec: Vec<&dyn AstNode> = Vec::new(); vec.push(&self.key); @@ -81,7 +81,7 @@ impl AstNode for Pair<'_, K, V> { self.span } - fn is_token(&self) -> bool { + fn is_leaf(&self) -> bool { false } @@ -106,7 +106,7 @@ impl AstNode for Pair<'_, K, V> { Some((String::from("Pair"), SymbolKind::ARRAY)) } } -impl std::fmt::Display for Pair<'_, K, V> { +impl std::fmt::Display for Pair<'_> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let mut output = String::new(); write_tree(&mut output, &ast_to_ascii_tree(self))?; diff --git a/nemo/src/io/parser/ast/named_tuple.rs b/nemo/src/io/parser/ast/named_tuple.rs index ef14633b1..fa8c2895d 100644 --- a/nemo/src/io/parser/ast/named_tuple.rs +++ b/nemo/src/io/parser/ast/named_tuple.rs @@ -28,7 +28,7 @@ impl AstNode for NamedTuple<'_> { self.span } - fn is_token(&self) -> bool { + fn is_leaf(&self) -> bool { false } diff --git a/nemo/src/io/parser/ast/program.rs b/nemo/src/io/parser/ast/program.rs index 7b080bcab..55dcf97f7 100644 --- a/nemo/src/io/parser/ast/program.rs +++ b/nemo/src/io/parser/ast/program.rs @@ -1,3 +1,5 @@ +//! The root node of an AST, representing a nemo program. 
+ use tower_lsp::lsp_types::SymbolKind; use super::{ast_to_ascii_tree, statement::Statement, AstNode, Position, Range}; @@ -29,7 +31,7 @@ impl AstNode for Program<'_> { self.span } - fn is_token(&self) -> bool { + fn is_leaf(&self) -> bool { false } diff --git a/nemo/src/io/parser/ast/term.rs b/nemo/src/io/parser/ast/term.rs index 52671a29a..f93ac833b 100644 --- a/nemo/src/io/parser/ast/term.rs +++ b/nemo/src/io/parser/ast/term.rs @@ -95,7 +95,7 @@ impl AstNode for Term<'_> { } } - fn is_token(&self) -> bool { + fn is_leaf(&self) -> bool { false } @@ -284,7 +284,7 @@ impl AstNode for Primitive<'_> { } } - fn is_token(&self) -> bool { + fn is_leaf(&self) -> bool { false } @@ -352,7 +352,7 @@ impl AstNode for Exponent<'_> { todo!() } - fn is_token(&self) -> bool { + fn is_leaf(&self) -> bool { todo!() } diff --git a/nemo/src/io/parser/ast/tuple.rs b/nemo/src/io/parser/ast/tuple.rs index 1442632c7..1fcab841c 100644 --- a/nemo/src/io/parser/ast/tuple.rs +++ b/nemo/src/io/parser/ast/tuple.rs @@ -28,7 +28,7 @@ impl AstNode for Tuple<'_> { self.span } - fn is_token(&self) -> bool { + fn is_leaf(&self) -> bool { false } From 2a718262dd96f6d9cb50190a343b5d45b3659cb4 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Wed, 10 Jul 2024 18:04:01 +0200 Subject: [PATCH 118/214] Remove old parser --- nemo/src/api.rs | 15 +- nemo/src/io/formats/dsv_value_format.rs | 16 +- nemo/src/io/lexer.rs | 21 +- nemo/src/io/parser.rs | 9179 +++++++++++------------ nemo/src/io/parser/ast/term.rs | 17 +- nemo/src/io/parser/types.rs | 14 +- nemo/src/io/resource_providers.rs | 7 +- nemo/src/model/rule_model/rule.rs | 3 +- nemo/src/rule_model/component/fact.rs | 5 + nemo/src/rule_model/program.rs | 86 +- 10 files changed, 4639 insertions(+), 4724 deletions(-) diff --git a/nemo/src/api.rs b/nemo/src/api.rs index 268efc8ca..ef5640814 100644 --- a/nemo/src/api.rs +++ b/nemo/src/api.rs @@ -25,13 +25,10 @@ use std::{fs::read_to_string, path::PathBuf}; use crate::{ error::{Error, ReadingError}, - execution::{DefaultExecutionEngine, ExecutionEngine}, - io::{ - parser::{all_input_consumed, RuleParser}, - resource_providers::ResourceProviders, - ImportManager, - }, + execution::DefaultExecutionEngine, + io::parser::parse_program_str, model::Identifier, + rule_model::program::Program, }; /// Reasoning Engine exposed by the API @@ -55,8 +52,10 @@ pub fn load(file: PathBuf) -> Result { /// # Error /// Returns an appropriate [Error] variant on parsing and feature check issues. pub fn load_string(input: String) -> Result { - let program = all_input_consumed(RuleParser::new().parse_program())(&input)?; - ExecutionEngine::initialize(&program, ImportManager::new(ResourceProviders::default())) + let (ast, _errors) = parse_program_str(&input); + let _program = Program::from_ast(ast); + todo!("ExecutionEngine has to use the new rule model") + // ExecutionEngine::initialize(&program, ImportManager::new(ResourceProviders::default())) } /// Executes the reasoning process of the [Engine]. 
diff --git a/nemo/src/io/formats/dsv_value_format.rs b/nemo/src/io/formats/dsv_value_format.rs index 0016272a2..7720899cf 100644 --- a/nemo/src/io/formats/dsv_value_format.rs +++ b/nemo/src/io/formats/dsv_value_format.rs @@ -6,13 +6,12 @@ use oxiri::Iri; use nemo_physical::datavalues::{AnyDataValue, DataValue, DataValueCreationError}; +use crate::io::lexer::ParserState; +use crate::io::parser::types::Input; use crate::model::{ VALUE_FORMAT_ANY, VALUE_FORMAT_DOUBLE, VALUE_FORMAT_INT, VALUE_FORMAT_SKIP, VALUE_FORMAT_STRING, }; -use crate::{ - io::parser::{parse_bare_name, span_from_str}, - model::FileFormat, -}; +use crate::{io::lexer::lex_tag, model::FileFormat}; use super::import_export::ImportExportError; @@ -145,9 +144,12 @@ impl DsvValueFormat { _ => {} } - // Check if it's a valid bare name - if let Ok((remainder, _)) = parse_bare_name(span_from_str(input)) { - if remainder.is_empty() { + // Check if it's a valid tag name + let refcell = std::cell::RefCell::new(Vec::new()); + let parser_state = ParserState { errors: &refcell }; + if let Ok((remainder, _)) = lex_tag::>(Input::new(input, parser_state)) + { + if remainder.input.is_empty() { return Ok(AnyDataValue::new_iri(input.to_string())); } } diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs index f8508474f..eca3cff25 100644 --- a/nemo/src/io/lexer.rs +++ b/nemo/src/io/lexer.rs @@ -2,7 +2,7 @@ use std::{cell::RefCell, ops::Range}; -use super::parser::new::context; +use super::parser::context; use nom::{ branch::alt, bytes::complete::{is_not, tag, take, take_till}, @@ -538,26 +538,19 @@ where )(input) } -pub(crate) fn lex_ident<'a, 's, E>(input: Input<'a, 's>) -> IResult, Span<'a>, E> +/// This function lexes the name of a predicate or a map, called the tag of the predicate/map. +pub(crate) fn lex_tag<'a, 's, E>(input: Input<'a, 's>) -> IResult, Span<'a>, E> where E: ParseError> + ContextError, Context>, { - let (rest_input, ident) = context( + context( Context::Identifier, recognize(pair( alpha1, many0(alt((alphanumeric1, tag("_"), tag("-")))), )), - )(input)?; - let token = match *ident.input.fragment() { - "base" => ident.input, - "prefix" => ident.input, - "import" => ident.input, - "export" => ident.input, - "output" => ident.input, - _ => ident.input, - }; - Ok((rest_input, token)) + )(input) + .map(|(rest_input, ident)| (rest_input, ident.input)) } pub(crate) fn lex_prefixed_ident<'a, 's, E>( @@ -566,7 +559,7 @@ pub(crate) fn lex_prefixed_ident<'a, 's, E>( where E: ParseError> + ContextError, Context>, { - recognize(tuple((opt(lex_ident), colon, lex_ident)))(input) + recognize(tuple((opt(lex_tag), colon, lex_tag)))(input) .map(|(rest_input, prefixed_ident)| (rest_input, prefixed_ident.input)) } diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index dfc9e0129..0b6e1022e 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -1,4048 +1,3988 @@ //! A parser for rulewerk-style rules. 
-use std::{cell::RefCell, collections::HashMap, fmt::Debug}; +// use std::{cell::RefCell, collections::HashMap, fmt::Debug}; + +// use crate::{ +// error::Error, +// io::parser::types::{ArithmeticOperator, BodyExpression}, +// model::*, +// }; +// use nemo_physical::datavalues::{ +// AnyDataValue, DataValueCreationError, MapDataValue, TupleDataValue, +// }; +// use nom::{ +// branch::alt, +// bytes::complete::{is_not, tag}, +// character::complete::{alpha1, digit1, multispace1, satisfy}, +// combinator::{all_consuming, cut, map, map_res, opt, recognize, value}, +// multi::{many0, many1, separated_list0, separated_list1}, +// sequence::{delimited, pair, preceded, separated_pair, terminated, tuple}, +// Err, +// }; + +// use macros::traced; -use crate::{ - error::Error, - io::parser::types::{ArithmeticOperator, BodyExpression}, - model::*, -}; -use nemo_physical::datavalues::{ - AnyDataValue, DataValueCreationError, MapDataValue, TupleDataValue, -}; +pub mod ast; +pub(crate) mod types; + +use ast::atom::Atom; +use ast::directive::Directive; +use ast::map::{Map, Pair}; +use ast::named_tuple::NamedTuple; +use ast::program::Program; +use ast::statement::{Fact, Statement}; +use ast::term::{Exponent, Primitive, Term}; +use ast::tuple::Tuple; +use ast::{List, Position, Wsoc}; +use types::Input; +// use types::{ConstraintOperator, IntermediateResult, Span}; +// pub(crate) mod iri; +// pub(crate) mod rfc5234; +// pub(crate) mod sparql; +// pub(crate) mod turtle; +// pub use types::{span_from_str, LocatedParseError, ParseError, ParseResult}; +pub use types::LocatedParseError; + +// /// Parse a program in the given `input`-String and return a [Program]. +// /// +// /// The program will be parsed and checked for unsupported features. +// /// +// /// # Error +// /// Returns an appropriate [Error] variant on parsing and feature check issues. +// pub fn parse_program(input: impl AsRef) -> Result { +// let program = all_input_consumed(RuleParser::new().parse_program())(input.as_ref())?; +// Ok(program) +// } + +// /// Parse a single fact in the given `input`-String and return a [Program]. +// /// +// /// The program will be parsed and checked for unsupported features. +// /// +// /// # Error +// /// Returns an appropriate [Error] variant on parsing and feature check issues. +// pub fn parse_fact(mut input: String) -> Result { +// input += "."; +// let fact = all_input_consumed(RuleParser::new().parse_fact())(input.as_str())?; +// Ok(fact) +// } + +// /// A combinator to add tracing to the parser. +// /// [fun] is an identifier for the parser and [parser] is the actual parser. +// #[inline(always)] +// fn traced<'a, T, P>( +// fun: &'static str, +// mut parser: P, +// ) -> impl FnMut(Span<'a>) -> IntermediateResult<'a, T> +// where +// T: Debug, +// P: FnMut(Span<'a>) -> IntermediateResult<'a, T>, +// { +// move |input| { +// log::trace!(target: "parser", "{fun}({input:?})"); +// let result = parser(input); +// log::trace!(target: "parser", "{fun}({input:?}) -> {result:?}"); +// result +// } +// } + +// /// A combinator that makes sure all input has been consumed. 
+// pub fn all_input_consumed<'a, T: 'a>( +// parser: impl FnMut(Span<'a>) -> IntermediateResult<'a, T> + 'a, +// ) -> impl FnMut(&'a str) -> Result + 'a { +// let mut p = all_consuming(parser); +// move |input| { +// let input = Span::new(input); +// p(input).map(|(_, result)| result).map_err(|e| match e { +// Err::Incomplete(e) => ParseError::MissingInput(match e { +// nom::Needed::Unknown => "expected an unknown amount of further input".to_string(), +// nom::Needed::Size(size) => format!("expected at least {size} more bytes"), +// }) +// .at(input), +// Err::Error(e) | Err::Failure(e) => e, +// }) +// } +// } + +// /// A combinator that recognises a comment, starting at a `%` +// /// character and ending at the end of the line. +// pub fn comment(input: Span) -> IntermediateResult<()> { +// alt(( +// value((), pair(tag("%"), is_not("\n\r"))), +// // a comment that immediately precedes the end of the line – +// // this must come after the normal line comment above +// value((), tag("%")), +// ))(input) +// } + +// /// A combinator that recognises an arbitrary amount of whitespace and +// /// comments. +// pub fn multispace_or_comment0(input: Span) -> IntermediateResult<()> { +// value((), many0(alt((value((), multispace1), comment))))(input) +// } + +// /// A combinator that recognises any non-empty amount of whitespace +// /// and comments. +// pub fn multispace_or_comment1(input: Span) -> IntermediateResult<()> { +// value((), many1(alt((value((), multispace1), comment))))(input) +// } + +// /// A combinator that modifies the associated error. +// pub fn map_error<'a, T: 'a>( +// mut parser: impl FnMut(Span<'a>) -> IntermediateResult<'a, T> + 'a, +// mut error: impl FnMut() -> ParseError + 'a, +// ) -> impl FnMut(Span<'a>) -> IntermediateResult<'a, T> + 'a { +// move |input| { +// parser(input).map_err(|e| match e { +// Err::Incomplete(_) => e, +// Err::Error(context) => { +// let mut err = error().at(input); +// err.append(context); +// Err::Error(err) +// } +// Err::Failure(context) => { +// let mut err = error().at(input); +// err.append(context); +// Err::Failure(err) +// } +// }) +// } +// } + +// /// A combinator that creates a parser for a specific token. +// pub fn token<'a>(token: &'a str) -> impl FnMut(Span<'a>) -> IntermediateResult> { +// map_error(tag(token), || ParseError::ExpectedToken(token.to_string())) +// } + +// /// A combinator that creates a parser for a specific token, +// /// surrounded by whitespace or comments. +// pub fn space_delimited_token<'a>( +// token: &'a str, +// ) -> impl FnMut(Span<'a>) -> IntermediateResult> { +// map_error( +// delimited(multispace_or_comment0, tag(token), multispace_or_comment0), +// || ParseError::ExpectedToken(token.to_string()), +// ) +// } + +// /// Expand a prefix. +// fn resolve_prefix<'a>( +// prefixes: &'a HashMap<&'a str, &'a str>, +// prefix: &'a str, +// ) -> Result<&'a str, ParseError> { +// prefixes +// .get(prefix) +// .copied() +// .ok_or_else(|| ParseError::UndeclaredPrefix(prefix.to_string())) +// } + +// /// Expand a prefixed name. +// fn resolve_prefixed_name( +// prefixes: &HashMap<&str, &str>, +// name: sparql::Name, +// ) -> Result { +// match name { +// sparql::Name::IriReference(iri) => Ok(iri.to_string()), +// sparql::Name::PrefixedName { prefix, local } => { +// resolve_prefix(prefixes, prefix).map(|iri| format!("{iri}{local}")) +// } +// sparql::Name::BlankNode(label) => Ok(format!("_:{label}")), +// } +// } + +// /// Resolve prefixes in a [turtle::RdfLiteral]. 
+// fn resolve_prefixed_rdf_literal( +// prefixes: &HashMap<&str, &str>, +// literal: turtle::RdfLiteral, +// ) -> Result { +// match literal { +// turtle::RdfLiteral::LanguageString { value, tag } => Ok( +// AnyDataValue::new_language_tagged_string(value.to_string(), tag.to_string()), +// ), +// turtle::RdfLiteral::DatatypeValue { value, datatype } => { +// AnyDataValue::new_from_typed_literal( +// value.to_string(), +// resolve_prefixed_name(prefixes, datatype) +// .expect("prefix should have been registered during parsing"), +// ) +// } +// } +// } + +// #[traced("parser")] +// pub(crate) fn parse_bare_name(input: Span<'_>) -> IntermediateResult> { +// map_error( +// recognize(pair( +// alpha1, +// opt(many1(satisfy(|c| { +// ['0'..='9', 'a'..='z', 'A'..='Z', '-'..='-', '_'..='_'] +// .iter() +// .any(|range| range.contains(&c)) +// }))), +// )), +// || ParseError::ExpectedBareName, +// )(input) +// } + +// #[traced("parser")] +// fn parse_simple_name(input: Span<'_>) -> IntermediateResult> { +// map_error( +// recognize(pair( +// alpha1, +// opt(preceded( +// many0(tag(" ")), +// separated_list1( +// many1(tag(" ")), +// many1(satisfy(|c| { +// ['0'..='9', 'a'..='z', 'A'..='Z', '_'..='_'] +// .iter() +// .any(|range| range.contains(&c)) +// })), +// ), +// )), +// )), +// || ParseError::ExpectedBareName, +// )(input) +// } + +// /// Parse an IRI representing a constant. +// fn parse_iri_constant<'a>( +// prefixes: &'a RefCell>, +// ) -> impl FnMut(Span<'a>) -> IntermediateResult<'a, AnyDataValue> { +// map_error( +// move |input| { +// let (remainder, name) = traced( +// "parse_iri_constant", +// alt(( +// map(sparql::iriref, |name| sparql::Name::IriReference(&name)), +// sparql::prefixed_name, +// sparql::blank_node_label, +// map(parse_bare_name, |name| sparql::Name::IriReference(&name)), +// )), +// )(input)?; + +// let resolved = resolve_prefixed_name(&prefixes.borrow(), name) +// .map_err(|e| Err::Failure(e.at(input)))?; + +// Ok((remainder, AnyDataValue::new_iri(resolved))) +// }, +// || ParseError::ExpectedIriConstant, +// ) +// } + +// fn parse_constant_term<'a>( +// prefixes: &'a RefCell>, +// ) -> impl FnMut(Span<'a>) -> IntermediateResult<'a, AnyDataValue> { +// traced( +// "parse_constant_term", +// alt(( +// parse_iri_constant(prefixes), +// turtle::numeric_literal, +// map_res(turtle::rdf_literal, move |literal| { +// resolve_prefixed_rdf_literal(&prefixes.borrow(), literal) +// }), +// map(turtle::string, move |literal| { +// AnyDataValue::new_plain_string(literal.to_string()) +// }), +// )), +// ) +// } + +// /// Parse a ground term. +// pub fn parse_ground_term<'a>( +// prefixes: &'a RefCell>, +// ) -> impl FnMut(Span<'a>) -> IntermediateResult<'a, PrimitiveTerm> { +// traced( +// "parse_ground_term", +// map_error( +// map(parse_constant_term(prefixes), PrimitiveTerm::GroundTerm), +// || ParseError::ExpectedGroundTerm, +// ), +// ) +// } + +// /// The main parser. Holds a hash map for +// /// prefixes, as well as the base IRI. +// #[derive(Debug, Default)] +// pub struct RuleParser<'a> { +// /// The base IRI, if set. +// base: RefCell>, +// /// A map from Prefixes to IRIs. +// prefixes: RefCell>, +// /// Number counting up for generating distinct wildcards. +// wildcard_generator: RefCell, +// } + +// impl<'a> RuleParser<'a> { +// /// Construct a new [RuleParser]. 
+// pub fn new() -> Self { +// Default::default() +// } + +// fn parse_complex_constant_term( +// &'a self, +// ) -> impl FnMut(Span<'a>) -> IntermediateResult<'a, AnyDataValue> { +// traced( +// "parse_complex_constant_term", +// // Note: The explicit |s| in the cases below is important to enable proper type +// // reasoning in rust. Without it, unresolved opaque types appear in the recursion. +// alt(( +// parse_constant_term(&self.prefixes), +// map(|s| self.parse_tuple_literal()(s), AnyDataValue::from), +// map(|s| self.parse_map_literal()(s), AnyDataValue::from), +// )), +// ) +// } + +// /// Parse the dot that ends declarations, optionally surrounded by spaces. +// fn parse_dot(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult> { +// traced("parse_dot", space_delimited_token(".")) +// } + +// /// Parse a comma, optionally surrounded by spaces. +// fn parse_comma(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult> { +// traced("parse_comma", space_delimited_token(",")) +// } + +// /// Parse an equality sign, optionally surrounded by spaces. +// fn parse_equals(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult> { +// traced("parse_equals", space_delimited_token("=")) +// } + +// /// Parse a negation sign (`~`), optionally surrounded by spaces. +// fn parse_not(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult> { +// traced("parse_not", space_delimited_token("~")) +// } + +// /// Parse an arrow (`:-`), optionally surrounded by spaces. +// fn parse_arrow(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult> { +// traced("parse_arrow", space_delimited_token(":-")) +// } + +// /// Parse an opening parenthesis, optionally surrounded by spaces. +// fn parse_open_parenthesis(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult> { +// traced("parse_open_parenthesis", space_delimited_token("(")) +// } + +// /// Parse a closing parenthesis, optionally surrounded by spaces. +// fn parse_close_parenthesis(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult> { +// traced("parse_close_parenthesis", space_delimited_token(")")) +// } + +// /// Matches an opening parenthesis, +// /// then gets an object from the parser, +// /// and finally matches an closing parenthesis. +// pub fn parenthesised<'b, O, F>( +// &'a self, +// parser: F, +// ) -> impl FnMut(Span<'a>) -> IntermediateResult +// where +// O: Debug + 'a, +// F: FnMut(Span<'a>) -> IntermediateResult + 'a, +// { +// traced( +// "parenthesised", +// map_error( +// delimited( +// self.parse_open_parenthesis(), +// parser, +// self.parse_close_parenthesis(), +// ), +// || ParseError::ExpectedParenthesisedExpression, +// ), +// ) +// } + +// /// Parse an opening brace, optionally surrounded by spaces. +// fn parse_open_brace(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult> { +// traced("parse_open_brace", space_delimited_token("{")) +// } + +// /// Parse a closing brace, optionally surrounded by spaces. +// fn parse_close_brace(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult> { +// traced("parse_close_brace", space_delimited_token("}")) +// } + +// /// Parse a base declaration. 
+// fn parse_base(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { +// traced( +// "parse_base", +// map_error( +// move |input| { +// let (remainder, base) = delimited( +// terminated(token("@base"), cut(multispace_or_comment1)), +// cut(sparql::iriref), +// cut(self.parse_dot()), +// )(input)?; + +// log::debug!(target: "parser", r#"parse_base: set new base: "{base}""#); +// *self.base.borrow_mut() = Some(&base); + +// Ok((remainder, Identifier(base.to_string()))) +// }, +// || ParseError::ExpectedBaseDeclaration, +// ), +// ) +// } + +// fn parse_prefix(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult> { +// traced( +// "parse_prefix", +// map_error( +// move |input| { +// let (remainder, (prefix, iri)) = delimited( +// terminated(token("@prefix"), cut(multispace_or_comment1)), +// cut(tuple(( +// cut(terminated(sparql::pname_ns, multispace_or_comment1)), +// cut(sparql::iriref), +// ))), +// cut(self.parse_dot()), +// )(input)?; + +// log::debug!(target: "parser", r#"parse_prefix: got prefix "{prefix}" for iri "{iri}""#); +// if self.prefixes.borrow_mut().insert(&prefix, &iri).is_some() { +// Err(Err::Failure( +// ParseError::RedeclaredPrefix(prefix.to_string()).at(input), +// )) +// } else { +// Ok((remainder, prefix)) +// } +// }, +// || ParseError::ExpectedPrefixDeclaration, +// ), +// ) +// } + +// /// Parse a data source declaration. +// /// This is a backwards compatibility feature for Rulewerk syntax. Nemo normally uses +// /// `@import` instead of `@source`. The difference in `@source` is that (1) a predicate +// /// arity is given in brackets after the predicate name, (2) the import predicate names +// /// are one of `load-csv`, `load-tsv`, `load-rdf`, and `sparql`, with the only parameter +// /// being the file name or IRI to be loaded. 
+// fn parse_source(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { +// traced( +// "parse_source", +// map_error( +// move |input| { +// let (remainder, (predicate, arity)) = preceded( +// terminated(token("@source"), cut(multispace_or_comment1)), +// cut(self.parse_qualified_predicate_name()), +// )(input)?; + +// let (remainder, datasource): (_, Result<_, ParseError>) = cut(delimited( +// delimited(multispace_or_comment0, token(":"), multispace_or_comment1), +// alt(( +// map( +// delimited( +// preceded(token("load-csv"), cut(self.parse_open_parenthesis())), +// turtle::string, +// self.parse_close_parenthesis(), +// ), +// |filename| { +// let attributes = MapDataValue::from_iter([ +// ( +// AnyDataValue::new_iri( +// PARAMETER_NAME_RESOURCE.to_string(), +// ), +// AnyDataValue::new_plain_string(filename.to_string()), +// ), +// ( +// AnyDataValue::new_iri( +// PARAMETER_NAME_FORMAT.to_string(), +// ), +// TupleDataValue::from_iter( +// vec![VALUE_FORMAT_ANY; arity] +// .iter() +// .map(|format| { +// AnyDataValue::new_plain_string( +// (*format).to_string(), +// ) +// }) +// .collect::>(), +// ) +// .into(), +// ), +// ]); +// Ok(ImportDirective::from(ImportExportDirective { +// predicate: predicate.clone(), +// format: FileFormat::CSV, +// attributes, +// })) +// }, +// ), +// map( +// delimited( +// preceded(token("load-tsv"), cut(self.parse_open_parenthesis())), +// turtle::string, +// self.parse_close_parenthesis(), +// ), +// |filename| { +// let attributes = MapDataValue::from_iter([ +// ( +// AnyDataValue::new_iri( +// PARAMETER_NAME_RESOURCE.to_string(), +// ), +// AnyDataValue::new_plain_string(filename.to_string()), +// ), +// ( +// AnyDataValue::new_iri( +// PARAMETER_NAME_FORMAT.to_string(), +// ), +// TupleDataValue::from_iter( +// vec![VALUE_FORMAT_ANY; arity] +// .iter() +// .map(|format| { +// AnyDataValue::new_plain_string( +// (*format).to_string(), +// ) +// }) +// .collect::>(), +// ) +// .into(), +// ), +// ]); +// Ok(ImportDirective::from(ImportExportDirective { +// predicate: predicate.clone(), +// format: FileFormat::TSV, +// attributes, +// })) +// }, +// ), +// map( +// delimited( +// preceded(token("load-rdf"), cut(self.parse_open_parenthesis())), +// turtle::string, +// self.parse_close_parenthesis(), +// ), +// |filename| { +// let mut attribute_pairs = vec![ +// ( +// AnyDataValue::new_iri( +// PARAMETER_NAME_RESOURCE.to_string(), +// ), +// AnyDataValue::new_plain_string(filename.to_string()), +// ), +// ( +// AnyDataValue::new_iri( +// PARAMETER_NAME_FORMAT.to_string(), +// ), +// TupleDataValue::from_iter( +// vec![VALUE_FORMAT_ANY; arity] +// .iter() +// .map(|format| { +// AnyDataValue::new_plain_string( +// (*format).to_string(), +// ) +// }) +// .collect::>(), +// ) +// .into(), +// ), +// ]; +// if let Some(base) = self.base() { +// attribute_pairs.push(( +// AnyDataValue::new_iri(PARAMETER_NAME_BASE.to_string()), +// AnyDataValue::new_iri(base.to_string()), +// )); +// } + +// let attributes = MapDataValue::from_iter(attribute_pairs); + +// Ok(ImportDirective::from(ImportExportDirective { +// predicate: predicate.clone(), +// format: FileFormat::RDF(RdfVariant::Unspecified), +// attributes, +// })) +// }, +// ), +// map( +// delimited( +// preceded(token("sparql"), cut(self.parse_open_parenthesis())), +// tuple(( +// self.parse_iri_identifier(), +// delimited( +// self.parse_comma(), +// turtle::string, +// self.parse_comma(), +// ), +// turtle::string, +// )), +// self.parse_close_parenthesis(), +// ), +// |(_endpoint, _projection, 
_query)| { +// Err(ParseError::UnsupportedSparqlSource(predicate.clone().0)) +// }, +// ), +// )), +// cut(self.parse_dot()), +// ))( +// remainder +// )?; + +// let spec = datasource.map_err(|e| Err::Failure(e.at(input)))?; + +// Ok((remainder, spec)) +// }, +// || ParseError::ExpectedDataSourceDeclaration, +// ), +// ) +// } + +// /// Parse an output directive. +// fn parse_output_directive(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { +// traced( +// "parse_output", +// map_error( +// delimited( +// terminated(token("@output"), cut(multispace_or_comment1)), +// cut(map_res::<_, _, _, _, Error, _, _>( +// self.parse_iri_like_identifier(), +// Ok, +// )), +// cut(self.parse_dot()), +// ), +// || ParseError::ExpectedOutputDeclaration, +// ), +// ) +// } + +// /// Parse an entry in a [MapDataValue], i.e., am [AnyDataValue]--[AnyDataValue] pair. +// fn parse_map_entry( +// &'a self, +// ) -> impl FnMut(Span<'a>) -> IntermediateResult<(AnyDataValue, AnyDataValue)> { +// traced( +// "parse_map_entry", +// separated_pair( +// self.parse_complex_constant_term(), +// self.parse_equals(), +// map(self.parse_complex_constant_term(), |term| term), +// ), +// ) +// } + +// /// Parse a ground map literal. +// fn parse_map_literal(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { +// traced( +// "parse_map_literal", +// delimited( +// self.parse_open_brace(), +// map( +// separated_list0(self.parse_comma(), self.parse_map_entry()), +// MapDataValue::from_iter, +// ), +// self.parse_close_brace(), +// ), +// ) +// } + +// /// Parse a ground tuple literal. +// pub fn parse_tuple_literal( +// &'a self, +// ) -> impl FnMut(Span<'a>) -> IntermediateResult { +// traced( +// "parse_tuple_literal", +// delimited( +// self.parse_open_parenthesis(), +// map( +// separated_list0(self.parse_comma(), self.parse_complex_constant_term()), +// TupleDataValue::from_iter, +// ), +// self.parse_close_parenthesis(), +// ), +// ) +// } + +// /// Parse a file format name. +// fn parse_file_format(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { +// traced("parse_file_format", move |input| { +// let (remainder, format) = +// map_res(alpha1, |format: Span<'a>| match *format.fragment() { +// FILE_FORMAT_CSV => Ok(FileFormat::CSV), +// FILE_FORMAT_DSV => Ok(FileFormat::DSV), +// FILE_FORMAT_TSV => Ok(FileFormat::TSV), +// FILE_FORMAT_RDF_UNSPECIFIED => Ok(FileFormat::RDF(RdfVariant::Unspecified)), +// FILE_FORMAT_RDF_NTRIPLES => Ok(FileFormat::RDF(RdfVariant::NTriples)), +// FILE_FORMAT_RDF_NQUADS => Ok(FileFormat::RDF(RdfVariant::NQuads)), +// FILE_FORMAT_RDF_TURTLE => Ok(FileFormat::RDF(RdfVariant::Turtle)), +// FILE_FORMAT_RDF_TRIG => Ok(FileFormat::RDF(RdfVariant::TriG)), +// FILE_FORMAT_RDF_XML => Ok(FileFormat::RDF(RdfVariant::RDFXML)), +// FILE_FORMAT_JSON => Ok(FileFormat::JSON), +// _ => Err(ParseError::FileFormatError(format.fragment().to_string())), +// })(input)?; + +// Ok((remainder, format)) +// }) +// } + +// /// Parse an import/export specification. 
+// fn parse_import_export_spec( +// &'a self, +// ) -> impl FnMut(Span<'a>) -> IntermediateResult { +// traced("parse_import_export_spec", move |input| { +// let (remainder, predicate) = self.parse_iri_like_identifier()(input)?; +// let (remainder, format) = delimited( +// space_delimited_token(":-"), +// self.parse_file_format(), +// multispace_or_comment0, +// )(remainder)?; +// let (remainder, attributes) = self.parse_map_literal()(remainder)?; +// Ok(( +// remainder, +// ImportExportDirective { +// predicate, +// format, +// attributes, +// }, +// )) +// }) +// } + +// /// Parse an import directive. +// fn parse_import(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { +// traced( +// "parse_import", +// delimited( +// terminated(token("@import"), multispace_or_comment1), +// cut(map(self.parse_import_export_spec(), ImportDirective::from)), +// cut(self.parse_dot()), +// ), +// ) +// } + +// /// Parse an export directive. +// fn parse_export(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { +// traced( +// "parse_export", +// delimited( +// terminated(token("@export"), multispace_or_comment1), +// cut(map(self.parse_import_export_spec(), ExportDirective::from)), +// cut(self.parse_dot()), +// ), +// ) +// } + +// /// Parse a statement. +// fn parse_statement(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { +// traced( +// "parse_statement", +// map_error( +// alt(( +// map(self.parse_fact(), Statement::Fact), +// map(self.parse_rule(), Statement::Rule), +// )), +// || ParseError::ExpectedStatement, +// ), +// ) +// } + +// /// Parse a fact. +// fn parse_fact(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { +// traced( +// "parse_fact", +// map_error( +// move |input| { +// let (remainder, (predicate, terms)) = terminated( +// pair( +// self.parse_iri_like_identifier(), +// self.parenthesised(separated_list1( +// self.parse_comma(), +// parse_ground_term(&self.prefixes), +// )), +// ), +// self.parse_dot(), +// )(input)?; + +// let predicate_name = predicate.name(); +// log::trace!(target: "parser", "found fact {predicate_name}({terms:?})"); + +// // We do not allow complex term trees in facts for now +// let terms = terms.into_iter().map(Term::Primitive).collect(); + +// Ok((remainder, Fact(Atom::new(predicate, terms)))) +// }, +// || ParseError::ExpectedFact, +// ), +// ) +// } + +// /// Parse an IRI identifier, e.g. for predicate names. +// fn parse_iri_identifier(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { +// map_error( +// move |input| { +// let (remainder, name) = traced( +// "parse_iri_identifier", +// alt(( +// map(sparql::iriref, |name| sparql::Name::IriReference(&name)), +// sparql::prefixed_name, +// sparql::blank_node_label, +// )), +// )(input)?; + +// Ok(( +// remainder, +// Identifier( +// resolve_prefixed_name(&self.prefixes.borrow(), name) +// .map_err(|e| Err::Failure(e.at(input)))?, +// ), +// )) +// }, +// || ParseError::ExpectedIriIdentifier, +// ) +// } + +// /// Parse an IRI-like identifier. 
+// /// +// /// This is being used for: +// /// * predicate names +// /// * built-in functions in term trees +// fn parse_iri_like_identifier( +// &'a self, +// ) -> impl FnMut(Span<'a>) -> IntermediateResult { +// traced( +// "parse_iri_like_identifier", +// map_error( +// alt(( +// self.parse_iri_identifier(), +// self.parse_bare_iri_like_identifier(), +// )), +// || ParseError::ExpectedIriLikeIdentifier, +// ), +// ) +// } + +// /// Parse a qualified predicate name – currently, this is a +// /// predicate name together with its arity. +// /// +// /// FIXME: Obsolete. Can be removed in the future. +// fn parse_qualified_predicate_name( +// &'a self, +// ) -> impl FnMut(Span<'a>) -> IntermediateResult<(Identifier, usize)> { +// traced( +// "parse_qualified_predicate_name", +// pair( +// self.parse_iri_like_identifier(), +// preceded( +// multispace_or_comment0, +// delimited( +// token("["), +// cut(map_res(digit1, |number: Span<'a>| number.parse::())), +// cut(token("]")), +// ), +// ), +// ), +// ) +// } + +// /// Parse an IRI-like identifier (e.g. a predicate name) that is not an IRI. +// fn parse_bare_iri_like_identifier( +// &'a self, +// ) -> impl FnMut(Span<'a>) -> IntermediateResult { +// traced("parse_bare_iri_like_identifier", move |input| { +// let (remainder, name) = parse_bare_name(input)?; + +// Ok((remainder, Identifier(name.to_string()))) +// }) +// } + +// /// Parse a rule. +// fn parse_rule(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { +// traced( +// "parse_rule", +// map_error( +// move |input| { +// let (remainder, (head, body)) = pair( +// terminated( +// separated_list1(self.parse_comma(), self.parse_atom()), +// self.parse_arrow(), +// ), +// cut(terminated( +// separated_list1(self.parse_comma(), self.parse_body_expression()), +// self.parse_dot(), +// )), +// )(input)?; + +// log::trace!(target: "parser", r#"found rule "{head:?}" :- "{body:?}""#); + +// let literals = body +// .iter() +// .filter_map(|expr| match expr { +// BodyExpression::Literal(l) => Some(l.clone()), +// _ => None, +// }) +// .collect(); +// let constraints = body +// .into_iter() +// .filter_map(|expr| match expr { +// BodyExpression::Constraint(c) => Some(c), +// _ => None, +// }) +// .collect(); +// Ok(( +// remainder, +// Rule::new_validated(head, literals, constraints) +// .map_err(|e| Err::Failure(e.at(input)))?, +// )) +// }, +// || ParseError::ExpectedRule, +// ), +// ) +// } + +// /// Parse an atom. +// fn parse_atom(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { +// traced( +// "parse_atom", +// map_error( +// move |input| { +// let (remainder, predicate) = self.parse_iri_like_identifier()(input)?; +// let (remainder, terms) = delimited( +// self.parse_open_parenthesis(), +// cut(separated_list1(self.parse_comma(), self.parse_term())), +// cut(self.parse_close_parenthesis()), +// )(remainder)?; + +// let predicate_name = predicate.name(); +// log::trace!(target: "parser", "found atom {predicate_name}({terms:?})"); + +// Ok((remainder, Atom::new(predicate, terms))) +// }, +// || ParseError::ExpectedAtom, +// ), +// ) +// } + +// /// Parse a [PrimitiveTerm]. +// fn parse_primitive_term(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { +// traced( +// "parse_primitive_term", +// map_error( +// alt((parse_ground_term(&self.prefixes), self.parse_variable())), +// || ParseError::ExpectedPrimitiveTerm, +// ), +// ) +// } + +// /// Parse an aggregate term. 
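Before the aggregate parser below, the `predicate[arity]` form handled by `parse_qualified_predicate_name` above reduces to this self-contained sketch (identifier and bracket handling simplified):

use nom::{
    bytes::complete::tag,
    character::complete::{alpha1, digit1},
    combinator::map_res,
    sequence::{delimited, pair},
    IResult,
};

// Parse `name[3]` into the name and its arity.
fn qualified_name(input: &str) -> IResult<&str, (&str, usize)> {
    pair(
        alpha1,
        delimited(tag("["), map_res(digit1, str::parse::<usize>), tag("]")),
    )(input)
}

fn main() {
    assert_eq!(qualified_name("drink[1]"), Ok(("", ("drink", 1))));
}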
+// fn parse_aggregate(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { +// traced( +// "parse_aggregate", +// map_error( +// move |input| { +// let (remainder, _) = nom::character::complete::char('#')(input)?; +// let (remainder, aggregate_operation_identifier) = +// self.parse_bare_iri_like_identifier()(remainder)?; +// let (remainder, terms) = self +// .parenthesised(separated_list1(self.parse_comma(), self.parse_term()))( +// remainder, +// )?; + +// if let Some(logical_aggregate_operation) = +// (&aggregate_operation_identifier).into() +// { +// let aggregate = Aggregate { +// logical_aggregate_operation, +// terms, +// }; + +// Ok((remainder, Term::Aggregation(aggregate))) +// } else { +// Err(Err::Failure( +// ParseError::UnknownAggregateOperation( +// aggregate_operation_identifier.name(), +// ) +// .at(input), +// )) +// } +// }, +// || ParseError::ExpectedAggregate, +// ), +// ) +// } + +// /// Parse a variable. +// fn parse_variable(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { +// traced( +// "parse_variable", +// map_error( +// map( +// alt(( +// self.parse_universal_variable(), +// self.parse_existential_variable(), +// )), +// PrimitiveTerm::Variable, +// ), +// || ParseError::ExpectedVariable, +// ), +// ) +// } + +// /// Parse a universally quantified variable. +// fn parse_universal_variable(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { +// traced( +// "parse_universal_variable", +// map_error( +// map( +// preceded(token("?"), cut(self.parse_variable_name())), +// Variable::Universal, +// ), +// || ParseError::ExpectedUniversalVariable, +// ), +// ) +// } + +// /// Parse an existentially quantified variable. +// fn parse_existential_variable( +// &'a self, +// ) -> impl FnMut(Span<'a>) -> IntermediateResult { +// traced( +// "parse_existential_variable", +// map_error( +// map( +// preceded(token("!"), cut(self.parse_variable_name())), +// Variable::Existential, +// ), +// || ParseError::ExpectedExistentialVariable, +// ), +// ) +// } + +// /// Parse a variable name. +// fn parse_variable_name(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { +// traced( +// "parse_variable", +// map_error( +// move |input| { +// let (remainder, name) = parse_simple_name(input)?; + +// Ok((remainder, name.to_string())) +// }, +// || ParseError::ExpectedVariableName, +// ), +// ) +// } + +// /// Parse a literal (i.e., a possibly negated atom). +// fn parse_literal(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { +// traced( +// "parse_literal", +// map_error( +// alt((self.parse_negative_literal(), self.parse_positive_literal())), +// || ParseError::ExpectedLiteral, +// ), +// ) +// } + +// /// Parse a non-negated literal. +// fn parse_positive_literal(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { +// traced( +// "parse_positive_literal", +// map_error(map(self.parse_atom(), Literal::Positive), || { +// ParseError::ExpectedPositiveLiteral +// }), +// ) +// } + +// /// Parse a negated literal. 
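`parse_universal_variable` above wraps the name parser in `cut` so that a `?` without a following name is a hard failure rather than a silent backtrack. A freestanding sketch of that behavior:

use nom::{
    branch::alt,
    bytes::complete::tag,
    character::complete::alpha1,
    combinator::cut,
    sequence::preceded,
    IResult,
};

// `?name` parses a variable; `cut` turns "saw `?` but no name"
// into an unrecoverable failure instead of letting `alt` try
// the fallback branch.
fn variable(input: &str) -> IResult<&str, &str> {
    preceded(tag("?"), cut(alpha1))(input)
}

fn term(input: &str) -> IResult<&str, &str> {
    alt((variable, alpha1))(input)
}

fn main() {
    assert_eq!(term("?x"), Ok(("", "x")));
    assert_eq!(term("abc"), Ok(("", "abc")));
    // With the sigil present but the name missing, the error is a
    // Failure: `alt` does not fall through to the bare-name branch.
    assert!(matches!(term("?1"), Err(nom::Err::Failure(_))));
}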
+// fn parse_negative_literal(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult<Literal> {
+//     traced(
+//         "parse_negative_literal",
+//         map_error(
+//             map(
+//                 preceded(self.parse_not(), cut(self.parse_atom())),
+//                 Literal::Negative,
+//             ),
+//             || ParseError::ExpectedNegativeLiteral,
+//         ),
+//     )
+// }
+
+// /// Parse an operation that filters a variable.
+// fn parse_constraint_operator(
+//     &'a self,
+// ) -> impl FnMut(Span<'a>) -> IntermediateResult<ConstraintOperator> {
+//     traced(
+//         "parse_constraint_operator",
+//         map_error(
+//             delimited(
+//                 multispace_or_comment0,
+//                 alt((
+//                     value(ConstraintOperator::LessThanEq, token("<=")),
+//                     value(ConstraintOperator::LessThan, token("<")),
+//                     value(ConstraintOperator::Equals, token("=")),
+//                     value(ConstraintOperator::Unequals, token("!=")),
+//                     value(ConstraintOperator::GreaterThanEq, token(">=")),
+//                     value(ConstraintOperator::GreaterThan, token(">")),
+//                 )),
+//                 multispace_or_comment0,
+//             ),
+//             || ParseError::ExpectedFilterOperator,
+//         ),
+//     )
+// }
+
+// /// Parse a term tree.
+// ///
+// /// This may consist of:
+// /// * A function term
+// /// * An arithmetic expression, which handles e.g. precedence of addition over multiplication
+// fn parse_term(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult<Term> {
+//     traced(
+//         "parse_term",
+//         map_error(
+//             move |input| {
+//                 delimited(
+//                     multispace_or_comment0,
+//                     alt((
+//                         self.parse_arithmetic_expression(),
+//                         // map(self.parse_constraint(), |c| c.as_binary_term()),
+//                         self.parse_parenthesised_term(),
+//                         self.parse_function_term(),
+//                         self.parse_aggregate(),
+//                         self.parse_wildcard(),
+//                     )),
+//                     multispace_or_comment0,
+//                 )(input)
+//             },
+//             || ParseError::ExpectedTerm,
+//         ),
+//     )
+// }
+
+// /// Parse a wildcard variable.
+// fn parse_wildcard(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult<Term> {
+//     traced(
+//         "parse_wildcard",
+//         map_res(space_delimited_token("_"), |_| {
+//             let wildcard = Variable::new_unamed(*self.wildcard_generator.borrow());
+//             *self.wildcard_generator.borrow_mut() += 1;
+//             Ok::<_, ParseError>(Term::Primitive(PrimitiveTerm::Variable(wildcard)))
+//         }),
+//     )
+// }
+
+// /// Parse a parenthesised term tree.
+// fn parse_parenthesised_term(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult<Term> {
+//     traced(
+//         "parse_parenthesised_term",
+//         map_error(self.parenthesised(self.parse_term()), || {
+//             ParseError::ExpectedParenthesisedTerm
+//         }),
+//     )
+// }
+
+// /// Parse a function term, possibly with nested term trees.
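An aside before the function-term parser below: note the branch order inside `parse_constraint_operator` above. `<=` and `>=` must be tried before their one-character prefixes `<` and `>`, because `alt` commits to the first branch that succeeds. A tiny sketch of why the order matters:

use nom::{branch::alt, bytes::complete::tag, IResult};

fn op_correct(input: &str) -> IResult<&str, &str> {
    alt((tag("<="), tag("<")))(input)
}

fn op_wrong(input: &str) -> IResult<&str, &str> {
    alt((tag("<"), tag("<=")))(input)
}

fn main() {
    assert_eq!(op_correct("<= 3"), Ok((" 3", "<=")));
    // The mis-ordered variant stops after `<` and leaves `= 3` behind.
    assert_eq!(op_wrong("<= 3"), Ok(("= 3", "<")));
}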
+// fn parse_function_term(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { +// traced( +// "parse_function_term", +// map_error( +// move |input| { +// let (remainder, name) = self.parse_iri_like_identifier()(input)?; + +// if let Ok(op) = UnaryOperation::construct_from_name(&name.0) { +// let (remainder, subterm) = +// (self.parenthesised(self.parse_term()))(remainder)?; + +// Ok((remainder, Term::Unary(op, Box::new(subterm)))) +// } else if let Some(op) = BinaryOperation::construct_from_name(&name.0) { +// let (remainder, (left, _, right)) = (self.parenthesised(tuple(( +// self.parse_term(), +// self.parse_comma(), +// self.parse_term(), +// ))))(remainder)?; + +// Ok(( +// remainder, +// Term::Binary { +// operation: op, +// lhs: Box::new(left), +// rhs: Box::new(right), +// }, +// )) +// } else if let Some(op) = TernaryOperation::construct_from_name(&name.0) { +// let (remainder, (first, _, second, _, third)) = +// (self.parenthesised(tuple(( +// self.parse_term(), +// self.parse_comma(), +// self.parse_term(), +// self.parse_comma(), +// self.parse_term(), +// ))))(remainder)?; + +// Ok(( +// remainder, +// Term::Ternary { +// operation: op, +// first: Box::new(first), +// second: Box::new(second), +// third: Box::new(third), +// }, +// )) +// } else if let Some(op) = NaryOperation::construct_from_name(&name.0) { +// let (remainder, subterms) = (self.parenthesised(separated_list0( +// self.parse_comma(), +// self.parse_term(), +// )))(remainder)?; + +// Ok(( +// remainder, +// Term::Nary { +// operation: op, +// parameters: subterms, +// }, +// )) +// } else { +// let (remainder, subterms) = (self.parenthesised(separated_list0( +// self.parse_comma(), +// self.parse_term(), +// )))(remainder)?; + +// Ok((remainder, Term::Function(name, subterms))) +// } +// }, +// || ParseError::ExpectedFunctionTerm, +// ), +// ) +// } + +// /// Parse an arithmetic expression +// fn parse_arithmetic_expression(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { +// traced( +// "parse_arithmetic_expression", +// map_error( +// move |input| { +// let (remainder, first) = self.parse_arithmetic_product()(input)?; +// let (remainder, expressions) = many0(alt(( +// preceded( +// delimited(multispace_or_comment0, token("+"), multispace_or_comment0), +// map(self.parse_arithmetic_product(), |term| { +// (ArithmeticOperator::Addition, term) +// }), +// ), +// preceded( +// delimited(multispace_or_comment0, token("-"), multispace_or_comment0), +// map(self.parse_arithmetic_product(), |term| { +// (ArithmeticOperator::Subtraction, term) +// }), +// ), +// )))(remainder)?; + +// Ok(( +// remainder, +// Self::fold_arithmetic_expressions(first, expressions), +// )) +// }, +// || ParseError::ExpectedArithmeticExpression, +// ), +// ) +// } + +// /// Parse an arithmetic product, i.e., an expression involving +// /// only `*` and `/` over subexpressions. 
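The expression parser above and the product parser below implement the usual precedence layering: an expression is a product followed by any number of `+`/`-` products, and the collected `(operator, term)` pairs are folded to the left so that `1 - 2 - 3` reads as `(1 - 2) - 3`. A compact, freestanding version of the same scheme over plain integers:

use nom::{
    branch::alt,
    bytes::complete::tag,
    character::complete::{digit1, space0},
    combinator::map_res,
    multi::many0,
    sequence::{delimited, pair},
    IResult,
};

fn factor(input: &str) -> IResult<&str, i64> {
    delimited(space0, map_res(digit1, str::parse::<i64>), space0)(input)
}

// product := factor (('*' | '/') factor)*
fn product(input: &str) -> IResult<&str, i64> {
    let (rest, first) = factor(input)?;
    let (rest, ops) = many0(pair(alt((tag("*"), tag("/"))), factor))(rest)?;
    Ok((rest, ops.into_iter().fold(first, |acc, (op, rhs)| {
        if op == "*" { acc * rhs } else { acc / rhs }
    })))
}

// expression := product (('+' | '-') product)*  — the left fold gives
// left associativity, mirroring the fold used by this parser.
fn expression(input: &str) -> IResult<&str, i64> {
    let (rest, first) = product(input)?;
    let (rest, ops) = many0(pair(alt((tag("+"), tag("-"))), product))(rest)?;
    Ok((rest, ops.into_iter().fold(first, |acc, (op, rhs)| {
        if op == "+" { acc + rhs } else { acc - rhs }
    })))
}

fn main() {
    assert_eq!(expression("1 - 2 - 3"), Ok(("", -4)));
    assert_eq!(expression("2 + 3 * 4"), Ok(("", 14)));
}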
+// fn parse_arithmetic_product(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { +// traced( +// "parse_arithmetic_product", +// map_error( +// move |input| { +// let (remainder, first) = self.parse_arithmetic_factor()(input)?; +// let (remainder, factors) = many0(alt(( +// preceded( +// delimited(multispace_or_comment0, token("*"), multispace_or_comment0), +// map(self.parse_arithmetic_factor(), |term| { +// (ArithmeticOperator::Multiplication, term) +// }), +// ), +// preceded( +// delimited(multispace_or_comment0, token("/"), multispace_or_comment0), +// map(self.parse_arithmetic_factor(), |term| { +// (ArithmeticOperator::Division, term) +// }), +// ), +// )))(remainder)?; + +// Ok((remainder, Self::fold_arithmetic_expressions(first, factors))) +// }, +// || ParseError::ExpectedArithmeticProduct, +// ), +// ) +// } + +// /// Parse an arithmetic factor. +// fn parse_arithmetic_factor(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { +// traced( +// "parse_arithmetic_factor", +// map_error( +// alt(( +// self.parse_function_term(), +// self.parse_aggregate(), +// map(self.parse_primitive_term(), Term::Primitive), +// self.parse_parenthesised_term(), +// )), +// || ParseError::ExpectedArithmeticFactor, +// ), +// ) +// } + +// /// Fold a sequence of ([ArithmeticOperator], [PrimitiveTerm]) pairs into a single [Term]. +// fn fold_arithmetic_expressions( +// initial: Term, +// sequence: Vec<(ArithmeticOperator, Term)>, +// ) -> Term { +// sequence.into_iter().fold(initial, |acc, pair| { +// let (operation, expression) = pair; + +// use ArithmeticOperator::*; + +// let operation = match operation { +// Addition => BinaryOperation::NumericAddition, +// Subtraction => BinaryOperation::NumericSubtraction, +// Multiplication => BinaryOperation::NumericMultiplication, +// Division => BinaryOperation::NumericDivision, +// }; + +// Term::Binary { +// operation, +// lhs: Box::new(acc), +// rhs: Box::new(expression), +// } +// }) +// } + +// /// Parse expression of the form ` ` expressing a constraint. +// fn parse_constraint(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { +// traced( +// "parse_constraint", +// map_error( +// map( +// tuple(( +// self.parse_term(), +// self.parse_constraint_operator(), +// cut(self.parse_term()), +// )), +// |(lhs, operation, rhs)| operation.into_constraint(lhs, rhs), +// ), +// || ParseError::ExpectedConstraint, +// ), +// ) +// } + +// /// Parse body expression +// fn parse_body_expression( +// &'a self, +// ) -> impl FnMut(Span<'a>) -> IntermediateResult { +// traced( +// "parse_body_expression", +// map_error( +// alt(( +// map(self.parse_constraint(), BodyExpression::Constraint), +// map(self.parse_literal(), BodyExpression::Literal), +// )), +// || ParseError::ExpectedBodyExpression, +// ), +// ) +// } + +// /// Parse a program in the rules language. 
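Before the program parser below: `fold_arithmetic_expressions` above is a plain left fold into a binary tree. A toy version that makes the resulting left-leaning shape visible:

#[derive(Debug, PartialEq)]
enum Expr {
    Num(i64),
    Sub(Box<Expr>, Box<Expr>),
}

// Fold a leading term and a list of `- term` tails into a
// left-leaning tree, as fold_arithmetic_expressions does.
fn fold_sub(first: Expr, rest: Vec<Expr>) -> Expr {
    rest.into_iter()
        .fold(first, |acc, rhs| Expr::Sub(Box::new(acc), Box::new(rhs)))
}

fn main() {
    let e = fold_sub(Expr::Num(1), vec![Expr::Num(2), Expr::Num(3)]);
    // (1 - 2) - 3, not 1 - (2 - 3)
    assert_eq!(
        e,
        Expr::Sub(
            Box::new(Expr::Sub(Box::new(Expr::Num(1)), Box::new(Expr::Num(2)))),
            Box::new(Expr::Num(3)),
        )
    );
}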
+// pub fn parse_program(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { +// fn check_for_invalid_statement<'a, F>( +// parser: &mut F, +// input: Span<'a>, +// ) -> IntermediateResult<'a, ()> +// where +// F: FnMut(Span<'a>) -> IntermediateResult, +// { +// if let Ok((_, e)) = parser(input) { +// return Err(Err::Failure(e.at(input))); +// } + +// Ok((input, ())) +// } + +// traced("parse_program", move |input| { +// let (remainder, _) = multispace_or_comment0(input)?; +// let (remainder, _) = opt(self.parse_base())(remainder)?; + +// check_for_invalid_statement( +// &mut map(self.parse_base(), |_| ParseError::LateBaseDeclaration), +// remainder, +// )?; + +// let (remainder, _) = many0(self.parse_prefix())(remainder)?; + +// check_for_invalid_statement( +// &mut map(self.parse_base(), |_| ParseError::LateBaseDeclaration), +// remainder, +// )?; +// check_for_invalid_statement( +// &mut map(self.parse_prefix(), |_| ParseError::LatePrefixDeclaration), +// remainder, +// )?; + +// let mut statements = Vec::new(); +// let mut output_predicates = Vec::new(); +// let mut sources = Vec::new(); +// let mut imports = Vec::new(); +// let mut exports = Vec::new(); + +// let (remainder, _) = many0(alt(( +// map(self.parse_source(), |source| sources.push(source)), +// map(self.parse_import(), |import| imports.push(import)), +// map(self.parse_export(), |export| exports.push(export)), +// map(self.parse_statement(), |statement| { +// statements.push(statement) +// }), +// map(self.parse_output_directive(), |output_predicate| { +// output_predicates.push(output_predicate) +// }), +// )))(remainder)?; + +// check_for_invalid_statement( +// &mut map(self.parse_base(), |_| ParseError::LateBaseDeclaration), +// remainder, +// )?; +// check_for_invalid_statement( +// &mut map(self.parse_prefix(), |_| ParseError::LatePrefixDeclaration), +// remainder, +// )?; + +// let base = self.base().map(String::from); +// let prefixes = self +// .prefixes +// .borrow() +// .iter() +// .map(|(&prefix, &iri)| (prefix.to_string(), iri.to_string())) +// .collect::>(); +// let mut rules = Vec::new(); +// let mut facts = Vec::new(); + +// statements.iter().for_each(|statement| match statement { +// Statement::Fact(value) => facts.push(value.clone()), +// Statement::Rule(value) => rules.push(value.clone()), +// }); + +// let mut program_builder = Program::builder() +// .prefixes(prefixes) +// .imports(sources) +// .imports(imports) +// .exports(exports) +// .rules(rules) +// .facts(facts); + +// if let Some(base) = base { +// program_builder = program_builder.base(base); +// } + +// if !output_predicates.is_empty() { +// program_builder = program_builder.output_predicates(output_predicates); +// } + +// Ok((remainder, program_builder.build())) +// }) +// } + +// /// Return the declared base, if set, or None. +// #[must_use] +// fn base(&self) -> Option<&'a str> { +// *self.base.borrow() +// } +// } + +// #[cfg(test)] +// mod test { +// use super::*; +// use std::assert_matches::assert_matches; +// use test_log::test; + +// macro_rules! assert_parse { +// ($parser:expr, $left:expr, $right:expr $(,) ?) => { +// assert_eq!( +// all_input_consumed($parser)($left).expect( +// format!("failed to parse `{:?}`\nexpected `{:?}`", $left, $right).as_str() +// ), +// $right +// ); +// }; +// } + +// macro_rules! assert_fails { +// ($parser:expr, $left:expr, $right:pat $(,) ?) 
=> {{ +// // Store in intermediate variable to prevent from being dropped too early +// let result = all_input_consumed($parser)($left); +// assert_matches!(result, Err($right)) +// }}; +// } + +// macro_rules! assert_parse_error { +// ($parser:expr, $left:expr, $right:pat $(,) ?) => { +// assert_fails!($parser, $left, LocatedParseError { source: $right, .. }) +// }; +// } + +// macro_rules! assert_expected_token { +// ($parser:expr, $left:expr, $right:expr $(,) ?) => { +// let _token = String::from($right); +// assert_parse_error!($parser, $left, ParseError::ExpectedToken(_token),); +// }; +// } + +// #[test] +// fn base_directive() { +// let base = "http://example.org/foo"; +// let input = format!("@base <{base}> ."); +// let parser = RuleParser::new(); +// let b = Identifier(base.to_string()); +// assert!(parser.base().is_none()); +// assert_parse!(parser.parse_base(), input.as_str(), b); +// assert_eq!(parser.base(), Some(base)); +// } + +// #[test] +// fn prefix_directive() { +// let prefix = unsafe { Span::new_from_raw_offset(8, 1, "foo", ()) }; +// let iri = "http://example.org/foo"; +// let input = format!("@prefix {prefix}: <{iri}> ."); +// let parser = RuleParser::new(); +// assert!(resolve_prefix(&parser.prefixes.borrow(), &prefix).is_err()); +// assert_parse!(parser.parse_prefix(), input.as_str(), prefix); +// assert_eq!( +// resolve_prefix(&parser.prefixes.borrow(), &prefix).map_err(|_| ()), +// Ok(iri) +// ); +// } + +// #[test] +// #[cfg_attr(miri, ignore)] +// fn source() { +// /// Helper function to create source-like imports +// fn csv_import(predicate: Identifier, filename: &str, arity: i64) -> ImportDirective { +// let attributes = MapDataValue::from_iter([ +// ( +// AnyDataValue::new_iri(PARAMETER_NAME_RESOURCE.to_string()), +// AnyDataValue::new_plain_string(filename.to_string()), +// ), +// ( +// AnyDataValue::new_iri(PARAMETER_NAME_FORMAT.to_string()), +// TupleDataValue::from_iter( +// vec![ +// VALUE_FORMAT_ANY; +// usize::try_from(arity).expect("required for these tests") +// ] +// .iter() +// .map(|format| AnyDataValue::new_plain_string((*format).to_string())) +// .collect::>(), +// ) +// .into(), +// ), +// ]); +// ImportDirective::from(ImportExportDirective { +// predicate, +// format: FileFormat::CSV, +// attributes, +// }) +// } + +// let parser = RuleParser::new(); +// let file = "drinks.csv"; +// let predicate_name = "drink"; +// let predicate = Identifier(predicate_name.to_string()); +// let default_import = csv_import(predicate.clone(), file, 1); + +// // rulewerk accepts all of these variants +// let input = format!(r#"@source {predicate_name}[1]: load-csv("{file}") ."#); +// assert_parse!(parser.parse_source(), &input, default_import); +// let input = format!(r#"@source {predicate_name}[1] : load-csv("{file}") ."#); +// assert_parse!(parser.parse_source(), &input, default_import); +// let input = format!(r#"@source {predicate_name}[1] : load-csv ( "{file}" ) ."#); +// assert_parse!(parser.parse_source(), &input, default_import); +// let input = format!(r#"@source {predicate_name} [1] : load-csv ( "{file}" ) ."#); +// assert_parse!(parser.parse_source(), &input, default_import); +// } + +// #[test] +// fn fact() { +// let parser = RuleParser::new(); +// let predicate = "p"; +// let value = "foo"; +// let datatype = "bar"; +// let p = Identifier(predicate.to_string()); +// let v = value.to_string(); +// let t = datatype.to_string(); +// let fact = format!(r#"{predicate}("{value}"^^<{datatype}>) ."#); + +// let expected_fact = Fact(Atom::new( +// p, +// 
vec![Term::Primitive(PrimitiveTerm::GroundTerm( +// AnyDataValue::new_from_typed_literal(v, t).expect("unknown types should work"), +// ))], +// )); + +// assert_parse!(parser.parse_fact(), &fact, expected_fact,); +// } + +// #[test] +// fn fact_namespaced() { +// let parser = RuleParser::new(); +// let predicate = "p"; +// let name = "foo"; +// let prefix = unsafe { Span::new_from_raw_offset(8, 1, "eg", ()) }; +// let iri = "http://example.org/foo"; +// let prefix_declaration = format!("@prefix {prefix}: <{iri}> ."); +// let p = Identifier(predicate.to_string()); +// let pn = format!("{prefix}:{name}"); +// let v = format!("{iri}{name}"); +// let fact = format!(r#"{predicate}({pn}) ."#); + +// assert_parse!(parser.parse_prefix(), &prefix_declaration, prefix); + +// let expected_fact = Fact(Atom::new( +// p, +// vec![Term::Primitive(PrimitiveTerm::GroundTerm( +// AnyDataValue::new_iri(v), +// ))], +// )); + +// assert_parse!(parser.parse_fact(), &fact, expected_fact,); +// } + +// #[test] +// fn fact_bnode() { +// let parser = RuleParser::new(); +// let predicate = "p"; +// let name = "foo"; +// let p = Identifier(predicate.to_string()); +// let pn = format!("_:{name}"); +// let fact = format!(r#"{predicate}({pn}) ."#); + +// let expected_fact = Fact(Atom::new( +// p, +// vec![Term::Primitive(PrimitiveTerm::GroundTerm( +// AnyDataValue::new_iri(pn), +// ))], +// )); + +// assert_parse!(parser.parse_fact(), &fact, expected_fact,); +// } + +// #[test] +// fn fact_numbers() { +// let parser = RuleParser::new(); +// let predicate = "p"; +// let p = Identifier(predicate.to_string()); +// let int = 23_i64; +// let dbl = 42.0; +// let dec = 13.37; +// let fact = format!(r#"{predicate}({int}, {dbl:.1}E0, {dec:.2}) ."#); + +// let expected_fact = Fact(Atom::new( +// p, +// vec![ +// Term::Primitive(PrimitiveTerm::GroundTerm( +// AnyDataValue::new_integer_from_i64(int), +// )), +// Term::Primitive(PrimitiveTerm::GroundTerm( +// AnyDataValue::new_double_from_f64(dbl).expect("is not NaN"), +// )), +// Term::Primitive(PrimitiveTerm::GroundTerm( +// AnyDataValue::new_double_from_f64(dec).expect("is not NaN"), +// )), +// ], +// )); + +// assert_parse!(parser.parse_fact(), &fact, expected_fact,); +// } + +// #[test] +// fn fact_rdf_literal_xsd_string() { +// let parser = RuleParser::new(); + +// let prefix = unsafe { Span::new_from_raw_offset(8, 1, "xsd", ()) }; +// let iri = "http://www.w3.org/2001/XMLSchema#"; +// let prefix_declaration = format!("@prefix {prefix}: <{iri}> ."); + +// assert_parse!(parser.parse_prefix(), &prefix_declaration, prefix); + +// let predicate = "p"; +// let value = "my nice string"; +// let datatype = "xsd:string"; + +// let p = Identifier(predicate.to_string()); +// let v = value.to_string(); +// let fact = format!(r#"{predicate}("{value}"^^{datatype}) ."#); + +// let expected_fact = Fact(Atom::new( +// p, +// vec![Term::Primitive(PrimitiveTerm::GroundTerm( +// AnyDataValue::new_plain_string(v), +// ))], +// )); + +// assert_parse!(parser.parse_fact(), &fact, expected_fact,); +// } + +// #[test] +// fn fact_string_literal() { +// let parser = RuleParser::new(); +// let predicate = "p"; +// let value = "my nice string"; +// let p = Identifier(predicate.to_string()); +// let v = value.to_string(); +// let fact = format!(r#"{predicate}("{value}") ."#); + +// let expected_fact = Fact(Atom::new( +// p, +// vec![Term::Primitive(PrimitiveTerm::GroundTerm( +// AnyDataValue::new_plain_string(v), +// ))], +// )); + +// assert_parse!(parser.parse_fact(), &fact, expected_fact,); +// } + 
+// #[test] +// fn fact_language_string() { +// let parser = RuleParser::new(); +// let predicate = "p"; +// let v = "Qapla"; +// let langtag = "tlh"; +// let p = Identifier(predicate.to_string()); +// let value = v.to_string(); +// let fact = format!(r#"{predicate}("{v}"@{langtag}) ."#); +// let tag = langtag.to_string(); + +// let expected_fact = Fact(Atom::new( +// p, +// vec![Term::Primitive(PrimitiveTerm::GroundTerm( +// AnyDataValue::new_language_tagged_string(value, tag), +// ))], +// )); + +// assert_parse!(parser.parse_fact(), &fact, expected_fact); +// } + +// #[test] +// fn fact_abstract() { +// let parser = RuleParser::new(); +// let predicate = "p"; +// let name = "a"; +// let p = Identifier(predicate.to_string()); +// let fact = format!(r#"{predicate}({name}) ."#); + +// let expected_fact = Fact(Atom::new( +// p, +// vec![Term::Primitive(PrimitiveTerm::GroundTerm( +// AnyDataValue::new_iri(name.to_string()), +// ))], +// )); + +// assert_parse!(parser.parse_fact(), &fact, expected_fact,); +// } + +// #[test] +// fn fact_comment() { +// let parser = RuleParser::new(); +// let predicate = "p"; +// let value = "foo"; +// let datatype = "bar"; +// let p = Identifier(predicate.to_string()); +// let v = value.to_string(); +// let t = datatype.to_string(); +// let fact = format!( +// r#"{predicate}(% comment 1 +// "{value}"^^<{datatype}> % comment 2 +// ) % comment 3 +// . % comment 4 +// %"# +// ); + +// let expected_fact = Fact(Atom::new( +// p, +// vec![Term::Primitive(PrimitiveTerm::GroundTerm( +// AnyDataValue::new_from_typed_literal(v, t) +// .expect("unknown datatype should always work"), +// ))], +// )); + +// assert_parse!(parser.parse_fact(), &fact, expected_fact,); +// } + +// #[test] +// #[cfg_attr(miri, ignore)] +// fn filter() { +// let parser = RuleParser::new(); +// let aa = "A"; +// let a = Identifier(aa.to_string()); +// let bb = "B"; +// let b = Identifier(bb.to_string()); +// let pp = "P"; +// let p = Identifier(pp.to_string()); +// let xx = "X"; +// let x = xx.to_string(); +// let yy = "Y"; +// let y = yy.to_string(); +// let zz = "Z"; +// let z = zz.to_string(); + +// let rule = format!( +// "{pp}(?{xx}) :- {aa}(?{xx}, ?{yy}), ?{yy} > ?{xx}, {bb}(?{zz}), ?{xx} = 3, ?{zz} < 7, ?{xx} <= ?{zz}, ?{zz} >= ?{yy} ." 
+// ); + +// let expected_rule = Rule::new( +// vec![Atom::new( +// p, +// vec![Term::Primitive(PrimitiveTerm::Variable( +// Variable::Universal(x.clone()), +// ))], +// )], +// vec![ +// Literal::Positive(Atom::new( +// a, +// vec![ +// Term::Primitive(PrimitiveTerm::Variable(Variable::Universal(x.clone()))), +// Term::Primitive(PrimitiveTerm::Variable(Variable::Universal(y.clone()))), +// ], +// )), +// Literal::Positive(Atom::new( +// b, +// vec![Term::Primitive(PrimitiveTerm::Variable( +// Variable::Universal(z.clone()), +// ))], +// )), +// ], +// vec![ +// Constraint::GreaterThan( +// Term::Primitive(PrimitiveTerm::Variable(Variable::Universal(y.clone()))), +// Term::Primitive(PrimitiveTerm::Variable(Variable::Universal(x.clone()))), +// ), +// Constraint::Equals( +// Term::Primitive(PrimitiveTerm::Variable(Variable::Universal(x.clone()))), +// Term::Primitive(PrimitiveTerm::GroundTerm( +// AnyDataValue::new_integer_from_i64(3), +// )), +// ), +// Constraint::LessThan( +// Term::Primitive(PrimitiveTerm::Variable(Variable::Universal(z.clone()))), +// Term::Primitive(PrimitiveTerm::GroundTerm( +// AnyDataValue::new_integer_from_i64(7), +// )), +// ), +// Constraint::LessThanEq( +// Term::Primitive(PrimitiveTerm::Variable(Variable::Universal(x))), +// Term::Primitive(PrimitiveTerm::Variable(Variable::Universal(z.clone()))), +// ), +// Constraint::GreaterThanEq( +// Term::Primitive(PrimitiveTerm::Variable(Variable::Universal(z))), +// Term::Primitive(PrimitiveTerm::Variable(Variable::Universal(y))), +// ), +// ], +// ); + +// assert_parse!(parser.parse_rule(), &rule, expected_rule,); +// } + +// #[test] +// #[allow(clippy::redundant_clone)] +// fn parse_output() { +// let parser = RuleParser::new(); + +// let j2 = Identifier("J2".to_string()); + +// assert_parse!(parser.parse_output_directive(), "@output J2 .", j2.clone()); +// assert_parse_error!( +// parser.parse_output_directive(), +// "@output J2[3] .", +// ParseError::ExpectedOutputDeclaration +// ); +// } + +// #[test] +// fn parse_errors() { +// let parser = RuleParser::new(); + +// assert_expected_token!(parser.parse_dot(), "", "."); +// assert_expected_token!(parser.parse_dot(), ":-", "."); +// assert_expected_token!(parser.parse_comma(), "", ","); +// assert_expected_token!(parser.parse_comma(), ":-", ","); +// assert_expected_token!(parser.parse_not(), "", "~"); +// assert_expected_token!(parser.parse_not(), ":-", "~"); +// assert_expected_token!(parser.parse_arrow(), "", ":-"); +// assert_expected_token!(parser.parse_arrow(), "-:", ":-"); +// assert_expected_token!(parser.parse_open_parenthesis(), "", "("); +// assert_expected_token!(parser.parse_open_parenthesis(), "-:", "("); +// assert_expected_token!(parser.parse_close_parenthesis(), "", ")"); +// assert_expected_token!(parser.parse_close_parenthesis(), "-:", ")"); + +// assert_parse_error!( +// parser.parse_base(), +// "@base . @base .", +// ParseError::LateBaseDeclaration +// ); + +// assert_parse_error!( +// parser.parse_program(), +// "@prefix f: . @base .", +// ParseError::LateBaseDeclaration +// ); + +// assert_parse_error!( +// parser.parse_program(), +// "@output p . @base .", +// ParseError::LateBaseDeclaration +// ); + +// assert_parse_error!( +// parser.parse_program(), +// "@output p . 
@prefix g: .", +// ParseError::LatePrefixDeclaration +// ); +// } +// #[test] +// #[cfg_attr(miri, ignore)] +// fn parse_function_terms() { +// let parser = RuleParser::new(); + +// let twenty_three = Term::Primitive(PrimitiveTerm::GroundTerm( +// AnyDataValue::new_integer_from_i64(23), +// )); +// let fourty_two = Term::Primitive(PrimitiveTerm::GroundTerm( +// AnyDataValue::new_integer_from_i64(42), +// )); +// let twenty_three_times_fourty_two = Term::Binary { +// operation: BinaryOperation::NumericMultiplication, +// lhs: Box::new(twenty_three.clone()), +// rhs: Box::new(fourty_two.clone()), +// }; + +// assert_parse_error!( +// parser.parse_function_term(), +// "", +// ParseError::ExpectedFunctionTerm +// ); + +// let nullary_function = Term::Function(Identifier(String::from("nullary_function")), vec![]); +// assert_parse!( +// parser.parse_function_term(), +// "nullary_function()", +// nullary_function +// ); +// assert_parse!( +// parser.parse_function_term(), +// "nullary_function( )", +// nullary_function +// ); +// assert_parse_error!( +// parser.parse_function_term(), +// "nullary_function( () )", +// ParseError::ExpectedFunctionTerm +// ); + +// let unary_function = Term::Function( +// Identifier(String::from("unary_function")), +// vec![fourty_two.clone()], +// ); +// assert_parse!( +// parser.parse_function_term(), +// "unary_function(42)", +// unary_function +// ); +// assert_parse!( +// parser.parse_function_term(), +// "unary_function((42))", +// unary_function +// ); +// assert_parse!( +// parser.parse_function_term(), +// "unary_function(( (42 )))", +// unary_function +// ); + +// let binary_function = Term::Function( +// Identifier(String::from("binary_function")), +// vec![fourty_two.clone(), twenty_three.clone()], +// ); +// assert_parse!( +// parser.parse_function_term(), +// "binary_function(42, 23)", +// binary_function +// ); + +// let function_with_nested_algebraic_expression = Term::Function( +// Identifier(String::from("function")), +// vec![twenty_three_times_fourty_two], +// ); +// assert_parse!( +// parser.parse_function_term(), +// "function( 23 *42)", +// function_with_nested_algebraic_expression +// ); + +// let nested_function = Term::Function( +// Identifier(String::from("nested_function")), +// vec![nullary_function.clone()], +// ); + +// assert_parse!( +// parser.parse_function_term(), +// "nested_function(nullary_function())", +// nested_function +// ); + +// let triple_nested_function = Term::Function( +// Identifier(String::from("nested_function")), +// vec![Term::Function( +// Identifier(String::from("nested_function")), +// vec![Term::Function( +// Identifier(String::from("nested_function")), +// vec![nullary_function.clone()], +// )], +// )], +// ); +// assert_parse!( +// parser.parse_function_term(), +// "nested_function( nested_function( (nested_function(nullary_function()) ) ))", +// triple_nested_function +// ); +// } + +// #[test] +// fn parse_terms() { +// let parser = RuleParser::new(); + +// assert_parse_error!(parser.parse_term(), "", ParseError::ExpectedTerm); + +// assert_parse!( +// parser.parse_term(), +// "constant", +// Term::Primitive(PrimitiveTerm::GroundTerm(AnyDataValue::new_iri( +// String::from("constant") +// ))) +// ); +// } + +// #[test] +// fn parse_aggregates() { +// let parser = RuleParser::new(); + +// assert_parse_error!(parser.parse_aggregate(), "", ParseError::ExpectedAggregate); + +// assert_parse!( +// parser.parse_aggregate(), +// "#min(?VARIABLE)", +// Term::Aggregation(Aggregate { +// 
logical_aggregate_operation: LogicalAggregateOperation::MinNumber, +// terms: vec![Term::Primitive(PrimitiveTerm::Variable( +// Variable::Universal(String::from("VARIABLE")) +// ))] +// }) +// ); + +// assert_parse_error!( +// parser.parse_aggregate(), +// "#test(?VAR1, ?VAR2)", +// ParseError::ExpectedAggregate +// ) +// } + +// #[test] +// fn parse_unary_function() { +// let parser = RuleParser::new(); + +// let expression = "ABS(4)"; +// let expected_term = Term::Unary( +// UnaryOperation::NumericAbsolute, +// Box::new(Term::Primitive(PrimitiveTerm::GroundTerm( +// AnyDataValue::new_integer_from_i64(4), +// ))), +// ); + +// assert_parse!(parser.parse_arithmetic_factor(), expression, expected_term); +// } + +// #[test] +// fn parse_arithmetic_and_functions() { +// let parser = RuleParser::new(); + +// let expression = "5 * ABS(SQRT(4) - 3)"; + +// let expected_term = Term::Binary { +// operation: BinaryOperation::NumericMultiplication, +// lhs: Box::new(Term::Primitive(PrimitiveTerm::GroundTerm( +// AnyDataValue::new_integer_from_i64(5), +// ))), +// rhs: Box::new(Term::Unary( +// UnaryOperation::NumericAbsolute, +// Box::new(Term::Binary { +// operation: BinaryOperation::NumericSubtraction, +// lhs: Box::new(Term::Unary( +// UnaryOperation::NumericSquareroot, +// Box::new(Term::Primitive(PrimitiveTerm::GroundTerm( +// AnyDataValue::new_integer_from_i64(4), +// ))), +// )), +// rhs: Box::new(Term::Primitive(PrimitiveTerm::GroundTerm( +// AnyDataValue::new_integer_from_i64(3), +// ))), +// }), +// )), +// }; + +// assert_parse!(parser.parse_term(), expression, expected_term); +// } + +// #[test] +// fn parse_assignment() { +// let parser = RuleParser::new(); + +// let expression = "?X = ABS(?Y - 5) * (7 + ?Z)"; + +// let variable = Term::Primitive(PrimitiveTerm::Variable(Variable::Universal( +// "X".to_string(), +// ))); + +// let term = Term::Binary { +// operation: BinaryOperation::NumericMultiplication, +// lhs: Box::new(Term::Unary( +// UnaryOperation::NumericAbsolute, +// Box::new(Term::Binary { +// operation: BinaryOperation::NumericSubtraction, +// lhs: Box::new(Term::Primitive(PrimitiveTerm::Variable( +// Variable::Universal("Y".to_string()), +// ))), +// rhs: Box::new(Term::Primitive(PrimitiveTerm::GroundTerm( +// AnyDataValue::new_integer_from_i64(5), +// ))), +// }), +// )), +// rhs: Box::new(Term::Binary { +// operation: BinaryOperation::NumericAddition, +// lhs: Box::new(Term::Primitive(PrimitiveTerm::GroundTerm( +// AnyDataValue::new_integer_from_i64(7), +// ))), +// rhs: Box::new(Term::Primitive(PrimitiveTerm::Variable( +// Variable::Universal("Z".to_string()), +// ))), +// }), +// }; + +// let expected = Constraint::Equals(variable, term); + +// assert_parse!(parser.parse_constraint(), expression, expected); +// } + +// #[test] +// fn parse_complex_condition() { +// let parser = RuleParser::new(); + +// let expression = "ABS(?X - ?Y) <= ?Z + SQRT(?Y)"; + +// let left_term = Term::Unary( +// UnaryOperation::NumericAbsolute, +// Box::new(Term::Binary { +// operation: BinaryOperation::NumericSubtraction, +// lhs: Box::new(Term::Primitive(PrimitiveTerm::Variable( +// Variable::Universal(String::from("X")), +// ))), +// rhs: Box::new(Term::Primitive(PrimitiveTerm::Variable( +// Variable::Universal(String::from("Y")), +// ))), +// }), +// ); + +// let right_term = Term::Binary { +// operation: BinaryOperation::NumericAddition, +// lhs: Box::new(Term::Primitive(PrimitiveTerm::Variable( +// Variable::Universal(String::from("Z")), +// ))), +// rhs: Box::new(Term::Unary( +// 
UnaryOperation::NumericSquareroot, +// Box::new(Term::Primitive(PrimitiveTerm::Variable( +// Variable::Universal(String::from("Y")), +// ))), +// )), +// }; + +// let expected = Constraint::LessThanEq(left_term, right_term); + +// assert_parse!(parser.parse_constraint(), expression, expected); +// } + +// #[test] +// fn map_literal() { +// let parser = RuleParser::new(); +// assert_parse!( +// parser.parse_map_literal(), +// r#"{}"#, +// MapDataValue::from_iter([]), +// ); + +// let ident = "foo"; +// let key = AnyDataValue::new_iri(ident.to_string()); + +// let entry = format!("{ident}=23"); +// assert_parse!( +// parser.parse_map_entry(), +// &entry, +// (key.clone(), AnyDataValue::new_integer_from_i64(23)) +// ); + +// let pairs = vec![ +// ( +// AnyDataValue::new_plain_string("23".to_string()), +// AnyDataValue::new_integer_from_i64(42), +// ), +// ( +// AnyDataValue::new_iri("foo".to_string()), +// AnyDataValue::new_integer_from_i64(23), +// ), +// ]; + +// assert_parse!( +// parser.parse_map_literal(), +// r#"{foo = 23, "23" = 42}"#, +// pairs.clone().into_iter().collect::() +// ); +// } + +// #[test] +// fn nested_map_literal() { +// let parser = RuleParser::new(); + +// let pairs = vec![( +// AnyDataValue::new_iri("inner".to_string()), +// MapDataValue::from_iter([]).into(), +// )]; + +// assert_parse!( +// parser.parse_map_literal(), +// r#"{inner = {}}"#, +// pairs.clone().into_iter().collect::() +// ); +// } + +// #[test] +// fn tuple_literal() { +// let parser = RuleParser::new(); + +// let expected: TupleDataValue = [ +// AnyDataValue::new_iri("something".to_string()), +// AnyDataValue::new_integer_from_i64(42), +// TupleDataValue::from_iter([]).into(), +// ] +// .into_iter() +// .collect(); + +// assert_parse!( +// parser.parse_tuple_literal(), +// r#"(something, 42, ())"#, +// expected +// ); +// } + +// #[test] +// fn import_export() { +// let parser = RuleParser::new(); + +// let name = "p".to_string(); +// let predicate = Identifier(name.clone()); +// let qualified = format!("{name} "); +// let arguments = r#"{delimiter = ";", resource = }"#; +// let spec = format!("{qualified} :- dsv{arguments}"); +// let directive = format!("@import {spec} ."); +// let directive_export = format!("@export {spec} ."); +// let attributes = parser.parse_map_literal()(arguments.into()).unwrap().1; + +// assert_parse!( +// parser.parse_import_export_spec(), +// &spec, +// ImportExportDirective { +// predicate: predicate.clone(), +// format: FileFormat::DSV, +// attributes: attributes.clone(), +// } +// ); + +// assert_parse!( +// parser.parse_import(), +// &directive, +// ImportDirective::from(ImportExportDirective { +// predicate: predicate.clone(), +// format: FileFormat::DSV, +// attributes: attributes.clone() +// }) +// ); + +// assert_parse!( +// parser.parse_export(), +// &directive_export, +// ExportDirective::from(ImportExportDirective { +// predicate: predicate.clone(), +// format: FileFormat::DSV, +// attributes: attributes.clone() +// }) +// ); +// } +// } + +/// NEW PARSER +use std::cell::RefCell; + +use nom::character::complete::multispace0; +use nom::combinator::{opt, recognize}; +use nom::error::ParseError; +use nom::sequence::{delimited, pair}; +use nom::Parser; use nom::{ branch::alt, - bytes::complete::{is_not, tag}, - character::complete::{alpha1, digit1, multispace1, satisfy}, - combinator::{all_consuming, cut, map, map_res, opt, recognize, value}, - multi::{many0, many1, separated_list0, separated_list1}, - sequence::{delimited, pair, preceded, separated_pair, terminated, 
tuple}, - Err, + combinator::verify, + multi::{many0, many1}, + sequence::tuple, + IResult, +}; +use nom_supreme::{context::ContextError, error::StackContext}; + +use super::lexer::{ + arrow, at, caret, close_brace, close_paren, colon, comma, dot, equal, exclamation_mark, exp, + greater, greater_equal, hash, less, less_equal, lex_comment, lex_doc_comment, lex_iri, + lex_number, lex_prefixed_ident, lex_string, lex_tag, lex_toplevel_doc_comment, lex_whitespace, + minus, open_brace, open_paren, plus, question_mark, skip_to_statement_end, slash, star, tilde, + underscore, unequal, Context, Error, ErrorTree, ParserState, Span, }; -use macros::traced; +fn outer_span<'a>(input: Span<'a>, rest_input: Span<'a>) -> Span<'a> { + unsafe { + // dbg!(&input, &span, &rest_input); + Span::new_from_raw_offset( + input.location_offset(), + input.location_line(), + &input[..(rest_input.location_offset() - input.location_offset())], + (), + ) + } +} -pub mod ast; -pub(crate) mod types; +fn expect<'a, 's, O: Copy, E: ParseError>, F: Parser, O, E>>( + mut parser: F, + error_msg: impl ToString, + error_output: O, + errors: ParserState<'s>, +) -> impl FnMut(Input<'a, 's>) -> IResult, O, E> { + move |input| match parser.parse(input) { + Ok(result) => Ok(result), + Err(nom::Err::Error(_)) | Err(nom::Err::Failure(_)) => { + let err = Error { + pos: Position { + offset: input.input.location_offset(), + line: input.input.location_line(), + column: input.input.get_utf8_column() as u32, + }, + msg: error_msg.to_string(), + context: vec![], + }; + errors.report_error(err); + Ok((input, error_output)) + } + Err(err) => Err(err), + } +} -use types::{ConstraintOperator, IntermediateResult, Span}; -pub(crate) mod iri; -pub(crate) mod rfc5234; -pub(crate) mod sparql; -pub(crate) mod turtle; -pub use types::{span_from_str, LocatedParseError, ParseError, ParseResult}; - -/// Parse a program in the given `input`-String and return a [Program]. -/// -/// The program will be parsed and checked for unsupported features. -/// -/// # Error -/// Returns an appropriate [Error] variant on parsing and feature check issues. -pub fn parse_program(input: impl AsRef) -> Result { - let program = all_input_consumed(RuleParser::new().parse_program())(input.as_ref())?; - Ok(program) +fn recover<'a, 's, E>( + mut parser: impl Parser, Statement<'a>, E>, + error_msg: impl ToString, + context: Context, + _errors: ParserState<'s>, +) -> impl FnMut(Input<'a, 's>) -> IResult, Statement<'a>, E> { + move |input: Input<'a, 's>| match parser.parse(input) { + Ok(result) => Ok(result), + Err(err) if input.input.is_empty() => Err(err), + Err(nom::Err::Error(_)) | Err(nom::Err::Failure(_)) => { + let _err = Error { + pos: Position { + offset: input.input.location_offset(), + line: input.input.location_line(), + column: input.input.get_utf8_column() as u32, + }, + msg: error_msg.to_string(), + context: vec![context], + }; + // errors.report_error(err); + let (rest_input, span) = skip_to_statement_end::>>(input); + Ok((rest_input, Statement::Error(span))) + } + Err(err) => Err(err), + } } -/// Parse a single fact in the given `input`-String and return a [Program]. -/// -/// The program will be parsed and checked for unsupported features. -/// -/// # Error -/// Returns an appropriate [Error] variant on parsing and feature check issues. 
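The new `expect` combinator above is the error-tolerance half of the recovery story: on a soft failure it records a message and substitutes a placeholder value so parsing continues. A stripped-down sketch of the idea without nom; the `Recorder` type and all names here are illustrative only, not this patch's API:

use std::cell::RefCell;

// Collects error messages while parsing continues.
#[derive(Default)]
struct Recorder {
    errors: RefCell<Vec<String>>,
}

type PResult<'a, T> = Result<(&'a str, T), ()>;

// Run `parser` on `input`; on a soft failure, record `msg` and return
// `default` with the input untouched, so the caller can keep going.
fn expect<'a, T>(
    parser: impl Fn(&'a str) -> PResult<'a, T>,
    input: &'a str,
    msg: &str,
    default: T,
    rec: &Recorder,
) -> (&'a str, T) {
    match parser(input) {
        Ok(ok) => ok,
        Err(()) => {
            rec.errors.borrow_mut().push(msg.to_string());
            (input, default)
        }
    }
}

fn main() {
    let rec = Recorder::default();
    // A toy parser that requires a leading dot.
    let dot = |i: &'static str| -> PResult<'static, char> {
        i.strip_prefix('.').map(|r| (r, '.')).ok_or(())
    };
    // Missing dot: we still get a value back, plus a recorded error.
    let (rest, tok) = expect(dot, "abc", "expected `.`", '.', &rec);
    assert_eq!((rest, tok), ("abc", '.'));
    assert_eq!(rec.errors.borrow().len(), 1);
}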
-pub fn parse_fact(mut input: String) -> Result { - input += "."; - let fact = all_input_consumed(RuleParser::new().parse_fact())(input.as_str())?; - Ok(fact) +fn report_error<'a, 's, O>( + mut parser: impl Parser, O, ErrorTree>>, +) -> impl FnMut(Input<'a, 's>) -> IResult, O, ErrorTree>> { + move |input| match parser.parse(input) { + Ok(result) => Ok(result), + Err(e) => { + if input.input.is_empty() { + return Err(e); + }; + match &e { + nom::Err::Incomplete(_) => (), + nom::Err::Error(err) | nom::Err::Failure(err) => { + let (_deepest_pos, errors) = get_deepest_errors(err); + for error in errors { + input.parser_state.report_error(error); + } + // let error = Error(deepest_pos, format!("")); + // // input.parser_state.report_error(error) + } + }; + Err(e) + } + } } -/// A combinator to add tracing to the parser. -/// [fun] is an identifier for the parser and [parser] is the actual parser. -#[inline(always)] -fn traced<'a, T, P>( - fun: &'static str, - mut parser: P, -) -> impl FnMut(Span<'a>) -> IntermediateResult<'a, T> -where - T: Debug, - P: FnMut(Span<'a>) -> IntermediateResult<'a, T>, -{ - move |input| { - log::trace!(target: "parser", "{fun}({input:?})"); - let result = parser(input); - log::trace!(target: "parser", "{fun}({input:?}) -> {result:?}"); - result +fn get_deepest_errors<'a, 's>(e: &'a ErrorTree>) -> (Position, Vec) { + match e { + ErrorTree::Base { location, .. } => { + let span = location.input; + let err_pos = Position { + offset: span.location_offset(), + line: span.location_line(), + column: span.get_utf8_column() as u32, + }; + ( + err_pos, + vec![Error { + pos: err_pos, + msg: "".to_string(), + context: Vec::new(), + }], + ) + } + ErrorTree::Stack { base, contexts } => { + // let mut err_pos = Position::default(); + match &**base { + ErrorTree::Base { location, .. } => { + let span = location.input; + let err_pos = Position { + offset: span.location_offset(), + line: span.location_line(), + column: span.get_utf8_column() as u32, + }; + let mut msg = String::from(""); + for (_, context) in contexts { + match context { + StackContext::Kind(_) => todo!(), + StackContext::Context(c) => match c { + Context::Tag(t) => { + msg.push_str(t); + } + _ => (), + }, + } + } + ( + err_pos, + vec![Error { + pos: err_pos, + msg, + context: context_strs(contexts), + }], + ) + } + ErrorTree::Stack { base, contexts } => { + let (pos, mut deepest_errors) = get_deepest_errors(base); + let contexts = context_strs(contexts); + for error in &mut deepest_errors { + error.context.append(&mut contexts.clone()); + } + (pos, deepest_errors) + } + ErrorTree::Alt(_error_tree) => { + let (pos, mut deepest_errors) = get_deepest_errors(base); + let contexts = context_strs(contexts); + for error in &mut deepest_errors { + error.context.append(&mut contexts.clone()); + } + (pos, deepest_errors) + } + } + } + ErrorTree::Alt(vec) => { + let mut return_vec: Vec = Vec::new(); + let mut deepest_pos = Position::default(); + for error in vec { + let (pos, mut deepest_errors) = get_deepest_errors(error); + if pos > deepest_pos { + deepest_pos = pos; + return_vec.clear(); + return_vec.append(&mut deepest_errors); + } else if pos == deepest_pos { + return_vec.append(&mut deepest_errors); + } + } + (deepest_pos, return_vec) + } } } -/// A combinator that makes sure all input has been consumed. 
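`get_deepest_errors` above walks a `nom_supreme` `ErrorTree` and keeps only the alternatives that failed furthest into the input, which tends to be the most informative diagnostic. The same selection logic over a simplified error tree (types illustrative):

// A simplified error tree: either a leaf at some input offset,
// or a set of alternatives that all failed.
enum Tree {
    Leaf { offset: usize, msg: &'static str },
    Alt(Vec<Tree>),
}

// Return the deepest offset and all messages that occurred there.
fn deepest(tree: &Tree) -> (usize, Vec<&'static str>) {
    match tree {
        Tree::Leaf { offset, msg } => (*offset, vec![*msg]),
        Tree::Alt(branches) => {
            let mut best = (0, Vec::new());
            for b in branches {
                let (pos, mut msgs) = deepest(b);
                if pos > best.0 {
                    best = (pos, msgs);
                } else if pos == best.0 {
                    best.1.append(&mut msgs);
                }
            }
            best
        }
    }
}

fn main() {
    let tree = Tree::Alt(vec![
        Tree::Leaf { offset: 3, msg: "expected `.`" },
        Tree::Leaf { offset: 10, msg: "expected term" },
        Tree::Leaf { offset: 10, msg: "expected `)`" },
    ]);
    assert_eq!(deepest(&tree), (10, vec!["expected term", "expected `)`"]));
}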
-pub fn all_input_consumed<'a, T: 'a>( - parser: impl FnMut(Span<'a>) -> IntermediateResult<'a, T> + 'a, -) -> impl FnMut(&'a str) -> Result + 'a { - let mut p = all_consuming(parser); - move |input| { - let input = Span::new(input); - p(input).map(|(_, result)| result).map_err(|e| match e { - Err::Incomplete(e) => ParseError::MissingInput(match e { - nom::Needed::Unknown => "expected an unknown amount of further input".to_string(), - nom::Needed::Size(size) => format!("expected at least {size} more bytes"), - }) - .at(input), - Err::Error(e) | Err::Failure(e) => e, +fn context_strs(contexts: &Vec<(Input<'_, '_>, StackContext)>) -> Vec { + contexts + .iter() + .map(|(_, c)| match c { + StackContext::Kind(_) => todo!(), + StackContext::Context(c) => *c, }) - } + .collect() } -/// A combinator that recognises a comment, starting at a `%` -/// character and ending at the end of the line. -pub fn comment(input: Span) -> IntermediateResult<()> { - alt(( - value((), pair(tag("%"), is_not("\n\r"))), - // a comment that immediately precedes the end of the line – - // this must come after the normal line comment above - value((), tag("%")), - ))(input) +pub(crate) fn context<'a, 's, P, E, F, O>( + context: P, + mut f: F, +) -> impl FnMut(Input<'a, 's>) -> IResult, O, E> +where + P: Clone, + F: Parser, O, E>, + E: ContextError, P>, +{ + move |i| match f.parse(i.clone()) { + Ok(o) => Ok(o), + Err(nom::Err::Incomplete(i)) => Err(nom::Err::Incomplete(i)), + Err(nom::Err::Error(e)) => Err(nom::Err::Error(E::add_context(i, context.clone(), e))), + Err(nom::Err::Failure(e)) => Err(nom::Err::Failure(E::add_context(i, context.clone(), e))), + } } -/// A combinator that recognises an arbitrary amount of whitespace and -/// comments. -pub fn multispace_or_comment0(input: Span) -> IntermediateResult<()> { - value((), many0(alt((value((), multispace1), comment))))(input) +fn wsoc0<'a, 's, E>(input: Input<'a, 's>) -> IResult, Option>, E> +where + E: ParseError> + ContextError, Context>, +{ + many0(alt((lex_whitespace, lex_comment)))(input).map(|(rest_input, vec)| { + if vec.is_empty() { + (rest_input, None) + } else { + ( + rest_input, + Some(Wsoc { + span: outer_span(input.input, rest_input.input), + token: vec, + }), + ) + } + }) } -/// A combinator that recognises any non-empty amount of whitespace -/// and comments. -pub fn multispace_or_comment1(input: Span) -> IntermediateResult<()> { - value((), many1(alt((value((), multispace1), comment))))(input) +fn wsoc1<'a, 's, E: ParseError> + ContextError, Context>>( + input: Input<'a, 's>, +) -> IResult, Wsoc<'a>, E> { + many1(alt((lex_whitespace, lex_comment)))(input).map(|(rest_input, vec)| { + ( + rest_input, + Wsoc { + span: outer_span(input.input, rest_input.input), + token: vec, + }, + ) + }) } -/// A combinator that modifies the associated error. -pub fn map_error<'a, T: 'a>( - mut parser: impl FnMut(Span<'a>) -> IntermediateResult<'a, T> + 'a, - mut error: impl FnMut() -> ParseError + 'a, -) -> impl FnMut(Span<'a>) -> IntermediateResult<'a, T> + 'a { - move |input| { - parser(input).map_err(|e| match e { - Err::Incomplete(_) => e, - Err::Error(context) => { - let mut err = error().at(input); - err.append(context); - Err::Error(err) - } - Err::Failure(context) => { - let mut err = error().at(input); - err.append(context); - Err::Failure(err) - } - }) +/// Parse a full program consisting of directives, facts, rules and comments. 
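Before `parse_program` below: the local `context` combinator above mirrors `nom::error::context`, except that it attaches this parser's own `Context` enum rather than a `&'static str`. For comparison, a sketch of the stock combinator with string labels, assuming nom 7 and its `VerboseError`:

use nom::{
    bytes::complete::tag,
    error::{context, VerboseError},
    sequence::pair,
    IResult,
};

type Res<'a, T> = IResult<&'a str, T, VerboseError<&'a str>>;

// Label the sub-parser so failures carry "directive" in their trace.
fn directive(input: &str) -> Res<'_, (&str, &str)> {
    context("directive", pair(tag("@"), tag("base")))(input)
}

fn main() {
    let err = directive("@prefix").unwrap_err();
    if let nom::Err::Error(e) = err {
        // The VerboseError stacks the "directive" context on top of
        // the underlying tag failure.
        assert!(e.errors.iter().any(|(_, kind)| matches!(
            kind,
            nom::error::VerboseErrorKind::Context("directive")
        )));
    }
}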
+fn parse_program<'a, 's, E: ParseError<Input<'a, 's>> + ContextError<Input<'a, 's>, Context>>(
+    input: Input<'a, 's>,
+) -> (Program<'a>, Vec<Error>) {
+    let result = context(
+        Context::Program,
+        pair(
+            opt(lex_toplevel_doc_comment::<ErrorTree<Input<'a, 's>>>),
+            delimited(
+                multispace0,
+                many0(recover(
+                    report_error(delimited(
+                        multispace0,
+                        alt((
+                            // TODO: Discuss whether directives should only get parsed at the beginning of the source file
+                            parse_rule,
+                            parse_fact,
+                            parse_directive,
+                            parse_comment,
+                        )),
+                        multispace0,
+                    )),
+                    "failed to parse statement",
+                    Context::Program,
+                    input.parser_state,
+                )),
+                multispace0,
+            ),
+        ),
+    )(input);
+    match result {
+        Ok((rest_input, (tl_doc_comment, statements))) => {
+            if !rest_input.input.is_empty() {
+                panic!("Parser did not consume all input. This is considered a bug. Please report it. Unparsed input is: {:?}", rest_input);
+            };
+            (
+                Program {
+                    span: input.input,
+                    tl_doc_comment,
+                    statements,
+                },
+                rest_input.parser_state.errors.take(),
+            )
+        }
+        Err(e) => panic!(
+            "Parser can't fail. If it fails it's a bug! Please report it. Got: {:?}",
+            e
+        ),
+    }
+}
+
-/// A combinator that creates a parser for a specific token.
-pub fn token<'a>(token: &'a str) -> impl FnMut(Span<'a>) -> IntermediateResult<Span<'a>> {
-    map_error(tag(token), || ParseError::ExpectedToken(token.to_string()))
+/// This function takes a `&str` of source code (for example loaded from a file) and
+/// produces an AST together with a `Vec` of the errors that were recovered from.
+pub fn parse_program_str(input: &str) -> (Program<'_>, Vec<Error>) {
+    let refcell = RefCell::new(Vec::new());
+    let parser_state = ParserState { errors: &refcell };
+    let input = Input {
+        input: Span::new(input),
+        parser_state,
+    };
+    parse_program::<ErrorTree<Input<'_, '_>>>(input)
 }

-/// A combinator that creates a parser for a specific token,
-/// surrounded by whitespace or comments.
-pub fn space_delimited_token<'a>(
-    token: &'a str,
-) -> impl FnMut(Span<'a>) -> IntermediateResult<Span<'a>> {
-    map_error(
-        delimited(multispace_or_comment0, tag(token), multispace_or_comment0),
-        || ParseError::ExpectedToken(token.to_string()),
-    )
+/// Parse a fact directly.
+pub fn parse_fact_str(_input: &str) -> (Fact<'_>, Vec<Error>) {
+    todo!("parse fact directly from string input")
 }

-/// Expand a prefix.
-fn resolve_prefix<'a>(
-    prefixes: &'a HashMap<&'a str, &'a str>,
-    prefix: &'a str,
-) -> Result<&'a str, ParseError> {
-    prefixes
-        .get(prefix)
-        .copied()
-        .ok_or_else(|| ParseError::UndeclaredPrefix(prefix.to_string()))
+/// Parse normal comments that start with a `%` and end at the line ending.
+fn parse_comment<'a, 's, E: ParseError<Input<'a, 's>> + ContextError<Input<'a, 's>, Context>>(
+    input: Input<'a, 's>,
+) -> IResult<Input<'a, 's>, Statement<'a>, E> {
+    lex_comment(input).map(|(rest_input, comment)| (rest_input, Statement::Comment(comment)))
 }

-/// Expand a prefixed name.
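A sketch of how a caller might consume the `parse_program_str` API defined above. The module path and the visibility of the `Program` and `Error` fields are assumptions for illustration:

// Hypothetical caller; assumes this crate's parse_program_str as defined above.
use nemo::io::parser::parse_program_str;

fn main() {
    let source = "ancestor(?X, ?Y) :- parent(?X, ?Y) .";
    let (program, errors) = parse_program_str(source);
    if errors.is_empty() {
        println!("parsed {} statements", program.statements.len());
    } else {
        // Statements that failed to parse are kept as Statement::Error
        // spans, and the details end up in `errors`.
        for error in &errors {
            eprintln!("{}:{}: {}", error.pos.line, error.pos.column, error.msg);
        }
    }
}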
-fn resolve_prefixed_name( - prefixes: &HashMap<&str, &str>, - name: sparql::Name, -) -> Result { - match name { - sparql::Name::IriReference(iri) => Ok(iri.to_string()), - sparql::Name::PrefixedName { prefix, local } => { - resolve_prefix(prefixes, prefix).map(|iri| format!("{iri}{local}")) - } - sparql::Name::BlankNode(label) => Ok(format!("_:{label}")), +/// Parse a fact of the form `predicateName(term1, term2, …).` +fn parse_fact<'a, 's, E: ParseError> + ContextError, Context>>( + input: Input<'a, 's>, +) -> IResult, Statement<'a>, E> { + // dbg!(&input.parser_state.labels); + context( + Context::Fact, + tuple((opt(lex_doc_comment), parse_fact_atom, wsoc0, dot)), + )(input) + .map(|(rest_input, (doc_comment, atom, _ws, dot))| { + ( + rest_input, + Statement::Fact { + span: outer_span(input.input, rest_input.input), + doc_comment, + fact: atom, + dot, + }, + ) + }) +} + +fn parse_fact_atom<'a, 's, E: ParseError> + ContextError, Context>>( + input: Input<'a, 's>, +) -> IResult, Fact<'a>, E> { + // TODO: Add Context + match parse_named_tuple::(input) { + Ok((rest_input, named_tuple)) => Ok((rest_input, Fact::NamedTuple(named_tuple))), + Err(_) => match parse_map::(input) { + Ok((rest_input, map)) => Ok((rest_input, Fact::Map(map))), + Err(err) => Err(err), + }, } } -/// Resolve prefixes in a [turtle::RdfLiteral]. -fn resolve_prefixed_rdf_literal( - prefixes: &HashMap<&str, &str>, - literal: turtle::RdfLiteral, -) -> Result { - match literal { - turtle::RdfLiteral::LanguageString { value, tag } => Ok( - AnyDataValue::new_language_tagged_string(value.to_string(), tag.to_string()), - ), - turtle::RdfLiteral::DatatypeValue { value, datatype } => { - AnyDataValue::new_from_typed_literal( - value.to_string(), - resolve_prefixed_name(prefixes, datatype) - .expect("prefix should have been registered during parsing"), +/// Parse a rule of the form `headPredicate1(term1, term2, …), headPredicate2(term1, term2, …) :- bodyPredicate(term1, …), term1 >= (term2 + term3) * function(term1, …) .` +fn parse_rule<'a, 's, E: ParseError> + ContextError, Context>>( + input: Input<'a, 's>, +) -> IResult, Statement<'a>, E> { + context( + Context::Rule, + tuple(( + opt(lex_doc_comment), + parse_head, + wsoc0, + arrow, + wsoc0, + parse_body, + wsoc0, + dot, + )), + )(input) + .map( + |(rest_input, (doc_comment, head, _ws1, arrow, _ws2, body, _ws3, dot))| { + ( + rest_input, + Statement::Rule { + span: outer_span(input.input, rest_input.input), + doc_comment, + head, + arrow, + body, + dot, + }, ) - } - } + }, + ) +} + +/// Parse the head atoms of a rule. +fn parse_head<'a, 's, E: ParseError> + ContextError, Context>>( + input: Input<'a, 's>, +) -> IResult, List<'a, Atom<'a>>, E> { + context(Context::RuleHead, parse_list(parse_atoms))(input) +} + +/// Parse the body atoms of a rule. +fn parse_body<'a, 's, E: ParseError> + ContextError, Context>>( + input: Input<'a, 's>, +) -> IResult, List<'a, Atom<'a>>, E> { + context(Context::RuleBody, parse_list(parse_atoms))(input) } -#[traced("parser")] -pub(crate) fn parse_bare_name(input: Span<'_>) -> IntermediateResult> { - map_error( - recognize(pair( - alpha1, - opt(many1(satisfy(|c| { - ['0'..='9', 'a'..='z', 'A'..='Z', '-'..='-', '_'..='_'] - .iter() - .any(|range| range.contains(&c)) - }))), +/// Parse the directives (@base, @prefix, @import, @export, @output). 
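Before the directive parsers below: each of them stitches its covering span together with `outer_span`, the slice of the original input between the start offset and the rest input's offset. nom's `consumed` combinator expresses the same idea without the `unsafe` reconstruction; a freestanding sketch with `nom_locate`:

use nom::{bytes::complete::tag, combinator::consumed, sequence::pair, IResult};
use nom_locate::LocatedSpan;

type Span<'a> = LocatedSpan<&'a str>;

// `consumed` returns the exact input slice a parser matched,
// alongside the parser's own output: the same covering span
// that outer_span computes manually.
fn at_base(input: Span<'_>) -> IResult<Span<'_>, (Span<'_>, (Span<'_>, Span<'_>))> {
    consumed(pair(tag("@"), tag("base")))(input)
}

fn main() {
    let (_rest, (span, _parts)) = at_base(Span::new("@base rest")).unwrap();
    assert_eq!(*span.fragment(), "@base");
    assert_eq!(span.location_offset(), 0);
}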
+fn parse_directive<'a, 's, E: ParseError<Input<'a, 's>> + ContextError<Input<'a, 's>, Context>>(
+    input: Input<'a, 's>,
+) -> IResult<Input<'a, 's>, Statement<'a>, E> {
+    context(
+        Context::Directive,
+        alt((
+            parse_base_directive,
+            parse_prefix_directive,
+            parse_import_directive,
+            parse_export_directive,
+            parse_output_directive,
+        )),
+    )(input)
+    .map(|(rest, directive)| (rest, Statement::Directive(directive)))
+}

+/// Parse the base directive.
+fn parse_base_directive<
+    'a,
+    's,
+    E: ParseError<Input<'a, 's>> + ContextError<Input<'a, 's>, Context>,
+>(
+    input: Input<'a, 's>,
+) -> IResult<Input<'a, 's>, Directive<'a>, E> {
+    context(
+        Context::DirectiveBase,
+        tuple((
+            opt(lex_doc_comment),
+            recognize(pair(
+                at,
+                verify(lex_tag, |token| *token.fragment() == "base"),
+            )),
+            wsoc0,
+            lex_iri,
+            wsoc0,
+            dot,
+        )),
+    )(input)
+    .map(
+        |(rest_input, (doc_comment, _kw, _ws1, base_iri, _ws2, dot))| {
+            (
+                rest_input,
+                Directive::Base {
+                    span: outer_span(input.input, rest_input.input),
+                    doc_comment,
+                    base_iri,
+                    dot,
+                },
+            )
+        },
+    )
+}

+/// Parse the prefix directive.
+fn parse_prefix_directive<
+    'a,
+    's,
+    E: ParseError<Input<'a, 's>> + ContextError<Input<'a, 's>, Context>,
+>(
+    input: Input<'a, 's>,
+) -> IResult<Input<'a, 's>, Directive<'a>, E> {
+    context(
+        Context::DirectivePrefix,
+        tuple((
+            opt(lex_doc_comment),
+            recognize(pair(
+                at,
+                verify(lex_tag, |token| *token.fragment() == "prefix"),
+            )),
+            wsoc0,
+            recognize(pair(opt(lex_tag), colon)),
+            wsoc0,
+            lex_iri,
+            wsoc0,
+            dot,
+        )),
+    )(input)
+    .map(
+        |(rest_input, (doc_comment, _kw, _ws1, prefix, _ws2, prefix_iri, _ws3, dot))| {
+            (
+                rest_input,
+                Directive::Prefix {
+                    span: outer_span(input.input, rest_input.input),
+                    doc_comment,
+                    prefix: prefix.input,
+                    prefix_iri,
+                    dot,
+                },
+            )
+        },
+    )
+}

+/// Parse the import directive.
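+///
+/// The parsed sequence is: an optional doc comment, the `@import` keyword,
+/// the predicate name, the `:-` arrow, a format map such as
+/// `csv {resource = "table.csv"}`, and the terminating dot.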
+fn parse_import_directive< + 'a, + 's, + E: ParseError> + ContextError, Context>, +>( + input: Input<'a, 's>, +) -> IResult, Directive<'a>, E> { + context( + Context::DirectiveImport, + tuple(( + opt(lex_doc_comment), + recognize(pair( + at, + verify(lex_tag, |token| *token.fragment() == "import"), + )), + wsoc1, + lex_tag, + wsoc0, + arrow, + wsoc0, + parse_map, + wsoc0, + dot, )), + )(input) + .map( + |(rest_input, (doc_comment, _kw, _ws1, predicate, _ws2, arrow, _ws3, map, _ws4, dot))| { + ( + rest_input, + Directive::Import { + span: outer_span(input.input, rest_input.input), + doc_comment, + predicate, + arrow, + map, + dot, + }, + ) + }, ) } -/// Parse a ground term. -pub fn parse_ground_term<'a>( - prefixes: &'a RefCell>, -) -> impl FnMut(Span<'a>) -> IntermediateResult<'a, PrimitiveTerm> { - traced( - "parse_ground_term", - map_error( - map(parse_constant_term(prefixes), PrimitiveTerm::GroundTerm), - || ParseError::ExpectedGroundTerm, - ), +/// Parse the export directive. +fn parse_export_directive< + 'a, + 's, + E: ParseError> + ContextError, Context>, +>( + input: Input<'a, 's>, +) -> IResult, Directive<'a>, E> { + context( + Context::DirectiveExport, + tuple(( + opt(lex_doc_comment), + recognize(pair( + at, + verify(lex_tag, |token| *token.fragment() == "export"), + )), + wsoc1, + lex_tag, + wsoc0, + arrow, + wsoc0, + parse_map, + wsoc0, + dot, + )), + )(input) + .map( + |(rest_input, (doc_comment, _kw, _ws1, predicate, _ws2, arrow, _ws3, map, _ws4, dot))| { + ( + rest_input, + Directive::Export { + span: outer_span(input.input, rest_input.input), + doc_comment, + predicate, + arrow, + map, + dot, + }, + ) + }, ) } -/// The main parser. Holds a hash map for -/// prefixes, as well as the base IRI. -#[derive(Debug, Default)] -pub struct RuleParser<'a> { - /// The base IRI, if set. - base: RefCell>, - /// A map from Prefixes to IRIs. - prefixes: RefCell>, - /// Number counting up for generating distinct wildcards. - wildcard_generator: RefCell, +/// Parse the output directive. +fn parse_output_directive< + 'a, + 's, + E: ParseError> + ContextError, Context>, +>( + input: Input<'a, 's>, +) -> IResult, Directive<'a>, E> { + context( + Context::DirectiveOutput, + tuple(( + opt(lex_doc_comment), + recognize(pair( + at, + verify(lex_tag, |token| *token.fragment() == "output"), + )), + wsoc1, + opt(parse_list(lex_tag)), + wsoc0, + dot, + )), + )(input) + .map( + |(rest_input, (doc_comment, _kw, _ws1, predicates, _ws2, dot))| { + ( + rest_input, + Directive::Output { + span: outer_span(input.input, rest_input.input), + doc_comment, + predicates, + dot, + }, + ) + }, + ) } -impl<'a> RuleParser<'a> { - /// Construct a new [RuleParser]. - pub fn new() -> Self { - Default::default() - } - - fn parse_complex_constant_term( - &'a self, - ) -> impl FnMut(Span<'a>) -> IntermediateResult<'a, AnyDataValue> { - traced( - "parse_complex_constant_term", - // Note: The explicit |s| in the cases below is important to enable proper type - // reasoning in rust. Without it, unresolved opaque types appear in the recursion. 
- alt(( - parse_constant_term(&self.prefixes), - map(|s| self.parse_tuple_literal()(s), AnyDataValue::from), - map(|s| self.parse_map_literal()(s), AnyDataValue::from), +// /// Parse a list of `ident1, ident2, …` +// fn parse_identifier_list<'a, 's, E: ParseError> + ContextError, Context>>( +// input: Input<'a, 's>, +// ) -> IResult, List<'a, Token<'a>>, E> { +// pair( +// lex_ident, +// many0(tuple(( +// opt(lex_whitespace), +// comma, +// opt(lex_whitespace), +// lex_ident, +// ))), +// )(input) +// .map(|(rest_input, (first, rest))| { +// ( +// rest_input, +// List { +// span: outer_span(input.input, rest_input.input), +// first, +// rest: if rest.is_empty() { None } else { Some(rest) }, +// }, +// ) +// }) +// } + +fn parse_list<'a, 's, T, E: ParseError> + ContextError, Context>>( + parse_t: fn(Input<'a, 's>) -> IResult, T, E>, +) -> impl Fn(Input<'a, 's>) -> IResult, List<'a, T>, E> { + move |input: Input<'a, 's>| { + context( + Context::List, + tuple(( + parse_t, + many0(tuple((wsoc0, comma, wsoc0, parse_t))), + pair(wsoc0, opt(comma)), )), - ) - } - - /// Parse the dot that ends declarations, optionally surrounded by spaces. - fn parse_dot(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult> { - traced("parse_dot", space_delimited_token(".")) - } - - /// Parse a comma, optionally surrounded by spaces. - fn parse_comma(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult> { - traced("parse_comma", space_delimited_token(",")) + )(input) + .map(|(rest_input, (first, rest, (_, trailing_comma)))| { + ( + rest_input, + List { + span: outer_span(input.input, rest_input.input), + first, + rest: if rest.is_empty() { + None + } else { + Some( + rest.into_iter() + .map(|(_ws1, comma, _ws2, t)| (comma, t)) + .collect(), + ) + }, + trailing_comma, + }, + ) + }) } +} - /// Parse an equality sign, optionally surrounded by spaces. - fn parse_equals(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult> { - traced("parse_equals", space_delimited_token("=")) - } +/// Parse the different atom variants. +fn parse_atoms<'a, 's, E: ParseError> + ContextError, Context>>( + input: Input<'a, 's>, +) -> IResult, Atom<'a>, E> { + context( + Context::BodyAtoms, + alt(( + parse_normal_atom, + parse_negative_atom, + parse_infix_atom, + parse_map_atom, + )), + )(input) +} - /// Parse a negation sign (`~`), optionally surrounded by spaces. - fn parse_not(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult> { - traced("parse_not", space_delimited_token("~")) - } +/// Parse an atom of the form `predicateName(term1, term2, …)`. +fn parse_normal_atom< + 'a, + 's, + E: ParseError> + ContextError, Context>, +>( + input: Input<'a, 's>, +) -> IResult, Atom<'a>, E> { + context(Context::PositiveAtom, parse_named_tuple)(input) + .map(|(rest_input, named_tuple)| (rest_input, Atom::Positive(named_tuple))) +} - /// Parse an arrow (`:-`), optionally surrounded by spaces. - fn parse_arrow(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult> { - traced("parse_arrow", space_delimited_token(":-")) - } +/// Parse an atom of the form `~predicateName(term1, term2, …)`. 
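+///
+/// For example, `~reachable(?x, ?y)` in a rule body. The leading tilde is
+/// kept as a token of its own, so the span of the negation survives in the
+/// AST (`Atom::Negative { neg, atom, .. }`).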
+fn parse_negative_atom<
+    'a,
+    's,
+    E: ParseError<Input<'a, 's>> + ContextError<Input<'a, 's>, Context>,
+>(
+    input: Input<'a, 's>,
+) -> IResult<Input<'a, 's>, Atom<'a>, E> {
+    context(Context::NegativeAtom, pair(tilde, parse_named_tuple))(input).map(
+        |(rest_input, (tilde, named_tuple))| {
+            (
+                rest_input,
+                Atom::Negative {
+                    span: outer_span(input.input, rest_input.input),
+                    neg: tilde,
+                    atom: named_tuple,
+                },
+            )
+        },
+    )
+}

+/// Parse an "infix atom" of the form `term1 <operator> term2`.
+/// The supported infix operations are `<`, `<=`, `=`, `>=`, `>` and `!=`.
+fn parse_infix_atom<'a, 's, E: ParseError<Input<'a, 's>> + ContextError<Input<'a, 's>, Context>>(
+    input: Input<'a, 's>,
+) -> IResult<Input<'a, 's>, Atom<'a>, E> {
+    context(
+        Context::InfixAtom,
+        tuple((parse_term, wsoc0, parse_operation_token, wsoc0, parse_term)),
+    )(input)
+    .map(|(rest_input, (lhs, _ws1, operation, _ws2, rhs))| {
+        (
+            rest_input,
+            Atom::InfixAtom {
+                span: outer_span(input.input, rest_input.input),
+                lhs,
+                operation,
+                rhs,
+            },
+        )
+    })
+}

+/// Parse a tuple like `(int, int, skip)`. A 1-tuple is written with a trailing
+/// comma, e.g. `(3,)`, to distinguish it from a parenthesised expression.
+fn parse_tuple<'a, 's, E: ParseError<Input<'a, 's>> + ContextError<Input<'a, 's>, Context>>(
+    input: Input<'a, 's>,
+) -> IResult<Input<'a, 's>, Tuple<'a>, E> {
+    context(
+        Context::Tuple,
+        tuple((
+            open_paren,
+            wsoc0,
+            opt(parse_list(parse_term)),
+            wsoc0,
+            close_paren,
+        )),
+    )(input)
+    .map(
+        |(rest_input, (open_paren, _ws1, terms, _ws2, close_paren))| {
+            (
+                rest_input,
+                Tuple {
+                    span: outer_span(input.input, rest_input.input),
+                    open_paren,
+                    terms,
+                    close_paren,
+                },
+            )
+        },
+    )
+}

+/// Parse a named tuple. This function is like `parse_tuple`, with the difference
+/// that it enforces the existence of an identifier for the tuple.
+fn parse_named_tuple<
+    'a,
+    's,
+    E: ParseError<Input<'a, 's>> + ContextError<Input<'a, 's>, Context>,
+>(
+    input: Input<'a, 's>,
+) -> IResult<Input<'a, 's>, NamedTuple<'a>, E> {
+    context(
+        Context::NamedTuple,
+        tuple((alt((lex_prefixed_ident, lex_tag)), wsoc0, parse_tuple)),
+    )(input)
+    .map(|(rest_input, (identifier, _ws, tuple))| {
+        (
+            rest_input,
+            NamedTuple {
+                span: outer_span(input.input, rest_input.input),
+                identifier,
+                tuple,
+            },
+        )
+    })
+}

+/// Parse a map. Maps are denoted with `{…}` and can have an optional name, e.g. `csv {…}`.
+/// Inside the curly braces is a list of pairs.
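+///
+/// For example (the resource and delimiter values are illustrative):
+///
+/// ```text
+/// csv {resource = "table.csv", delimiter = ";"}
+/// ```
+///
+/// Both the name and the pair list are optional, so `{}` is also a valid map.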
+fn parse_map<'a, 's, E: ParseError> + ContextError, Context>>( + input: Input<'a, 's>, +) -> IResult, Map<'a>, E> { + context( + Context::Map, + tuple(( + opt(lex_tag), + wsoc0, + open_brace, + wsoc0, + opt(parse_list(parse_pair)), + wsoc0, + close_brace, + )), + )(input) + .map( + |(rest_input, (identifier, _ws1, open_brace, _ws2, pairs, _ws3, close_brace))| { + ( + rest_input, + Map { + span: outer_span(input.input, rest_input.input), + identifier, + open_brace, + pairs, + close_brace, + }, + ) + }, + ) +} - /// Parse a closing brace, optionally surrounded by spaces. - fn parse_close_brace(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult> { - traced("parse_close_brace", space_delimited_token("}")) - } +/// Parse a map in an atom position. +fn parse_map_atom<'a, 's, E: ParseError> + ContextError, Context>>( + input: Input<'a, 's>, +) -> IResult, Atom<'a>, E> { + parse_map(input).map(|(rest_input, map)| (rest_input, Atom::Map(map))) +} - /// Parse a base declaration. - fn parse_base(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_base", - map_error( - move |input| { - let (remainder, base) = delimited( - terminated(token("@base"), cut(multispace_or_comment1)), - cut(sparql::iriref), - cut(self.parse_dot()), - )(input)?; - - log::debug!(target: "parser", r#"parse_base: set new base: "{base}""#); - *self.base.borrow_mut() = Some(&base); - - Ok((remainder, Identifier(base.to_string()))) - }, - || ParseError::ExpectedBaseDeclaration, - ), +/// Parse a pair of the form `key = value`. +fn parse_pair<'a, 's, E: ParseError> + ContextError, Context>>( + input: Input<'a, 's>, +) -> IResult, Pair<'a>, E> { + context( + Context::Pair, + tuple((parse_term, wsoc0, equal, wsoc0, parse_term)), + )(input) + .map(|(rest_input, (key, _ws1, equal, _ws2, value))| { + ( + rest_input, + Pair { + span: outer_span(input.input, rest_input.input), + key, + equal, + value, + }, ) - } + }) +} - fn parse_prefix(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult> { - traced( - "parse_prefix", - map_error( - move |input| { - let (remainder, (prefix, iri)) = delimited( - terminated(token("@prefix"), cut(multispace_or_comment1)), - cut(tuple(( - cut(terminated(sparql::pname_ns, multispace_or_comment1)), - cut(sparql::iriref), - ))), - cut(self.parse_dot()), - )(input)?; - - log::debug!(target: "parser", r#"parse_prefix: got prefix "{prefix}" for iri "{iri}""#); - if self.prefixes.borrow_mut().insert(&prefix, &iri).is_some() { - Err(Err::Failure( - ParseError::RedeclaredPrefix(prefix.to_string()).at(input), - )) - } else { - Ok((remainder, prefix)) - } - }, - || ParseError::ExpectedPrefixDeclaration, - ), - ) - } - - /// Parse a data source declaration. - /// This is a backwards compatibility feature for Rulewerk syntax. Nemo normally uses - /// `@import` instead of `@source`. The difference in `@source` is that (1) a predicate - /// arity is given in brackets after the predicate name, (2) the import predicate names - /// are one of `load-csv`, `load-tsv`, `load-rdf`, and `sparql`, with the only parameter - /// being the file name or IRI to be loaded. 
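-    ///
-    /// For example (this form also appears in the tests below):
-    ///
-    /// ```text
-    /// @source drink[1]: load-csv("drinks.csv") .
-    /// ```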
- fn parse_source(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_source", - map_error( - move |input| { - let (remainder, (predicate, arity)) = preceded( - terminated(token("@source"), cut(multispace_or_comment1)), - cut(self.parse_qualified_predicate_name()), - )(input)?; - - let (remainder, datasource): (_, Result<_, ParseError>) = cut(delimited( - delimited(multispace_or_comment0, token(":"), multispace_or_comment1), - alt(( - map( - delimited( - preceded(token("load-csv"), cut(self.parse_open_parenthesis())), - turtle::string, - self.parse_close_parenthesis(), - ), - |filename| { - let attributes = MapDataValue::from_iter([ - ( - AnyDataValue::new_iri( - PARAMETER_NAME_RESOURCE.to_string(), - ), - AnyDataValue::new_plain_string(filename.to_string()), - ), - ( - AnyDataValue::new_iri( - PARAMETER_NAME_FORMAT.to_string(), - ), - TupleDataValue::from_iter( - vec![VALUE_FORMAT_ANY; arity] - .iter() - .map(|format| { - AnyDataValue::new_plain_string( - (*format).to_string(), - ) - }) - .collect::>(), - ) - .into(), - ), - ]); - Ok(ImportDirective::from(ImportExportDirective { - predicate: predicate.clone(), - format: FileFormat::CSV, - attributes, - })) - }, - ), - map( - delimited( - preceded(token("load-tsv"), cut(self.parse_open_parenthesis())), - turtle::string, - self.parse_close_parenthesis(), - ), - |filename| { - let attributes = MapDataValue::from_iter([ - ( - AnyDataValue::new_iri( - PARAMETER_NAME_RESOURCE.to_string(), - ), - AnyDataValue::new_plain_string(filename.to_string()), - ), - ( - AnyDataValue::new_iri( - PARAMETER_NAME_FORMAT.to_string(), - ), - TupleDataValue::from_iter( - vec![VALUE_FORMAT_ANY; arity] - .iter() - .map(|format| { - AnyDataValue::new_plain_string( - (*format).to_string(), - ) - }) - .collect::>(), - ) - .into(), - ), - ]); - Ok(ImportDirective::from(ImportExportDirective { - predicate: predicate.clone(), - format: FileFormat::TSV, - attributes, - })) - }, - ), - map( - delimited( - preceded(token("load-rdf"), cut(self.parse_open_parenthesis())), - turtle::string, - self.parse_close_parenthesis(), - ), - |filename| { - let mut attribute_pairs = vec![ - ( - AnyDataValue::new_iri( - PARAMETER_NAME_RESOURCE.to_string(), - ), - AnyDataValue::new_plain_string(filename.to_string()), - ), - ( - AnyDataValue::new_iri( - PARAMETER_NAME_FORMAT.to_string(), - ), - TupleDataValue::from_iter( - vec![VALUE_FORMAT_ANY; arity] - .iter() - .map(|format| { - AnyDataValue::new_plain_string( - (*format).to_string(), - ) - }) - .collect::>(), - ) - .into(), - ), - ]; - if let Some(base) = self.base() { - attribute_pairs.push(( - AnyDataValue::new_iri(PARAMETER_NAME_BASE.to_string()), - AnyDataValue::new_iri(base.to_string()), - )); - } - - let attributes = MapDataValue::from_iter(attribute_pairs); - - Ok(ImportDirective::from(ImportExportDirective { - predicate: predicate.clone(), - format: FileFormat::RDF(RdfVariant::Unspecified), - attributes, - })) - }, - ), - map( - delimited( - preceded(token("sparql"), cut(self.parse_open_parenthesis())), - tuple(( - self.parse_iri_identifier(), - delimited( - self.parse_comma(), - turtle::string, - self.parse_comma(), - ), - turtle::string, - )), - self.parse_close_parenthesis(), - ), - |(_endpoint, _projection, _query)| { - Err(ParseError::UnsupportedSparqlSource(predicate.clone().0)) - }, - ), - )), - cut(self.parse_dot()), - ))( - remainder - )?; - - let spec = datasource.map_err(|e| Err::Failure(e.at(input)))?; - - Ok((remainder, spec)) - }, - || ParseError::ExpectedDataSourceDeclaration, - ), - ) - } 
- - /// Parse an output directive. - fn parse_output_directive(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_output", - map_error( - delimited( - terminated(token("@output"), cut(multispace_or_comment1)), - cut(map_res::<_, _, _, _, Error, _, _>( - self.parse_iri_like_identifier(), - Ok, - )), - cut(self.parse_dot()), - ), - || ParseError::ExpectedOutputDeclaration, - ), - ) - } - - /// Parse an entry in a [MapDataValue], i.e., am [AnyDataValue]--[AnyDataValue] pair. - fn parse_map_entry( - &'a self, - ) -> impl FnMut(Span<'a>) -> IntermediateResult<(AnyDataValue, AnyDataValue)> { - traced( - "parse_map_entry", - separated_pair( - self.parse_complex_constant_term(), - self.parse_equals(), - map(self.parse_complex_constant_term(), |term| term), - ), - ) - } - - /// Parse a ground map literal. - fn parse_map_literal(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_map_literal", - delimited( - self.parse_open_brace(), - map( - separated_list0(self.parse_comma(), self.parse_map_entry()), - MapDataValue::from_iter, - ), - self.parse_close_brace(), - ), - ) - } - - /// Parse a ground tuple literal. - pub fn parse_tuple_literal( - &'a self, - ) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_tuple_literal", - delimited( - self.parse_open_parenthesis(), - map( - separated_list0(self.parse_comma(), self.parse_complex_constant_term()), - TupleDataValue::from_iter, - ), - self.parse_close_parenthesis(), - ), - ) - } - - /// Parse a file format name. - fn parse_file_format(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced("parse_file_format", move |input| { - let (remainder, format) = - map_res(alpha1, |format: Span<'a>| match *format.fragment() { - FILE_FORMAT_CSV => Ok(FileFormat::CSV), - FILE_FORMAT_DSV => Ok(FileFormat::DSV), - FILE_FORMAT_TSV => Ok(FileFormat::TSV), - FILE_FORMAT_RDF_UNSPECIFIED => Ok(FileFormat::RDF(RdfVariant::Unspecified)), - FILE_FORMAT_RDF_NTRIPLES => Ok(FileFormat::RDF(RdfVariant::NTriples)), - FILE_FORMAT_RDF_NQUADS => Ok(FileFormat::RDF(RdfVariant::NQuads)), - FILE_FORMAT_RDF_TURTLE => Ok(FileFormat::RDF(RdfVariant::Turtle)), - FILE_FORMAT_RDF_TRIG => Ok(FileFormat::RDF(RdfVariant::TriG)), - FILE_FORMAT_RDF_XML => Ok(FileFormat::RDF(RdfVariant::RDFXML)), - FILE_FORMAT_JSON => Ok(FileFormat::JSON), - _ => Err(ParseError::FileFormatError(format.fragment().to_string())), - })(input)?; - - Ok((remainder, format)) - }) - } - - /// Parse an import/export specification. - fn parse_import_export_spec( - &'a self, - ) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced("parse_import_export_spec", move |input| { - let (remainder, predicate) = self.parse_iri_like_identifier()(input)?; - let (remainder, format) = delimited( - space_delimited_token(":-"), - self.parse_file_format(), - multispace_or_comment0, - )(remainder)?; - let (remainder, attributes) = self.parse_map_literal()(remainder)?; - Ok(( - remainder, - ImportExportDirective { - predicate, - format, - attributes, - }, - )) - }) - } - - /// Parse an import directive. - fn parse_import(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_import", - delimited( - terminated(token("@import"), multispace_or_comment1), - cut(map(self.parse_import_export_spec(), ImportDirective::from)), - cut(self.parse_dot()), - ), - ) - } - - /// Parse an export directive. 
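-    ///
-    /// For example: `@export p :- csv {} .` exports the contents of the
-    /// predicate `p` according to the given import/export specification.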
- fn parse_export(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_export", - delimited( - terminated(token("@export"), multispace_or_comment1), - cut(map(self.parse_import_export_spec(), ExportDirective::from)), - cut(self.parse_dot()), - ), - ) - } - - /// Parse a statement. - fn parse_statement(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_statement", - map_error( - alt(( - map(self.parse_fact(), Statement::Fact), - map(self.parse_rule(), Statement::Rule), - )), - || ParseError::ExpectedStatement, - ), - ) - } - - /// Parse a fact. - fn parse_fact(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_fact", - map_error( - move |input| { - let (remainder, (predicate, terms)) = terminated( - pair( - self.parse_iri_like_identifier(), - self.parenthesised(separated_list1( - self.parse_comma(), - parse_ground_term(&self.prefixes), - )), - ), - self.parse_dot(), - )(input)?; - - let predicate_name = predicate.name(); - log::trace!(target: "parser", "found fact {predicate_name}({terms:?})"); - - // We do not allow complex term trees in facts for now - let terms = terms.into_iter().map(Term::Primitive).collect(); - - Ok((remainder, Fact(Atom::new(predicate, terms)))) - }, - || ParseError::ExpectedFact, - ), - ) - } - - /// Parse an IRI identifier, e.g. for predicate names. - fn parse_iri_identifier(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { - map_error( - move |input| { - let (remainder, name) = traced( - "parse_iri_identifier", - alt(( - map(sparql::iriref, |name| sparql::Name::IriReference(&name)), - sparql::prefixed_name, - sparql::blank_node_label, - )), - )(input)?; - - Ok(( - remainder, - Identifier( - resolve_prefixed_name(&self.prefixes.borrow(), name) - .map_err(|e| Err::Failure(e.at(input)))?, - ), - )) - }, - || ParseError::ExpectedIriIdentifier, - ) - } - - /// Parse an IRI-like identifier. - /// - /// This is being used for: - /// * predicate names - /// * built-in functions in term trees - fn parse_iri_like_identifier( - &'a self, - ) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_iri_like_identifier", - map_error( - alt(( - self.parse_iri_identifier(), - self.parse_bare_iri_like_identifier(), - )), - || ParseError::ExpectedIriLikeIdentifier, - ), - ) - } - - /// Parse a qualified predicate name – currently, this is a - /// predicate name together with its arity. - /// - /// FIXME: Obsolete. Can be removed in the future. - fn parse_qualified_predicate_name( - &'a self, - ) -> impl FnMut(Span<'a>) -> IntermediateResult<(Identifier, usize)> { - traced( - "parse_qualified_predicate_name", - pair( - self.parse_iri_like_identifier(), - preceded( - multispace_or_comment0, - delimited( - token("["), - cut(map_res(digit1, |number: Span<'a>| number.parse::())), - cut(token("]")), - ), - ), - ), - ) - } - - /// Parse an IRI-like identifier (e.g. a predicate name) that is not an IRI. - fn parse_bare_iri_like_identifier( - &'a self, - ) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced("parse_bare_iri_like_identifier", move |input| { - let (remainder, name) = parse_bare_name(input)?; - - Ok((remainder, Identifier(name.to_string()))) - }) - } - - /// Parse a rule. 
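-    ///
-    /// For example: `P(?X) :- A(?X, ?Y), ?Y > ?X .`. The parsed body is split
-    /// into proper literals and constraints before the rule is validated.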
- fn parse_rule(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_rule", - map_error( - move |input| { - let (remainder, (head, body)) = pair( - terminated( - separated_list1(self.parse_comma(), self.parse_atom()), - self.parse_arrow(), - ), - cut(terminated( - separated_list1(self.parse_comma(), self.parse_body_expression()), - self.parse_dot(), - )), - )(input)?; - - log::trace!(target: "parser", r#"found rule "{head:?}" :- "{body:?}""#); - - let literals = body - .iter() - .filter_map(|expr| match expr { - BodyExpression::Literal(l) => Some(l.clone()), - _ => None, - }) - .collect(); - let constraints = body - .into_iter() - .filter_map(|expr| match expr { - BodyExpression::Constraint(c) => Some(c), - _ => None, - }) - .collect(); - Ok(( - remainder, - Rule::new_validated(head, literals, constraints) - .map_err(|e| Err::Failure(e.at(input)))?, - )) - }, - || ParseError::ExpectedRule, - ), - ) - } - - /// Parse an atom. - fn parse_atom(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_atom", - map_error( - move |input| { - let (remainder, predicate) = self.parse_iri_like_identifier()(input)?; - let (remainder, terms) = delimited( - self.parse_open_parenthesis(), - cut(separated_list1(self.parse_comma(), self.parse_term())), - cut(self.parse_close_parenthesis()), - )(remainder)?; - - let predicate_name = predicate.name(); - log::trace!(target: "parser", "found atom {predicate_name}({terms:?})"); - - Ok((remainder, Atom::new(predicate, terms))) - }, - || ParseError::ExpectedAtom, - ), - ) - } - - /// Parse a [PrimitiveTerm]. - fn parse_primitive_term(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_primitive_term", - map_error( - alt((parse_ground_term(&self.prefixes), self.parse_variable())), - || ParseError::ExpectedPrimitiveTerm, - ), - ) - } - - /// Parse an aggregate term. - fn parse_aggregate(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_aggregate", - map_error( - move |input| { - let (remainder, _) = nom::character::complete::char('#')(input)?; - let (remainder, aggregate_operation_identifier) = - self.parse_bare_iri_like_identifier()(remainder)?; - let (remainder, terms) = self - .parenthesised(separated_list1(self.parse_comma(), self.parse_term()))( - remainder, - )?; - - if let Some(logical_aggregate_operation) = - (&aggregate_operation_identifier).into() - { - let aggregate = Aggregate { - logical_aggregate_operation, - terms, - }; - - Ok((remainder, Term::Aggregation(aggregate))) - } else { - Err(Err::Failure( - ParseError::UnknownAggregateOperation( - aggregate_operation_identifier.name(), - ) - .at(input), - )) - } - }, - || ParseError::ExpectedAggregate, - ), - ) - } - - /// Parse a variable. - fn parse_variable(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_variable", - map_error( - map( - alt(( - self.parse_universal_variable(), - self.parse_existential_variable(), - )), - PrimitiveTerm::Variable, - ), - || ParseError::ExpectedVariable, - ), - ) - } - - /// Parse a universally quantified variable. - fn parse_universal_variable(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_universal_variable", - map_error( - map( - preceded(token("?"), cut(self.parse_variable_name())), - Variable::Universal, - ), - || ParseError::ExpectedUniversalVariable, - ), - ) - } - - /// Parse an existentially quantified variable. 
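-    ///
-    /// For example, `!V` (in contrast to the universally quantified `?V`).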
- fn parse_existential_variable( - &'a self, - ) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_existential_variable", - map_error( - map( - preceded(token("!"), cut(self.parse_variable_name())), - Variable::Existential, - ), - || ParseError::ExpectedExistentialVariable, - ), - ) - } - - /// Parse a variable name. - fn parse_variable_name(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_variable", - map_error( - move |input| { - let (remainder, name) = parse_simple_name(input)?; - - Ok((remainder, name.to_string())) - }, - || ParseError::ExpectedVariableName, - ), - ) - } - - /// Parse a literal (i.e., a possibly negated atom). - fn parse_literal(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_literal", - map_error( - alt((self.parse_negative_literal(), self.parse_positive_literal())), - || ParseError::ExpectedLiteral, - ), - ) - } - - /// Parse a non-negated literal. - fn parse_positive_literal(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_positive_literal", - map_error(map(self.parse_atom(), Literal::Positive), || { - ParseError::ExpectedPositiveLiteral - }), - ) - } - - /// Parse a negated literal. - fn parse_negative_literal(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_negative_literal", - map_error( - map( - preceded(self.parse_not(), cut(self.parse_atom())), - Literal::Negative, - ), - || ParseError::ExpectedNegativeLiteral, - ), - ) - } - - /// Parse operation that is filters a variable - fn parse_constraint_operator( - &'a self, - ) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_constraint_operator", - map_error( - delimited( - multispace_or_comment0, - alt(( - value(ConstraintOperator::LessThanEq, token("<=")), - value(ConstraintOperator::LessThan, token("<")), - value(ConstraintOperator::Equals, token("=")), - value(ConstraintOperator::Unequals, token("!=")), - value(ConstraintOperator::GreaterThanEq, token(">=")), - value(ConstraintOperator::GreaterThan, token(">")), - )), - multispace_or_comment0, - ), - || ParseError::ExpectedFilterOperator, - ), - ) - } - - /// Parse a term tree. - /// - /// This may consist of: - /// * A function term - /// * An arithmetic expression, which handles e.g. precedence of addition over multiplication - fn parse_term(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_term", - map_error( - move |input| { - delimited( - multispace_or_comment0, - alt(( - self.parse_arithmetic_expression(), - // map(self.parse_constraint(), |c| c.as_binary_term()), - self.parse_parenthesised_term(), - self.parse_function_term(), - self.parse_aggregate(), - self.parse_wildcard(), - )), - multispace_or_comment0, - )(input) - }, - || ParseError::ExpectedTerm, - ), - ) - } - - /// Parse a wildcard variable. - fn parse_wildcard(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_wildcard", - map_res(space_delimited_token("_"), |_| { - let wildcard = Variable::new_unamed(*self.wildcard_generator.borrow()); - *self.wildcard_generator.borrow_mut() += 1; - Ok::<_, ParseError>(Term::Primitive(PrimitiveTerm::Variable(wildcard))) - }), - ) - } - - /// Parse a parenthesised term tree. 
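-    ///
-    /// For example, `(?Y - 5)`: the parentheses only group the inner term
-    /// tree and do not appear in the resulting AST.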
- fn parse_parenthesised_term(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_parenthesised_term", - map_error(self.parenthesised(self.parse_term()), || { - ParseError::ExpectedParenthesisedTerm - }), - ) - } - - /// Parse a function term, possibly with nested term trees. - fn parse_function_term(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_function_term", - map_error( - move |input| { - let (remainder, name) = self.parse_iri_like_identifier()(input)?; - - if let Ok(op) = UnaryOperation::construct_from_name(&name.0) { - let (remainder, subterm) = - (self.parenthesised(self.parse_term()))(remainder)?; - - Ok((remainder, Term::Unary(op, Box::new(subterm)))) - } else if let Some(op) = BinaryOperation::construct_from_name(&name.0) { - let (remainder, (left, _, right)) = (self.parenthesised(tuple(( - self.parse_term(), - self.parse_comma(), - self.parse_term(), - ))))(remainder)?; - - Ok(( - remainder, - Term::Binary { - operation: op, - lhs: Box::new(left), - rhs: Box::new(right), - }, - )) - } else if let Some(op) = TernaryOperation::construct_from_name(&name.0) { - let (remainder, (first, _, second, _, third)) = - (self.parenthesised(tuple(( - self.parse_term(), - self.parse_comma(), - self.parse_term(), - self.parse_comma(), - self.parse_term(), - ))))(remainder)?; - - Ok(( - remainder, - Term::Ternary { - operation: op, - first: Box::new(first), - second: Box::new(second), - third: Box::new(third), - }, - )) - } else if let Some(op) = NaryOperation::construct_from_name(&name.0) { - let (remainder, subterms) = (self.parenthesised(separated_list0( - self.parse_comma(), - self.parse_term(), - )))(remainder)?; - - Ok(( - remainder, - Term::Nary { - operation: op, - parameters: subterms, - }, - )) - } else { - let (remainder, subterms) = (self.parenthesised(separated_list0( - self.parse_comma(), - self.parse_term(), - )))(remainder)?; - - Ok((remainder, Term::Function(name, subterms))) - } - }, - || ParseError::ExpectedFunctionTerm, - ), - ) - } - - /// Parse an arithmetic expression - fn parse_arithmetic_expression(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_arithmetic_expression", - map_error( - move |input| { - let (remainder, first) = self.parse_arithmetic_product()(input)?; - let (remainder, expressions) = many0(alt(( - preceded( - delimited(multispace_or_comment0, token("+"), multispace_or_comment0), - map(self.parse_arithmetic_product(), |term| { - (ArithmeticOperator::Addition, term) - }), - ), - preceded( - delimited(multispace_or_comment0, token("-"), multispace_or_comment0), - map(self.parse_arithmetic_product(), |term| { - (ArithmeticOperator::Subtraction, term) - }), - ), - )))(remainder)?; - - Ok(( - remainder, - Self::fold_arithmetic_expressions(first, expressions), - )) - }, - || ParseError::ExpectedArithmeticExpression, - ), - ) - } - - /// Parse an arithmetic product, i.e., an expression involving - /// only `*` and `/` over subexpressions. 
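-    ///
-    /// Because sums are parsed in terms of products, multiplication binds
-    /// tighter than addition: `3 + 4 * 5` folds to `3 + (4 * 5)` rather than
-    /// `(3 + 4) * 5`.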
- fn parse_arithmetic_product(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_arithmetic_product", - map_error( - move |input| { - let (remainder, first) = self.parse_arithmetic_factor()(input)?; - let (remainder, factors) = many0(alt(( - preceded( - delimited(multispace_or_comment0, token("*"), multispace_or_comment0), - map(self.parse_arithmetic_factor(), |term| { - (ArithmeticOperator::Multiplication, term) - }), - ), - preceded( - delimited(multispace_or_comment0, token("/"), multispace_or_comment0), - map(self.parse_arithmetic_factor(), |term| { - (ArithmeticOperator::Division, term) - }), - ), - )))(remainder)?; - - Ok((remainder, Self::fold_arithmetic_expressions(first, factors))) - }, - || ParseError::ExpectedArithmeticProduct, - ), - ) - } - - /// Parse an arithmetic factor. - fn parse_arithmetic_factor(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_arithmetic_factor", - map_error( - alt(( - self.parse_function_term(), - self.parse_aggregate(), - map(self.parse_primitive_term(), Term::Primitive), - self.parse_parenthesised_term(), - )), - || ParseError::ExpectedArithmeticFactor, - ), - ) - } - - /// Fold a sequence of ([ArithmeticOperator], [PrimitiveTerm]) pairs into a single [Term]. - fn fold_arithmetic_expressions( - initial: Term, - sequence: Vec<(ArithmeticOperator, Term)>, - ) -> Term { - sequence.into_iter().fold(initial, |acc, pair| { - let (operation, expression) = pair; - - use ArithmeticOperator::*; - - let operation = match operation { - Addition => BinaryOperation::NumericAddition, - Subtraction => BinaryOperation::NumericSubtraction, - Multiplication => BinaryOperation::NumericMultiplication, - Division => BinaryOperation::NumericDivision, - }; - - Term::Binary { - operation, - lhs: Box::new(acc), - rhs: Box::new(expression), - } - }) - } - - /// Parse expression of the form ` ` expressing a constraint. - fn parse_constraint(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_constraint", - map_error( - map( - tuple(( - self.parse_term(), - self.parse_constraint_operator(), - cut(self.parse_term()), - )), - |(lhs, operation, rhs)| operation.into_constraint(lhs, rhs), - ), - || ParseError::ExpectedConstraint, - ), - ) - } - - /// Parse body expression - fn parse_body_expression( - &'a self, - ) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_body_expression", - map_error( - alt(( - map(self.parse_constraint(), BodyExpression::Constraint), - map(self.parse_literal(), BodyExpression::Literal), - )), - || ParseError::ExpectedBodyExpression, - ), - ) - } - - /// Parse a program in the rules language. 
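-    ///
-    /// A program is an optional `@base`, followed by any number of `@prefix`
-    /// declarations, followed by an arbitrary mix of sources, imports, exports,
-    /// facts, rules and `@output` directives; `@base` and `@prefix` occurring
-    /// after this point are rejected as late declarations.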
- pub fn parse_program(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { - fn check_for_invalid_statement<'a, F>( - parser: &mut F, - input: Span<'a>, - ) -> IntermediateResult<'a, ()> - where - F: FnMut(Span<'a>) -> IntermediateResult, - { - if let Ok((_, e)) = parser(input) { - return Err(Err::Failure(e.at(input))); - } - - Ok((input, ())) - } - - traced("parse_program", move |input| { - let (remainder, _) = multispace_or_comment0(input)?; - let (remainder, _) = opt(self.parse_base())(remainder)?; - - check_for_invalid_statement( - &mut map(self.parse_base(), |_| ParseError::LateBaseDeclaration), - remainder, - )?; - - let (remainder, _) = many0(self.parse_prefix())(remainder)?; - - check_for_invalid_statement( - &mut map(self.parse_base(), |_| ParseError::LateBaseDeclaration), - remainder, - )?; - check_for_invalid_statement( - &mut map(self.parse_prefix(), |_| ParseError::LatePrefixDeclaration), - remainder, - )?; - - let mut statements = Vec::new(); - let mut output_predicates = Vec::new(); - let mut sources = Vec::new(); - let mut imports = Vec::new(); - let mut exports = Vec::new(); - - let (remainder, _) = many0(alt(( - map(self.parse_source(), |source| sources.push(source)), - map(self.parse_import(), |import| imports.push(import)), - map(self.parse_export(), |export| exports.push(export)), - map(self.parse_statement(), |statement| { - statements.push(statement) - }), - map(self.parse_output_directive(), |output_predicate| { - output_predicates.push(output_predicate) - }), - )))(remainder)?; - - check_for_invalid_statement( - &mut map(self.parse_base(), |_| ParseError::LateBaseDeclaration), - remainder, - )?; - check_for_invalid_statement( - &mut map(self.parse_prefix(), |_| ParseError::LatePrefixDeclaration), - remainder, - )?; - - let base = self.base().map(String::from); - let prefixes = self - .prefixes - .borrow() - .iter() - .map(|(&prefix, &iri)| (prefix.to_string(), iri.to_string())) - .collect::>(); - let mut rules = Vec::new(); - let mut facts = Vec::new(); - - statements.iter().for_each(|statement| match statement { - Statement::Fact(value) => facts.push(value.clone()), - Statement::Rule(value) => rules.push(value.clone()), - }); - - let mut program_builder = Program::builder() - .prefixes(prefixes) - .imports(sources) - .imports(imports) - .exports(exports) - .rules(rules) - .facts(facts); - - if let Some(base) = base { - program_builder = program_builder.base(base); - } - - if !output_predicates.is_empty() { - program_builder = program_builder.output_predicates(output_predicates); - } - - Ok((remainder, program_builder.build())) - }) - } - - /// Return the declared base, if set, or None. - #[must_use] - fn base(&self) -> Option<&'a str> { - *self.base.borrow() - } -} - -#[cfg(test)] -mod test { - use super::*; - use std::assert_matches::assert_matches; - use test_log::test; - - macro_rules! assert_parse { - ($parser:expr, $left:expr, $right:expr $(,) ?) => { - assert_eq!( - all_input_consumed($parser)($left).expect( - format!("failed to parse `{:?}`\nexpected `{:?}`", $left, $right).as_str() - ), - $right - ); - }; - } - - macro_rules! assert_fails { - ($parser:expr, $left:expr, $right:pat $(,) ?) => {{ - // Store in intermediate variable to prevent from being dropped too early - let result = all_input_consumed($parser)($left); - assert_matches!(result, Err($right)) - }}; - } - - macro_rules! assert_parse_error { - ($parser:expr, $left:expr, $right:pat $(,) ?) => { - assert_fails!($parser, $left, LocatedParseError { source: $right, .. 
}) - }; - } - - macro_rules! assert_expected_token { - ($parser:expr, $left:expr, $right:expr $(,) ?) => { - let _token = String::from($right); - assert_parse_error!($parser, $left, ParseError::ExpectedToken(_token),); - }; - } - - #[test] - fn base_directive() { - let base = "http://example.org/foo"; - let input = format!("@base <{base}> ."); - let parser = RuleParser::new(); - let b = Identifier(base.to_string()); - assert!(parser.base().is_none()); - assert_parse!(parser.parse_base(), input.as_str(), b); - assert_eq!(parser.base(), Some(base)); - } - - #[test] - fn prefix_directive() { - let prefix = unsafe { Span::new_from_raw_offset(8, 1, "foo", ()) }; - let iri = "http://example.org/foo"; - let input = format!("@prefix {prefix}: <{iri}> ."); - let parser = RuleParser::new(); - assert!(resolve_prefix(&parser.prefixes.borrow(), &prefix).is_err()); - assert_parse!(parser.parse_prefix(), input.as_str(), prefix); - assert_eq!( - resolve_prefix(&parser.prefixes.borrow(), &prefix).map_err(|_| ()), - Ok(iri) - ); - } - - #[test] - #[cfg_attr(miri, ignore)] - fn source() { - /// Helper function to create source-like imports - fn csv_import(predicate: Identifier, filename: &str, arity: i64) -> ImportDirective { - let attributes = MapDataValue::from_iter([ - ( - AnyDataValue::new_iri(PARAMETER_NAME_RESOURCE.to_string()), - AnyDataValue::new_plain_string(filename.to_string()), - ), - ( - AnyDataValue::new_iri(PARAMETER_NAME_FORMAT.to_string()), - TupleDataValue::from_iter( - vec![ - VALUE_FORMAT_ANY; - usize::try_from(arity).expect("required for these tests") - ] - .iter() - .map(|format| AnyDataValue::new_plain_string((*format).to_string())) - .collect::>(), - ) - .into(), - ), - ]); - ImportDirective::from(ImportExportDirective { - predicate, - format: FileFormat::CSV, - attributes, - }) - } - - let parser = RuleParser::new(); - let file = "drinks.csv"; - let predicate_name = "drink"; - let predicate = Identifier(predicate_name.to_string()); - let default_import = csv_import(predicate.clone(), file, 1); - - // rulewerk accepts all of these variants - let input = format!(r#"@source {predicate_name}[1]: load-csv("{file}") ."#); - assert_parse!(parser.parse_source(), &input, default_import); - let input = format!(r#"@source {predicate_name}[1] : load-csv("{file}") ."#); - assert_parse!(parser.parse_source(), &input, default_import); - let input = format!(r#"@source {predicate_name}[1] : load-csv ( "{file}" ) ."#); - assert_parse!(parser.parse_source(), &input, default_import); - let input = format!(r#"@source {predicate_name} [1] : load-csv ( "{file}" ) ."#); - assert_parse!(parser.parse_source(), &input, default_import); - } - - #[test] - fn fact() { - let parser = RuleParser::new(); - let predicate = "p"; - let value = "foo"; - let datatype = "bar"; - let p = Identifier(predicate.to_string()); - let v = value.to_string(); - let t = datatype.to_string(); - let fact = format!(r#"{predicate}("{value}"^^<{datatype}>) ."#); - - let expected_fact = Fact(Atom::new( - p, - vec![Term::Primitive(PrimitiveTerm::GroundTerm( - AnyDataValue::new_from_typed_literal(v, t).expect("unknown types should work"), - ))], - )); - - assert_parse!(parser.parse_fact(), &fact, expected_fact,); - } - - #[test] - fn fact_namespaced() { - let parser = RuleParser::new(); - let predicate = "p"; - let name = "foo"; - let prefix = unsafe { Span::new_from_raw_offset(8, 1, "eg", ()) }; - let iri = "http://example.org/foo"; - let prefix_declaration = format!("@prefix {prefix}: <{iri}> ."); - let p = 
Identifier(predicate.to_string()); - let pn = format!("{prefix}:{name}"); - let v = format!("{iri}{name}"); - let fact = format!(r#"{predicate}({pn}) ."#); - - assert_parse!(parser.parse_prefix(), &prefix_declaration, prefix); - - let expected_fact = Fact(Atom::new( - p, - vec![Term::Primitive(PrimitiveTerm::GroundTerm( - AnyDataValue::new_iri(v), - ))], - )); - - assert_parse!(parser.parse_fact(), &fact, expected_fact,); - } - - #[test] - fn fact_bnode() { - let parser = RuleParser::new(); - let predicate = "p"; - let name = "foo"; - let p = Identifier(predicate.to_string()); - let pn = format!("_:{name}"); - let fact = format!(r#"{predicate}({pn}) ."#); - - let expected_fact = Fact(Atom::new( - p, - vec![Term::Primitive(PrimitiveTerm::GroundTerm( - AnyDataValue::new_iri(pn), - ))], - )); - - assert_parse!(parser.parse_fact(), &fact, expected_fact,); - } - - #[test] - fn fact_numbers() { - let parser = RuleParser::new(); - let predicate = "p"; - let p = Identifier(predicate.to_string()); - let int = 23_i64; - let dbl = 42.0; - let dec = 13.37; - let fact = format!(r#"{predicate}({int}, {dbl:.1}E0, {dec:.2}) ."#); - - let expected_fact = Fact(Atom::new( - p, - vec![ - Term::Primitive(PrimitiveTerm::GroundTerm( - AnyDataValue::new_integer_from_i64(int), - )), - Term::Primitive(PrimitiveTerm::GroundTerm( - AnyDataValue::new_double_from_f64(dbl).expect("is not NaN"), - )), - Term::Primitive(PrimitiveTerm::GroundTerm( - AnyDataValue::new_double_from_f64(dec).expect("is not NaN"), - )), - ], - )); - - assert_parse!(parser.parse_fact(), &fact, expected_fact,); - } - - #[test] - fn fact_rdf_literal_xsd_string() { - let parser = RuleParser::new(); - - let prefix = unsafe { Span::new_from_raw_offset(8, 1, "xsd", ()) }; - let iri = "http://www.w3.org/2001/XMLSchema#"; - let prefix_declaration = format!("@prefix {prefix}: <{iri}> ."); - - assert_parse!(parser.parse_prefix(), &prefix_declaration, prefix); - - let predicate = "p"; - let value = "my nice string"; - let datatype = "xsd:string"; - - let p = Identifier(predicate.to_string()); - let v = value.to_string(); - let fact = format!(r#"{predicate}("{value}"^^{datatype}) ."#); - - let expected_fact = Fact(Atom::new( - p, - vec![Term::Primitive(PrimitiveTerm::GroundTerm( - AnyDataValue::new_plain_string(v), - ))], - )); - - assert_parse!(parser.parse_fact(), &fact, expected_fact,); - } - - #[test] - fn fact_string_literal() { - let parser = RuleParser::new(); - let predicate = "p"; - let value = "my nice string"; - let p = Identifier(predicate.to_string()); - let v = value.to_string(); - let fact = format!(r#"{predicate}("{value}") ."#); - - let expected_fact = Fact(Atom::new( - p, - vec![Term::Primitive(PrimitiveTerm::GroundTerm( - AnyDataValue::new_plain_string(v), - ))], - )); - - assert_parse!(parser.parse_fact(), &fact, expected_fact,); - } - - #[test] - fn fact_language_string() { - let parser = RuleParser::new(); - let predicate = "p"; - let v = "Qapla"; - let langtag = "tlh"; - let p = Identifier(predicate.to_string()); - let value = v.to_string(); - let fact = format!(r#"{predicate}("{v}"@{langtag}) ."#); - let tag = langtag.to_string(); - - let expected_fact = Fact(Atom::new( - p, - vec![Term::Primitive(PrimitiveTerm::GroundTerm( - AnyDataValue::new_language_tagged_string(value, tag), - ))], - )); - - assert_parse!(parser.parse_fact(), &fact, expected_fact); - } - - #[test] - fn fact_abstract() { - let parser = RuleParser::new(); - let predicate = "p"; - let name = "a"; - let p = Identifier(predicate.to_string()); - let fact = 
format!(r#"{predicate}({name}) ."#); - - let expected_fact = Fact(Atom::new( - p, - vec![Term::Primitive(PrimitiveTerm::GroundTerm( - AnyDataValue::new_iri(name.to_string()), - ))], - )); - - assert_parse!(parser.parse_fact(), &fact, expected_fact,); - } - - #[test] - fn fact_comment() { - let parser = RuleParser::new(); - let predicate = "p"; - let value = "foo"; - let datatype = "bar"; - let p = Identifier(predicate.to_string()); - let v = value.to_string(); - let t = datatype.to_string(); - let fact = format!( - r#"{predicate}(% comment 1 - "{value}"^^<{datatype}> % comment 2 - ) % comment 3 - . % comment 4 - %"# - ); - - let expected_fact = Fact(Atom::new( - p, - vec![Term::Primitive(PrimitiveTerm::GroundTerm( - AnyDataValue::new_from_typed_literal(v, t) - .expect("unknown datatype should always work"), - ))], - )); - - assert_parse!(parser.parse_fact(), &fact, expected_fact,); - } - - #[test] - #[cfg_attr(miri, ignore)] - fn filter() { - let parser = RuleParser::new(); - let aa = "A"; - let a = Identifier(aa.to_string()); - let bb = "B"; - let b = Identifier(bb.to_string()); - let pp = "P"; - let p = Identifier(pp.to_string()); - let xx = "X"; - let x = xx.to_string(); - let yy = "Y"; - let y = yy.to_string(); - let zz = "Z"; - let z = zz.to_string(); - - let rule = format!( - "{pp}(?{xx}) :- {aa}(?{xx}, ?{yy}), ?{yy} > ?{xx}, {bb}(?{zz}), ?{xx} = 3, ?{zz} < 7, ?{xx} <= ?{zz}, ?{zz} >= ?{yy} ." - ); - - let expected_rule = Rule::new( - vec![Atom::new( - p, - vec![Term::Primitive(PrimitiveTerm::Variable( - Variable::Universal(x.clone()), - ))], - )], - vec![ - Literal::Positive(Atom::new( - a, - vec![ - Term::Primitive(PrimitiveTerm::Variable(Variable::Universal(x.clone()))), - Term::Primitive(PrimitiveTerm::Variable(Variable::Universal(y.clone()))), - ], - )), - Literal::Positive(Atom::new( - b, - vec![Term::Primitive(PrimitiveTerm::Variable( - Variable::Universal(z.clone()), - ))], - )), - ], - vec![ - Constraint::GreaterThan( - Term::Primitive(PrimitiveTerm::Variable(Variable::Universal(y.clone()))), - Term::Primitive(PrimitiveTerm::Variable(Variable::Universal(x.clone()))), - ), - Constraint::Equals( - Term::Primitive(PrimitiveTerm::Variable(Variable::Universal(x.clone()))), - Term::Primitive(PrimitiveTerm::GroundTerm( - AnyDataValue::new_integer_from_i64(3), - )), - ), - Constraint::LessThan( - Term::Primitive(PrimitiveTerm::Variable(Variable::Universal(z.clone()))), - Term::Primitive(PrimitiveTerm::GroundTerm( - AnyDataValue::new_integer_from_i64(7), - )), - ), - Constraint::LessThanEq( - Term::Primitive(PrimitiveTerm::Variable(Variable::Universal(x))), - Term::Primitive(PrimitiveTerm::Variable(Variable::Universal(z.clone()))), - ), - Constraint::GreaterThanEq( - Term::Primitive(PrimitiveTerm::Variable(Variable::Universal(z))), - Term::Primitive(PrimitiveTerm::Variable(Variable::Universal(y))), - ), - ], - ); - - assert_parse!(parser.parse_rule(), &rule, expected_rule,); - } - - #[test] - #[allow(clippy::redundant_clone)] - fn parse_output() { - let parser = RuleParser::new(); - - let j2 = Identifier("J2".to_string()); - - assert_parse!(parser.parse_output_directive(), "@output J2 .", j2.clone()); - assert_parse_error!( - parser.parse_output_directive(), - "@output J2[3] .", - ParseError::ExpectedOutputDeclaration - ); - } - - #[test] - fn parse_errors() { - let parser = RuleParser::new(); - - assert_expected_token!(parser.parse_dot(), "", "."); - assert_expected_token!(parser.parse_dot(), ":-", "."); - assert_expected_token!(parser.parse_comma(), "", ","); - 
assert_expected_token!(parser.parse_comma(), ":-", ","); - assert_expected_token!(parser.parse_not(), "", "~"); - assert_expected_token!(parser.parse_not(), ":-", "~"); - assert_expected_token!(parser.parse_arrow(), "", ":-"); - assert_expected_token!(parser.parse_arrow(), "-:", ":-"); - assert_expected_token!(parser.parse_open_parenthesis(), "", "("); - assert_expected_token!(parser.parse_open_parenthesis(), "-:", "("); - assert_expected_token!(parser.parse_close_parenthesis(), "", ")"); - assert_expected_token!(parser.parse_close_parenthesis(), "-:", ")"); - - assert_parse_error!( - parser.parse_base(), - "@base . @base .", - ParseError::LateBaseDeclaration - ); - - assert_parse_error!( - parser.parse_program(), - "@prefix f: . @base .", - ParseError::LateBaseDeclaration - ); - - assert_parse_error!( - parser.parse_program(), - "@output p . @base .", - ParseError::LateBaseDeclaration - ); - - assert_parse_error!( - parser.parse_program(), - "@output p . @prefix g: .", - ParseError::LatePrefixDeclaration - ); - } - #[test] - #[cfg_attr(miri, ignore)] - fn parse_function_terms() { - let parser = RuleParser::new(); - - let twenty_three = Term::Primitive(PrimitiveTerm::GroundTerm( - AnyDataValue::new_integer_from_i64(23), - )); - let fourty_two = Term::Primitive(PrimitiveTerm::GroundTerm( - AnyDataValue::new_integer_from_i64(42), - )); - let twenty_three_times_fourty_two = Term::Binary { - operation: BinaryOperation::NumericMultiplication, - lhs: Box::new(twenty_three.clone()), - rhs: Box::new(fourty_two.clone()), - }; - - assert_parse_error!( - parser.parse_function_term(), - "", - ParseError::ExpectedFunctionTerm - ); - - let nullary_function = Term::Function(Identifier(String::from("nullary_function")), vec![]); - assert_parse!( - parser.parse_function_term(), - "nullary_function()", - nullary_function - ); - assert_parse!( - parser.parse_function_term(), - "nullary_function( )", - nullary_function - ); - assert_parse_error!( - parser.parse_function_term(), - "nullary_function( () )", - ParseError::ExpectedFunctionTerm - ); - - let unary_function = Term::Function( - Identifier(String::from("unary_function")), - vec![fourty_two.clone()], - ); - assert_parse!( - parser.parse_function_term(), - "unary_function(42)", - unary_function - ); - assert_parse!( - parser.parse_function_term(), - "unary_function((42))", - unary_function - ); - assert_parse!( - parser.parse_function_term(), - "unary_function(( (42 )))", - unary_function - ); - - let binary_function = Term::Function( - Identifier(String::from("binary_function")), - vec![fourty_two.clone(), twenty_three.clone()], - ); - assert_parse!( - parser.parse_function_term(), - "binary_function(42, 23)", - binary_function - ); - - let function_with_nested_algebraic_expression = Term::Function( - Identifier(String::from("function")), - vec![twenty_three_times_fourty_two], - ); - assert_parse!( - parser.parse_function_term(), - "function( 23 *42)", - function_with_nested_algebraic_expression - ); - - let nested_function = Term::Function( - Identifier(String::from("nested_function")), - vec![nullary_function.clone()], - ); - - assert_parse!( - parser.parse_function_term(), - "nested_function(nullary_function())", - nested_function - ); - - let triple_nested_function = Term::Function( - Identifier(String::from("nested_function")), - vec![Term::Function( - Identifier(String::from("nested_function")), - vec![Term::Function( - Identifier(String::from("nested_function")), - vec![nullary_function.clone()], - )], - )], - ); - assert_parse!( - 
parser.parse_function_term(), - "nested_function( nested_function( (nested_function(nullary_function()) ) ))", - triple_nested_function - ); - } - - #[test] - fn parse_terms() { - let parser = RuleParser::new(); - - assert_parse_error!(parser.parse_term(), "", ParseError::ExpectedTerm); - - assert_parse!( - parser.parse_term(), - "constant", - Term::Primitive(PrimitiveTerm::GroundTerm(AnyDataValue::new_iri( - String::from("constant") - ))) - ); - } - - #[test] - fn parse_aggregates() { - let parser = RuleParser::new(); - - assert_parse_error!(parser.parse_aggregate(), "", ParseError::ExpectedAggregate); - - assert_parse!( - parser.parse_aggregate(), - "#min(?VARIABLE)", - Term::Aggregation(Aggregate { - logical_aggregate_operation: LogicalAggregateOperation::MinNumber, - terms: vec![Term::Primitive(PrimitiveTerm::Variable( - Variable::Universal(String::from("VARIABLE")) - ))] - }) - ); - - assert_parse_error!( - parser.parse_aggregate(), - "#test(?VAR1, ?VAR2)", - ParseError::ExpectedAggregate - ) - } - - #[test] - fn parse_unary_function() { - let parser = RuleParser::new(); - - let expression = "ABS(4)"; - let expected_term = Term::Unary( - UnaryOperation::NumericAbsolute, - Box::new(Term::Primitive(PrimitiveTerm::GroundTerm( - AnyDataValue::new_integer_from_i64(4), - ))), - ); - - assert_parse!(parser.parse_arithmetic_factor(), expression, expected_term); - } - - #[test] - fn parse_arithmetic_and_functions() { - let parser = RuleParser::new(); - - let expression = "5 * ABS(SQRT(4) - 3)"; - - let expected_term = Term::Binary { - operation: BinaryOperation::NumericMultiplication, - lhs: Box::new(Term::Primitive(PrimitiveTerm::GroundTerm( - AnyDataValue::new_integer_from_i64(5), - ))), - rhs: Box::new(Term::Unary( - UnaryOperation::NumericAbsolute, - Box::new(Term::Binary { - operation: BinaryOperation::NumericSubtraction, - lhs: Box::new(Term::Unary( - UnaryOperation::NumericSquareroot, - Box::new(Term::Primitive(PrimitiveTerm::GroundTerm( - AnyDataValue::new_integer_from_i64(4), - ))), - )), - rhs: Box::new(Term::Primitive(PrimitiveTerm::GroundTerm( - AnyDataValue::new_integer_from_i64(3), - ))), - }), - )), - }; - - assert_parse!(parser.parse_term(), expression, expected_term); - } - - #[test] - fn parse_assignment() { - let parser = RuleParser::new(); - - let expression = "?X = ABS(?Y - 5) * (7 + ?Z)"; - - let variable = Term::Primitive(PrimitiveTerm::Variable(Variable::Universal( - "X".to_string(), - ))); - - let term = Term::Binary { - operation: BinaryOperation::NumericMultiplication, - lhs: Box::new(Term::Unary( - UnaryOperation::NumericAbsolute, - Box::new(Term::Binary { - operation: BinaryOperation::NumericSubtraction, - lhs: Box::new(Term::Primitive(PrimitiveTerm::Variable( - Variable::Universal("Y".to_string()), - ))), - rhs: Box::new(Term::Primitive(PrimitiveTerm::GroundTerm( - AnyDataValue::new_integer_from_i64(5), - ))), - }), - )), - rhs: Box::new(Term::Binary { - operation: BinaryOperation::NumericAddition, - lhs: Box::new(Term::Primitive(PrimitiveTerm::GroundTerm( - AnyDataValue::new_integer_from_i64(7), - ))), - rhs: Box::new(Term::Primitive(PrimitiveTerm::Variable( - Variable::Universal("Z".to_string()), - ))), - }), - }; - - let expected = Constraint::Equals(variable, term); - - assert_parse!(parser.parse_constraint(), expression, expected); - } - - #[test] - fn parse_complex_condition() { - let parser = RuleParser::new(); - - let expression = "ABS(?X - ?Y) <= ?Z + SQRT(?Y)"; - - let left_term = Term::Unary( - UnaryOperation::NumericAbsolute, - Box::new(Term::Binary { 
- operation: BinaryOperation::NumericSubtraction, - lhs: Box::new(Term::Primitive(PrimitiveTerm::Variable( - Variable::Universal(String::from("X")), - ))), - rhs: Box::new(Term::Primitive(PrimitiveTerm::Variable( - Variable::Universal(String::from("Y")), - ))), - }), - ); - - let right_term = Term::Binary { - operation: BinaryOperation::NumericAddition, - lhs: Box::new(Term::Primitive(PrimitiveTerm::Variable( - Variable::Universal(String::from("Z")), - ))), - rhs: Box::new(Term::Unary( - UnaryOperation::NumericSquareroot, - Box::new(Term::Primitive(PrimitiveTerm::Variable( - Variable::Universal(String::from("Y")), - ))), - )), - }; - - let expected = Constraint::LessThanEq(left_term, right_term); - - assert_parse!(parser.parse_constraint(), expression, expected); - } - - #[test] - fn map_literal() { - let parser = RuleParser::new(); - assert_parse!( - parser.parse_map_literal(), - r#"{}"#, - MapDataValue::from_iter([]), - ); - - let ident = "foo"; - let key = AnyDataValue::new_iri(ident.to_string()); - - let entry = format!("{ident}=23"); - assert_parse!( - parser.parse_map_entry(), - &entry, - (key.clone(), AnyDataValue::new_integer_from_i64(23)) - ); - - let pairs = vec![ - ( - AnyDataValue::new_plain_string("23".to_string()), - AnyDataValue::new_integer_from_i64(42), - ), - ( - AnyDataValue::new_iri("foo".to_string()), - AnyDataValue::new_integer_from_i64(23), - ), - ]; - - assert_parse!( - parser.parse_map_literal(), - r#"{foo = 23, "23" = 42}"#, - pairs.clone().into_iter().collect::() - ); - } - - #[test] - fn nested_map_literal() { - let parser = RuleParser::new(); - - let pairs = vec![( - AnyDataValue::new_iri("inner".to_string()), - MapDataValue::from_iter([]).into(), - )]; - - assert_parse!( - parser.parse_map_literal(), - r#"{inner = {}}"#, - pairs.clone().into_iter().collect::() - ); - } - - #[test] - fn tuple_literal() { - let parser = RuleParser::new(); - - let expected: TupleDataValue = [ - AnyDataValue::new_iri("something".to_string()), - AnyDataValue::new_integer_from_i64(42), - TupleDataValue::from_iter([]).into(), - ] - .into_iter() - .collect(); - - assert_parse!( - parser.parse_tuple_literal(), - r#"(something, 42, ())"#, - expected - ); - } - - #[test] - fn import_export() { - let parser = RuleParser::new(); - - let name = "p".to_string(); - let predicate = Identifier(name.clone()); - let qualified = format!("{name} "); - let arguments = r#"{delimiter = ";", resource = }"#; - let spec = format!("{qualified} :- dsv{arguments}"); - let directive = format!("@import {spec} ."); - let directive_export = format!("@export {spec} ."); - let attributes = parser.parse_map_literal()(arguments.into()).unwrap().1; - - assert_parse!( - parser.parse_import_export_spec(), - &spec, - ImportExportDirective { - predicate: predicate.clone(), - format: FileFormat::DSV, - attributes: attributes.clone(), - } - ); - - assert_parse!( - parser.parse_import(), - &directive, - ImportDirective::from(ImportExportDirective { - predicate: predicate.clone(), - format: FileFormat::DSV, - attributes: attributes.clone() - }) - ); - - assert_parse!( - parser.parse_export(), - &directive_export, - ExportDirective::from(ImportExportDirective { - predicate: predicate.clone(), - format: FileFormat::DSV, - attributes: attributes.clone() - }) - ); - } -} - -/// NEW PARSER -pub mod new { - use std::borrow::BorrowMut; - use std::cell::RefCell; - - use super::ast::named_tuple::NamedTuple; - use super::ast::{ - atom::*, directive::*, map::*, program::*, statement::*, term::*, tuple::*, List, Position, - Wsoc, - }; 
- use super::types::{Input, ToRange}; - use crate::io::lexer::{ - arrow, at, caret, close_brace, close_paren, colon, comma, dot, equal, exclamation_mark, - exp, greater, greater_equal, hash, less, less_equal, lex_comment, lex_doc_comment, - lex_ident, lex_iri, lex_number, lex_operators, lex_prefixed_ident, lex_string, - lex_toplevel_doc_comment, lex_whitespace, minus, open_brace, open_paren, plus, - question_mark, skip_to_statement_end, slash, star, tilde, underscore, unequal, Context, - Error, ErrorTree, ParserState, Span, Token, TokenKind, - }; - use crate::io::parser::ast::AstNode; - use nom::character::complete::multispace0; - use nom::combinator::{all_consuming, cut, map, opt, recognize}; - use nom::error::{ErrorKind, ParseError}; - use nom::sequence::{delimited, pair}; - use nom::Parser; - use nom::{ - branch::alt, - combinator::verify, - multi::{many0, many1}, - sequence::tuple, - IResult, - }; - use nom_supreme::{context::ContextError, error::StackContext}; - use sanitise_file_name::Stringy; - - fn outer_span<'a>(input: Span<'a>, rest_input: Span<'a>) -> Span<'a> { - unsafe { - // dbg!(&input, &span, &rest_input); - Span::new_from_raw_offset( - input.location_offset(), - input.location_line(), - &input[..(rest_input.location_offset() - input.location_offset())], - (), - ) - } - } - - fn expect<'a, 's, O: Copy, E: ParseError>, F: Parser, O, E>>( - mut parser: F, - error_msg: impl ToString, - error_output: O, - errors: ParserState<'s>, - ) -> impl FnMut(Input<'a, 's>) -> IResult, O, E> { - move |input| match parser.parse(input) { - Ok(result) => Ok(result), - Err(nom::Err::Error(_)) | Err(nom::Err::Failure(_)) => { - let err = Error { - pos: Position { - offset: input.input.location_offset(), - line: input.input.location_line(), - column: input.input.get_utf8_column() as u32, - }, - msg: error_msg.to_string(), - context: vec![], - }; - errors.report_error(err); - Ok((input, error_output)) - } - Err(err) => Err(err), - } - } - - fn recover<'a, 's, E>( - mut parser: impl Parser, Statement<'a>, E>, - error_msg: impl ToString, - context: Context, - errors: ParserState<'s>, - ) -> impl FnMut(Input<'a, 's>) -> IResult, Statement<'a>, E> { - move |input: Input<'a, 's>| match parser.parse(input) { - Ok(result) => Ok(result), - Err(err) if input.input.is_empty() => Err(err), - Err(nom::Err::Error(e)) | Err(nom::Err::Failure(e)) => { - let err = Error { - pos: Position { - offset: input.input.location_offset(), - line: input.input.location_line(), - column: input.input.get_utf8_column() as u32, - }, - msg: error_msg.to_string(), - context: vec![context], - }; - // errors.report_error(err); - let (rest_input, span) = skip_to_statement_end::>>(input); - Ok((rest_input, Statement::Error(span))) - } - Err(err) => Err(err), - } - } - - fn report_error<'a, 's, O>( - mut parser: impl Parser, O, ErrorTree>>, - ) -> impl FnMut(Input<'a, 's>) -> IResult, O, ErrorTree>> { - move |input| match parser.parse(input) { - Ok(result) => Ok(result), - Err(e) => { - if input.input.is_empty() { - return Err(e); - }; - match &e { - nom::Err::Incomplete(_) => (), - nom::Err::Error(err) | nom::Err::Failure(err) => { - let (_deepest_pos, errors) = get_deepest_errors(err); - for error in errors { - input.parser_state.report_error(error); - } - // let error = Error(deepest_pos, format!("")); - // // input.parser_state.report_error(error) - } - }; - Err(e) - } - } - } - - fn get_deepest_errors<'a, 's>(e: &'a ErrorTree>) -> (Position, Vec) { - match e { - ErrorTree::Base { location, kind } => { - let span = 
location.input; - let err_pos = Position { - offset: span.location_offset(), - line: span.location_line(), - column: span.get_utf8_column() as u32, - }; - ( - err_pos, - vec![Error { - pos: err_pos, - msg: "".to_string(), - context: Vec::new(), - }], - ) - } - ErrorTree::Stack { base, contexts } => { - // let mut err_pos = Position::default(); - match &**base { - ErrorTree::Base { location, kind } => { - let span = location.input; - let err_pos = Position { - offset: span.location_offset(), - line: span.location_line(), - column: span.get_utf8_column() as u32, - }; - let mut msg = String::from(""); - for (_, context) in contexts { - match context { - StackContext::Kind(_) => todo!(), - StackContext::Context(c) => match c { - Context::Tag(t) => { - msg.push_str(t); - } - _ => (), - }, - } - } - ( - err_pos, - vec![Error { - pos: err_pos, - msg, - context: context_strs(contexts), - }], - ) - } - ErrorTree::Stack { base, contexts } => { - let (pos, mut deepest_errors) = get_deepest_errors(base); - let contexts = context_strs(contexts); - for mut error in &mut deepest_errors { - error.context.append(&mut contexts.clone()); - } - (pos, deepest_errors) - } - ErrorTree::Alt(error_tree) => { - let (pos, mut deepest_errors) = get_deepest_errors(base); - let contexts = context_strs(contexts); - for mut error in &mut deepest_errors { - error.context.append(&mut contexts.clone()); - } - (pos, deepest_errors) - } - } - } - ErrorTree::Alt(vec) => { - let mut return_vec: Vec = Vec::new(); - let mut deepest_pos = Position::default(); - for error in vec { - let (pos, mut deepest_errors) = get_deepest_errors(error); - if pos > deepest_pos { - deepest_pos = pos; - return_vec.clear(); - return_vec.append(&mut deepest_errors); - } else if pos == deepest_pos { - return_vec.append(&mut deepest_errors); - } - } - (deepest_pos, return_vec) - } - } - } - - fn context_strs(contexts: &Vec<(Input<'_, '_>, StackContext)>) -> Vec { - contexts - .iter() - .map(|(_, c)| match c { - StackContext::Kind(k) => todo!(), - StackContext::Context(c) => *c, - }) - .collect() - } - - pub(crate) fn context<'a, 's, P, E, F, O>( - context: P, - mut f: F, - ) -> impl FnMut(Input<'a, 's>) -> IResult, O, E> - where - P: Clone, - F: Parser, O, E>, - E: ContextError, P>, - { - move |i| match f.parse(i.clone()) { - Ok(o) => Ok(o), - Err(nom::Err::Incomplete(i)) => Err(nom::Err::Incomplete(i)), - Err(nom::Err::Error(e)) => Err(nom::Err::Error(E::add_context(i, context.clone(), e))), - Err(nom::Err::Failure(e)) => { - Err(nom::Err::Failure(E::add_context(i, context.clone(), e))) - } - } - } - - fn wsoc0<'a, 's, E>(input: Input<'a, 's>) -> IResult, Option>, E> - where - E: ParseError> + ContextError, Context>, - { - many0(alt((lex_whitespace, lex_comment)))(input).map(|(rest_input, vec)| { - if vec.is_empty() { - (rest_input, None) - } else { - ( - rest_input, - Some(Wsoc { - span: outer_span(input.input, rest_input.input), - token: vec, - }), - ) - } - }) - } - - fn wsoc1<'a, 's, E: ParseError> + ContextError, Context>>( - input: Input<'a, 's>, - ) -> IResult, Wsoc<'a>, E> { - many1(alt((lex_whitespace, lex_comment)))(input).map(|(rest_input, vec)| { - ( - rest_input, - Wsoc { - span: outer_span(input.input, rest_input.input), - token: vec, - }, - ) - }) - } - - /// Parse a full program consisting of directives, facts, rules and comments. 
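
// Editorial sketch (not part of the patch itself): the function below is
// normally driven through `parse_program_str`, defined further down. Parsing
// never aborts on a recoverable error; instead the error is collected, the
// input is skipped to the end of the offending statement
// (`skip_to_statement_end`), and a `Statement::Error` span is kept in the
// AST. Assuming only items from this module:
//
//     let (program, errors) = parse_program_str("a(B,C). b(?X) :- a(?X, _) .");
//     for error in &errors {
//         eprintln!("{}:{}: {}", error.pos.line, error.pos.column, error.msg);
//     }
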
- fn parse_program< - 'a, - 's, - E: ParseError> + ContextError, Context>, - >( - input: Input<'a, 's>, - ) -> (Program<'a>, Vec) { - let result = context( - Context::Program, - pair( - opt(lex_toplevel_doc_comment::>>), - delimited( - multispace0, - many0(recover( - report_error(delimited( - multispace0, - alt(( - // TODO: Discuss wether directives should only get parsed at the beginning of the source file - parse_rule, - parse_fact, - parse_directive, - parse_comment, - )), - multispace0, - )), - "failed to parse statement", - Context::Program, - input.parser_state, - )), - multispace0, - ), - ), - )(input); - match result { - Ok((rest_input, (tl_doc_comment, statements))) => { - if !rest_input.input.is_empty() { - panic!("Parser did not consume all input. This is considered a bug. Please report it. Unparsed input is: {:?}", rest_input); - }; - ( - Program { - span: input.input, - tl_doc_comment, - statements, - }, - rest_input.parser_state.errors.take(), - ) - } - Err(e) => panic!( - "Parser can't fail. If it fails it's a bug! Please report it. Got: {:?}", - e - ), - } - } - - pub fn parse_program_str(input: &str) -> (Program<'_>, Vec) { - let refcell = RefCell::new(Vec::new()); - let parser_state = ParserState { errors: &refcell }; - let input = Input { - input: Span::new(input), - parser_state, - }; - parse_program::>>(input) - } - - /// Parse normal comments that start with a `%` and ends at the line ending. - fn parse_comment< - 'a, - 's, - E: ParseError> + ContextError, Context>, - >( - input: Input<'a, 's>, - ) -> IResult, Statement<'a>, E> { - lex_comment(input).map(|(rest_input, comment)| (rest_input, Statement::Comment(comment))) - } - - /// Parse a fact of the form `predicateName(term1, term2, …).` - fn parse_fact<'a, 's, E: ParseError> + ContextError, Context>>( - input: Input<'a, 's>, - ) -> IResult, Statement<'a>, E> { - // dbg!(&input.parser_state.labels); - context( - Context::Fact, - tuple((opt(lex_doc_comment), parse_fact_atom, wsoc0, dot)), - )(input) - .map(|(rest_input, (doc_comment, atom, _ws, dot))| { - ( - rest_input, - Statement::Fact { - span: outer_span(input.input, rest_input.input), - doc_comment, - fact: atom, - dot, - }, - ) - }) - } - - fn parse_fact_atom< - 'a, - 's, - E: ParseError> + ContextError, Context>, - >( - input: Input<'a, 's>, - ) -> IResult, Fact<'a>, E> { - // TODO: Add Context - match parse_named_tuple::(input) { - Ok((rest_input, named_tuple)) => Ok((rest_input, Fact::NamedTuple(named_tuple))), - Err(_) => match parse_map::(input) { - Ok((rest_input, map)) => Ok((rest_input, Fact::Map(map))), - Err(err) => Err(err), - }, - } - } - - /// Parse a rule of the form `headPredicate1(term1, term2, …), headPredicate2(term1, term2, …) :- bodyPredicate(term1, …), term1 >= (term2 + term3) * function(term1, …) .` - fn parse_rule<'a, 's, E: ParseError> + ContextError, Context>>( - input: Input<'a, 's>, - ) -> IResult, Statement<'a>, E> { - context( - Context::Rule, - tuple(( - opt(lex_doc_comment), - parse_head, - wsoc0, - arrow, - wsoc0, - parse_body, - wsoc0, - dot, - )), - )(input) - .map( - |(rest_input, (doc_comment, head, _ws1, arrow, _ws2, body, _ws3, dot))| { - ( - rest_input, - Statement::Rule { - span: outer_span(input.input, rest_input.input), - doc_comment, - head, - arrow, - body, - dot, - }, - ) - }, - ) - } - - /// Parse the head atoms of a rule. 
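
// Editorial sketch (hypothetical test, name invented): a rule statement of
// the form `head :- body .` parsed via `parse_program_str` should surface as
// a single `Statement::Rule`:
//
//     #[test]
//     fn example_rule_statement() {
//         let (program, errors) = parse_program_str("a(?X) :- b(?X), c(?X, d) .");
//         assert!(errors.is_empty());
//         assert!(matches!(program.statements[0], Statement::Rule { .. }));
//     }
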
- fn parse_head<'a, 's, E: ParseError> + ContextError, Context>>( - input: Input<'a, 's>, - ) -> IResult, List<'a, Atom<'a>>, E> { - context(Context::RuleHead, parse_list(parse_atoms))(input) - } - - /// Parse the body atoms of a rule. - fn parse_body<'a, 's, E: ParseError> + ContextError, Context>>( - input: Input<'a, 's>, - ) -> IResult, List<'a, Atom<'a>>, E> { - context(Context::RuleBody, parse_list(parse_atoms))(input) - } - - /// Parse the directives (@base, @prefix, @import, @export, @output). - fn parse_directive< - 'a, - 's, - E: ParseError> + ContextError, Context>, - >( - input: Input<'a, 's>, - ) -> IResult, Statement<'a>, E> { - context( - Context::Directive, - alt(( - parse_base_directive, - parse_prefix_directive, - parse_import_directive, - parse_export_directive, - parse_output_directive, - )), - )(input) - .map(|(rest, directive)| (rest, Statement::Directive(directive))) - } - - /// Parse the base directive. - fn parse_base_directive< - 'a, - 's, - E: ParseError> + ContextError, Context>, - >( - input: Input<'a, 's>, - ) -> IResult, Directive<'a>, E> { - context( - Context::DirectiveBase, - tuple(( - opt(lex_doc_comment), - recognize(pair( - at, - verify(lex_ident, |token| *token.fragment() == "base"), - )), - wsoc0, - lex_iri, - wsoc0, - dot, - )), - )(input) - .map( - |(rest_input, (doc_comment, _kw, _ws1, base_iri, _ws2, dot))| { - ( - rest_input, - Directive::Base { - span: outer_span(input.input, rest_input.input), - doc_comment, - base_iri, - dot, - }, - ) - }, - ) - } - - /// Parse the prefix directive. - fn parse_prefix_directive< - 'a, - 's, - E: ParseError> + ContextError, Context>, - >( - input: Input<'a, 's>, - ) -> IResult, Directive<'a>, E> { - context( - Context::DirectivePrefix, - tuple(( - opt(lex_doc_comment), - recognize(pair( - at, - verify(lex_ident, |token| *token.fragment() == "prefix"), - )), - wsoc0, - recognize(pair(opt(lex_ident), colon)), - wsoc0, - lex_iri, - wsoc0, - dot, - )), - )(input) - .map( - |(rest_input, (doc_comment, _kw, _ws1, prefix, _ws2, prefix_iri, _ws3, dot))| { - ( - rest_input, - Directive::Prefix { - span: outer_span(input.input, rest_input.input), - doc_comment, - prefix: prefix.input, - prefix_iri, - dot, - }, - ) - }, - ) - } - - /// Parse the import directive. - fn parse_import_directive< - 'a, - 's, - E: ParseError> + ContextError, Context>, - >( - input: Input<'a, 's>, - ) -> IResult, Directive<'a>, E> { - context( - Context::DirectiveImport, - tuple(( - opt(lex_doc_comment), - recognize(pair( - at, - verify(lex_ident, |token| *token.fragment() == "import"), - )), - wsoc1, - lex_ident, - wsoc0, - arrow, - wsoc0, - parse_map, - wsoc0, - dot, - )), - )(input) - .map( - |( - rest_input, - (doc_comment, _kw, _ws1, predicate, _ws2, arrow, _ws3, map, _ws4, dot), - )| { - ( - rest_input, - Directive::Import { - span: outer_span(input.input, rest_input.input), - doc_comment, - predicate, - arrow, - map, - dot, - }, - ) - }, - ) - } - - /// Parse the export directive. 
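
// Editorial reference, mirroring the `syntax` test at the bottom of this
// file (IRIs abbreviated to `<iri>`); the directive parsers above and below
// accept statements of these shapes, with optional whitespace or comments
// between the tokens:
//
//     @base <iri> .
//     @prefix rdfs: <iri> .
//     @import sourceA :- csv { resource = "sources/dataA.csv" } .
//     @export a :- csv {} .
//     @output a, b, c .
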
- fn parse_export_directive< - 'a, - 's, - E: ParseError> + ContextError, Context>, - >( - input: Input<'a, 's>, - ) -> IResult, Directive<'a>, E> { - context( - Context::DirectiveExport, - tuple(( - opt(lex_doc_comment), - recognize(pair( - at, - verify(lex_ident, |token| *token.fragment() == "export"), - )), - wsoc1, - lex_ident, - wsoc0, - arrow, - wsoc0, - parse_map, - wsoc0, - dot, - )), - )(input) - .map( - |( - rest_input, - (doc_comment, _kw, _ws1, predicate, _ws2, arrow, _ws3, map, _ws4, dot), - )| { - ( - rest_input, - Directive::Export { - span: outer_span(input.input, rest_input.input), - doc_comment, - predicate, - arrow, - map, - dot, - }, - ) - }, - ) - } - - /// Parse the output directive. - fn parse_output_directive< - 'a, - 's, - E: ParseError> + ContextError, Context>, - >( - input: Input<'a, 's>, - ) -> IResult, Directive<'a>, E> { - context( - Context::DirectiveOutput, - tuple(( - opt(lex_doc_comment), - recognize(pair( - at, - verify(lex_ident, |token| *token.fragment() == "output"), - )), - wsoc1, - opt(parse_list(lex_ident)), - wsoc0, - dot, - )), - )(input) - .map( - |(rest_input, (doc_comment, _kw, _ws1, predicates, _ws2, dot))| { - ( - rest_input, - Directive::Output { - span: outer_span(input.input, rest_input.input), - doc_comment, - predicates, - dot, - }, - ) - }, - ) - } - - // /// Parse a list of `ident1, ident2, …` - // fn parse_identifier_list<'a, 's, E: ParseError> + ContextError, Context>>( - // input: Input<'a, 's>, - // ) -> IResult, List<'a, Token<'a>>, E> { - // pair( - // lex_ident, - // many0(tuple(( - // opt(lex_whitespace), - // comma, - // opt(lex_whitespace), - // lex_ident, - // ))), - // )(input) - // .map(|(rest_input, (first, rest))| { - // ( - // rest_input, - // List { - // span: outer_span(input.input, rest_input.input), - // first, - // rest: if rest.is_empty() { None } else { Some(rest) }, - // }, - // ) - // }) - // } - - fn parse_list< - 'a, - 's, - T, - E: ParseError> + ContextError, Context>, - >( - parse_t: fn(Input<'a, 's>) -> IResult, T, E>, - ) -> impl Fn(Input<'a, 's>) -> IResult, List<'a, T>, E> { - move |input: Input<'a, 's>| { - context( - Context::List, - tuple(( - parse_t, - many0(tuple((wsoc0, comma, wsoc0, parse_t))), - pair(wsoc0, opt(comma)), - )), - )(input) - .map(|(rest_input, (first, rest, (_, trailing_comma)))| { - ( - rest_input, - List { - span: outer_span(input.input, rest_input.input), - first, - rest: if rest.is_empty() { - None - } else { - Some( - rest.into_iter() - .map(|(_ws1, comma, _ws2, t)| (comma, t)) - .collect(), - ) - }, - trailing_comma, - }, - ) - }) - } - } - - /// Parse the different atom variants. - fn parse_atoms<'a, 's, E: ParseError> + ContextError, Context>>( - input: Input<'a, 's>, - ) -> IResult, Atom<'a>, E> { - context( - Context::BodyAtoms, - alt(( - parse_normal_atom, - parse_negative_atom, - parse_infix_atom, - parse_map_atom, - )), - )(input) - } - - /// Parse an atom of the form `predicateName(term1, term2, …)`. - fn parse_normal_atom< - 'a, - 's, - E: ParseError> + ContextError, Context>, - >( - input: Input<'a, 's>, - ) -> IResult, Atom<'a>, E> { - context(Context::PositiveAtom, parse_named_tuple)(input) - .map(|(rest_input, named_tuple)| (rest_input, Atom::Positive(named_tuple))) - } - - /// Parse an atom of the form `~predicateName(term1, term2, …)`. 
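
// Editorial sketch of the `List` values produced by `parse_list(parse_term)`
// above; note that a trailing comma is not discarded but stored in the AST:
//
//     "A"        -> first: A, rest: None,                  trailing_comma: None
//     "A, B, C"  -> first: A, rest: [(`,`, B), (`,`, C)],  trailing_comma: None
//     "A, B,"    -> first: A, rest: [(`,`, B)],            trailing_comma: Some(`,`)
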
- fn parse_negative_atom< - 'a, - 's, - E: ParseError> + ContextError, Context>, - >( - input: Input<'a, 's>, - ) -> IResult, Atom<'a>, E> { - context(Context::NegativeAtom, pair(tilde, parse_named_tuple))(input).map( - |(rest_input, (tilde, named_tuple))| { - ( - rest_input, - Atom::Negative { - span: outer_span(input.input, rest_input.input), - neg: tilde, - atom: named_tuple, - }, - ) - }, - ) - } - - /// Parse an "infix atom" of the form `term1 term2`. - /// The supported infix operations are `<`, `<=`, `=`, `>=`, `>` and `!=`. - fn parse_infix_atom< - 'a, - 's, - E: ParseError> + ContextError, Context>, - >( - input: Input<'a, 's>, - ) -> IResult, Atom<'a>, E> { - context( - Context::InfixAtom, - tuple((parse_term, wsoc0, parse_operation_token, wsoc0, parse_term)), - )(input) - .map(|(rest_input, (lhs, _ws1, operation, _ws2, rhs))| { - ( - rest_input, - Atom::InfixAtom { - span: outer_span(input.input, rest_input.input), - lhs, - operation, - rhs, - }, - ) - }) - } - - /// Parse a tuple like `(int, int, skip)`. A 1-tuple is denoted `(,)` (with a trailing comma) to distinquish it from parenthesised expressions. - fn parse_tuple<'a, 's, E: ParseError> + ContextError, Context>>( - input: Input<'a, 's>, - ) -> IResult, Tuple<'a>, E> { - context( - Context::Tuple, - tuple(( - open_paren, - wsoc0, - opt(parse_list(parse_term)), - wsoc0, - close_paren, - )), - )(input) - .map( - |(rest_input, (open_paren, _ws1, terms, _ws2, close_paren))| { - ( - rest_input, - Tuple { - span: outer_span(input.input, rest_input.input), - open_paren, - terms, - close_paren, - }, - ) - }, - ) - } - - /// Parse a named tuple. This function is like `parse_tuple` with the difference, - /// that is enforces the existence of an identifier for the tuple. - fn parse_named_tuple< - 'a, - 's, - E: ParseError> + ContextError, Context>, - >( - input: Input<'a, 's>, - ) -> IResult, NamedTuple<'a>, E> { - context( - Context::NamedTuple, - tuple((alt((lex_prefixed_ident, lex_ident)), wsoc0, parse_tuple)), - )(input) - .map(|(rest_input, (identifier, _ws, tuple))| { - ( - rest_input, - NamedTuple { - span: outer_span(input.input, rest_input.input), - identifier, - tuple, - }, - ) - }) - } +/// Parse a term. A term can be a primitive value (constant, number, string, …), +/// a variable (universal or existential), a map, a function (-symbol), an arithmetic +/// operation, an aggregation or an tuple of terms, e.g. `(term1, term2, …)`. +fn parse_term<'a, 's, E: ParseError> + ContextError, Context>>( + input: Input<'a, 's>, +) -> IResult, Term<'a>, E> { + context( + Context::Term, + alt(( + parse_binary_term, + parse_tuple_term, + // parse_unary_prefix_term, + parse_map_term, + parse_primitive_term, + parse_variable, + parse_existential, + parse_aggregation_term, + parse_blank, + )), + )(input) +} - /// Parse a map. Maps are denoted with `{…}` and can haven an optional name, e.g. `csv {…}`. - /// Inside the curly braces ist a list of pairs. - fn parse_map<'a, 's, E: ParseError> + ContextError, Context>>( - input: Input<'a, 's>, - ) -> IResult, Map<'a>, E> { - context( - Context::Map, - tuple(( - opt(lex_ident), - wsoc0, - open_brace, - wsoc0, - opt(parse_list(parse_pair)), - wsoc0, - close_brace, - )), - )(input) - .map( - |(rest_input, (identifier, _ws1, open_brace, _ws2, pairs, _ws3, close_brace))| { - ( - rest_input, - Map { - span: outer_span(input.input, rest_input.input), - identifier, - open_brace, - pairs, - close_brace, - }, - ) +/// Parse a primitive term (simple constant, iri constant, number, string). 
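
// Editorial reference for the alternatives tried below. `alt` is ordered
// choice, so the RDF-literal form is tried before plain strings, and
// prefixed constants before bare ones. Illustrative inputs (the IRI is a
// placeholder):
//
//     "2023-06-19"^^<iri>      RDF literal
//     rdfs:label               prefixed constant
//     someConstant             bare constant
//     <https://example.org/>   IRI
//     -3.14e2                  number
//     "hello"                  string
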
+fn parse_primitive_term< + 'a, + 's, + E: ParseError> + ContextError, Context>, +>( + input: Input<'a, 's>, +) -> IResult, Term<'a>, E> { + context( + Context::TermPrivimitive, + alt(( + parse_rdf_literal, + parse_prefixed_ident, + parse_ident, + parse_iri, + parse_number, + parse_string, + )), + )(input) + .map(|(rest_input, term)| (rest_input, Term::Primitive(term))) +} + +/// Parse a rdf literal e.g. "2023-06-19"^^ +fn parse_rdf_literal< + 'a, + 's, + E: ParseError> + ContextError, Context>, +>( + input: Input<'a, 's>, +) -> IResult, Primitive<'a>, E> { + context( + Context::RdfLiteral, + tuple((lex_string, recognize(pair(caret, caret)), lex_iri)), + )(input) + .map(|(rest_input, (string, carets, iri))| { + ( + rest_input, + Primitive::RdfLiteral { + span: outer_span(input.input, rest_input.input), + string, + carets: carets.input, + iri, }, ) - } + }) +} - /// Parse a map in an atom position. - fn parse_map_atom< - 'a, - 's, - E: ParseError> + ContextError, Context>, - >( - input: Input<'a, 's>, - ) -> IResult, Atom<'a>, E> { - parse_map(input).map(|(rest_input, map)| (rest_input, Atom::Map(map))) - } +fn parse_prefixed_ident<'a, 's, E>(input: Input<'a, 's>) -> IResult, Primitive<'a>, E> +where + E: ParseError> + ContextError, Context>, +{ + context( + Context::PrefixedConstant, + tuple((opt(lex_tag), colon, lex_tag)), + )(input) + .map(|(rest_input, (prefix, colon, constant))| { + ( + rest_input, + Primitive::PrefixedConstant { + span: outer_span(input.input, rest_input.input), + prefix, + colon, + constant, + }, + ) + }) +} - /// Parse a pair of the form `key = value`. - fn parse_pair<'a, 's, E: ParseError> + ContextError, Context>>( - input: Input<'a, 's>, - ) -> IResult, Pair<'a>, E> { - context( - Context::Pair, - tuple((parse_term, wsoc0, equal, wsoc0, parse_term)), - )(input) - .map(|(rest_input, (key, _ws1, equal, _ws2, value))| { - ( - rest_input, - Pair { - span: outer_span(input.input, rest_input.input), - key, - equal, - value, - }, - ) - }) - } +fn parse_ident<'a, 's, E: ParseError> + ContextError, Context>>( + input: Input<'a, 's>, +) -> IResult, Primitive<'a>, E> { + lex_tag(input).map(|(rest_input, ident)| (rest_input, Primitive::Constant(ident))) +} - /// Parse a term. A term can be a primitive value (constant, number, string, …), - /// a variable (universal or existential), a map, a function (-symbol), an arithmetic - /// operation, an aggregation or an tuple of terms, e.g. `(term1, term2, …)`. - fn parse_term<'a, 's, E: ParseError> + ContextError, Context>>( - input: Input<'a, 's>, - ) -> IResult, Term<'a>, E> { - context( - Context::Term, - alt(( - parse_binary_term, - parse_tuple_term, - // parse_unary_prefix_term, - parse_map_term, - parse_primitive_term, - parse_variable, - parse_existential, - parse_aggregation_term, - parse_blank, - )), - )(input) - } +fn parse_iri<'a, 's, E: ParseError> + ContextError, Context>>( + input: Input<'a, 's>, +) -> IResult, Primitive<'a>, E> { + lex_iri(input).map(|(rest_input, iri)| (rest_input, Primitive::Iri(iri))) +} - /// Parse a primitive term (simple constant, iri constant, number, string). 
- fn parse_primitive_term< - 'a, - 's, - E: ParseError> + ContextError, Context>, - >( - input: Input<'a, 's>, - ) -> IResult, Term<'a>, E> { - context( - Context::TermPrivimitive, - alt(( - parse_rdf_literal, - parse_prefixed_ident, - parse_ident, - parse_iri, - parse_number, - parse_string, - )), - )(input) - .map(|(rest_input, term)| (rest_input, Term::Primitive(term))) - } +fn parse_number<'a, 's, E: ParseError> + ContextError, Context>>( + input: Input<'a, 's>, +) -> IResult, Primitive<'a>, E> { + context(Context::Number, alt((parse_decimal, parse_integer)))(input) +} - /// Parse a rdf literal e.g. "2023-06-19"^^ - fn parse_rdf_literal< - 'a, - 's, - E: ParseError> + ContextError, Context>, - >( - input: Input<'a, 's>, - ) -> IResult, Primitive<'a>, E> { - context( - Context::RdfLiteral, - tuple((lex_string, recognize(pair(caret, caret)), lex_iri)), - )(input) - .map(|(rest_input, (string, carets, iri))| { - ( - rest_input, - Primitive::RdfLiteral { - span: outer_span(input.input, rest_input.input), - string, - carets: carets.input, - iri, - }, - ) - }) - } +fn parse_decimal<'a, 's, E: ParseError> + ContextError, Context>>( + input: Input<'a, 's>, +) -> IResult, Primitive<'a>, E> { + context( + Context::Decimal, + tuple(( + opt(alt((plus, minus))), + opt(lex_number), + dot, + lex_number, + opt(parse_exponent), + )), + )(input) + .map(|(rest_input, (sign, before, dot, after, exponent))| { + ( + rest_input, + Primitive::Number { + span: outer_span(input.input, rest_input.input), + sign, + before, + dot: Some(dot), + after, + exponent, + }, + ) + }) +} - fn parse_prefixed_ident<'a, 's, E>( - input: Input<'a, 's>, - ) -> IResult, Primitive<'a>, E> - where - E: ParseError> + ContextError, Context>, - { - context( - Context::PrefixedConstant, - tuple((opt(lex_ident), colon, lex_ident)), - )(input) - .map(|(rest_input, (prefix, colon, constant))| { +fn parse_integer<'a, 's, E: ParseError> + ContextError, Context>>( + input: Input<'a, 's>, +) -> IResult, Primitive<'a>, E> { + context(Context::Integer, pair(opt(alt((plus, minus))), lex_number))(input).map( + |(rest_input, (sign, number))| { ( rest_input, - Primitive::PrefixedConstant { + Primitive::Number { span: outer_span(input.input, rest_input.input), - prefix, - colon, - constant, + sign, + before: None, + dot: None, + after: number, + exponent: None, }, ) - }) - } - - fn parse_ident<'a, 's, E: ParseError> + ContextError, Context>>( - input: Input<'a, 's>, - ) -> IResult, Primitive<'a>, E> { - lex_ident(input).map(|(rest_input, ident)| (rest_input, Primitive::Constant(ident))) - } + }, + ) +} - fn parse_iri<'a, 's, E: ParseError> + ContextError, Context>>( - input: Input<'a, 's>, - ) -> IResult, Primitive<'a>, E> { - lex_iri(input).map(|(rest_input, iri)| (rest_input, Primitive::Iri(iri))) - } +fn parse_exponent<'a, 's, E: ParseError> + ContextError, Context>>( + input: Input<'a, 's>, +) -> IResult, Exponent<'a>, E> { + context( + Context::Exponent, + tuple((exp, opt(alt((plus, minus))), lex_number)), + )(input) + .map(|(rest_input, (e, sign, number))| (rest_input, Exponent { e, sign, number })) +} - fn parse_number<'a, 's, E: ParseError> + ContextError, Context>>( - input: Input<'a, 's>, - ) -> IResult, Primitive<'a>, E> { - context(Context::Number, alt((parse_decimal, parse_integer)))(input) - } +fn parse_string<'a, 's, E: ParseError> + ContextError, Context>>( + input: Input<'a, 's>, +) -> IResult, Primitive<'a>, E> { + lex_string(input).map(|(rest_input, string)| (rest_input, Primitive::String(string))) +} - fn parse_decimal< - 'a, - 
's, - E: ParseError> + ContextError, Context>, - >( - input: Input<'a, 's>, - ) -> IResult, Primitive<'a>, E> { - context( - Context::Decimal, - tuple(( - opt(alt((plus, minus))), - opt(lex_number), - dot, - lex_number, - opt(parse_exponent), - )), - )(input) - .map(|(rest_input, (sign, before, dot, after, exponent))| { - ( - rest_input, - Primitive::Number { +// /// Parse an unary term. +// fn parse_unary_prefix_term<'a, 's, E: ParseError> + ContextError, Context>>(input: Input<'a, 's>) -> IResult, Term<'a>, E> { +// pair(lex_unary_prefix_operators, parse_term)(input).map( +// |(rest_input, (operation, term))| { +// ( +// rest_input, +// Term::UnaryPrefix { +// span: outer_span(input.input, rest_input.input), +// operation, +// term: Box::new(term), +// }, +// ) +// }, +// ) +// } + +/// Parse a binary infix operation of the form `term1 term2`. +fn parse_binary_term< + 'a, + 's, + E: ParseError> + ContextError, Context>, +>( + input: Input<'a, 's>, +) -> IResult, Term<'a>, E> { + context( + Context::TermBinary, + pair( + parse_arithmetic_product, + opt(tuple((wsoc0, alt((plus, minus)), wsoc0, parse_binary_term))), + ), + )(input) + .map(|(rest_input, (lhs, opt))| { + ( + rest_input, + if let Some((_ws1, operation, _ws2, rhs)) = opt { + Term::Binary { span: outer_span(input.input, rest_input.input), - sign, - before, - dot: Some(dot), - after, - exponent, - }, - ) - }) - } - - fn parse_integer< - 'a, - 's, - E: ParseError> + ContextError, Context>, - >( - input: Input<'a, 's>, - ) -> IResult, Primitive<'a>, E> { - context(Context::Integer, pair(opt(alt((plus, minus))), lex_number))(input).map( - |(rest_input, (sign, number))| { - ( - rest_input, - Primitive::Number { - span: outer_span(input.input, rest_input.input), - sign, - before: None, - dot: None, - after: number, - exponent: None, - }, - ) + lhs: Box::new(lhs), + operation, + rhs: Box::new(rhs), + } + } else { + lhs }, ) - } - - fn parse_exponent< - 'a, - 's, - E: ParseError> + ContextError, Context>, - >( - input: Input<'a, 's>, - ) -> IResult, Exponent<'a>, E> { - context( - Context::Exponent, - tuple((exp, opt(alt((plus, minus))), lex_number)), - )(input) - .map(|(rest_input, (e, sign, number))| (rest_input, Exponent { e, sign, number })) - } - - fn parse_string<'a, 's, E: ParseError> + ContextError, Context>>( - input: Input<'a, 's>, - ) -> IResult, Primitive<'a>, E> { - lex_string(input).map(|(rest_input, string)| (rest_input, Primitive::String(string))) - } - - // /// Parse an unary term. - // fn parse_unary_prefix_term<'a, 's, E: ParseError> + ContextError, Context>>(input: Input<'a, 's>) -> IResult, Term<'a>, E> { - // pair(lex_unary_prefix_operators, parse_term)(input).map( - // |(rest_input, (operation, term))| { - // ( - // rest_input, - // Term::UnaryPrefix { - // span: outer_span(input.input, rest_input.input), - // operation, - // term: Box::new(term), - // }, - // ) - // }, - // ) - // } + }) +} - /// Parse a binary infix operation of the form `term1 term2`. - fn parse_binary_term< - 'a, - 's, - E: ParseError> + ContextError, Context>, - >( - input: Input<'a, 's>, - ) -> IResult, Term<'a>, E> { - context( - Context::TermBinary, - pair( +/// Parse an arithmetic product, i.e. an expression involving +/// only `*` and `/` over subexpressions. 
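
// Editorial note on precedence: together with `parse_binary_term` above, the
// parser below realises the usual two-level expression grammar
//
//     term    ::= product (("+" | "-") term)?
//     product ::= factor (("*" | "/") product)?
//
// so `5*7+7` groups as `(5*7)+7` (cf. the `arithmetic_expressions` test
// below). As written, the recursion is on the right-hand side, so chains of
// the same operator associate to the right.
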
+fn parse_arithmetic_product< + 'a, + 's, + E: ParseError> + ContextError, Context>, +>( + input: Input<'a, 's>, +) -> IResult, Term<'a>, E> { + context( + Context::ArithmeticProduct, + pair( + parse_arithmetic_factor, + opt(tuple(( + wsoc0, + alt((star, slash)), + wsoc0, parse_arithmetic_product, - opt(tuple((wsoc0, alt((plus, minus)), wsoc0, parse_binary_term))), - ), - )(input) - .map(|(rest_input, (lhs, opt))| { - ( - rest_input, - if let Some((_ws1, operation, _ws2, rhs)) = opt { - Term::Binary { - span: outer_span(input.input, rest_input.input), - lhs: Box::new(lhs), - operation, - rhs: Box::new(rhs), - } - } else { - lhs - }, - ) - }) - } + ))), + ), + )(input) + .map(|(rest_input, (lhs, opt))| { + ( + rest_input, + if let Some((_ws1, operation, _ws2, rhs)) = opt { + Term::Binary { + span: outer_span(input.input, rest_input.input), + lhs: Box::new(lhs), + operation, + rhs: Box::new(rhs), + } + } else { + lhs + }, + ) + }) +} - /// Parse an arithmetic product, i.e. an expression involving - /// only `*` and `/` over subexpressions. - fn parse_arithmetic_product< - 'a, - 's, - E: ParseError> + ContextError, Context>, - >( - input: Input<'a, 's>, - ) -> IResult, Term<'a>, E> { - context( - Context::ArithmeticProduct, - pair( - parse_arithmetic_factor, - opt(tuple(( - wsoc0, - alt((star, slash)), - wsoc0, - parse_arithmetic_product, - ))), - ), - )(input) - .map(|(rest_input, (lhs, opt))| { +fn parse_arithmetic_factor< + 'a, + 's, + E: ParseError> + ContextError, Context>, +>( + input: Input<'a, 's>, +) -> IResult, Term<'a>, E> { + context( + Context::ArithmeticFactor, + alt(( + parse_tuple_term, + parse_aggregation_term, + parse_primitive_term, + parse_variable, + parse_existential, + )), + )(input) +} + +// fn fold_arithmetic_expression<'a>( +// initial: Term<'a>, +// sequence: Vec<(Option>, Token<'a>, Option>, Term<'a>)>, +// span_vec: Vec>, +// ) -> Term<'a> { +// sequence +// .into_iter() +// .enumerate() +// .fold(initial, |acc, (i, pair)| { +// let (ws1, operation, ws2, expression) = pair; +// Term::Binary { +// span: span_vec[i], +// lhs: Box::new(acc), +// ws1, +// operation, +// ws2, +// rhs: Box::new(expression), +// } +// }) +// } + +/// Parse an aggregation term of the form `#sum(…)`. 
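
// Editorial examples for the aggregation parser below; the leading `#` and
// the aggregate name are kept as a single span via
// `recognize(pair(hash, lex_tag))`:
//
//     #min(?VARIABLE)
//     #sum(?X, ?Y)
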
+fn parse_aggregation_term< + 'a, + 's, + E: ParseError> + ContextError, Context>, +>( + input: Input<'a, 's>, +) -> IResult, Term<'a>, E> { + context( + Context::TermAggregation, + tuple(( + recognize(pair(hash, lex_tag)), + open_paren, + wsoc0, + parse_list(parse_term), + wsoc0, + close_paren, + )), + )(input) + .map( + |(rest_input, (operation, open_paren, _ws1, terms, _ws2, close_paren))| { ( rest_input, - if let Some((_ws1, operation, _ws2, rhs)) = opt { - Term::Binary { - span: outer_span(input.input, rest_input.input), - lhs: Box::new(lhs), - operation, - rhs: Box::new(rhs), - } - } else { - lhs + Term::Aggregation { + span: outer_span(input.input, rest_input.input), + operation: operation.input, + open_paren, + terms: Box::new(terms), + close_paren, }, ) - }) - } + }, + ) +} - fn parse_arithmetic_factor< - 'a, - 's, - E: ParseError> + ContextError, Context>, - >( - input: Input<'a, 's>, - ) -> IResult, Term<'a>, E> { - context( - Context::ArithmeticFactor, - alt(( - parse_tuple_term, - parse_aggregation_term, - parse_primitive_term, - parse_variable, - parse_existential, - )), - )(input) - } +/// Parse a `_` +fn parse_blank<'a, 's, E: ParseError> + ContextError, Context>>( + input: Input<'a, 's>, +) -> IResult, Term<'a>, E> { + context(Context::Blank, underscore)(input) + .map(|(rest_input, underscore)| (rest_input, Term::Blank(underscore))) +} - // fn fold_arithmetic_expression<'a>( - // initial: Term<'a>, - // sequence: Vec<(Option>, Token<'a>, Option>, Term<'a>)>, - // span_vec: Vec>, - // ) -> Term<'a> { - // sequence - // .into_iter() - // .enumerate() - // .fold(initial, |acc, (i, pair)| { - // let (ws1, operation, ws2, expression) = pair; - // Term::Binary { - // span: span_vec[i], - // lhs: Box::new(acc), - // ws1, - // operation, - // ws2, - // rhs: Box::new(expression), - // } - // }) - // } +/// Parse a tuple term, either with a name (function symbol) or as a term (-list) with +/// parenthesis. +fn parse_tuple_term<'a, 's, E: ParseError> + ContextError, Context>>( + input: Input<'a, 's>, +) -> IResult, Term<'a>, E> { + context(Context::TermTuple, parse_tuple)(input) + .map(|(rest_input, named_tuple)| (rest_input, Term::Tuple(Box::new(named_tuple)))) +} - /// Parse an aggregation term of the form `#sum(…)`. - fn parse_aggregation_term< - 'a, - 's, - E: ParseError> + ContextError, Context>, - >( - input: Input<'a, 's>, - ) -> IResult, Term<'a>, E> { - context( - Context::TermAggregation, - tuple(( - recognize(pair(hash, lex_ident)), - open_paren, - wsoc0, - parse_list(parse_term), - wsoc0, - close_paren, - )), - )(input) - .map( - |(rest_input, (operation, open_paren, _ws1, terms, _ws2, close_paren))| { - ( - rest_input, - Term::Aggregation { - span: outer_span(input.input, rest_input.input), - operation: operation.input, - open_paren, - terms: Box::new(terms), - close_paren, - }, - ) - }, - ) - } +/// Parse a map as a term. +fn parse_map_term<'a, 's, E: ParseError> + ContextError, Context>>( + input: Input<'a, 's>, +) -> IResult, Term<'a>, E> { + context(Context::TermMap, parse_map)(input) + .map(|(rest_input, map)| (rest_input, Term::Map(Box::new(map)))) +} - /// Parse a `_` - fn parse_blank<'a, 's, E: ParseError> + ContextError, Context>>( - input: Input<'a, 's>, - ) -> IResult, Term<'a>, E> { - context(Context::Blank, underscore)(input) - .map(|(rest_input, underscore)| (rest_input, Term::Blank(underscore))) - } +/// Parse a variable. 
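
// Editorial summary of the variable-like terms handled here:
//
//     ?X   universal variable    (question mark + name, one recognized span)
//     !X   existential variable  (exclamation mark + name)
//     _    blank / anonymous term (`parse_blank` above)
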
+fn parse_variable<'a, 's, E: ParseError> + ContextError, Context>>( + input: Input<'a, 's>, +) -> IResult, Term<'a>, E> { + context( + Context::UniversalVariable, + recognize(pair(question_mark, lex_tag)), + )(input) + .map(|(rest_input, var)| (rest_input, Term::UniversalVariable(var.input))) +} - /// Parse a tuple term, either with a name (function symbol) or as a term (-list) with - /// parenthesis. - fn parse_tuple_term< - 'a, - 's, - E: ParseError> + ContextError, Context>, - >( - input: Input<'a, 's>, - ) -> IResult, Term<'a>, E> { - context(Context::TermTuple, parse_tuple)(input) - .map(|(rest_input, named_tuple)| (rest_input, Term::Tuple(Box::new(named_tuple)))) - } +/// Parse an existential variable. +fn parse_existential< + 'a, + 's, + E: ParseError> + ContextError, Context>, +>( + input: Input<'a, 's>, +) -> IResult, Term<'a>, E> { + context( + Context::ExistentialVariable, + recognize(pair(exclamation_mark, lex_tag)), + )(input) + .map(|(rest_input, existential)| (rest_input, Term::ExistentialVariable(existential.input))) +} - /// Parse a map as a term. - fn parse_map_term< - 'a, - 's, - E: ParseError> + ContextError, Context>, - >( - input: Input<'a, 's>, - ) -> IResult, Term<'a>, E> { - context(Context::TermMap, parse_map)(input) - .map(|(rest_input, map)| (rest_input, Term::Map(Box::new(map)))) - } +/// Parse the operator for an infix atom. +fn parse_operation_token< + 'a, + 's, + E: ParseError> + ContextError, Context>, +>( + input: Input<'a, 's>, +) -> IResult, Span<'a>, E> { + context( + Context::Operators, + // Order of parser compinator is important, because of ordered choice and no backtracking + alt((less_equal, greater_equal, equal, unequal, less, greater)), + )(input) +} - /// Parse a variable. - fn parse_variable< - 'a, - 's, - E: ParseError> + ContextError, Context>, - >( - input: Input<'a, 's>, - ) -> IResult, Term<'a>, E> { - context( - Context::UniversalVariable, - recognize(pair(question_mark, lex_ident)), - )(input) - .map(|(rest_input, var)| (rest_input, Term::UniversalVariable(var.input))) - } +#[cfg(test)] +mod tests { + use std::{ + cell::RefCell, + collections::{BTreeMap, HashSet}, + }; - /// Parse an existential variable. - fn parse_existential< - 'a, - 's, - E: ParseError> + ContextError, Context>, - >( - input: Input<'a, 's>, - ) -> IResult, Term<'a>, E> { - context( - Context::ExistentialVariable, - recognize(pair(exclamation_mark, lex_ident)), - )(input) - .map(|(rest_input, existential)| (rest_input, Term::ExistentialVariable(existential.input))) - } + use super::*; + use crate::io::{ + lexer::*, + parser::ast::*, + // parser::ast::{ + // atom::*, directive::*, map::*, named_tuple::*, program::*, statement::*, term::*, + // }, + }; - // Order of parser compinator is important, because of ordered choice and no backtracking - /// Parse the operator for an infix atom. - fn parse_operation_token< - 'a, - 's, - E: ParseError> + ContextError, Context>, - >( - input: Input<'a, 's>, - ) -> IResult, Span<'a>, E> { - context( - Context::Operators, - alt((less_equal, greater_equal, equal, unequal, less, greater)), - )(input) + macro_rules! T { + ($tok_kind: expr, $offset: literal, $line: literal, $str: literal) => { + unsafe { Span::new_from_raw_offset($offset, $line, $str, ()) } + }; } - - #[cfg(test)] - mod tests { - use std::{ - cell::RefCell, - collections::{BTreeMap, HashMap, HashSet}, + macro_rules! 
s { + ($offset:literal,$line:literal,$str:literal) => { + unsafe { Span::new_from_raw_offset($offset, $line, $str, ()) } }; + } - use nom::error::{convert_error, VerboseError}; + // use nom::error::{convert_error, VerboseError}; + // fn convert_located_span_error<'a, 's>( + // input: Span<'a>, + // err: VerboseError>, + // ) -> String { + // convert_error( + // *(input.fragment()), + // VerboseError { + // errors: err + // .errors + // .into_iter() + // .map(|(span, tag)| (*(span.input.fragment()), tag)) + // .collect(), + // }, + // ) + // } - use super::*; - use crate::io::{ - lexer::*, - parser::ast::*, - // parser::ast::{ - // atom::*, directive::*, map::*, named_tuple::*, program::*, statement::*, term::*, - // }, + #[test] + fn fact() { + // let input = Tokens { + // tok: &lex_tokens(Span::new("a(B,C).")).unwrap().1, + // }; + let input = Span::new("a(B,C)."); + let refcell = RefCell::new(Vec::new()); + let errors = ParserState { errors: &refcell }; + let input = Input { + input, + parser_state: errors, }; + assert_eq!( + // parse_program::>(input).unwrap().1, + parse_program::>(input).0, + Program { + span: input.input, + tl_doc_comment: None, + statements: vec![Statement::Fact { + span: s!(0, 1, "a(B,C)."), + doc_comment: None, + fact: Fact::NamedTuple(NamedTuple { + span: s!(0, 1, "a(B,C)"), + identifier: s!(0, 1, "a"), + tuple: Tuple { + span: s!(1, 1, "(B,C)"), + open_paren: s!(1, 1, "("), + terms: Some(List { + span: s!(2, 1, "B,C"), + first: Term::Primitive(Primitive::Constant(s!(2, 1, "B"),)), + rest: Some(vec![( + s!(3, 1, ","), + Term::Primitive(Primitive::Constant(s!(4, 1, "C"),)), + )]), + trailing_comma: None, + }), + close_paren: s!(5, 1, ")"), + } + }), + dot: s!(6, 1, ".") + }], + } + ); + } - macro_rules! T { - ($tok_kind: expr, $offset: literal, $line: literal, $str: literal) => { - unsafe { Span::new_from_raw_offset($offset, $line, $str, ()) } - }; - } - macro_rules! s { - ($offset:literal,$line:literal,$str:literal) => { - unsafe { Span::new_from_raw_offset($offset, $line, $str, ()) } - }; - } - - fn convert_located_span_error<'a, 's>( - input: Span<'a>, - err: VerboseError>, - ) -> String { - convert_error( - *(input.fragment()), - VerboseError { - errors: err - .errors - .into_iter() - .map(|(span, tag)| (*(span.input.fragment()), tag)) - .collect(), - }, - ) - } - - #[test] - fn fact() { - // let input = Tokens { - // tok: &lex_tokens(Span::new("a(B,C).")).unwrap().1, - // }; - let input = Span::new("a(B,C)."); - let refcell = RefCell::new(Vec::new()); - let errors = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state: errors, - }; - assert_eq!( - // parse_program::>(input).unwrap().1, - parse_program::>(input).0, - Program { - span: input.input, - tl_doc_comment: None, - statements: vec![Statement::Fact { - span: s!(0, 1, "a(B,C)."), + #[test] + fn syntax() { + let input = Span::new( + r#"@base .@prefix rdfs:.@import sourceA:-csv{resource="sources/dataA.csv"}.@export a:-csv{}.@output a, b, c."#, + ); + let refcell = RefCell::new(Vec::new()); + let errors = ParserState { errors: &refcell }; + let input = Input { + input, + parser_state: errors, + }; + assert_eq!( + // parse_program::>(input).unwrap().1, + parse_program::>(input).0, + Program { + tl_doc_comment: None, + span: input.input, + statements: vec![ + Statement::Directive(Directive::Base { + span: s!(0, 1, "@base ."), + doc_comment: None, + base_iri: s!(6, 1, ""), + dot: s!(31, 1, "."), + }), + Statement::Directive(Directive::Prefix { + span: s!( + 32, + 1, + "@prefix rdfs:." 
+ ), + doc_comment: None, + prefix: s!(40, 1, "rdfs:"), + prefix_iri: s!(45, 1, ""), + dot: s!(84, 1, ".") + }), + Statement::Directive(Directive::Import { + span: s!( + 85, + 1, + r#"@import sourceA:-csv{resource="sources/dataA.csv"}."# + ), + doc_comment: None, + predicate: s!(93, 1, "sourceA"), + arrow: s!(100, 1, ":-"), + map: Map { + span: s!(102, 1, r#"csv{resource="sources/dataA.csv"}"#), + identifier: Some(s!(102, 1, "csv")), + open_brace: s!(105, 1, "{"), + pairs: Some(List { + span: s!(106, 1, "resource=\"sources/dataA.csv\""), + first: Pair { + span: s!(106, 1, "resource=\"sources/dataA.csv\""), + key: Term::Primitive(Primitive::Constant(s!( + 106, 1, "resource" + ),)), + equal: s!(114, 1, "="), + value: Term::Primitive(Primitive::String(s!( + 115, + 1, + "\"sources/dataA.csv\"" + ),)), + }, + rest: None, + trailing_comma: None, + }), + close_brace: s!(134, 1, "}"), + }, + dot: s!(135, 1, ".") + }), + Statement::Directive(Directive::Export { + span: s!(136, 1, "@export a:-csv{}."), + doc_comment: None, + predicate: s!(144, 1, "a"), + arrow: s!(145, 1, ":-"), + map: Map { + span: s!(147, 1, "csv{}"), + identifier: Some(s!(147, 1, "csv"),), + open_brace: s!(150, 1, "{"), + + pairs: None, + close_brace: s!(151, 1, "}"), + }, + dot: s!(152, 1, "."), + }), + Statement::Directive(Directive::Output { + span: s!(153, 1, "@output a, b, c."), + doc_comment: None, + predicates: Some(List { + span: s!(161, 1, "a, b, c"), + first: s!(161, 1, "a"), + rest: Some(vec![ + (s!(162, 1, ","), s!(164, 1, "b"),), + (s!(165, 1, ","), s!(167, 1, "c"),), + ]), + trailing_comma: None, + }), + dot: s!(168, 1, "."), + }), + ], + } + ); + } + + // #[test] + // fn ignore_ws_and_comments() { + // let input = Span::new(" Hi %cool comment\n"); + // assert_eq!( + // super::ignore_ws_and_comments(lex_ident::>)(input), + // Ok(( + // s!(22, 2, ""), + // Token { + // kind: TokenKind::Ident, + // span: s!(3, 1, "Hi") + // } + // )) + // ) + // } + + #[test] + fn fact_with_ws() { + let input = Span::new("some(Fact, with, whitespace) . 
% and a super useful comment\n"); + let refcell = RefCell::new(Vec::new()); + let errors = ParserState { errors: &refcell }; + let input = Input { + input, + parser_state: errors, + }; + assert_eq!( + // parse_program::>(input).unwrap().1, + parse_program::>(input).0, + Program { + span: input.input, + tl_doc_comment: None, + statements: vec![ + Statement::Fact { + span: s!(0, 1, "some(Fact, with, whitespace) ."), doc_comment: None, fact: Fact::NamedTuple(NamedTuple { - span: s!(0, 1, "a(B,C)"), - identifier: s!(0, 1, "a"), + span: s!(0, 1, "some(Fact, with, whitespace)"), + identifier: s!(0, 1, "some"), tuple: Tuple { - span: s!(1, 1, "(B,C)"), - open_paren: s!(1, 1, "("), + span: s!(4, 1, "(Fact, with, whitespace)"), + open_paren: s!(4, 1, "("), terms: Some(List { - span: s!(2, 1, "B,C"), - first: Term::Primitive(Primitive::Constant(s!(2, 1, "B"),)), - rest: Some(vec![( - s!(3, 1, ","), - Term::Primitive(Primitive::Constant(s!(4, 1, "C"),)), - )]), + span: s!(5, 1, "Fact, with, whitespace"), + first: Term::Primitive(Primitive::Constant(s!(5, 1, "Fact"),)), + rest: Some(vec![ + ( + s!(9, 1, ","), + Term::Primitive(Primitive::Constant(s!(11, 1, "with"))), + ), + ( + s!(15, 1, ","), + Term::Primitive(Primitive::Constant(s!( + 17, + 1, + "whitespace" + ))), + ), + ]), trailing_comma: None, }), - close_paren: s!(5, 1, ")"), + close_paren: s!(27, 1, ")"), } }), - dot: s!(6, 1, ".") - }], - } - ); - } - - #[test] - fn syntax() { - let input = Span::new( - r#"@base .@prefix rdfs:.@import sourceA:-csv{resource="sources/dataA.csv"}.@export a:-csv{}.@output a, b, c."#, - ); - let refcell = RefCell::new(Vec::new()); - let errors = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state: errors, - }; - assert_eq!( - // parse_program::>(input).unwrap().1, - parse_program::>(input).0, - Program { - tl_doc_comment: None, - span: input.input, - statements: vec![ - Statement::Directive(Directive::Base { - span: s!(0, 1, "@base ."), - doc_comment: None, - base_iri: s!(6, 1, ""), - dot: s!(31, 1, "."), - }), - Statement::Directive(Directive::Prefix { - span: s!( - 32, - 1, - "@prefix rdfs:." 
- ), - doc_comment: None, - prefix: s!(40, 1, "rdfs:"), - prefix_iri: s!(45, 1, ""), - dot: s!(84, 1, ".") - }), - Statement::Directive(Directive::Import { - span: s!( - 85, - 1, - r#"@import sourceA:-csv{resource="sources/dataA.csv"}."# - ), - doc_comment: None, - predicate: s!(93, 1, "sourceA"), - arrow: s!(100, 1, ":-"), - map: Map { - span: s!(102, 1, r#"csv{resource="sources/dataA.csv"}"#), - identifier: Some(s!(102, 1, "csv")), - open_brace: s!(105, 1, "{"), - pairs: Some(List { - span: s!(106, 1, "resource=\"sources/dataA.csv\""), - first: Pair { - span: s!(106, 1, "resource=\"sources/dataA.csv\""), - key: Term::Primitive(Primitive::Constant(s!( - 106, 1, "resource" - ),)), - equal: s!(114, 1, "="), - value: Term::Primitive(Primitive::String(s!( - 115, - 1, - "\"sources/dataA.csv\"" - ),)), - }, - rest: None, - trailing_comma: None, - }), - close_brace: s!(134, 1, "}"), - }, - dot: s!(135, 1, ".") - }), - Statement::Directive(Directive::Export { - span: s!(136, 1, "@export a:-csv{}."), - doc_comment: None, - predicate: s!(144, 1, "a"), - arrow: s!(145, 1, ":-"), - map: Map { - span: s!(147, 1, "csv{}"), - identifier: Some(s!(147, 1, "csv"),), - open_brace: s!(150, 1, "{"), - - pairs: None, - close_brace: s!(151, 1, "}"), - }, - dot: s!(152, 1, "."), - }), - Statement::Directive(Directive::Output { - span: s!(153, 1, "@output a, b, c."), - doc_comment: None, - predicates: Some(List { - span: s!(161, 1, "a, b, c"), - first: s!(161, 1, "a"), - rest: Some(vec![ - (s!(162, 1, ","), s!(164, 1, "b"),), - (s!(165, 1, ","), s!(167, 1, "c"),), - ]), - trailing_comma: None, - }), - dot: s!(168, 1, "."), - }), - ], - } - ); - } - - // #[test] - // fn ignore_ws_and_comments() { - // let input = Span::new(" Hi %cool comment\n"); - // assert_eq!( - // super::ignore_ws_and_comments(lex_ident::>)(input), - // Ok(( - // s!(22, 2, ""), - // Token { - // kind: TokenKind::Ident, - // span: s!(3, 1, "Hi") - // } - // )) - // ) - // } - - #[test] - fn fact_with_ws() { - let input = Span::new("some(Fact, with, whitespace) . 
% and a super useful comment\n"); - let refcell = RefCell::new(Vec::new()); - let errors = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state: errors, - }; - assert_eq!( - // parse_program::>(input).unwrap().1, - parse_program::>(input).0, - Program { - span: input.input, - tl_doc_comment: None, - statements: vec![ - Statement::Fact { - span: s!(0, 1, "some(Fact, with, whitespace) ."), - doc_comment: None, - fact: Fact::NamedTuple(NamedTuple { - span: s!(0, 1, "some(Fact, with, whitespace)"), - identifier: s!(0, 1, "some"), - tuple: Tuple { - span: s!(4, 1, "(Fact, with, whitespace)"), - open_paren: s!(4, 1, "("), - terms: Some(List { - span: s!(5, 1, "Fact, with, whitespace"), - first: Term::Primitive(Primitive::Constant(s!( - 5, 1, "Fact" - ),)), - rest: Some(vec![ - ( - s!(9, 1, ","), - Term::Primitive(Primitive::Constant(s!( - 11, 1, "with" - ))), - ), - ( - s!(15, 1, ","), - Term::Primitive(Primitive::Constant(s!( - 17, - 1, - "whitespace" - ))), - ), - ]), - trailing_comma: None, - }), - close_paren: s!(27, 1, ")"), - } - }), - dot: s!(29, 1, "."), - }, - Statement::Comment(s!(31, 1, "% and a super useful comment\n")) - ], - } - ); - } + dot: s!(29, 1, "."), + }, + Statement::Comment(s!(31, 1, "% and a super useful comment\n")) + ], + } + ); + } - #[test] - fn display_program() { - let input = Span::new( - r#"% This example finds trees of (some species of lime/linden tree) in Dresden, + #[test] + fn display_program() { + let input = Span::new( + r#"% This example finds trees of (some species of lime/linden tree) in Dresden, % which are more than 200 years old. % % It shows how to load (typed) data from (compressed) CSV files, how to @@ -4057,152 +3997,257 @@ limeSpecies(?X, "Tilia") :- taxon(?X, "Tilia", ?P). limeSpecies(?X, ?Name) :- taxon(?X, ?Name, ?Y), limeSpecies(?Y, ?N). oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters), ?age > 200, limeSpecies(?id,?species) ."#, - ); - let refcell = RefCell::new(Vec::new()); - let errors = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state: errors, + ); + let refcell = RefCell::new(Vec::new()); + let errors = ParserState { errors: &refcell }; + let input = Input { + input, + parser_state: errors, + }; + // let ast = parse_program::>(input); + let (ast, _) = parse_program::>(input); + println!("{}", ast); + // With the removal of whitespace in the AST this does not work anymore. 
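    // (Editorial note: the disabled check concatenated the fragments of all
    // tokens in the AST and compared the result with the original input; with
    // whitespace no longer stored in the AST, the concatenation loses the gaps
    // between tokens and cannot reproduce the input verbatim anymore.)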
+ // assert_eq!( + // { + // let mut string_from_tokens = String::new(); + // for token in get_all_tokens(&ast) { + // string_from_tokens.push_str(token.span().fragment()); + // } + // println!("String from Tokens:\n"); + // println!("{}\n", string_from_tokens); + // string_from_tokens + // }, + // *input.input.fragment(), + // ); + } + + #[test] + fn parser_test() { + let file = "../testfile2.rls"; + let str = std::fs::read_to_string(file).expect("testfile not found"); + let input = Span::new(str.as_str()); + let refcell = RefCell::new(Vec::new()); + let parser_state = ParserState { errors: &refcell }; + let input = Input { + input, + parser_state, + }; + // let result = parse_program::>(input); + let (ast, errors) = parse_program::>>(input); + // println!("{}\n\n{:#?}", ast, errors); + println!("{}\n\n", ast); + let mut error_map: BTreeMap> = BTreeMap::new(); + for error in errors { + if let Some(set) = error_map.get_mut(&error.pos) { + set.insert(error.msg); + } else { + let mut set = HashSet::new(); + set.insert(error.msg); + error_map.insert(error.pos, set); }; - // let ast = parse_program::>(input); - let (ast, _) = parse_program::>(input); - println!("{}", ast); - // With the removal of whitespace in the AST this does not work anymore. - // assert_eq!( - // { - // let mut string_from_tokens = String::new(); - // for token in get_all_tokens(&ast) { - // string_from_tokens.push_str(token.span().fragment()); - // } - // println!("String from Tokens:\n"); - // println!("{}\n", string_from_tokens); - // string_from_tokens - // }, - // *input.input.fragment(), - // ); } + // dbg!(&error_map); + println!("\n\n"); + // assert!(false); + let lines: Vec<_> = str.lines().collect(); + for (pos, str) in error_map { + // println!("{pos:?}, {str:?}"); + println!("error: {str:?}"); + println!("--> {}:{}:{}", file, pos.line, pos.column); + println!("{}", lines.get((pos.line - 1) as usize).unwrap()); + println!("{0:>1$}\n", "^", pos.column as usize) + } + } - #[test] - fn parser_test() { - let file = "../testfile2.rls"; - let str = std::fs::read_to_string(file).expect("testfile not found"); - let input = Span::new(str.as_str()); - let refcell = RefCell::new(Vec::new()); - let parser_state = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state, - }; - // let result = parse_program::>(input); - let (ast, errors) = parse_program::>>(input); - // println!("{}\n\n{:#?}", ast, errors); - println!("{}\n\n", ast); - let mut error_map: BTreeMap> = BTreeMap::new(); - for error in errors { - if let Some(set) = error_map.get_mut(&error.pos) { - set.insert(error.msg); - } else { - let mut set = HashSet::new(); - set.insert(error.msg); - error_map.insert(error.pos, set); + #[test] + fn arithmetic_expressions() { + assert_eq!( + { + let input = Span::new("42"); + let refcell = RefCell::new(Vec::new()); + let parser_state = ParserState { errors: &refcell }; + let input = Input { + input, + parser_state, + }; + // let result = parse_term::>(input); + let result = parse_term::>(input); + result.unwrap().1 + }, + Term::Primitive(Primitive::Number { + span: s!(0, 1, "42"), + sign: None, + before: None, + dot: None, + after: T! 
{Number, 0, 1, "42"}, + exponent: None, + }), + ); + + assert_eq!( + { + let input = Span::new("35+7"); + let refcell = RefCell::new(Vec::new()); + let parser_state = ParserState { errors: &refcell }; + let input = Input { + input, + parser_state, }; + // let result = parse_term::>(input); + let result = parse_term::>(input); + result.unwrap().1 + }, + Term::Binary { + span: s!(0, 1, "35+7"), + lhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(0, 1, "35"), + sign: None, + before: None, + dot: None, + after: T! {Number, 0, 1, "35"}, + exponent: None, + })), + operation: T! {Plus, 2, 1, "+"}, + rhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(3, 1, "7"), + sign: None, + before: None, + dot: None, + after: T! {Number, 3, 1, "7"}, + exponent: None, + })), } - // dbg!(&error_map); - println!("\n\n"); - // assert!(false); - let lines: Vec<_> = str.lines().collect(); - for (pos, str) in error_map { - // println!("{pos:?}, {str:?}"); - println!("error: {str:?}"); - println!("--> {}:{}:{}", file, pos.line, pos.column); - println!("{}", lines.get((pos.line - 1) as usize).unwrap()); - println!("{0:>1$}\n", "^", pos.column as usize) + ); + + assert_eq!( + { + let input = Span::new("6*7"); + let refcell = RefCell::new(Vec::new()); + let parser_state = ParserState { errors: &refcell }; + let input = Input { + input, + parser_state, + }; + // let result = parse_term::>(input); + let result = parse_term::>(input); + result.unwrap().1 + }, + Term::Binary { + span: s!(0, 1, "6*7"), + lhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(0, 1, "6"), + sign: None, + before: None, + dot: None, + after: T! {Number, 0,1,"6"}, + exponent: None, + })), + operation: T! {Star, 1,1,"*"}, + rhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(2, 1, "7"), + sign: None, + before: None, + dot: None, + after: T! {Number, 2,1,"7"}, + exponent: None, + })), } - } + ); - #[test] - fn arithmetic_expressions() { - use TokenKind::*; - - assert_eq!( - { - let input = Span::new("42"); - let refcell = RefCell::new(Vec::new()); - let parser_state = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state, - }; - // let result = parse_term::>(input); - let result = parse_term::>(input); - result.unwrap().1 - }, - Term::Primitive(Primitive::Number { - span: s!(0, 1, "42"), + assert_eq!( + { + let input = Span::new("49-7"); + let refcell = RefCell::new(Vec::new()); + let parser_state = ParserState { errors: &refcell }; + let input = Input { + input, + parser_state, + }; + // let result = parse_term::>(input); + let result = parse_term::>(input); + result.unwrap().1 + }, + Term::Binary { + span: s!(0, 1, "49-7"), + lhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(0, 1, "49"), sign: None, before: None, dot: None, - after: T! {Number, 0, 1, "42"}, + after: T! {Number, 0, 1, "49"}, exponent: None, - }), - ); - - assert_eq!( - { - let input = Span::new("35+7"); - let refcell = RefCell::new(Vec::new()); - let parser_state = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state, - }; - // let result = parse_term::>(input); - let result = parse_term::>(input); - result.unwrap().1 - }, - Term::Binary { - span: s!(0, 1, "35+7"), - lhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(0, 1, "35"), - sign: None, - before: None, - dot: None, - after: T! {Number, 0, 1, "35"}, - exponent: None, - })), - operation: T! 
{Plus, 2, 1, "+"}, - rhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(3, 1, "7"), - sign: None, - before: None, - dot: None, - after: T! {Number, 3, 1, "7"}, - exponent: None, - })), - } - ); - - assert_eq!( - { - let input = Span::new("6*7"); - let refcell = RefCell::new(Vec::new()); - let parser_state = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state, - }; - // let result = parse_term::>(input); - let result = parse_term::>(input); - result.unwrap().1 - }, - Term::Binary { - span: s!(0, 1, "6*7"), + })), + operation: T! {Minus, 2, 1, "-"}, + rhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(3, 1, "7"), + sign: None, + before: None, + dot: None, + after: T! {Number, 3, 1, "7"}, + exponent: None, + })), + } + ); + + assert_eq!( + { + let input = Span::new("84/2"); + let refcell = RefCell::new(Vec::new()); + let parser_state = ParserState { errors: &refcell }; + let input = Input { + input, + parser_state, + }; + // let result = parse_term::>(input); + let result = parse_term::>(input); + result.unwrap().1 + }, + Term::Binary { + span: s!(0, 1, "84/2"), + lhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(0, 1, "84"), + sign: None, + before: None, + dot: None, + after: T! {Number, 0, 1, "84"}, + exponent: None, + })), + operation: T! {Slash, 2, 1, "/"}, + rhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(3, 1, "2"), + sign: None, + before: None, + dot: None, + after: T! {Number, 3, 1, "2"}, + exponent: None, + })), + } + ); + + assert_eq!( + { + let input = Span::new("5*7+7"); + let refcell = RefCell::new(Vec::new()); + let parser_state = ParserState { errors: &refcell }; + let input = Input { + input, + parser_state, + }; + // let result = parse_term::>(input); + let result = parse_term::>(input); + result.unwrap().1 + }, + Term::Binary { + span: s!(0, 1, "5*7+7"), + lhs: Box::new(Term::Binary { + span: s!(0, 1, "5*7"), lhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(0, 1, "6"), + span: s!(0, 1, "5"), sign: None, before: None, dot: None, - after: T! {Number, 0,1,"6"}, + after: T! {Number, 0,1,"5"}, exponent: None, })), operation: T! {Star, 1,1,"*"}, @@ -4214,647 +4259,535 @@ oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters after: T! {Number, 2,1,"7"}, exponent: None, })), - } - ); - - assert_eq!( - { - let input = Span::new("49-7"); - let refcell = RefCell::new(Vec::new()); - let parser_state = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state, - }; - // let result = parse_term::>(input); - let result = parse_term::>(input); - result.unwrap().1 - }, - Term::Binary { - span: s!(0, 1, "49-7"), - lhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(0, 1, "49"), - sign: None, - before: None, - dot: None, - after: T! {Number, 0, 1, "49"}, - exponent: None, - })), - operation: T! {Minus, 2, 1, "-"}, - rhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(3, 1, "7"), - sign: None, - before: None, - dot: None, - after: T! 
{Number, 3, 1, "7"}, - exponent: None, - })), - } - ); - - assert_eq!( - { - let input = Span::new("84/2"); - let refcell = RefCell::new(Vec::new()); - let parser_state = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state, - }; - // let result = parse_term::>(input); - let result = parse_term::>(input); - result.unwrap().1 - }, - Term::Binary { - span: s!(0, 1, "84/2"), - lhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(0, 1, "84"), - sign: None, - before: None, - dot: None, - after: T! {Number, 0, 1, "84"}, - exponent: None, - })), - operation: T! {Slash, 2, 1, "/"}, - rhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(3, 1, "2"), - sign: None, - before: None, - dot: None, - after: T! {Number, 3, 1, "2"}, - exponent: None, - })), - } - ); - - assert_eq!( - { - let input = Span::new("5*7+7"); - let refcell = RefCell::new(Vec::new()); - let parser_state = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state, - }; - // let result = parse_term::>(input); - let result = parse_term::>(input); - result.unwrap().1 - }, - Term::Binary { - span: s!(0, 1, "5*7+7"), - lhs: Box::new(Term::Binary { - span: s!(0, 1, "5*7"), - lhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(0, 1, "5"), - sign: None, - before: None, - dot: None, - after: T! {Number, 0,1,"5"}, - exponent: None, - })), - operation: T! {Star, 1,1,"*"}, - rhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(2, 1, "7"), - sign: None, - before: None, - dot: None, - after: T! {Number, 2,1,"7"}, - exponent: None, - })), - }), - operation: T! {Plus, 3,1,"+"}, - rhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(4, 1, "7"), + }), + operation: T! {Plus, 3,1,"+"}, + rhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(4, 1, "7"), + sign: None, + before: None, + dot: None, + after: T! {Number, 4,1,"7"}, + exponent: None, + })), + } + ); + + assert_eq!( + { + let input = Span::new("7+5*7"); + let refcell = RefCell::new(Vec::new()); + let parser_state = ParserState { errors: &refcell }; + let input = Input { + input, + parser_state, + }; + // let result = parse_term::>(input); + let result = parse_term::>(input); + result.unwrap().1 + }, + Term::Binary { + span: s!(0, 1, "7+5*7"), + lhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(0, 1, "7"), + sign: None, + before: None, + dot: None, + after: T! {Number, 0,1,"7"}, + exponent: None + })), + operation: T! {Plus, 1,1,"+"}, + rhs: Box::new(Term::Binary { + span: s!(2, 1, "5*7"), + lhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(2, 1, "5"), sign: None, before: None, dot: None, - after: T! {Number, 4,1,"7"}, - exponent: None, + after: T! {Number, 2,1,"5"}, + exponent: None })), - } - ); - - assert_eq!( - { - let input = Span::new("7+5*7"); - let refcell = RefCell::new(Vec::new()); - let parser_state = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state, - }; - // let result = parse_term::>(input); - let result = parse_term::>(input); - result.unwrap().1 - }, - Term::Binary { - span: s!(0, 1, "7+5*7"), - lhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(0, 1, "7"), + operation: T! {Star, 3,1,"*"}, + rhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(4, 1, "7"), sign: None, before: None, dot: None, - after: T! {Number, 0,1,"7"}, + after: T! {Number, 4,1,"7"}, exponent: None })), - operation: T! 
{Plus, 1,1,"+"}, - rhs: Box::new(Term::Binary { - span: s!(2, 1, "5*7"), + }), + } + ); + + assert_eq!( + { + let input = Span::new("(15+3*2-(7+35)*8)/3"); + let refcell = RefCell::new(Vec::new()); + let parser_state = ParserState { errors: &refcell }; + let input = Input { + input, + parser_state, + }; + // let result = parse_term::>(input); + let result = parse_term::>(input); + // let result = parse_term::>(Span::new("(15+3*2-(7+35)*8)/3")); + // match result { + // Ok(ast) => { + // println!("{}", ast.1); + // ast.1 + // } + // Err(nom::Err::Error(err)) | Err(nom::Err::Failure(err)) => { + // panic!( + // "{}", + // convert_error( + // *(input.input.fragment()), + // VerboseError { + // errors: err + // .errors + // .into_iter() + // .map(|(span, tag)| { (*(span.fragment()), tag) }) + // .collect() + // } + // ) + // ) + // } + // Err(nom::Err::Incomplete(err)) => panic!("{:#?}", err), + // } + result.unwrap().1 + }, + Term::Binary { + span: s!(0, 1, "(15+3*2-(7+35)*8)/3"), + lhs: Box::new(Term::Tuple(Box::new(Tuple { + span: s!(0, 1, "(15+3*2-(7+35)*8)"), + open_paren: T!(OpenParen, 0, 1, "("), + terms: Some(List { + span: s!(1, 1, "15+3*2-(7+35)*8"), + first: Term::Binary { + span: s!(1, 1, "15+3*2-(7+35)*8"), + lhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(1, 1, "15"), + sign: None, + before: None, + dot: None, + after: T! {Number, 1,1,"15"}, + exponent: None, + })), + operation: T! {Plus, 3,1,"+"}, + rhs: Box::new(Term::Binary { + span: s!(4, 1, "3*2-(7+35)*8"), + lhs: Box::new(Term::Binary { + span: s!(4, 1, "3*2"), + lhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(4, 1, "3"), + sign: None, + before: None, + dot: None, + after: T! {Number, 4,1,"3"}, + exponent: None, + })), + operation: T! {Star, 5,1,"*"}, + rhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(6, 1, "2"), + sign: None, + before: None, + dot: None, + after: T! {Number, 6,1,"2"}, + exponent: None, + })), + }), + operation: T! {Minus, 7,1,"-"}, + rhs: Box::new(Term::Binary { + span: s!(8, 1, "(7+35)*8"), + lhs: Box::new(Term::Tuple(Box::new(Tuple { + span: s!(8, 1, "(7+35)"), + open_paren: T! {OpenParen, 8, 1, "("}, + terms: Some(List { + span: s!(9, 1, "7+35"), + first: Term::Binary { + span: s!(9, 1, "7+35"), + lhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(9, 1, "7"), + sign: None, + before: None, + dot: None, + after: T! {Number, 9,1,"7"}, + exponent: None, + })), + operation: T! {Plus, 10,1,"+"}, + rhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(11, 1, "35"), + sign: None, + before: None, + dot: None, + after: T! {Number, 11,1,"35"}, + exponent: None, + })), + }, + rest: None, + trailing_comma: None, + }), + close_paren: T! {CloseParen, 13,1,")"}, + }))), + operation: T! {Star, 14,1,"*"}, + rhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(15, 1, "8"), + sign: None, + before: None, + dot: None, + after: T! {Number, 15,1,"8"}, + exponent: None, + })), + }), + }), + }, + rest: None, + trailing_comma: None, + }), + close_paren: T!(CloseParen, 16, 1, ")") + }))), + operation: T! {Slash, 17,1,"/"}, + rhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(18, 1, "3"), + sign: None, + before: None, + dot: None, + after: T! 
{Number, 18,1,"3"}, + exponent: None, + })), + } + ); + // Term::Binary { + // span: s!(), + // lhs: Box::new(), + // ws1: None, + // operation: , + // ws2: None, + // rhs: Box::new(), + // } + + assert_eq!( + { + let input = Span::new("15+3*2-(7+35)*8/3"); + let refcell = RefCell::new(Vec::new()); + let parser_state = ParserState { errors: &refcell }; + let input = Input { + input, + parser_state, + }; + // let result = parse_term::>(input); + let result = parse_term::>(input); + result.unwrap().1 + }, + Term::Binary { + span: s!(0, 1, "15+3*2-(7+35)*8/3"), + lhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(0, 1, "15"), + sign: None, + before: None, + dot: None, + after: T! {Number, 0,1,"15"}, + exponent: None, + })), + operation: T! {Plus, 2,1,"+"}, + rhs: Box::new(Term::Binary { + span: s!(3, 1, "3*2-(7+35)*8/3"), + lhs: Box::new(Term::Binary { + span: s!(3, 1, "3*2"), lhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(2, 1, "5"), + span: s!(3, 1, "3"), sign: None, before: None, dot: None, - after: T! {Number, 2,1,"5"}, - exponent: None + after: T! {Number, 3,1,"3"}, + exponent: None, })), - operation: T! {Star, 3,1,"*"}, + operation: T! {Star, 4,1,"*"}, rhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(4, 1, "7"), + span: s!(5, 1, "2"), sign: None, before: None, dot: None, - after: T! {Number, 4,1,"7"}, - exponent: None + after: T! {Number, 5,1,"2"}, + exponent: None, })), }), - } - ); - - assert_eq!( - { - let input = Span::new("(15+3*2-(7+35)*8)/3"); - let refcell = RefCell::new(Vec::new()); - let parser_state = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state, - }; - // let result = parse_term::>(input); - let result = parse_term::>(input); - // let result = parse_term::>(Span::new("(15+3*2-(7+35)*8)/3")); - // match result { - // Ok(ast) => { - // println!("{}", ast.1); - // ast.1 - // } - // Err(nom::Err::Error(err)) | Err(nom::Err::Failure(err)) => { - // panic!( - // "{}", - // convert_error( - // *(input.input.fragment()), - // VerboseError { - // errors: err - // .errors - // .into_iter() - // .map(|(span, tag)| { (*(span.fragment()), tag) }) - // .collect() - // } - // ) - // ) - // } - // Err(nom::Err::Incomplete(err)) => panic!("{:#?}", err), - // } - result.unwrap().1 - }, - Term::Binary { - span: s!(0, 1, "(15+3*2-(7+35)*8)/3"), - lhs: Box::new(Term::Tuple(Box::new(Tuple { - span: s!(0, 1, "(15+3*2-(7+35)*8)"), - open_paren: T!(OpenParen, 0, 1, "("), - terms: Some(List { - span: s!(1, 1, "15+3*2-(7+35)*8"), - first: Term::Binary { - span: s!(1, 1, "15+3*2-(7+35)*8"), - lhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(1, 1, "15"), - sign: None, - before: None, - dot: None, - after: T! {Number, 1,1,"15"}, - exponent: None, - })), - operation: T! {Plus, 3,1,"+"}, - rhs: Box::new(Term::Binary { - span: s!(4, 1, "3*2-(7+35)*8"), - lhs: Box::new(Term::Binary { - span: s!(4, 1, "3*2"), - lhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(4, 1, "3"), - sign: None, - before: None, - dot: None, - after: T! {Number, 4,1,"3"}, - exponent: None, - })), - operation: T! {Star, 5,1,"*"}, - rhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(6, 1, "2"), - sign: None, - before: None, - dot: None, - after: T! {Number, 6,1,"2"}, - exponent: None, - })), - }), - operation: T! {Minus, 7,1,"-"}, - rhs: Box::new(Term::Binary { - span: s!(8, 1, "(7+35)*8"), - lhs: Box::new(Term::Tuple(Box::new(Tuple { - span: s!(8, 1, "(7+35)"), - open_paren: T! 
{OpenParen, 8, 1, "("}, - terms: Some(List { - span: s!(9, 1, "7+35"), - first: Term::Binary { - span: s!(9, 1, "7+35"), - lhs: Box::new(Term::Primitive( - Primitive::Number { - span: s!(9, 1, "7"), - sign: None, - before: None, - dot: None, - after: T! {Number, 9,1,"7"}, - exponent: None, - } - )), - operation: T! {Plus, 10,1,"+"}, - rhs: Box::new(Term::Primitive( - Primitive::Number { - span: s!(11, 1, "35"), - sign: None, - before: None, - dot: None, - after: T! {Number, 11,1,"35"}, - exponent: None, - } - )), - }, - rest: None, - trailing_comma: None, - }), - close_paren: T! {CloseParen, 13,1,")"}, - }))), - operation: T! {Star, 14,1,"*"}, - rhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(15, 1, "8"), - sign: None, - before: None, - dot: None, - after: T! {Number, 15,1,"8"}, - exponent: None, - })), - }), - }), - }, - rest: None, - trailing_comma: None, - }), - close_paren: T!(CloseParen, 16, 1, ")") - }))), - operation: T! {Slash, 17,1,"/"}, - rhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(18, 1, "3"), - sign: None, - before: None, - dot: None, - after: T! {Number, 18,1,"3"}, - exponent: None, - })), - } - ); - // Term::Binary { - // span: s!(), - // lhs: Box::new(), - // ws1: None, - // operation: , - // ws2: None, - // rhs: Box::new(), - // } - - assert_eq!( - { - let input = Span::new("15+3*2-(7+35)*8/3"); - let refcell = RefCell::new(Vec::new()); - let parser_state = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state, - }; - // let result = parse_term::>(input); - let result = parse_term::>(input); - result.unwrap().1 - }, - Term::Binary { - span: s!(0, 1, "15+3*2-(7+35)*8/3"), - lhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(0, 1, "15"), - sign: None, - before: None, - dot: None, - after: T! {Number, 0,1,"15"}, - exponent: None, - })), - operation: T! {Plus, 2,1,"+"}, + operation: T! {Minus, 6,1,"-"}, rhs: Box::new(Term::Binary { - span: s!(3, 1, "3*2-(7+35)*8/3"), - lhs: Box::new(Term::Binary { - span: s!(3, 1, "3*2"), + span: s!(7, 1, "(7+35)*8/3"), + lhs: Box::new(Term::Tuple(Box::new(Tuple { + span: s!(7, 1, "(7+35)"), + open_paren: T! {OpenParen, 7,1,"("}, + terms: Some(List { + span: s!(8, 1, "7+35"), + first: Term::Binary { + span: s!(8, 1, "7+35"), + lhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(8, 1, "7"), + sign: None, + before: None, + dot: None, + after: T! {Number, 8,1,"7"}, + exponent: None, + })), + operation: T! {Plus, 9,1,"+"}, + rhs: Box::new(Term::Primitive(Primitive::Number { + span: s!(10, 1, "35"), + sign: None, + before: None, + dot: None, + after: T! {Number, 10,1,"35"}, + exponent: None, + })), + }, + rest: None, + trailing_comma: None, + }), + close_paren: T! {CloseParen, 12,1,")"}, + }))), + operation: T! {Star, 13,1,"*"}, + rhs: Box::new(Term::Binary { + span: s!(14, 1, "8/3"), lhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(3, 1, "3"), + span: s!(14, 1, "8"), sign: None, before: None, dot: None, - after: T! {Number, 3,1,"3"}, + after: T! {Number, 14,1,"8"}, exponent: None, })), - operation: T! {Star, 4,1,"*"}, + operation: T! {Slash, 15, 1, "/"}, rhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(5, 1, "2"), + span: s!(16, 1, "3"), sign: None, before: None, dot: None, - after: T! {Number, 5,1,"2"}, + after: T! {Number, 16,1,"3"}, exponent: None, })), }), - operation: T! {Minus, 6,1,"-"}, - rhs: Box::new(Term::Binary { - span: s!(7, 1, "(7+35)*8/3"), - lhs: Box::new(Term::Tuple(Box::new(Tuple { - span: s!(7, 1, "(7+35)"), - open_paren: T! 
{OpenParen, 7,1,"("}, - terms: Some(List { - span: s!(8, 1, "7+35"), - first: Term::Binary { - span: s!(8, 1, "7+35"), - lhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(8, 1, "7"), - sign: None, - before: None, - dot: None, - after: T! {Number, 8,1,"7"}, - exponent: None, - })), - operation: T! {Plus, 9,1,"+"}, - rhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(10, 1, "35"), - sign: None, - before: None, - dot: None, - after: T! {Number, 10,1,"35"}, - exponent: None, - })), - }, - rest: None, - trailing_comma: None, - }), - close_paren: T! {CloseParen, 12,1,")"}, - }))), - operation: T! {Star, 13,1,"*"}, - rhs: Box::new(Term::Binary { - span: s!(14, 1, "8/3"), - lhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(14, 1, "8"), - sign: None, - before: None, - dot: None, - after: T! {Number, 14,1,"8"}, - exponent: None, - })), - operation: T! {Slash, 15, 1, "/"}, - rhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(16, 1, "3"), - sign: None, - before: None, - dot: None, - after: T! {Number, 16,1,"3"}, - exponent: None, - })), - }), - }), }), - } - ); - - // assert_eq!({ - // let result = parse_term::>(Span::new("1*2*3*4*5")); - // result.unwrap().1 - // },); - - // assert_eq!({ - // let result = parse_term::>(Span::new("(5+3)")); - // result.unwrap().1 - // },); - - // assert_eq!({ - // let result = parse_term::>(Span::new("( int , int , string , skip )")); - // result.unwrap().1 - // },); - - // assert_eq!({ - // let result = parse_term::>(Span::new("(14+4)+3")); - // result.unwrap().1 - // },); - - // assert_eq!({ - // let result = parse_term::>(Span::new( - // "(3 + #sum(?X, ?Y)) * (LENGTH(\"Hello, World!\") + 3)", - // )); - // result.unwrap().1 - // },); - } - - #[test] - fn number_exp() { - assert_eq!( - { - let input = Span::new("e42"); - let refcell = RefCell::new(Vec::new()); - let parser_state = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state, - }; - // parse_exponent::>(input) - parse_exponent::>(input).unwrap().1 - }, - Exponent { - e: T! {TokenKind::Exponent, 0,1,"e"}, - sign: None, - number: T! 
{TokenKind::Number, 1,1,"42"} - } - ) - } + }), + } + ); - #[test] - fn missing_dot() { - let input = Span::new("some(Fact\nSome other, Fact.\nthird(fact)."); - let refcell = RefCell::new(Vec::new()); - let parser_state = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state, - }; - let result = parse_program::>(input); - println!("{}\n\n{:#?}", result.0, result.1); - // assert!(false); - } + // assert_eq!({ + // let result = parse_term::>(Span::new("1*2*3*4*5")); + // result.unwrap().1 + // },); - #[test] - fn wsoc() { - let input = Span::new(" \t\n % first comment\n % second comment\n"); - let refcell = RefCell::new(Vec::new()); - let parser_state = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state, - }; - dbg!(wsoc0::>(input)); - dbg!(wsoc1::>(input)); - } + // assert_eq!({ + // let result = parse_term::>(Span::new("(5+3)")); + // result.unwrap().1 + // },); - #[test] - fn debug_test() { - let str = "asd"; - let input = Span::new(str); - let refcell = RefCell::new(Vec::new()); - let parser_state = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state, - }; - let result = parse_program::>>(input); - dbg!(&result); - println!("{}", result.0); - } + // assert_eq!({ + // let result = parse_term::>(Span::new("( int , int , string , skip )")); + // result.unwrap().1 + // },); - // TODO: Instead of just checking for errors, this should compare the created AST - #[test] - fn parse_language_tag() { - let test_string = "fact(\"テスト\"@ja)."; - let input = Span::new(&test_string); - let refcell = RefCell::new(Vec::new()); - let parser_state = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state, - }; - let result = parse_program::>>(input); - assert!(result.1.is_empty()); - } + // assert_eq!({ + // let result = parse_term::>(Span::new("(14+4)+3")); + // result.unwrap().1 + // },); - // TODO: Instead of just checking for errors, this should compare the created AST - #[test] - fn parse_rdf_literal() { - let test_string = "fact(\"2023\"^^xsd:gYear)."; - let input = Span::new(&test_string); - let refcell = RefCell::new(Vec::new()); - let parser_state = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state, - }; - let result = parse_program::>>(input); - assert!(result.1.is_empty()); - } + // assert_eq!({ + // let result = parse_term::>(Span::new( + // "(3 + #sum(?X, ?Y)) * (LENGTH(\"Hello, World!\") + 3)", + // )); + // result.unwrap().1 + // },); + } - // TODO: Instead of just checking for errors, this should compare the created AST - #[test] - fn parse_floating_point_numbers() { - // https://regex101.com/r/ObowxD/5 - - let valid_numbers = vec![ - "0.2", - "4534.34534345", - ".456456", - "1.", - "1e545", - "1.1e435", - ".1e232", - "1.e343", - "112E+12", - "12312.1231", - ".1231", - "1231", - "-1e+0", - "1e-1", - ]; - - let invalid_numbers = vec!["3", "E9", ".e3", "7E"]; - - for valid in valid_numbers { - let input = Span::new(valid); + #[test] + fn number_exp() { + assert_eq!( + { + let input = Span::new("e42"); let refcell = RefCell::new(Vec::new()); let parser_state = ParserState { errors: &refcell }; let input = Input { input, parser_state, }; - - let result = parse_decimal::>>(input); - // dbg!(&input); - // dbg!(&result); - assert!(result.is_ok()) + // parse_exponent::>(input) + parse_exponent::>(input).unwrap().1 + }, + Exponent { + e: T! {TokenKind::Exponent, 0,1,"e"}, + sign: None, + number: T! 
{TokenKind::Number, 1,1,"42"} } + ) + } - for invalid in invalid_numbers { - let input = Span::new(invalid); - let refcell = RefCell::new(Vec::new()); - let parser_state = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state, - }; + #[test] + fn missing_dot() { + let input = Span::new("some(Fact\nSome other, Fact.\nthird(fact)."); + let refcell = RefCell::new(Vec::new()); + let parser_state = ParserState { errors: &refcell }; + let input = Input { + input, + parser_state, + }; + let result = parse_program::>(input); + println!("{}\n\n{:#?}", result.0, result.1); + // assert!(false); + } - let result = parse_decimal::>>(input); - assert!(result.is_err()) - } - } + #[test] + fn wsoc() { + let input = Span::new(" \t\n % first comment\n % second comment\n"); + let refcell = RefCell::new(Vec::new()); + let parser_state = ParserState { errors: &refcell }; + let input = Input { + input, + parser_state, + }; + dbg!(wsoc0::>(input)); + dbg!(wsoc1::>(input)); + } + + #[test] + fn debug_test() { + let str = "asd"; + let input = Span::new(str); + let refcell = RefCell::new(Vec::new()); + let parser_state = ParserState { errors: &refcell }; + let input = Input { + input, + parser_state, + }; + let result = parse_program::>>(input); + dbg!(&result); + println!("{}", result.0); + } + + // TODO: Instead of just checking for errors, this should compare the created AST + #[test] + fn parse_language_tag() { + let test_string = "fact(\"テスト\"@ja)."; + let input = Span::new(&test_string); + let refcell = RefCell::new(Vec::new()); + let parser_state = ParserState { errors: &refcell }; + let input = Input { + input, + parser_state, + }; + let result = parse_program::>>(input); + assert!(result.1.is_empty()); + } + + // TODO: Instead of just checking for errors, this should compare the created AST + #[test] + fn parse_rdf_literal() { + let test_string = "fact(\"2023\"^^xsd:gYear)."; + let input = Span::new(&test_string); + let refcell = RefCell::new(Vec::new()); + let parser_state = ParserState { errors: &refcell }; + let input = Input { + input, + parser_state, + }; + let result = parse_program::>>(input); + assert!(result.1.is_empty()); + } + + // TODO: Instead of just checking for errors, this should compare the created AST + #[test] + fn parse_floating_point_numbers() { + // https://regex101.com/r/ObowxD/5 + + let valid_numbers = vec![ + "0.2", + "4534.34534345", + ".456456", + "1.", + "1e545", + "1.1e435", + ".1e232", + "1.e343", + "112E+12", + "12312.1231", + ".1231", + "1231", + "-1e+0", + "1e-1", + ]; + + let invalid_numbers = vec!["3", "E9", ".e3", "7E"]; - // TODO: Instead of just checking for errors, this should compare the created AST - #[test] - fn parse_complex_comparison() { - let test_string = "complex(?X, ?Y) :- data(?X, ?Y), ABS(?X - ?Y) >= ?X * ?X."; - let input = Span::new(&test_string); + for valid in valid_numbers { + let input = Span::new(valid); let refcell = RefCell::new(Vec::new()); let parser_state = ParserState { errors: &refcell }; let input = Input { input, parser_state, }; - let result = parse_program::>>(input); + + let result = parse_decimal::>>(input); + // dbg!(&input); // dbg!(&result); - assert!(result.1.is_empty()); + assert!(result.is_ok()) } - // TODO: Instead of just checking for errors, this should compare the created AST - #[test] - fn parse_negation() { - let test_string = "R(?x, ?y, ?z) :- S(?x, ?y, ?z), ~T(?x, ?y), ~ T(a, ?z)."; // should allow for spaces - let input = Span::new(&test_string); + for invalid in invalid_numbers { + let input = 
Span::new(invalid); let refcell = RefCell::new(Vec::new()); let parser_state = ParserState { errors: &refcell }; let input = Input { input, parser_state, }; - let result = parse_program::>>(input); - assert!(result.1.is_empty()); - } - // TODO: Instead of just checking for errors, this should compare the created AST - #[test] - fn parse_trailing_comma() { - let test_string = "head(?X) :- body( (2 ,), (3, 4 , ), ?X) ."; // should allow for spaces - let input = Span::new(&test_string); - let refcell = RefCell::new(Vec::new()); - let parser_state = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state, - }; - let result = parse_program::>>(input); - assert!(result.1.is_empty()); + let result = parse_decimal::>>(input); + assert!(result.is_err()) } } + + // TODO: Instead of just checking for errors, this should compare the created AST + #[test] + fn parse_complex_comparison() { + let test_string = "complex(?X, ?Y) :- data(?X, ?Y), ABS(?X - ?Y) >= ?X * ?X."; + let input = Span::new(&test_string); + let refcell = RefCell::new(Vec::new()); + let parser_state = ParserState { errors: &refcell }; + let input = Input { + input, + parser_state, + }; + let result = parse_program::>>(input); + // dbg!(&result); + assert!(result.1.is_empty()); + } + + // TODO: Instead of just checking for errors, this should compare the created AST + #[test] + fn parse_negation() { + let test_string = "R(?x, ?y, ?z) :- S(?x, ?y, ?z), ~T(?x, ?y), ~ T(a, ?z)."; // should allow for spaces + let input = Span::new(&test_string); + let refcell = RefCell::new(Vec::new()); + let parser_state = ParserState { errors: &refcell }; + let input = Input { + input, + parser_state, + }; + let result = parse_program::>>(input); + assert!(result.1.is_empty()); + } + + // TODO: Instead of just checking for errors, this should compare the created AST + #[test] + fn parse_trailing_comma() { + let test_string = "head(?X) :- body( (2 ,), (3, 4 , ), ?X) ."; // should allow for spaces + let input = Span::new(&test_string); + let refcell = RefCell::new(Vec::new()); + let parser_state = ParserState { errors: &refcell }; + let input = Input { + input, + parser_state, + }; + let result = parse_program::>>(input); + assert!(result.1.is_empty()); + } } diff --git a/nemo/src/io/parser/ast/term.rs b/nemo/src/io/parser/ast/term.rs index f93ac833b..ddbe323cf 100644 --- a/nemo/src/io/parser/ast/term.rs +++ b/nemo/src/io/parser/ast/term.rs @@ -3,8 +3,8 @@ use tower_lsp::lsp_types::SymbolKind; use super::map::Map; use super::named_tuple::NamedTuple; use super::tuple::Tuple; -use super::{ast_to_ascii_tree, AstNode, List, Range, Wsoc}; -use crate::io::lexer::{Span, Token}; +use super::{ast_to_ascii_tree, AstNode, List, Range}; +use crate::io::lexer::Span; use ascii_tree::write_tree; #[derive(Debug, Clone, PartialEq)] @@ -12,12 +12,6 @@ pub enum Term<'a> { Primitive(Primitive<'a>), UniversalVariable(Span<'a>), ExistentialVariable(Span<'a>), - // TODO: Is whitespace needed? Figure out how unary terms look - UnaryPrefix { - span: Span<'a>, - operation: Span<'a>, - term: Box>, - }, Binary { span: Span<'a>, lhs: Box>, @@ -43,9 +37,6 @@ impl AstNode for Term<'_> { Term::Primitive(token) => Some(vec![token]), Term::UniversalVariable(token) => Some(vec![token]), Term::ExistentialVariable(token) => Some(vec![token]), - Term::UnaryPrefix { - operation, term, .. 
- } => Some(vec![operation, &**term]), Term::Binary { lhs, operation, @@ -85,7 +76,6 @@ impl AstNode for Term<'_> { Term::Primitive(p) => p.span(), Term::UniversalVariable(span) => *span, Term::ExistentialVariable(span) => *span, - Term::UnaryPrefix { span, .. } => *span, Term::Binary { span, .. } => *span, Term::Aggregation { span, .. } => *span, Term::Tuple(tuple) => tuple.span(), @@ -115,7 +105,6 @@ impl AstNode for Term<'_> { Term::Primitive(_) => name!("Primitive"), Term::UniversalVariable(_) => name!("Variable"), Term::ExistentialVariable(_) => name!("Existential Variable"), - Term::UnaryPrefix { .. } => name!("Unary Term"), Term::Binary { .. } => name!("Binary Term"), Term::Aggregation { .. } => name!("Aggregation"), Term::Tuple(_) => name!("Tuple"), @@ -147,7 +136,6 @@ impl AstNode for Term<'_> { match self { Term::Primitive(_) => None, Term::UniversalVariable(t) => Some(t.range()), - Term::UnaryPrefix { .. } => None, Term::Blank { .. } => None, Term::ExistentialVariable(t) => Some(t.range()), Term::Binary { .. } => None, @@ -164,7 +152,6 @@ impl AstNode for Term<'_> { Term::UniversalVariable(t) => { Some((format!("Variable: {}", t.span()), SymbolKind::VARIABLE)) } - Term::UnaryPrefix { .. } => Some((String::from("Unary prefix"), SymbolKind::OPERATOR)), Term::Blank { .. } => Some((String::from("Unary prefix"), SymbolKind::VARIABLE)), Term::ExistentialVariable { .. } => { Some((String::from("Existential"), SymbolKind::VARIABLE)) diff --git a/nemo/src/io/parser/types.rs b/nemo/src/io/parser/types.rs index 4ba1b045c..54f3f2392 100644 --- a/nemo/src/io/parser/types.rs +++ b/nemo/src/io/parser/types.rs @@ -15,11 +15,9 @@ use thiserror::Error; use crate::{ io::formats::import_export::ImportExportError, io::lexer::ParserState, - model::rule_model::{Aggregate, Constraint, Literal, Term}, + model::rule_model::{Aggregate, Constraint, Literal, Term, Variable}, }; -use super::{ast::Position, Variable}; - /// A [LocatedSpan] over the input. pub(super) type Span<'a> = LocatedSpan<&'a str>; @@ -434,7 +432,7 @@ pub(crate) struct Input<'a, 's> { pub(crate) parser_state: ParserState<'s>, } impl<'a, 's> Input<'a, 's> { - fn new(input: &'a str, errors: ParserState<'s>) -> Input<'a, 's> { + pub(crate) fn new(input: &'a str, errors: ParserState<'s>) -> Input<'a, 's> { Input { input: Span::new(input), parser_state: errors, @@ -516,7 +514,7 @@ impl<'a, 's> InputIter for Input<'a, 's> { todo!() } - fn position
<P>(&self, predicate: P) -> Option<usize>
+    fn position<P>
(&self, _predicate: P) -> Option where P: Fn(Self::Item) -> bool, { @@ -575,8 +573,8 @@ impl InputTakeAtPosition for Input<'_, '_> { fn split_at_position1>( &self, - predicate: P, - e: ErrorKind, + _predicate: P, + _e: ErrorKind, ) -> IResult where P: Fn(Self::Item) -> bool, @@ -661,7 +659,7 @@ impl std::fmt::Display for Input<'_, '_> { } impl nom_supreme::context::ContextError for Input<'_, '_> { - fn add_context(location: I, ctx: C, other: Self) -> Self { + fn add_context(_location: I, _ctx: C, _other: Self) -> Self { todo!() } } diff --git a/nemo/src/io/resource_providers.rs b/nemo/src/io/resource_providers.rs index 9b41ff17e..c6f944028 100644 --- a/nemo/src/io/resource_providers.rs +++ b/nemo/src/io/resource_providers.rs @@ -2,7 +2,7 @@ use std::{io::BufRead, path::PathBuf, rc::Rc}; -use crate::io::parser::{all_input_consumed, iri::iri}; +// use crate::io::parser::{all_input_consumed, iri::iri}; use nemo_physical::{error::ReadingError, resource::Resource}; use super::compression_format::CompressionFormat; @@ -12,8 +12,9 @@ pub mod file; /// A resource provider for HTTP(s) requests. pub mod http; -fn is_iri(resource: &Resource) -> bool { - all_input_consumed(iri)(resource).is_ok() +fn is_iri(_resource: &Resource) -> bool { + todo!() + // all_input_consumed(iri)(resource).is_ok() } /// Allows resolving resources to readers. diff --git a/nemo/src/model/rule_model/rule.rs b/nemo/src/model/rule_model/rule.rs index 281c8ebdf..1ab98eab0 100644 --- a/nemo/src/model/rule_model/rule.rs +++ b/nemo/src/model/rule_model/rule.rs @@ -1,6 +1,7 @@ use std::collections::{HashMap, HashSet}; -use crate::{io::parser::ParseError, model::VariableAssignment}; +use crate::io::parser::types::ParseError; +use crate::model::VariableAssignment; use super::{Atom, Constraint, Literal, PrimitiveTerm, Term, Variable}; diff --git a/nemo/src/rule_model/component/fact.rs b/nemo/src/rule_model/component/fact.rs index 160d19695..64c883893 100644 --- a/nemo/src/rule_model/component/fact.rs +++ b/nemo/src/rule_model/component/fact.rs @@ -29,6 +29,11 @@ impl Fact { } } + /// Create a new [Fact] from an AST + pub fn from_ast(_ast: crate::io::parser::ast::statement::Fact) { + todo!("create a fact from an ast") + } + /// Return an iterator over the subterms of this fact. pub fn subterms(&self) -> impl Iterator { self.terms.iter() diff --git a/nemo/src/rule_model/program.rs b/nemo/src/rule_model/program.rs index e71f03ffe..933f925f0 100644 --- a/nemo/src/rule_model/program.rs +++ b/nemo/src/rule_model/program.rs @@ -45,16 +45,16 @@ impl Program { pub fn from_ast(ast_program: ast::program::Program) -> Self { let mut program = Program::default(); - for (statement_index, statement) in ast_program.statements.iter().enumerate() { + for (_statement_index, statement) in ast_program.statements.iter().enumerate() { match statement { ast::statement::Statement::Directive(directive) => { program.ast_build_directive(directive); } ast::statement::Statement::Fact { - span, - doc_comment, - fact: atom, - dot, + span: _span, + doc_comment: _doc_comment, + fact: _atom, + dot: _dot, } => todo!(), ast::statement::Statement::Rule { head, body, .. } => { program.ast_build_rule(head, body); @@ -149,12 +149,6 @@ impl Program { ast::term::Term::ExistentialVariable(name) => { Term::existential_variable(&name.to_string()) } - ast::term::Term::UnaryPrefix { - operation, term, .. 
- } => { - // TODO: Currently no associated function with this - todo!() - } ast::term::Term::Binary { lhs, operation, @@ -170,7 +164,9 @@ impl Program { ) } ast::term::Term::Aggregation { - operation, terms, .. + operation: _, + terms: _, + .. } => { todo!() } @@ -190,18 +186,18 @@ impl Program { Term::ground(AnyDataValue::new_iri(value.to_string())) } ast::term::Primitive::PrefixedConstant { - span, - prefix, - colon, - constant, + span: _, + prefix: _, + colon: _, + constant: _, } => todo!(), ast::term::Primitive::Number { - span, - sign, - before, - dot, - after, - exponent, + span: _, + sign: _, + before: _, + dot: _, + after: _, + exponent: _, } => todo!(), ast::term::Primitive::String(string) => { Term::ground(AnyDataValue::new_plain_string(string.to_string())) @@ -214,7 +210,7 @@ impl Program { .set_origin(origin) } - fn ast_build_inner_tuple(origin: Origin, tuple: &ast::tuple::Tuple) -> Term { + fn ast_build_inner_tuple(_origin: Origin, tuple: &ast::tuple::Tuple) -> Term { let subterms = match &tuple.terms { Some(terms) => terms.to_item_vec(), None => vec![], @@ -231,7 +227,7 @@ impl Program { } fn ast_build_inner_named_tuple( - origin: Origin, + _origin: Origin, named_tuple: &ast::named_tuple::NamedTuple, ) -> Term { let subterms = match &named_tuple.tuple.terms { @@ -260,33 +256,33 @@ impl Program { // TODO: Set origin } ast::directive::Directive::Prefix { - span, - doc_comment, - prefix, - prefix_iri, - dot, + span: _, + doc_comment: _, + prefix: _, + prefix_iri: _, + dot: _, } => todo!(), ast::directive::Directive::Import { - span, - doc_comment, - predicate, - arrow, - map, - dot, + span: _, + doc_comment: _, + predicate: _, + arrow: _, + map: _, + dot: _, } => todo!(), ast::directive::Directive::Export { - span, - doc_comment, - predicate, - arrow, - map, - dot, + span: _, + doc_comment: _, + predicate: _, + arrow: _, + map: _, + dot: _, } => todo!(), ast::directive::Directive::Output { - span, - doc_comment, - predicates, - dot, + span: _, + doc_comment: _, + predicates: _, + dot: _, } => todo!(), } } From 06864cf54ecad5885c07d56b5a2484dce303621a Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Wed, 10 Jul 2024 18:05:03 +0200 Subject: [PATCH 119/214] Make changes to accommodate the removal of the old parser --- nemo-cli/src/main.rs | 103 +++++++++++++++++++++++++------------------ 1 file changed, 60 insertions(+), 43 deletions(-) diff --git a/nemo-cli/src/main.rs b/nemo-cli/src/main.rs index ee453edb5..1a6870dc2 100644 --- a/nemo-cli/src/main.rs +++ b/nemo-cli/src/main.rs @@ -29,7 +29,7 @@ use nemo::{ error::{Error, ReadingError}, execution::{DefaultExecutionEngine, ExecutionEngine}, io::{ - parser::{parse_fact, parse_program}, + parser::{parse_fact_str, parse_program_str}, resource_providers::ResourceProviders, ImportManager, }, @@ -165,7 +165,12 @@ fn run(mut cli: CliApp) -> Result<(), Error> { filename: rules.to_string_lossy().to_string(), })?; - let mut program = parse_program(rules_content)?; + // let mut program = parse_program(rules_content)?; + let (ast, _errors) = parse_program_str(&rules_content); + log::debug!("AST:\n{ast}"); + // TODO: Report errors! + log::debug!("ERRORS:\n{_errors:#?}"); + let program = nemo::rule_model::program::Program::from_ast(ast); log::info!("Rules parsed"); log::trace!("{:?}", program); @@ -201,23 +206,33 @@ fn run(mut cli: CliApp) -> Result<(), Error> { ) }); - raw_facts_to_be_traced - .map(|f| f.into_iter().map(parse_fact).collect::, _>>()) - .transpose()? 
+ // raw_facts_to_be_traced + // .map(|f| { + // f.into_iter() + // .map(/*parse_fact_str*/) // FIXME: Iterator over Strings and not &str + // .collect::, _>>() + // }) + // .transpose()? + None::> // NOTE: This is just a quick and dirty fix }; - override_exports(&mut program, cli.output.export_setting); + // FIXME: Change override exports to use the new rule model + // override_exports(&mut program, cli.output.export_setting); let export_manager = cli.output.export_manager()?; // Validate exports even if we do not intend to write data: - for export in program.exports() { - export_manager.validate(export)?; - } + // FIXME: How does the new rule model handle exports? + // for export in program.exports() { + // export_manager.validate(export)?; + // } let import_manager = ImportManager::new(ResourceProviders::with_base_path(cli.import_directory)); - let mut engine: DefaultExecutionEngine = ExecutionEngine::initialize(&program, import_manager)?; + let mut engine: DefaultExecutionEngine = ExecutionEngine::initialize( + /*&program*/ todo!("change the old rule model to the new one"), + import_manager, + )?; TimedCode::instance().sub("Reading & Preprocessing").stop(); @@ -234,15 +249,16 @@ fn run(mut cli: CliApp) -> Result<(), Error> { .start(); log::info!("writing output"); - for export_directive in program.exports() { - if let Some(arity) = engine.predicate_arity(export_directive.predicate()) { - stdout_used |= export_manager.export_table( - export_directive, - engine.predicate_rows(export_directive.predicate())?, - arity, - )?; - } - } + // FIXME: How are exports handled in the new rule model? + // for export_directive in program.exports() { + // if let Some(arity) = engine.predicate_arity(export_directive.predicate()) { + // stdout_used |= export_manager.export_table( + // export_directive, + // engine.predicate_rows(export_directive.predicate())?, + // arity, + // )?; + // } + // } TimedCode::instance() .sub("Output & Final Materialization") @@ -273,30 +289,31 @@ fn run(mut cli: CliApp) -> Result<(), Error> { print_memory_details(&engine); } - if let Some(facts) = facts_to_be_traced { - let (trace, handles) = engine.trace(program.clone(), facts.clone()); - - match cli.tracing.output_file { - Some(output_file) => { - let filename = output_file.to_string_lossy().to_string(); - let trace_json = trace.json(&handles); - - let mut json_file = File::create(output_file)?; - if serde_json::to_writer(&mut json_file, &trace_json).is_err() { - return Err(Error::SerializationError { filename }); - } - } - None => { - for (fact, handle) in facts.into_iter().zip(handles) { - if let Some(tree) = trace.tree(handle) { - println!("\n{}", tree.to_ascii_art()); - } else { - println!("\n{fact} was not derived"); - } - } - } - } - } + // NOTE: As a quick and dirty fix I commented this out, because `program.clone()` did not exist + // if let Some(facts) = facts_to_be_traced { + // let (trace, handles) = engine.trace(program.clone(), facts.clone()); + + // match cli.tracing.output_file { + // Some(output_file) => { + // let filename = output_file.to_string_lossy().to_string(); + // let trace_json = trace.json(&handles); + + // let mut json_file = File::create(output_file)?; + // if serde_json::to_writer(&mut json_file, &trace_json).is_err() { + // return Err(Error::SerializationError { filename }); + // } + // } + // None => { + // for (fact, handle) in facts.into_iter().zip(handles) { + // if let Some(tree) = trace.tree(handle) { + // println!("\n{}", tree.to_ascii_art()); + // } else { + // println!("\n{fact} was 
not derived"); + // } + // } + // } + // } + // } Ok(()) } From 7e830de5b3a33b1d4470c7a23889c384c7f62780 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Wed, 10 Jul 2024 18:05:21 +0200 Subject: [PATCH 120/214] Make changes to accommodate the removal of the old parser --- nemo-python/src/lib.rs | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/nemo-python/src/lib.rs b/nemo-python/src/lib.rs index 4a2158548..ca874c0e7 100644 --- a/nemo-python/src/lib.rs +++ b/nemo-python/src/lib.rs @@ -10,7 +10,7 @@ use std::{ use nemo::{ datavalues::{AnyDataValue, DataValue}, execution::{tracing::trace::ExecutionTraceTree, ExecutionEngine}, - io::{resource_providers::ResourceProviders, ExportManager, ImportManager}, + io::{lexer::Error, resource_providers::ResourceProviders, ExportManager, ImportManager}, meta::timing::TimedCode, model::{ chase_model::{ChaseAtom, ChaseFact}, @@ -38,6 +38,13 @@ impl PythonResult for Result { self.map_err(|err| NemoError::new_err(format!("{}", err))) } } +impl PythonResult for (T, Vec) { + type Value = T; + + fn py_res(self) -> PyResult { + todo!("It is unclear what should get returned") + } +} #[pyclass] #[derive(Clone)] @@ -46,13 +53,17 @@ struct NemoProgram(nemo::model::Program); #[pyfunction] fn load_file(file: String) -> PyResult { let contents = read_to_string(file)?; - let program = nemo::io::parser::parse_program(contents).py_res()?; + let ast = nemo::io::parser::parse_program_str(&contents).py_res()?; + let program = nemo::rule_model::program::Program::from_ast(ast); + let program = todo!("update NemoProgram to use the new rule model"); Ok(NemoProgram(program)) } #[pyfunction] fn load_string(rules: String) -> PyResult { - let program = nemo::io::parser::parse_program(rules).py_res()?; + let ast = nemo::io::parser::parse_program_str(&rules).py_res()?; + let program = nemo::rule_model::program::Program::from_ast(ast); + let program = todo!("update NemoProgram to use the new rule model"); Ok(NemoProgram(program)) } @@ -399,7 +410,10 @@ impl NemoEngine { } fn trace(&mut self, fact: String) -> Option { - let parsed_fact = nemo::io::parser::parse_fact(fact).py_res().ok()?; + let (ast, _errors) = nemo::io::parser::parse_fact_str(&fact); /*.py_res().ok()?;*/ + // TODO: Report errors... + let parsed_fact = nemo::rule_model::component::fact::Fact::from_ast(ast); + let parsed_fact = todo!(); let (trace, handles) = self.engine.trace(self.program.0.clone(), vec![parsed_fact]); let handle = *handles .first() From e4274f2456f3ded57941a210f9e813675e2ac57b Mon Sep 17 00:00:00 2001 From: Alex Ivliev Date: Wed, 10 Jul 2024 22:14:55 +0200 Subject: [PATCH 121/214] Make tests compile again --- nemo/src/io/parser.rs | 6 ++- nemo/src/program_analysis/analysis.rs | 59 --------------------------- 2 files changed, 5 insertions(+), 60 deletions(-) diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index 0b6e1022e..de85b9d12 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -2699,7 +2699,11 @@ fn wsoc1<'a, 's, E: ParseError> + ContextError, Cont } /// Parse a full program consisting of directives, facts, rules and comments. 
-fn parse_program<'a, 's, E: ParseError> + ContextError, Context>>( +pub fn parse_program< + 'a, + 's, + E: ParseError> + ContextError, Context>, +>( input: Input<'a, 's>, ) -> (Program<'a>, Vec) { let result = context( diff --git a/nemo/src/program_analysis/analysis.rs b/nemo/src/program_analysis/analysis.rs index 597f201f7..54c530f28 100644 --- a/nemo/src/program_analysis/analysis.rs +++ b/nemo/src/program_analysis/analysis.rs @@ -406,62 +406,3 @@ impl ChaseProgram { }) } } - -#[cfg(test)] -mod test { - use crate::{ - error::Error, io::parser::parse_program, model::chase_model::ChaseProgram, - program_analysis::analysis::RuleAnalysisError, - }; - - #[test] - #[cfg_attr(miri, ignore)] - fn no_arity_overloading() { - let program = ChaseProgram::try_from( - parse_program( - r#" - @import q :- turtle{resource="dummy.nt"} . - p(?x, ?y) :- q(?x), q(?y) . - "#, - ) - .unwrap(), - ) - .unwrap(); - - assert!(matches!( - program.analyze(), - Err(Error::RuleAnalysisError( - RuleAnalysisError::UnsupportedFeaturePredicateOverloading { .. } - )) - )); - - let program = - ChaseProgram::try_from(parse_program(r#"q(?x, ?y) :- q(?x), q(?y) ."#).unwrap()) - .unwrap(); - - assert!(matches!( - program.analyze(), - Err(Error::RuleAnalysisError( - RuleAnalysisError::UnsupportedFeaturePredicateOverloading { .. } - )) - )); - - let program = ChaseProgram::try_from( - parse_program( - r#" - p(?x, ?y) :- q(?x), q(?y) . - q(23, 42) . - "#, - ) - .unwrap(), - ) - .unwrap(); - - assert!(matches!( - program.analyze(), - Err(Error::RuleAnalysisError( - RuleAnalysisError::UnsupportedFeaturePredicateOverloading { .. } - )) - )); - } -} From cac3cacefd0f748a2f732c27605326dcd47ee7f6 Mon Sep 17 00:00:00 2001 From: Alex Ivliev Date: Wed, 10 Jul 2024 23:14:44 +0200 Subject: [PATCH 122/214] Restructure syntax definitions for logical model --- .../rule_model/component/term/operation.rs | 265 +------------ .../term/operation/operation_kind.rs | 363 ++++++++++++++++++ nemo/src/rule_model/program.rs | 2 +- nemo/src/rule_model/syntax.rs | 61 +-- nemo/src/rule_model/syntax/builtins.rs | 110 ++++++ nemo/src/rule_model/syntax/import_export.rs | 6 + .../syntax/import_export/attributes.rs | 14 + .../syntax/import_export/compression.rs | 6 + .../syntax/import_export/file_formats.rs | 22 ++ .../syntax/import_export/value_formats.rs | 22 ++ 10 files changed, 550 insertions(+), 321 deletions(-) create mode 100644 nemo/src/rule_model/component/term/operation/operation_kind.rs create mode 100644 nemo/src/rule_model/syntax/builtins.rs create mode 100644 nemo/src/rule_model/syntax/import_export.rs create mode 100644 nemo/src/rule_model/syntax/import_export/attributes.rs create mode 100644 nemo/src/rule_model/syntax/import_export/compression.rs create mode 100644 nemo/src/rule_model/syntax/import_export/file_formats.rs create mode 100644 nemo/src/rule_model/syntax/import_export/value_formats.rs diff --git a/nemo/src/rule_model/component/term/operation.rs b/nemo/src/rule_model/component/term/operation.rs index ee934f74b..7a6474fd2 100644 --- a/nemo/src/rule_model/component/term/operation.rs +++ b/nemo/src/rule_model/component/term/operation.rs @@ -1,7 +1,11 @@ //! This module defines [Operation]. 
+pub mod operation_kind; + use std::{fmt::Display, hash::Hash}; +use operation_kind::OperationKind; + use crate::rule_model::{ component::{IteratableVariables, ProgramComponent}, origin::Origin, @@ -9,267 +13,6 @@ use crate::rule_model::{ use super::{primitive::variable::Variable, Term}; -/// Supported operations -#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, PartialOrd)] -pub enum OperationKind { - /// Equality - Equal, - /// Inequality - Unequals, - /// Sum of numeric values - NumericSum, - /// Subtraction between two numeric values - NumericSubtraction, - /// Product of numeric values - NumericProduct, - /// Division between two numeric values - NumericDivision, - /// Logarithm of a numeric value to some numeric base - NumericLogarithm, - /// Numeric value raised to another numeric value - NumericPower, - /// Remainder of a division between two numeric values - NumericRemainder, - /// Numeric greater than comparison - NumericGreaterthan, - /// Numeric greater than or equals comparison - NumericGreaterthaneq, - /// Numeric less than comparison - NumericLessthan, - /// Numeric less than or equals comparison - NumericLessthaneq, - /// Lexicographic comparison between strings - StringCompare, - /// Check whether string is contained in another, correspondng to SPARQL function CONTAINS. - StringContains, - /// String starting at some start position - StringSubstring, - /// First part of a string split by some other string - StringBefore, - /// Second part of a string split by some other string - StringAfter, - /// Whether string starts with a certain string - StringStarts, - /// Whether string ends with a certain string - StringEnds, - /// Boolean negation - BooleanNegation, - /// Cast to double - CastToDouble, - /// Cast to float - CastToFloat, - /// Cast to integer - CastToInteger, - /// Canonical string representation of a value - CanonicalString, - /// Check if value is an integer - CheckIsInteger, - /// Check if value is a float - CheckIsFloat, - /// Check if value is a double - CheckIsDouble, - /// Check if value is an iri - CheckIsIri, - /// Check if value is numeric - CheckIsNumeric, - /// Check if value is a null - CheckIsNull, - /// Check if value is a string - CheckIsString, - /// Get datatype of a value - Datatype, - /// Get language tag of a languaged tagged string - LanguageTag, - /// Lexical value - LexicalValue, - /// Absolute value of a numeric value - NumericAbsolute, - /// Cosine of a numeric value - NumericCosine, - /// Rounding up of a numeric value - NumericCeil, - /// Rounding down of a numeric value - NumericFloor, - /// Additive inverse of a numeric value - NumericNegation, - /// Rounding of a numeric value - NumericRound, - /// Sine of a numeric value - NumericSine, - /// Square root of a numeric value - NumericSquareroot, - /// Tangent of a numeric value - NumericTangent, - /// Length of a string value - StringLength, - /// Reverse of a string value - StringReverse, - /// String converted to lowercase letters - StringLowercase, - /// String converted to uppercase letters - StringUppercase, - /// Bitwise and operation - BitAnd, - /// Bitwise or operation - BitOr, - /// Bitwise xor operation - BitXor, - /// Conjunction of boolean values - BooleanConjunction, - /// Disjunction of boolean values - BooleanDisjunction, - /// Minimum of numeric values - NumericMinimum, - /// Maximum of numeric values - NumericMaximum, - /// Lukasiewicz norm of numeric values - NumericLukasiewicz, - /// Concatentation of two string values, correspondng to SPARQL function CONCAT. 
- StringConcatenation, -} - -impl OperationKind { - /// Return the [OperationKind] corresponding to the given operation name or `None` if there is no such operation. - pub fn from_name(name: &str) -> Option { - Some(match name.to_uppercase().as_str() { - "+" => Self::NumericSum, - "-" => Self::NumericSubtraction, - "/" => Self::NumericDivision, - "*" => Self::NumericProduct, - "<" => Self::NumericLessthan, - ">" => Self::NumericGreaterthan, - "<=" => Self::NumericLessthaneq, - ">=" => Self::NumericGreaterthaneq, - "isInteger" => Self::CheckIsInteger, - "isFloat" => Self::CheckIsFloat, - "isDouble" => Self::CheckIsDouble, - "isIri" => Self::CheckIsIri, - "isNumeric" => Self::CheckIsNumeric, - "isNull" => Self::CheckIsNull, - "isString" => Self::CheckIsString, - "ABS" => Self::NumericAbsolute, - "SQRT" => Self::NumericSquareroot, - "NOT" => Self::BooleanNegation, - "fullStr" => Self::CanonicalString, - "STR" => Self::LexicalValue, - "SIN" => Self::NumericSine, - "COS" => Self::NumericCosine, - "TAN" => Self::NumericTangent, - "STRLEN" => Self::StringLength, - "STRREV" => Self::StringReverse, - "UCASE" => Self::StringLowercase, - "LCASE" => Self::StringUppercase, - "ROUND" => Self::NumericRound, - "CEIL" => Self::NumericCeil, - "FLOOR" => Self::NumericFloor, - "DATATYPE" => Self::Datatype, - "LANG" => Self::LanguageTag, - "INT" => Self::CastToInteger, - "DOUBLE" => Self::CastToDouble, - "FLOAT" => Self::CastToFloat, - "LOG" => Self::NumericLogarithm, - "POW" => Self::NumericPower, - "COMPARE" => Self::StringCompare, - "CONTAINS" => Self::StringContains, - "SUBSTR" => Self::StringSubstring, - "STRSTARTS" => Self::StringStarts, - "STRENDS" => Self::StringEnds, - "STRBEFORE" => Self::StringBefore, - "STRAFTER" => Self::StringAfter, - "REM" => Self::NumericRemainder, - "BITAND" => Self::BitAnd, - "BITOR" => Self::BitOr, - "BITXOR" => Self::BitXor, - "MAX" => Self::NumericMaximum, - "MIN" => Self::NumericMinimum, - "LUKA" => Self::NumericLukasiewicz, - "SUM" => Self::NumericSum, - "PROD" => Self::NumericProduct, - "AND" => Self::BooleanConjunction, - "OR" => Self::BooleanDisjunction, - "CONCAT" => Self::StringConcatenation, - _ => return None, - }) - } - - /// Precendence of operations for display purposes. 
- pub(crate) fn precedence(&self) -> usize { - match &self { - Self::NumericSum => 1, - Self::NumericSubtraction => 1, - Self::NumericProduct => 2, - Self::NumericDivision => 2, - _ => 3, - } - } -} - -impl Display for OperationKind { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let string = match self { - OperationKind::Equal => "EQUAL", - OperationKind::Unequals => "UNEQUAL", - OperationKind::NumericSum => "SUM", - OperationKind::NumericSubtraction => "MINUS", - OperationKind::NumericProduct => "PROD", - OperationKind::NumericDivision => "DIV", - OperationKind::NumericLogarithm => "LOG", - OperationKind::NumericPower => "POW", - OperationKind::NumericRemainder => "REM", - OperationKind::NumericGreaterthan => "GT", - OperationKind::NumericGreaterthaneq => "GTE", - OperationKind::NumericLessthan => "LT", - OperationKind::NumericLessthaneq => "LTE", - OperationKind::StringCompare => "COMPARE", - OperationKind::StringContains => "CONTAINS", - OperationKind::StringSubstring => "SUBSTR", - OperationKind::StringBefore => "STRBEFORE", - OperationKind::StringAfter => "STRAFTER", - OperationKind::StringStarts => "STRSTARTS", - OperationKind::StringEnds => "STRENDS", - OperationKind::BooleanNegation => "NOT", - OperationKind::CastToDouble => "DOUBLE", - OperationKind::CastToFloat => "FLOAT", - OperationKind::CastToInteger => "INT", - OperationKind::CanonicalString => "fullStr", - OperationKind::CheckIsInteger => "isInteger", - OperationKind::CheckIsFloat => "isFloat", - OperationKind::CheckIsDouble => "isDouble", - OperationKind::CheckIsIri => "isIri", - OperationKind::CheckIsNumeric => "isNumeric", - OperationKind::CheckIsNull => "isNull", - OperationKind::CheckIsString => "isString", - OperationKind::Datatype => "DATATYPE", - OperationKind::LanguageTag => "LANG", - OperationKind::LexicalValue => "STR", - OperationKind::NumericAbsolute => "ABS", - OperationKind::NumericCosine => "COS", - OperationKind::NumericCeil => "CEIL", - OperationKind::NumericFloor => "FLOOR", - OperationKind::NumericNegation => "MINUS", - OperationKind::NumericRound => "ROUND", - OperationKind::NumericSine => "SIN", - OperationKind::NumericSquareroot => "SQRT", - OperationKind::NumericTangent => "TAN", - OperationKind::StringLength => "STRLEN", - OperationKind::StringReverse => "STRREV", - OperationKind::StringLowercase => "LCASE", - OperationKind::StringUppercase => "UCASE", - OperationKind::BitAnd => "BITAND", - OperationKind::BitOr => "BITOR", - OperationKind::BitXor => "BITXOR", - OperationKind::BooleanConjunction => "AND", - OperationKind::BooleanDisjunction => "OR", - OperationKind::NumericMinimum => "MIN", - OperationKind::NumericMaximum => "MAX", - OperationKind::NumericLukasiewicz => "LUKA", - OperationKind::StringConcatenation => "CONCAT", - }; - - write!(f, "{}", string) - } -} - /// Operation /// /// An action or computation performed on [Term]s. diff --git a/nemo/src/rule_model/component/term/operation/operation_kind.rs b/nemo/src/rule_model/component/term/operation/operation_kind.rs new file mode 100644 index 000000000..e99b89d1e --- /dev/null +++ b/nemo/src/rule_model/component/term/operation/operation_kind.rs @@ -0,0 +1,363 @@ +//! This module defines [OperationKind]. 
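+//!
+//! A rough usage sketch of the items defined below (illustrative only; real
+//! call sites live elsewhere in the rule model):
+//! ```ignore
+//! let kind = OperationKind::from_name("SQRT").expect("known operation");
+//! assert_eq!(kind, OperationKind::NumericSquareroot);
+//! assert!(!kind.is_boolean());
+//! assert!(OperationNumArguments::Unary.validate(1));
+//! ```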
+ +use std::fmt::Display; + +/// Number of arguments supported by an operation +#[derive(Debug)] +pub(crate) enum OperationNumArguments { + /// Operation requires one argument + Unary, + /// Operation requires two arguments + Binary, + /// Operation requires three arguments + Ternary, + /// Operation supports arbitrary many arguments (including zero) + Arbitrary, + /// Operation supports the given number of arguments + Exact(usize), + /// Operation supports arguments that satisfy one of the given requirements + Choice(Vec), +} + +impl OperationNumArguments { + /// Return whether the given number of arguments satisfies this constraint. + pub(crate) fn validate(&self, num_arguments: usize) -> bool { + match self { + OperationNumArguments::Unary => num_arguments == 1, + OperationNumArguments::Binary => num_arguments == 2, + OperationNumArguments::Ternary => num_arguments == 3, + OperationNumArguments::Arbitrary => true, + OperationNumArguments::Exact(exact) => num_arguments == *exact, + OperationNumArguments::Choice(choice) => { + choice.iter().any(|num| num.validate(num_arguments)) + } + } + } +} + +/// Supported operations +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, PartialOrd)] +pub enum OperationKind { + /// Equality + Equal, + /// Inequality + Unequals, + /// Sum of numeric values + NumericSum, + /// Subtraction between two numeric values + NumericSubtraction, + /// Product of numeric values + NumericProduct, + /// Division between two numeric values + NumericDivision, + /// Logarithm of a numeric value to some numeric base + NumericLogarithm, + /// Numeric value raised to another numeric value + NumericPower, + /// Remainder of a division between two numeric values + NumericRemainder, + /// Numeric greater than comparison + NumericGreaterthan, + /// Numeric greater than or equals comparison + NumericGreaterthaneq, + /// Numeric less than comparison + NumericLessthan, + /// Numeric less than or equals comparison + NumericLessthaneq, + /// Lexicographic comparison between strings + StringCompare, + /// Check whether string is contained in another, correspondng to SPARQL function CONTAINS. 
+ StringContains, + /// String starting at some start position + StringSubstring, + /// First part of a string split by some other string + StringBefore, + /// Second part of a string split by some other string + StringAfter, + /// Whether string starts with a certain string + StringStarts, + /// Whether string ends with a certain string + StringEnds, + /// Boolean negation + BooleanNegation, + /// Cast to double + CastToDouble, + /// Cast to float + CastToFloat, + /// Cast to integer + CastToInteger, + /// Canonical string representation of a value + CanonicalString, + /// Check if value is an integer + CheckIsInteger, + /// Check if value is a float + CheckIsFloat, + /// Check if value is a double + CheckIsDouble, + /// Check if value is an iri + CheckIsIri, + /// Check if value is numeric + CheckIsNumeric, + /// Check if value is a null + CheckIsNull, + /// Check if value is a string + CheckIsString, + /// Get datatype of a value + Datatype, + /// Get language tag of a languaged tagged string + LanguageTag, + /// Lexical value + LexicalValue, + /// Absolute value of a numeric value + NumericAbsolute, + /// Cosine of a numeric value + NumericCosine, + /// Rounding up of a numeric value + NumericCeil, + /// Rounding down of a numeric value + NumericFloor, + /// Additive inverse of a numeric value + NumericNegation, + /// Rounding of a numeric value + NumericRound, + /// Sine of a numeric value + NumericSine, + /// Square root of a numeric value + NumericSquareroot, + /// Tangent of a numeric value + NumericTangent, + /// Length of a string value + StringLength, + /// Reverse of a string value + StringReverse, + /// String converted to lowercase letters + StringLowercase, + /// String converted to uppercase letters + StringUppercase, + /// Bitwise and operation + BitAnd, + /// Bitwise or operation + BitOr, + /// Bitwise xor operation + BitXor, + /// Conjunction of boolean values + BooleanConjunction, + /// Disjunction of boolean values + BooleanDisjunction, + /// Minimum of numeric values + NumericMinimum, + /// Maximum of numeric values + NumericMaximum, + /// Lukasiewicz norm of numeric values + NumericLukasiewicz, + /// Concatentation of two string values, correspondng to SPARQL function CONCAT. + StringConcatenation, +} + +impl OperationKind { + /// Return the [OperationKind] corresponding to the given operation name or `None` if there is no such operation. 
+ pub fn from_name(name: &str) -> Option { + Some(match name.to_uppercase().as_str() { + "+" => Self::NumericSum, + "-" => Self::NumericSubtraction, + "/" => Self::NumericDivision, + "*" => Self::NumericProduct, + "<" => Self::NumericLessthan, + ">" => Self::NumericGreaterthan, + "<=" => Self::NumericLessthaneq, + ">=" => Self::NumericGreaterthaneq, + "isInteger" => Self::CheckIsInteger, + "isFloat" => Self::CheckIsFloat, + "isDouble" => Self::CheckIsDouble, + "isIri" => Self::CheckIsIri, + "isNumeric" => Self::CheckIsNumeric, + "isNull" => Self::CheckIsNull, + "isString" => Self::CheckIsString, + "ABS" => Self::NumericAbsolute, + "SQRT" => Self::NumericSquareroot, + "NOT" => Self::BooleanNegation, + "fullStr" => Self::CanonicalString, + "STR" => Self::LexicalValue, + "SIN" => Self::NumericSine, + "COS" => Self::NumericCosine, + "TAN" => Self::NumericTangent, + "STRLEN" => Self::StringLength, + "STRREV" => Self::StringReverse, + "UCASE" => Self::StringLowercase, + "LCASE" => Self::StringUppercase, + "ROUND" => Self::NumericRound, + "CEIL" => Self::NumericCeil, + "FLOOR" => Self::NumericFloor, + "DATATYPE" => Self::Datatype, + "LANG" => Self::LanguageTag, + "INT" => Self::CastToInteger, + "DOUBLE" => Self::CastToDouble, + "FLOAT" => Self::CastToFloat, + "LOG" => Self::NumericLogarithm, + "POW" => Self::NumericPower, + "COMPARE" => Self::StringCompare, + "CONTAINS" => Self::StringContains, + "SUBSTR" => Self::StringSubstring, + "STRSTARTS" => Self::StringStarts, + "STRENDS" => Self::StringEnds, + "STRBEFORE" => Self::StringBefore, + "STRAFTER" => Self::StringAfter, + "REM" => Self::NumericRemainder, + "BITAND" => Self::BitAnd, + "BITOR" => Self::BitOr, + "BITXOR" => Self::BitXor, + "MAX" => Self::NumericMaximum, + "MIN" => Self::NumericMinimum, + "LUKA" => Self::NumericLukasiewicz, + "SUM" => Self::NumericSum, + "PROD" => Self::NumericProduct, + "AND" => Self::BooleanConjunction, + "OR" => Self::BooleanDisjunction, + "CONCAT" => Self::StringConcatenation, + _ => return None, + }) + } + + /// Precendence of operations for display purposes. + pub(crate) fn precedence(&self) -> usize { + match &self { + Self::NumericSum => 1, + Self::NumericSubtraction => 1, + Self::NumericProduct => 2, + Self::NumericDivision => 2, + _ => 3, + } + } + + /// Return whether the operation returns a boolean value. 
+ pub(crate) fn is_boolean(&self) -> bool { + match self { + OperationKind::Equal => true, + OperationKind::Unequals => true, + OperationKind::NumericSum => false, + OperationKind::NumericSubtraction => false, + OperationKind::NumericProduct => false, + OperationKind::NumericDivision => false, + OperationKind::NumericLogarithm => false, + OperationKind::NumericPower => false, + OperationKind::NumericRemainder => false, + OperationKind::NumericGreaterthan => true, + OperationKind::NumericGreaterthaneq => true, + OperationKind::NumericLessthan => true, + OperationKind::NumericLessthaneq => true, + OperationKind::StringCompare => false, + OperationKind::StringContains => true, + OperationKind::StringSubstring => false, + OperationKind::StringBefore => true, + OperationKind::StringAfter => true, + OperationKind::StringStarts => true, + OperationKind::StringEnds => true, + OperationKind::BooleanNegation => true, + OperationKind::CastToDouble => false, + OperationKind::CastToFloat => false, + OperationKind::CastToInteger => false, + OperationKind::CanonicalString => false, + OperationKind::CheckIsInteger => true, + OperationKind::CheckIsFloat => true, + OperationKind::CheckIsDouble => true, + OperationKind::CheckIsIri => true, + OperationKind::CheckIsNumeric => true, + OperationKind::CheckIsNull => true, + OperationKind::CheckIsString => true, + OperationKind::Datatype => false, + OperationKind::LanguageTag => false, + OperationKind::LexicalValue => false, + OperationKind::NumericAbsolute => false, + OperationKind::NumericCosine => false, + OperationKind::NumericCeil => false, + OperationKind::NumericFloor => false, + OperationKind::NumericNegation => false, + OperationKind::NumericRound => false, + OperationKind::NumericSine => false, + OperationKind::NumericSquareroot => false, + OperationKind::NumericTangent => false, + OperationKind::StringLength => false, + OperationKind::StringReverse => false, + OperationKind::StringLowercase => false, + OperationKind::StringUppercase => false, + OperationKind::BitAnd => false, + OperationKind::BitOr => false, + OperationKind::BitXor => false, + OperationKind::BooleanConjunction => true, + OperationKind::BooleanDisjunction => true, + OperationKind::NumericMinimum => false, + OperationKind::NumericMaximum => false, + OperationKind::NumericLukasiewicz => false, + OperationKind::StringConcatenation => false, + } + } + + /// Return the number of arguments accepted by this operation + pub(crate) fn number_arguments(&self) {} +} + +impl Display for OperationKind { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let string = match self { + OperationKind::Equal => "EQUAL", + OperationKind::Unequals => "UNEQUAL", + OperationKind::NumericSum => "SUM", + OperationKind::NumericSubtraction => "MINUS", + OperationKind::NumericProduct => "PROD", + OperationKind::NumericDivision => "DIV", + OperationKind::NumericLogarithm => "LOG", + OperationKind::NumericPower => "POW", + OperationKind::NumericRemainder => "REM", + OperationKind::NumericGreaterthan => "GT", + OperationKind::NumericGreaterthaneq => "GTE", + OperationKind::NumericLessthan => "LT", + OperationKind::NumericLessthaneq => "LTE", + OperationKind::StringCompare => "COMPARE", + OperationKind::StringContains => "CONTAINS", + OperationKind::StringSubstring => "SUBSTR", + OperationKind::StringBefore => "STRBEFORE", + OperationKind::StringAfter => "STRAFTER", + OperationKind::StringStarts => "STRSTARTS", + OperationKind::StringEnds => "STRENDS", + OperationKind::BooleanNegation => "NOT", + 
OperationKind::CastToDouble => "DOUBLE", + OperationKind::CastToFloat => "FLOAT", + OperationKind::CastToInteger => "INT", + OperationKind::CanonicalString => "fullStr", + OperationKind::CheckIsInteger => "isInteger", + OperationKind::CheckIsFloat => "isFloat", + OperationKind::CheckIsDouble => "isDouble", + OperationKind::CheckIsIri => "isIri", + OperationKind::CheckIsNumeric => "isNumeric", + OperationKind::CheckIsNull => "isNull", + OperationKind::CheckIsString => "isString", + OperationKind::Datatype => "DATATYPE", + OperationKind::LanguageTag => "LANG", + OperationKind::LexicalValue => "STR", + OperationKind::NumericAbsolute => "ABS", + OperationKind::NumericCosine => "COS", + OperationKind::NumericCeil => "CEIL", + OperationKind::NumericFloor => "FLOOR", + OperationKind::NumericNegation => "MINUS", + OperationKind::NumericRound => "ROUND", + OperationKind::NumericSine => "SIN", + OperationKind::NumericSquareroot => "SQRT", + OperationKind::NumericTangent => "TAN", + OperationKind::StringLength => "STRLEN", + OperationKind::StringReverse => "STRREV", + OperationKind::StringLowercase => "LCASE", + OperationKind::StringUppercase => "UCASE", + OperationKind::BitAnd => "BITAND", + OperationKind::BitOr => "BITOR", + OperationKind::BitXor => "BITXOR", + OperationKind::BooleanConjunction => "AND", + OperationKind::BooleanDisjunction => "OR", + OperationKind::NumericMinimum => "MIN", + OperationKind::NumericMaximum => "MAX", + OperationKind::NumericLukasiewicz => "LUKA", + OperationKind::StringConcatenation => "CONCAT", + }; + + write!(f, "{}", string) + } +} diff --git a/nemo/src/rule_model/program.rs b/nemo/src/rule_model/program.rs index 933f925f0..95fdcbf80 100644 --- a/nemo/src/rule_model/program.rs +++ b/nemo/src/rule_model/program.rs @@ -15,7 +15,7 @@ use super::{ rule::{Rule, RuleBuilder}, term::{ function::FunctionTerm, - operation::{Operation, OperationKind}, + operation::{operation_kind::OperationKind, Operation}, Term, }, ProgramComponent, diff --git a/nemo/src/rule_model/syntax.rs b/nemo/src/rule_model/syntax.rs index e3b9cad78..1ad8c3b7e 100644 --- a/nemo/src/rule_model/syntax.rs +++ b/nemo/src/rule_model/syntax.rs @@ -2,62 +2,5 @@ //! These are kept in one location, since they are required in various //! places related to parsing and display. -/// The "predicate name" used for the CSV format in import/export directives. -pub(crate) const FILE_FORMAT_CSV: &str = "csv"; -/// The "predicate name" used for the DSV format in import/export directives. -pub(crate) const FILE_FORMAT_DSV: &str = "dsv"; -/// The "predicate name" used for the TSV format in import/export directives. -pub(crate) const FILE_FORMAT_TSV: &str = "tsv"; -/// The "predicate name" used for the generic RDF format in import/export directives. -pub(crate) const FILE_FORMAT_RDF_UNSPECIFIED: &str = "rdf"; -/// The "predicate name" used for the Ntriples format in import/export directives. -pub(crate) const FILE_FORMAT_RDF_NTRIPLES: &str = "ntriples"; -/// The "predicate name" used for the NQuads format in import/export directives. -pub(crate) const FILE_FORMAT_RDF_NQUADS: &str = "nquads"; -/// The "predicate name" used for the Turtle format in import/export directives. -pub(crate) const FILE_FORMAT_RDF_TURTLE: &str = "turtle"; -/// The "predicate name" used for the TriG format in import/export directives. -pub(crate) const FILE_FORMAT_RDF_TRIG: &str = "trig"; -/// The "predicate name" used for the RDF/XML format in import/export directives. 
-pub(crate) const FILE_FORMAT_RDF_XML: &str = "rdfxml"; -/// The "predicate name" used for the json format in import/export directives. -pub(crate) const FILE_FORMAT_JSON: &str = "json"; - -/// Name of the parameter for specifying the resource in import/export directives. -pub(crate) const PARAMETER_NAME_RESOURCE: &str = "resource"; -/// Name of the parameter for specifying the format in import/export directives. -pub(crate) const PARAMETER_NAME_FORMAT: &str = "format"; -/// Name of the parameter for specifying a base IRI in import/export directives. -pub(crate) const PARAMETER_NAME_BASE: &str = "base"; -/// Name of the parameter for specifying a delimiter in import/export directives for delimiter-separated values format. -pub(crate) const PARAMETER_NAME_DSV_DELIMITER: &str = "delimiter"; -/// Name of the parameter for specifying the compression in import/export directives. -pub(crate) const PARAMETER_NAME_COMPRESSION: &str = "compression"; -/// Name of the parameter for specifying the limit in import/export directives. -pub(crate) const PARAMETER_NAME_LIMIT: &str = "limit"; - -/// The name of the general, best-effort value format. Importers/exporters suporting this format will usually -/// accept "any" input value and interpret it in the most natural way. Likewise, any value should be writable -/// in this format. -pub(crate) const VALUE_FORMAT_ANY: &str = "any"; -/// The name of the value format that interprets all values as plain strings. Importers/exporters suporting this -/// format will usually accept any input value and interpret it as strings in the most literal way. Only strings -/// can be written in this format. -pub(crate) const VALUE_FORMAT_STRING: &str = "string"; -/// The name of the value format that interprets values as integers whenever possible. Importers/exporters suporting -/// this format will usually only accept input values that are formatted like integers. Conversely, only integer values -/// can be written in this format. -pub(crate) const VALUE_FORMAT_INT: &str = "int"; -/// The name of the value format that interprets values as double-precision floating point numbers whenever possible. -/// Importers/exporters suporting this format will usually only accept input values that are formatted like decimal numbers, -/// integers, or floating-point numbers in scientific notation. Conversely, only double values -/// can be written in this format. -pub(crate) const VALUE_FORMAT_DOUBLE: &str = "double"; -/// The name of the special value format that indicates that a vlaue should be ignored altogether. -/// The respective column/parameter will be skiped in reading/writing. -pub(crate) const VALUE_FORMAT_SKIP: &str = "skip"; - -/// The name of the compression format that means "no compression". -pub(crate) const VALUE_COMPRESSION_NONE: &str = "none"; -/// The name of the compression format that means "no compression". -pub(crate) const VALUE_COMPRESSION_GZIP: &str = "gzip"; +pub(crate) mod builtins; +pub(crate) mod import_export; diff --git a/nemo/src/rule_model/syntax/builtins.rs b/nemo/src/rule_model/syntax/builtins.rs new file mode 100644 index 000000000..fe91f8a95 --- /dev/null +++ b/nemo/src/rule_model/syntax/builtins.rs @@ -0,0 +1,110 @@ +//! This module contains constants relating to the builtin functions that are supported. 
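+//!
+//! Several of these names are resolved by `OperationKind::from_name`; a rough
+//! sketch of the intended lookup (illustrative only):
+//! ```ignore
+//! assert_eq!(
+//!     OperationKind::from_name(BUILTIN_SQRT),
+//!     Some(OperationKind::NumericSquareroot)
+//! );
+//! ```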
+
+/// Check if two values are equal to each other
+pub(crate) const BUILTIN_EQUAL: &str = "EQUAL";
+/// Check if two values are not equal to each other
+pub(crate) const BUILTIN_UNEQUAL: &str = "UNEQUAL";
+/// Check if a numeric value is greater than another
+pub(crate) const BUILTIN_GREATER: &str = "GREATER";
+/// Check if a numeric value is greater or equal to another
+pub(crate) const BUILTIN_GREATEREQ: &str = "GREATEREQ";
+/// Check if a numeric value is smaller than another
+pub(crate) const BUILTIN_LESS: &str = "LESS";
+/// Check if a numeric value is smaller or equal to another
+pub(crate) const BUILTIN_LESSQ: &str = "LESSEQ";
+/// Check if value is an integer
+pub(crate) const BUILTIN_isInteger: &str = "isInteger";
+/// Check if value is a 32bit floating point number
+pub(crate) const BUILTIN_isFloat: &str = "isFloat";
+/// Check if value is a 64bit floating point number
+pub(crate) const BUILTIN_isDouble: &str = "isDouble";
+/// Check if value is an iri
+pub(crate) const BUILTIN_isIri: &str = "isIri";
+/// Check if value is numeric
+pub(crate) const BUILTIN_isNumeric: &str = "isNumeric";
+/// Check if value is null
+pub(crate) const BUILTIN_isNull: &str = "isNull";
+/// Check if value is string
+pub(crate) const BUILTIN_isString: &str = "isString";
+/// Compute the absolute value of a number
+pub(crate) const BUILTIN_ABS: &str = "ABS";
+/// Compute the square root of a number
+pub(crate) const BUILTIN_SQRT: &str = "SQRT";
+/// Logical negation of a boolean value
+pub(crate) const BUILTIN_NOT: &str = "NOT";
+/// String representation of a value
+pub(crate) const BUILTIN_fullStr: &str = "fullStr";
+/// Lexical value
+pub(crate) const BUILTIN_STR: &str = "STR";
+/// Compute the sine of a value
+pub(crate) const BUILTIN_SIN: &str = "SIN";
+/// Compute the cosine of a value
+pub(crate) const BUILTIN_COS: &str = "COS";
+/// Compute the tangent of a value
+pub(crate) const BUILTIN_TAN: &str = "TAN";
+/// Compute the length of a string
+pub(crate) const BUILTIN_STRLEN: &str = "STRLEN";
+/// Compute the reverse of a string value
+pub(crate) const BUILTIN_STRREV: &str = "STRREV";
+/// Replace characters in strings with their upper case version
+pub(crate) const BUILTIN_UCASE: &str = "UCASE";
+/// Replace characters in strings with their lower case version
+pub(crate) const BUILTIN_LCASE: &str = "LCASE";
+/// Round a value to the nearest integer
+pub(crate) const BUILTIN_ROUND: &str = "ROUND";
+/// Round up to the nearest integer
+pub(crate) const BUILTIN_CEIL: &str = "CEIL";
+/// Round down to the nearest integer
+pub(crate) const BUILTIN_FLOOR: &str = "FLOOR";
+/// Return the datatype of the value
+pub(crate) const BUILTIN_DATATYPE: &str = "DATATYPE";
+/// Return the language tag of the value
+pub(crate) const BUILTIN_LANG: &str = "LANG";
+/// Convert the value to an integer
+pub(crate) const BUILTIN_INT: &str = "INT";
+/// Convert the value to a 64bit floating point number
+pub(crate) const BUILTIN_DOUBLE: &str = "DOUBLE";
+/// Convert the value to a 32bit floating point number
+pub(crate) const BUILTIN_FLOAT: &str = "FLOAT";
+/// Compute the logarithm of the numerical value
+pub(crate) const BUILTIN_LOG: &str = "LOG";
+/// Raise the numerical value to a power
+pub(crate) const BUILTIN_POW: &str = "POW";
+/// Compare two string values
+pub(crate) const BUILTIN_COMPARE: &str = "COMPARE";
+/// Check if one string value is contained in another
+pub(crate) const BUILTIN_CONTAINS: &str = "CONTAINS";
+/// Return a substring of a given string value
+pub(crate) const BUILTIN_SUBSTR: &str = "SUBSTR";
+/// Check if a string starts with a certain string
+pub(crate) const BUILTIN_STRSTARTS: &str = "STRSTARTS";
+/// Check if a string ends with a certain string
+pub(crate) const BUILTIN_STRENDS: &str = "STRENDS";
+/// Return the first part of a string split by some other string
+pub(crate) const BUILTIN_STRBEFORE: &str = "STRBEFORE";
+/// Return the second part of a string split by some other string
+pub(crate) const BUILTIN_STRAFTER: &str = "STRAFTER";
+/// Compute the remainder of two numerical values
+pub(crate) const BUILTIN_REM: &str = "REM";
+/// Compute the and on the bit representation of integer values
+pub(crate) const BUILTIN_BITAND: &str = "BITAND";
+/// Compute the or on the bit representation of integer values
+pub(crate) const BUILTIN_BITOR: &str = "BITOR";
+/// Compute the exclusive or on the bit representation of integer values
+pub(crate) const BUILTIN_BITXOR: &str = "BITXOR";
+/// Compute the maximum of numeric values
+pub(crate) const BUILTIN_MAX: &str = "MAX";
+/// Compute the minimum of numeric values
+pub(crate) const BUILTIN_MIN: &str = "MIN";
+/// Compute the Lukasiewicz norm of numeric values
+pub(crate) const BUILTIN_LUKA: &str = "LUKA";
+/// Compute the sum of numerical values
+pub(crate) const BUILTIN_SUM: &str = "SUM";
+/// Compute the product of numerical values
+pub(crate) const BUILTIN_PROD: &str = "PROD";
+/// Compute the logical and between boolean values
+pub(crate) const BUILTIN_AND: &str = "AND";
+/// Compute the logical or between boolean values
+pub(crate) const BUILTIN_OR: &str = "OR";
+/// Compute the concatenation of string values
+pub(crate) const BUILTIN_CONCAT: &str = "CONCAT";
diff --git a/nemo/src/rule_model/syntax/import_export.rs b/nemo/src/rule_model/syntax/import_export.rs
new file mode 100644
index 000000000..0c062e31d
--- /dev/null
+++ b/nemo/src/rule_model/syntax/import_export.rs
@@ -0,0 +1,6 @@
+//! This module defines constants relating to import and export directives.
+
+pub(crate) mod attributes;
+pub(crate) mod compression;
+pub(crate) mod file_formats;
+pub(crate) mod value_formats;
diff --git a/nemo/src/rule_model/syntax/import_export/attributes.rs b/nemo/src/rule_model/syntax/import_export/attributes.rs
new file mode 100644
index 000000000..c36ad9a38
--- /dev/null
+++ b/nemo/src/rule_model/syntax/import_export/attributes.rs
@@ -0,0 +1,14 @@
+//! This module contains constants relating to accepted attributes.
+
+/// Name of the attribute for specifying the resource in import/export directives.
+pub(crate) const ATTRIBUTE_NAME_RESOURCE: &str = "resource";
+/// Name of the attribute for specifying the format in import/export directives.
+pub(crate) const ATTRIBUTE_NAME_FORMAT: &str = "format";
+/// Name of the attribute for specifying a base IRI in import/export directives.
+pub(crate) const ATTRIBUTE_NAME_BASE: &str = "base";
+/// Name of the attribute for specifying a delimiter in import/export directives for delimiter-separated values format.
+pub(crate) const ATTRIBUTE_NAME_DSV_DELIMITER: &str = "delimiter";
+/// Name of the attribute for specifying the compression in import/export directives.
+pub(crate) const ATTRIBUTE_NAME_COMPRESSION: &str = "compression";
+/// Name of the attribute for specifying the limit in import/export directives.
+pub(crate) const ATTRIBUTE_NAME_LIMIT: &str = "limit";
diff --git a/nemo/src/rule_model/syntax/import_export/compression.rs b/nemo/src/rule_model/syntax/import_export/compression.rs
new file mode 100644
index 000000000..4ef00ebbc
--- /dev/null
+++ b/nemo/src/rule_model/syntax/import_export/compression.rs
@@ -0,0 +1,6 @@
+//! This module contains constants relating to compression formats accepted in import/export statements.
+
+/// The name of the compression format that means "no compression".
+pub(crate) const VALUE_COMPRESSION_NONE: &str = "none";
+/// The name of the compression format for gzip compression.
+pub(crate) const VALUE_COMPRESSION_GZIP: &str = "gzip";
diff --git a/nemo/src/rule_model/syntax/import_export/file_formats.rs b/nemo/src/rule_model/syntax/import_export/file_formats.rs
new file mode 100644
index 000000000..78d257ded
--- /dev/null
+++ b/nemo/src/rule_model/syntax/import_export/file_formats.rs
@@ -0,0 +1,22 @@
+//! This module defines constants relating to the supported file formats.
+
+/// The "predicate name" used for the CSV format in import/export directives.
+pub(crate) const FILE_FORMAT_CSV: &str = "csv";
+/// The "predicate name" used for the DSV format in import/export directives.
+pub(crate) const FILE_FORMAT_DSV: &str = "dsv";
+/// The "predicate name" used for the TSV format in import/export directives.
+pub(crate) const FILE_FORMAT_TSV: &str = "tsv";
+/// The "predicate name" used for the generic RDF format in import/export directives.
+pub(crate) const FILE_FORMAT_RDF_UNSPECIFIED: &str = "rdf";
+/// The "predicate name" used for the Ntriples format in import/export directives.
+pub(crate) const FILE_FORMAT_RDF_NTRIPLES: &str = "ntriples";
+/// The "predicate name" used for the NQuads format in import/export directives.
+pub(crate) const FILE_FORMAT_RDF_NQUADS: &str = "nquads";
+/// The "predicate name" used for the Turtle format in import/export directives.
+pub(crate) const FILE_FORMAT_RDF_TURTLE: &str = "turtle";
+/// The "predicate name" used for the TriG format in import/export directives.
+pub(crate) const FILE_FORMAT_RDF_TRIG: &str = "trig";
+/// The "predicate name" used for the RDF/XML format in import/export directives.
+pub(crate) const FILE_FORMAT_RDF_XML: &str = "rdfxml";
+/// The "predicate name" used for the json format in import/export directives.
+pub(crate) const FILE_FORMAT_JSON: &str = "json";
diff --git a/nemo/src/rule_model/syntax/import_export/value_formats.rs b/nemo/src/rule_model/syntax/import_export/value_formats.rs
new file mode 100644
index 000000000..2542366e8
--- /dev/null
+++ b/nemo/src/rule_model/syntax/import_export/value_formats.rs
@@ -0,0 +1,22 @@
+//! This module defines constants relating to the value formats accepted by import and export directives.
+
+/// The name of the general, best-effort value format. Importers/exporters supporting this format will usually
+/// accept "any" input value and interpret it in the most natural way. Likewise, any value should be writable
+/// in this format.
+pub(crate) const VALUE_FORMAT_ANY: &str = "any";
+/// The name of the value format that interprets all values as plain strings. Importers/exporters supporting this
+/// format will usually accept any input value and interpret it as strings in the most literal way. Only strings
+/// can be written in this format.
+pub(crate) const VALUE_FORMAT_STRING: &str = "string";
+/// The name of the value format that interprets values as integers whenever possible. Importers/exporters supporting
+/// this format will usually only accept input values that are formatted like integers. Conversely, only integer values
+/// can be written in this format.
+pub(crate) const VALUE_FORMAT_INT: &str = "int";
+/// The name of the value format that interprets values as double-precision floating point numbers whenever possible.
+/// Importers/exporters supporting this format will usually only accept input values that are formatted like decimal numbers,
+/// integers, or floating-point numbers in scientific notation. Conversely, only double values
+/// can be written in this format.
+pub(crate) const VALUE_FORMAT_DOUBLE: &str = "double";
+/// The name of the special value format that indicates that a value should be ignored altogether.
+/// The respective column/parameter will be skipped in reading/writing.
+pub(crate) const VALUE_FORMAT_SKIP: &str = "skip";

From 2bfb382c5fb02eea1b7cb9a4de1c553544f94821 Mon Sep 17 00:00:00 2001
From: Alex Ivliev
Date: Thu, 11 Jul 2024 20:16:06 +0200
Subject: [PATCH 123/214] Use enum annotations to connect syntax and program
 components

---
 Cargo.lock                                    |  12 +
 nemo/Cargo.toml                               |   1 +
 .../src/rule_model/component/import_export.rs |  76 +---
 .../component/import_export/attributes.rs     |  41 ++
 .../component/import_export/compression.rs    |  27 ++
 .../component/import_export/file_formats.rs   |  98 +++++
 .../term/operation/operation_kind.rs          | 378 +++++++++---------
 nemo/src/rule_model/syntax/builtins.rs        |  28 +-
 .../syntax/import_export/file_formats.rs      |  19 +
 9 files changed, 409 insertions(+), 271 deletions(-)
 create mode 100644 nemo/src/rule_model/component/import_export/attributes.rs
 create mode 100644 nemo/src/rule_model/component/import_export/compression.rs
 create mode 100644 nemo/src/rule_model/component/import_export/file_formats.rs

diff --git a/Cargo.lock b/Cargo.lock
index 36187cf68..bbbaeec7d 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -594,6 +594,17 @@ dependencies = [
  "cfg-if",
 ]
 
+[[package]]
+name = "enum-assoc"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "24247b89d37b9502dc5a4b80d369aab1a12106067776e440094c786dae5b9d07"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 1.0.109",
+]
+
 [[package]]
 name = "enum_dispatch"
 version = "0.3.13"
@@ -1457,6 +1468,7 @@ dependencies = [
  "bytesize",
  "csv",
  "dyn-clone",
+ "enum-assoc",
  "env_logger 0.11.3",
  "flate2",
  "getrandom",
diff --git a/nemo/Cargo.toml b/nemo/Cargo.toml
index ab6f43231..6483cb8da 100644
--- a/nemo/Cargo.toml
+++ b/nemo/Cargo.toml
@@ -49,6 +49,7 @@ dyn-clone = "1.0.16"
 unicode-ident = "1.0.12"
 nom-greedyerror = "0.5.0"
 nom-supreme = "0.8.0"
+enum-assoc = "1.1.0"
 
 [dev-dependencies]
 env_logger = "*"
diff --git a/nemo/src/rule_model/component/import_export.rs b/nemo/src/rule_model/component/import_export.rs
index 432e43d52..3567aed3f 100644
--- a/nemo/src/rule_model/component/import_export.rs
+++ b/nemo/src/rule_model/component/import_export.rs
@@ -1,71 +1,17 @@
 //! Import and export directives are a direct representation of the syntactic information
 //! given in rule files.
 
+pub mod attributes;
+pub mod compression;
+pub mod file_formats;
+
 use std::{fmt::Display, hash::Hash};
 
-use nemo_physical::datavalues::MapDataValue;
+use file_formats::FileFormat;
 
 use crate::rule_model::origin::Origin;
 
-use super::{ProgramComponent, Tag};
-
-/// The different supported variants of the RDF format.
-#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)] -pub enum RdfVariant { - /// An unspecified format, using the resource name as a heuristic. - #[default] - Unspecified, - /// RDF 1.1 N-Triples - NTriples, - /// RDF 1.1 N-Quads - NQuads, - /// RDF 1.1 Turtle - Turtle, - /// RDF 1.1 RDF/XML - RDFXML, - /// RDF 1.1 TriG - TriG, -} - -impl Display for RdfVariant { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::NTriples => write!(f, "RDF N-Triples"), - Self::NQuads => write!(f, "RDF N-Quads"), - Self::Turtle => write!(f, "RDF Turtle"), - Self::RDFXML => write!(f, "RDF/XML"), - Self::TriG => write!(f, "RDF TriG"), - Self::Unspecified => write!(f, "RDF"), - } - } -} - -/// Supported file formats. -#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] -pub enum FileFormat { - /// Comma-separated values - CSV, - /// Delimiter-separated values - DSV, - /// Tab-separated values - TSV, - /// RDF Triples or Quads, with the given format variant. - RDF(RdfVariant), - /// JSON objects - JSON, -} - -impl Display for FileFormat { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::DSV => write!(f, "DSV"), - Self::CSV => write!(f, "CSV"), - Self::TSV => write!(f, "TSV"), - Self::JSON => write!(f, "JSON"), - Self::RDF(variant) => write!(f, "{variant}"), - } - } -} +use super::{term::map::Map, ProgramComponent, Tag}; /// An import/export specification. This object captures all information that is typically /// present in an import or export directive in a Nemo program, including the main format, @@ -81,7 +27,7 @@ pub(crate) struct ImportExportDirective { /// The file format and resource we're using. format: FileFormat, /// The attributes we've been given. - attributes: MapDataValue, + attributes: Map, } impl ImportExportDirective { @@ -119,7 +65,7 @@ pub struct ImportDirective(pub(crate) ImportExportDirective); impl ImportDirective { /// Create a new [ImportDirective]. - pub fn new(predicate: Tag, format: FileFormat, attributes: MapDataValue) -> Self { + pub fn new(predicate: Tag, format: FileFormat, attributes: Map) -> Self { Self(ImportExportDirective { origin: Origin::default(), predicate, @@ -139,7 +85,7 @@ impl ImportDirective { } /// Return the attributes. - pub fn attributes(&self) -> &MapDataValue { + pub fn attributes(&self) -> &Map { &self.0.attributes } } @@ -191,7 +137,7 @@ pub struct ExportDirective(pub(crate) ImportExportDirective); impl ExportDirective { /// Create a new [ExportDirective]. - pub fn new(predicate: Tag, format: FileFormat, attributes: MapDataValue) -> Self { + pub fn new(predicate: Tag, format: FileFormat, attributes: Map) -> Self { Self(ImportExportDirective { origin: Origin::default(), predicate, @@ -211,7 +157,7 @@ impl ExportDirective { } /// Return the attributes. - pub fn attributes(&self) -> &MapDataValue { + pub fn attributes(&self) -> &Map { &self.0.attributes } } diff --git a/nemo/src/rule_model/component/import_export/attributes.rs b/nemo/src/rule_model/component/import_export/attributes.rs new file mode 100644 index 000000000..d83f8e00f --- /dev/null +++ b/nemo/src/rule_model/component/import_export/attributes.rs @@ -0,0 +1,41 @@ +//! 
This module defines [ImportExportAttribute]s used to specify details about import and export directives.
+#![allow(missing_docs)]
+
+use std::{fmt::Display, hash::Hash};
+
+use enum_assoc::Assoc;
+
+use crate::rule_model::syntax::import_export::attributes;
+
+/// Supported attributes in import/export directives
+#[derive(Assoc, Debug, Copy, Clone, PartialEq, Eq, Hash)]
+#[func(pub fn name(&self) -> &'static str)]
+#[func(pub fn from_name(name: &str) -> Option<Self>)]
+pub enum ImportExportAttribute {
+    /// Location of the file
+    #[assoc(name = attributes::ATTRIBUTE_NAME_RESOURCE)]
+    Resource,
+    /// Datatypes of the input relations
+    #[assoc(name = attributes::ATTRIBUTE_NAME_FORMAT)]
+    Format,
+    /// Base IRI
+    #[assoc(name = attributes::ATTRIBUTE_NAME_BASE)]
+    Base,
+    /// Delimiter used to separate values
+    #[assoc(name = attributes::ATTRIBUTE_NAME_DSV_DELIMITER)]
+    Delimiter,
+    /// Compression format
+    #[assoc(name = attributes::ATTRIBUTE_NAME_COMPRESSION)]
+    Compression,
+    /// Limit import/export to the first n facts
+    #[assoc(name = attributes::ATTRIBUTE_NAME_LIMIT)]
+    Limit,
+}
+
+impl ImportExportAttribute {}
+
+impl Display for ImportExportAttribute {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}", self.name())
+    }
+}
diff --git a/nemo/src/rule_model/component/import_export/compression.rs b/nemo/src/rule_model/component/import_export/compression.rs
new file mode 100644
index 000000000..9ccc45d8f
--- /dev/null
+++ b/nemo/src/rule_model/component/import_export/compression.rs
@@ -0,0 +1,27 @@
+//! This module defines the [CompressionFormat]s that are supported.
+#![allow(missing_docs)]
+
+use std::fmt::Display;
+
+use enum_assoc::Assoc;
+
+use crate::rule_model::syntax::import_export::compression;
+
+/// Compression formats
+#[derive(Assoc, Debug, Copy, Clone, PartialEq, Eq)]
+#[func(pub fn name(&self) -> &'static str)]
+#[func(pub fn from_name(name: &str) -> Option<Self>)]
+pub enum CompressionFormat {
+    /// No compression
+    #[assoc(name = compression::VALUE_COMPRESSION_NONE)]
+    None,
+    /// GZip compression
+    #[assoc(name = compression::VALUE_COMPRESSION_GZIP)]
+    GZip,
+}
+
+impl Display for CompressionFormat {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}", self.name())
+    }
+}
diff --git a/nemo/src/rule_model/component/import_export/file_formats.rs b/nemo/src/rule_model/component/import_export/file_formats.rs
new file mode 100644
index 000000000..ba322188e
--- /dev/null
+++ b/nemo/src/rule_model/component/import_export/file_formats.rs
@@ -0,0 +1,98 @@
+//! This module defines [FileFormat]s that are supported.
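+//!
+//! The `enum-assoc` annotations below tie each variant to the name constants in
+//! the syntax module; a rough sketch of the generated accessors and of a
+//! directive using such a format name (both illustrative only):
+//! ```ignore
+//! assert_eq!(FileFormat::CSV.name(), "csv");
+//! assert_eq!(
+//!     FileFormat::CSV.attributes().get(&ImportExportAttribute::Resource),
+//!     Some(&AttributeRequirement::Required)
+//! );
+//! ```
+//! In a rule file this name selects the format, e.g.
+//! `@import table :- csv { resource = "data.csv" } .`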
+#![allow(missing_docs)]
+
+use std::{collections::HashMap, fmt::Display};
+
+use enum_assoc::Assoc;
+
+use crate::rule_model::{
+    component::import_export::attributes::ImportExportAttribute,
+    syntax::import_export::file_formats,
+};
+
+/// Marks whether an attribute is required or optional
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+pub(crate) enum AttributeRequirement {
+    /// Attribute is required
+    Required,
+    /// Attribute is optional
+    Optional,
+}
+
+/// Supported file formats
+#[derive(Assoc, Debug, Copy, Clone, Eq, PartialEq, Hash)]
+#[func(pub fn name(&self) -> &'static str)]
+#[func(pub fn from_name(name: &str) -> Option<Self>)]
+#[func(pub fn extension(&self) -> &'static str)]
+#[func(pub fn attributes(&self) -> HashMap<ImportExportAttribute, AttributeRequirement>)]
+pub enum FileFormat {
+    /// Comma-separated values
+    #[assoc(name = file_formats::FILE_FORMAT_CSV)]
+    #[assoc(extension = file_formats::EXTENSION_CSV)]
+    #[assoc(attributes = HashMap::from([
+        (ImportExportAttribute::Resource, AttributeRequirement::Required)
+    ]))]
+    CSV,
+    /// Delimiter-separated values
+    #[assoc(name = file_formats::FILE_FORMAT_DSV)]
+    #[assoc(extension = file_formats::EXTENSION_DSV)]
+    #[assoc(attributes = HashMap::from([
+        (ImportExportAttribute::Resource, AttributeRequirement::Required)
+    ]))]
+    DSV,
+    /// Tab-separated values
+    #[assoc(name = file_formats::FILE_FORMAT_TSV)]
+    #[assoc(extension = file_formats::EXTENSION_TSV)]
+    #[assoc(attributes = HashMap::from([
+        (ImportExportAttribute::Resource, AttributeRequirement::Required)
+    ]))]
+    TSV,
+    /// JSON objects
+    #[assoc(name = file_formats::FILE_FORMAT_JSON)]
+    #[assoc(extension = file_formats::EXTENSION_JSON)]
+    #[assoc(attributes = HashMap::from([
+        (ImportExportAttribute::Resource, AttributeRequirement::Required)
+    ]))]
+    JSON,
+    /// RDF 1.1 N-Triples
+    #[assoc(name = file_formats::FILE_FORMAT_RDF_NTRIPLES)]
+    #[assoc(extension = file_formats::EXTENSION_RDF_NTRIPLES)]
+    #[assoc(attributes = HashMap::from([
+        (ImportExportAttribute::Resource, AttributeRequirement::Required)
+    ]))]
+    NTriples,
+    /// RDF 1.1 N-Quads
+    #[assoc(name = file_formats::FILE_FORMAT_RDF_NQUADS)]
+    #[assoc(extension = file_formats::EXTENSION_RDF_NQUADS)]
+    #[assoc(attributes = HashMap::from([
+        (ImportExportAttribute::Resource, AttributeRequirement::Required)
+    ]))]
+    NQuads,
+    /// RDF 1.1 Turtle
+    #[assoc(name = file_formats::FILE_FORMAT_RDF_TURTLE)]
+    #[assoc(extension = file_formats::EXTENSION_RDF_TURTLE)]
+    #[assoc(attributes = HashMap::from([
+        (ImportExportAttribute::Resource, AttributeRequirement::Required)
+    ]))]
+    Turtle,
+    /// RDF 1.1 RDF/XML
+    #[assoc(name = file_formats::FILE_FORMAT_RDF_XML)]
+    #[assoc(extension = file_formats::EXTENSION_RDF_XML)]
+    #[assoc(attributes = HashMap::from([
+        (ImportExportAttribute::Resource, AttributeRequirement::Required)
+    ]))]
+    RDFXML,
+    /// RDF 1.1 TriG
+    #[assoc(name = file_formats::FILE_FORMAT_RDF_TRIG)]
+    #[assoc(extension = file_formats::EXTENSION_RDF_TRIG)]
+    #[assoc(attributes = HashMap::from([
+        (ImportExportAttribute::Resource, AttributeRequirement::Required)
+    ]))]
+    TriG,
+}
+
+impl Display for FileFormat {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}", self.name())
+    }
+}
diff --git a/nemo/src/rule_model/component/term/operation/operation_kind.rs b/nemo/src/rule_model/component/term/operation/operation_kind.rs
index e99b89d1e..2e9032bac 100644
--- a/nemo/src/rule_model/component/term/operation/operation_kind.rs
+++ b/nemo/src/rule_model/component/term/operation/operation_kind.rs
@@ -1,7 +1,12 @@
-//! 
This module defines [OperationKind]. +#![allow(missing_docs)] use std::fmt::Display; +use enum_assoc::Assoc; + +use crate::rule_model::syntax::builtins; + /// Number of arguments supported by an operation #[derive(Debug)] pub(crate) enum OperationNumArguments { @@ -13,8 +18,6 @@ pub(crate) enum OperationNumArguments { Ternary, /// Operation supports arbitrary many arguments (including zero) Arbitrary, - /// Operation supports the given number of arguments - Exact(usize), /// Operation supports arguments that satisfy one of the given requirements Choice(Vec), } @@ -27,7 +30,6 @@ impl OperationNumArguments { OperationNumArguments::Binary => num_arguments == 2, OperationNumArguments::Ternary => num_arguments == 3, OperationNumArguments::Arbitrary => true, - OperationNumArguments::Exact(exact) => num_arguments == *exact, OperationNumArguments::Choice(choice) => { choice.iter().any(|num| num.validate(num_arguments)) } @@ -36,188 +38,300 @@ impl OperationNumArguments { } /// Supported operations -#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, PartialOrd)] +#[derive(Assoc, Debug, Copy, Clone, PartialEq, Eq, Hash, PartialOrd)] +#[func(pub fn name(&self) -> &'static str)] +#[func(pub fn from_name(name: &str) -> Option)] +#[func(pub fn num_arguments(&self) -> OperationNumArguments)] +#[func(pub fn is_boolean(&self) -> bool)] pub enum OperationKind { /// Equality + #[assoc(name = builtins::BUILTIN_EQUAL)] + #[assoc(num_arguments = OperationNumArguments::Binary)] + #[assoc(is_boolean = true)] Equal, /// Inequality + #[assoc(name = builtins::BUILTIN_UNEQUAL)] + #[assoc(num_arguments = OperationNumArguments::Binary)] + #[assoc(is_boolean = true)] Unequals, /// Sum of numeric values + #[assoc(name = builtins::BUILTIN_SUM)] + #[assoc(num_arguments = OperationNumArguments::Arbitrary)] + #[assoc(is_boolean = false)] NumericSum, /// Subtraction between two numeric values + #[assoc(name = builtins::BUILTIN_SUBTRACTION)] + #[assoc(num_arguments = OperationNumArguments::Binary)] + #[assoc(is_boolean = false)] NumericSubtraction, /// Product of numeric values + #[assoc(name = builtins::BUILTIN_PRODUCT)] + #[assoc(num_arguments = OperationNumArguments::Arbitrary)] + #[assoc(is_boolean = false)] NumericProduct, /// Division between two numeric values + #[assoc(name = builtins::BUILTIN_DIVISION)] + #[assoc(num_arguments = OperationNumArguments::Binary)] + #[assoc(is_boolean = false)] NumericDivision, /// Logarithm of a numeric value to some numeric base + #[assoc(num_arguments = OperationNumArguments::Binary)] + #[assoc(name = builtins::BUILTIN_LOGARITHM)] + #[assoc(is_boolean = false)] NumericLogarithm, /// Numeric value raised to another numeric value + #[assoc(name = builtins::BUILTIN_POW)] + #[assoc(num_arguments = OperationNumArguments::Binary)] + #[assoc(is_boolean = false)] NumericPower, /// Remainder of a division between two numeric values + #[assoc(name = builtins::BUILTIN_REM)] + #[assoc(num_arguments = OperationNumArguments::Binary)] + #[assoc(is_boolean = false)] NumericRemainder, /// Numeric greater than comparison + #[assoc(name = builtins::BUILTIN_GREATER)] + #[assoc(num_arguments = OperationNumArguments::Binary)] + #[assoc(is_boolean = false)] NumericGreaterthan, /// Numeric greater than or equals comparison + #[assoc(name = builtins::BUILTIN_GREATEREQ)] + #[assoc(num_arguments = OperationNumArguments::Binary)] + #[assoc(is_boolean = false)] NumericGreaterthaneq, /// Numeric less than comparison + #[assoc(name = builtins::BUILTIN_LESS)] + #[assoc(num_arguments = OperationNumArguments::Binary)] + 
#[assoc(is_boolean = false)] NumericLessthan, /// Numeric less than or equals comparison + #[assoc(name = builtins::BUILTIN_LESSQ)] + #[assoc(num_arguments = OperationNumArguments::Binary)] + #[assoc(is_boolean = false)] NumericLessthaneq, /// Lexicographic comparison between strings + #[assoc(name = builtins::BUILTIN_COMPARE)] + #[assoc(num_arguments = OperationNumArguments::Binary)] + #[assoc(is_boolean = false)] StringCompare, /// Check whether string is contained in another, correspondng to SPARQL function CONTAINS. + #[assoc(name = builtins::BUILTIN_CONTAINS)] + #[assoc(num_arguments = OperationNumArguments::Binary)] + #[assoc(is_boolean = true)] StringContains, /// String starting at some start position + #[assoc(name = builtins::BUILTIN_SUBSTR)] + #[assoc(num_arguments = OperationNumArguments::Choice(vec![OperationNumArguments::Binary, OperationNumArguments::Ternary]))] + #[assoc(is_boolean = false)] StringSubstring, /// First part of a string split by some other string + #[assoc(name = builtins::BUILTIN_STRBEFORE)] + #[assoc(num_arguments = OperationNumArguments::Binary)] + #[assoc(is_boolean = false)] StringBefore, /// Second part of a string split by some other string + #[assoc(name = builtins::BUILTIN_STRAFTER)] + #[assoc(num_arguments = OperationNumArguments::Binary)] + #[assoc(is_boolean = false)] StringAfter, /// Whether string starts with a certain string + #[assoc(name = builtins::BUILTIN_STRSTARTS)] + #[assoc(num_arguments = OperationNumArguments::Binary)] + #[assoc(is_boolean = true)] StringStarts, /// Whether string ends with a certain string + #[assoc(name = builtins::BUILTIN_STRENDS)] + #[assoc(num_arguments = OperationNumArguments::Binary)] + #[assoc(is_boolean = true)] StringEnds, /// Boolean negation + #[assoc(name = builtins::BUILTIN_NOT)] + #[assoc(num_arguments = OperationNumArguments::Unary)] + #[assoc(is_boolean = true)] BooleanNegation, /// Cast to double + #[assoc(name = builtins::BUILTIN_DOUBLE)] + #[assoc(num_arguments = OperationNumArguments::Unary)] + #[assoc(is_boolean = false)] CastToDouble, /// Cast to float + #[assoc(name = builtins::BUILTIN_FLOAT)] + #[assoc(num_arguments = OperationNumArguments::Unary)] + #[assoc(is_boolean = false)] CastToFloat, /// Cast to integer + #[assoc(name = builtins::BUILTIN_INT)] + #[assoc(num_arguments = OperationNumArguments::Unary)] + #[assoc(is_boolean = false)] CastToInteger, /// Canonical string representation of a value + #[assoc(name = builtins::BUILTIN_FULLSTR)] + #[assoc(num_arguments = OperationNumArguments::Unary)] + #[assoc(is_boolean = false)] CanonicalString, /// Check if value is an integer + #[assoc(name = builtins::BUILTIN_IS_INTEGER)] + #[assoc(num_arguments = OperationNumArguments::Unary)] + #[assoc(is_boolean = true)] CheckIsInteger, /// Check if value is a float + #[assoc(name = builtins::BUILTIN_IS_FLOAT)] + #[assoc(num_arguments = OperationNumArguments::Unary)] + #[assoc(is_boolean = true)] CheckIsFloat, /// Check if value is a double + #[assoc(name = builtins::BUILTIN_IS_DOUBLE)] + #[assoc(num_arguments = OperationNumArguments::Unary)] + #[assoc(is_boolean = true)] CheckIsDouble, /// Check if value is an iri + #[assoc(name = builtins::BUILTIN_IS_IRI)] + #[assoc(num_arguments = OperationNumArguments::Unary)] + #[assoc(is_boolean = true)] CheckIsIri, /// Check if value is numeric + #[assoc(name = builtins::BUILTIN_IS_NUMERIC)] + #[assoc(num_arguments = OperationNumArguments::Unary)] + #[assoc(is_boolean = true)] CheckIsNumeric, /// Check if value is a null + #[assoc(name = builtins::BUILTIN_IS_NULL)] 
+    #[assoc(num_arguments = OperationNumArguments::Unary)]
+    #[assoc(is_boolean = true)]
     CheckIsNull,
     /// Check if value is a string
+    #[assoc(name = builtins::BUILTIN_IS_STRING)]
+    #[assoc(num_arguments = OperationNumArguments::Unary)]
+    #[assoc(is_boolean = true)]
     CheckIsString,
     /// Get datatype of a value
+    #[assoc(name = builtins::BUILTIN_DATATYPE)]
+    #[assoc(num_arguments = OperationNumArguments::Unary)]
+    #[assoc(is_boolean = false)]
     Datatype,
     /// Get language tag of a languaged tagged string
+    #[assoc(name = builtins::BUILTIN_LANG)]
+    #[assoc(num_arguments = OperationNumArguments::Unary)]
+    #[assoc(is_boolean = false)]
     LanguageTag,
     /// Lexical value
+    #[assoc(name = builtins::BUILTIN_STR)]
+    #[assoc(num_arguments = OperationNumArguments::Unary)]
+    #[assoc(is_boolean = false)]
     LexicalValue,
     /// Absolute value of a numeric value
+    #[assoc(name = builtins::BUILTIN_ABS)]
+    #[assoc(num_arguments = OperationNumArguments::Unary)]
+    #[assoc(is_boolean = false)]
     NumericAbsolute,
     /// Cosine of a numeric value
+    #[assoc(name = builtins::BUILTIN_COS)]
+    #[assoc(num_arguments = OperationNumArguments::Unary)]
+    #[assoc(is_boolean = false)]
     NumericCosine,
     /// Rounding up of a numeric value
+    #[assoc(name = builtins::BUILTIN_CEIL)]
+    #[assoc(num_arguments = OperationNumArguments::Unary)]
+    #[assoc(is_boolean = false)]
     NumericCeil,
     /// Rounding down of a numeric value
+    #[assoc(name = builtins::BUILTIN_FLOOR)]
+    #[assoc(num_arguments = OperationNumArguments::Unary)]
+    #[assoc(is_boolean = false)]
     NumericFloor,
     /// Additive inverse of a numeric value
+    #[assoc(name = builtins::BUILTIN_INVERSE)]
+    #[assoc(num_arguments = OperationNumArguments::Unary)]
+    #[assoc(is_boolean = false)]
     NumericNegation,
     /// Rounding of a numeric value
+    #[assoc(name = builtins::BUILTIN_ROUND)]
+    #[assoc(num_arguments = OperationNumArguments::Unary)]
+    #[assoc(is_boolean = false)]
     NumericRound,
     /// Sine of a numeric value
+    #[assoc(name = builtins::BUILTIN_SIN)]
+    #[assoc(num_arguments = OperationNumArguments::Unary)]
+    #[assoc(is_boolean = false)]
     NumericSine,
     /// Square root of a numeric value
+    #[assoc(name = builtins::BUILTIN_SQRT)]
+    #[assoc(num_arguments = OperationNumArguments::Unary)]
+    #[assoc(is_boolean = false)]
     NumericSquareroot,
     /// Tangent of a numeric value
+    #[assoc(name = builtins::BUILTIN_TAN)]
+    #[assoc(num_arguments = OperationNumArguments::Unary)]
+    #[assoc(is_boolean = false)]
     NumericTangent,
     /// Length of a string value
+    #[assoc(name = builtins::BUILTIN_STRLEN)]
+    #[assoc(num_arguments = OperationNumArguments::Unary)]
+    #[assoc(is_boolean = false)]
     StringLength,
     /// Reverse of a string value
+    #[assoc(name = builtins::BUILTIN_STRREV)]
+    #[assoc(num_arguments = OperationNumArguments::Unary)]
+    #[assoc(is_boolean = false)]
     StringReverse,
     /// String converted to lowercase letters
+    #[assoc(name = builtins::BUILTIN_LCASE)]
+    #[assoc(num_arguments = OperationNumArguments::Unary)]
+    #[assoc(is_boolean = false)]
     StringLowercase,
     /// String converted to uppercase letters
+    #[assoc(name = builtins::BUILTIN_UCASE)]
+    #[assoc(num_arguments = OperationNumArguments::Unary)]
+    #[assoc(is_boolean = false)]
     StringUppercase,
     /// Bitwise and operation
+    #[assoc(name = builtins::BUILTIN_BITAND)]
+    #[assoc(num_arguments = OperationNumArguments::Unary)]
+    #[assoc(is_boolean = false)]
     BitAnd,
     /// Bitwise or operation
+    #[assoc(name = builtins::BUILTIN_BITOR)]
+    #[assoc(num_arguments = OperationNumArguments::Unary)]
+    #[assoc(is_boolean = false)]
     BitOr,
     /// Bitwise xor operation
+    
#[assoc(name = builtins::BUILTIN_BITXOR)] + #[assoc(num_arguments = OperationNumArguments::Unary)] + #[assoc(is_boolean = false)] BitXor, /// Conjunction of boolean values + #[assoc(name = builtins::BUILTIN_AND)] + #[assoc(num_arguments = OperationNumArguments::Arbitrary)] + #[assoc(is_boolean = true)] BooleanConjunction, /// Disjunction of boolean values + #[assoc(name = builtins::BUILTIN_OR)] + #[assoc(num_arguments = OperationNumArguments::Arbitrary)] + #[assoc(is_boolean = true)] BooleanDisjunction, /// Minimum of numeric values + #[assoc(name = builtins::BUILTIN_MIN)] + #[assoc(num_arguments = OperationNumArguments::Arbitrary)] + #[assoc(is_boolean = false)] NumericMinimum, /// Maximum of numeric values + #[assoc(name = builtins::BUILTIN_MAX)] + #[assoc(num_arguments = OperationNumArguments::Arbitrary)] + #[assoc(is_boolean = false)] NumericMaximum, /// Lukasiewicz norm of numeric values + #[assoc(name = builtins::BUILTIN_LUKA)] + #[assoc(num_arguments = OperationNumArguments::Arbitrary)] + #[assoc(is_boolean = false)] NumericLukasiewicz, /// Concatentation of two string values, correspondng to SPARQL function CONCAT. + #[assoc(name = builtins::BUILTIN_CONCAT)] + #[assoc(num_arguments = OperationNumArguments::Unary)] + #[assoc(is_boolean = false)] StringConcatenation, } impl OperationKind { - /// Return the [OperationKind] corresponding to the given operation name or `None` if there is no such operation. - pub fn from_name(name: &str) -> Option { - Some(match name.to_uppercase().as_str() { - "+" => Self::NumericSum, - "-" => Self::NumericSubtraction, - "/" => Self::NumericDivision, - "*" => Self::NumericProduct, - "<" => Self::NumericLessthan, - ">" => Self::NumericGreaterthan, - "<=" => Self::NumericLessthaneq, - ">=" => Self::NumericGreaterthaneq, - "isInteger" => Self::CheckIsInteger, - "isFloat" => Self::CheckIsFloat, - "isDouble" => Self::CheckIsDouble, - "isIri" => Self::CheckIsIri, - "isNumeric" => Self::CheckIsNumeric, - "isNull" => Self::CheckIsNull, - "isString" => Self::CheckIsString, - "ABS" => Self::NumericAbsolute, - "SQRT" => Self::NumericSquareroot, - "NOT" => Self::BooleanNegation, - "fullStr" => Self::CanonicalString, - "STR" => Self::LexicalValue, - "SIN" => Self::NumericSine, - "COS" => Self::NumericCosine, - "TAN" => Self::NumericTangent, - "STRLEN" => Self::StringLength, - "STRREV" => Self::StringReverse, - "UCASE" => Self::StringLowercase, - "LCASE" => Self::StringUppercase, - "ROUND" => Self::NumericRound, - "CEIL" => Self::NumericCeil, - "FLOOR" => Self::NumericFloor, - "DATATYPE" => Self::Datatype, - "LANG" => Self::LanguageTag, - "INT" => Self::CastToInteger, - "DOUBLE" => Self::CastToDouble, - "FLOAT" => Self::CastToFloat, - "LOG" => Self::NumericLogarithm, - "POW" => Self::NumericPower, - "COMPARE" => Self::StringCompare, - "CONTAINS" => Self::StringContains, - "SUBSTR" => Self::StringSubstring, - "STRSTARTS" => Self::StringStarts, - "STRENDS" => Self::StringEnds, - "STRBEFORE" => Self::StringBefore, - "STRAFTER" => Self::StringAfter, - "REM" => Self::NumericRemainder, - "BITAND" => Self::BitAnd, - "BITOR" => Self::BitOr, - "BITXOR" => Self::BitXor, - "MAX" => Self::NumericMaximum, - "MIN" => Self::NumericMinimum, - "LUKA" => Self::NumericLukasiewicz, - "SUM" => Self::NumericSum, - "PROD" => Self::NumericProduct, - "AND" => Self::BooleanConjunction, - "OR" => Self::BooleanDisjunction, - "CONCAT" => Self::StringConcatenation, - _ => return None, - }) - } - /// Precendence of operations for display purposes. 
pub(crate) fn precedence(&self) -> usize { match &self { @@ -228,136 +342,10 @@ impl OperationKind { _ => 3, } } - - /// Return whether the operation returns a boolean value. - pub(crate) fn is_boolean(&self) -> bool { - match self { - OperationKind::Equal => true, - OperationKind::Unequals => true, - OperationKind::NumericSum => false, - OperationKind::NumericSubtraction => false, - OperationKind::NumericProduct => false, - OperationKind::NumericDivision => false, - OperationKind::NumericLogarithm => false, - OperationKind::NumericPower => false, - OperationKind::NumericRemainder => false, - OperationKind::NumericGreaterthan => true, - OperationKind::NumericGreaterthaneq => true, - OperationKind::NumericLessthan => true, - OperationKind::NumericLessthaneq => true, - OperationKind::StringCompare => false, - OperationKind::StringContains => true, - OperationKind::StringSubstring => false, - OperationKind::StringBefore => true, - OperationKind::StringAfter => true, - OperationKind::StringStarts => true, - OperationKind::StringEnds => true, - OperationKind::BooleanNegation => true, - OperationKind::CastToDouble => false, - OperationKind::CastToFloat => false, - OperationKind::CastToInteger => false, - OperationKind::CanonicalString => false, - OperationKind::CheckIsInteger => true, - OperationKind::CheckIsFloat => true, - OperationKind::CheckIsDouble => true, - OperationKind::CheckIsIri => true, - OperationKind::CheckIsNumeric => true, - OperationKind::CheckIsNull => true, - OperationKind::CheckIsString => true, - OperationKind::Datatype => false, - OperationKind::LanguageTag => false, - OperationKind::LexicalValue => false, - OperationKind::NumericAbsolute => false, - OperationKind::NumericCosine => false, - OperationKind::NumericCeil => false, - OperationKind::NumericFloor => false, - OperationKind::NumericNegation => false, - OperationKind::NumericRound => false, - OperationKind::NumericSine => false, - OperationKind::NumericSquareroot => false, - OperationKind::NumericTangent => false, - OperationKind::StringLength => false, - OperationKind::StringReverse => false, - OperationKind::StringLowercase => false, - OperationKind::StringUppercase => false, - OperationKind::BitAnd => false, - OperationKind::BitOr => false, - OperationKind::BitXor => false, - OperationKind::BooleanConjunction => true, - OperationKind::BooleanDisjunction => true, - OperationKind::NumericMinimum => false, - OperationKind::NumericMaximum => false, - OperationKind::NumericLukasiewicz => false, - OperationKind::StringConcatenation => false, - } - } - - /// Return the number of arguments accepted by this operation - pub(crate) fn number_arguments(&self) {} } impl Display for OperationKind { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let string = match self { - OperationKind::Equal => "EQUAL", - OperationKind::Unequals => "UNEQUAL", - OperationKind::NumericSum => "SUM", - OperationKind::NumericSubtraction => "MINUS", - OperationKind::NumericProduct => "PROD", - OperationKind::NumericDivision => "DIV", - OperationKind::NumericLogarithm => "LOG", - OperationKind::NumericPower => "POW", - OperationKind::NumericRemainder => "REM", - OperationKind::NumericGreaterthan => "GT", - OperationKind::NumericGreaterthaneq => "GTE", - OperationKind::NumericLessthan => "LT", - OperationKind::NumericLessthaneq => "LTE", - OperationKind::StringCompare => "COMPARE", - OperationKind::StringContains => "CONTAINS", - OperationKind::StringSubstring => "SUBSTR", - OperationKind::StringBefore => "STRBEFORE", - 
OperationKind::StringAfter => "STRAFTER", - OperationKind::StringStarts => "STRSTARTS", - OperationKind::StringEnds => "STRENDS", - OperationKind::BooleanNegation => "NOT", - OperationKind::CastToDouble => "DOUBLE", - OperationKind::CastToFloat => "FLOAT", - OperationKind::CastToInteger => "INT", - OperationKind::CanonicalString => "fullStr", - OperationKind::CheckIsInteger => "isInteger", - OperationKind::CheckIsFloat => "isFloat", - OperationKind::CheckIsDouble => "isDouble", - OperationKind::CheckIsIri => "isIri", - OperationKind::CheckIsNumeric => "isNumeric", - OperationKind::CheckIsNull => "isNull", - OperationKind::CheckIsString => "isString", - OperationKind::Datatype => "DATATYPE", - OperationKind::LanguageTag => "LANG", - OperationKind::LexicalValue => "STR", - OperationKind::NumericAbsolute => "ABS", - OperationKind::NumericCosine => "COS", - OperationKind::NumericCeil => "CEIL", - OperationKind::NumericFloor => "FLOOR", - OperationKind::NumericNegation => "MINUS", - OperationKind::NumericRound => "ROUND", - OperationKind::NumericSine => "SIN", - OperationKind::NumericSquareroot => "SQRT", - OperationKind::NumericTangent => "TAN", - OperationKind::StringLength => "STRLEN", - OperationKind::StringReverse => "STRREV", - OperationKind::StringLowercase => "LCASE", - OperationKind::StringUppercase => "UCASE", - OperationKind::BitAnd => "BITAND", - OperationKind::BitOr => "BITOR", - OperationKind::BitXor => "BITXOR", - OperationKind::BooleanConjunction => "AND", - OperationKind::BooleanDisjunction => "OR", - OperationKind::NumericMinimum => "MIN", - OperationKind::NumericMaximum => "MAX", - OperationKind::NumericLukasiewicz => "LUKA", - OperationKind::StringConcatenation => "CONCAT", - }; - - write!(f, "{}", string) + write!(f, "{}", self.name()) } } diff --git a/nemo/src/rule_model/syntax/builtins.rs b/nemo/src/rule_model/syntax/builtins.rs index fe91f8a95..4389690c2 100644 --- a/nemo/src/rule_model/syntax/builtins.rs +++ b/nemo/src/rule_model/syntax/builtins.rs @@ -13,19 +13,19 @@ pub(crate) const BUILTIN_LESS: &str = "LESS"; /// Check if a numeric value is smaller or equal to another pub(crate) const BUILTIN_LESSQ: &str = "LESSEQ"; /// Check if value is an integer -pub(crate) const BUILTIN_isInteger: &str = "isInteger"; +pub(crate) const BUILTIN_IS_INTEGER: &str = "isInteger"; /// Check if value is a 32bit floating point number -pub(crate) const BUILTIN_isFloat: &str = "isFloat"; +pub(crate) const BUILTIN_IS_FLOAT: &str = "isFloat"; /// Check if value is a 64bit floating point number -pub(crate) const BUILTIN_isDouble: &str = "isDouble"; +pub(crate) const BUILTIN_IS_DOUBLE: &str = "isDouble"; /// Check if value is an iri -pub(crate) const BUILTIN_isIri: &str = "isIri"; +pub(crate) const BUILTIN_IS_IRI: &str = "isIri"; /// Check if value is numeric -pub(crate) const BUILTIN_isNumeric: &str = "isNumeric"; +pub(crate) const BUILTIN_IS_NUMERIC: &str = "isNumeric"; /// Check if value is null -pub(crate) const BUILTIN_isNull: &str = "isNull"; +pub(crate) const BUILTIN_IS_NULL: &str = "isNull"; /// Check if value is string -pub(crate) const BUILTIN_isString: &str = "isString"; +pub(crate) const BUILTIN_IS_STRING: &str = "isString"; /// Compute the absoule value of a number pub(crate) const BUILTIN_ABS: &str = "ABS"; /// Compute the square root of a number @@ -33,7 +33,7 @@ pub(crate) const BUILTIN_SQRT: &str = "SQRT"; /// Logical negation of a boolean value pub(crate) const BUILTIN_NOT: &str = "NOT"; /// String representation of a value -pub(crate) const BUILTIN_fullStr: &str = "fullStr"; 
+pub(crate) const BUILTIN_FULLSTR: &str = "fullStr";
 /// Lexical value
 pub(crate) const BUILTIN_STR: &str = "STR";
 /// Compute the sine of a value
@@ -67,7 +67,7 @@ pub(crate) const BUILTIN_DOUBLE: &str = "DOUBLE";
 /// Convert the value to a 32bit floating point number
 pub(crate) const BUILTIN_FLOAT: &str = "FLOAT";
 /// Compute the logarithm of the numerical value
-pub(crate) const BUILTIN_LOG: &str = "LOG";
+pub(crate) const BUILTIN_LOGARITHM: &str = "LOG";
 /// Raise the numerical value to a power
 pub(crate) const BUILTIN_POW: &str = "POW";
 /// Compare two string values
@@ -100,8 +100,14 @@ pub(crate) const BUILTIN_MIN: &str = "MIN";
 pub(crate) const BUILTIN_LUKA: &str = "LUKA";
 /// Compute the sum of numerical values
 pub(crate) const BUILTIN_SUM: &str = "SUM";
-/// Comput the product of numerical values
-pub(crate) const BUILTIN_PROD: &str = "PROD";
+/// Compute the product of numerical values
+pub(crate) const BUILTIN_PRODUCT: &str = "PROD";
+/// Compute the difference between two numeric values
+pub(crate) const BUILTIN_SUBTRACTION: &str = "MINUS";
+/// Compute the quotient of two numeric values
+pub(crate) const BUILTIN_DIVISION: &str = "DIV";
+/// Compute the multiplicative inverse of a numeric value
+pub(crate) const BUILTIN_INVERSE: &str = "INVERSE";
 /// Compute the logical and between boolean values
 pub(crate) const BUILTIN_AND: &str = "AND";
 /// Compute the logical or between boolean values
diff --git a/nemo/src/rule_model/syntax/import_export/file_formats.rs b/nemo/src/rule_model/syntax/import_export/file_formats.rs
index 78d257ded..dc60e916f 100644
--- a/nemo/src/rule_model/syntax/import_export/file_formats.rs
+++ b/nemo/src/rule_model/syntax/import_export/file_formats.rs
@@ -20,3 +20,22 @@ pub(crate) const FILE_FORMAT_RDF_TRIG: &str = "trig";
 pub(crate) const FILE_FORMAT_RDF_XML: &str = "rdfxml";
 /// The "predicate name" used for the json format in import/export directives.
 pub(crate) const FILE_FORMAT_JSON: &str = "json";
+
+/// The file extension used for CSV files
+pub(crate) const EXTENSION_CSV: &str = "csv";
+/// The file extension used for TSV files
+pub(crate) const EXTENSION_TSV: &str = "tsv";
+/// The file extension used for DSV files
+pub(crate) const EXTENSION_DSV: &str = "dsv";
+/// The file extension used for Ntriples files
+pub(crate) const EXTENSION_RDF_NTRIPLES: &str = "nt";
+/// The file extension used for NQuads files
+pub(crate) const EXTENSION_RDF_NQUADS: &str = "nq";
+/// The file extension used for Turtle files
+pub(crate) const EXTENSION_RDF_TURTLE: &str = "ttl";
+/// The file extension used for TriG files
+pub(crate) const EXTENSION_RDF_TRIG: &str = "trig";
+/// The file extension used for RDF/XML files
+pub(crate) const EXTENSION_RDF_XML: &str = "rdf";
+/// The file extension used for json files
+pub(crate) const EXTENSION_JSON: &str = "json";

From e33d7170c40e59e923478e9ccfd6c1bed9a72af9 Mon Sep 17 00:00:00 2001
From: Alex Ivliev
Date: Mon, 15 Jul 2024 22:41:55 +0200
Subject: [PATCH 124/214] Basic error messages for parsing

---
 Cargo.lock                                    |  45 +++
 nemo-cli/Cargo.toml                           |   1 +
 nemo-cli/src/main.rs                          |  88 +++++-
 nemo-cli/src/sample.tao                       |  13 +
 nemo-language-server/src/language_server.rs   |   2 +-
 nemo-language-server/src/lib.rs               |   4 +-
 nemo-python/src/lib.rs                        |   2 +-
 nemo/Cargo.toml                               |   3 +
 nemo/src/io/parser.rs                         |  48 +--
 nemo/src/lib.rs                               |   1 +
 nemo/src/parser.rs                            | 125 ++++++++
 nemo/src/parser/ast.rs                        |  21 ++
 nemo/src/parser/ast/basic.rs                  |   3 +
 nemo/src/parser/ast/basic/number.rs           | 202 +++++++++++++
 nemo/src/parser/ast/program.rs                |  33 +++
 nemo/src/parser/ast/token.rs                  | 277 ++++++++++++++++++
 nemo/src/parser/context.rs                    |  53 ++++
 nemo/src/parser/error.rs                      | 170 +++++++++++
 nemo/src/parser/input.rs                      | 231 +++++++++++++++
 nemo/src/parser/lsp.rs                        |  37 +++
 nemo/src/parser/span.rs                       | 124 ++++++++
 nemo/src/rule_model.rs                        |   2 +-
 .../{component.rs => components.rs}           |  10 +-
 .../{component => components}/atom.rs         |  14 +-
 .../{component => components}/base.rs         |   4 +-
 .../{component => components}/fact.rs         |   4 +-
 .../import_export.rs                          |   8 +-
 .../import_export/attributes.rs               |   0
 .../import_export/compression.rs              |   0
 .../import_export/file_formats.rs             |   4 +-
 .../{component => components}/literal.rs      |   6 +-
 .../{component => components}/output.rs       |   4 +-
 .../{component => components}/rule.rs         |   4 +-
 .../{component => components}/term.rs         |  10 +-
 .../term/aggregate.rs                         |  36 ++-
 .../term/function.rs                          |  14 +-
 .../{component => components}/term/map.rs     |   8 +-
 .../term/operation.rs                         |   8 +-
 .../term/operation/operation_kind.rs          |   3 +-
 .../term/primitive.rs                         |   8 +-
 .../term/primitive/ground.rs                  |   6 +-
 .../term/primitive/variable.rs                |   6 +-
 .../term/primitive/variable/existential.rs    |   6 +-
 .../term/primitive/variable/universal.rs      |   6 +-
 .../{component => components}/term/tuple.rs   |  12 +-
 nemo/src/rule_model/error.rs                  |  75 ++---
 .../src/rule_model/error/translation_error.rs |   9 +
 nemo/src/rule_model/error/validation_error.rs |  57 ++++
 nemo/src/rule_model/program.rs                |  10 +-
 nemo/src/rule_model/syntax.rs                 |   1 +
 nemo/src/rule_model/syntax/aggregates.rs      |  10 +
 nemo/src/rule_model/util.rs                   |  12 +-
 52 files changed, 1655 insertions(+), 185 deletions(-)
 create mode 100644 nemo-cli/src/sample.tao
 create mode 100644 nemo/src/parser.rs
 create mode 100644 nemo/src/parser/ast.rs
 create mode 100644 nemo/src/parser/ast/basic.rs
 create mode 100644 nemo/src/parser/ast/basic/number.rs
 create mode 100644 nemo/src/parser/ast/program.rs
 create mode 100644 nemo/src/parser/ast/token.rs
 create mode 100644
nemo/src/parser/context.rs create mode 100644 nemo/src/parser/error.rs create mode 100644 nemo/src/parser/input.rs create mode 100644 nemo/src/parser/lsp.rs create mode 100644 nemo/src/parser/span.rs rename nemo/src/rule_model/{component.rs => components.rs} (84%) rename nemo/src/rule_model/{component => components}/atom.rs (88%) rename nemo/src/rule_model/{component => components}/base.rs (95%) rename nemo/src/rule_model/{component => components}/fact.rs (97%) rename nemo/src/rule_model/{component => components}/import_export.rs (97%) rename nemo/src/rule_model/{component => components}/import_export/attributes.rs (100%) rename nemo/src/rule_model/{component => components}/import_export/compression.rs (100%) rename nemo/src/rule_model/{component => components}/import_export/file_formats.rs (96%) rename nemo/src/rule_model/{component => components}/literal.rs (90%) rename nemo/src/rule_model/{component => components}/output.rs (96%) rename nemo/src/rule_model/{component => components}/rule.rs (99%) rename nemo/src/rule_model/{component => components}/term.rs (95%) rename nemo/src/rule_model/{component => components}/term/aggregate.rs (82%) rename nemo/src/rule_model/{component => components}/term/function.rs (89%) rename nemo/src/rule_model/{component => components}/term/map.rs (95%) rename nemo/src/rule_model/{component => components}/term/operation.rs (97%) rename nemo/src/rule_model/{component => components}/term/operation/operation_kind.rs (99%) rename nemo/src/rule_model/{component => components}/term/primitive.rs (96%) rename nemo/src/rule_model/{component => components}/term/primitive/ground.rs (92%) rename nemo/src/rule_model/{component => components}/term/primitive/variable.rs (93%) rename nemo/src/rule_model/{component => components}/term/primitive/variable/existential.rs (88%) rename nemo/src/rule_model/{component => components}/term/primitive/variable/universal.rs (91%) rename nemo/src/rule_model/{component => components}/term/tuple.rs (90%) create mode 100644 nemo/src/rule_model/error/translation_error.rs create mode 100644 nemo/src/rule_model/error/validation_error.rs create mode 100644 nemo/src/rule_model/syntax/aggregates.rs diff --git a/Cargo.lock b/Cargo.lock index bbbaeec7d..3f7b40f70 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -117,6 +117,16 @@ dependencies = [ "derive_arbitrary", ] +[[package]] +name = "ariadne" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44055e597c674aef7cb903b2b9f6e4cba1277ed0d2d61dae7cd52d7ffa81f8e2" +dependencies = [ + "unicode-width", + "yansi", +] + [[package]] name = "arrayvec" version = "0.7.4" @@ -1462,6 +1472,7 @@ dependencies = [ name = "nemo" version = "0.5.2-dev" dependencies = [ + "ariadne", "ascii_tree", "assert_fs", "bytecount", @@ -1494,6 +1505,8 @@ dependencies = [ "sanitise-file-name", "serde", "serde_json", + "strum", + "strum_macros", "test-log", "thiserror", "tokio", @@ -1505,6 +1518,7 @@ dependencies = [ name = "nemo-cli" version = "0.5.2-dev" dependencies = [ + "ariadne", "assert_cmd", "assert_fs", "clap 4.5.7", @@ -2290,6 +2304,12 @@ version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "976295e77ce332211c0d24d92c0e83e50f5c5f046d11082cea19f3df13a3562d" +[[package]] +name = "rustversion" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6" + [[package]] name = "ryu" version = "1.0.18" @@ -2473,6 +2493,25 @@ version = "0.11.1" 
source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" +[[package]] +name = "strum" +version = "0.26.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" + +[[package]] +name = "strum_macros" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "rustversion", + "syn 2.0.66", +] + [[package]] name = "syn" version = "1.0.109" @@ -3256,6 +3295,12 @@ version = "0.8.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "791978798f0597cfc70478424c2b4fdc2b7a8024aaff78497ef00f24ef674193" +[[package]] +name = "yansi" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" + [[package]] name = "yoke" version = "0.7.4" diff --git a/nemo-cli/Cargo.toml b/nemo-cli/Cargo.toml index be15bd0d0..aeb4a2895 100644 --- a/nemo-cli/Cargo.toml +++ b/nemo-cli/Cargo.toml @@ -22,6 +22,7 @@ env_logger = "*" serde_json = "1.0.108" nemo = { path = "../nemo" } +ariadne = "0.4.1" [dev-dependencies] assert_cmd = "2.0" diff --git a/nemo-cli/src/main.rs b/nemo-cli/src/main.rs index 1a6870dc2..5e6d3ac8b 100644 --- a/nemo-cli/src/main.rs +++ b/nemo-cli/src/main.rs @@ -22,6 +22,7 @@ pub mod cli; use std::fs::{read_to_string, File}; +use ariadne::{Color, ColorGenerator, Fmt, Label, Report, ReportKind, Source}; use clap::Parser; use cli::{CliApp, Exporting, Reporting}; use colored::Colorize; @@ -35,6 +36,7 @@ use nemo::{ }, meta::timing::{TimedCode, TimedDisplay}, model::{ExportDirective, Program}, + parser::ParserErrorReport, }; /// Set exports according to command-line parameter. @@ -165,15 +167,47 @@ fn run(mut cli: CliApp) -> Result<(), Error> { filename: rules.to_string_lossy().to_string(), })?; + let program = + match nemo::parser::Parser::initialize(&rules_content, rules.to_string_lossy().to_string()) + .parse() + { + Ok(program) => program, + Err(report) => { + report.eprint(report.build_reports(Color::Red))?; + std::process::exit(1); + } + }; + // let mut program = parse_program(rules_content)?; - let (ast, _errors) = parse_program_str(&rules_content); - log::debug!("AST:\n{ast}"); + // let (ast, errors) = parse_program_str(&rules_content); + + // if !errors.is_empty() { + // for error in errors { + // let color = Color::Red; + + // let r = Report::build(ReportKind::Error, String::from("test"), 100) + // .with_code(3) + // .with_message(&error.msg) + // .with_label( + // Label::new(( + // String::from("test"), + // error.pos.offset..(error.pos.offset + 1), + // )) + // .with_message(&error.msg) + // .with_color(color), + // ) + // .finish(); + // r.eprint((String::from("test"), Source::from(&rules_content)))?; + // } + // } + + // log::debug!("AST:\n{ast}"); // TODO: Report errors! 
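+    // Parse errors are now rendered via `ariadne`: `ParserErrorReport::build_reports`
+    // (in nemo/src/parser.rs) builds one `Report` per recorded `ParserError`, and
+    // `eprint` prints them against the source text, much like the commented sketch above.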
- log::debug!("ERRORS:\n{_errors:#?}"); - let program = nemo::rule_model::program::Program::from_ast(ast); + // log::debug!("ERRORS:\n{_errors:#?}"); + // let program = nemo::rule_model::program::Program::from_ast(ast); log::info!("Rules parsed"); - log::trace!("{:?}", program); + // log::trace!("{:?}", program); let facts_to_be_traced: Option> = { let raw_facts_to_be_traced: Option> = @@ -321,6 +355,9 @@ fn run(mut cli: CliApp) -> Result<(), Error> { fn main() { let cli = CliApp::parse(); + // test_error_message(); + // return; + cli.logging.initialize_logging(); log::info!("Version: {}", clap::crate_version!()); log::debug!("Rule files: {:?}", cli.rules); @@ -330,3 +367,44 @@ fn main() { std::process::exit(1) }) } + +fn test_error_message() { + use ariadne::{Color, ColorGenerator, Fmt, Label, Report, ReportKind, Source}; + + let mut colors = ColorGenerator::new(); + + // Generate & choose some colours for each of our elements + let a = colors.next(); + let b = colors.next(); + let out = Color::Fixed(81); + + Report::build(ReportKind::Error, "sample.tao", 12) + .with_code(3) + .with_message(format!("Incompatible types")) + .with_label( + Label::new(("sample.tao", 32..33)) + .with_message(format!("This is of type {}", "Nat".fg(a))) + .with_color(a), + ) + .with_label( + Label::new(("sample.tao", 42..45)) + .with_message(format!("This is of type {}", "Str".fg(b))) + .with_color(b), + ) + .with_label( + Label::new(("sample.tao", 11..48)) + .with_message(format!( + "The values are outputs of this {} expression", + "match".fg(out), + )) + .with_color(out), + ) + .with_note(format!( + "Outputs of {} expressions must coerce to the same type", + "match".fg(out) + )) + .with_help("Test") + .finish() + .print(("sample.tao", Source::from(include_str!("sample.tao")))) + .unwrap(); +} diff --git a/nemo-cli/src/sample.tao b/nemo-cli/src/sample.tao new file mode 100644 index 000000000..5cefbc6a4 --- /dev/null +++ b/nemo-cli/src/sample.tao @@ -0,0 +1,13 @@ +123.10 + +abc(?x, ?y) :- p(12, ?) . + +% def five = match () in { +% () => 5, +% () => "5", +% } +% +% def six = +% five +% + 1 +a(1, 2, ?). 
\ No newline at end of file
diff --git a/nemo-language-server/src/language_server.rs b/nemo-language-server/src/language_server.rs
index 2a179937d..bf2379d0c 100644
--- a/nemo-language-server/src/language_server.rs
+++ b/nemo-language-server/src/language_server.rs
@@ -6,7 +6,7 @@ use futures::lock::Mutex;
 use line_index::{LineCol, LineIndex, WideEncoding};
 use nemo::io::parser::ast::program::Program;
 use nemo::io::parser::ast::{AstNode, Position};
-use nemo::io::parser::new::parse_program_str;
+use nemo::io::parser::parse_program_str;
 use nemo_position::{lsp_position_to_nemo_position, PositionConversionError};
 use tower_lsp::lsp_types::{
     Diagnostic, DidChangeTextDocumentParams, DidOpenTextDocumentParams, DocumentChangeOperation,
diff --git a/nemo-language-server/src/lib.rs b/nemo-language-server/src/lib.rs
index 646a4a250..6cd533107 100644
--- a/nemo-language-server/src/lib.rs
+++ b/nemo-language-server/src/lib.rs
@@ -15,5 +15,5 @@ pub fn create_language_server() -> (LspService<Backend>, ClientSocket) {
 }
 
 // See https://doc.rust-lang.org/cargo/reference/features.html#mutually-exclusive-features
-#[cfg(all(feature = "js", feature = "tokio"))]
-compile_error!("feature \"js\" and feature \"tokio\" cannot be enabled at the same time");
+// #[cfg(all(feature = "js", feature = "tokio"))]
+// compile_error!("feature \"js\" and feature \"tokio\" cannot be enabled at the same time");
diff --git a/nemo-python/src/lib.rs b/nemo-python/src/lib.rs
index ca874c0e7..61adac8b2 100644
--- a/nemo-python/src/lib.rs
+++ b/nemo-python/src/lib.rs
@@ -412,7 +412,7 @@ impl NemoEngine {
     fn trace(&mut self, fact: String) -> Option<NemoTrace> {
         let (ast, _errors) = nemo::io::parser::parse_fact_str(&fact); /*.py_res().ok()?;*/
         // TODO: Report errors...
-        let parsed_fact = nemo::rule_model::component::fact::Fact::from_ast(ast);
+        let parsed_fact = nemo::rule_model::components::fact::Fact::from_ast(ast);
         let parsed_fact = todo!();
         let (trace, handles) = self.engine.trace(self.program.0.clone(), vec![parsed_fact]);
         let handle = *handles
diff --git a/nemo/Cargo.toml b/nemo/Cargo.toml
index 6483cb8da..3f11c31a8 100644
--- a/nemo/Cargo.toml
+++ b/nemo/Cargo.toml
@@ -50,6 +50,9 @@ unicode-ident = "1.0.12"
 nom-greedyerror = "0.5.0"
 nom-supreme = "0.8.0"
 enum-assoc = "1.1.0"
+ariadne = "0.4.1"
+strum = "0.26.3"
+strum_macros = "0.26.4"
 
 [dev-dependencies]
 env_logger = "*"
diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs
index de85b9d12..a3e5d61e4 100644
--- a/nemo/src/io/parser.rs
+++ b/nemo/src/io/parser.rs
@@ -2476,30 +2476,30 @@ fn outer_span<'a>(input: Span<'a>, rest_input: Span<'a>) -> Span<'a> {
     }
 }
 
-fn expect<'a, 's, O: Copy, E: ParseError<Input<'a, 's>>, F: Parser<Input<'a, 's>, O, E>>(
-    mut parser: F,
-    error_msg: impl ToString,
-    error_output: O,
-    errors: ParserState<'s>,
-) -> impl FnMut(Input<'a, 's>) -> IResult<Input<'a, 's>, O, E> {
-    move |input| match parser.parse(input) {
-        Ok(result) => Ok(result),
-        Err(nom::Err::Error(_)) | Err(nom::Err::Failure(_)) => {
-            let err = Error {
-                pos: Position {
-                    offset: input.input.location_offset(),
-                    line: input.input.location_line(),
-                    column: input.input.get_utf8_column() as u32,
-                },
-                msg: error_msg.to_string(),
-                context: vec![],
-            };
-            errors.report_error(err);
-            Ok((input, error_output))
-        }
-        Err(err) => Err(err),
-    }
-}
+// fn expect_abc<'a, 's, O: Copy, E: ParseError<Input<'a, 's>>, F: Parser<Input<'a, 's>, O, E>>(
+//     mut parser: F,
+//     error_msg: impl ToString,
+//     error_output: O,
+//     errors: ParserState<'s>,
+// ) -> impl FnMut(Input<'a, 's>) -> IResult<Input<'a, 's>, O, E> {
+//     move |input| match parser.parse(input) {
+//         Ok(result) => Ok(result),
+//         Err(nom::Err::Error(_)) | Err(nom::Err::Failure(_)) => {
+//             let err = Error {
+//                 pos: Position {
+//                     offset: input.input.location_offset(),
+//                     line: input.input.location_line(),
+//                     column: input.input.get_utf8_column() as u32,
+//                 },
+//                 msg: error_msg.to_string(),
+//                 context: vec![],
+//             };
+//             errors.report_error(err);
+//             Ok((input, error_output))
+//         }
+//         Err(err) => Err(err),
+//     }
+// }
 
 fn recover<'a, 's, E>(
     mut parser: impl Parser<Input<'a, 's>, Statement<'a>, E>,
diff --git a/nemo/src/lib.rs b/nemo/src/lib.rs
index 89f3d962d..4677dc557 100644
--- a/nemo/src/lib.rs
+++ b/nemo/src/lib.rs
@@ -26,6 +26,7 @@ extern crate nemo_physical;
 pub mod api;
 pub mod error;
 pub mod io;
+pub mod parser;
 
 pub mod execution;
 pub mod model;
diff --git a/nemo/src/parser.rs b/nemo/src/parser.rs
new file mode 100644
index 000000000..b67bdc5ff
--- /dev/null
+++ b/nemo/src/parser.rs
@@ -0,0 +1,125 @@
+//! This module defines [Parser], which is responsible for parsing nemo programs.
+
+pub mod ast;
+pub mod context;
+pub mod error;
+pub mod input;
+pub mod lsp;
+pub mod span;
+
+use std::{cell::RefCell, ops::Range, rc::Rc};
+
+use ariadne::{Color, Label, Report, ReportKind, Source};
+use ast::{program::Program, ProgramAST};
+use error::{transform_error_tree, ParserError, ParserErrorTree};
+use input::ParserInput;
+
+use nom::IResult;
+
+/// State of the parser
+#[derive(Debug, Clone, Default)]
+pub struct ParserState {
+    /// Collection of all errors that occurred during parsing
+    errors: Rc<RefCell<Vec<ParserError>>>,
+}
+
+impl ParserState {
+    /// Append a [ParserError] to the current list of errors.
+    pub(crate) fn report_error(&self, error: ParserError) {
+        self.errors.borrow_mut().push(error);
+    }
+}
+
+/// Output of a nom parser function
+pub type ParserResult<'a, Output> = IResult<ParserInput<'a>, Output, ParserErrorTree<'a>>;
+
+/// Parser for the nemo rule language
+#[derive(Debug)]
+pub struct Parser<'a> {
+    /// Reference to the text that is going to be parsed
+    input: &'a str,
+    /// Label of the input text, usually a path of the input file
+    label: String,
+    /// State of the parser
+    state: ParserState,
+}
+
+/// Contains all errors that occurred during parsing
+#[derive(Debug)]
+pub struct ParserErrorReport<'a> {
+    /// Reference to the text that is going to be parsed
+    input: &'a str,
+    /// Label of the input text, usually a path of the input file
+    label: String,
+    /// List of [ParserError]s
+    errors: Vec<ParserError>,
+}
+
+impl<'a> ParserErrorReport<'a> {
+    /// Print the given reports.
+    pub fn eprint<'s, ReportIterator>(
+        &'s self,
+        reports: ReportIterator,
+    ) -> Result<(), std::io::Error>
+    where
+        ReportIterator: Iterator<Item = Report<'s, (String, Range<usize>)>>,
+    {
+        for report in reports {
+            report.eprint((self.label.clone(), Source::from(self.input)))?;
+        }
+
+        Ok(())
+    }
+
+    /// Build a [Report] for each error.
+    pub fn build_reports(
+        &'a self,
+        color_error: Color,
+    ) -> impl Iterator<Item = Report<'a, (String, Range<usize>)>> {
+        // report.with_message(format!(""))
+
+        self.errors.iter().map(move |error| {
+            let message = format!("expected `{}`", error.context[0].name());
+
+            Report::build(ReportKind::Error, self.label.clone(), 0)
+                .with_message(message.clone())
+                .with_label(
+                    Label::new((self.label.clone(), error.position.range()))
+                        .with_message(message)
+                        .with_color(color_error),
+                )
+                .finish()
+        })
+    }
+}
+
+impl<'a> Parser<'a> {
+    /// Initialize the parser.
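+    ///
+    /// A usage sketch mirroring `nemo-cli/src/main.rs` above (the label
+    /// `"rules.rls"` is just an arbitrary example name shown in error reports):
+    ///
+    /// ```ignore
+    /// match Parser::initialize(&input, "rules.rls".to_string()).parse() {
+    ///     Ok(program) => { /* work with the AST */ }
+    ///     Err(report) => {
+    ///         report.eprint(report.build_reports(Color::Red))?;
+    ///     }
+    /// }
+    /// ```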
+    pub fn initialize(input: &'a str, label: String) -> Self {
+        Self {
+            input,
+            label,
+            state: ParserState::default(),
+        }
+    }
+
+    /// Parse the input
+    pub fn parse(self) -> Result<Program<'a>, ParserErrorReport<'a>> {
+        let parser_input = ParserInput::new(&self.input, self.state.clone());
+
+        let error_tree = match transform_error_tree(Program::parse)(parser_input) {
+            Ok((_input, program)) => return Ok(program),
+            Err(error_tree) => error_tree,
+        };
+
+        drop(error_tree);
+
+        Err(ParserErrorReport {
+            input: self.input,
+            label: self.label,
+            errors: Rc::try_unwrap(self.state.errors)
+                .expect("there should only be one owner now")
+                .into_inner(),
+        })
+    }
+}
diff --git a/nemo/src/parser/ast.rs b/nemo/src/parser/ast.rs
new file mode 100644
index 000000000..5b46908bb
--- /dev/null
+++ b/nemo/src/parser/ast.rs
@@ -0,0 +1,21 @@
+//! This module defines the abstract syntax tree representation of a nemo program.
+
+pub mod basic;
+pub mod program;
+pub mod token;
+
+use super::{span::ProgramSpan, ParserInput, ParserResult};
+
+/// Trait implemented by nodes in the abstract syntax tree
+pub trait ProgramAST<'a>: Sync {
+    /// Return all children of this node.
+    fn children(&self) -> Vec<&dyn ProgramAST>;
+
+    /// Return the region of text this node originates from.
+    fn span(&self) -> ProgramSpan;
+
+    /// Parse the given input into this type of node
+    fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self>
+    where
+        Self: Sized + 'a;
+}
diff --git a/nemo/src/parser/ast/basic.rs b/nemo/src/parser/ast/basic.rs
new file mode 100644
index 000000000..febed1d1c
--- /dev/null
+++ b/nemo/src/parser/ast/basic.rs
@@ -0,0 +1,3 @@
+//! This module defines ast nodes for simple building blocks like numbers or strings.
+
+pub mod number;
diff --git a/nemo/src/parser/ast/basic/number.rs b/nemo/src/parser/ast/basic/number.rs
new file mode 100644
index 000000000..65b3941c7
--- /dev/null
+++ b/nemo/src/parser/ast/basic/number.rs
@@ -0,0 +1,202 @@
+//! This module defines the ast node for numbers.
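+//!
+//! The accepted shape is, informally:
+//! `[+|-] digits [. digits] [(e|E) [+|-] digits] [f|d]`,
+//! e.g. `-210`, `4534.34534345`, `1.1e435` or `0.1e232d`
+//! (see the tests at the end of this file).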
+
+use enum_assoc::Assoc;
+use nom::{
+    branch::alt,
+    combinator::opt,
+    sequence::{pair, tuple},
+};
+
+use crate::parser::{
+    ast::{
+        token::{Token, TokenKind},
+        ProgramAST,
+    },
+    context::{context, ParserContext},
+    span::ProgramSpan,
+    ParserInput, ParserResult,
+};
+
+/// Marker that indicates the type of a number
+#[derive(Assoc, Debug, Clone, Copy, PartialEq, Eq)]
+#[func(pub fn token(token: &TokenKind) -> Option<Self>)]
+#[func(pub fn print(&self) -> &'static str)]
+enum NumberTypeMarker {
+    /// Marks a number as a 32-bit floating point number
+    #[assoc(token = TokenKind::TypeMarkerFloat)]
+    #[assoc(print = "f")]
+    Float,
+    /// Marks a number as a 64-bit floating point number
+    #[assoc(token = TokenKind::TypeMarkerDouble)]
+    #[assoc(print = "d")]
+    Double,
+}
+
+/// Sign of a number
+#[derive(Assoc, Default, Debug, Clone, Copy, PartialEq, Eq)]
+#[func(pub fn token(token: &TokenKind) -> Option<Self>)]
+#[func(pub fn print(&self) -> &'static str)]
+enum NumberSign {
+    /// Positive
+    #[assoc(token = TokenKind::Plus)]
+    #[assoc(print = "+")]
+    #[default]
+    Positive,
+    /// Negative
+    #[assoc(token = TokenKind::Minus)]
+    #[assoc(print = "-")]
+    Negative,
+}
+
+/// AST Node representing a number
+#[derive(Debug)]
+pub struct Number<'a> {
+    /// [ProgramSpan] associated with this node
+    span: ProgramSpan<'a>,
+
+    /// Sign of the integer part
+    integer_sign: NumberSign,
+    /// The integer part of the number
+    integer: Token<'a>,
+    /// The fractional part of the number
+    fractional: Option<Token<'a>>,
+    /// Sign and exponent of the number
+    exponent: Option<(NumberSign, Token<'a>)>,
+    /// Type
+    type_marker: Option<NumberTypeMarker>,
+}
+
+impl<'a> Number<'a> {
+    /// Parse the sign of the number
+    fn parse_sign(input: ParserInput<'a>) -> ParserResult<'a, NumberSign> {
+        alt((Token::plus, Token::minus))(input).map(|(rest, sign)| {
+            (
+                rest,
+                NumberSign::token(&sign.kind()).expect("unknown token"),
+            )
+        })
+    }
+
+    /// Parse the integer part of the number.
+    fn parse_integer(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>> {
+        Token::digits(input)
+    }
+
+    /// Parse the fractional part of the number.
+    fn parse_fractional(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>> {
+        pair(Token::dot, Token::digits)(input).map(|(rest, (_, result))| (rest, result))
+    }
+
+    /// Parse the exponent part of the number.
+    fn parse_exponent(input: ParserInput<'a>) -> ParserResult<'a, (NumberSign, Token<'a>)> {
+        tuple((
+            alt((Token::exponent_lower, Token::exponent_upper)),
+            opt(Self::parse_sign),
+            Self::parse_integer,
+        ))(input)
+        .map(|(rest, (_, sign, integer))| (rest, (sign.unwrap_or_default(), integer)))
+    }
+
+    /// Parse the type marker of the number.
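+    ///
+    /// This is the optional trailing `f` (32-bit float) or `d` (64-bit double),
+    /// as in `1.0e343f` or `0.1e232d` from the tests below.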
+ fn parse_type(input: ParserInput<'a>) -> ParserResult<'a, NumberTypeMarker> { + alt((Token::type_marker_float, Token::type_marker_double))(input).map(|(rest, marker)| { + ( + rest, + NumberTypeMarker::token(&marker.kind()).expect("unknown token"), + ) + }) + } +} + +impl<'a> ProgramAST<'a> for Number<'a> { + fn children(&self) -> Vec<&dyn ProgramAST> { + Vec::default() + } + + fn span(&self) -> ProgramSpan { + self.span + } + + fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self> + where + Self: Sized, + { + let input_span = input.span; + + context( + ParserContext::Number, + tuple(( + opt(Self::parse_sign), + Self::parse_integer, + opt(Self::parse_fractional), + opt(Self::parse_exponent), + opt(Self::parse_type), + )), + )(input) + .map( + |(rest, (integer_sign, integer, fractional, exponent, type_marker))| { + let rest_span = rest.span; + + ( + rest, + Number { + span: input_span.until_rest(&rest_span), + integer_sign: integer_sign.unwrap_or_default(), + integer, + fractional, + exponent, + type_marker, + }, + ) + }, + ) + } +} + +#[cfg(test)] +mod test { + use nom::combinator::all_consuming; + + use crate::parser::{ + ast::{basic::number::Number, ProgramAST}, + ParserInput, ParserState, + }; + + #[test] + fn parse_numbers() { + let valid_numbers = vec![ + "123", + "-210", + "0012", + "-0012", + "0.2", + "4534.34534345", + "1e545", + "1.1e435", + "0.1e232d", + "1.0e343f", + "112E+12", + "12312.1231", + "0.1231f", + "1231", + "-1e+0", + "1e-1", + ]; + + let invalid_numbers = vec![".1", "1.", "E9", ".e3", "7E", "."]; + + for valid in valid_numbers { + let input = ParserInput::new(valid, ParserState::default()); + let result = all_consuming(Number::parse)(input); + + assert!(result.is_ok()) + } + + for invalid in invalid_numbers { + let input = ParserInput::new(invalid, ParserState::default()); + let result = all_consuming(Number::parse)(input); + + assert!(result.is_err()) + } + } +} diff --git a/nemo/src/parser/ast/program.rs b/nemo/src/parser/ast/program.rs new file mode 100644 index 000000000..327b1f4d9 --- /dev/null +++ b/nemo/src/parser/ast/program.rs @@ -0,0 +1,33 @@ +//! This module defines [Program]. + +use crate::parser::{ + context::{context, ParserContext}, + input::ParserInput, + ParserResult, +}; + +use super::{basic::number::Number, ProgramAST}; + +/// +#[derive(Debug)] +pub struct Program<'a> { + number: Number<'a>, +} + +impl<'a> ProgramAST<'a> for Program<'a> { + fn children(&self) -> Vec<&dyn ProgramAST> { + todo!() + } + + fn span(&self) -> crate::parser::span::ProgramSpan { + todo!() + } + + fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self> + where + Self: Sized + 'a, + { + context(ParserContext::Program, Number::parse)(input) + .map(|(rest, result)| (rest, Program { number: result })) + } +} diff --git a/nemo/src/parser/ast/token.rs b/nemo/src/parser/ast/token.rs new file mode 100644 index 000000000..7a295b24a --- /dev/null +++ b/nemo/src/parser/ast/token.rs @@ -0,0 +1,277 @@ +//! This module defines [Token]. 
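+//!
+//! A [Token] is the smallest lexical unit the parser operates on: a kind from
+//! [TokenKind] paired with the input span in which it was recognized.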
+#![allow(missing_docs)] + +use enum_assoc::Assoc; + +use nom::{ + branch::alt, + bytes::complete::{is_not, tag, take, take_till}, + character::complete::{alpha1, alphanumeric1, digit1, line_ending, multispace0, multispace1}, + combinator::{all_consuming, cut, map, opt, recognize}, + error::ParseError, + multi::{many0, many1}, + sequence::{delimited, pair, tuple}, + IResult, +}; + +use crate::parser::{ + context::{context, ParserContext}, + span::ProgramSpan, + ParserInput, ParserResult, +}; + +/// Enumeration of all accepted kinds of [Token]s +#[derive(Assoc, Debug, Clone, Copy, PartialEq, Eq)] +#[func(pub fn name(&self) -> &'static str)] +pub enum TokenKind { + /// Question mark, used to mark universal variables + #[assoc(name = "?")] + QuestionMark, + /// Exclamation mark, used to mark existential variables + #[assoc(name = "!")] + ExclamationMark, + /// Open parenthesis + #[assoc(name = "(")] + OpenParenthesis, + /// Closed parenthesis + #[assoc(name = ")")] + ClosedParenthesis, + /// Open bracket + #[assoc(name = "[")] + OpenBracket, + /// Closed bracket + #[assoc(name = "]")] + ClosedBracket, + /// Open brace + #[assoc(name = "{")] + OpenBrace, + /// Closed brace + #[assoc(name = "}")] + ClosedBrace, + /// Open Chevrons + #[assoc(name = "<")] + OpenChevrons, + /// Closed Chevrons + #[assoc(name = ">")] + ClosedChevrons, + /// Dot + #[assoc(name = ".")] + Dot, + /// Arrow, used to separate rules + #[assoc(name = ":-")] + Arrow, + /// Colon + #[assoc(name = ":")] + Colon, + /// Greater than + #[assoc(name = ">")] + Greater, + /// Greater than or equal + #[assoc(name = ">=")] + GreaterEqual, + /// Less than + #[assoc(name = "<")] + Less, + /// Less than or equal + #[assoc(name = "<=")] + LessEqual, + /// Equal + #[assoc(name = "=")] + Equal, + /// Tilde, used for negation + #[assoc(name = "~")] + Tilde, + /// Double caret + #[assoc(name = "^^")] + DoubleCaret, + /// Hash, used in front of aggregates + #[assoc(name = "#")] + Hash, + /// Underscore, used for anonymous variables + #[assoc(name = "_")] + Underscore, + /// At, used to indicate directives + #[assoc(name = "@")] + At, + /// Plus + #[assoc(name = "+")] + Plus, + /// Minus + #[assoc(name = "-")] + Minus, + /// Star + #[assoc(name = "*")] + Star, + /// Division + #[assoc(name = "/")] + Division, + /// True + #[assoc(name = "true")] + True, + /// False + #[assoc(name = "false")] + False, + /// Quote + #[assoc(name = "\"")] + Quote, + /// Blank node label + #[assoc(name = "_:")] + BlankNodeLabel, + /// Name + #[assoc(name = "name")] + Name, + /// Digits + #[assoc(name = "digits")] + Digits, + /// Exponent (lower case) + #[assoc(name = "e")] + ExponentLower, + /// Exponent (upper case) + #[assoc(name = "E")] + ExponentUpper, + /// Marker float + #[assoc(name = "f")] + TypeMarkerFloat, + /// Marker double + #[assoc(name = "d")] + TypeMarkerDouble, + /// IRI + #[assoc(name = "iri")] + Iri, + /// A comment (as single token) + #[assoc(name = "comment")] + Comment, + /// A doc comment attached to e.g. 
a rule + #[assoc(name = "doc-comment")] + DocComment, + /// Toplevel comment describing the rule file + #[assoc(name = "top-level-comment")] + TopLevelComment, + /// White spaces + #[assoc(name = "whitespace")] + Whitespace, + /// End of file + #[assoc(name = "end-of-file")] + EndOfFile, + /// Token that captures errors + #[assoc(name = "error")] + Error, +} + +/// A token is the smallest unit recognized by the parser +/// that is used to built up more complex expressions +#[derive(Debug)] +pub struct Token<'a> { + /// [ProgramSpan] associated with this node + span: ProgramSpan<'a>, + + /// The kind of token + kind: TokenKind, +} + +/// Macro for generating token parser functions +macro_rules! string_token { + ($func_name: ident, $token: expr) => { + /// Parse this token. + pub fn $func_name(input: ParserInput<'a>) -> ParserResult<'a, Token> { + map( + context(ParserContext::Token { kind: $token }, tag($token.name())), + |input: ParserInput| Token { + span: input.span, + kind: $token, + }, + )(input) + } + }; +} + +impl<'a> Token<'a> { + /// Return the [TokenKind] of this token. + pub fn kind(&self) -> TokenKind { + self.kind + } + + /// Parse [TokenKind::Name]. + pub fn name(input: ParserInput<'a>) -> ParserResult<'a, Token> { + context( + ParserContext::token(TokenKind::Name), + recognize(pair( + alpha1, + many0(alt((alphanumeric1, tag("_"), tag("-")))), + )), + )(input) + .map(|(rest_input, result)| { + ( + rest_input, + Token { + span: result.span, + kind: TokenKind::Name, + }, + ) + }) + } + + /// Parse [TokenKind::Iri]. + pub fn iri(input: ParserInput<'a>) -> ParserResult<'a, Token> { + context( + ParserContext::token(TokenKind::Iri), + recognize(delimited(tag("<"), is_not("> \n"), cut(tag(">")))), + )(input) + .map(|(rest, result)| { + ( + rest, + Token { + span: result.span, + kind: TokenKind::Iri, + }, + ) + }) + } + + /// Parse [TokenKind::Digits]. 
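+    ///
+    /// Recognizes one or more ASCII digits (via nom's `digit1`), e.g. the `123`
+    /// and `10` in `123.10`; sign and exponent are handled by the number parser.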
+    pub fn digits(input: ParserInput<'a>) -> ParserResult<'a, Token> {
+        context(ParserContext::token(TokenKind::Digits), digit1)(input).map(
+            |(rest_input, result)| {
+                (
+                    rest_input,
+                    Token {
+                        span: result.span,
+                        kind: TokenKind::Digits,
+                    },
+                )
+            },
+        )
+    }
+
+    string_token!(open_parenthesis, TokenKind::OpenParenthesis);
+    string_token!(closed_parenthesis, TokenKind::ClosedParenthesis);
+    string_token!(open_brace, TokenKind::OpenBrace);
+    string_token!(closed_brace, TokenKind::ClosedBrace);
+    string_token!(open_chevrons, TokenKind::OpenChevrons);
+    string_token!(closed_chevrons, TokenKind::ClosedChevrons);
+    string_token!(open_bracket, TokenKind::OpenBracket);
+    string_token!(closed_bracket, TokenKind::ClosedBracket);
+    string_token!(question_mark, TokenKind::QuestionMark);
+    string_token!(exclamation_mark, TokenKind::ExclamationMark);
+    string_token!(dot, TokenKind::Dot);
+    string_token!(greater, TokenKind::Greater);
+    string_token!(greater_equal, TokenKind::GreaterEqual);
+    string_token!(less, TokenKind::Less);
+    string_token!(less_equal, TokenKind::LessEqual);
+    string_token!(equal, TokenKind::Equal);
+    string_token!(tilde, TokenKind::Tilde);
+    string_token!(double_caret, TokenKind::DoubleCaret);
+    string_token!(hash, TokenKind::Hash);
+    string_token!(underscore, TokenKind::Underscore);
+    string_token!(at, TokenKind::At);
+    string_token!(plus, TokenKind::Plus);
+    string_token!(minus, TokenKind::Minus);
+    string_token!(star, TokenKind::Star);
+    string_token!(division, TokenKind::Division);
+    string_token!(quote, TokenKind::Quote);
+    string_token!(blank_node_label, TokenKind::BlankNodeLabel);
+    string_token!(exponent_lower, TokenKind::ExponentLower);
+    string_token!(exponent_upper, TokenKind::ExponentUpper);
+    string_token!(type_marker_double, TokenKind::TypeMarkerDouble);
+    string_token!(type_marker_float, TokenKind::TypeMarkerFloat);
+}
diff --git a/nemo/src/parser/context.rs b/nemo/src/parser/context.rs
new file mode 100644
index 000000000..2bbec890a
--- /dev/null
+++ b/nemo/src/parser/context.rs
@@ -0,0 +1,53 @@
+//! This module defines [ParserContext].
+#![allow(missing_docs)]
+
+use enum_assoc::Assoc;
+use nom_supreme::context::ContextError;
+
+use super::{ast::token::TokenKind, error::ParserErrorTree, ParserInput, ParserResult};
+
+/// Context, in which a particular parse error occurred
+#[derive(Assoc, Debug, Clone, Copy)]
+#[func(pub fn name(&self) -> &'static str)]
+pub enum ParserContext {
+    /// Token
+    #[assoc(name = _kind.name())]
+    Token { kind: TokenKind },
+    /// Number
+    #[assoc(name = "number")]
+    Number,
+    /// Program
+    #[assoc(name = "program")]
+    Program,
+}
+
+impl ParserContext {
+    /// Create a [ParserContext] from a [TokenKind].
+    pub fn token(kind: TokenKind) -> Self {
+        Self::Token { kind }
+    }
+}
+
+/// Add context to an input parser.
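+///
+/// On failure of the wrapped parser, the given [ParserContext] is pushed onto
+/// the error's context stack (via [ParserErrorTree]'s `add_context`), which
+/// `error::get_deepest_errors` later turns into "expected ..." messages.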
+pub(crate) fn context<'a, Output, NomParser>(
+    context: ParserContext,
+    mut f: NomParser,
+) -> impl FnMut(ParserInput<'a>) -> ParserResult<'a, Output>
+where
+    NomParser: nom::Parser<ParserInput<'a>, Output, ParserErrorTree<'a>>,
+{
+    move |i| match f.parse(i.clone()) {
+        Ok(o) => Ok(o),
+        Err(nom::Err::Incomplete(i)) => Err(nom::Err::Incomplete(i)),
+        Err(nom::Err::Error(e)) => Err(nom::Err::Error(ParserErrorTree::add_context(
+            i,
+            context.clone(),
+            e,
+        ))),
+        Err(nom::Err::Failure(e)) => Err(nom::Err::Failure(ParserErrorTree::add_context(
+            i,
+            context.clone(),
+            e,
+        ))),
+    }
+}
diff --git a/nemo/src/parser/error.rs b/nemo/src/parser/error.rs
new file mode 100644
index 000000000..d446da394
--- /dev/null
+++ b/nemo/src/parser/error.rs
@@ -0,0 +1,170 @@
+//! This module defines the error type that is returned when the parser is unsuccessful.
+
+use nom::Parser;
+use nom_supreme::error::{GenericErrorTree, StackContext};
+
+use super::{context::ParserContext, span::CharacterPosition, ParserInput, ParserResult};
+
+/// Error tree used by nom parser
+pub type ParserErrorTree<'a> = GenericErrorTree<
+    ParserInput<'a>,
+    &'static str,
+    ParserContext,
+    Box<dyn std::error::Error + Send + Sync + 'static>,
+>;
+
+/// Error while parsing a nemo program
+#[derive(Debug)]
+pub(crate) struct ParserError {
+    /// Position where the error occurred
+    pub(crate) position: CharacterPosition,
+    /// Parsing stack
+    pub(crate) context: Vec<ParserContext>,
+}
+
+// fn recover<'a, E>(
+//     mut parser: impl Parser<Input<'a, 's>, Statement<'a>, E>,
+//     error_msg: impl ToString,
+//     context: Context,
+//     _errors: ParserState<'s>,
+// ) -> impl FnMut(Input<'a, 's>) -> IResult<Input<'a, 's>, Statement<'a>, E> {
+//     move |input: Input<'a, 's>| match parser.parse(input) {
+//         Ok(result) => Ok(result),
+//         Err(err) if input.input.is_empty() => Err(err),
+//         Err(nom::Err::Error(_)) | Err(nom::Err::Failure(_)) => {
+//             let _err = Error {
+//                 pos: Position {
+//                     offset: input.input.location_offset(),
+//                     line: input.input.location_line(),
+//                     column: input.input.get_utf8_column() as u32,
+//                 },
+//                 msg: error_msg.to_string(),
+//                 context: vec![context],
+//             };
+//             // errors.report_error(err);
+//             let (rest_input, span) = skip_to_statement_end::<ErrorTree<Input<'a, 's>>>(input);
+//             Ok((rest_input, Statement::Error(span)))
+//         }
+//         Err(err) => Err(err),
+//     }
+// }
+
+/// Function to translate an [ParserErrorTree] returned by the nom parser
+/// into a [ParserError] that can be displayed to the user.
+pub(crate) fn transform_error_tree<'a, Output>(
+    mut parser: impl Parser<ParserInput<'a>, Output, ParserErrorTree<'a>>,
+) -> impl FnMut(ParserInput<'a>) -> ParserResult<'a, Output> {
+    move |input| match parser.parse(input.clone()) {
+        Ok(result) => Ok(result),
+        Err(e) => {
+            if input.span.0.is_empty() {
+                return Err(e);
+            };
+            match &e {
+                nom::Err::Incomplete(_) => (),
+                nom::Err::Error(err) | nom::Err::Failure(err) => {
+                    let (_deepest_pos, errors) = get_deepest_errors(err);
+                    for error in errors {
+                        input.state.report_error(error);
+                    }
+                }
+            };
+            Err(e)
+        }
+    }
+}
+
+fn context_strs(
+    contexts: &Vec<(ParserInput<'_>, StackContext<ParserContext>)>,
+) -> Vec<ParserContext> {
+    contexts
+        .iter()
+        .map(|(_, c)| match c {
+            StackContext::Kind(_) => todo!(),
+            StackContext::Context(c) => *c,
+        })
+        .collect()
+}
+
+fn get_deepest_errors<'a, 's>(e: &'a ParserErrorTree<'a>) -> (CharacterPosition, Vec<ParserError>) {
+    match e {
+        ParserErrorTree::Base { location, ..
} => { + let span = location.span.0; + let err_pos = CharacterPosition { + offset: span.location_offset(), + line: span.location_line(), + column: span.get_utf8_column() as u32, + }; + ( + err_pos, + vec![ParserError { + position: err_pos, + context: Vec::new(), + }], + ) + } + ParserErrorTree::Stack { base, contexts } => { + // let mut err_pos = Position::default(); + match &**base { + ParserErrorTree::Base { location, .. } => { + let span = location.span.0; + let err_pos = CharacterPosition { + offset: span.location_offset(), + line: span.location_line(), + column: span.get_utf8_column() as u32, + }; + let mut msg = String::from(""); + for (_, context) in contexts { + match context { + StackContext::Kind(_) => todo!(), + StackContext::Context(c) => match c { + ParserContext::Token { kind: t } => { + msg.push_str(&t.name()); + } + _ => (), + }, + } + } + ( + err_pos, + vec![ParserError { + position: err_pos, + context: context_strs(contexts), + }], + ) + } + ParserErrorTree::Stack { base, contexts } => { + let (pos, mut deepest_errors) = get_deepest_errors(base); + let contexts = context_strs(contexts); + for error in &mut deepest_errors { + error.context.append(&mut contexts.clone()); + } + (pos, deepest_errors) + } + ParserErrorTree::Alt(_error_tree) => { + let (pos, mut deepest_errors) = get_deepest_errors(base); + let contexts = context_strs(contexts); + for error in &mut deepest_errors { + error.context.append(&mut contexts.clone()); + } + (pos, deepest_errors) + } + } + } + ParserErrorTree::Alt(vec) => { + let mut return_vec: Vec = Vec::new(); + let mut deepest_pos = CharacterPosition::default(); + for error in vec { + let (pos, mut deepest_errors) = get_deepest_errors(error); + if pos > deepest_pos { + deepest_pos = pos; + return_vec.clear(); + return_vec.append(&mut deepest_errors); + } else if pos == deepest_pos { + return_vec.append(&mut deepest_errors); + } + } + (deepest_pos, return_vec) + } + } +} diff --git a/nemo/src/parser/input.rs b/nemo/src/parser/input.rs new file mode 100644 index 000000000..728af9e66 --- /dev/null +++ b/nemo/src/parser/input.rs @@ -0,0 +1,231 @@ +//! This module defines [ParseInput]. + +use std::str::{CharIndices, Chars}; + +use nom::{ + error::ErrorKind, AsBytes, IResult, InputIter, InputLength, InputTake, InputTakeAtPosition, +}; +use nom_locate::LocatedSpan; + +use super::{span::ProgramSpan, ParserState}; + +/// Input to a nom parser function +#[derive(Debug, Clone)] +pub struct ParserInput<'a> { + pub(crate) span: ProgramSpan<'a>, + pub(crate) state: ParserState, +} + +impl<'a> ParserInput<'a> { + /// Create a new [ParserInput] from a string slice. 
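+    ///
+    /// As used in the parser tests:
+    ///
+    /// ```ignore
+    /// let input = ParserInput::new("123", ParserState::default());
+    /// ```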
+    pub fn new(input: &'a str, state: ParserState) -> Self {
+        Self {
+            span: ProgramSpan(LocatedSpan::new(input)),
+            state,
+        }
+    }
+}
+
+impl<'a> AsBytes for ParserInput<'a> {
+    fn as_bytes(&self) -> &[u8] {
+        self.span.0.fragment().as_bytes()
+    }
+}
+
+impl<'a> nom::Compare<ParserInput<'a>> for ParserInput<'a> {
+    fn compare(&self, t: ParserInput) -> nom::CompareResult {
+        self.span.0.compare(t.as_bytes())
+    }
+
+    fn compare_no_case(&self, t: ParserInput) -> nom::CompareResult {
+        self.span.0.compare_no_case(t.as_bytes())
+    }
+}
+
+impl<'a> nom::Compare<&str> for ParserInput<'a> {
+    fn compare(&self, t: &str) -> nom::CompareResult {
+        self.span.0.compare(t)
+    }
+
+    fn compare_no_case(&self, t: &str) -> nom::CompareResult {
+        self.span.0.compare_no_case(t)
+    }
+}
+
+impl<'a> nom::ExtendInto for ParserInput<'a> {
+    type Item = char;
+
+    type Extender = String;
+
+    fn new_builder(&self) -> Self::Extender {
+        self.span.0.new_builder()
+    }
+
+    fn extend_into(&self, acc: &mut Self::Extender) {
+        self.span.0.extend_into(acc)
+    }
+}
+
+impl<'a> nom::FindSubstring<&str> for ParserInput<'a> {
+    fn find_substring(&self, substr: &str) -> Option<usize> {
+        self.span.0.find_substring(substr)
+    }
+}
+
+impl<'a> InputLength for ParserInput<'a> {
+    fn input_len(&self) -> usize {
+        self.span.0.input_len()
+    }
+}
+
+impl<'a> InputIter for ParserInput<'a> {
+    type Item = char;
+    type Iter = CharIndices<'a>;
+    type IterElem = Chars<'a>;
+
+    fn iter_indices(&self) -> Self::Iter {
+        self.span.0.iter_indices()
+    }
+
+    fn iter_elements(&self) -> Self::IterElem {
+        self.span.0.iter_elements()
+    }
+
+    fn position<P>(&self, predicate: P) -> Option<usize>
+    where
+        P: Fn(Self::Item) -> bool,
+    {
+        self.span.0.position(predicate)
+    }
+
+    fn slice_index(&self, count: usize) -> Result<usize, nom::Needed> {
+        self.span.0.slice_index(count)
+    }
+}
+
+impl InputTake for ParserInput<'_> {
+    fn take(&self, count: usize) -> Self {
+        Self {
+            span: ProgramSpan(self.span.0.take(count)),
+            state: self.state.clone(),
+        }
+    }
+
+    fn take_split(&self, count: usize) -> (Self, Self) {
+        let (first, second) = self.span.0.take_split(count);
+        (
+            Self {
+                span: ProgramSpan(first),
+                state: self.state.clone(),
+            },
+            Self {
+                span: ProgramSpan(second),
+                state: self.state.clone(),
+            },
+        )
+    }
+}
+
+impl InputTakeAtPosition for ParserInput<'_> {
+    type Item = char;
+
+    fn split_at_position<P, E: ParseError<Self>>(
+        &self,
+        predicate: P,
+    ) -> IResult<Self, Self, E>
+    where
+        P: Fn(Self::Item) -> bool,
+    {
+        match self.span.0.position(predicate) {
+            Some(n) => Ok(self.take_split(n)),
+            None => Err(nom::Err::Incomplete(nom::Needed::new(1))),
+        }
+    }
+
+    fn split_at_position1<P, E: ParseError<Self>>(
+        &self,
+        _predicate: P,
+        _e: ErrorKind,
+    ) -> IResult<Self, Self, E>
+    where
+        P: Fn(Self::Item) -> bool,
+    {
+        // self.input.0.split_at_position1(predicate, e)
+        todo!()
+    }
+
+    fn split_at_position_complete<P, E: ParseError<Self>>(
+        &self,
+        predicate: P,
+    ) -> IResult<Self, Self, E>
+    where
+        P: Fn(Self::Item) -> bool,
+    {
+        match self.split_at_position(predicate) {
+            Err(nom::Err::Incomplete(_)) => Ok(self.take_split(self.input_len())),
+            res => res,
+        }
+    }
+
+    fn split_at_position1_complete<P, E: ParseError<Self>>(
+        &self,
+        predicate: P,
+        e: ErrorKind,
+    ) -> IResult<Self, Self, E>
+    where
+        P: Fn(Self::Item) -> bool,
+    {
+        match self.span.0.fragment().position(predicate) {
+            Some(0) => Err(nom::Err::Error(E::from_error_kind(self.clone(), e))),
+            Some(n) => Ok(self.take_split(n)),
+            None => {
+                if self.span.0.fragment().input_len() == 0 {
+                    Err(nom::Err::Error(E::from_error_kind(self.clone(), e)))
+                } else {
+                    Ok(self.take_split(self.input_len()))
+                }
+            }
+        }
+    }
+}
+
+impl nom::Offset for ParserInput<'_> {
+    fn offset(&self, second: &Self) -> usize {
+        self.span.0.offset(&second.span.0)
+    }
+}
+
+impl<R: std::str::FromStr> nom::ParseTo<R> for ParserInput<'_> {
+    fn parse_to(&self) -> Option<R> {
+        todo!()
+    }
+}
+
+impl<'a, R> nom::Slice<R> for ParserInput<'a>
+where
+    &'a str: nom::Slice<R>,
+{
+    fn slice(&self, range: R) -> Self {
+        ParserInput {
+            span: ProgramSpan(self.span.0.slice(range)),
+            state: self.state.clone(),
+        }
+    }
+}
+
+impl nom_greedyerror::Position for ParserInput<'_> {
+    fn position(&self) -> usize {
+        nom_greedyerror::Position::position(&self.span.0)
+    }
+}
+
+impl std::fmt::Display for ParserInput<'_> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(
+            f,
+            "line {}, column {}",
+            self.span.0.location_line(),
+            self.span.0.get_utf8_column()
+        )
+    }
+}
diff --git a/nemo/src/parser/lsp.rs b/nemo/src/parser/lsp.rs
new file mode 100644
index 000000000..434a10e60
--- /dev/null
+++ b/nemo/src/parser/lsp.rs
@@ -0,0 +1,37 @@
+//! This module defines traits and data structures
+//! relating to the language server protocol support.
+//! TODO: Document this better
+
+use tower_lsp::lsp_types::SymbolKind;
+
+use super::span::CharacterRange;
+
+/// An LSP Identifier
+#[derive(Debug)]
+pub struct LSPIdentifier {
+    identifier: String,
+    scope: String,
+}
+
+/// Information about the symbol
+#[derive(Debug)]
+pub struct LSPSymbolInfo {
+    name: String,
+    kind: SymbolKind,
+}
+
+/// Trait implemented by objects that correspond
+/// to objects identified by the LSP
+pub trait LSPComponent {
+    /// Return an [LSPIdentifier].
+    ///
+    /// The identifier scope will scope this identifier up to any [`AstNode`]
+    /// that has an identifier that has this node's identifier scope as a prefix.
+    ///
+    /// This can be used to restrict rename operations to be local, e.g. for variable identifiers inside of rules.
+    fn identifier(&self) -> Option<LSPIdentifier>;
+    /// Return information about this symbol, e.g. for syntax highlighting
+    fn symbol_info(&self) -> Option<LSPSymbolInfo>;
+    /// Range of the part of the node that should be renamed or [None] if the node can not be renamed
+    fn range_renaming(&self) -> Option<CharacterRange>;
+}
diff --git a/nemo/src/parser/span.rs b/nemo/src/parser/span.rs
new file mode 100644
index 000000000..a34005875
--- /dev/null
+++ b/nemo/src/parser/span.rs
@@ -0,0 +1,124 @@
+//! This module defines data structures that mark spans of text in an input file.
+
+use std::ops::Range;
+
+use nom_locate::LocatedSpan;
+
+/// Locates a certain character within a file,
+/// giving its offset, line and column number
+#[derive(Debug, Clone, Copy, Eq)]
+pub struct CharacterPosition {
+    /// Index of the character in the source file
+    pub offset: usize,
+    /// Line where the character occurs (starting with 1)
+    pub line: u32,
+    /// Column where the character occurs (starting with 1)
+    pub column: u32,
+}
+
+impl CharacterPosition {
+    /// Return a one character range at this position
+    pub fn range(&self) -> Range<usize> {
+        self.offset..(self.offset + 1)
+    }
+}
+
+// TODO: Remove this once error is cleaned up
+impl Default for CharacterPosition {
+    fn default() -> Self {
+        Self {
+            offset: Default::default(),
+            line: Default::default(),
+            column: Default::default(),
+        }
+    }
+}
+
+impl PartialEq for CharacterPosition {
+    fn eq(&self, other: &Self) -> bool {
+        self.offset == other.offset
+    }
+}
+
+impl PartialOrd for CharacterPosition {
+    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl Ord for CharacterPosition {
+    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
+        self.offset.cmp(&other.offset)
+    }
+}
+
+/// Describes a region of text with [CharacterPosition]s
+#[derive(Debug, Clone, Copy)]
+pub struct CharacterRange {
+    /// Start position
+    pub start: CharacterPosition,
+    /// End position
+    pub end: CharacterPosition,
+}
+
+/// Marker for a region of text within a string slice
+#[derive(Debug, Clone, Copy)]
+pub struct ProgramSpan<'a>(pub(crate) LocatedSpan<&'a str>);
+
+impl<'a> From<LocatedSpan<&'a str>> for ProgramSpan<'a> {
+    fn from(value: LocatedSpan<&'a str>) -> Self {
+        Self(value)
+    }
+}
+
+impl<'a> ProgramSpan<'a> {
+    /// Compute the [CharacterRange] for this region of text.
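+    ///
+    /// Start and end both use 1-based line and column numbers; the end offset
+    /// points one character past the last character of the span.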
+    pub fn range(&self) -> CharacterRange {
+        let start = CharacterPosition {
+            offset: self.0.location_offset(),
+            line: self.0.location_line(),
+            column: u32::try_from(self.0.get_utf8_column())
+                .expect("cannot convert column number to u32"),
+        };
+
+        let end_offset = start.offset + self.0.fragment().len();
+        let end_line = start.line
+            + u32::try_from(self.0.fragment().lines().count() - 1)
+                .expect("cannot convert line number to u32");
+        let end_column = if self.0.fragment().lines().count() > 1 {
+            u32::try_from(
+                1 + self
+                    .0
+                    .fragment()
+                    .lines()
+                    .last()
+                    .expect("there is at least one line")
+                    .len(),
+            )
+            .expect("cannot convert column number to u32")
+        } else {
+            start.column
+                + u32::try_from(self.0.fragment().len()).expect("cannot convert text range to u32")
+        };
+
+        let end = CharacterPosition {
+            offset: end_offset,
+            line: end_line,
+            column: end_column,
+        };
+
+        CharacterRange { start, end }
+    }
+
+    /// Create a span that extends from the start of `self`
+    /// up to (but excluding) the start of `rest`.
+    ///
+    /// Assumes that `rest` points into the same input as `self`
+    /// and does not start before it; otherwise the offset
+    /// arithmetic below would be incorrect.
+    pub fn until_rest(&self, rest: &Self) -> Self {
+        unsafe {
+            Self(LocatedSpan::new_from_raw_offset(
+                self.0.location_offset(),
+                self.0.location_line(),
+                &self.0[..(rest.0.location_offset() - self.0.location_offset())],
+                (),
+            ))
+        }
+    }
+}
diff --git a/nemo/src/rule_model.rs b/nemo/src/rule_model.rs
index 37704a45f..3c07f0a46 100644
--- a/nemo/src/rule_model.rs
+++ b/nemo/src/rule_model.rs
@@ -6,6 +6,6 @@ pub mod util;
 pub(crate) mod origin;
 pub(crate) mod syntax;
 
-pub mod component;
+pub mod components;
 pub mod error;
 pub mod program;
diff --git a/nemo/src/rule_model/component.rs b/nemo/src/rule_model/components.rs
similarity index 84%
rename from nemo/src/rule_model/component.rs
rename to nemo/src/rule_model/components.rs
index 272abf69b..3e7012063 100644
--- a/nemo/src/rule_model/component.rs
+++ b/nemo/src/rule_model/components.rs
@@ -14,7 +14,7 @@ use std::fmt::{Debug, Display};
 
 use term::primitive::variable::Variable;
 
-use super::{error::ProgramConstructionError, origin::Origin};
+use super::{error::ProgramValidationError, origin::Origin};
 
 /// Name of a term
 #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
@@ -39,9 +39,9 @@ impl Display for Tag {
 }
 
 /// Trait implemented by objects that are part of the logical rule model of the nemo language.
-pub trait ProgramComponent: Debug + Display + Clone + PartialEq + Eq {
+pub trait ProgramComponent: Debug + Display {
     /// Construct this object from a string.
-    fn parse(_string: &str) -> Result<Self, ProgramConstructionError>
+    fn parse(_string: &str) -> Result<Self, ProgramValidationError>
     where
         Self: Sized;
 
@@ -54,13 +54,13 @@ pub trait ProgramComponent: Debug + Display + Clone + PartialEq + Eq {
         Self: Sized;
 
     /// Validate this component
-    fn validate(&self) -> Result<(), ProgramConstructionError>
+    fn validate(&self) -> Result<(), ProgramValidationError>
     where
         Self: Sized;
 }
 
 /// Trait implemented by program components that allow iterating over [Variable]s
-pub trait IteratableVariables {
+pub trait IterableVariables {
     /// Return an iterator over all [Variable]s contained within this program component.
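+    ///
+    /// A possible usage sketch (illustrative only; assumes the `atom!` macro
+    /// and the `Atom` implementation from this module tree):
+    /// ```ignore
+    /// let atom = atom!("p"; ?x, ?y);
+    /// let variables: Vec<_> = atom.variables().collect();
+    /// assert_eq!(variables.len(), 2);
+    /// ```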
fn variables<'a>(&'a self) -> Box + 'a>; diff --git a/nemo/src/rule_model/component/atom.rs b/nemo/src/rule_model/components/atom.rs similarity index 88% rename from nemo/src/rule_model/component/atom.rs rename to nemo/src/rule_model/components/atom.rs index 5bfc30246..0aa511497 100644 --- a/nemo/src/rule_model/component/atom.rs +++ b/nemo/src/rule_model/components/atom.rs @@ -2,11 +2,11 @@ use std::{fmt::Display, hash::Hash}; -use crate::rule_model::{error::ProgramConstructionError, origin::Origin}; +use crate::rule_model::{error::ProgramValidationError, origin::Origin}; use super::{ term::{primitive::variable::Variable, Term}, - IteratableVariables, ProgramComponent, Tag, + IterableVariables, ProgramComponent, Tag, }; /// Atom @@ -36,7 +36,7 @@ macro_rules! atom { ($name:tt; $($tt:tt)*) => {{ let mut terms = Vec::new(); term_list!(terms; $($tt)*); - crate::rule_model::component::atom::Atom::new($name, terms) + crate::rule_model::components::atom::Atom::new($name, terms) }}; } @@ -93,7 +93,7 @@ impl Hash for Atom { } impl ProgramComponent for Atom { - fn parse(_string: &str) -> Result + fn parse(_string: &str) -> Result where Self: Sized, { @@ -112,7 +112,7 @@ impl ProgramComponent for Atom { self } - fn validate(&self) -> Result<(), ProgramConstructionError> + fn validate(&self) -> Result<(), ProgramValidationError> where Self: Sized, { @@ -128,7 +128,7 @@ impl ProgramComponent for Atom { } } -impl IteratableVariables for Atom { +impl IterableVariables for Atom { fn variables<'a>(&'a self) -> Box + 'a> { Box::new(self.terms.iter().flat_map(|term| term.variables())) } @@ -140,7 +140,7 @@ impl IteratableVariables for Atom { #[cfg(test)] mod test { - use crate::rule_model::component::{term::primitive::variable::Variable, IteratableVariables}; + use crate::rule_model::components::{term::primitive::variable::Variable, IterableVariables}; #[test] fn atom_basic() { diff --git a/nemo/src/rule_model/component/base.rs b/nemo/src/rule_model/components/base.rs similarity index 95% rename from nemo/src/rule_model/component/base.rs rename to nemo/src/rule_model/components/base.rs index ead9f7ac4..c3733f66c 100644 --- a/nemo/src/rule_model/component/base.rs +++ b/nemo/src/rule_model/components/base.rs @@ -45,7 +45,7 @@ impl Hash for Base { } impl ProgramComponent for Base { - fn parse(_string: &str) -> Result + fn parse(_string: &str) -> Result where Self: Sized, { @@ -64,7 +64,7 @@ impl ProgramComponent for Base { self } - fn validate(&self) -> Result<(), crate::rule_model::error::ProgramConstructionError> + fn validate(&self) -> Result<(), crate::rule_model::error::ProgramValidationError> where Self: Sized, { diff --git a/nemo/src/rule_model/component/fact.rs b/nemo/src/rule_model/components/fact.rs similarity index 97% rename from nemo/src/rule_model/component/fact.rs rename to nemo/src/rule_model/components/fact.rs index 64c883893..2f2cec62c 100644 --- a/nemo/src/rule_model/component/fact.rs +++ b/nemo/src/rule_model/components/fact.rs @@ -75,7 +75,7 @@ impl Hash for Fact { } impl ProgramComponent for Fact { - fn parse(_string: &str) -> Result + fn parse(_string: &str) -> Result where Self: Sized, { @@ -94,7 +94,7 @@ impl ProgramComponent for Fact { self } - fn validate(&self) -> Result<(), crate::rule_model::error::ProgramConstructionError> + fn validate(&self) -> Result<(), crate::rule_model::error::ProgramValidationError> where Self: Sized, { diff --git a/nemo/src/rule_model/component/import_export.rs b/nemo/src/rule_model/components/import_export.rs similarity index 97% rename from 
nemo/src/rule_model/component/import_export.rs rename to nemo/src/rule_model/components/import_export.rs index 3567aed3f..4214ff7b8 100644 --- a/nemo/src/rule_model/component/import_export.rs +++ b/nemo/src/rule_model/components/import_export.rs @@ -104,7 +104,7 @@ impl Display for ImportDirective { } impl ProgramComponent for ImportDirective { - fn parse(_string: &str) -> Result + fn parse(_string: &str) -> Result where Self: Sized, { @@ -123,7 +123,7 @@ impl ProgramComponent for ImportDirective { self } - fn validate(&self) -> Result<(), crate::rule_model::error::ProgramConstructionError> + fn validate(&self) -> Result<(), crate::rule_model::error::ProgramValidationError> where Self: Sized, { @@ -176,7 +176,7 @@ impl Display for ExportDirective { } impl ProgramComponent for ExportDirective { - fn parse(_string: &str) -> Result + fn parse(_string: &str) -> Result where Self: Sized, { @@ -195,7 +195,7 @@ impl ProgramComponent for ExportDirective { self } - fn validate(&self) -> Result<(), crate::rule_model::error::ProgramConstructionError> + fn validate(&self) -> Result<(), crate::rule_model::error::ProgramValidationError> where Self: Sized, { diff --git a/nemo/src/rule_model/component/import_export/attributes.rs b/nemo/src/rule_model/components/import_export/attributes.rs similarity index 100% rename from nemo/src/rule_model/component/import_export/attributes.rs rename to nemo/src/rule_model/components/import_export/attributes.rs diff --git a/nemo/src/rule_model/component/import_export/compression.rs b/nemo/src/rule_model/components/import_export/compression.rs similarity index 100% rename from nemo/src/rule_model/component/import_export/compression.rs rename to nemo/src/rule_model/components/import_export/compression.rs diff --git a/nemo/src/rule_model/component/import_export/file_formats.rs b/nemo/src/rule_model/components/import_export/file_formats.rs similarity index 96% rename from nemo/src/rule_model/component/import_export/file_formats.rs rename to nemo/src/rule_model/components/import_export/file_formats.rs index ba322188e..51db58466 100644 --- a/nemo/src/rule_model/component/import_export/file_formats.rs +++ b/nemo/src/rule_model/components/import_export/file_formats.rs @@ -6,7 +6,7 @@ use std::{collections::HashMap, fmt::Display}; use enum_assoc::Assoc; use crate::rule_model::{ - component::import_export::attributes::ImportExportAttribute, + components::import_export::attributes::ImportExportAttribute, syntax::import_export::file_formats, }; @@ -15,7 +15,7 @@ use crate::rule_model::{ pub(crate) enum AttributeRequirement { /// Attribute is required Required, - /// Attribute is optional + /// Attribute is optional and results in the provided default Optional, } diff --git a/nemo/src/rule_model/component/literal.rs b/nemo/src/rule_model/components/literal.rs similarity index 90% rename from nemo/src/rule_model/component/literal.rs rename to nemo/src/rule_model/components/literal.rs index dd94e3983..5b5d74868 100644 --- a/nemo/src/rule_model/component/literal.rs +++ b/nemo/src/rule_model/components/literal.rs @@ -2,7 +2,7 @@ use std::{fmt::Display, hash::Hash}; -use crate::rule_model::error::ProgramConstructionError; +use crate::rule_model::error::ProgramValidationError; use super::{atom::Atom, term::operation::Operation, ProgramComponent}; @@ -32,7 +32,7 @@ impl Display for Literal { } impl ProgramComponent for Literal { - fn parse(_string: &str) -> Result + fn parse(_string: &str) -> Result where Self: Sized, { @@ -58,7 +58,7 @@ impl ProgramComponent for Literal { } } - fn 
validate(&self) -> Result<(), ProgramConstructionError> + fn validate(&self) -> Result<(), ProgramValidationError> where Self: Sized, { diff --git a/nemo/src/rule_model/component/output.rs b/nemo/src/rule_model/components/output.rs similarity index 96% rename from nemo/src/rule_model/component/output.rs rename to nemo/src/rule_model/components/output.rs index dbb5665e5..b1dbb72fa 100644 --- a/nemo/src/rule_model/component/output.rs +++ b/nemo/src/rule_model/components/output.rs @@ -47,7 +47,7 @@ impl Hash for Output { } impl ProgramComponent for Output { - fn parse(_string: &str) -> Result + fn parse(_string: &str) -> Result where Self: Sized, { @@ -66,7 +66,7 @@ impl ProgramComponent for Output { self } - fn validate(&self) -> Result<(), crate::rule_model::error::ProgramConstructionError> + fn validate(&self) -> Result<(), crate::rule_model::error::ProgramValidationError> where Self: Sized, { diff --git a/nemo/src/rule_model/component/rule.rs b/nemo/src/rule_model/components/rule.rs similarity index 99% rename from nemo/src/rule_model/component/rule.rs rename to nemo/src/rule_model/components/rule.rs index 96ea1050d..0281c1e88 100644 --- a/nemo/src/rule_model/component/rule.rs +++ b/nemo/src/rule_model/components/rule.rs @@ -106,7 +106,7 @@ impl Hash for Rule { } impl ProgramComponent for Rule { - fn parse(_string: &str) -> Result + fn parse(_string: &str) -> Result where Self: Sized, { @@ -125,7 +125,7 @@ impl ProgramComponent for Rule { self } - fn validate(&self) -> Result<(), crate::rule_model::error::ProgramConstructionError> + fn validate(&self) -> Result<(), crate::rule_model::error::ProgramValidationError> where Self: Sized, { diff --git a/nemo/src/rule_model/component/term.rs b/nemo/src/rule_model/components/term.rs similarity index 95% rename from nemo/src/rule_model/component/term.rs rename to nemo/src/rule_model/components/term.rs index 0c06ec120..2134a5d6d 100644 --- a/nemo/src/rule_model/component/term.rs +++ b/nemo/src/rule_model/components/term.rs @@ -26,9 +26,9 @@ use primitive::{ }; use tuple::Tuple; -use crate::rule_model::{error::ProgramConstructionError, origin::Origin}; +use crate::rule_model::{error::ProgramValidationError, origin::Origin}; -use super::{IteratableVariables, ProgramComponent}; +use super::{IterableVariables, ProgramComponent}; /// Term /// @@ -166,7 +166,7 @@ impl Display for Term { } impl ProgramComponent for Term { - fn parse(_string: &str) -> Result + fn parse(_string: &str) -> Result where Self: Sized, { @@ -196,7 +196,7 @@ impl ProgramComponent for Term { } } - fn validate(&self) -> Result<(), ProgramConstructionError> + fn validate(&self) -> Result<(), ProgramValidationError> where Self: Sized, { @@ -204,7 +204,7 @@ impl ProgramComponent for Term { } } -impl IteratableVariables for Term { +impl IterableVariables for Term { fn variables<'a>(&'a self) -> Box + 'a> { let mut iter_primitive = None; let mut iter_function = None; diff --git a/nemo/src/rule_model/component/term/aggregate.rs b/nemo/src/rule_model/components/term/aggregate.rs similarity index 82% rename from nemo/src/rule_model/component/term/aggregate.rs rename to nemo/src/rule_model/components/term/aggregate.rs index 56ed4cb55..d1b1a8562 100644 --- a/nemo/src/rule_model/component/term/aggregate.rs +++ b/nemo/src/rule_model/components/term/aggregate.rs @@ -1,52 +1,56 @@ //! 
This module defines [Aggregate] +#![allow(missing_docs)] use std::{fmt::Display, hash::Hash}; +use enum_assoc::Assoc; +use strum_macros::EnumIter; + use crate::rule_model::{ - component::{IteratableVariables, ProgramComponent}, + components::{IterableVariables, ProgramComponent}, origin::Origin, + syntax::aggregates, }; use super::{primitive::variable::Variable, Term}; /// Aggregate operation on logical values -#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +#[derive(Assoc, EnumIter, Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +#[func(pub fn name(&self) -> &'static str)] +#[func(pub fn from_name(name: &str) -> Option)] pub enum AggregateKind { /// Count of distinct values + #[assoc(name = aggregates::AGGREGATE_COUNT)] CountValues, /// Minimum numerical value + #[assoc(name = aggregates::AGGREGATE_MIN)] MinNumber, /// Maximum numerical value + #[assoc(name = aggregates::AGGREGATE_MAX)] MaxNumber, /// Sum of numerical values + #[assoc(name = aggregates::AGGREGATE_SUM)] SumOfNumbers, } impl Display for AggregateKind { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let name = match self { - AggregateKind::CountValues => "count", - AggregateKind::MinNumber => "min", - AggregateKind::MaxNumber => "max", - AggregateKind::SumOfNumbers => "sum", - }; - - f.write_fmt(format_args!("#{}", name)) + f.write_fmt(format_args!("#{}", self.name())) } } /// Aggregate /// -/// Function that performs a computatin over a set of [Term]s +/// Function that performs a computation over a set of [Term]s /// and returns a single value. #[derive(Debug, Clone, Eq)] pub struct Aggregate { /// Origin of this component origin: Origin, - /// Type of aggrgate operation + /// Type of aggregate operation kind: AggregateKind, - /// Expression over which to aggragte + /// Expression over which to aggregate aggregate: Term, /// Distinct variables distinct: Vec, @@ -133,7 +137,7 @@ impl Hash for Aggregate { } impl ProgramComponent for Aggregate { - fn parse(_string: &str) -> Result + fn parse(_string: &str) -> Result where Self: Sized, { @@ -152,7 +156,7 @@ impl ProgramComponent for Aggregate { self } - fn validate(&self) -> Result<(), crate::rule_model::error::ProgramConstructionError> + fn validate(&self) -> Result<(), crate::rule_model::error::ProgramValidationError> where Self: Sized, { @@ -160,7 +164,7 @@ impl ProgramComponent for Aggregate { } } -impl IteratableVariables for Aggregate { +impl IterableVariables for Aggregate { fn variables<'a>(&'a self) -> Box + 'a> { Box::new(self.aggregate.variables().chain(self.distinct.iter())) } diff --git a/nemo/src/rule_model/component/term/function.rs b/nemo/src/rule_model/components/term/function.rs similarity index 89% rename from nemo/src/rule_model/component/term/function.rs rename to nemo/src/rule_model/components/term/function.rs index 392cd8430..8bfe3ec86 100644 --- a/nemo/src/rule_model/component/term/function.rs +++ b/nemo/src/rule_model/components/term/function.rs @@ -3,8 +3,8 @@ use std::{fmt::Display, hash::Hash}; use crate::rule_model::{ - component::{IteratableVariables, ProgramComponent, Tag}, - error::ProgramConstructionError, + components::{IterableVariables, ProgramComponent, Tag}, + error::ProgramValidationError, origin::Origin, }; @@ -35,7 +35,7 @@ macro_rules! 
function { ($name:tt; $($tt:tt)*) => {{ let mut terms = Vec::new(); term_list!(terms; $($tt)*); - crate::rule_model::component::term::function::FunctionTerm::new($name,terms) + crate::rule_model::components::term::function::FunctionTerm::new($name,terms) }}; } @@ -100,7 +100,7 @@ impl Hash for FunctionTerm { } impl ProgramComponent for FunctionTerm { - fn parse(_string: &str) -> Result + fn parse(_string: &str) -> Result where Self: Sized, { @@ -119,7 +119,7 @@ impl ProgramComponent for FunctionTerm { self } - fn validate(&self) -> Result<(), ProgramConstructionError> + fn validate(&self) -> Result<(), ProgramValidationError> where Self: Sized, { @@ -135,7 +135,7 @@ impl ProgramComponent for FunctionTerm { } } -impl IteratableVariables for FunctionTerm { +impl IterableVariables for FunctionTerm { fn variables<'a>(&'a self) -> Box + 'a> { Box::new(self.terms.iter().flat_map(|term| term.variables())) } @@ -147,7 +147,7 @@ impl IteratableVariables for FunctionTerm { #[cfg(test)] mod test { - use crate::rule_model::component::{term::primitive::variable::Variable, IteratableVariables}; + use crate::rule_model::components::{term::primitive::variable::Variable, IterableVariables}; #[test] fn function_basic() { diff --git a/nemo/src/rule_model/component/term/map.rs b/nemo/src/rule_model/components/term/map.rs similarity index 95% rename from nemo/src/rule_model/component/term/map.rs rename to nemo/src/rule_model/components/term/map.rs index c78dd579f..9a755590e 100644 --- a/nemo/src/rule_model/component/term/map.rs +++ b/nemo/src/rule_model/components/term/map.rs @@ -3,7 +3,7 @@ use std::{fmt::Display, hash::Hash}; use crate::rule_model::{ - component::{IteratableVariables, ProgramComponent, Tag}, + components::{IterableVariables, ProgramComponent, Tag}, origin::Origin, }; @@ -84,7 +84,7 @@ impl Hash for Map { } impl ProgramComponent for Map { - fn parse(_string: &str) -> Result + fn parse(_string: &str) -> Result where Self: Sized, { @@ -103,7 +103,7 @@ impl ProgramComponent for Map { self } - fn validate(&self) -> Result<(), crate::rule_model::error::ProgramConstructionError> + fn validate(&self) -> Result<(), crate::rule_model::error::ProgramValidationError> where Self: Sized, { @@ -111,7 +111,7 @@ impl ProgramComponent for Map { } } -impl IteratableVariables for Map { +impl IterableVariables for Map { fn variables<'a>(&'a self) -> Box + 'a> { Box::new( self.map diff --git a/nemo/src/rule_model/component/term/operation.rs b/nemo/src/rule_model/components/term/operation.rs similarity index 97% rename from nemo/src/rule_model/component/term/operation.rs rename to nemo/src/rule_model/components/term/operation.rs index 7a6474fd2..685ff15d5 100644 --- a/nemo/src/rule_model/component/term/operation.rs +++ b/nemo/src/rule_model/components/term/operation.rs @@ -7,7 +7,7 @@ use std::{fmt::Display, hash::Hash}; use operation_kind::OperationKind; use crate::rule_model::{ - component::{IteratableVariables, ProgramComponent}, + components::{IterableVariables, ProgramComponent}, origin::Origin, }; @@ -157,7 +157,7 @@ impl Hash for Operation { } impl ProgramComponent for Operation { - fn parse(_string: &str) -> Result + fn parse(_string: &str) -> Result where Self: Sized, { @@ -176,7 +176,7 @@ impl ProgramComponent for Operation { self } - fn validate(&self) -> Result<(), crate::rule_model::error::ProgramConstructionError> + fn validate(&self) -> Result<(), crate::rule_model::error::ProgramValidationError> where Self: Sized, { @@ -184,7 +184,7 @@ impl ProgramComponent for Operation { } } -impl 
IteratableVariables for Operation { +impl IterableVariables for Operation { fn variables<'a>(&'a self) -> Box + 'a> { Box::new(self.subterms.iter().flat_map(|term| term.variables())) } diff --git a/nemo/src/rule_model/component/term/operation/operation_kind.rs b/nemo/src/rule_model/components/term/operation/operation_kind.rs similarity index 99% rename from nemo/src/rule_model/component/term/operation/operation_kind.rs rename to nemo/src/rule_model/components/term/operation/operation_kind.rs index 2e9032bac..ea48ad193 100644 --- a/nemo/src/rule_model/component/term/operation/operation_kind.rs +++ b/nemo/src/rule_model/components/term/operation/operation_kind.rs @@ -4,6 +4,7 @@ use std::fmt::Display; use enum_assoc::Assoc; +use strum_macros::EnumIter; use crate::rule_model::syntax::builtins; @@ -38,7 +39,7 @@ impl OperationNumArguments { } /// Supported operations -#[derive(Assoc, Debug, Copy, Clone, PartialEq, Eq, Hash, PartialOrd)] +#[derive(Assoc, EnumIter, Debug, Copy, Clone, PartialEq, Eq, Hash, PartialOrd)] #[func(pub fn name(&self) -> &'static str)] #[func(pub fn from_name(name: &str) -> Option)] #[func(pub fn num_arguments(&self) -> OperationNumArguments)] diff --git a/nemo/src/rule_model/component/term/primitive.rs b/nemo/src/rule_model/components/term/primitive.rs similarity index 96% rename from nemo/src/rule_model/component/term/primitive.rs rename to nemo/src/rule_model/components/term/primitive.rs index 846870004..0f3e9f34a 100644 --- a/nemo/src/rule_model/component/term/primitive.rs +++ b/nemo/src/rule_model/components/term/primitive.rs @@ -10,7 +10,7 @@ use nemo_physical::datavalues::AnyDataValue; use variable::{existential::ExistentialVariable, universal::UniversalVariable, Variable}; use crate::rule_model::{ - component::{IteratableVariables, ProgramComponent}, + components::{IterableVariables, ProgramComponent}, origin::Origin, }; @@ -103,7 +103,7 @@ impl Display for Primitive { } impl ProgramComponent for Primitive { - fn parse(_string: &str) -> Result + fn parse(_string: &str) -> Result where Self: Sized, { @@ -127,7 +127,7 @@ impl ProgramComponent for Primitive { } } - fn validate(&self) -> Result<(), crate::rule_model::error::ProgramConstructionError> + fn validate(&self) -> Result<(), crate::rule_model::error::ProgramValidationError> where Self: Sized, { @@ -138,7 +138,7 @@ impl ProgramComponent for Primitive { } } -impl IteratableVariables for Primitive { +impl IterableVariables for Primitive { fn variables<'a>(&'a self) -> Box + 'a> { Box::new( match self { diff --git a/nemo/src/rule_model/component/term/primitive/ground.rs b/nemo/src/rule_model/components/term/primitive/ground.rs similarity index 92% rename from nemo/src/rule_model/component/term/primitive/ground.rs rename to nemo/src/rule_model/components/term/primitive/ground.rs index 9dbdbebe8..9fa365bac 100644 --- a/nemo/src/rule_model/component/term/primitive/ground.rs +++ b/nemo/src/rule_model/components/term/primitive/ground.rs @@ -5,7 +5,7 @@ use std::{fmt::Display, hash::Hash}; use nemo_physical::datavalues::{AnyDataValue, IriDataValue}; use crate::rule_model::{ - component::ProgramComponent, error::ProgramConstructionError, origin::Origin, + components::ProgramComponent, error::ProgramValidationError, origin::Origin, }; /// Primitive ground term @@ -97,7 +97,7 @@ impl Hash for GroundTerm { } impl ProgramComponent for GroundTerm { - fn parse(_string: &str) -> Result + fn parse(_string: &str) -> Result where Self: Sized, { @@ -116,7 +116,7 @@ impl ProgramComponent for GroundTerm { self } - fn 
validate(&self) -> Result<(), ProgramConstructionError> + fn validate(&self) -> Result<(), ProgramValidationError> where Self: Sized, { diff --git a/nemo/src/rule_model/component/term/primitive/variable.rs b/nemo/src/rule_model/components/term/primitive/variable.rs similarity index 93% rename from nemo/src/rule_model/component/term/primitive/variable.rs rename to nemo/src/rule_model/components/term/primitive/variable.rs index 7f0dcaaed..9e4e580a8 100644 --- a/nemo/src/rule_model/component/term/primitive/variable.rs +++ b/nemo/src/rule_model/components/term/primitive/variable.rs @@ -5,7 +5,7 @@ use std::fmt::Display; use existential::ExistentialVariable; use universal::UniversalVariable; -use crate::rule_model::{error::ProgramConstructionError, origin::Origin}; +use crate::rule_model::{error::ProgramValidationError, origin::Origin}; use super::ProgramComponent; @@ -93,7 +93,7 @@ impl Display for Variable { } impl ProgramComponent for Variable { - fn parse(_string: &str) -> Result + fn parse(_string: &str) -> Result where Self: Sized, { @@ -117,7 +117,7 @@ impl ProgramComponent for Variable { } } - fn validate(&self) -> Result<(), ProgramConstructionError> + fn validate(&self) -> Result<(), ProgramValidationError> where Self: Sized, { diff --git a/nemo/src/rule_model/component/term/primitive/variable/existential.rs b/nemo/src/rule_model/components/term/primitive/variable/existential.rs similarity index 88% rename from nemo/src/rule_model/component/term/primitive/variable/existential.rs rename to nemo/src/rule_model/components/term/primitive/variable/existential.rs index e33f8958a..7213cddfb 100644 --- a/nemo/src/rule_model/component/term/primitive/variable/existential.rs +++ b/nemo/src/rule_model/components/term/primitive/variable/existential.rs @@ -3,7 +3,7 @@ use std::{fmt::Display, hash::Hash}; use crate::rule_model::{ - component::ProgramComponent, error::ProgramConstructionError, origin::Origin, + components::ProgramComponent, error::ProgramValidationError, origin::Origin, }; use super::VariableName; @@ -60,7 +60,7 @@ impl Hash for ExistentialVariable { } impl ProgramComponent for ExistentialVariable { - fn parse(_string: &str) -> Result + fn parse(_string: &str) -> Result where Self: Sized, { @@ -79,7 +79,7 @@ impl ProgramComponent for ExistentialVariable { self } - fn validate(&self) -> Result<(), ProgramConstructionError> + fn validate(&self) -> Result<(), ProgramValidationError> where Self: Sized, { diff --git a/nemo/src/rule_model/component/term/primitive/variable/universal.rs b/nemo/src/rule_model/components/term/primitive/variable/universal.rs similarity index 91% rename from nemo/src/rule_model/component/term/primitive/variable/universal.rs rename to nemo/src/rule_model/components/term/primitive/variable/universal.rs index 90fd21f2b..a510e6b12 100644 --- a/nemo/src/rule_model/component/term/primitive/variable/universal.rs +++ b/nemo/src/rule_model/components/term/primitive/variable/universal.rs @@ -3,7 +3,7 @@ use std::{fmt::Display, hash::Hash}; use crate::rule_model::{ - component::ProgramComponent, error::ProgramConstructionError, origin::Origin, + components::ProgramComponent, error::ProgramValidationError, origin::Origin, }; use super::VariableName; @@ -83,7 +83,7 @@ impl Hash for UniversalVariable { } impl ProgramComponent for UniversalVariable { - fn parse(_string: &str) -> Result { + fn parse(_string: &str) -> Result { todo!() } @@ -99,7 +99,7 @@ impl ProgramComponent for UniversalVariable { self } - fn validate(&self) -> Result<(), ProgramConstructionError> + fn 
validate(&self) -> Result<(), ProgramValidationError> where Self: Sized, { diff --git a/nemo/src/rule_model/component/term/tuple.rs b/nemo/src/rule_model/components/term/tuple.rs similarity index 90% rename from nemo/src/rule_model/component/term/tuple.rs rename to nemo/src/rule_model/components/term/tuple.rs index b10147483..21e373066 100644 --- a/nemo/src/rule_model/component/term/tuple.rs +++ b/nemo/src/rule_model/components/term/tuple.rs @@ -3,7 +3,7 @@ use std::{fmt::Display, hash::Hash}; use crate::rule_model::{ - component::{IteratableVariables, ProgramComponent}, + components::{IterableVariables, ProgramComponent}, origin::Origin, }; @@ -32,7 +32,7 @@ macro_rules! tuple { ($($tt:tt)*) => {{ let mut terms = Vec::new(); term_list!(terms; $($tt)*); - crate::rule_model::component::term::tuple::Tuple::new(terms) + crate::rule_model::components::term::tuple::Tuple::new(terms) }}; } @@ -81,7 +81,7 @@ impl PartialOrd for Tuple { } impl ProgramComponent for Tuple { - fn parse(_string: &str) -> Result + fn parse(_string: &str) -> Result where Self: Sized, { @@ -100,7 +100,7 @@ impl ProgramComponent for Tuple { self } - fn validate(&self) -> Result<(), crate::rule_model::error::ProgramConstructionError> + fn validate(&self) -> Result<(), crate::rule_model::error::ProgramValidationError> where Self: Sized, { @@ -108,7 +108,7 @@ impl ProgramComponent for Tuple { } } -impl IteratableVariables for Tuple { +impl IterableVariables for Tuple { fn variables<'a>(&'a self) -> Box + 'a> { Box::new(self.terms.iter().flat_map(|term| term.variables())) } @@ -120,7 +120,7 @@ impl IteratableVariables for Tuple { #[cfg(test)] mod test { - use crate::rule_model::component::{term::primitive::variable::Variable, IteratableVariables}; + use crate::rule_model::components::{term::primitive::variable::Variable, IterableVariables}; #[test] fn tuple_basic() { diff --git a/nemo/src/rule_model/error.rs b/nemo/src/rule_model/error.rs index f9a0ec9aa..ace65cbb1 100644 --- a/nemo/src/rule_model/error.rs +++ b/nemo/src/rule_model/error.rs @@ -1,55 +1,28 @@ -//! This module defines [ProgramConstructionError] +//! This module defines [ProgramValidationError] -use thiserror::Error; +pub mod translation_error; +pub mod validation_error; -use super::component::term::{aggregate::Aggregate, primitive::variable::Variable, Term}; +use std::fmt::Display; -/// Error returned during the construction of objects from nemo's logical rule model -#[derive(Error, Debug)] -pub enum ProgramConstructionError { - /// An existentially quantified variable occurs in the body of a rule. - #[error(r#"existential variable used in rule body: `{0}`"#)] - BodyExistential(Variable), - /// Unsafe variable used in the head of the rule. - #[error(r#"unsafe variable used in rule head: `{0}`"#)] - HeadUnsafe(Variable), - /// Anonymous variable used in the head of the rule. 
- #[error(r#"anonymous variable used in rule head"#)] - HeadAnonymous, - /// Operation with unsafe variable - #[error(r#"unsafe variable used in computation: `{0}`"#)] - OperationUnsafe(Variable), - /// Unsafe variable used in multiple negative literals - #[error(r#"unsafe variable used in multiple negative literals: `{0}`"#)] - MultipleNegativeLiteralsUnsafe(Variable), - /// Aggregate is used in body - #[error(r#"aggregate used in rule body: `{0}`"#)] - BodyAggregate(Aggregate), - /// Unsupported feature: Multiple aggregates in one rule - #[error(r#"multiple aggregates in one rule is currently unsupported"#)] - AggregateMultiple, - /// Unsupported feature: Aggregates combined with existential rules - #[error(r#"aggregates and existential variables in one rule is currently unsupported"#)] - AggregatesAndExistentials, - /// A variable is both universally and existentially quantified - #[error(r#"variable is both universal and existential: `{0}`"#)] - VariableMultipleQuantifiers(String), - /// Fact contains non-ground term - #[error(r#"non-ground term used in fact: `{0}`"#)] - FactNonGround(Term), - /// Atom used without any arguments - #[error(r#"atoms without arguments are currently unsupported"#)] - AtomNoArguments, - /// Non-primitive terms are currently unsupported - #[error(r#"complex terms are currently unsupported"#)] - ComplexTerm, - /// Invalid variable name was used - #[error(r#"variable name is invalid: `{0}`"#)] - InvalidVariableName(String), - /// Invalid tag was used - #[error(r#"tag is invalid: `{0}`"#)] - InvalidTermTag(String), - /// Invalid predicate name was used - #[error(r#"predicate name is invalid: `{0}"#)] - InvalidPredicateName(String), +use validation_error::ValidationErrorKind; + +use super::components::ProgramComponent; + +/// Error that occurs during validation of a program. +#[derive(Debug)] +pub struct ProgramValidationError { + /// The kind of error + kind: ValidationErrorKind, + /// stack of components in which the error occurred + context: Vec>, } + +impl Display for ProgramValidationError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.kind) + } +} + +// #[derive(Debug)] +// pub struct ProgramValidationErrors {} diff --git a/nemo/src/rule_model/error/translation_error.rs b/nemo/src/rule_model/error/translation_error.rs new file mode 100644 index 000000000..bc0eeb824 --- /dev/null +++ b/nemo/src/rule_model/error/translation_error.rs @@ -0,0 +1,9 @@ +//! This module defines [TranslationErrorKind] + +use thiserror::Error; + +/// Types of errors that occur +/// while translating the ASP representation of a nemo program +/// into its logical representation. +#[derive(Error, Debug, Copy, Clone)] +pub enum TranslationErrorKind {} diff --git a/nemo/src/rule_model/error/validation_error.rs b/nemo/src/rule_model/error/validation_error.rs new file mode 100644 index 000000000..1861c15e4 --- /dev/null +++ b/nemo/src/rule_model/error/validation_error.rs @@ -0,0 +1,57 @@ +//! This module defines [ValidationErrorKind]. + +use thiserror::Error; + +use crate::rule_model::components::term::{ + aggregate::Aggregate, primitive::variable::Variable, Term, +}; + +/// Types of errors that occur while building the logical rule model +#[derive(Error, Debug)] +pub enum ValidationErrorKind { + /// An existentially quantified variable occurs in the body of a rule. + #[error(r#"existential variable used in rule body: `{0}`"#)] + BodyExistential(Variable), + /// Unsafe variable used in the head of the rule. 
+ #[error(r#"unsafe variable used in rule head: `{0}`"#)] + HeadUnsafe(Variable), + /// Anonymous variable used in the head of the rule. + #[error(r#"anonymous variable used in rule head"#)] + HeadAnonymous, + /// Operation with unsafe variable + #[error(r#"unsafe variable used in computation: `{0}`"#)] + OperationUnsafe(Variable), + /// Unsafe variable used in multiple negative literals + #[error(r#"unsafe variable used in multiple negative literals: `{0}`"#)] + MultipleNegativeLiteralsUnsafe(Variable), + /// Aggregate is used in body + #[error(r#"aggregate used in rule body: `{0}`"#)] + BodyAggregate(Aggregate), + /// Unsupported feature: Multiple aggregates in one rule + #[error(r#"multiple aggregates in one rule is currently unsupported"#)] + AggregateMultiple, + /// Unsupported feature: Aggregates combined with existential rules + #[error(r#"aggregates and existential variables in one rule is currently unsupported"#)] + AggregatesAndExistentials, + /// A variable is both universally and existentially quantified + #[error(r#"variable is both universal and existential: `{0}`"#)] + VariableMultipleQuantifiers(String), + /// Fact contains non-ground term + #[error(r#"non-ground term used in fact: `{0}`"#)] + FactNonGround(Term), + /// Atom used without any arguments + #[error(r#"atoms without arguments are currently unsupported"#)] + AtomNoArguments, + /// Non-primitive terms are currently unsupported + #[error(r#"complex terms are currently unsupported"#)] + ComplexTerm, + /// Invalid variable name was used + #[error(r#"variable name is invalid: `{0}`"#)] + InvalidVariableName(String), + /// Invalid tag was used + #[error(r#"tag is invalid: `{0}`"#)] + InvalidTermTag(String), + /// Invalid predicate name was used + #[error(r#"predicate name is invalid: `{0}"#)] + InvalidPredicateName(String), +} diff --git a/nemo/src/rule_model/program.rs b/nemo/src/rule_model/program.rs index 95fdcbf80..fd6cfe90a 100644 --- a/nemo/src/rule_model/program.rs +++ b/nemo/src/rule_model/program.rs @@ -2,10 +2,10 @@ use nemo_physical::datavalues::AnyDataValue; -use crate::{io::parser::ast, rule_model::component::term::tuple::Tuple}; +use crate::{io::parser::ast, rule_model::components::term::tuple::Tuple}; use super::{ - component::{ + components::{ atom::Atom, base::Base, fact::Fact, @@ -34,8 +34,6 @@ pub struct Program { rules: Vec, /// Facts facts: Vec, - /// Base - base: Option, /// Outputs outputs: Vec, } @@ -251,8 +249,8 @@ impl Program { fn ast_build_directive(&mut self, directive: &ast::directive::Directive) { match directive { - ast::directive::Directive::Base { base_iri, .. } => { - self.base = Some(Base::new(base_iri.to_string())); + ast::directive::Directive::Base { base_iri: _, .. } => { + // self.base = Some(Base::new(base_iri.to_string())); // TODO: Set origin } ast::directive::Directive::Prefix { diff --git a/nemo/src/rule_model/syntax.rs b/nemo/src/rule_model/syntax.rs index 1ad8c3b7e..af21b4583 100644 --- a/nemo/src/rule_model/syntax.rs +++ b/nemo/src/rule_model/syntax.rs @@ -2,5 +2,6 @@ //! These are kept in one location, since they are required in various //! places related to parsing and display. +pub(crate) mod aggregates; pub(crate) mod builtins; pub(crate) mod import_export; diff --git a/nemo/src/rule_model/syntax/aggregates.rs b/nemo/src/rule_model/syntax/aggregates.rs new file mode 100644 index 000000000..262b6b540 --- /dev/null +++ b/nemo/src/rule_model/syntax/aggregates.rs @@ -0,0 +1,10 @@ +//! This module contains constants relating to aggregate names. 
+
+/// Compute the sum of a list of numbers
+pub(crate) const AGGREGATE_SUM: &str = "sum";
+/// Count the number of values
+pub(crate) const AGGREGATE_COUNT: &str = "count";
+/// Return the minimum value
+pub(crate) const AGGREGATE_MIN: &str = "min";
+/// Return the maximum value
+pub(crate) const AGGREGATE_MAX: &str = "max";
diff --git a/nemo/src/rule_model/util.rs b/nemo/src/rule_model/util.rs
index e9bb7d261..02133c880 100644
--- a/nemo/src/rule_model/util.rs
+++ b/nemo/src/rule_model/util.rs
@@ -7,26 +7,26 @@ macro_rules! term_list {
     () => {};
     // Match a single universally quantified variable
     ($terms:ident; ? $var:ident) => {
-        $terms.push(crate::rule_model::component::term::Term::universal_variable(stringify!($var)));
+        $terms.push(crate::rule_model::components::term::Term::universal_variable(stringify!($var)));
     };
     // Match universally quantified variables
     ($terms:ident; ? $var:ident, $($others:tt)* ) => {
-        $terms.push(crate::rule_model::component::term::Term::universal_variable(stringify!($var))); term_list!($terms; $($others)*)
+        $terms.push(crate::rule_model::components::term::Term::universal_variable(stringify!($var))); term_list!($terms; $($others)*)
     };
     // Match a single existentially quantified variable
     ($terms:ident; ! $var:ident) => {
-        $terms.push(crate::rule_model::component::term::Term::existential_variable(stringify!($var)));
+        $terms.push(crate::rule_model::components::term::Term::existential_variable(stringify!($var)));
     };
     // Match existentially quantified variables
     ($terms:ident; ! $var:ident, $($others:tt)* ) => {
-        $terms.push(crate::rule_model::component::term::Term::existential_variable(stringify!($var))); term_list!($terms; $($others)*)
+        $terms.push(crate::rule_model::components::term::Term::existential_variable(stringify!($var))); term_list!($terms; $($others)*)
     };
     // Match a single occurrence of anything
     ($terms:ident; $e:tt) => {
-        $terms.push(crate::rule_model::component::term::Term::from($e));
+        $terms.push(crate::rule_model::components::term::Term::from($e));
     };
     // Match a list of anything
     ($terms:ident; $e:tt, $($others:tt)* ) => {
-        $terms.push(crate::rule_model::component::term::Term::from($e)); term_list!($terms; $($others)*)
+        $terms.push(crate::rule_model::components::term::Term::from($e)); term_list!($terms; $($others)*)
     };
 }

From 154b7f3c81de19b7496e5eaecf523c966782c454 Mon Sep 17 00:00:00 2001
From: Alex Ivliev
Date: Tue, 16 Jul 2024 21:40:21 +0200
Subject: [PATCH 125/214] Remake parser for rules

---
 nemo/src/parser/ast.rs                        |   4 +-
 nemo/src/parser/ast/expression.rs             | 139 ++++++++++++++
 nemo/src/parser/ast/{ => expression}/basic.rs |   6 +
 nemo/src/parser/ast/expression/basic/blank.rs |  92 +++++++++++
 .../parser/ast/expression/basic/boolean.rs    | 112 +++++++++++++
 nemo/src/parser/ast/expression/basic/iri.rs   |  91 +++++++++++
 .../ast/{ => expression}/basic/number.rs      |   2 +-
 .../ast/expression/basic/rdf_literal.rs       | 110 +++++++++++++
 .../src/parser/ast/expression/basic/string.rs | 116 ++++++++++++++
 .../parser/ast/expression/basic/variable.rs   | 149 ++++++++++++++++++
 nemo/src/parser/ast/expression/complex.rs     |   4 +
 .../src/parser/ast/expression/complex/atom.rs | 121 ++++++++++++++
 .../parser/ast/expression/complex/tuple.rs    | 114 ++++++++++++++
 nemo/src/parser/ast/expression/sequence.rs    |   5 +
 .../ast/expression/sequence/key_value.rs      |   1 +
 .../src/parser/ast/expression/sequence/one.rs | 129 +++++++++++++++
 .../parser/ast/expression/sequence/simple.rs  | 101 ++++++++++++
 nemo/src/parser/ast/program.rs                |  10 +-
 nemo/src/parser/ast/rule.rs                   | 119 ++++++++++++++
nemo/src/parser/ast/tag.rs | 69 ++++++++ nemo/src/parser/ast/token.rs | 70 ++++++-- nemo/src/parser/context.rs | 30 ++++ nemo/src/rule_model/components.rs | 6 +- nemo/src/rule_model/components/atom.rs | 6 +- nemo/src/rule_model/components/base.rs | 4 +- nemo/src/rule_model/components/fact.rs | 4 +- .../rule_model/components/import_export.rs | 8 +- nemo/src/rule_model/components/literal.rs | 6 +- nemo/src/rule_model/components/output.rs | 4 +- nemo/src/rule_model/components/rule.rs | 4 +- nemo/src/rule_model/components/term.rs | 6 +- .../rule_model/components/term/aggregate.rs | 4 +- .../rule_model/components/term/function.rs | 6 +- nemo/src/rule_model/components/term/map.rs | 4 +- .../rule_model/components/term/operation.rs | 4 +- .../rule_model/components/term/primitive.rs | 4 +- .../components/term/primitive/ground.rs | 6 +- .../components/term/primitive/variable.rs | 6 +- .../term/primitive/variable/existential.rs | 8 +- .../term/primitive/variable/universal.rs | 8 +- nemo/src/rule_model/components/term/tuple.rs | 4 +- nemo/src/rule_model/error.rs | 12 +- .../src/rule_model/error/translation_error.rs | 6 +- 43 files changed, 1635 insertions(+), 79 deletions(-) create mode 100644 nemo/src/parser/ast/expression.rs rename nemo/src/parser/ast/{ => expression}/basic.rs (50%) create mode 100644 nemo/src/parser/ast/expression/basic/blank.rs create mode 100644 nemo/src/parser/ast/expression/basic/boolean.rs create mode 100644 nemo/src/parser/ast/expression/basic/iri.rs rename nemo/src/parser/ast/{ => expression}/basic/number.rs (98%) create mode 100644 nemo/src/parser/ast/expression/basic/rdf_literal.rs create mode 100644 nemo/src/parser/ast/expression/basic/string.rs create mode 100644 nemo/src/parser/ast/expression/basic/variable.rs create mode 100644 nemo/src/parser/ast/expression/complex.rs create mode 100644 nemo/src/parser/ast/expression/complex/atom.rs create mode 100644 nemo/src/parser/ast/expression/complex/tuple.rs create mode 100644 nemo/src/parser/ast/expression/sequence.rs create mode 100644 nemo/src/parser/ast/expression/sequence/key_value.rs create mode 100644 nemo/src/parser/ast/expression/sequence/one.rs create mode 100644 nemo/src/parser/ast/expression/sequence/simple.rs create mode 100644 nemo/src/parser/ast/rule.rs create mode 100644 nemo/src/parser/ast/tag.rs diff --git a/nemo/src/parser/ast.rs b/nemo/src/parser/ast.rs index 5b46908bb..edd85be28 100644 --- a/nemo/src/parser/ast.rs +++ b/nemo/src/parser/ast.rs @@ -1,7 +1,9 @@ //! This module defines the abstract syntax tree representation of a nemo program. -pub mod basic; +pub mod expression; pub mod program; +pub mod rule; +pub mod tag; pub mod token; use super::{span::ProgramSpan, ParserInput, ParserResult}; diff --git a/nemo/src/parser/ast/expression.rs b/nemo/src/parser/ast/expression.rs new file mode 100644 index 000000000..b3274a948 --- /dev/null +++ b/nemo/src/parser/ast/expression.rs @@ -0,0 +1,139 @@ +//! This module defines [Expression]. + +pub mod basic; +pub mod complex; +pub mod sequence; + +use basic::{ + blank::Blank, boolean::Boolean, iri::Iri, number::Number, rdf_literal::RdfLiteral, + string::StringLiteral, variable::Variable, +}; +use complex::{atom::Atom, tuple::Tuple}; +use nom::{branch::alt, combinator::map}; + +use crate::parser::{ + context::{context, ParserContext}, + input::ParserInput, + span::ProgramSpan, + ParserResult, +}; + +use super::ProgramAST; + +/// An expression that is the building block of rules. 
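+///
+/// A parsing sketch, mirroring the tests below (`ParserState::default()` assumed):
+/// ```ignore
+/// let input = ParserInput::new("abc(1,2)", ParserState::default());
+/// let (_rest, expression) = Expression::parse(input).unwrap();
+/// assert!(matches!(expression, Expression::Atom(_)));
+/// ```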
+#[derive(Debug)] +pub enum Expression<'a> { + /// Atom + Atom(Atom<'a>), + /// Blank + Blank(Blank<'a>), + /// Boolean + Boolean(Boolean<'a>), + /// Iri + Iri(Iri<'a>), + /// Number + Number(Number<'a>), + /// Rdf literal + RdfLiteral(RdfLiteral<'a>), + /// String + String(StringLiteral<'a>), + /// Tuple + Tuple(Tuple<'a>), + /// Variable + Variable(Variable<'a>), +} + +impl<'a> ProgramAST<'a> for Expression<'a> { + fn children(&self) -> Vec<&dyn ProgramAST> { + match self { + Expression::Atom(expression) => expression.children(), + Expression::Blank(expression) => expression.children(), + Expression::Boolean(expression) => expression.children(), + Expression::Iri(expression) => expression.children(), + Expression::Number(expression) => expression.children(), + Expression::RdfLiteral(expression) => expression.children(), + Expression::String(expression) => expression.children(), + Expression::Tuple(expression) => expression.children(), + Expression::Variable(expression) => expression.children(), + } + } + + fn span(&self) -> ProgramSpan { + match self { + Expression::Atom(expression) => expression.span(), + Expression::Blank(expression) => expression.span(), + Expression::Boolean(expression) => expression.span(), + Expression::Iri(expression) => expression.span(), + Expression::Number(expression) => expression.span(), + Expression::RdfLiteral(expression) => expression.span(), + Expression::String(expression) => expression.span(), + Expression::Tuple(expression) => expression.span(), + Expression::Variable(expression) => expression.span(), + } + } + + fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self> + where + Self: Sized + 'a, + { + context( + ParserContext::Expression, + alt(( + map(Atom::parse, Self::Atom), + map(Tuple::parse, Self::Tuple), + map(Blank::parse, Self::Blank), + map(Boolean::parse, Self::Boolean), + map(Iri::parse, Self::Iri), + map(Number::parse, Self::Number), + map(RdfLiteral::parse, Self::RdfLiteral), + map(StringLiteral::parse, Self::String), + map(Variable::parse, Self::Variable), + )), + )(input) + } +} + +#[cfg(test)] +mod test { + use nom::combinator::all_consuming; + + use crate::parser::{ + ast::{expression::Expression, ProgramAST}, + input::ParserInput, + ParserState, + }; + + #[test] + fn parse_expression_tuple() { + let test = vec!["(1,2)"]; + + for input in test { + let parser_input = ParserInput::new(input, ParserState::default()); + let result = all_consuming(Expression::parse)(parser_input); + + println!("{:?}", result); + + assert!(result.is_ok()); + + let result = result.unwrap(); + assert!(matches!(result.1, Expression::Tuple(_))); + } + } + + #[test] + fn parse_expression_atom() { + let test = vec!["abc(1,2)"]; + + for input in test { + let parser_input = ParserInput::new(input, ParserState::default()); + let result = all_consuming(Expression::parse)(parser_input); + + println!("{:?}", result); + + assert!(result.is_ok()); + + let result = result.unwrap(); + assert!(matches!(result.1, Expression::Atom(_))); + } + } +} diff --git a/nemo/src/parser/ast/basic.rs b/nemo/src/parser/ast/expression/basic.rs similarity index 50% rename from nemo/src/parser/ast/basic.rs rename to nemo/src/parser/ast/expression/basic.rs index febed1d1c..6a3a710b3 100644 --- a/nemo/src/parser/ast/basic.rs +++ b/nemo/src/parser/ast/expression/basic.rs @@ -1,3 +1,9 @@ //! This module defines ast nodes for simple building blocks like numbers or strings. 
+pub mod blank;
+pub mod boolean;
+pub mod iri;
 pub mod number;
+pub mod rdf_literal;
+pub mod string;
+pub mod variable;
diff --git a/nemo/src/parser/ast/expression/basic/blank.rs b/nemo/src/parser/ast/expression/basic/blank.rs
new file mode 100644
index 000000000..04002b275
--- /dev/null
+++ b/nemo/src/parser/ast/expression/basic/blank.rs
@@ -0,0 +1,92 @@
+//! This module defines [Blank]
+
+use nom::{branch::alt, sequence::pair};
+
+use crate::parser::{
+    ast::{token::Token, ProgramAST},
+    context::{context, ParserContext},
+    input::ParserInput,
+    span::ProgramSpan,
+    ParserResult,
+};
+
+/// AST node representing a blank node
+#[derive(Debug)]
+pub struct Blank<'a> {
+    /// [ProgramSpan] associated with this node
+    span: ProgramSpan<'a>,
+
+    /// Name of the blank node
+    name: Token<'a>,
+}
+
+impl<'a> Blank<'a> {
+    /// Return the name of the blank node.
+    pub fn name(&self) -> String {
+        self.name.to_string()
+    }
+
+    /// Parse the name of the blank node.
+    fn parse_name(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>> {
+        alt((Token::name, Token::digits))(input)
+    }
+}
+
+impl<'a> ProgramAST<'a> for Blank<'a> {
+    fn children(&self) -> Vec<&dyn ProgramAST> {
+        Vec::default()
+    }
+
+    fn span(&self) -> ProgramSpan {
+        self.span
+    }
+
+    fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self>
+    where
+        Self: Sized + 'a,
+    {
+        let input_span = input.span;
+
+        context(
+            ParserContext::Blank,
+            pair(Token::blank_node_prefix, Self::parse_name),
+        )(input)
+        .map(|(rest, (_, name))| {
+            let rest_span = rest.span;
+
+            (
+                rest,
+                Blank {
+                    span: input_span.until_rest(&rest_span),
+                    name,
+                },
+            )
+        })
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use nom::combinator::all_consuming;
+
+    use crate::parser::{
+        ast::{expression::basic::blank::Blank, ProgramAST},
+        input::ParserInput,
+        ParserState,
+    };
+
+    #[test]
+    fn parse_blank() {
+        let test = vec![("_:a", "a".to_string()), ("_:123", "123".to_string())];
+
+        for (input, expected) in test {
+            let parser_input = ParserInput::new(input, ParserState::default());
+            let result = all_consuming(Blank::parse)(parser_input);
+
+            assert!(result.is_ok());
+
+            let result = result.unwrap();
+            assert_eq!(expected, result.1.name());
+        }
+    }
+}
diff --git a/nemo/src/parser/ast/expression/basic/boolean.rs b/nemo/src/parser/ast/expression/basic/boolean.rs
new file mode 100644
index 000000000..f4f6dcd48
--- /dev/null
+++ b/nemo/src/parser/ast/expression/basic/boolean.rs
@@ -0,0 +1,112 @@
+//! This module defines [Boolean]
+#![allow(missing_docs)]
+
+use enum_assoc::Assoc;
+use nom::branch::alt;
+
+use crate::parser::{
+    ast::{
+        token::{Token, TokenKind},
+        ProgramAST,
+    },
+    context::{context, ParserContext},
+    input::ParserInput,
+    span::ProgramSpan,
+    ParserResult,
+};
+
+/// Boolean values
+#[derive(Assoc, Debug, Clone, Copy, PartialEq, Eq)]
+#[func(pub fn token(token: TokenKind) -> Option<Self>)]
+pub enum BooleanValue {
+    /// False
+    #[assoc(token = TokenKind::False)]
+    False,
+    /// True
+    #[assoc(token = TokenKind::True)]
+    True,
+}
+
+/// AST node representing a Boolean node
+#[derive(Debug)]
+pub struct Boolean<'a> {
+    /// [ProgramSpan] associated with this node
+    span: ProgramSpan<'a>,
+
+    /// Value
+    value: BooleanValue,
+}
+
+impl<'a> Boolean<'a> {
+    /// Return the value of the Boolean node.
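+    ///
+    /// Sketch based on the test below:
+    /// ```ignore
+    /// let input = ParserInput::new("true", ParserState::default());
+    /// let (_rest, boolean) = Boolean::parse(input).unwrap();
+    /// assert_eq!(boolean.value(), BooleanValue::True);
+    /// ```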
+ pub fn value(&self) -> BooleanValue { + self.value + } + + /// Parse boolean + fn parse_boolean_value(input: ParserInput<'a>) -> ParserResult<'a, BooleanValue> { + alt((Token::boolean_true, Token::boolean_false))(input).map(|(rest, result)| { + ( + rest, + BooleanValue::token(result.kind()).expect("unexpected token"), + ) + }) + } +} + +impl<'a> ProgramAST<'a> for Boolean<'a> { + fn children(&self) -> Vec<&dyn ProgramAST> { + Vec::default() + } + + fn span(&self) -> ProgramSpan { + self.span + } + + fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self> + where + Self: Sized + 'a, + { + let input_span = input.span; + + context(ParserContext::Boolean, Self::parse_boolean_value)(input).map(|(rest, value)| { + let rest_span = rest.span; + + ( + rest, + Boolean { + span: input_span.until_rest(&rest_span), + value, + }, + ) + }) + } +} + +#[cfg(test)] +mod test { + use nom::combinator::all_consuming; + + use crate::parser::{ + ast::{expression::basic::boolean::Boolean, ProgramAST}, + input::ParserInput, + ParserState, + }; + + use super::BooleanValue; + + #[test] + fn parse_boolean() { + let test = vec![("true", BooleanValue::True), ("false", BooleanValue::False)]; + + for (input, expected) in test { + let parser_input = ParserInput::new(input, ParserState::default()); + let result = all_consuming(Boolean::parse)(parser_input); + + assert!(result.is_ok()); + + let result = result.unwrap(); + assert_eq!(expected, result.1.value()); + } + } +} diff --git a/nemo/src/parser/ast/expression/basic/iri.rs b/nemo/src/parser/ast/expression/basic/iri.rs new file mode 100644 index 000000000..6fd5d973a --- /dev/null +++ b/nemo/src/parser/ast/expression/basic/iri.rs @@ -0,0 +1,91 @@ +//! This module defines [Iri] +#![allow(missing_docs)] + +use nom::sequence::tuple; + +use crate::parser::{ + ast::{token::Token, ProgramAST}, + context::{context, ParserContext}, + input::ParserInput, + span::ProgramSpan, + ParserResult, +}; + +/// AST node representing a Iri +#[derive(Debug)] +pub struct Iri<'a> { + /// [ProgramSpan] associated with this node + span: ProgramSpan<'a>, + + /// Part of the Iri that is the content + content: Token<'a>, +} + +impl<'a> Iri<'a> { + /// Return the content of the iri. 
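+    ///
+    /// Sketch based on the test below: parsing `<https://www.test.com/test#test>`
+    /// yields an [Iri] whose content is the text between the angle brackets.
+    /// ```ignore
+    /// let input = ParserInput::new("<https://www.test.com/test#test>", ParserState::default());
+    /// let (_rest, iri) = Iri::parse(input).unwrap();
+    /// assert_eq!(iri.content(), "https://www.test.com/test#test");
+    /// ```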
+    pub fn content(&self) -> String {
+        self.content.to_string()
+    }
+}
+
+impl<'a> ProgramAST<'a> for Iri<'a> {
+    fn children(&self) -> Vec<&dyn ProgramAST> {
+        Vec::default()
+    }
+
+    fn span(&self) -> ProgramSpan {
+        self.span
+    }
+
+    fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self>
+    where
+        Self: Sized + 'a,
+    {
+        let input_span = input.span;
+
+        context(
+            ParserContext::Iri,
+            tuple((Token::open_chevrons, Token::iri, Token::closed_chevrons)),
+        )(input)
+        .map(|(rest, (_, content, _))| {
+            let rest_span = rest.span;
+
+            (
+                rest,
+                Iri {
+                    span: input_span.until_rest(&rest_span),
+                    content,
+                },
+            )
+        })
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use nom::combinator::all_consuming;
+
+    use crate::parser::{
+        ast::{expression::basic::iri::Iri, ProgramAST},
+        input::ParserInput,
+        ParserState,
+    };
+
+    #[test]
+    fn parse_iri() {
+        let test = vec![(
+            "<https://www.test.com/test#test>",
+            "https://www.test.com/test#test".to_string(),
+        )];
+
+        for (input, expected) in test {
+            let parser_input = ParserInput::new(input, ParserState::default());
+            let result = all_consuming(Iri::parse)(parser_input);
+
+            assert!(result.is_ok());
+
+            let result = result.unwrap();
+            assert_eq!(expected, result.1.content());
+        }
+    }
+}
diff --git a/nemo/src/parser/ast/basic/number.rs b/nemo/src/parser/ast/expression/basic/number.rs
similarity index 98%
rename from nemo/src/parser/ast/basic/number.rs
rename to nemo/src/parser/ast/expression/basic/number.rs
index 65b3941c7..c384dde7c 100644
--- a/nemo/src/parser/ast/basic/number.rs
+++ b/nemo/src/parser/ast/expression/basic/number.rs
@@ -158,7 +158,7 @@ mod test {
     use nom::combinator::all_consuming;
 
     use crate::parser::{
-        ast::{basic::number::Number, ProgramAST},
+        ast::{expression::basic::number::Number, ProgramAST},
         ParserInput, ParserState,
     };
 
diff --git a/nemo/src/parser/ast/expression/basic/rdf_literal.rs b/nemo/src/parser/ast/expression/basic/rdf_literal.rs
new file mode 100644
index 000000000..6abd71895
--- /dev/null
+++ b/nemo/src/parser/ast/expression/basic/rdf_literal.rs
@@ -0,0 +1,110 @@
+//! This module defines [RdfLiteral]
+#![allow(missing_docs)]
+
+use nom::sequence::tuple;
+
+use crate::parser::{
+    ast::{token::Token, ProgramAST},
+    context::{context, ParserContext},
+    input::ParserInput,
+    span::ProgramSpan,
+    ParserResult,
+};
+
+use super::iri::Iri;
+
+/// AST node representing an rdf literal
+#[derive(Debug)]
+pub struct RdfLiteral<'a> {
+    /// [ProgramSpan] associated with this node
+    span: ProgramSpan<'a>,
+
+    /// Content part of the rdf literal
+    content: Token<'a>,
+    /// Tag of the rdf literal
+    tag: Iri<'a>,
+}
+
+impl<'a> RdfLiteral<'a> {
+    /// Return the content of the rdf literal.
+    pub fn content(&self) -> String {
+        self.content.to_string()
+    }
+
+    /// Return the tag of the rdf literal.
+    pub fn tag(&self) -> String {
+        self.tag.content()
+    }
+
+    /// Parse the content part of the rdf literal.
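+    ///
+    /// Sketch: this consumes the quoted part before the `^^` type tag,
+    /// e.g. the `"true"` in `"true"^^<...>` (illustrative only):
+    /// ```ignore
+    /// let input = ParserInput::new("\"true\"", ParserState::default());
+    /// let (_rest, content) = RdfLiteral::parse_content(input).unwrap();
+    /// assert_eq!(content.to_string(), "true");
+    /// ```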
+    pub fn parse_content(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>> {
+        tuple((Token::quote, Token::string, Token::quote))(input)
+            .map(|(rest, (_, content, _))| (rest, content))
+    }
+}
+
+impl<'a> ProgramAST<'a> for RdfLiteral<'a> {
+    fn children(&self) -> Vec<&dyn ProgramAST> {
+        Vec::default()
+    }
+
+    fn span(&self) -> ProgramSpan {
+        self.span
+    }
+
+    fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self>
+    where
+        Self: Sized + 'a,
+    {
+        let input_span = input.span;
+
+        context(
+            ParserContext::RdfLiteral,
+            tuple((Self::parse_content, Token::double_caret, Iri::parse)),
+        )(input)
+        .map(|(rest, (content, _, tag))| {
+            let rest_span = rest.span;
+
+            (
+                rest,
+                RdfLiteral {
+                    span: input_span.until_rest(&rest_span),
+                    content,
+                    tag,
+                },
+            )
+        })
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use nom::combinator::all_consuming;
+
+    use crate::parser::{
+        ast::{expression::basic::rdf_literal::RdfLiteral, ProgramAST},
+        input::ParserInput,
+        ParserState,
+    };
+
+    #[test]
+    fn parse_rdf_literal() {
+        let test = vec![(
+            "\"true\"^^<http://www.w3.org/2001/XMLSchema#boolean>",
+            (
+                "true".to_string(),
+                "http://www.w3.org/2001/XMLSchema#boolean".to_string(),
+            ),
+        )];
+
+        for (input, expected) in test {
+            let parser_input = ParserInput::new(input, ParserState::default());
+            let result = all_consuming(RdfLiteral::parse)(parser_input);
+
+            assert!(result.is_ok());
+
+            let result = result.unwrap();
+            assert_eq!(expected, (result.1.content(), result.1.tag()));
+        }
+    }
+}
diff --git a/nemo/src/parser/ast/expression/basic/string.rs b/nemo/src/parser/ast/expression/basic/string.rs
new file mode 100644
index 000000000..4e3a9b27d
--- /dev/null
+++ b/nemo/src/parser/ast/expression/basic/string.rs
@@ -0,0 +1,116 @@
+//! This module defines [StringLiteral]
+#![allow(missing_docs)]
+
+use nom::{
+    combinator::opt,
+    sequence::{pair, tuple},
+};
+
+use crate::parser::{
+    ast::{token::Token, ProgramAST},
+    context::{context, ParserContext},
+    input::ParserInput,
+    span::ProgramSpan,
+    ParserResult,
+};
+
+/// AST node representing a string
+#[derive(Debug)]
+pub struct StringLiteral<'a> {
+    /// [ProgramSpan] associated with this node
+    span: ProgramSpan<'a>,
+
+    /// Part of the string that is the content
+    content: Token<'a>,
+    /// Optional language tag associated with the string
+    language_tag: Option<Token<'a>>,
+}
+
+impl<'a> StringLiteral<'a> {
+    /// Return the content of the string.
+    pub fn content(&self) -> String {
+        self.content.to_string()
+    }
+
+    /// Return the language tag of the string, if present.
+    pub fn language_tag(&self) -> Option<String> {
+        self.language_tag.as_ref().map(|token| token.to_string())
+    }
+
+    /// Parse the main part of the string.
+    pub fn parse_string(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>> {
+        tuple((Token::quote, Token::string, Token::quote))(input)
+            .map(|(rest, (_, content, _))| (rest, content))
+    }
+
+    /// Parse the language tag of the string.
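+    ///
+    /// Sketch based on the test below: `@ja` parses to the tag `ja`.
+    /// ```ignore
+    /// let input = ParserInput::new("@ja", ParserState::default());
+    /// let (_rest, tag) = StringLiteral::parse_language_tag(input).unwrap();
+    /// assert_eq!(tag.to_string(), "ja");
+    /// ```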
+ pub fn parse_language_tag(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>> { + pair(Token::at, Token::name)(input).map(|(rest, (_, tag))| (rest, tag)) + } +} + +impl<'a> ProgramAST<'a> for StringLiteral<'a> { + fn children(&self) -> Vec<&dyn ProgramAST> { + Vec::default() + } + + fn span(&self) -> ProgramSpan { + self.span + } + + fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self> + where + Self: Sized + 'a, + { + let input_span = input.span; + + context( + ParserContext::String, + pair(Self::parse_string, opt(Self::parse_language_tag)), + )(input) + .map(|(rest, (content, language_tag))| { + let rest_span = rest.span; + + ( + rest, + StringLiteral { + span: input_span.until_rest(&rest_span), + content, + language_tag, + }, + ) + }) + } +} + +#[cfg(test)] +mod test { + use nom::combinator::all_consuming; + + use crate::parser::{ + ast::{expression::basic::string::StringLiteral, ProgramAST}, + input::ParserInput, + ParserState, + }; + + #[test] + fn parse_string() { + let test = vec![ + ("\"test\"", ("test".to_string(), None)), + ( + "\"テスト\"@ja", + ("テスト".to_string(), Some("ja".to_string())), + ), + ]; + + for (input, expected) in test { + let parser_input = ParserInput::new(input, ParserState::default()); + let result = all_consuming(StringLiteral::parse)(parser_input); + + assert!(result.is_ok()); + + let result = result.unwrap(); + assert_eq!(expected, (result.1.content(), result.1.language_tag())) + } + } +} diff --git a/nemo/src/parser/ast/expression/basic/variable.rs b/nemo/src/parser/ast/expression/basic/variable.rs new file mode 100644 index 000000000..402cc63ae --- /dev/null +++ b/nemo/src/parser/ast/expression/basic/variable.rs @@ -0,0 +1,149 @@ +//! This module defines [Variable] +#![allow(missing_docs)] + +use enum_assoc::Assoc; +use nom::{branch::alt, combinator::opt, sequence::pair}; + +use crate::parser::{ + ast::{ + token::{Token, TokenKind}, + ProgramAST, + }, + context::{context, ParserContext}, + input::ParserInput, + span::ProgramSpan, + ParserResult, +}; + +/// Marker that indicates the type of variable +#[derive(Assoc, Debug, Clone, Copy, PartialEq, Eq)] +#[func(pub fn token(token: TokenKind) -> Option)] +pub enum VariableType { + /// Universal variable + #[assoc(token = TokenKind::QuestionMark)] + Universal, + /// Existential variable + #[assoc(token = TokenKind::ExclamationMark)] + Existential, + /// Anonymous variable + #[assoc(token = TokenKind::Underscore)] + Anonymous, +} + +/// AST node representing a variable +#[derive(Debug)] +pub struct Variable<'a> { + /// [ProgramSpan] associated with this node + span: ProgramSpan<'a>, + + /// Type of variable + kind: VariableType, + + /// Name of the variable + name: Option>, +} + +impl<'a> Variable<'a> { + /// Return the name of the variable, if given + pub fn name(&self) -> Option { + self.name.as_ref().map(|token| token.to_string()) + } + + /// Return the type of variable + pub fn kind(&self) -> VariableType { + self.kind + } + + /// Parse the variable prefix + fn parse_variable_prefix(input: ParserInput<'a>) -> ParserResult<'a, VariableType> { + alt(( + Token::question_mark, + Token::exclamation_mark, + Token::underscore, + ))(input) + .map(|(rest, result)| { + ( + rest, + VariableType::token(result.kind()).expect("unknown token"), + ) + }) + } + + /// Parse the name of the variable + fn parse_variable_name(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>> { + Token::name(input) + } +} + +impl<'a> ProgramAST<'a> for Variable<'a> { + fn children(&self) -> Vec<&dyn ProgramAST> { + Vec::default() + } 
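+
+    // Usage sketch for `Variable::parse` (defined below), mirroring the tests
+    // at the end of this file:
+    //
+    //     let input = ParserInput::new("?universal", ParserState::default());
+    //     let (_rest, variable) = Variable::parse(input).unwrap();
+    //     assert_eq!(Some("universal".to_string()), variable.name());
+    //     assert_eq!(VariableType::Universal, variable.kind());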
+ + fn span(&self) -> ProgramSpan { + self.span + } + + fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self> + where + Self: Sized + 'a, + { + let input_span = input.span; + + context( + ParserContext::Variable, + pair(Self::parse_variable_prefix, opt(Self::parse_variable_name)), + )(input) + .map(|(rest, (kind, name))| { + let rest_span = rest.span; + + ( + rest, + Variable { + span: input_span.until_rest(&rest_span), + kind, + name, + }, + ) + }) + } +} + +#[cfg(test)] +mod test { + use nom::combinator::all_consuming; + + use crate::parser::{ + ast::{expression::basic::variable::Variable, ProgramAST}, + input::ParserInput, + ParserState, + }; + + use super::VariableType; + + #[test] + fn parse_variable() { + let test = vec![ + ( + "?universal", + (Some("universal".to_string()), VariableType::Universal), + ), + ( + "!existential", + (Some("existential".to_string()), VariableType::Existential), + ), + ("_", (None, VariableType::Anonymous)), + ("?", (None, VariableType::Universal)), + ]; + + for (input, expected) in test { + let parser_input = ParserInput::new(input, ParserState::default()); + let result = all_consuming(Variable::parse)(parser_input); + + assert!(result.is_ok()); + + let result = result.unwrap(); + assert_eq!(expected, (result.1.name(), result.1.kind())); + } + } +} diff --git a/nemo/src/parser/ast/expression/complex.rs b/nemo/src/parser/ast/expression/complex.rs new file mode 100644 index 000000000..83969c72f --- /dev/null +++ b/nemo/src/parser/ast/expression/complex.rs @@ -0,0 +1,4 @@ +//! This module defines complex expressions like tuples or maps. + +pub mod atom; +pub mod tuple; diff --git a/nemo/src/parser/ast/expression/complex/atom.rs b/nemo/src/parser/ast/expression/complex/atom.rs new file mode 100644 index 000000000..22168bea0 --- /dev/null +++ b/nemo/src/parser/ast/expression/complex/atom.rs @@ -0,0 +1,121 @@ +//! This module defines [Atom]. + +use nom::{ + combinator::opt, + sequence::{delimited, pair}, +}; + +use crate::parser::{ + ast::{ + expression::{sequence::simple::ExpressionSequenceSimple, Expression}, + tag::Tag, + token::Token, + ProgramAST, + }, + context::{context, ParserContext}, + span::ProgramSpan, +}; + +/// A possibly tagged sequence of [Expression]s. +#[derive(Debug)] +pub struct Atom<'a> { + /// [ProgramSpan] associated with this node + span: ProgramSpan<'a>, + + /// Tag of this Atom + tag: Tag<'a>, + /// List of underlying expressions + expressions: ExpressionSequenceSimple<'a>, +} + +impl<'a> Atom<'a> { + /// Return an iterator over the underlying [Expression]s. + pub fn expressions(&self) -> impl Iterator> { + self.expressions.iter() + } + + /// Return the tag of this atom. 
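+    ///
+    /// For `abc(1, ?x)` this is the plain tag `abc` (a sketch mirroring the
+    /// tests below):
+    ///
+    /// ```ignore
+    /// let input = ParserInput::new("abc(1, ?x)", ParserState::default());
+    /// let (_rest, atom) = Atom::parse(input).unwrap();
+    /// assert_eq!("abc", atom.tag().to_string());
+    /// assert_eq!(2, atom.expressions().count());
+    /// ```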
+ pub fn tag(&self) -> &Tag<'a> { + &self.tag + } +} + +impl<'a> ProgramAST<'a> for Atom<'a> { + fn children(&self) -> Vec<&dyn ProgramAST> { + let mut result: Vec<&dyn ProgramAST> = vec![]; + for expression in &self.expressions { + result.push(expression) + } + + result + } + + fn span(&self) -> ProgramSpan { + self.span + } + + fn parse(input: crate::parser::input::ParserInput<'a>) -> crate::parser::ParserResult<'a, Self> + where + Self: Sized + 'a, + { + let input_span = input.span; + + context( + ParserContext::Atom, + pair( + Tag::parse, + delimited( + pair(Token::open_parenthesis, opt(Token::whitespace)), + ExpressionSequenceSimple::parse, + pair(opt(Token::whitespace), Token::closed_parenthesis), + ), + ), + )(input) + .map(|(rest, (tag, expressions))| { + let rest_span = rest.span; + + ( + rest, + Self { + span: input_span.until_rest(&rest_span), + tag, + expressions, + }, + ) + }) + } +} + +#[cfg(test)] +mod test { + use nom::combinator::all_consuming; + + use crate::parser::{ + ast::{expression::complex::atom::Atom, ProgramAST}, + input::ParserInput, + ParserState, + }; + + #[test] + fn parse_atom() { + let test = vec![ + ("abc(1)", ("abc".to_string(), 1)), + ("abc(1,2)", ("abc".to_string(), 2)), + ("abc( 1 )", ("abc".to_string(), 1)), + ("abc( 1 , 2 )", ("abc".to_string(), 2)), + ]; + + for (input, expected) in test { + let parser_input = ParserInput::new(input, ParserState::default()); + let result = all_consuming(Atom::parse)(parser_input); + + assert!(result.is_ok()); + + let result = result.unwrap(); + assert_eq!( + expected, + (result.1.tag().to_string(), result.1.expressions().count()) + ); + } + } +} diff --git a/nemo/src/parser/ast/expression/complex/tuple.rs b/nemo/src/parser/ast/expression/complex/tuple.rs new file mode 100644 index 000000000..32dfede7a --- /dev/null +++ b/nemo/src/parser/ast/expression/complex/tuple.rs @@ -0,0 +1,114 @@ +//! This module defines [Tuple]. + +use nom::{ + combinator::opt, + sequence::{delimited, pair, terminated, tuple}, +}; + +use crate::parser::{ + ast::{ + expression::{sequence::one::ExpressionSequenceOne, Expression}, + token::Token, + ProgramAST, + }, + context::{context, ParserContext}, + span::ProgramSpan, +}; + +/// A sequence of [Expression]s. +#[derive(Debug)] +pub struct Tuple<'a> { + /// [ProgramSpan] associated with this node + span: ProgramSpan<'a>, + + /// List of underlying expressions + expressions: ExpressionSequenceOne<'a>, +} + +impl<'a> Tuple<'a> { + /// Return an iterator over the underlying [Expression]s. 
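+    ///
+    /// A tuple of length one requires a trailing comma (`(1,)`), while longer
+    /// tuples may omit it (a sketch mirroring the tests below):
+    ///
+    /// ```ignore
+    /// let input = ParserInput::new("(1, ?x,)", ParserState::default());
+    /// let (_rest, tuple) = Tuple::parse(input).unwrap();
+    /// assert_eq!(2, tuple.expressions().count());
+    /// ```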
+ pub fn expressions(&self) -> impl Iterator> { + self.expressions.iter() + } +} + +impl<'a> ProgramAST<'a> for Tuple<'a> { + fn children(&self) -> Vec<&dyn ProgramAST> { + let mut result: Vec<&dyn ProgramAST> = vec![]; + for expression in &self.expressions { + result.push(expression) + } + + result + } + + fn span(&self) -> ProgramSpan { + self.span + } + + fn parse(input: crate::parser::input::ParserInput<'a>) -> crate::parser::ParserResult<'a, Self> + where + Self: Sized + 'a, + { + let input_span = input.span; + + context( + ParserContext::Tuple, + delimited( + pair(Token::open_parenthesis, opt(Token::whitespace)), + terminated( + ExpressionSequenceOne::parse, + opt(tuple(( + opt(Token::whitespace), + Token::comma, + opt(Token::whitespace), + ))), + ), + pair(opt(Token::whitespace), Token::closed_parenthesis), + ), + )(input) + .map(|(rest, expressions)| { + let rest_span = rest.span; + + ( + rest, + Self { + span: input_span.until_rest(&rest_span), + expressions, + }, + ) + }) + } +} + +#[cfg(test)] +mod test { + use nom::combinator::all_consuming; + + use crate::parser::{ + ast::{expression::complex::tuple::Tuple, ProgramAST}, + input::ParserInput, + ParserState, + }; + + #[test] + fn parse_tuple() { + let test = vec![ + ("(1,)", 1), + ("(1,2)", 2), + ("( 1 ,)", 1), + ("( 1 , 2 )", 2), + ("( 1 , 2 ,)", 2), + ]; + + for (input, expected) in test { + let parser_input = ParserInput::new(input, ParserState::default()); + let result = all_consuming(Tuple::parse)(parser_input); + + assert!(result.is_ok()); + + let result = result.unwrap(); + assert_eq!(expected, result.1.expressions().count()); + } + } +} diff --git a/nemo/src/parser/ast/expression/sequence.rs b/nemo/src/parser/ast/expression/sequence.rs new file mode 100644 index 000000000..583d3db63 --- /dev/null +++ b/nemo/src/parser/ast/expression/sequence.rs @@ -0,0 +1,5 @@ +//! This module defines helper parsers for sequences of expressions. + +pub mod key_value; +pub mod one; +pub mod simple; diff --git a/nemo/src/parser/ast/expression/sequence/key_value.rs b/nemo/src/parser/ast/expression/sequence/key_value.rs new file mode 100644 index 000000000..a0bc22eb2 --- /dev/null +++ b/nemo/src/parser/ast/expression/sequence/key_value.rs @@ -0,0 +1 @@ +//! This module defines \ No newline at end of file diff --git a/nemo/src/parser/ast/expression/sequence/one.rs b/nemo/src/parser/ast/expression/sequence/one.rs new file mode 100644 index 000000000..e178d6e7b --- /dev/null +++ b/nemo/src/parser/ast/expression/sequence/one.rs @@ -0,0 +1,129 @@ +//! This module defines + +use std::vec::IntoIter; + +use nom::{ + branch::alt, + combinator::{map, opt}, + multi::separated_list1, + sequence::tuple, +}; + +use crate::parser::{ + ast::{expression::Expression, token::Token, ProgramAST}, + input::ParserInput, + span::ProgramSpan, + ParserResult, +}; + +/// Sequence of comma-delimited expressions +/// +/// A sequence of one must be followed by a comma +#[derive(Debug)] +pub struct ExpressionSequenceOne<'a> { + /// [ProgramSpan] associated with this sequence + span: ProgramSpan<'a>, + + /// List of expressions + expressions: Vec>, +} + +impl<'a> ExpressionSequenceOne<'a> { + /// Return an iterator over the [Expression]s. + pub fn iter(&self) -> impl Iterator> { + self.into_iter() + } + + /// Parse a sequence of length one. 
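+    ///
+    /// A single expression only forms a valid sequence when it is followed by
+    /// a comma, e.g. `12,` but not `12` (a sketch mirroring the tests below):
+    ///
+    /// ```ignore
+    /// let input = ParserInput::new("12,", ParserState::default());
+    /// let (_rest, sequence) = ExpressionSequenceOne::parse(input).unwrap();
+    /// assert_eq!(1, sequence.iter().count());
+    /// ```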
+ fn parse_sequence_single(input: ParserInput<'a>) -> ParserResult<'a, Expression<'a>> { + tuple((Expression::parse, opt(Token::whitespace), Token::comma))(input) + .map(|(rest, (result, _, _))| (rest, result)) + } + + /// Parse a sequence of length greater one. + fn parse_sequence(input: ParserInput<'a>) -> ParserResult<'a, Vec>> { + tuple(( + Expression::parse, + tuple((opt(Token::whitespace), Token::comma, opt(Token::whitespace))), + separated_list1( + tuple((opt(Token::whitespace), Token::comma, opt(Token::whitespace))), + Expression::parse, + ), + ))(input) + .map(|(rest, (first, _, others))| { + let mut result = vec![first]; + result.extend(others); + + (rest, result) + }) + } + + /// Parse a comma separated list of [Expression]s. + pub fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self> { + let input_span = input.span; + + alt(( + Self::parse_sequence, + map(Self::parse_sequence_single, |result| vec![result]), + ))(input) + .map(|(rest, expressions)| { + let rest_span = rest.span; + + ( + rest, + Self { + span: input_span.until_rest(&rest_span), + expressions, + }, + ) + }) + } +} + +impl<'a, 'b> IntoIterator for &'b ExpressionSequenceOne<'a> { + type Item = &'b Expression<'a>; + type IntoIter = std::slice::Iter<'b, Expression<'a>>; + + fn into_iter(self) -> Self::IntoIter { + self.expressions.iter() + } +} + +impl<'a> IntoIterator for ExpressionSequenceOne<'a> { + type Item = Expression<'a>; + type IntoIter = IntoIter>; + + fn into_iter(self) -> Self::IntoIter { + self.expressions.into_iter() + } +} + +#[cfg(test)] +mod test { + use nom::combinator::all_consuming; + + use crate::parser::{ + ast::expression::sequence::one::ExpressionSequenceOne, input::ParserInput, ParserState, + }; + + #[test] + fn parse_expression_sequence_one() { + let test = vec![ + ("12,", 1), + ("12 ,", 1), + ("1,?x,2", 3), + ("1, ?x, 2", 3), + ("1 , ?x, 2", 3), + ]; + + for (input, expected) in test { + let parser_input = ParserInput::new(input, ParserState::default()); + let result = all_consuming(ExpressionSequenceOne::parse)(parser_input); + + assert!(result.is_ok()); + + let result = result.unwrap(); + assert_eq!(expected, result.1.into_iter().count()); + } + } +} diff --git a/nemo/src/parser/ast/expression/sequence/simple.rs b/nemo/src/parser/ast/expression/sequence/simple.rs new file mode 100644 index 000000000..f81c51916 --- /dev/null +++ b/nemo/src/parser/ast/expression/sequence/simple.rs @@ -0,0 +1,101 @@ +//! This module defines + +use std::vec::IntoIter; + +use nom::{combinator::opt, multi::separated_list1, sequence::tuple}; + +use crate::parser::{ + ast::{expression::Expression, token::Token, ProgramAST}, + input::ParserInput, + span::ProgramSpan, + ParserResult, +}; + +/// Sequence of comma-delimited expressions +#[derive(Debug)] +pub struct ExpressionSequenceSimple<'a> { + /// [ProgramSpan] associated with this sequence + span: ProgramSpan<'a>, + + /// List of expressions + expressions: Vec>, +} + +impl<'a> ExpressionSequenceSimple<'a> { + /// Return an iterator over the [Expression]s. + pub fn iter(&self) -> impl Iterator> { + self.into_iter() + } + + /// Parse a comma separated list of [Expression]s. 
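+    ///
+    /// Commas may be surrounded by whitespace, and at least one expression is
+    /// required (a sketch mirroring the tests below):
+    ///
+    /// ```ignore
+    /// let input = ParserInput::new("1, ?x, 2", ParserState::default());
+    /// let (_rest, sequence) = ExpressionSequenceSimple::parse(input).unwrap();
+    /// assert_eq!(3, sequence.iter().count());
+    /// ```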
+ pub fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self> { + let input_span = input.span; + + separated_list1( + tuple((opt(Token::whitespace), Token::comma, opt(Token::whitespace))), + Expression::parse, + )(input) + .map(|(rest, expressions)| { + let rest_span = rest.span; + + ( + rest, + Self { + span: input_span.until_rest(&rest_span), + expressions, + }, + ) + }) + } +} + +impl<'a, 'b> IntoIterator for &'b ExpressionSequenceSimple<'a> { + type Item = &'b Expression<'a>; + type IntoIter = std::slice::Iter<'b, Expression<'a>>; + + fn into_iter(self) -> Self::IntoIter { + self.expressions.iter() + } +} + +impl<'a> IntoIterator for ExpressionSequenceSimple<'a> { + type Item = Expression<'a>; + type IntoIter = IntoIter>; + + fn into_iter(self) -> Self::IntoIter { + self.expressions.into_iter() + } +} + +#[cfg(test)] +mod test { + use nom::combinator::all_consuming; + + use crate::parser::{ + ast::expression::sequence::simple::ExpressionSequenceSimple, input::ParserInput, + ParserState, + }; + + #[test] + fn parse_expression_sequence_simple() { + let test = vec![ + ("12", 1), + ("1,?x,2", 3), + ("1, ?x, 2", 3), + ("1, ?x, 2", 3), + ("1 , ?x, 2", 3), + ]; + + for (input, expected) in test { + let parser_input = ParserInput::new(input, ParserState::default()); + let result = all_consuming(ExpressionSequenceSimple::parse)(parser_input); + + println!("{:?}", result); + + assert!(result.is_ok()); + + let result = result.unwrap(); + assert_eq!(expected, result.1.into_iter().count()); + } + } +} diff --git a/nemo/src/parser/ast/program.rs b/nemo/src/parser/ast/program.rs index 327b1f4d9..9cdc2ca7e 100644 --- a/nemo/src/parser/ast/program.rs +++ b/nemo/src/parser/ast/program.rs @@ -6,12 +6,12 @@ use crate::parser::{ ParserResult, }; -use super::{basic::number::Number, ProgramAST}; +use super::{rule::Rule, ProgramAST}; -/// +/// AST representation of a nemo program #[derive(Debug)] pub struct Program<'a> { - number: Number<'a>, + rules: Rule<'a>, } impl<'a> ProgramAST<'a> for Program<'a> { @@ -27,7 +27,7 @@ impl<'a> ProgramAST<'a> for Program<'a> { where Self: Sized + 'a, { - context(ParserContext::Program, Number::parse)(input) - .map(|(rest, result)| (rest, Program { number: result })) + context(ParserContext::Program, Rule::parse)(input) + .map(|(rest, result)| (rest, Program { rules: result })) } } diff --git a/nemo/src/parser/ast/rule.rs b/nemo/src/parser/ast/rule.rs new file mode 100644 index 000000000..a607532f0 --- /dev/null +++ b/nemo/src/parser/ast/rule.rs @@ -0,0 +1,119 @@ +//! This module defines [Rule]. + +use nom::{ + combinator::opt, + sequence::{pair, tuple}, +}; + +use crate::parser::{ + context::{context, ParserContext}, + input::ParserInput, + span::ProgramSpan, + ParserResult, +}; + +use super::{ + expression::{sequence::simple::ExpressionSequenceSimple, Expression}, + token::Token, + ProgramAST, +}; + +/// A rule describing a logical implication +#[derive(Debug)] +pub struct Rule<'a> { + /// [ProgramSpan] associated with this node + span: ProgramSpan<'a>, + + /// Head of the rule + head: ExpressionSequenceSimple<'a>, + /// Body of the rule, + body: ExpressionSequenceSimple<'a>, +} + +impl<'a> Rule<'a> { + /// Return an iterator of the [Expression]s contained in the head. + pub fn head(&self) -> impl Iterator> { + self.head.iter() + } + + /// Return an iterator of the [Expression]s contained in the body. 
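+    ///
+    /// For `a(?x) :- b(?x), c(?x) .` the body consists of the expressions
+    /// `b(?x)` and `c(?x)` (a sketch mirroring the tests below):
+    ///
+    /// ```ignore
+    /// let input = ParserInput::new("a(?x) :- b(?x), c(?x) .", ParserState::default());
+    /// let (_rest, rule) = Rule::parse(input).unwrap();
+    /// assert_eq!((1, 2), (rule.head().count(), rule.body().count()));
+    /// ```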
+ pub fn body(&self) -> impl Iterator> { + self.body.iter() + } +} + +impl<'a> ProgramAST<'a> for Rule<'a> { + fn children(&self) -> Vec<&dyn ProgramAST> { + let mut result = Vec::<&dyn ProgramAST>::new(); + + for expression in self.head().chain(self.body()) { + result.push(expression); + } + + result + } + + fn span(&self) -> ProgramSpan { + self.span + } + + fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self> + where + Self: Sized + 'a, + { + let input_span = input.span; + + context( + ParserContext::Rule, + tuple(( + ExpressionSequenceSimple::parse, + tuple((opt(Token::whitespace), Token::arrow, opt(Token::whitespace))), + ExpressionSequenceSimple::parse, + pair(opt(Token::whitespace), Token::dot), + )), + )(input) + .map(|(rest, (head, _, body, _))| { + let rest_span = rest.span; + + ( + rest, + Self { + span: input_span.until_rest(&rest_span), + head, + body, + }, + ) + }) + } +} + +#[cfg(test)] +mod test { + use nom::combinator::all_consuming; + + use crate::parser::{ + ast::{rule::Rule, ProgramAST}, + input::ParserInput, + ParserState, + }; + + #[test] + fn parse_rule() { + let test = vec![ + ("a(?x, ?y) :- b(?x, ?y) .", (1, 1)), + ("a(?x,?y), d(1), c(1) :- b(?x, ?y), c(1, 2).", (3, 2)), + ]; + + for (input, expected) in test { + let parser_input = ParserInput::new(input, ParserState::default()); + let result = all_consuming(Rule::parse)(parser_input); + + println!("{:?}", result); + + assert!(result.is_ok()); + + let result = result.unwrap(); + assert_eq!(expected, (result.1.head().count(), result.1.body().count())); + } + } +} diff --git a/nemo/src/parser/ast/tag.rs b/nemo/src/parser/ast/tag.rs new file mode 100644 index 000000000..349144a86 --- /dev/null +++ b/nemo/src/parser/ast/tag.rs @@ -0,0 +1,69 @@ +//! This module defines [Tag]. + +use nom::{branch::alt, combinator::map, sequence::tuple}; + +use crate::parser::{input::ParserInput, ParserResult}; + +use super::{expression::basic::iri::Iri, token::Token, ProgramAST}; + +/// Tag used to give a name to complex expressions +#[derive(Debug)] +pub enum Tag<'a> { + /// Plain name + Plain(Token<'a>), + /// Prefixed name + Prefixed { prefix: Token<'a>, tag: Token<'a> }, + /// Iri + Iri(Iri<'a>), +} + +impl<'a> Tag<'a> { + /// Parse a [Tag]. + pub fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self> { + alt(( + map( + tuple((Token::name, Token::colon, Token::name)), + |(prefix, _, tag)| Self::Prefixed { prefix, tag }, + ), + map(Token::name, Self::Plain), + map(Iri::parse, Self::Iri), + ))(input) + } + + /// Return a string representation of the [Tag]. + /// + /// Note that this does not resolve prefixes. 
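+    ///
+    /// A prefixed tag is rendered verbatim as `prefix:name`
+    /// (a sketch mirroring the tests below):
+    ///
+    /// ```ignore
+    /// let input = ParserInput::new("abc:def", ParserState::default());
+    /// let (_rest, tag) = Tag::parse(input).unwrap();
+    /// assert_eq!("abc:def", tag.to_string());
+    /// ```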
+ pub fn to_string(&self) -> String { + match self { + Tag::Plain(token) => token.to_string(), + Tag::Prefixed { prefix, tag } => format!("{}:{}", prefix.to_string(), tag.to_string()), + Tag::Iri(iri) => iri.content(), + } + } +} + +#[cfg(test)] +mod test { + use nom::combinator::all_consuming; + + use crate::parser::{ast::tag::Tag, input::ParserInput, ParserState}; + + #[test] + fn parse_tag() { + let test = vec![ + ("abc", "abc".to_string()), + ("abc:def", "abc:def".to_string()), + ("", "http://example.com".to_string()), + ]; + + for (input, expected) in test { + let parser_input = ParserInput::new(input, ParserState::default()); + let result = all_consuming(Tag::parse)(parser_input); + + assert!(result.is_ok()); + + let result = result.unwrap(); + assert_eq!(expected, result.1.to_string()); + } + } +} diff --git a/nemo/src/parser/ast/token.rs b/nemo/src/parser/ast/token.rs index 7a295b24a..31b58f0a1 100644 --- a/nemo/src/parser/ast/token.rs +++ b/nemo/src/parser/ast/token.rs @@ -5,13 +5,11 @@ use enum_assoc::Assoc; use nom::{ branch::alt, - bytes::complete::{is_not, tag, take, take_till}, - character::complete::{alpha1, alphanumeric1, digit1, line_ending, multispace0, multispace1}, - combinator::{all_consuming, cut, map, opt, recognize}, - error::ParseError, - multi::{many0, many1}, - sequence::{delimited, pair, tuple}, - IResult, + bytes::complete::{is_not, tag}, + character::complete::{alpha1, alphanumeric1, digit1, multispace1}, + combinator::{map, recognize}, + multi::many0, + sequence::pair, }; use crate::parser::{ @@ -57,6 +55,9 @@ pub enum TokenKind { /// Dot #[assoc(name = ".")] Dot, + /// Comma + #[assoc(name = ",")] + Comma, /// Arrow, used to separate rules #[assoc(name = ":-")] Arrow, @@ -114,9 +115,9 @@ pub enum TokenKind { /// Quote #[assoc(name = "\"")] Quote, - /// Blank node label + /// Blank node prefix #[assoc(name = "_:")] - BlankNodeLabel, + BlankNodePrefix, /// Name #[assoc(name = "name")] Name, @@ -138,6 +139,9 @@ pub enum TokenKind { /// IRI #[assoc(name = "iri")] Iri, + /// String + #[assoc(name = "string")] + String, /// A comment (as single token) #[assoc(name = "comment")] Comment, @@ -169,6 +173,13 @@ pub struct Token<'a> { kind: TokenKind, } +impl<'a> Token<'a> { + /// Return a copy of the underlying text + pub fn to_string(&self) -> String { + self.span.0.to_string() + } +} + /// Macro for generating token parser functions macro_rules! string_token { ($func_name: ident, $token: expr) => { @@ -213,11 +224,7 @@ impl<'a> Token<'a> { /// Parse [TokenKind::Iri]. pub fn iri(input: ParserInput<'a>) -> ParserResult<'a, Token> { - context( - ParserContext::token(TokenKind::Iri), - recognize(delimited(tag("<"), is_not("> \n"), cut(tag(">")))), - )(input) - .map(|(rest, result)| { + is_not("> \n")(input).map(|(rest, result)| { ( rest, Token { @@ -228,6 +235,19 @@ impl<'a> Token<'a> { }) } + /// Parse [TokenKind::String]. + pub fn string(input: ParserInput<'a>) -> ParserResult<'a, Token> { + is_not("\"")(input).map(|(rest, result)| { + ( + rest, + Token { + span: result.span, + kind: TokenKind::String, + }, + ) + }) + } + /// Parse [TokenKind::Digits]. pub fn digits(input: ParserInput<'a>) -> ParserResult<'a, Token> { context(ParserContext::token(TokenKind::Digits), digit1)(input).map( @@ -243,6 +263,21 @@ impl<'a> Token<'a> { ) } + /// Parse [TokenKind::Whitespace]. 
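+    ///
+    /// Recognizes one or more whitespace characters via `multispace1`
+    /// (a sketch):
+    ///
+    /// ```ignore
+    /// let input = ParserInput::new(" \t\nabc", ParserState::default());
+    /// let (rest, _whitespace) = Token::whitespace(input).unwrap();
+    /// // `rest` now begins at "abc"
+    /// ```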
+ pub fn whitespace(input: ParserInput<'a>) -> ParserResult<'a, Token> { + context(ParserContext::token(TokenKind::Whitespace), multispace1)(input).map( + |(rest_input, result)| { + ( + rest_input, + Token { + span: result.span, + kind: TokenKind::Digits, + }, + ) + }, + ) + } + string_token!(open_parenthesis, TokenKind::OpenParenthesis); string_token!(closed_parenthesis, TokenKind::ClosedParenthesis); string_token!(open_brace, TokenKind::OpenBrace); @@ -254,6 +289,9 @@ impl<'a> Token<'a> { string_token!(question_mark, TokenKind::QuestionMark); string_token!(exclamation_mark, TokenKind::ExclamationMark); string_token!(dot, TokenKind::Dot); + string_token!(comma, TokenKind::Comma); + string_token!(arrow, TokenKind::Arrow); + string_token!(colon, TokenKind::Colon); string_token!(greater, TokenKind::Greater); string_token!(greater_equal, TokenKind::GreaterEqual); string_token!(less, TokenKind::Less); @@ -268,8 +306,10 @@ impl<'a> Token<'a> { string_token!(minus, TokenKind::Minus); string_token!(star, TokenKind::Star); string_token!(division, TokenKind::Division); + string_token!(boolean_true, TokenKind::True); + string_token!(boolean_false, TokenKind::False); string_token!(quote, TokenKind::Quote); - string_token!(blank_node_label, TokenKind::BlankNodeLabel); + string_token!(blank_node_prefix, TokenKind::BlankNodePrefix); string_token!(exponent_lower, TokenKind::ExponentLower); string_token!(exponent_upper, TokenKind::ExponentUpper); string_token!(type_marker_double, TokenKind::TypeMarkerDouble); diff --git a/nemo/src/parser/context.rs b/nemo/src/parser/context.rs index 2bbec890a..df6825fe4 100644 --- a/nemo/src/parser/context.rs +++ b/nemo/src/parser/context.rs @@ -16,6 +16,36 @@ pub enum ParserContext { /// Number #[assoc(name = "number")] Number, + /// Variable + #[assoc(name = "variable")] + Variable, + /// String + #[assoc(name = "string")] + String, + /// Iri + #[assoc(name = "iri")] + Iri, + /// Rdf Literal + #[assoc(name = "rdf-literal")] + RdfLiteral, + /// Blank node + #[assoc(name = "blank")] + Blank, + /// Boolean + #[assoc(name = "boolean")] + Boolean, + /// Expression + #[assoc(name = "expression")] + Expression, + /// Tuple + #[assoc(name = "tuple")] + Tuple, + /// Atom + #[assoc(name = "atom")] + Atom, + /// Rule + #[assoc(name = "rule")] + Rule, /// Program #[assoc(name = "program")] Program, diff --git a/nemo/src/rule_model/components.rs b/nemo/src/rule_model/components.rs index 3e7012063..24f92b0bc 100644 --- a/nemo/src/rule_model/components.rs +++ b/nemo/src/rule_model/components.rs @@ -14,7 +14,7 @@ use std::fmt::{Debug, Display}; use term::primitive::variable::Variable; -use super::{error::ProgramValidationError, origin::Origin}; +use super::{error::ValidationError, origin::Origin}; /// Name of a term #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] @@ -41,7 +41,7 @@ impl Display for Tag { /// Trait implemented by objects that are part of the logical rule model of the nemo language. pub trait ProgramComponent: Debug + Display { /// Construct this object from a string. 
- fn parse(_string: &str) -> Result + fn parse(_string: &str) -> Result where Self: Sized; @@ -54,7 +54,7 @@ pub trait ProgramComponent: Debug + Display { Self: Sized; /// Validate this component - fn validate(&self) -> Result<(), ProgramValidationError> + fn validate(&self) -> Result<(), ValidationError> where Self: Sized; } diff --git a/nemo/src/rule_model/components/atom.rs b/nemo/src/rule_model/components/atom.rs index 0aa511497..7d60a7165 100644 --- a/nemo/src/rule_model/components/atom.rs +++ b/nemo/src/rule_model/components/atom.rs @@ -2,7 +2,7 @@ use std::{fmt::Display, hash::Hash}; -use crate::rule_model::{error::ProgramValidationError, origin::Origin}; +use crate::rule_model::{error::ValidationError, origin::Origin}; use super::{ term::{primitive::variable::Variable, Term}, @@ -93,7 +93,7 @@ impl Hash for Atom { } impl ProgramComponent for Atom { - fn parse(_string: &str) -> Result + fn parse(_string: &str) -> Result where Self: Sized, { @@ -112,7 +112,7 @@ impl ProgramComponent for Atom { self } - fn validate(&self) -> Result<(), ProgramValidationError> + fn validate(&self) -> Result<(), ValidationError> where Self: Sized, { diff --git a/nemo/src/rule_model/components/base.rs b/nemo/src/rule_model/components/base.rs index c3733f66c..b52dfc7b2 100644 --- a/nemo/src/rule_model/components/base.rs +++ b/nemo/src/rule_model/components/base.rs @@ -45,7 +45,7 @@ impl Hash for Base { } impl ProgramComponent for Base { - fn parse(_string: &str) -> Result + fn parse(_string: &str) -> Result where Self: Sized, { @@ -64,7 +64,7 @@ impl ProgramComponent for Base { self } - fn validate(&self) -> Result<(), crate::rule_model::error::ProgramValidationError> + fn validate(&self) -> Result<(), crate::rule_model::error::ValidationError> where Self: Sized, { diff --git a/nemo/src/rule_model/components/fact.rs b/nemo/src/rule_model/components/fact.rs index 2f2cec62c..cb9de1d1f 100644 --- a/nemo/src/rule_model/components/fact.rs +++ b/nemo/src/rule_model/components/fact.rs @@ -75,7 +75,7 @@ impl Hash for Fact { } impl ProgramComponent for Fact { - fn parse(_string: &str) -> Result + fn parse(_string: &str) -> Result where Self: Sized, { @@ -94,7 +94,7 @@ impl ProgramComponent for Fact { self } - fn validate(&self) -> Result<(), crate::rule_model::error::ProgramValidationError> + fn validate(&self) -> Result<(), crate::rule_model::error::ValidationError> where Self: Sized, { diff --git a/nemo/src/rule_model/components/import_export.rs b/nemo/src/rule_model/components/import_export.rs index 4214ff7b8..fee9be91b 100644 --- a/nemo/src/rule_model/components/import_export.rs +++ b/nemo/src/rule_model/components/import_export.rs @@ -104,7 +104,7 @@ impl Display for ImportDirective { } impl ProgramComponent for ImportDirective { - fn parse(_string: &str) -> Result + fn parse(_string: &str) -> Result where Self: Sized, { @@ -123,7 +123,7 @@ impl ProgramComponent for ImportDirective { self } - fn validate(&self) -> Result<(), crate::rule_model::error::ProgramValidationError> + fn validate(&self) -> Result<(), crate::rule_model::error::ValidationError> where Self: Sized, { @@ -176,7 +176,7 @@ impl Display for ExportDirective { } impl ProgramComponent for ExportDirective { - fn parse(_string: &str) -> Result + fn parse(_string: &str) -> Result where Self: Sized, { @@ -195,7 +195,7 @@ impl ProgramComponent for ExportDirective { self } - fn validate(&self) -> Result<(), crate::rule_model::error::ProgramValidationError> + fn validate(&self) -> Result<(), crate::rule_model::error::ValidationError> where Self: Sized, 
{ diff --git a/nemo/src/rule_model/components/literal.rs b/nemo/src/rule_model/components/literal.rs index 5b5d74868..032bc5642 100644 --- a/nemo/src/rule_model/components/literal.rs +++ b/nemo/src/rule_model/components/literal.rs @@ -2,7 +2,7 @@ use std::{fmt::Display, hash::Hash}; -use crate::rule_model::error::ProgramValidationError; +use crate::rule_model::error::ValidationError; use super::{atom::Atom, term::operation::Operation, ProgramComponent}; @@ -32,7 +32,7 @@ impl Display for Literal { } impl ProgramComponent for Literal { - fn parse(_string: &str) -> Result + fn parse(_string: &str) -> Result where Self: Sized, { @@ -58,7 +58,7 @@ impl ProgramComponent for Literal { } } - fn validate(&self) -> Result<(), ProgramValidationError> + fn validate(&self) -> Result<(), ValidationError> where Self: Sized, { diff --git a/nemo/src/rule_model/components/output.rs b/nemo/src/rule_model/components/output.rs index b1dbb72fa..8c0bac8a8 100644 --- a/nemo/src/rule_model/components/output.rs +++ b/nemo/src/rule_model/components/output.rs @@ -47,7 +47,7 @@ impl Hash for Output { } impl ProgramComponent for Output { - fn parse(_string: &str) -> Result + fn parse(_string: &str) -> Result where Self: Sized, { @@ -66,7 +66,7 @@ impl ProgramComponent for Output { self } - fn validate(&self) -> Result<(), crate::rule_model::error::ProgramValidationError> + fn validate(&self) -> Result<(), crate::rule_model::error::ValidationError> where Self: Sized, { diff --git a/nemo/src/rule_model/components/rule.rs b/nemo/src/rule_model/components/rule.rs index 0281c1e88..947d80bff 100644 --- a/nemo/src/rule_model/components/rule.rs +++ b/nemo/src/rule_model/components/rule.rs @@ -106,7 +106,7 @@ impl Hash for Rule { } impl ProgramComponent for Rule { - fn parse(_string: &str) -> Result + fn parse(_string: &str) -> Result where Self: Sized, { @@ -125,7 +125,7 @@ impl ProgramComponent for Rule { self } - fn validate(&self) -> Result<(), crate::rule_model::error::ProgramValidationError> + fn validate(&self) -> Result<(), crate::rule_model::error::ValidationError> where Self: Sized, { diff --git a/nemo/src/rule_model/components/term.rs b/nemo/src/rule_model/components/term.rs index 2134a5d6d..a379ffe6d 100644 --- a/nemo/src/rule_model/components/term.rs +++ b/nemo/src/rule_model/components/term.rs @@ -26,7 +26,7 @@ use primitive::{ }; use tuple::Tuple; -use crate::rule_model::{error::ProgramValidationError, origin::Origin}; +use crate::rule_model::{error::ValidationError, origin::Origin}; use super::{IterableVariables, ProgramComponent}; @@ -166,7 +166,7 @@ impl Display for Term { } impl ProgramComponent for Term { - fn parse(_string: &str) -> Result + fn parse(_string: &str) -> Result where Self: Sized, { @@ -196,7 +196,7 @@ impl ProgramComponent for Term { } } - fn validate(&self) -> Result<(), ProgramValidationError> + fn validate(&self) -> Result<(), ValidationError> where Self: Sized, { diff --git a/nemo/src/rule_model/components/term/aggregate.rs b/nemo/src/rule_model/components/term/aggregate.rs index d1b1a8562..d4d6c4a7c 100644 --- a/nemo/src/rule_model/components/term/aggregate.rs +++ b/nemo/src/rule_model/components/term/aggregate.rs @@ -137,7 +137,7 @@ impl Hash for Aggregate { } impl ProgramComponent for Aggregate { - fn parse(_string: &str) -> Result + fn parse(_string: &str) -> Result where Self: Sized, { @@ -156,7 +156,7 @@ impl ProgramComponent for Aggregate { self } - fn validate(&self) -> Result<(), crate::rule_model::error::ProgramValidationError> + fn validate(&self) -> Result<(), 
crate::rule_model::error::ValidationError> where Self: Sized, { diff --git a/nemo/src/rule_model/components/term/function.rs b/nemo/src/rule_model/components/term/function.rs index 8bfe3ec86..08a6756b5 100644 --- a/nemo/src/rule_model/components/term/function.rs +++ b/nemo/src/rule_model/components/term/function.rs @@ -4,7 +4,7 @@ use std::{fmt::Display, hash::Hash}; use crate::rule_model::{ components::{IterableVariables, ProgramComponent, Tag}, - error::ProgramValidationError, + error::ValidationError, origin::Origin, }; @@ -100,7 +100,7 @@ impl Hash for FunctionTerm { } impl ProgramComponent for FunctionTerm { - fn parse(_string: &str) -> Result + fn parse(_string: &str) -> Result where Self: Sized, { @@ -119,7 +119,7 @@ impl ProgramComponent for FunctionTerm { self } - fn validate(&self) -> Result<(), ProgramValidationError> + fn validate(&self) -> Result<(), ValidationError> where Self: Sized, { diff --git a/nemo/src/rule_model/components/term/map.rs b/nemo/src/rule_model/components/term/map.rs index 9a755590e..c07e5c33c 100644 --- a/nemo/src/rule_model/components/term/map.rs +++ b/nemo/src/rule_model/components/term/map.rs @@ -84,7 +84,7 @@ impl Hash for Map { } impl ProgramComponent for Map { - fn parse(_string: &str) -> Result + fn parse(_string: &str) -> Result where Self: Sized, { @@ -103,7 +103,7 @@ impl ProgramComponent for Map { self } - fn validate(&self) -> Result<(), crate::rule_model::error::ProgramValidationError> + fn validate(&self) -> Result<(), crate::rule_model::error::ValidationError> where Self: Sized, { diff --git a/nemo/src/rule_model/components/term/operation.rs b/nemo/src/rule_model/components/term/operation.rs index 685ff15d5..4eee15d17 100644 --- a/nemo/src/rule_model/components/term/operation.rs +++ b/nemo/src/rule_model/components/term/operation.rs @@ -157,7 +157,7 @@ impl Hash for Operation { } impl ProgramComponent for Operation { - fn parse(_string: &str) -> Result + fn parse(_string: &str) -> Result where Self: Sized, { @@ -176,7 +176,7 @@ impl ProgramComponent for Operation { self } - fn validate(&self) -> Result<(), crate::rule_model::error::ProgramValidationError> + fn validate(&self) -> Result<(), crate::rule_model::error::ValidationError> where Self: Sized, { diff --git a/nemo/src/rule_model/components/term/primitive.rs b/nemo/src/rule_model/components/term/primitive.rs index 0f3e9f34a..9b49d4e97 100644 --- a/nemo/src/rule_model/components/term/primitive.rs +++ b/nemo/src/rule_model/components/term/primitive.rs @@ -103,7 +103,7 @@ impl Display for Primitive { } impl ProgramComponent for Primitive { - fn parse(_string: &str) -> Result + fn parse(_string: &str) -> Result where Self: Sized, { @@ -127,7 +127,7 @@ impl ProgramComponent for Primitive { } } - fn validate(&self) -> Result<(), crate::rule_model::error::ProgramValidationError> + fn validate(&self) -> Result<(), crate::rule_model::error::ValidationError> where Self: Sized, { diff --git a/nemo/src/rule_model/components/term/primitive/ground.rs b/nemo/src/rule_model/components/term/primitive/ground.rs index 9fa365bac..26f20fa03 100644 --- a/nemo/src/rule_model/components/term/primitive/ground.rs +++ b/nemo/src/rule_model/components/term/primitive/ground.rs @@ -5,7 +5,7 @@ use std::{fmt::Display, hash::Hash}; use nemo_physical::datavalues::{AnyDataValue, IriDataValue}; use crate::rule_model::{ - components::ProgramComponent, error::ProgramValidationError, origin::Origin, + components::ProgramComponent, error::ValidationError, origin::Origin, }; /// Primitive ground term @@ -97,7 +97,7 @@ impl 
Hash for GroundTerm { } impl ProgramComponent for GroundTerm { - fn parse(_string: &str) -> Result + fn parse(_string: &str) -> Result where Self: Sized, { @@ -116,7 +116,7 @@ impl ProgramComponent for GroundTerm { self } - fn validate(&self) -> Result<(), ProgramValidationError> + fn validate(&self) -> Result<(), ValidationError> where Self: Sized, { diff --git a/nemo/src/rule_model/components/term/primitive/variable.rs b/nemo/src/rule_model/components/term/primitive/variable.rs index 9e4e580a8..861cb1b31 100644 --- a/nemo/src/rule_model/components/term/primitive/variable.rs +++ b/nemo/src/rule_model/components/term/primitive/variable.rs @@ -5,7 +5,7 @@ use std::fmt::Display; use existential::ExistentialVariable; use universal::UniversalVariable; -use crate::rule_model::{error::ProgramValidationError, origin::Origin}; +use crate::rule_model::{error::ValidationError, origin::Origin}; use super::ProgramComponent; @@ -93,7 +93,7 @@ impl Display for Variable { } impl ProgramComponent for Variable { - fn parse(_string: &str) -> Result + fn parse(_string: &str) -> Result where Self: Sized, { @@ -117,7 +117,7 @@ impl ProgramComponent for Variable { } } - fn validate(&self) -> Result<(), ProgramValidationError> + fn validate(&self) -> Result<(), ValidationError> where Self: Sized, { diff --git a/nemo/src/rule_model/components/term/primitive/variable/existential.rs b/nemo/src/rule_model/components/term/primitive/variable/existential.rs index 7213cddfb..683925c68 100644 --- a/nemo/src/rule_model/components/term/primitive/variable/existential.rs +++ b/nemo/src/rule_model/components/term/primitive/variable/existential.rs @@ -2,9 +2,7 @@ use std::{fmt::Display, hash::Hash}; -use crate::rule_model::{ - components::ProgramComponent, error::ProgramValidationError, origin::Origin, -}; +use crate::rule_model::{components::ProgramComponent, error::ValidationError, origin::Origin}; use super::VariableName; @@ -60,7 +58,7 @@ impl Hash for ExistentialVariable { } impl ProgramComponent for ExistentialVariable { - fn parse(_string: &str) -> Result + fn parse(_string: &str) -> Result where Self: Sized, { @@ -79,7 +77,7 @@ impl ProgramComponent for ExistentialVariable { self } - fn validate(&self) -> Result<(), ProgramValidationError> + fn validate(&self) -> Result<(), ValidationError> where Self: Sized, { diff --git a/nemo/src/rule_model/components/term/primitive/variable/universal.rs b/nemo/src/rule_model/components/term/primitive/variable/universal.rs index a510e6b12..8d7b98bbb 100644 --- a/nemo/src/rule_model/components/term/primitive/variable/universal.rs +++ b/nemo/src/rule_model/components/term/primitive/variable/universal.rs @@ -2,9 +2,7 @@ use std::{fmt::Display, hash::Hash}; -use crate::rule_model::{ - components::ProgramComponent, error::ProgramValidationError, origin::Origin, -}; +use crate::rule_model::{components::ProgramComponent, error::ValidationError, origin::Origin}; use super::VariableName; @@ -83,7 +81,7 @@ impl Hash for UniversalVariable { } impl ProgramComponent for UniversalVariable { - fn parse(_string: &str) -> Result { + fn parse(_string: &str) -> Result { todo!() } @@ -99,7 +97,7 @@ impl ProgramComponent for UniversalVariable { self } - fn validate(&self) -> Result<(), ProgramValidationError> + fn validate(&self) -> Result<(), ValidationError> where Self: Sized, { diff --git a/nemo/src/rule_model/components/term/tuple.rs b/nemo/src/rule_model/components/term/tuple.rs index 21e373066..d7b182a29 100644 --- a/nemo/src/rule_model/components/term/tuple.rs +++ 
b/nemo/src/rule_model/components/term/tuple.rs @@ -81,7 +81,7 @@ impl PartialOrd for Tuple { } impl ProgramComponent for Tuple { - fn parse(_string: &str) -> Result + fn parse(_string: &str) -> Result where Self: Sized, { @@ -100,7 +100,7 @@ impl ProgramComponent for Tuple { self } - fn validate(&self) -> Result<(), crate::rule_model::error::ProgramValidationError> + fn validate(&self) -> Result<(), crate::rule_model::error::ValidationError> where Self: Sized, { diff --git a/nemo/src/rule_model/error.rs b/nemo/src/rule_model/error.rs index ace65cbb1..31fe04a6e 100644 --- a/nemo/src/rule_model/error.rs +++ b/nemo/src/rule_model/error.rs @@ -11,18 +11,24 @@ use super::components::ProgramComponent; /// Error that occurs during validation of a program. #[derive(Debug)] -pub struct ProgramValidationError { +pub struct ValidationError { /// The kind of error kind: ValidationErrorKind, /// stack of components in which the error occurred - context: Vec>, + context: Vec>, // Just use (ComponentType, Origin) instead } -impl Display for ProgramValidationError { +impl Display for ValidationError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}", self.kind) } } +/// +#[derive(Debug)] +pub struct ValidationErrorBuilder { + current_context: Vec>, +} + // #[derive(Debug)] // pub struct ProgramValidationErrors {} diff --git a/nemo/src/rule_model/error/translation_error.rs b/nemo/src/rule_model/error/translation_error.rs index bc0eeb824..cc342a0ba 100644 --- a/nemo/src/rule_model/error/translation_error.rs +++ b/nemo/src/rule_model/error/translation_error.rs @@ -6,4 +6,8 @@ use thiserror::Error; /// while translating the ASP representation of a nemo program /// into its logical representation. #[derive(Error, Debug, Copy, Clone)] -pub enum TranslationErrorKind {} +pub enum TranslationErrorKind { + /// A negated atom was used in the head of a rule + #[error(r#"negated atom used in head"#)] + HeadAtomNegated, +} From 052e4100335c14e42e7ec63048ca2520ef29be3a Mon Sep 17 00:00:00 2001 From: Alex Ivliev Date: Wed, 17 Jul 2024 01:31:13 +0200 Subject: [PATCH 126/214] Add basic error handling architecture --- nemo/src/parser.rs | 4 +- nemo/src/parser/ast.rs | 20 ++ nemo/src/parser/ast/program.rs | 21 ++- nemo/src/parser/span.rs | 7 + nemo/src/rule_model.rs | 1 + nemo/src/rule_model/components.rs | 7 +- nemo/src/rule_model/components/atom.rs | 9 +- nemo/src/rule_model/components/base.rs | 4 +- nemo/src/rule_model/components/fact.rs | 4 +- .../rule_model/components/import_export.rs | 6 +- nemo/src/rule_model/components/literal.rs | 4 +- nemo/src/rule_model/components/output.rs | 4 +- nemo/src/rule_model/components/rule.rs | 84 ++++++++- nemo/src/rule_model/components/term.rs | 7 +- .../rule_model/components/term/aggregate.rs | 3 +- .../rule_model/components/term/function.rs | 6 +- nemo/src/rule_model/components/term/map.rs | 3 +- .../rule_model/components/term/operation.rs | 28 ++- .../rule_model/components/term/primitive.rs | 7 +- .../components/term/primitive/ground.rs | 6 +- .../components/term/primitive/variable.rs | 11 +- .../term/primitive/variable/existential.rs | 8 +- .../term/primitive/variable/universal.rs | 8 +- nemo/src/rule_model/components/term/tuple.rs | 3 +- nemo/src/rule_model/error.rs | 112 +++++++++++- nemo/src/rule_model/program.rs | 26 ++- nemo/src/rule_model/translation.rs | 172 ++++++++++++++++++ 27 files changed, 512 insertions(+), 63 deletions(-) create mode 100644 nemo/src/rule_model/translation.rs diff --git a/nemo/src/parser.rs b/nemo/src/parser.rs index 
b67bdc5ff..51b2c9135 100644 --- a/nemo/src/parser.rs +++ b/nemo/src/parser.rs @@ -76,12 +76,10 @@ impl<'a> ParserErrorReport<'a> { &'a self, color_error: Color, ) -> impl Iterator)>> { - // report.with_message(format!("")) - self.errors.iter().map(move |error| { let message = format!("expected `{}`", error.context[0].name()); - Report::build(ReportKind::Error, self.label.clone(), 0) + Report::build(ReportKind::Error, self.label.clone(), error.position.offset) .with_message(message.clone()) .with_label( Label::new((self.label.clone(), error.position.range())) diff --git a/nemo/src/parser/ast.rs b/nemo/src/parser/ast.rs index edd85be28..332ae66e3 100644 --- a/nemo/src/parser/ast.rs +++ b/nemo/src/parser/ast.rs @@ -6,6 +6,8 @@ pub mod rule; pub mod tag; pub mod token; +use crate::rule_model::origin::Origin; + use super::{span::ProgramSpan, ParserInput, ParserResult}; /// Trait implemented by nodes in the abstract syntax tree @@ -20,4 +22,22 @@ pub trait ProgramAST<'a>: Sync { fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self> where Self: Sized + 'a; + + /// Locate a node from a stack of [Origin]s. + fn locate(&'a self, origin_stack: &[Origin]) -> Option<&'a dyn ProgramAST<'a>> + where + Self: Sized + 'a, + { + let mut current_node: &dyn ProgramAST = self; + + for origin in origin_stack { + if let &Origin::External(index) = origin { + current_node = *current_node.children().get(index)?; + } else { + return None; + } + } + + Some(current_node) + } } diff --git a/nemo/src/parser/ast/program.rs b/nemo/src/parser/ast/program.rs index 9cdc2ca7e..c459a9dcc 100644 --- a/nemo/src/parser/ast/program.rs +++ b/nemo/src/parser/ast/program.rs @@ -11,16 +11,29 @@ use super::{rule::Rule, ProgramAST}; /// AST representation of a nemo program #[derive(Debug)] pub struct Program<'a> { - rules: Rule<'a>, + statements: Rule<'a>, +} + +impl<'a> Program<'a> { + /// Return an iterator of statements in the program. + pub fn statements(&self) -> &Rule<'a> { + // TODO: This is simply a rule now + &self.statements + } } impl<'a> ProgramAST<'a> for Program<'a> { fn children(&self) -> Vec<&dyn ProgramAST> { - todo!() + // TODO: Fix this once we have statements + let mut result = Vec::<&dyn ProgramAST>::new(); + result.push(&self.statements); + + result } fn span(&self) -> crate::parser::span::ProgramSpan { - todo!() + // TODO: Fix this once we have statements + self.statements.span() } fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self> @@ -28,6 +41,6 @@ impl<'a> ProgramAST<'a> for Program<'a> { Self: Sized + 'a, { context(ParserContext::Program, Rule::parse)(input) - .map(|(rest, result)| (rest, Program { rules: result })) + .map(|(rest, result)| (rest, Program { statements: result })) } } diff --git a/nemo/src/parser/span.rs b/nemo/src/parser/span.rs index a34005875..ceef435a6 100644 --- a/nemo/src/parser/span.rs +++ b/nemo/src/parser/span.rs @@ -61,6 +61,13 @@ pub struct CharacterRange { pub end: CharacterPosition, } +impl CharacterRange { + /// Return this information as a [Range]. 
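+    ///
+    /// The result is the half-open byte-offset range `start.offset..end.offset`,
+    /// the form consumed by `Label::new` in the error report above (a sketch):
+    ///
+    /// ```ignore
+    /// let range = error.position.range();
+    /// assert_eq!(range.start, error.position.start.offset);
+    /// ```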
+ pub fn range(&self) -> Range { + self.start.offset..self.end.offset + } +} + /// Maker for a region of text within a string slice #[derive(Debug, Clone, Copy)] pub struct ProgramSpan<'a>(pub(crate) LocatedSpan<&'a str>); diff --git a/nemo/src/rule_model.rs b/nemo/src/rule_model.rs index 3c07f0a46..bc061e0f8 100644 --- a/nemo/src/rule_model.rs +++ b/nemo/src/rule_model.rs @@ -9,3 +9,4 @@ pub(crate) mod syntax; pub mod components; pub mod error; pub mod program; +pub mod translation; diff --git a/nemo/src/rule_model/components.rs b/nemo/src/rule_model/components.rs index 24f92b0bc..104f089e4 100644 --- a/nemo/src/rule_model/components.rs +++ b/nemo/src/rule_model/components.rs @@ -14,7 +14,10 @@ use std::fmt::{Debug, Display}; use term::primitive::variable::Variable; -use super::{error::ValidationError, origin::Origin}; +use super::{ + error::{ValidationError, ValidationErrorBuilder}, + origin::Origin, +}; /// Name of a term #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] @@ -54,7 +57,7 @@ pub trait ProgramComponent: Debug + Display { Self: Sized; /// Validate this component - fn validate(&self) -> Result<(), ValidationError> + fn validate(&self, builder: &mut ValidationErrorBuilder) -> Result<(), ()> where Self: Sized; } diff --git a/nemo/src/rule_model/components/atom.rs b/nemo/src/rule_model/components/atom.rs index 7d60a7165..156c2e007 100644 --- a/nemo/src/rule_model/components/atom.rs +++ b/nemo/src/rule_model/components/atom.rs @@ -2,7 +2,10 @@ use std::{fmt::Display, hash::Hash}; -use crate::rule_model::{error::ValidationError, origin::Origin}; +use crate::rule_model::{ + error::{ValidationError, ValidationErrorBuilder}, + origin::Origin, +}; use super::{ term::{primitive::variable::Variable, Term}, @@ -112,7 +115,7 @@ impl ProgramComponent for Atom { self } - fn validate(&self) -> Result<(), ValidationError> + fn validate(&self, builder: &mut ValidationErrorBuilder) -> Result<(), ()> where Self: Sized, { @@ -121,7 +124,7 @@ impl ProgramComponent for Atom { } for term in self.subterms() { - term.validate()?; + term.validate(builder)?; } Ok(()) diff --git a/nemo/src/rule_model/components/base.rs b/nemo/src/rule_model/components/base.rs index b52dfc7b2..a005bbc9e 100644 --- a/nemo/src/rule_model/components/base.rs +++ b/nemo/src/rule_model/components/base.rs @@ -2,7 +2,7 @@ use std::{fmt::Display, hash::Hash}; -use crate::rule_model::origin::Origin; +use crate::rule_model::{error::ValidationErrorBuilder, origin::Origin}; use super::ProgramComponent; @@ -64,7 +64,7 @@ impl ProgramComponent for Base { self } - fn validate(&self) -> Result<(), crate::rule_model::error::ValidationError> + fn validate(&self, builder: &mut ValidationErrorBuilder) -> Result<(), ()> where Self: Sized, { diff --git a/nemo/src/rule_model/components/fact.rs b/nemo/src/rule_model/components/fact.rs index cb9de1d1f..ed6b24b3a 100644 --- a/nemo/src/rule_model/components/fact.rs +++ b/nemo/src/rule_model/components/fact.rs @@ -2,7 +2,7 @@ use std::{fmt::Display, hash::Hash}; -use crate::rule_model::origin::Origin; +use crate::rule_model::{error::ValidationErrorBuilder, origin::Origin}; use super::{term::Term, ProgramComponent, Tag}; @@ -94,7 +94,7 @@ impl ProgramComponent for Fact { self } - fn validate(&self) -> Result<(), crate::rule_model::error::ValidationError> + fn validate(&self, builder: &mut ValidationErrorBuilder) -> Result<(), ()> where Self: Sized, { diff --git a/nemo/src/rule_model/components/import_export.rs b/nemo/src/rule_model/components/import_export.rs index fee9be91b..13eaefd74 
100644 --- a/nemo/src/rule_model/components/import_export.rs +++ b/nemo/src/rule_model/components/import_export.rs @@ -9,7 +9,7 @@ use std::{fmt::Display, hash::Hash}; use file_formats::FileFormat; -use crate::rule_model::origin::Origin; +use crate::rule_model::{error::ValidationErrorBuilder, origin::Origin}; use super::{term::map::Map, ProgramComponent, Tag}; @@ -123,7 +123,7 @@ impl ProgramComponent for ImportDirective { self } - fn validate(&self) -> Result<(), crate::rule_model::error::ValidationError> + fn validate(&self, builder: &mut ValidationErrorBuilder) -> Result<(), ()> where Self: Sized, { @@ -195,7 +195,7 @@ impl ProgramComponent for ExportDirective { self } - fn validate(&self) -> Result<(), crate::rule_model::error::ValidationError> + fn validate(&self, builder: &mut ValidationErrorBuilder) -> Result<(), ()> where Self: Sized, { diff --git a/nemo/src/rule_model/components/literal.rs b/nemo/src/rule_model/components/literal.rs index 032bc5642..e82e84f3d 100644 --- a/nemo/src/rule_model/components/literal.rs +++ b/nemo/src/rule_model/components/literal.rs @@ -2,7 +2,7 @@ use std::{fmt::Display, hash::Hash}; -use crate::rule_model::error::ValidationError; +use crate::rule_model::error::{ValidationError, ValidationErrorBuilder}; use super::{atom::Atom, term::operation::Operation, ProgramComponent}; @@ -58,7 +58,7 @@ impl ProgramComponent for Literal { } } - fn validate(&self) -> Result<(), ValidationError> + fn validate(&self, builder: &mut ValidationErrorBuilder) -> Result<(), ()> where Self: Sized, { diff --git a/nemo/src/rule_model/components/output.rs b/nemo/src/rule_model/components/output.rs index 8c0bac8a8..028871d84 100644 --- a/nemo/src/rule_model/components/output.rs +++ b/nemo/src/rule_model/components/output.rs @@ -2,7 +2,7 @@ use std::{fmt::Display, hash::Hash}; -use crate::rule_model::origin::Origin; +use crate::rule_model::{error::ValidationErrorBuilder, origin::Origin}; use super::{ProgramComponent, Tag}; @@ -66,7 +66,7 @@ impl ProgramComponent for Output { self } - fn validate(&self) -> Result<(), crate::rule_model::error::ValidationError> + fn validate(&self, builder: &mut ValidationErrorBuilder) -> Result<(), ()> where Self: Sized, { diff --git a/nemo/src/rule_model/components/rule.rs b/nemo/src/rule_model/components/rule.rs index 947d80bff..d7b6438f6 100644 --- a/nemo/src/rule_model/components/rule.rs +++ b/nemo/src/rule_model/components/rule.rs @@ -1,10 +1,24 @@ //! This module defines [Rule] and [RuleBuilder] -use std::{fmt::Display, hash::Hash}; - -use crate::rule_model::origin::Origin; - -use super::{atom::Atom, literal::Literal, term::operation::Operation, ProgramComponent}; +use std::{collections::HashSet, fmt::Display, hash::Hash}; + +use nemo_physical::aggregates::operation; + +use crate::rule_model::{ + error::{validation_error::ValidationErrorKind, ValidationErrorBuilder}, + origin::Origin, +}; + +use super::{ + atom::Atom, + literal::Literal, + term::{ + operation::Operation, + primitive::{variable::Variable, Primitive}, + Term, + }, + IterableVariables, ProgramComponent, +}; /// Rule /// @@ -66,6 +80,46 @@ impl Rule { pub fn head_mut(&mut self) -> &mut Vec { &mut self.head } + + /// Return a list of "safe" variables. + /// + /// A variable is considered safe, + /// if it occurs in a positive body atom, + /// or is derived via the equality operation + /// from other safe variables. 
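+    ///
+    /// For example, in a rule like `a(?x, ?z) :- b(?x, ?y), ?z = ?y` the
+    /// variables `?x` and `?y` are safe via the positive atom `b`, and `?z`
+    /// is safe because it is equated with the safe variable `?y`
+    /// (an illustrative sketch; the equality surface syntax is assumed here):
+    ///
+    /// ```ignore
+    /// let safe = rule.safe_variables();
+    /// // every head variable must be contained in `safe` for the rule to validate
+    /// ```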
+ pub fn safe_variables(&self) -> HashSet<&Variable> { + let mut result = HashSet::new(); + + for literal in &self.body { + if let Literal::Positive(atom) = literal { + for term in atom.subterms() { + if let Term::Primitive(Primitive::Variable(variable)) = term { + result.insert(variable); + } + } + } + } + + loop { + let current_count = result.len(); + + for literal in &self.body { + if let Literal::Operation(operation) = literal { + if let Some((variable, term)) = operation.variable_assignment() { + if term.variables().all(|variable| result.contains(variable)) { + result.insert(variable); + } + } + } + } + + if result.len() == current_count { + break; + } + } + + result + } } impl Display for Rule { @@ -125,11 +179,27 @@ impl ProgramComponent for Rule { self } - fn validate(&self) -> Result<(), crate::rule_model::error::ValidationError> + fn validate(&self, builder: &mut ValidationErrorBuilder) -> Result<(), ()> where Self: Sized, { - todo!() + let safe_variables = self.safe_variables(); + let head_variables = self + .head + .iter() + .flat_map(|atom| atom.variables()) + .collect::>(); + + for &head_variable in &head_variables { + if !safe_variables.contains(head_variable) { + builder.report_error( + head_variable.origin(), + ValidationErrorKind::HeadUnsafe(head_variable.clone()), + ); + } + } + + Ok(()) } } diff --git a/nemo/src/rule_model/components/term.rs b/nemo/src/rule_model/components/term.rs index a379ffe6d..19bbd5969 100644 --- a/nemo/src/rule_model/components/term.rs +++ b/nemo/src/rule_model/components/term.rs @@ -26,7 +26,10 @@ use primitive::{ }; use tuple::Tuple; -use crate::rule_model::{error::ValidationError, origin::Origin}; +use crate::rule_model::{ + error::{ValidationError, ValidationErrorBuilder}, + origin::Origin, +}; use super::{IterableVariables, ProgramComponent}; @@ -196,7 +199,7 @@ impl ProgramComponent for Term { } } - fn validate(&self) -> Result<(), ValidationError> + fn validate(&self, builder: &mut ValidationErrorBuilder) -> Result<(), ()> where Self: Sized, { diff --git a/nemo/src/rule_model/components/term/aggregate.rs b/nemo/src/rule_model/components/term/aggregate.rs index d4d6c4a7c..09214e1c7 100644 --- a/nemo/src/rule_model/components/term/aggregate.rs +++ b/nemo/src/rule_model/components/term/aggregate.rs @@ -8,6 +8,7 @@ use strum_macros::EnumIter; use crate::rule_model::{ components::{IterableVariables, ProgramComponent}, + error::ValidationErrorBuilder, origin::Origin, syntax::aggregates, }; @@ -156,7 +157,7 @@ impl ProgramComponent for Aggregate { self } - fn validate(&self) -> Result<(), crate::rule_model::error::ValidationError> + fn validate(&self, builder: &mut ValidationErrorBuilder) -> Result<(), ()> where Self: Sized, { diff --git a/nemo/src/rule_model/components/term/function.rs b/nemo/src/rule_model/components/term/function.rs index 08a6756b5..00871002b 100644 --- a/nemo/src/rule_model/components/term/function.rs +++ b/nemo/src/rule_model/components/term/function.rs @@ -4,7 +4,7 @@ use std::{fmt::Display, hash::Hash}; use crate::rule_model::{ components::{IterableVariables, ProgramComponent, Tag}, - error::ValidationError, + error::{ValidationError, ValidationErrorBuilder}, origin::Origin, }; @@ -119,7 +119,7 @@ impl ProgramComponent for FunctionTerm { self } - fn validate(&self) -> Result<(), ValidationError> + fn validate(&self, builder: &mut ValidationErrorBuilder) -> Result<(), ()> where Self: Sized, { @@ -128,7 +128,7 @@ impl ProgramComponent for FunctionTerm { } for term in self.subterms() { - term.validate()? 
+ term.validate(builder)? } Ok(()) diff --git a/nemo/src/rule_model/components/term/map.rs b/nemo/src/rule_model/components/term/map.rs index c07e5c33c..3188ade7f 100644 --- a/nemo/src/rule_model/components/term/map.rs +++ b/nemo/src/rule_model/components/term/map.rs @@ -4,6 +4,7 @@ use std::{fmt::Display, hash::Hash}; use crate::rule_model::{ components::{IterableVariables, ProgramComponent, Tag}, + error::ValidationErrorBuilder, origin::Origin, }; @@ -103,7 +104,7 @@ impl ProgramComponent for Map { self } - fn validate(&self) -> Result<(), crate::rule_model::error::ValidationError> + fn validate(&self, builder: &mut ValidationErrorBuilder) -> Result<(), ()> where Self: Sized, { diff --git a/nemo/src/rule_model/components/term/operation.rs b/nemo/src/rule_model/components/term/operation.rs index 4eee15d17..552c91b5a 100644 --- a/nemo/src/rule_model/components/term/operation.rs +++ b/nemo/src/rule_model/components/term/operation.rs @@ -8,10 +8,14 @@ use operation_kind::OperationKind; use crate::rule_model::{ components::{IterableVariables, ProgramComponent}, + error::ValidationErrorBuilder, origin::Origin, }; -use super::{primitive::variable::Variable, Term}; +use super::{ + primitive::{variable::Variable, Primitive}, + Term, +}; /// Operation /// @@ -42,6 +46,26 @@ impl Operation { pub fn new_from_name(operation: &str, subterms: Vec) -> Option { Some(Self::new(OperationKind::from_name(operation)?, subterms)) } + + /// Check whether this operation has the form of an assignment of a variable to a term. + /// If so return the variable and the term as a pair or `None` otherwise. + /// + /// # Panics + /// Panics if this component is invalid. + pub fn variable_assignment(&self) -> Option<(&Variable, &Term)> { + if self.kind != OperationKind::Equal { + return None; + } + + let left = self.subterms.get(0).expect("invalid program component"); + let right = self.subterms.get(1).expect("invalid program component"); + + if let Term::Primitive(Primitive::Variable(variable)) = left { + Some((variable, right)) + } else { + None + } + } } // Helper functions related to the display implementation @@ -176,7 +200,7 @@ impl ProgramComponent for Operation { self } - fn validate(&self) -> Result<(), crate::rule_model::error::ValidationError> + fn validate(&self, builder: &mut ValidationErrorBuilder) -> Result<(), ()> where Self: Sized, { diff --git a/nemo/src/rule_model/components/term/primitive.rs b/nemo/src/rule_model/components/term/primitive.rs index 9b49d4e97..d24a11bdc 100644 --- a/nemo/src/rule_model/components/term/primitive.rs +++ b/nemo/src/rule_model/components/term/primitive.rs @@ -11,6 +11,7 @@ use variable::{existential::ExistentialVariable, universal::UniversalVariable, V use crate::rule_model::{ components::{IterableVariables, ProgramComponent}, + error::ValidationErrorBuilder, origin::Origin, }; @@ -127,13 +128,13 @@ impl ProgramComponent for Primitive { } } - fn validate(&self) -> Result<(), crate::rule_model::error::ValidationError> + fn validate(&self, builder: &mut ValidationErrorBuilder) -> Result<(), ()> where Self: Sized, { match self { - Primitive::Variable(variable) => variable.validate(), - Primitive::Ground(ground) => ground.validate(), + Primitive::Variable(variable) => variable.validate(builder), + Primitive::Ground(ground) => ground.validate(builder), } } } diff --git a/nemo/src/rule_model/components/term/primitive/ground.rs b/nemo/src/rule_model/components/term/primitive/ground.rs index 26f20fa03..a8a68415e 100644 --- a/nemo/src/rule_model/components/term/primitive/ground.rs 
+++ b/nemo/src/rule_model/components/term/primitive/ground.rs @@ -5,7 +5,9 @@ use std::{fmt::Display, hash::Hash}; use nemo_physical::datavalues::{AnyDataValue, IriDataValue}; use crate::rule_model::{ - components::ProgramComponent, error::ValidationError, origin::Origin, + components::ProgramComponent, + error::{ValidationError, ValidationErrorBuilder}, + origin::Origin, }; /// Primitive ground term @@ -116,7 +118,7 @@ impl ProgramComponent for GroundTerm { self } - fn validate(&self) -> Result<(), ValidationError> + fn validate(&self, builder: &mut ValidationErrorBuilder) -> Result<(), ()> where Self: Sized, { diff --git a/nemo/src/rule_model/components/term/primitive/variable.rs b/nemo/src/rule_model/components/term/primitive/variable.rs index 861cb1b31..3209ee546 100644 --- a/nemo/src/rule_model/components/term/primitive/variable.rs +++ b/nemo/src/rule_model/components/term/primitive/variable.rs @@ -5,7 +5,10 @@ use std::fmt::Display; use existential::ExistentialVariable; use universal::UniversalVariable; -use crate::rule_model::{error::ValidationError, origin::Origin}; +use crate::rule_model::{ + error::{ValidationError, ValidationErrorBuilder}, + origin::Origin, +}; use super::ProgramComponent; @@ -117,16 +120,16 @@ impl ProgramComponent for Variable { } } - fn validate(&self) -> Result<(), ValidationError> + fn validate(&self, builder: &mut ValidationErrorBuilder) -> Result<(), ()> where Self: Sized, { match &self { Variable::Universal(universal) => { - universal.validate()?; + universal.validate(builder)?; } Variable::Existential(existential) => { - existential.validate()?; + existential.validate(builder)?; } } diff --git a/nemo/src/rule_model/components/term/primitive/variable/existential.rs b/nemo/src/rule_model/components/term/primitive/variable/existential.rs index 683925c68..f5cdac4d9 100644 --- a/nemo/src/rule_model/components/term/primitive/variable/existential.rs +++ b/nemo/src/rule_model/components/term/primitive/variable/existential.rs @@ -2,7 +2,11 @@ use std::{fmt::Display, hash::Hash}; -use crate::rule_model::{components::ProgramComponent, error::ValidationError, origin::Origin}; +use crate::rule_model::{ + components::ProgramComponent, + error::{ValidationError, ValidationErrorBuilder}, + origin::Origin, +}; use super::VariableName; @@ -77,7 +81,7 @@ impl ProgramComponent for ExistentialVariable { self } - fn validate(&self) -> Result<(), ValidationError> + fn validate(&self, builder: &mut ValidationErrorBuilder) -> Result<(), ()> where Self: Sized, { diff --git a/nemo/src/rule_model/components/term/primitive/variable/universal.rs b/nemo/src/rule_model/components/term/primitive/variable/universal.rs index 8d7b98bbb..97b309ad0 100644 --- a/nemo/src/rule_model/components/term/primitive/variable/universal.rs +++ b/nemo/src/rule_model/components/term/primitive/variable/universal.rs @@ -2,7 +2,11 @@ use std::{fmt::Display, hash::Hash}; -use crate::rule_model::{components::ProgramComponent, error::ValidationError, origin::Origin}; +use crate::rule_model::{ + components::ProgramComponent, + error::{ValidationError, ValidationErrorBuilder}, + origin::Origin, +}; use super::VariableName; @@ -97,7 +101,7 @@ impl ProgramComponent for UniversalVariable { self } - fn validate(&self) -> Result<(), ValidationError> + fn validate(&self, builder: &mut ValidationErrorBuilder) -> Result<(), ()> where Self: Sized, { diff --git a/nemo/src/rule_model/components/term/tuple.rs b/nemo/src/rule_model/components/term/tuple.rs index d7b182a29..980074d48 100644 --- 
a/nemo/src/rule_model/components/term/tuple.rs
+++ b/nemo/src/rule_model/components/term/tuple.rs
@@ -4,6 +4,7 @@ use std::{fmt::Display, hash::Hash};
 
 use crate::rule_model::{
     components::{IterableVariables, ProgramComponent},
+    error::ValidationErrorBuilder,
     origin::Origin,
 };
 
@@ -100,7 +101,7 @@ impl ProgramComponent for Tuple {
         self
     }
 
-    fn validate(&self) -> Result<(), crate::rule_model::error::ValidationError>
+    fn validate(&self, builder: &mut ValidationErrorBuilder) -> Result<(), ()>
     where
         Self: Sized,
     {
diff --git a/nemo/src/rule_model/error.rs b/nemo/src/rule_model/error.rs
index 31fe04a6e..37f79879b 100644
--- a/nemo/src/rule_model/error.rs
+++ b/nemo/src/rule_model/error.rs
@@ -1,21 +1,25 @@
-//! This module defines [ProgramValidationError]
+//! This module defines different kinds of errors that can occur
+//! while working with nemo programs.
 
 pub mod translation_error;
 pub mod validation_error;
 
 use std::fmt::Display;
 
+use translation_error::TranslationErrorKind;
 use validation_error::ValidationErrorKind;
 
-use super::components::ProgramComponent;
+use crate::parser::{ast::ProgramAST, span::CharacterRange};
+
+use super::origin::Origin;
 
 /// Error that occurs during validation of a program.
 #[derive(Debug)]
 pub struct ValidationError {
     /// The kind of error
     kind: ValidationErrorKind,
-    /// stack of components in which the error occurred
-    context: Vec>, // Just use (ComponentType, Origin) instead
+    /// Stack of [Origin] from which the original AST node can be derived
+    origin_stack: Vec<Origin>,
 }
 
 impl Display for ValidationError {
@@ -24,11 +28,101 @@ impl Display for ValidationError {
     }
 }
 
-///
-#[derive(Debug)]
+/// Builder for [ValidationError]
+#[derive(Debug, Default)]
 pub struct ValidationErrorBuilder {
-    current_context: Vec>,
+    /// Current stack of [Origin]
+    origin_stack: Vec<Origin>,
+    /// Current stack of [ValidationError]s
+    error_stack: Vec<ValidationError>,
+}
+
+impl ValidationErrorBuilder {
+    /// Push an [Origin] onto the stack.
+    pub fn push_origin(&mut self, origin: Origin) {
+        self.origin_stack.push(origin);
+    }
+
+    /// Pop the origin stack.
+    pub fn pop_origin(&mut self) {
+        self.origin_stack.pop();
+    }
+
+    /// Add a new error.
+    pub fn report_error(&mut self, origin: &Origin, error_kind: ValidationErrorKind) {
+        let mut origin_stack = self.origin_stack.clone();
+        origin_stack.push(origin.clone());
+
+        self.error_stack.push(ValidationError {
+            kind: error_kind,
+            origin_stack,
+        })
+    }
+
+    /// Finish building and return a list of [ValidationError]s.
+    pub fn finalize(self) -> Vec<ValidationError> {
+        println!("Stack: {:?}", self.error_stack);
+        self.error_stack
+    }
+}
+
+/// Error that occurs while translating the ast into the logical representation
+#[derive(Debug, Copy, Clone)]
+pub struct TranslationError {
+    /// The type of error that occurred
+    kind: TranslationErrorKind,
+    /// Range signifying the program part that should be highlighted
+    range: CharacterRange,
 }
 
-// #[derive(Debug)]
-// pub struct ProgramValidationErrors {}
+impl TranslationError {
+    /// Create a new [TranslationError] from a given .
+    pub fn new<'a, Node: ProgramAST<'a>>(ast: &'a Node, kind: TranslationErrorKind) -> Self {
+        Self {
+            kind,
+            range: ast.span().range(),
+        }
+    }
+}
+
+/// Error that may occur while translating or validating a nemo program
+#[derive(Debug)]
+pub enum ProgramError {
+    /// Error occurred while translating
+    /// the AST representation into the logical representation
+    TranslationError(TranslationError),
+    /// Error occurred while validating a certain program component
+    ValidationError(ValidationError),
+}
+
+impl ProgramError {
+    /// Return the message of the error
+    pub fn message(&self) -> String {
+        match self {
+            ProgramError::TranslationError(error) => error.kind.to_string(),
+            ProgramError::ValidationError(error) => error.kind.to_string(),
+        }
+    }
+
+    /// Return the [CharacterRange] associated with this error
+    pub fn range<'a, Node: ProgramAST<'a>>(&self, ast: &'a Node) -> CharacterRange {
+        match self {
+            ProgramError::TranslationError(error) => error.range,
+            ProgramError::ValidationError(error) => ast
+                .locate(&error.origin_stack)
+                .expect("invalid origin")
+                .span()
+                .range(),
+        }
+    }
+
+    /// Return the error code of the message
+    pub fn error_code(&self) -> usize {
+        123
+    }
+
+    /// Return an optional note that may be attached to the error
+    pub fn note(&self) -> Option<String> {
+        None
+    }
+}
diff --git a/nemo/src/rule_model/program.rs b/nemo/src/rule_model/program.rs
index fd6cfe90a..bc17f0d36 100644
--- a/nemo/src/rule_model/program.rs
+++ b/nemo/src/rule_model/program.rs
@@ -7,7 +7,6 @@ use crate::{io::parser::ast, rule_model::components::term::tuple::Tuple};
 use super::{
     components::{
         atom::Atom,
-        base::Base,
         fact::Fact,
         import_export::{ExportDirective, ImportDirective},
         literal::Literal,
@@ -38,6 +37,31 @@ pub struct Program {
     outputs: Vec<Output>,
 }
 
+/// Builder for [Program]s
+#[derive(Debug, Default)]
+pub struct ProgramBuilder {
+    /// Rules
+    rules: Vec<Rule>,
+}
+
+impl ProgramBuilder {
+    /// Finish building and return a [Program].
+    pub fn finalize(self) -> Program {
+        Program {
+            imports: Vec::default(),
+            exports: Vec::default(),
+            rules: self.rules,
+            facts: Vec::default(),
+            outputs: Vec::default(),
+        }
+    }
+
+    /// Add a [Rule].
+    pub fn add_rule(&mut self, rule: Rule) {
+        self.rules.push(rule)
+    }
+}
+
 impl Program {
     /// Build a [Program] from an [ast::program::Program].
     pub fn from_ast(ast_program: ast::program::Program) -> Self {
diff --git a/nemo/src/rule_model/translation.rs b/nemo/src/rule_model/translation.rs
new file mode 100644
index 000000000..c16ca2da3
--- /dev/null
+++ b/nemo/src/rule_model/translation.rs
@@ -0,0 +1,172 @@
+//! This module defines [ASTProgramTranslation].
+
+use std::ops::Range;
+
+use ariadne::{Color, Label, Report, ReportKind, Source};
+
+use crate::{
+    parser::ast,
+    rule_model::{components::rule::RuleBuilder, origin::Origin, program::ProgramBuilder},
+};
+
+use super::{
+    components::{atom::Atom, rule::Rule, ProgramComponent},
+    error::{ProgramError, TranslationError, ValidationErrorBuilder},
+    program::Program,
+};
+
+/// Object for handling the translation of the ast representation
+/// of a nemo program into its logical representation
+#[derive(Debug)]
+pub struct ASTProgramTranslation<'a> {
+    /// Original input string
+    input: &'a str,
+    /// Label of the input file
+    input_label: String,
+
+    /// Builder for [ValidationError]s
+    validation_error_builder: ValidationErrorBuilder,
+
+    /// Errors
+    errors: Vec<ProgramError>,
+}
+
+impl<'a> ASTProgramTranslation<'a> {
+    /// Initialize the [ASTProgramTranslation]
+    pub fn initialize(input: &'a str, input_label: String) -> Self {
+        Self {
+            input,
+            input_label,
+            validation_error_builder: ValidationErrorBuilder::default(),
+            errors: Vec::default(),
+        }
+    }
+}
+
+/// Report of all [ProgramError]s that occurred
+/// during the translation and validation of the AST
+#[derive(Debug)]
+pub struct ProgramErrorReport<'a> {
+    /// Original input string
+    input: &'a str,
+    /// Label of the input file
+    label: String,
+
+    /// Errors
+    errors: Vec<ProgramError>,
+}
+
+impl<'a> ProgramErrorReport<'a> {
+    /// Print the given reports.
+    pub fn eprint<'s, ReportIterator>(
+        &'s self,
+        reports: ReportIterator,
+    ) -> Result<(), std::io::Error>
+    where
+        ReportIterator: Iterator<Item = Report<'s, (String, Range<usize>)>>,
+    {
+        for report in reports {
+            report.eprint((self.label.clone(), Source::from(self.input)))?;
+        }
+
+        Ok(())
+    }
+
+    /// Build a [Report] for each error.
+    pub fn build_reports(
+        &'a self,
+        ast: &'a ast::program::Program<'a>,
+        color_error: Color,
+        color_note: Color,
+        color_hint: Color,
+    ) -> impl Iterator<Item = Report<'a, (String, Range<usize>)>> {
+        self.errors.iter().map(move |error| {
+            let range = error.range(ast);
+
+            let report = Report::build(ReportKind::Error, self.label.clone(), range.start.offset)
+                .with_code(error.error_code())
+                .with_message(error.message())
+                .with_label(
+                    Label::new((self.label.clone(), range.range()))
+                        .with_message(error.message())
+                        .with_color(color_error),
+                );
+
+            report.finish()
+        })
+    }
+}
+
+impl<'a> ASTProgramTranslation<'a> {
+    /// Translate the given [ProgramAST] into a [Program].
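As the module stands in this patch, the translation methods below are expected to drive the ValidationErrorBuilder from error.rs roughly like this (a minimal sketch, not part of the commit; Origin::External(i) identifies the i-th child at each level):

    // Sketch: validate one statement and collect errors with full origin paths.
    let mut builder = ValidationErrorBuilder::default();
    builder.push_origin(Origin::External(0)); // descend into statement 0
    builder.report_error(&Origin::External(2), ValidationErrorKind::HeadAnonymous);
    builder.pop_origin(); // back to the program level
    let errors = builder.finalize(); // this error records [External(0), External(2)]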
+ pub fn translate( + mut self, + ast: &ast::program::Program<'a>, + ) -> Result> { + let mut program_builder = ProgramBuilder::default(); + + for (statement_index, rule) in vec![ast.statements()].into_iter().enumerate() { + let origin = Origin::External(statement_index); + + match self.build_rule(origin, rule) { + Ok(new_rule) => program_builder.add_rule(new_rule), + Err(translation_error) => self + .errors + .push(ProgramError::TranslationError(translation_error)), + } + } + + self.errors.extend( + self.validation_error_builder + .finalize() + .into_iter() + .map(ProgramError::ValidationError), + ); + + if self.errors.is_empty() { + Ok(program_builder.finalize()) + } else { + Err(ProgramErrorReport { + input: self.input, + label: self.input_label, + errors: self.errors, + }) + } + } + + fn build_rule( + &mut self, + origin: Origin, + rule: &ast::rule::Rule<'a>, + ) -> Result { + self.validation_error_builder.push_origin(origin); + let mut rule_builder = RuleBuilder::default().origin(origin); + + let mut expression_counter: usize = 0; + for expression in rule.head() { + let origin_expression = Origin::External(expression_counter); + rule_builder.add_head_atom_mut(self.build_head_atom(origin_expression, expression)?); + + expression_counter += 1; + } + + let rule = rule_builder.finalize(); + + let _ = rule.validate(&mut self.validation_error_builder); + self.validation_error_builder.pop_origin(); + Ok(rule) + } + + fn build_head_atom( + &mut self, + origin: Origin, + head: &ast::expression::Expression<'a>, + ) -> Result { + self.validation_error_builder.push_origin(origin); + + let result = Atom::new("test", vec![]).set_origin(origin); + + self.validation_error_builder.pop_origin(); + Ok(result) + } +} From 78d2d0c855d3ee6ee63f913cd1f2b1d74a39583c Mon Sep 17 00:00:00 2001 From: Alex Ivliev Date: Wed, 17 Jul 2024 01:31:25 +0200 Subject: [PATCH 127/214] Add basic error handling architecture --- nemo-cli/src/main.rs | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/nemo-cli/src/main.rs b/nemo-cli/src/main.rs index 5e6d3ac8b..620d4376e 100644 --- a/nemo-cli/src/main.rs +++ b/nemo-cli/src/main.rs @@ -37,6 +37,7 @@ use nemo::{ meta::timing::{TimedCode, TimedDisplay}, model::{ExportDirective, Program}, parser::ParserErrorReport, + rule_model, }; /// Set exports according to command-line parameter. 
@@ -167,7 +168,7 @@ fn run(mut cli: CliApp) -> Result<(), Error> { filename: rules.to_string_lossy().to_string(), })?; - let program = + let program_ast = match nemo::parser::Parser::initialize(&rules_content, rules.to_string_lossy().to_string()) .parse() { @@ -178,6 +179,24 @@ fn run(mut cli: CliApp) -> Result<(), Error> { } }; + let program = match rule_model::translation::ASTProgramTranslation::initialize( + &rules_content, + rules.to_string_lossy().to_string(), + ) + .translate(&program_ast) + { + Ok(program) => program, + Err(report) => { + report.eprint(report.build_reports( + &program_ast, + Color::Red, + Color::Green, + Color::Green, + ))?; + std::process::exit(1); + } + }; + // let mut program = parse_program(rules_content)?; // let (ast, errors) = parse_program_str(&rules_content); From 29fc1369d5b72cc2b91561e29c79d68eecf9d82b Mon Sep 17 00:00:00 2001 From: Alex Ivliev Date: Wed, 17 Jul 2024 01:39:52 +0200 Subject: [PATCH 128/214] Add constant to the parser --- nemo/src/parser/ast/expression.rs | 12 +- nemo/src/parser/ast/expression/basic.rs | 1 + nemo/src/parser/ast/expression/basic/blank.rs | 2 +- .../parser/ast/expression/basic/constant.rs | 107 ++++++++++++++++++ nemo/src/parser/context.rs | 3 + 5 files changed, 118 insertions(+), 7 deletions(-) create mode 100644 nemo/src/parser/ast/expression/basic/constant.rs diff --git a/nemo/src/parser/ast/expression.rs b/nemo/src/parser/ast/expression.rs index b3274a948..fb506c757 100644 --- a/nemo/src/parser/ast/expression.rs +++ b/nemo/src/parser/ast/expression.rs @@ -5,7 +5,7 @@ pub mod complex; pub mod sequence; use basic::{ - blank::Blank, boolean::Boolean, iri::Iri, number::Number, rdf_literal::RdfLiteral, + blank::Blank, boolean::Boolean, constant::Constant, number::Number, rdf_literal::RdfLiteral, string::StringLiteral, variable::Variable, }; use complex::{atom::Atom, tuple::Tuple}; @@ -29,8 +29,8 @@ pub enum Expression<'a> { Blank(Blank<'a>), /// Boolean Boolean(Boolean<'a>), - /// Iri - Iri(Iri<'a>), + /// Constant + Constant(Constant<'a>), /// Number Number(Number<'a>), /// Rdf literal @@ -49,7 +49,7 @@ impl<'a> ProgramAST<'a> for Expression<'a> { Expression::Atom(expression) => expression.children(), Expression::Blank(expression) => expression.children(), Expression::Boolean(expression) => expression.children(), - Expression::Iri(expression) => expression.children(), + Expression::Constant(expression) => expression.children(), Expression::Number(expression) => expression.children(), Expression::RdfLiteral(expression) => expression.children(), Expression::String(expression) => expression.children(), @@ -63,7 +63,7 @@ impl<'a> ProgramAST<'a> for Expression<'a> { Expression::Atom(expression) => expression.span(), Expression::Blank(expression) => expression.span(), Expression::Boolean(expression) => expression.span(), - Expression::Iri(expression) => expression.span(), + Expression::Constant(expression) => expression.span(), Expression::Number(expression) => expression.span(), Expression::RdfLiteral(expression) => expression.span(), Expression::String(expression) => expression.span(), @@ -83,7 +83,7 @@ impl<'a> ProgramAST<'a> for Expression<'a> { map(Tuple::parse, Self::Tuple), map(Blank::parse, Self::Blank), map(Boolean::parse, Self::Boolean), - map(Iri::parse, Self::Iri), + map(Constant::parse, Self::Constant), map(Number::parse, Self::Number), map(RdfLiteral::parse, Self::RdfLiteral), map(StringLiteral::parse, Self::String), diff --git a/nemo/src/parser/ast/expression/basic.rs b/nemo/src/parser/ast/expression/basic.rs index 
6a3a710b3..fc40ad1fd 100644 --- a/nemo/src/parser/ast/expression/basic.rs +++ b/nemo/src/parser/ast/expression/basic.rs @@ -2,6 +2,7 @@ pub mod blank; pub mod boolean; +pub mod constant; pub mod iri; pub mod number; pub mod rdf_literal; diff --git a/nemo/src/parser/ast/expression/basic/blank.rs b/nemo/src/parser/ast/expression/basic/blank.rs index 04002b275..aa12b5219 100644 --- a/nemo/src/parser/ast/expression/basic/blank.rs +++ b/nemo/src/parser/ast/expression/basic/blank.rs @@ -1,4 +1,4 @@ -//! This module defines [Blank] +//! This module defines [Blank]. use nom::{branch::alt, sequence::pair}; diff --git a/nemo/src/parser/ast/expression/basic/constant.rs b/nemo/src/parser/ast/expression/basic/constant.rs new file mode 100644 index 000000000..b534f6a1f --- /dev/null +++ b/nemo/src/parser/ast/expression/basic/constant.rs @@ -0,0 +1,107 @@ +//! This module defines [Constant] + +use nom::{branch::alt, combinator::map}; + +use crate::parser::{ + ast::{token::Token, ProgramAST}, + context::{context, ParserContext}, + input::ParserInput, + span::ProgramSpan, + ParserResult, +}; + +use super::iri::Iri; + +// Type of constants +#[derive(Debug)] +enum ConstantKind<'a> { + /// Plain constant + Plain(Token<'a>), + /// Iri constant + Iri(Iri<'a>), +} + +/// AST node representing a constant +#[derive(Debug)] +pub struct Constant<'a> { + /// [ProgramSpan] associated with this node + span: ProgramSpan<'a>, + + /// The constant + constant: ConstantKind<'a>, +} + +impl<'a> Constant<'a> { + /// Return the name of the constant. + pub fn name(&self) -> String { + match &self.constant { + ConstantKind::Plain(token) => token.to_string(), + ConstantKind::Iri(iri) => iri.content(), + } + } +} + +impl<'a> ProgramAST<'a> for Constant<'a> { + fn children(&self) -> Vec<&dyn ProgramAST> { + Vec::default() + } + + fn span(&self) -> ProgramSpan { + self.span + } + + fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self> + where + Self: Sized + 'a, + { + let input_span = input.span; + + context( + ParserContext::Constant, + alt(( + map(Token::name, ConstantKind::Plain), + map(Iri::parse, ConstantKind::Iri), + )), + )(input) + .map(|(rest, constant)| { + let rest_span = rest.span; + + ( + rest, + Constant { + span: input_span.until_rest(&rest_span), + constant, + }, + ) + }) + } +} + +#[cfg(test)] +mod test { + use nom::combinator::all_consuming; + + use crate::parser::{ + ast::{expression::basic::constant::Constant, ProgramAST}, + input::ParserInput, + ParserState, + }; + + #[test] + fn parse_constant() { + let test = vec![ + ("abc", "abc".to_string()), + ("", "http://example.com".to_string()), + ]; + + for (input, expected) in test { + let parser_input = ParserInput::new(input, ParserState::default()); + let result = all_consuming(Constant::parse)(parser_input); + + assert!(result.is_ok()); + + let result = result.unwrap(); + assert_eq!(expected, result.1.name()); + } + } +} diff --git a/nemo/src/parser/context.rs b/nemo/src/parser/context.rs index df6825fe4..c58c2995b 100644 --- a/nemo/src/parser/context.rs +++ b/nemo/src/parser/context.rs @@ -31,6 +31,9 @@ pub enum ParserContext { /// Blank node #[assoc(name = "blank")] Blank, + /// Constant + #[assoc(name = "constant")] + Constant, /// Boolean #[assoc(name = "boolean")] Boolean, From c2f486e1d65bf037df5fbd71fc44419f3d4e4940 Mon Sep 17 00:00:00 2001 From: Alex Ivliev Date: Wed, 17 Jul 2024 04:06:02 +0200 Subject: [PATCH 129/214] Start translation and validation (WIP) --- Cargo.lock | 7 + nemo-cli/src/example.rls | 1 + nemo-cli/src/main.rs | 53 +---- 
nemo-cli/src/sample.tao | 13 -- nemo/Cargo.toml | 1 + nemo/src/parser/ast.rs | 5 +- nemo/src/parser/ast/expression.rs | 25 ++- nemo/src/parser/ast/expression/basic/blank.rs | 34 ++-- .../parser/ast/expression/basic/boolean.rs | 8 +- .../parser/ast/expression/basic/constant.rs | 8 +- nemo/src/parser/ast/expression/basic/iri.rs | 8 +- .../src/parser/ast/expression/basic/number.rs | 8 +- .../ast/expression/basic/rdf_literal.rs | 8 +- .../src/parser/ast/expression/basic/string.rs | 8 +- .../parser/ast/expression/basic/variable.rs | 8 +- .../src/parser/ast/expression/complex/atom.rs | 8 +- .../parser/ast/expression/complex/tuple.rs | 8 +- nemo/src/parser/ast/program.rs | 8 +- nemo/src/parser/ast/rule.rs | 8 +- nemo/src/parser/ast/token.rs | 5 + nemo/src/rule_model/components/rule.rs | 58 ++++-- nemo/src/rule_model/error.rs | 59 ++++-- nemo/src/rule_model/error/hint.rs | 14 ++ .../src/rule_model/error/translation_error.rs | 27 ++- nemo/src/rule_model/error/validation_error.rs | 48 +++-- nemo/src/rule_model/translation.rs | 183 ++++++++++++++++-- 26 files changed, 470 insertions(+), 151 deletions(-) create mode 100644 nemo-cli/src/example.rls delete mode 100644 nemo-cli/src/sample.tao create mode 100644 nemo/src/rule_model/error/hint.rs diff --git a/Cargo.lock b/Cargo.lock index 3f7b40f70..745294dbe 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1505,6 +1505,7 @@ dependencies = [ "sanitise-file-name", "serde", "serde_json", + "similar-string", "strum", "strum_macros", "test-log", @@ -2444,6 +2445,12 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "similar-string" +version = "1.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3ac42455f28f7f9fc2ca816746b7143356f51ae195abb35d5bb4ac3808c7fa3" + [[package]] name = "slab" version = "0.4.9" diff --git a/nemo-cli/src/example.rls b/nemo-cli/src/example.rls new file mode 100644 index 000000000..bd9c18b8d --- /dev/null +++ b/nemo-cli/src/example.rls @@ -0,0 +1 @@ +abc(?x, ?test, ?testt) :- p(?x, ?test) . 
\ No newline at end of file diff --git a/nemo-cli/src/main.rs b/nemo-cli/src/main.rs index 620d4376e..8cd65d6ab 100644 --- a/nemo-cli/src/main.rs +++ b/nemo-cli/src/main.rs @@ -187,16 +187,13 @@ fn run(mut cli: CliApp) -> Result<(), Error> { { Ok(program) => program, Err(report) => { - report.eprint(report.build_reports( - &program_ast, - Color::Red, - Color::Green, - Color::Green, - ))?; + report.eprint(report.build_reports(&program_ast, Color::Red))?; std::process::exit(1); } }; + std::process::exit(0); + // let mut program = parse_program(rules_content)?; // let (ast, errors) = parse_program_str(&rules_content); @@ -374,9 +371,6 @@ fn run(mut cli: CliApp) -> Result<(), Error> { fn main() { let cli = CliApp::parse(); - // test_error_message(); - // return; - cli.logging.initialize_logging(); log::info!("Version: {}", clap::crate_version!()); log::debug!("Rule files: {:?}", cli.rules); @@ -386,44 +380,3 @@ fn main() { std::process::exit(1) }) } - -fn test_error_message() { - use ariadne::{Color, ColorGenerator, Fmt, Label, Report, ReportKind, Source}; - - let mut colors = ColorGenerator::new(); - - // Generate & choose some colours for each of our elements - let a = colors.next(); - let b = colors.next(); - let out = Color::Fixed(81); - - Report::build(ReportKind::Error, "sample.tao", 12) - .with_code(3) - .with_message(format!("Incompatible types")) - .with_label( - Label::new(("sample.tao", 32..33)) - .with_message(format!("This is of type {}", "Nat".fg(a))) - .with_color(a), - ) - .with_label( - Label::new(("sample.tao", 42..45)) - .with_message(format!("This is of type {}", "Str".fg(b))) - .with_color(b), - ) - .with_label( - Label::new(("sample.tao", 11..48)) - .with_message(format!( - "The values are outputs of this {} expression", - "match".fg(out), - )) - .with_color(out), - ) - .with_note(format!( - "Outputs of {} expressions must coerce to the same type", - "match".fg(out) - )) - .with_help("Test") - .finish() - .print(("sample.tao", Source::from(include_str!("sample.tao")))) - .unwrap(); -} diff --git a/nemo-cli/src/sample.tao b/nemo-cli/src/sample.tao deleted file mode 100644 index 5cefbc6a4..000000000 --- a/nemo-cli/src/sample.tao +++ /dev/null @@ -1,13 +0,0 @@ -123.10 - -abc(?x, ?y) :- p(12, ?) . - -% def five = match () in { -% () => 5, -% () => "5", -% } -% -% def six = -% five -% + 1 -a(1, 2, ?). \ No newline at end of file diff --git a/nemo/Cargo.toml b/nemo/Cargo.toml index 3f11c31a8..931f60544 100644 --- a/nemo/Cargo.toml +++ b/nemo/Cargo.toml @@ -53,6 +53,7 @@ enum-assoc = "1.1.0" ariadne = "0.4.1" strum = "0.26.3" strum_macros = "0.26.4" +similar-string = "1.4.3" [dev-dependencies] env_logger = "*" diff --git a/nemo/src/parser/ast.rs b/nemo/src/parser/ast.rs index 332ae66e3..948d74157 100644 --- a/nemo/src/parser/ast.rs +++ b/nemo/src/parser/ast.rs @@ -8,7 +8,7 @@ pub mod token; use crate::rule_model::origin::Origin; -use super::{span::ProgramSpan, ParserInput, ParserResult}; +use super::{context::ParserContext, span::ProgramSpan, ParserInput, ParserResult}; /// Trait implemented by nodes in the abstract syntax tree pub trait ProgramAST<'a>: Sync { @@ -23,6 +23,9 @@ pub trait ProgramAST<'a>: Sync { where Self: Sized + 'a; + /// Return [ParserContext] indicating the type of node. + fn context(&self) -> ParserContext; + /// Locate a node from a stack of [Origin]s. 
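The locate contract is only implied here, so the following is an assumed reading based on how the translation code constructs origin stacks:

    // Presumed resolution for origin_stack = [External(0), External(2)]:
    // start at the program node, step into child 0 (a statement), then into
    // its child 2; the span of the node found there is what error reports
    // later highlight via ProgramError::range.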
fn locate(&'a self, origin_stack: &[Origin]) -> Option<&'a dyn ProgramAST<'a>> where diff --git a/nemo/src/parser/ast/expression.rs b/nemo/src/parser/ast/expression.rs index fb506c757..f82c98352 100644 --- a/nemo/src/parser/ast/expression.rs +++ b/nemo/src/parser/ast/expression.rs @@ -43,6 +43,25 @@ pub enum Expression<'a> { Variable(Variable<'a>), } +impl<'a> Expression<'a> { + /// Return the [ParserContext] of the underlying expression type. + pub fn context_type(&self) -> ParserContext { + match self { + Expression::Atom(expression) => expression.context(), + Expression::Blank(expression) => expression.context(), + Expression::Boolean(expression) => expression.context(), + Expression::Constant(expression) => expression.context(), + Expression::Number(expression) => expression.context(), + Expression::RdfLiteral(expression) => expression.context(), + Expression::String(expression) => expression.context(), + Expression::Tuple(expression) => expression.context(), + Expression::Variable(expression) => expression.context(), + } + } +} + +const CONTEXT: ParserContext = ParserContext::Expression; + impl<'a> ProgramAST<'a> for Expression<'a> { fn children(&self) -> Vec<&dyn ProgramAST> { match self { @@ -77,7 +96,7 @@ impl<'a> ProgramAST<'a> for Expression<'a> { Self: Sized + 'a, { context( - ParserContext::Expression, + CONTEXT, alt(( map(Atom::parse, Self::Atom), map(Tuple::parse, Self::Tuple), @@ -91,6 +110,10 @@ impl<'a> ProgramAST<'a> for Expression<'a> { )), )(input) } + + fn context(&self) -> ParserContext { + CONTEXT + } } #[cfg(test)] diff --git a/nemo/src/parser/ast/expression/basic/blank.rs b/nemo/src/parser/ast/expression/basic/blank.rs index aa12b5219..4139b0b07 100644 --- a/nemo/src/parser/ast/expression/basic/blank.rs +++ b/nemo/src/parser/ast/expression/basic/blank.rs @@ -32,6 +32,8 @@ impl<'a> Blank<'a> { } } +const CONTEXT: ParserContext = ParserContext::Blank; + impl<'a> ProgramAST<'a> for Blank<'a> { fn children(&self) -> Vec<&dyn ProgramAST> { Vec::default() @@ -47,21 +49,23 @@ impl<'a> ProgramAST<'a> for Blank<'a> { { let input_span = input.span; - context( - ParserContext::Blank, - pair(Token::blank_node_prefix, Self::parse_name), - )(input) - .map(|(rest, (_, name))| { - let rest_span = rest.span; - - ( - rest, - Blank { - span: input_span.until_rest(&rest_span), - name, - }, - ) - }) + context(CONTEXT, pair(Token::blank_node_prefix, Self::parse_name))(input).map( + |(rest, (_, name))| { + let rest_span = rest.span; + + ( + rest, + Blank { + span: input_span.until_rest(&rest_span), + name, + }, + ) + }, + ) + } + + fn context(&self) -> ParserContext { + CONTEXT } } diff --git a/nemo/src/parser/ast/expression/basic/boolean.rs b/nemo/src/parser/ast/expression/basic/boolean.rs index f4f6dcd48..2caf3e9fb 100644 --- a/nemo/src/parser/ast/expression/basic/boolean.rs +++ b/nemo/src/parser/ast/expression/basic/boolean.rs @@ -54,6 +54,8 @@ impl<'a> Boolean<'a> { } } +const CONTEXT: ParserContext = ParserContext::Boolean; + impl<'a> ProgramAST<'a> for Boolean<'a> { fn children(&self) -> Vec<&dyn ProgramAST> { Vec::default() @@ -69,7 +71,7 @@ impl<'a> ProgramAST<'a> for Boolean<'a> { { let input_span = input.span; - context(ParserContext::Boolean, Self::parse_boolean_value)(input).map(|(rest, value)| { + context(CONTEXT, Self::parse_boolean_value)(input).map(|(rest, value)| { let rest_span = rest.span; ( @@ -81,6 +83,10 @@ impl<'a> ProgramAST<'a> for Boolean<'a> { ) }) } + + fn context(&self) -> ParserContext { + CONTEXT + } } #[cfg(test)] diff --git 
a/nemo/src/parser/ast/expression/basic/constant.rs b/nemo/src/parser/ast/expression/basic/constant.rs index b534f6a1f..29633230c 100644 --- a/nemo/src/parser/ast/expression/basic/constant.rs +++ b/nemo/src/parser/ast/expression/basic/constant.rs @@ -41,6 +41,8 @@ impl<'a> Constant<'a> { } } +const CONTEXT: ParserContext = ParserContext::Constant; + impl<'a> ProgramAST<'a> for Constant<'a> { fn children(&self) -> Vec<&dyn ProgramAST> { Vec::default() @@ -57,7 +59,7 @@ impl<'a> ProgramAST<'a> for Constant<'a> { let input_span = input.span; context( - ParserContext::Constant, + CONTEXT, alt(( map(Token::name, ConstantKind::Plain), map(Iri::parse, ConstantKind::Iri), @@ -75,6 +77,10 @@ impl<'a> ProgramAST<'a> for Constant<'a> { ) }) } + + fn context(&self) -> ParserContext { + CONTEXT + } } #[cfg(test)] diff --git a/nemo/src/parser/ast/expression/basic/iri.rs b/nemo/src/parser/ast/expression/basic/iri.rs index 6fd5d973a..dad5d68df 100644 --- a/nemo/src/parser/ast/expression/basic/iri.rs +++ b/nemo/src/parser/ast/expression/basic/iri.rs @@ -28,6 +28,8 @@ impl<'a> Iri<'a> { } } +const CONTEXT: ParserContext = ParserContext::Iri; + impl<'a> ProgramAST<'a> for Iri<'a> { fn children(&self) -> Vec<&dyn ProgramAST> { Vec::default() @@ -44,7 +46,7 @@ impl<'a> ProgramAST<'a> for Iri<'a> { let input_span = input.span; context( - ParserContext::Iri, + CONTEXT, tuple((Token::open_chevrons, Token::iri, Token::closed_chevrons)), )(input) .map(|(rest, (_, content, _))| { @@ -59,6 +61,10 @@ impl<'a> ProgramAST<'a> for Iri<'a> { ) }) } + + fn context(&self) -> ParserContext { + CONTEXT + } } #[cfg(test)] diff --git a/nemo/src/parser/ast/expression/basic/number.rs b/nemo/src/parser/ast/expression/basic/number.rs index c384dde7c..01e724f5b 100644 --- a/nemo/src/parser/ast/expression/basic/number.rs +++ b/nemo/src/parser/ast/expression/basic/number.rs @@ -108,6 +108,8 @@ impl<'a> Number<'a> { } } +const CONTEXT: ParserContext = ParserContext::Number; + impl<'a> ProgramAST<'a> for Number<'a> { fn children(&self) -> Vec<&dyn ProgramAST> { Vec::default() @@ -124,7 +126,7 @@ impl<'a> ProgramAST<'a> for Number<'a> { let input_span = input.span; context( - ParserContext::Number, + CONTEXT, tuple(( opt(Self::parse_sign), Self::parse_integer, @@ -151,6 +153,10 @@ impl<'a> ProgramAST<'a> for Number<'a> { }, ) } + + fn context(&self) -> ParserContext { + CONTEXT + } } #[cfg(test)] diff --git a/nemo/src/parser/ast/expression/basic/rdf_literal.rs b/nemo/src/parser/ast/expression/basic/rdf_literal.rs index 6abd71895..15e7a4242 100644 --- a/nemo/src/parser/ast/expression/basic/rdf_literal.rs +++ b/nemo/src/parser/ast/expression/basic/rdf_literal.rs @@ -43,6 +43,8 @@ impl<'a> RdfLiteral<'a> { } } +const CONTEXT: ParserContext = ParserContext::RdfLiteral; + impl<'a> ProgramAST<'a> for RdfLiteral<'a> { fn children(&self) -> Vec<&dyn ProgramAST> { Vec::default() @@ -59,7 +61,7 @@ impl<'a> ProgramAST<'a> for RdfLiteral<'a> { let input_span = input.span; context( - ParserContext::RdfLiteral, + CONTEXT, tuple((Self::parse_content, Token::double_caret, Iri::parse)), )(input) .map(|(rest, (content, _, tag))| { @@ -75,6 +77,10 @@ impl<'a> ProgramAST<'a> for RdfLiteral<'a> { ) }) } + + fn context(&self) -> ParserContext { + CONTEXT + } } #[cfg(test)] diff --git a/nemo/src/parser/ast/expression/basic/string.rs b/nemo/src/parser/ast/expression/basic/string.rs index 4e3a9b27d..74c62b1ee 100644 --- a/nemo/src/parser/ast/expression/basic/string.rs +++ b/nemo/src/parser/ast/expression/basic/string.rs @@ -49,6 +49,8 @@ impl<'a> 
StringLiteral<'a> { } } +const CONTEXT: ParserContext = ParserContext::String; + impl<'a> ProgramAST<'a> for StringLiteral<'a> { fn children(&self) -> Vec<&dyn ProgramAST> { Vec::default() @@ -65,7 +67,7 @@ impl<'a> ProgramAST<'a> for StringLiteral<'a> { let input_span = input.span; context( - ParserContext::String, + CONTEXT, pair(Self::parse_string, opt(Self::parse_language_tag)), )(input) .map(|(rest, (content, language_tag))| { @@ -81,6 +83,10 @@ impl<'a> ProgramAST<'a> for StringLiteral<'a> { ) }) } + + fn context(&self) -> ParserContext { + CONTEXT + } } #[cfg(test)] diff --git a/nemo/src/parser/ast/expression/basic/variable.rs b/nemo/src/parser/ast/expression/basic/variable.rs index 402cc63ae..43354a5d4 100644 --- a/nemo/src/parser/ast/expression/basic/variable.rs +++ b/nemo/src/parser/ast/expression/basic/variable.rs @@ -75,6 +75,8 @@ impl<'a> Variable<'a> { } } +const CONTEXT: ParserContext = ParserContext::Variable; + impl<'a> ProgramAST<'a> for Variable<'a> { fn children(&self) -> Vec<&dyn ProgramAST> { Vec::default() @@ -91,7 +93,7 @@ impl<'a> ProgramAST<'a> for Variable<'a> { let input_span = input.span; context( - ParserContext::Variable, + CONTEXT, pair(Self::parse_variable_prefix, opt(Self::parse_variable_name)), )(input) .map(|(rest, (kind, name))| { @@ -107,6 +109,10 @@ impl<'a> ProgramAST<'a> for Variable<'a> { ) }) } + + fn context(&self) -> ParserContext { + CONTEXT + } } #[cfg(test)] diff --git a/nemo/src/parser/ast/expression/complex/atom.rs b/nemo/src/parser/ast/expression/complex/atom.rs index 22168bea0..84a63ff95 100644 --- a/nemo/src/parser/ast/expression/complex/atom.rs +++ b/nemo/src/parser/ast/expression/complex/atom.rs @@ -40,6 +40,8 @@ impl<'a> Atom<'a> { } } +const CONTEXT: ParserContext = ParserContext::Atom; + impl<'a> ProgramAST<'a> for Atom<'a> { fn children(&self) -> Vec<&dyn ProgramAST> { let mut result: Vec<&dyn ProgramAST> = vec![]; @@ -61,7 +63,7 @@ impl<'a> ProgramAST<'a> for Atom<'a> { let input_span = input.span; context( - ParserContext::Atom, + CONTEXT, pair( Tag::parse, delimited( @@ -84,6 +86,10 @@ impl<'a> ProgramAST<'a> for Atom<'a> { ) }) } + + fn context(&self) -> ParserContext { + CONTEXT + } } #[cfg(test)] diff --git a/nemo/src/parser/ast/expression/complex/tuple.rs b/nemo/src/parser/ast/expression/complex/tuple.rs index 32dfede7a..e86396c50 100644 --- a/nemo/src/parser/ast/expression/complex/tuple.rs +++ b/nemo/src/parser/ast/expression/complex/tuple.rs @@ -32,6 +32,8 @@ impl<'a> Tuple<'a> { } } +const CONTEXT: ParserContext = ParserContext::Tuple; + impl<'a> ProgramAST<'a> for Tuple<'a> { fn children(&self) -> Vec<&dyn ProgramAST> { let mut result: Vec<&dyn ProgramAST> = vec![]; @@ -53,7 +55,7 @@ impl<'a> ProgramAST<'a> for Tuple<'a> { let input_span = input.span; context( - ParserContext::Tuple, + CONTEXT, delimited( pair(Token::open_parenthesis, opt(Token::whitespace)), terminated( @@ -79,6 +81,10 @@ impl<'a> ProgramAST<'a> for Tuple<'a> { ) }) } + + fn context(&self) -> ParserContext { + CONTEXT + } } #[cfg(test)] diff --git a/nemo/src/parser/ast/program.rs b/nemo/src/parser/ast/program.rs index c459a9dcc..710d9d9d0 100644 --- a/nemo/src/parser/ast/program.rs +++ b/nemo/src/parser/ast/program.rs @@ -22,6 +22,8 @@ impl<'a> Program<'a> { } } +const CONTEXT: ParserContext = ParserContext::Program; + impl<'a> ProgramAST<'a> for Program<'a> { fn children(&self) -> Vec<&dyn ProgramAST> { // TODO: Fix this once we have statements @@ -40,7 +42,11 @@ impl<'a> ProgramAST<'a> for Program<'a> { where Self: Sized + 'a, { - 
context(ParserContext::Program, Rule::parse)(input) + context(CONTEXT, Rule::parse)(input) .map(|(rest, result)| (rest, Program { statements: result })) } + + fn context(&self) -> ParserContext { + CONTEXT + } } diff --git a/nemo/src/parser/ast/rule.rs b/nemo/src/parser/ast/rule.rs index a607532f0..f55cc0307 100644 --- a/nemo/src/parser/ast/rule.rs +++ b/nemo/src/parser/ast/rule.rs @@ -42,6 +42,8 @@ impl<'a> Rule<'a> { } } +const CONTEXT: ParserContext = ParserContext::Rule; + impl<'a> ProgramAST<'a> for Rule<'a> { fn children(&self) -> Vec<&dyn ProgramAST> { let mut result = Vec::<&dyn ProgramAST>::new(); @@ -64,7 +66,7 @@ impl<'a> ProgramAST<'a> for Rule<'a> { let input_span = input.span; context( - ParserContext::Rule, + CONTEXT, tuple(( ExpressionSequenceSimple::parse, tuple((opt(Token::whitespace), Token::arrow, opt(Token::whitespace))), @@ -85,6 +87,10 @@ impl<'a> ProgramAST<'a> for Rule<'a> { ) }) } + + fn context(&self) -> ParserContext { + CONTEXT + } } #[cfg(test)] diff --git a/nemo/src/parser/ast/token.rs b/nemo/src/parser/ast/token.rs index 31b58f0a1..af6634dd1 100644 --- a/nemo/src/parser/ast/token.rs +++ b/nemo/src/parser/ast/token.rs @@ -197,6 +197,11 @@ macro_rules! string_token { } impl<'a> Token<'a> { + /// Return the [ProgramSpan] of this token. + pub fn span(&self) -> ProgramSpan<'a> { + self.span + } + /// Return the [TokenKind] of this token. pub fn kind(&self) -> TokenKind { self.kind diff --git a/nemo/src/rule_model/components/rule.rs b/nemo/src/rule_model/components/rule.rs index d7b6438f6..546583ba4 100644 --- a/nemo/src/rule_model/components/rule.rs +++ b/nemo/src/rule_model/components/rule.rs @@ -2,10 +2,10 @@ use std::{collections::HashSet, fmt::Display, hash::Hash}; -use nemo_physical::aggregates::operation; +use similar_string::find_best_similarity; use crate::rule_model::{ - error::{validation_error::ValidationErrorKind, ValidationErrorBuilder}, + error::{hint::Hint, validation_error::ValidationErrorKind, ValidationErrorBuilder}, origin::Origin, }; @@ -184,19 +184,49 @@ impl ProgramComponent for Rule { Self: Sized, { let safe_variables = self.safe_variables(); - let head_variables = self - .head - .iter() - .flat_map(|atom| atom.variables()) - .collect::>(); - - for &head_variable in &head_variables { - if !safe_variables.contains(head_variable) { - builder.report_error( - head_variable.origin(), - ValidationErrorKind::HeadUnsafe(head_variable.clone()), - ); + + for atom in &self.head { + builder.push_origin(atom.origin().clone()); + + for term in atom.subterms() { + if let Term::Primitive(Primitive::Variable(head_variable)) = term { + if !safe_variables.contains(head_variable) { + let head_variable_name = head_variable + .name() + .expect("anonymous variables not allowed in the head"); + + let hint = if let Some(closest_option) = find_best_similarity( + head_variable_name.clone(), + &safe_variables + .iter() + .filter_map(|variable| variable.name()) + .collect::>(), + ) { + if head_variable_name.len() > 2 + && closest_option.0.len() > 2 + && closest_option.1 > 0.75 + { + vec![Hint::SimilarExists { + kind: "variable".to_string(), + name: closest_option.0, + }] + } else { + vec![] + } + } else { + vec![] + }; + + builder.report_error( + head_variable.origin(), + ValidationErrorKind::HeadUnsafe(head_variable.clone()), + hint, + ); + } + } } + + builder.pop_origin(); } Ok(()) diff --git a/nemo/src/rule_model/error.rs b/nemo/src/rule_model/error.rs index 37f79879b..1ab31f50a 100644 --- a/nemo/src/rule_model/error.rs +++ b/nemo/src/rule_model/error.rs @@ -1,15 
+1,20 @@
 //! This module defines different kinds of errors that can occur
 //! while working with nemo programs.
 
+pub mod hint;
 pub mod translation_error;
 pub mod validation_error;
 
 use std::fmt::Display;
 
+use hint::Hint;
 use translation_error::TranslationErrorKind;
 use validation_error::ValidationErrorKind;
 
-use crate::parser::{ast::ProgramAST, span::CharacterRange};
+use crate::parser::{
+    ast::ProgramAST,
+    span::{CharacterRange, ProgramSpan},
+};
 
 use super::origin::Origin;
 
@@ -20,6 +25,8 @@ pub struct ValidationError {
     kind: ValidationErrorKind,
     /// Stack of [Origin] from which the original AST node can be derived
     origin_stack: Vec<Origin>,
+    /// List of hints
+    hints: Vec<Hint>,
 }
 
 impl Display for ValidationError {
@@ -49,38 +56,46 @@ impl ValidationErrorBuilder {
     }
 
     /// Add a new error.
-    pub fn report_error(&mut self, origin: &Origin, error_kind: ValidationErrorKind) {
+    pub fn report_error(
+        &mut self,
+        origin: &Origin,
+        error_kind: ValidationErrorKind,
+        hints: Vec<Hint>,
+    ) {
         let mut origin_stack = self.origin_stack.clone();
         origin_stack.push(origin.clone());
 
         self.error_stack.push(ValidationError {
             kind: error_kind,
             origin_stack,
+            hints,
         })
     }
 
     /// Finish building and return a list of [ValidationError]s.
     pub fn finalize(self) -> Vec<ValidationError> {
-        println!("Stack: {:?}", self.error_stack);
         self.error_stack
     }
 }
 
 /// Error that occurs while translating the ast into the logical representation
-#[derive(Debug, Copy, Clone)]
+#[derive(Debug)]
 pub struct TranslationError {
     /// The type of error that occurred
     kind: TranslationErrorKind,
     /// Range signifying the program part that should be highlighted
     range: CharacterRange,
+    /// List of hints
+    hints: Vec<Hint>,
 }
 
 impl TranslationError {
-    /// Create a new [TranslationError] from a given .
-    pub fn new<'a, Node: ProgramAST<'a>>(ast: &'a Node, kind: TranslationErrorKind) -> Self {
+    /// Create a new [TranslationError] from a given [ProgramSpan].
+    pub fn new<'a>(span: ProgramSpan<'a>, kind: TranslationErrorKind, hints: Vec<Hint>) -> Self {
         Self {
             kind,
-            range: ast.span().range(),
+            range: span.range(),
+            hints,
         }
     }
 }
@@ -96,7 +111,7 @@ pub enum ProgramError {
 }
 
 impl ProgramError {
-    /// Return the message of the error
+    /// Return the message of the error.
     pub fn message(&self) -> String {
         match self {
             ProgramError::TranslationError(error) => error.kind.to_string(),
@@ -104,7 +119,7 @@ impl ProgramError {
         }
     }
 
-    /// Return the [CharacterRange] associated with this error
+    /// Return the [CharacterRange] associated with this error.
     pub fn range<'a, Node: ProgramAST<'a>>(&self, ast: &'a Node) -> CharacterRange {
         match self {
             ProgramError::TranslationError(error) => error.range,
@@ -116,13 +131,31 @@ impl ProgramError {
         }
     }
 
-    /// Return the error code of the message
+    /// Return the error code of the message.
     pub fn error_code(&self) -> usize {
-        123
+        match self {
+            ProgramError::TranslationError(error) => error.kind.code(),
+            ProgramError::ValidationError(error) => error.kind.code(),
+        }
     }
 
-    /// Return an optional note that may be attached to the error
+    /// Return an optional note that may be attached to the error.
     pub fn note(&self) -> Option<String> {
-        None
+        match self {
+            ProgramError::TranslationError(error) => error.kind.note(),
+            ProgramError::ValidationError(error) => error.kind.note(),
+        }
+        .map(|note| note.to_string())
+    }
+
+    /// Return a list of hints that fit the error message.
+ pub fn hints(&self) -> Vec { + match self { + ProgramError::TranslationError(error) => &error.hints, + ProgramError::ValidationError(error) => &error.hints, + } + .iter() + .map(|hint| hint.message().to_string()) + .collect() } } diff --git a/nemo/src/rule_model/error/hint.rs b/nemo/src/rule_model/error/hint.rs new file mode 100644 index 000000000..dc3e69f8b --- /dev/null +++ b/nemo/src/rule_model/error/hint.rs @@ -0,0 +1,14 @@ +//! This module defines [Hint] +#![allow(missing_docs)] + +use enum_assoc::Assoc; + +/// Hints for error messages +#[derive(Assoc, Debug)] +#[func(pub fn message(&self) -> String)] +pub enum Hint { + #[assoc(message = "unnamed universal variables may be expressed with an underscore `_`".to_string())] + AnonymousVariables, + #[assoc(message = format!("similar {} exists: `{}`", _kind, _name))] + SimilarExists { kind: String, name: String }, +} diff --git a/nemo/src/rule_model/error/translation_error.rs b/nemo/src/rule_model/error/translation_error.rs index cc342a0ba..f7e6db7b2 100644 --- a/nemo/src/rule_model/error/translation_error.rs +++ b/nemo/src/rule_model/error/translation_error.rs @@ -1,13 +1,34 @@ //! This module defines [TranslationErrorKind] +#![allow(missing_docs)] +use enum_assoc::Assoc; use thiserror::Error; /// Types of errors that occur /// while translating the ASP representation of a nemo program /// into its logical representation. -#[derive(Error, Debug, Copy, Clone)] +#[derive(Assoc, Error, Debug, Clone)] +#[func(pub fn note(&self) -> Option<&'static str>)] +#[func(pub fn code(&self) -> usize)] pub enum TranslationErrorKind { /// A negated atom was used in the head of a rule - #[error(r#"negated atom used in head"#)] - HeadAtomNegated, + #[error(r#"{0} used in rule head"#)] + #[assoc(note = "rule head must only use atoms")] + #[assoc(code = 101)] + HeadNonAtom(String), + /// An undefined prefix was used + #[error(r#"unknown prefix: `{0}`"#)] + #[assoc(note = "prefix must be defined using @prefix")] + #[assoc(code = 102)] + UnknownPrefix(String), + /// Unnamed non-anonymous variable + #[error(r#"unnamed variable"#)] + #[assoc(note = "variables starting with ? or ! must have a name")] + #[assoc(code = 103)] + UnnamedVariable, + /// Named non-anonymous variable + #[error(r#"anonymous variable with name: ``"#)] + #[assoc(note = "anonymous variables cannot have a name")] + #[assoc(code = 104)] + NamedAnonymous(String), } diff --git a/nemo/src/rule_model/error/validation_error.rs b/nemo/src/rule_model/error/validation_error.rs index 1861c15e4..cc0f1679c 100644 --- a/nemo/src/rule_model/error/validation_error.rs +++ b/nemo/src/rule_model/error/validation_error.rs @@ -1,5 +1,7 @@ //! This module defines [ValidationErrorKind]. +#![allow(missing_docs)] +use enum_assoc::Assoc; use thiserror::Error; use crate::rule_model::components::term::{ @@ -7,51 +9,71 @@ use crate::rule_model::components::term::{ }; /// Types of errors that occur while building the logical rule model -#[derive(Error, Debug)] +#[derive(Assoc, Error, Debug)] +#[func(pub fn note(&self) -> Option<&'static str>)] +#[func(pub fn code(&self) -> usize)] pub enum ValidationErrorKind { /// An existentially quantified variable occurs in the body of a rule. #[error(r#"existential variable used in rule body: `{0}`"#)] + #[assoc(code = 201)] BodyExistential(Variable), /// Unsafe variable used in the head of the rule. 
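As a hedged illustration, the rule in example.rls from this same patch exercises exactly this variant (the rendering sketched below is an assumption, composed from the error text, the error code, and the hint format in hint.rs):

    % example.rls: ?testt occurs in no body atom, so validation should report
    %   error[202]: unsafe variable used in rule head: `?testt`
    %   help: similar variable exists: `test`
    abc(?x, ?test, ?testt) :- p(?x, ?test) .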
#[error(r#"unsafe variable used in rule head: `{0}`"#)] + #[assoc( + note = "every universal variable in the head must occur at a safe position in the body" + )] + #[assoc(code = 202)] HeadUnsafe(Variable), /// Anonymous variable used in the head of the rule. #[error(r#"anonymous variable used in rule head"#)] + #[assoc(code = 203)] HeadAnonymous, /// Operation with unsafe variable #[error(r#"unsafe variable used in computation: `{0}`"#)] + #[assoc(code = 204)] OperationUnsafe(Variable), /// Unsafe variable used in multiple negative literals #[error(r#"unsafe variable used in multiple negative literals: `{0}`"#)] + #[assoc(code = 205)] MultipleNegativeLiteralsUnsafe(Variable), /// Aggregate is used in body #[error(r#"aggregate used in rule body: `{0}`"#)] + #[assoc(code = 206)] BodyAggregate(Aggregate), - /// Unsupported feature: Multiple aggregates in one rule - #[error(r#"multiple aggregates in one rule is currently unsupported"#)] - AggregateMultiple, - /// Unsupported feature: Aggregates combined with existential rules - #[error(r#"aggregates and existential variables in one rule is currently unsupported"#)] - AggregatesAndExistentials, /// A variable is both universally and existentially quantified #[error(r#"variable is both universal and existential: `{0}`"#)] + #[assoc(code = 207)] VariableMultipleQuantifiers(String), /// Fact contains non-ground term #[error(r#"non-ground term used in fact: `{0}`"#)] + #[assoc(code = 208)] FactNonGround(Term), - /// Atom used without any arguments - #[error(r#"atoms without arguments are currently unsupported"#)] - AtomNoArguments, - /// Non-primitive terms are currently unsupported - #[error(r#"complex terms are currently unsupported"#)] - ComplexTerm, /// Invalid variable name was used + #[assoc(code = 209)] #[error(r#"variable name is invalid: `{0}`"#)] InvalidVariableName(String), /// Invalid tag was used + #[assoc(code = 210)] #[error(r#"tag is invalid: `{0}`"#)] InvalidTermTag(String), /// Invalid predicate name was used + #[assoc(code = 211)] #[error(r#"predicate name is invalid: `{0}"#)] InvalidPredicateName(String), + /// Unsupported feature: Multiple aggregates in one rule + #[error(r#"multiple aggregates in one rule is currently unsupported"#)] + #[assoc(code = 999)] + AggregateMultiple, + /// Unsupported feature: Aggregates combined with existential rules + #[error(r#"aggregates and existential variables in one rule is currently unsupported"#)] + #[assoc(code = 998)] + AggregatesAndExistentials, + /// Atom used without any arguments + #[assoc(code = 997)] + #[error(r#"atoms without arguments are currently unsupported"#)] + AtomNoArguments, + /// Non-primitive terms are currently unsupported + #[assoc(code = 996)] + #[error(r#"complex terms are currently unsupported"#)] + ComplexTerm, } diff --git a/nemo/src/rule_model/translation.rs b/nemo/src/rule_model/translation.rs index c16ca2da3..caf8149b3 100644 --- a/nemo/src/rule_model/translation.rs +++ b/nemo/src/rule_model/translation.rs @@ -1,17 +1,22 @@ //! This module defines [ASTProgramTranslation]. 
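One part of the expanded translation below is worth a gloss: resolve_tag now expands prefixed tags against the @prefix map and prepends the base IRI to plain names. As a hypothetical example, after a declaration like @prefix ex: <http://example.org/>, the tag ex:p should expand to http://example.org/p, while a tag with an undeclared prefix is reported as error 102 (UnknownPrefix), carrying the note that a prefix must be defined using @prefix.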
-use std::ops::Range; +use std::{collections::HashMap, ops::Range}; use ariadne::{Color, Label, Report, ReportKind, Source}; use crate::{ - parser::ast, - rule_model::{components::rule::RuleBuilder, origin::Origin, program::ProgramBuilder}, + parser::ast::{self, ProgramAST}, + rule_model::{ + components::rule::RuleBuilder, error::hint::Hint, origin::Origin, program::ProgramBuilder, + }, }; use super::{ - components::{atom::Atom, rule::Rule, ProgramComponent}, - error::{ProgramError, TranslationError, ValidationErrorBuilder}, + components::{atom::Atom, literal::Literal, rule::Rule, term::Term, ProgramComponent}, + error::{ + translation_error::TranslationErrorKind, ProgramError, TranslationError, + ValidationErrorBuilder, + }, program::Program, }; @@ -24,6 +29,11 @@ pub struct ASTProgramTranslation<'a> { /// Label of the input file input_label: String, + /// Prefix mapping + prefix_mapping: HashMap, + /// Base + base: Option, + /// Builder for [ValidationError]s validation_error_builder: ValidationErrorBuilder, @@ -37,6 +47,8 @@ impl<'a> ASTProgramTranslation<'a> { Self { input, input_label, + prefix_mapping: HashMap::new(), + base: None, validation_error_builder: ValidationErrorBuilder::default(), errors: Vec::default(), } @@ -77,20 +89,27 @@ impl<'a> ProgramErrorReport<'a> { &'a self, ast: &'a ast::program::Program<'a>, color_error: Color, - color_note: Color, - color_hint: Color, ) -> impl Iterator)>> { self.errors.iter().map(move |error| { let range = error.range(ast); - let report = Report::build(ReportKind::Error, self.label.clone(), range.start.offset) - .with_code(error.error_code()) - .with_message(error.message()) - .with_label( - Label::new((self.label.clone(), range.range())) - .with_message(error.message()) - .with_color(color_error), - ); + let mut report = + Report::build(ReportKind::Error, self.label.clone(), range.start.offset) + .with_code(error.error_code()) + .with_message(error.message()) + .with_label( + Label::new((self.label.clone(), range.range())) + .with_message(error.message()) + .with_color(color_error), + ); + if let Some(note) = error.note() { + report = report.with_note(note); + } + if !error.hints().is_empty() { + for hint in error.hints() { + report = report.with_help(hint); + } + } report.finish() }) @@ -150,13 +169,49 @@ impl<'a> ASTProgramTranslation<'a> { expression_counter += 1; } - let rule = rule_builder.finalize(); + for expression in rule.body() { + let origin_expression = Origin::External(expression_counter); + rule_builder + .add_body_literal_mut(self.build_body_literal(origin_expression, expression)?); + + expression_counter += 1; + } + + let rule = rule_builder.finalize().set_origin(origin); let _ = rule.validate(&mut self.validation_error_builder); self.validation_error_builder.pop_origin(); Ok(rule) } + fn build_body_literal( + &mut self, + origin: Origin, + head: &ast::expression::Expression<'a>, + ) -> Result { + self.validation_error_builder.push_origin(origin); + + let result = if let ast::expression::Expression::Atom(atom) = head { + let mut subterms = Vec::new(); + for (expression_index, expression) in atom.expressions().enumerate() { + let term_origin = Origin::External(expression_index); + subterms.push(self.build_inner_term(term_origin, expression)?); + } + + Literal::Positive(Atom::new(&self.resolve_tag(atom.tag())?, subterms)) + } else { + return Err(TranslationError::new( + head.span(), + TranslationErrorKind::HeadNonAtom(head.context_type().name().to_string()), + vec![], + )); + } + .set_origin(origin); + + 
self.validation_error_builder.pop_origin();
+        Ok(result)
+    }
+
     fn build_head_atom(
         &mut self,
         origin: Origin,
@@ -164,9 +219,103 @@ impl<'a> ASTProgramTranslation<'a> {
     ) -> Result<Atom, TranslationError> {
         self.validation_error_builder.push_origin(origin);
 
-        let result = Atom::new("test", vec![]).set_origin(origin);
+        let result = if let ast::expression::Expression::Atom(atom) = head {
+            let mut subterms = Vec::new();
+            for (expression_index, expression) in atom.expressions().enumerate() {
+                let term_origin = Origin::External(expression_index);
+                subterms.push(self.build_inner_term(term_origin, expression)?);
+            }
+
+            Atom::new(&self.resolve_tag(atom.tag())?, subterms)
+        } else {
+            return Err(TranslationError::new(
+                head.span(),
+                TranslationErrorKind::HeadNonAtom(head.context_type().name().to_string()),
+                vec![],
+            ));
+        }
+        .set_origin(origin);
 
         self.validation_error_builder.pop_origin();
         Ok(result)
     }
+
+    fn build_inner_term(
+        &self,
+        origin: Origin,
+        expression: &ast::expression::Expression,
+    ) -> Result<Term, TranslationError> {
+        Ok(match expression {
+            ast::expression::Expression::Atom(atom) => todo!(),
+            ast::expression::Expression::Blank(blank) => todo!(),
+            ast::expression::Expression::Boolean(boolean) => todo!(),
+            ast::expression::Expression::Constant(constant) => todo!(),
+            ast::expression::Expression::Number(number) => todo!(),
+            ast::expression::Expression::RdfLiteral(rdf_literal) => todo!(),
+            ast::expression::Expression::String(string) => todo!(),
+            ast::expression::Expression::Tuple(tuple) => todo!(),
+            ast::expression::Expression::Variable(variable) => match variable.kind() {
+                ast::expression::basic::variable::VariableType::Universal => {
+                    if let Some(variable_name) = variable.name() {
+                        Term::universal_variable(&variable_name)
+                    } else {
+                        return Err(TranslationError::new(
+                            variable.span(),
+                            TranslationErrorKind::UnnamedVariable,
+                            vec![Hint::AnonymousVariables],
+                        ));
+                    }
+                }
+                ast::expression::basic::variable::VariableType::Existential => {
+                    if let Some(variable_name) = variable.name() {
+                        Term::existential_variable(&variable_name)
+                    } else {
+                        return Err(TranslationError::new(
+                            variable.span(),
+                            TranslationErrorKind::UnnamedVariable,
+                            vec![],
+                        ));
+                    }
+                }
+                ast::expression::basic::variable::VariableType::Anonymous => {
+                    if variable.name().is_none() {
+                        Term::anonymous_variable()
+                    } else {
+                        return Err(TranslationError::new(
+                            variable.span(),
+                            TranslationErrorKind::NamedAnonymous(variable.span().0.to_string()),
+                            vec![],
+                        ));
+                    }
+                }
+            },
+        }
+        .set_origin(origin))
+    }
+
+    fn resolve_tag(&self, tag: &ast::tag::Tag<'a>) -> Result<String, TranslationError> {
+        Ok(match tag {
+            ast::tag::Tag::Plain(token) => {
+                let token_string = token.to_string();
+
+                if let Some(base) = &self.base {
+                    format!("{base}{token_string}")
+                } else {
+                    token_string
+                }
+            }
+            ast::tag::Tag::Prefixed { prefix, tag } => {
+                if let Some(expanded_prefix) = self.prefix_mapping.get(&prefix.to_string()) {
+                    format!("{expanded_prefix}{}", tag.to_string())
+                } else {
+                    return Err(TranslationError::new(
+                        prefix.span(),
+                        TranslationErrorKind::UnknownPrefix(prefix.to_string()),
+                        vec![],
+                    ));
+                }
+            }
+            ast::tag::Tag::Iri(iri) => iri.content(),
+        })
+    }
 }

From afd364caeae0306ba56be81e8be9e253b9eb939d Mon Sep 17 00:00:00 2001
From: Alex Ivliev
Date: Sun, 21 Jul 2024 15:05:44 +0200
Subject: [PATCH 130/214] Finish parser

---
 nemo/src/api.rs | 4 +-
 nemo/src/parser/ast.rs | 7 +-
 nemo/src/parser/ast/attribute.rs | 113 ++++
 nemo/src/parser/ast/comment.rs | 7 +
 nemo/src/parser/ast/comment/closed.rs | 103 ++++
 nemo/src/parser/ast/comment/doc.rs | 110 ++++
nemo/src/parser/ast/comment/line.rs | 103 ++++ nemo/src/parser/ast/comment/toplevel.rs | 111 ++++ nemo/src/parser/ast/comment/wsoc.rs | 99 ++++ nemo/src/parser/ast/directive.rs | 146 ++++++ nemo/src/parser/ast/directive/base.rs | 101 ++++ nemo/src/parser/ast/directive/declare.rs | 146 ++++++ nemo/src/parser/ast/directive/export.rs | 135 +++++ nemo/src/parser/ast/directive/import.rs | 135 +++++ nemo/src/parser/ast/directive/output.rs | 101 ++++ nemo/src/parser/ast/directive/prefix.rs | 119 +++++ nemo/src/parser/ast/directive/unknown.rs | 112 ++++ nemo/src/parser/ast/expression.rs | 147 ++++-- nemo/src/parser/ast/expression/basic/blank.rs | 32 +- .../parser/ast/expression/basic/boolean.rs | 5 +- .../parser/ast/expression/basic/constant.rs | 2 +- nemo/src/parser/ast/expression/basic/iri.rs | 8 +- .../src/parser/ast/expression/basic/number.rs | 38 +- .../ast/expression/basic/rdf_literal.rs | 8 +- .../src/parser/ast/expression/basic/string.rs | 7 +- .../parser/ast/expression/basic/variable.rs | 2 +- nemo/src/parser/ast/expression/complex.rs | 6 + .../ast/expression/complex/aggregation.rs | 151 ++++++ .../ast/expression/complex/arithmetic.rs | 364 +++++++++++++ .../src/parser/ast/expression/complex/atom.rs | 31 +- .../parser/ast/expression/complex/infix.rs | 190 +++++++ nemo/src/parser/ast/expression/complex/map.rs | 143 +++++ .../parser/ast/expression/complex/negation.rs | 88 ++++ .../ast/expression/complex/operation.rs | 140 +++++ .../parser/ast/expression/complex/tuple.rs | 24 +- .../ast/expression/sequence/key_value.rs | 1 - nemo/src/parser/ast/program.rs | 111 +++- nemo/src/parser/ast/rule.rs | 39 +- .../parser/ast/{expression => }/sequence.rs | 1 + nemo/src/parser/ast/sequence/declare.rs | 130 +++++ nemo/src/parser/ast/sequence/key_value.rs | 114 ++++ .../ast/{expression => }/sequence/one.rs | 23 +- .../ast/{expression => }/sequence/simple.rs | 17 +- nemo/src/parser/ast/statement.rs | 165 ++++++ nemo/src/parser/ast/tag.rs | 74 +-- nemo/src/parser/ast/tag/aggregation.rs | 122 +++++ nemo/src/parser/ast/tag/datatype.rs | 118 +++++ nemo/src/parser/ast/tag/operation.rs | 117 +++++ nemo/src/parser/ast/tag/parameter.rs | 110 ++++ nemo/src/parser/ast/tag/structure.rs | 131 +++++ nemo/src/parser/ast/token.rs | 61 ++- nemo/src/parser/context.rs | 71 ++- nemo/src/parser/span.rs | 14 + nemo/src/rule_model/components.rs | 1 + nemo/src/rule_model/components/datatype.rs | 27 + .../components/import_export/attributes.rs | 5 +- .../components/import_export/compression.rs | 1 - .../components/import_export/file_formats.rs | 1 - .../rule_model/components/term/aggregate.rs | 1 - .../rule_model/components/term/operation.rs | 5 - .../term/operation/operation_kind.rs | 56 +- nemo/src/rule_model/program.rs | 494 +++++++++--------- nemo/src/rule_model/syntax.rs | 1 + nemo/src/rule_model/syntax/builtins.rs | 2 +- nemo/src/rule_model/syntax/datatypes.rs | 12 + nemo/src/rule_model/syntax/import_export.rs | 1 - .../syntax/import_export/value_formats.rs | 22 - nemo/src/rule_model/translation.rs | 75 +-- 68 files changed, 4577 insertions(+), 584 deletions(-) create mode 100644 nemo/src/parser/ast/attribute.rs create mode 100644 nemo/src/parser/ast/comment.rs create mode 100644 nemo/src/parser/ast/comment/closed.rs create mode 100644 nemo/src/parser/ast/comment/doc.rs create mode 100644 nemo/src/parser/ast/comment/line.rs create mode 100644 nemo/src/parser/ast/comment/toplevel.rs create mode 100644 nemo/src/parser/ast/comment/wsoc.rs create mode 100644 nemo/src/parser/ast/directive.rs create mode 100644 
nemo/src/parser/ast/directive/base.rs create mode 100644 nemo/src/parser/ast/directive/declare.rs create mode 100644 nemo/src/parser/ast/directive/export.rs create mode 100644 nemo/src/parser/ast/directive/import.rs create mode 100644 nemo/src/parser/ast/directive/output.rs create mode 100644 nemo/src/parser/ast/directive/prefix.rs create mode 100644 nemo/src/parser/ast/directive/unknown.rs create mode 100644 nemo/src/parser/ast/expression/complex/aggregation.rs create mode 100644 nemo/src/parser/ast/expression/complex/arithmetic.rs create mode 100644 nemo/src/parser/ast/expression/complex/infix.rs create mode 100644 nemo/src/parser/ast/expression/complex/map.rs create mode 100644 nemo/src/parser/ast/expression/complex/negation.rs create mode 100644 nemo/src/parser/ast/expression/complex/operation.rs delete mode 100644 nemo/src/parser/ast/expression/sequence/key_value.rs rename nemo/src/parser/ast/{expression => }/sequence.rs (87%) create mode 100644 nemo/src/parser/ast/sequence/declare.rs create mode 100644 nemo/src/parser/ast/sequence/key_value.rs rename nemo/src/parser/ast/{expression => }/sequence/one.rs (82%) rename nemo/src/parser/ast/{expression => }/sequence/simple.rs (82%) create mode 100644 nemo/src/parser/ast/statement.rs create mode 100644 nemo/src/parser/ast/tag/aggregation.rs create mode 100644 nemo/src/parser/ast/tag/datatype.rs create mode 100644 nemo/src/parser/ast/tag/operation.rs create mode 100644 nemo/src/parser/ast/tag/parameter.rs create mode 100644 nemo/src/parser/ast/tag/structure.rs create mode 100644 nemo/src/rule_model/components/datatype.rs create mode 100644 nemo/src/rule_model/syntax/datatypes.rs delete mode 100644 nemo/src/rule_model/syntax/import_export/value_formats.rs diff --git a/nemo/src/api.rs b/nemo/src/api.rs index ef5640814..de768364f 100644 --- a/nemo/src/api.rs +++ b/nemo/src/api.rs @@ -52,8 +52,8 @@ pub fn load(file: PathBuf) -> Result { /// # Error /// Returns an appropriate [Error] variant on parsing and feature check issues. pub fn load_string(input: String) -> Result { - let (ast, _errors) = parse_program_str(&input); - let _program = Program::from_ast(ast); + // let (ast, _errors) = parse_program_str(&input); + // let _program = Program::from_ast(ast); todo!("ExecutionEngine has to use the new rule model") // ExecutionEngine::initialize(&program, ImportManager::new(ResourceProviders::default())) } diff --git a/nemo/src/parser/ast.rs b/nemo/src/parser/ast.rs index 948d74157..4da79a46a 100644 --- a/nemo/src/parser/ast.rs +++ b/nemo/src/parser/ast.rs @@ -1,8 +1,13 @@ //! This module defines the abstract syntax tree representation of a nemo program. +pub mod attribute; +pub mod comment; +pub mod directive; pub mod expression; pub mod program; pub mod rule; +pub mod sequence; +pub mod statement; pub mod tag; pub mod token; @@ -16,7 +21,7 @@ pub trait ProgramAST<'a>: Sync { fn children(&self) -> Vec<&dyn ProgramAST>; /// Return the region of text this node originates from. - fn span(&self) -> ProgramSpan; + fn span(&self) -> ProgramSpan<'a>; /// Parse the given input into this type of node fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self> diff --git a/nemo/src/parser/ast/attribute.rs b/nemo/src/parser/ast/attribute.rs new file mode 100644 index 000000000..50799b8df --- /dev/null +++ b/nemo/src/parser/ast/attribute.rs @@ -0,0 +1,113 @@ +//! This module defines [Attribute]. 
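+//!
+//! An attribute has the form `#[...]`, terminated by a newline, where the
+//! bracketed content is parsed as an atom, e.g. `#[test(1, 2, 3)]`. A rough
+//! usage sketch mirroring the tests below (marked `ignore` since it is an
+//! illustration added in editing, not a doctest from this commit):
+//!
+//! ```ignore
+//! let input = ParserInput::new("#[test(1, 2, 3)]\n", ParserState::default());
+//! let (_rest, attribute) = Attribute::parse(input).unwrap();
+//! assert_eq!(attribute.content().tag().to_string(), "test");
+//! ```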
+ +use nom::{ + character::complete::newline, + sequence::{delimited, pair, terminated, tuple}, +}; + +use crate::parser::{ + context::{context, ParserContext}, + input::ParserInput, + span::ProgramSpan, + ParserResult, +}; + +use super::{comment::wsoc::WSoC, expression::complex::atom::Atom, token::Token, ProgramAST}; + +/// Attribute of a rule +#[derive(Debug)] +pub struct Attribute<'a> { + /// [ProgramSpan] associated with this node + span: ProgramSpan<'a>, + + /// [Atom] containing the content of the directive + content: Atom<'a>, +} + +impl<'a> Attribute<'a> { + /// Return the [Atom] that contains the content of the attribute. + pub fn content(&self) -> &Atom<'a> { + &self.content + } +} + +const CONTEXT: ParserContext = ParserContext::Attribute; + +impl<'a> ProgramAST<'a> for Attribute<'a> { + fn children(&self) -> Vec<&dyn ProgramAST> { + vec![self.content()] + } + + fn span(&self) -> ProgramSpan<'a> { + self.span + } + + fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self> + where + Self: Sized + 'a, + { + let input_span = input.span; + + context( + CONTEXT, + terminated( + delimited( + tuple((Token::hash, Token::open_bracket, WSoC::parse)), + Atom::parse, + pair(WSoC::parse, Token::closed_bracket), + ), + newline, + ), + )(input) + .map(|(rest, content)| { + let rest_span = rest.span; + + ( + rest, + Self { + span: input_span.until_rest(&rest_span), + content, + }, + ) + }) + } + + fn context(&self) -> ParserContext { + CONTEXT + } +} + +#[cfg(test)] +mod test { + use nom::combinator::all_consuming; + + use crate::parser::{ + ast::{attribute::Attribute, ProgramAST}, + input::ParserInput, + ParserState, + }; + + #[test] + fn parse_attribute() { + let test = vec![ + ("#[test(1, 2, 3)]\n", ("test".to_string(), 3)), + ("#[ abc(1) ]\n", ("abc".to_string(), 1)), + ]; + + for (input, expected) in test { + let parser_input = ParserInput::new(input, ParserState::default()); + let result = all_consuming(Attribute::parse)(parser_input); + + assert!(result.is_ok()); + + let result = result.unwrap(); + assert_eq!( + expected, + ( + result.1.content.tag().to_string(), + result.1.content.expressions().count() + ) + ); + } + } +} diff --git a/nemo/src/parser/ast/comment.rs b/nemo/src/parser/ast/comment.rs new file mode 100644 index 000000000..4acc21e80 --- /dev/null +++ b/nemo/src/parser/ast/comment.rs @@ -0,0 +1,7 @@ +//! This module defines the ways to provide comments to certain syntax elements. + +pub mod closed; +pub mod doc; +pub mod line; +pub mod toplevel; +pub mod wsoc; diff --git a/nemo/src/parser/ast/comment/closed.rs b/nemo/src/parser/ast/comment/closed.rs new file mode 100644 index 000000000..854609ef7 --- /dev/null +++ b/nemo/src/parser/ast/comment/closed.rs @@ -0,0 +1,103 @@ +//! This module defines [ClosedComment]. 
+ +use nom::{bytes::complete::take_until, sequence::delimited}; + +use crate::parser::{ + ast::{ + token::{Token, TokenKind}, + ProgramAST, + }, + context::{context, ParserContext}, + input::ParserInput, + span::ProgramSpan, + ParserResult, +}; + +/// Closed comment +#[derive(Debug)] +pub struct ClosedComment<'a> { + /// [ProgramSpan] associated with this comment + span: ProgramSpan<'a>, + + /// Part of the comment that contains the content + content: ProgramSpan<'a>, +} + +const CONTEXT: ParserContext = ParserContext::Comment; + +impl<'a> ClosedComment<'a> { + /// Return the content of the comment + pub fn content(&self) -> String { + self.content.0.to_string() + } +} + +impl<'a> ProgramAST<'a> for ClosedComment<'a> { + fn children(&self) -> Vec<&dyn ProgramAST> { + Vec::default() + } + + fn span(&self) -> ProgramSpan<'a> { + self.span + } + + fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self> + where + Self: Sized + 'a, + { + let input_span = input.span; + + context( + CONTEXT, + delimited( + Token::open_comment, + take_until(TokenKind::CloseComment.name()), + Token::close_comment, + ), + )(input) + .map(|(rest, content)| { + let rest_span = rest.span; + + ( + rest, + Self { + span: input_span.until_rest(&rest_span), + content: content.span, + }, + ) + }) + } + + fn context(&self) -> ParserContext { + CONTEXT + } +} + +#[cfg(test)] +mod test { + use nom::combinator::all_consuming; + + use crate::parser::{ + ast::{comment::closed::ClosedComment, ProgramAST}, + input::ParserInput, + ParserState, + }; + + #[test] + fn parse_closed_comment() { + let test = vec![ + ("/*my comment*/", "my comment".to_string()), + ("/* my comment */", " my comment ".to_string()), + ]; + + for (input, expected) in test { + let parser_input = ParserInput::new(input, ParserState::default()); + let result = all_consuming(ClosedComment::parse)(parser_input); + + assert!(result.is_ok()); + + let result = result.unwrap(); + assert_eq!(expected, result.1.content()); + } + } +} diff --git a/nemo/src/parser/ast/comment/doc.rs b/nemo/src/parser/ast/comment/doc.rs new file mode 100644 index 000000000..bfab5ca89 --- /dev/null +++ b/nemo/src/parser/ast/comment/doc.rs @@ -0,0 +1,110 @@ +//! This module defines [DocComment]. + +use nom::{ + character::complete::{line_ending, not_line_ending}, + combinator::opt, + multi::separated_list1, + sequence::{pair, preceded}, +}; + +use crate::parser::{ + ast::{token::Token, ProgramAST}, + context::{context, ParserContext}, + input::ParserInput, + span::ProgramSpan, + ParserResult, +}; + +/// Doc comment that is attached to e.g. 
rules
+#[derive(Debug)]
+pub struct DocComment<'a> {
+    /// [ProgramSpan] associated with this comment
+    span: ProgramSpan<'a>,
+
+    /// Each line of the comment
+    content: Vec<ProgramSpan<'a>>,
+}
+
+impl<'a> DocComment<'a> {
+    /// Return the content of the comment
+    pub fn content(&self) -> Vec<String> {
+        self.content
+            .iter()
+            .map(|comment| comment.0.to_string())
+            .collect()
+    }
+}
+const CONTEXT: ParserContext = ParserContext::DocComment;
+
+impl<'a> ProgramAST<'a> for DocComment<'a> {
+    fn children(&self) -> Vec<&dyn ProgramAST> {
+        Vec::default()
+    }
+
+    fn span(&self) -> ProgramSpan<'a> {
+        self.span
+    }
+
+    fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self>
+    where
+        Self: Sized + 'a,
+    {
+        let input_span = input.span;
+
+        context(
+            CONTEXT,
+            separated_list1(
+                line_ending,
+                preceded(
+                    pair(Token::doc_comment, opt(Token::whitespace)),
+                    not_line_ending,
+                ),
+            ),
+        )(input)
+        .map(|(rest, result)| {
+            let rest_span = rest.span;
+            let content = result.into_iter().map(|comment| comment.span).collect();
+
+            (
+                rest,
+                Self {
+                    span: input_span.until_rest(&rest_span),
+                    content,
+                },
+            )
+        })
+    }
+
+    fn context(&self) -> ParserContext {
+        CONTEXT
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use nom::combinator::all_consuming;
+
+    use crate::parser::{
+        ast::{comment::doc::DocComment, ProgramAST},
+        input::ParserInput,
+        ParserState,
+    };
+
+    #[test]
+    fn parse_doc_comment() {
+        let test = vec![
+            ("/// my comment", 1),
+            ("///my comment\r\n/// my other comment", 2),
+        ];
+
+        for (input, expected) in test {
+            let parser_input = ParserInput::new(input, ParserState::default());
+            let result = all_consuming(DocComment::parse)(parser_input);
+
+            assert!(result.is_ok());
+
+            let result = result.unwrap();
+            assert_eq!(expected, result.1.content().len());
+        }
+    }
+}
diff --git a/nemo/src/parser/ast/comment/line.rs b/nemo/src/parser/ast/comment/line.rs
new file mode 100644
index 000000000..aa80e95c3
--- /dev/null
+++ b/nemo/src/parser/ast/comment/line.rs
@@ -0,0 +1,103 @@
+//! This module defines [LineComment].
+ +use nom::{ + character::complete::not_line_ending, + combinator::opt, + sequence::{pair, preceded}, +}; + +use crate::parser::{ + ast::{token::Token, ProgramAST}, + context::{context, ParserContext}, + input::ParserInput, + span::ProgramSpan, + ParserResult, +}; + +/// Line comment +#[derive(Debug)] +pub struct LineComment<'a> { + /// [ProgramSpan] associated with this comment + span: ProgramSpan<'a>, + + /// Part of the comment that contains the content + content: ProgramSpan<'a>, +} + +const CONTEXT: ParserContext = ParserContext::Comment; + +impl<'a> LineComment<'a> { + /// Return the content of the comment + pub fn content(&self) -> String { + self.content.0.to_string() + } +} + +impl<'a> ProgramAST<'a> for LineComment<'a> { + fn children(&self) -> Vec<&dyn ProgramAST> { + Vec::default() + } + + fn span(&self) -> ProgramSpan<'a> { + self.span + } + + fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self> + where + Self: Sized + 'a, + { + let input_span = input.span; + + context( + CONTEXT, + preceded( + pair(Token::comment, opt(Token::whitespace)), + not_line_ending, + ), + )(input) + .map(|(rest, content)| { + let rest_span = rest.span; + + ( + rest, + Self { + span: input_span.until_rest(&rest_span), + content: content.span, + }, + ) + }) + } + + fn context(&self) -> ParserContext { + CONTEXT + } +} + +#[cfg(test)] +mod test { + use nom::combinator::all_consuming; + + use crate::parser::{ + ast::{comment::line::LineComment, ProgramAST}, + input::ParserInput, + ParserState, + }; + + #[test] + fn parse_line_comment() { + let test = vec![ + ("// my comment", "my comment".to_string()), + ("//my comment", "my comment".to_string()), + ]; + + for (input, expected) in test { + let parser_input = ParserInput::new(input, ParserState::default()); + let result = all_consuming(LineComment::parse)(parser_input); + + assert!(result.is_ok()); + + let result = result.unwrap(); + assert_eq!(expected, result.1.content()); + } + } +} diff --git a/nemo/src/parser/ast/comment/toplevel.rs b/nemo/src/parser/ast/comment/toplevel.rs new file mode 100644 index 000000000..469582210 --- /dev/null +++ b/nemo/src/parser/ast/comment/toplevel.rs @@ -0,0 +1,111 @@ +//! This module defines [TopLevelComment]. + +use nom::{ + character::complete::{line_ending, not_line_ending}, + combinator::opt, + multi::separated_list1, + sequence::{pair, preceded}, +}; + +use crate::parser::{ + ast::{token::Token, ProgramAST}, + context::{context, ParserContext}, + input::ParserInput, + span::ProgramSpan, + ParserResult, +}; + +/// Doc comment that is attached to e.g. 
a program
+#[derive(Debug)]
+pub struct TopLevelComment<'a> {
+    /// [ProgramSpan] associated with this comment
+    span: ProgramSpan<'a>,
+
+    /// Each line of the comment
+    content: Vec<ProgramSpan<'a>>,
+}
+
+const CONTEXT: ParserContext = ParserContext::TopLevelComment;
+
+impl<'a> TopLevelComment<'a> {
+    /// Return the content of the comment
+    pub fn content(&self) -> Vec<String> {
+        self.content
+            .iter()
+            .map(|comment| comment.0.to_string())
+            .collect()
+    }
+}
+
+impl<'a> ProgramAST<'a> for TopLevelComment<'a> {
+    fn children(&self) -> Vec<&dyn ProgramAST> {
+        Vec::default()
+    }
+
+    fn span(&self) -> ProgramSpan<'a> {
+        self.span
+    }
+
+    fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self>
+    where
+        Self: Sized + 'a,
+    {
+        let input_span = input.span;
+
+        context(
+            CONTEXT,
+            separated_list1(
+                line_ending,
+                preceded(
+                    pair(Token::toplevel_comment, opt(Token::whitespace)),
+                    not_line_ending,
+                ),
+            ),
+        )(input)
+        .map(|(rest, result)| {
+            let rest_span = rest.span;
+            let content = result.into_iter().map(|result| result.span).collect();
+
+            (
+                rest,
+                Self {
+                    span: input_span.until_rest(&rest_span),
+                    content,
+                },
+            )
+        })
+    }
+
+    fn context(&self) -> ParserContext {
+        CONTEXT
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use nom::combinator::all_consuming;
+
+    use crate::parser::{
+        ast::{comment::toplevel::TopLevelComment, ProgramAST},
+        input::ParserInput,
+        ParserState,
+    };
+
+    #[test]
+    fn parse_toplevel_comment() {
+        let test = vec![
+            ("//! my comment", 1),
+            ("//!my comment\r\n//! my other comment", 2),
+        ];
+
+        for (input, expected) in test {
+            let parser_input = ParserInput::new(input, ParserState::default());
+            let result = all_consuming(TopLevelComment::parse)(parser_input);
+
+            assert!(result.is_ok());
+
+            let result = result.unwrap();
+            assert_eq!(expected, result.1.content().len());
+        }
+    }
+}
diff --git a/nemo/src/parser/ast/comment/wsoc.rs b/nemo/src/parser/ast/comment/wsoc.rs
new file mode 100644
index 000000000..2d3342452
--- /dev/null
+++ b/nemo/src/parser/ast/comment/wsoc.rs
@@ -0,0 +1,99 @@
+//! This module defines [WSoC].
+
+use nom::{
+    branch::alt,
+    combinator::{map, opt},
+    multi::many0,
+    sequence::preceded,
+};
+
+use crate::parser::{
+    ast::{token::Token, ProgramAST},
+    input::ParserInput,
+    span::ProgramSpan,
+    ParserResult,
+};
+
+use super::{closed::ClosedComment, line::LineComment};
+
+/// Type of comment that can appear in any "white-space position"
+#[derive(Debug)]
+pub enum WhiteSpaceComment<'a> {
+    /// Line comment
+    Line(LineComment<'a>),
+    /// Closed comment
+    Closed(ClosedComment<'a>),
+}
+
+/// Represents a series of whitespaces or comments
+#[derive(Debug)]
+pub struct WSoC<'a> {
+    /// [ProgramSpan] associated with this comment
+    _span: ProgramSpan<'a>,
+    /// comments
+    comments: Vec<WhiteSpaceComment<'a>>,
+}
+
+impl<'a> WSoC<'a> {
+    /// Return comments contained within this object.
+    pub fn comments(&self) -> &Vec<WhiteSpaceComment<'a>> {
+        &self.comments
+    }
+
+    /// Parse one or more white-spaces optionally followed by a comment.
+    pub fn parse_whitespace_comment(
+        input: ParserInput<'a>,
+    ) -> ParserResult<'a, Option<WhiteSpaceComment<'a>>> {
+        preceded(
+            Token::whitespace,
+            opt(alt((
+                map(LineComment::parse, WhiteSpaceComment::Line),
+                map(ClosedComment::parse, WhiteSpaceComment::Closed),
+            ))),
+        )(input)
+    }
+
+    /// Parse whitespace or comments.
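+    ///
+    /// A rough sketch of the expected behaviour, mirroring the tests below
+    /// (marked `ignore` since it is an illustration added in editing, not a
+    /// doctest from this commit):
+    ///
+    /// ```ignore
+    /// let input = ParserInput::new(" // a comment \n ", ParserState::default());
+    /// let (_rest, wsoc) = WSoC::parse(input).unwrap();
+    /// assert_eq!(wsoc.comments().len(), 1);
+    /// ```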
+ pub fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self> { + let input_span = input.span; + + many0(Self::parse_whitespace_comment)(input).map(|(rest, comments)| { + let rest_span = rest.span; + + ( + rest, + Self { + _span: input_span.until_rest(&rest_span), + comments: comments.into_iter().filter_map(|comment| comment).collect(), + }, + ) + }) + } +} + +#[cfg(test)] +mod test { + use nom::combinator::all_consuming; + + use crate::parser::{ast::comment::wsoc::WSoC, input::ParserInput, ParserState}; + + #[test] + fn parse_wsoc() { + let test = vec![ + ("", 0), + (" \n ", 0), + (" // my comment \n // Another comment \n ", 2), + (" /* a comment */", 1), + ]; + + for (input, expected) in test { + let parser_input = ParserInput::new(input, ParserState::default()); + let result = all_consuming(WSoC::parse)(parser_input); + + assert!(result.is_ok()); + + let result = result.unwrap(); + assert_eq!(expected, result.1.comments().len()); + } + } +} diff --git a/nemo/src/parser/ast/directive.rs b/nemo/src/parser/ast/directive.rs new file mode 100644 index 000000000..7b49decc9 --- /dev/null +++ b/nemo/src/parser/ast/directive.rs @@ -0,0 +1,146 @@ +//! This module defines [Directive]s. + +use base::Base; +use declare::Declare; +use export::Export; +use import::Import; +use nom::{branch::alt, combinator::map}; +use output::Output; +use prefix::Prefix; +use unknown::UnknownDirective; + +use crate::parser::{ + context::{context, ParserContext}, + input::ParserInput, + span::ProgramSpan, + ParserResult, +}; + +use super::ProgramAST; + +pub mod base; +pub mod declare; +pub mod export; +pub mod import; +pub mod output; +pub mod prefix; +pub mod unknown; + +/// Type of directives +#[derive(Debug)] +pub enum Directive<'a> { + /// Base + Base(Base<'a>), + /// Declare + Declare(Declare<'a>), + /// Export + Export(Export<'a>), + /// Import + Import(Import<'a>), + /// Output + Output(Output<'a>), + /// Prefix + Prefix(Prefix<'a>), + /// Unknown + Unknown(UnknownDirective<'a>), +} + +impl<'a> Directive<'a> { + /// Return the context of the underlying directive. 
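+    ///
+    /// For example (cf. the test below), parsing `"@output test"` yields a
+    /// directive whose `context_type` is `ParserContext::Output`; a rough
+    /// sketch (`ignore`d since it is an illustration added in editing, not a
+    /// doctest from this commit):
+    ///
+    /// ```ignore
+    /// let input = ParserInput::new("@output test", ParserState::default());
+    /// let (_rest, directive) = Directive::parse(input).unwrap();
+    /// assert_eq!(directive.context_type(), ParserContext::Output);
+    /// ```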
+    pub fn context_type(&self) -> ParserContext {
+        match self {
+            Directive::Base(directive) => directive.context(),
+            Directive::Declare(directive) => directive.context(),
+            Directive::Export(directive) => directive.context(),
+            Directive::Import(directive) => directive.context(),
+            Directive::Output(directive) => directive.context(),
+            Directive::Prefix(directive) => directive.context(),
+            Directive::Unknown(directive) => directive.context(),
+        }
+    }
+}
+
+const CONTEXT: ParserContext = ParserContext::Directive;
+
+impl<'a> ProgramAST<'a> for Directive<'a> {
+    fn children(&self) -> Vec<&dyn ProgramAST> {
+        vec![match self {
+            Directive::Base(directive) => directive,
+            Directive::Declare(directive) => directive,
+            Directive::Export(directive) => directive,
+            Directive::Import(directive) => directive,
+            Directive::Output(directive) => directive,
+            Directive::Prefix(directive) => directive,
+            Directive::Unknown(directive) => directive,
+        }]
+    }
+
+    fn span(&self) -> ProgramSpan<'a> {
+        match self {
+            Directive::Base(directive) => directive.span(),
+            Directive::Declare(directive) => directive.span(),
+            Directive::Export(directive) => directive.span(),
+            Directive::Import(directive) => directive.span(),
+            Directive::Output(directive) => directive.span(),
+            Directive::Prefix(directive) => directive.span(),
+            Directive::Unknown(directive) => directive.span(),
+        }
+    }
+
+    fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self>
+    where
+        Self: Sized + 'a,
+    {
+        context(
+            CONTEXT,
+            alt((
+                map(Base::parse, Directive::Base),
+                map(Declare::parse, Directive::Declare),
+                map(Export::parse, Directive::Export),
+                map(Import::parse, Directive::Import),
+                map(Output::parse, Directive::Output),
+                map(Prefix::parse, Directive::Prefix),
+                map(UnknownDirective::parse, Directive::Unknown),
+            )),
+        )(input)
+    }
+
+    fn context(&self) -> ParserContext {
+        CONTEXT
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use nom::combinator::all_consuming;
+
+    use crate::parser::{
+        ast::{directive::Directive, ProgramAST},
+        context::ParserContext,
+        input::ParserInput,
+        ParserState,
+    };
+
+    #[test]
+    fn parse_directive() {
+        let test = vec![
+            ("@base <test>", ParserContext::Base),
+            ("@declare test(a:int)", ParserContext::Declare),
+            ("@export test :- csv {}", ParserContext::Export),
+            ("@import test :- csv {}", ParserContext::Import),
+            ("@output test", ParserContext::Output),
+            ("@prefix test: <test>", ParserContext::Prefix),
+            ("@test something", ParserContext::UnknownDirective),
+        ];
+
+        for (input, expect) in test {
+            let parser_input = ParserInput::new(input, ParserState::default());
+            let result = all_consuming(Directive::parse)(parser_input);
+
+            assert!(result.is_ok());
+
+            let result = result.unwrap();
+            assert_eq!(result.1.context_type(), expect);
+        }
+    }
+}
diff --git a/nemo/src/parser/ast/directive/base.rs b/nemo/src/parser/ast/directive/base.rs
new file mode 100644
index 000000000..1448ddb1a
--- /dev/null
+++ b/nemo/src/parser/ast/directive/base.rs
@@ -0,0 +1,101 @@
+//! This module defines the [Base] directive.
+
+use nom::sequence::{preceded, tuple};
+
+use crate::parser::{
+    ast::{comment::wsoc::WSoC, expression::basic::iri::Iri, token::Token, ProgramAST},
+    context::{context, ParserContext},
+    input::ParserInput,
+    span::ProgramSpan,
+    ParserResult,
+};
+
+/// Base directive, indicating a global prefix
+#[derive(Debug)]
+pub struct Base<'a> {
+    /// [ProgramSpan] associated with this node
+    span: ProgramSpan<'a>,
+
+    /// The global prefix
+    iri: Iri<'a>,
+}
+
+impl<'a> Base<'a> {
+    /// Return the base iri.
+    pub fn iri(&self) -> &Iri<'a> {
+        &self.iri
+    }
+}
+
+const CONTEXT: ParserContext = ParserContext::Base;
+
+impl<'a> ProgramAST<'a> for Base<'a> {
+    fn children(&self) -> Vec<&dyn ProgramAST> {
+        vec![&self.iri]
+    }
+
+    fn span(&self) -> ProgramSpan<'a> {
+        self.span
+    }
+
+    fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self>
+    where
+        Self: Sized + 'a,
+    {
+        let input_span = input.span;
+
+        context(
+            CONTEXT,
+            preceded(
+                tuple((
+                    Token::at,
+                    Token::directive_base,
+                    WSoC::parse_whitespace_comment,
+                    WSoC::parse,
+                )),
+                Iri::parse,
+            ),
+        )(input)
+        .map(|(rest, iri)| {
+            let rest_span = rest.span;
+
+            (
+                rest,
+                Self {
+                    span: input_span.until_rest(&rest_span),
+                    iri,
+                },
+            )
+        })
+    }
+
+    fn context(&self) -> ParserContext {
+        CONTEXT
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use nom::combinator::all_consuming;
+
+    use crate::parser::{
+        ast::{directive::base::Base, ProgramAST},
+        input::ParserInput,
+        ParserState,
+    };
+
+    #[test]
+    fn parse_base() {
+        let test = vec![("@base <test>", "test".to_string())];
+
+        for (input, expected) in test {
+            let parser_input = ParserInput::new(input, ParserState::default());
+            let result = all_consuming(Base::parse)(parser_input);
+
+            assert!(result.is_ok());
+
+            let result = result.unwrap();
+            assert_eq!(expected, result.1.iri().content());
+        }
+    }
+}
diff --git a/nemo/src/parser/ast/directive/declare.rs b/nemo/src/parser/ast/directive/declare.rs
new file mode 100644
index 000000000..09618c600
--- /dev/null
+++ b/nemo/src/parser/ast/directive/declare.rs
@@ -0,0 +1,146 @@
+//! This module defines the [Declare] directive.
+
+use nom::sequence::{delimited, pair, preceded, tuple};
+
+use crate::{
+    parser::{
+        ast::{
+            comment::wsoc::WSoC,
+            sequence::declare::DeclareSequence,
+            tag::{parameter::Parameter, structure::StructureTag},
+            token::Token,
+            ProgramAST,
+        },
+        context::{context, ParserContext},
+        input::ParserInput,
+        span::ProgramSpan,
+        ParserResult,
+    },
+    rule_model::components::datatype::DataType,
+};
+
+/// Declare directive, associating atom positions with names and data types
+#[derive(Debug)]
+pub struct Declare<'a> {
+    /// [ProgramSpan] associated with this node
+    span: ProgramSpan<'a>,
+
+    /// Predicate this statement applies to
+    predicate: StructureTag<'a>,
+    /// The declaration
+    declaration: DeclareSequence<'a>,
+}
+
+impl<'a> Declare<'a> {
+    /// Return the predicate this statement applies to.
+    pub fn predicate(&self) -> &StructureTag<'a> {
+        &self.predicate
+    }
+
+    /// Return an iterator over the name-type pairs.
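+    ///
+    /// For `@declare test(a: int, b: string)` this yields the pairs
+    /// `(a, int)` and `(b, string)`; a rough sketch mirroring the test below
+    /// (`ignore`d since it is an illustration added in editing, not a doctest
+    /// from this commit):
+    ///
+    /// ```ignore
+    /// let input = ParserInput::new("@declare test(a: int, b: string)", ParserState::default());
+    /// let (_rest, declare) = Declare::parse(input).unwrap();
+    /// assert_eq!(declare.name_type_pairs().count(), 2);
+    /// ```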
+    pub fn name_type_pairs(&self) -> impl Iterator<Item = (Parameter, DataType)> + '_ {
+        self.declaration
+            .iter()
+            .map(|(parameter_name, tag_datatype)| {
+                (parameter_name.parameter().clone(), tag_datatype.data_type())
+            })
+    }
+}
+
+const CONTEXT: ParserContext = ParserContext::Declare;
+
+impl<'a> ProgramAST<'a> for Declare<'a> {
+    fn children(&self) -> Vec<&dyn ProgramAST> {
+        let mut result = Vec::<&dyn ProgramAST>::new();
+        result.push(&self.predicate);
+
+        for (parameter, data_type) in self.declaration.iter() {
+            result.push(parameter);
+            result.push(data_type);
+        }
+
+        result
+    }
+
+    fn span(&self) -> ProgramSpan<'a> {
+        self.span
+    }
+
+    fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self>
+    where
+        Self: Sized + 'a,
+    {
+        let input_span = input.span;
+
+        context(
+            CONTEXT,
+            preceded(
+                tuple((
+                    Token::at,
+                    Token::directive_declare,
+                    WSoC::parse_whitespace_comment,
+                    WSoC::parse,
+                )),
+                pair(
+                    StructureTag::parse,
+                    delimited(
+                        tuple((WSoC::parse, Token::open_parenthesis, WSoC::parse)),
+                        DeclareSequence::parse,
+                        tuple((WSoC::parse, Token::closed_parenthesis, WSoC::parse)),
+                    ),
+                ),
+            ),
+        )(input)
+        .map(|(rest, (predicate, declaration))| {
+            let rest_span = rest.span;
+
+            (
+                rest,
+                Self {
+                    span: input_span.until_rest(&rest_span),
+                    predicate,
+                    declaration,
+                },
+            )
+        })
+    }
+
+    fn context(&self) -> ParserContext {
+        CONTEXT
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use nom::combinator::all_consuming;
+
+    use crate::parser::{
+        ast::{directive::declare::Declare, ProgramAST},
+        input::ParserInput,
+        ParserState,
+    };
+
+    #[test]
+    fn parse_declare() {
+        let test = vec![(
+            "@declare test(_: any, a: int, _: float, b: string)",
+            ("test".to_string(), 4),
+        )];
+
+        for (input, expected) in test {
+            let parser_input = ParserInput::new(input, ParserState::default());
+            let result = all_consuming(Declare::parse)(parser_input);
+
+            assert!(result.is_ok());
+
+            let result = result.unwrap();
+            assert_eq!(
+                expected,
+                (
+                    result.1.predicate().to_string(),
+                    result.1.name_type_pairs().count()
+                )
+            );
+        }
+    }
+}
diff --git a/nemo/src/parser/ast/directive/export.rs b/nemo/src/parser/ast/directive/export.rs
new file mode 100644
index 000000000..a7d200221
--- /dev/null
+++ b/nemo/src/parser/ast/directive/export.rs
@@ -0,0 +1,135 @@
+//! This module defines the [Export] directive.
+
+use nom::sequence::{preceded, separated_pair, tuple};
+
+use crate::parser::{
+    ast::{
+        comment::wsoc::WSoC, expression::complex::map::Map, tag::structure::StructureTag,
+        token::Token, ProgramAST,
+    },
+    context::{context, ParserContext},
+    input::ParserInput,
+    span::ProgramSpan,
+    ParserResult,
+};
+
+/// Export directive
+#[derive(Debug)]
+pub struct Export<'a> {
+    /// [ProgramSpan] associated with this node
+    span: ProgramSpan<'a>,
+
+    /// Predicate that is being exported
+    predicate: StructureTag<'a>,
+    /// Map of instructions
+    instructions: Map<'a>,
+}
+
+impl<'a> Export<'a> {
+    /// Return the predicate.
+    pub fn predicate(&self) -> &StructureTag<'a> {
+        &self.predicate
+    }
+
+    /// Return the instructions.
+    pub fn instructions(&self) -> &Map<'a> {
+        &self.instructions
+    }
+
+    /// Parse the left part of the export directive.
+    fn parse_left_part(input: ParserInput<'a>) -> ParserResult<'a, StructureTag<'a>> {
+        preceded(
+            tuple((
+                Token::at,
+                Token::directive_export,
+                WSoC::parse_whitespace_comment,
+                WSoC::parse,
+            )),
+            StructureTag::parse,
+        )(input)
+    }
+
+    /// Parse the right part of the export directive.
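+    ///
+    /// The right part is the format map after the arrow, e.g.
+    /// `csv { resource: "test.csv" }` in
+    /// `@export predicate :- csv { resource: "test.csv" }` (cf. the test below).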
+ fn parse_right_part(input: ParserInput<'a>) -> ParserResult<'a, Map<'a>> { + Map::parse(input) + } +} + +const CONTEXT: ParserContext = ParserContext::Export; + +impl<'a> ProgramAST<'a> for Export<'a> { + fn children(&self) -> Vec<&dyn ProgramAST> { + vec![&self.predicate, &self.instructions] + } + + fn span(&self) -> ProgramSpan<'a> { + self.span + } + + fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self> + where + Self: Sized + 'a, + { + let input_span = input.span; + + context( + CONTEXT, + separated_pair( + Self::parse_left_part, + tuple((WSoC::parse, Token::arrow, WSoC::parse)), + Self::parse_right_part, + ), + )(input) + .map(|(rest, (predicate, instructions))| { + let rest_span = rest.span; + + ( + rest, + Self { + span: input_span.until_rest(&rest_span), + predicate, + instructions, + }, + ) + }) + } + + fn context(&self) -> ParserContext { + CONTEXT + } +} + +#[cfg(test)] +mod test { + use nom::combinator::all_consuming; + + use crate::parser::{ + ast::{directive::export::Export, ProgramAST}, + input::ParserInput, + ParserState, + }; + + #[test] + fn parse_export() { + let test = vec![( + "@export predicate :- csv { resource: \"test.csv\" }", + ("predicate".to_string(), "csv".to_string()), + )]; + + for (input, expected) in test { + let parser_input = ParserInput::new(input, ParserState::default()); + let result = all_consuming(Export::parse)(parser_input); + + assert!(result.is_ok()); + + let result = result.unwrap(); + assert_eq!( + expected, + ( + result.1.predicate().to_string(), + result.1.instructions().tag().unwrap().to_string() + ) + ); + } + } +} diff --git a/nemo/src/parser/ast/directive/import.rs b/nemo/src/parser/ast/directive/import.rs new file mode 100644 index 000000000..dac2ffe0a --- /dev/null +++ b/nemo/src/parser/ast/directive/import.rs @@ -0,0 +1,135 @@ +//! This module defines the [Import] directive. + +use nom::sequence::{preceded, separated_pair, tuple}; + +use crate::parser::{ + ast::{ + comment::wsoc::WSoC, expression::complex::map::Map, tag::structure::StructureTag, + token::Token, ProgramAST, + }, + context::{context, ParserContext}, + input::ParserInput, + span::ProgramSpan, + ParserResult, +}; + +/// Import directive +#[derive(Debug)] +pub struct Import<'a> { + /// [ProgramSpan] associated with this node + span: ProgramSpan<'a>, + + /// Predicate that is being Imported + predicate: StructureTag<'a>, + /// Map of instructions + instructions: Map<'a>, +} + +impl<'a> Import<'a> { + /// Return the predicate. + pub fn predicate(&self) -> &StructureTag<'a> { + &self.predicate + } + + /// Return the instructions. + pub fn instructions(&self) -> &Map<'a> { + &self.instructions + } + + /// Parse the left part of the import directive. + fn parse_left_part(input: ParserInput<'a>) -> ParserResult<'a, StructureTag<'a>> { + preceded( + tuple(( + Token::at, + Token::directive_import, + WSoC::parse_whitespace_comment, + WSoC::parse, + )), + StructureTag::parse, + )(input) + } + + /// Parse the right part of the import directive. 
+ fn parse_right_part(input: ParserInput<'a>) -> ParserResult<'a, Map<'a>> { + Map::parse(input) + } +} + +const CONTEXT: ParserContext = ParserContext::Import; + +impl<'a> ProgramAST<'a> for Import<'a> { + fn children(&self) -> Vec<&dyn ProgramAST> { + vec![&self.predicate, &self.instructions] + } + + fn span(&self) -> ProgramSpan<'a> { + self.span + } + + fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self> + where + Self: Sized + 'a, + { + let input_span = input.span; + + context( + CONTEXT, + separated_pair( + Self::parse_left_part, + tuple((WSoC::parse, Token::arrow, WSoC::parse)), + Self::parse_right_part, + ), + )(input) + .map(|(rest, (predicate, instructions))| { + let rest_span = rest.span; + + ( + rest, + Self { + span: input_span.until_rest(&rest_span), + predicate, + instructions, + }, + ) + }) + } + + fn context(&self) -> ParserContext { + CONTEXT + } +} + +#[cfg(test)] +mod test { + use nom::combinator::all_consuming; + + use crate::parser::{ + ast::{directive::import::Import, ProgramAST}, + input::ParserInput, + ParserState, + }; + + #[test] + fn parse_import() { + let test = vec![( + "@import predicate :- csv { resource: \"test.csv\" }", + ("predicate".to_string(), "csv".to_string()), + )]; + + for (input, expected) in test { + let parser_input = ParserInput::new(input, ParserState::default()); + let result = all_consuming(Import::parse)(parser_input); + + assert!(result.is_ok()); + + let result = result.unwrap(); + assert_eq!( + expected, + ( + result.1.predicate().to_string(), + result.1.instructions().tag().unwrap().to_string() + ) + ); + } + } +} diff --git a/nemo/src/parser/ast/directive/output.rs b/nemo/src/parser/ast/directive/output.rs new file mode 100644 index 000000000..3e01d4f24 --- /dev/null +++ b/nemo/src/parser/ast/directive/output.rs @@ -0,0 +1,101 @@ +//! This module defines the [Output] directive. + +use nom::sequence::{preceded, tuple}; + +use crate::parser::{ + ast::{comment::wsoc::WSoC, tag::structure::StructureTag, token::Token, ProgramAST}, + context::{context, ParserContext}, + input::ParserInput, + span::ProgramSpan, + ParserResult, +}; + +/// Output directive +#[derive(Debug)] +pub struct Output<'a> { + /// [ProgramSpan] associated with this node + span: ProgramSpan<'a>, + + /// The predicate + predicate: StructureTag<'a>, +} + +impl<'a> Output<'a> { + /// Return the output predicate. 
+    pub fn predicate(&self) -> &StructureTag<'a> {
+        &self.predicate
+    }
+}
+
+const CONTEXT: ParserContext = ParserContext::Output;
+
+impl<'a> ProgramAST<'a> for Output<'a> {
+    fn children(&self) -> Vec<&dyn ProgramAST> {
+        vec![&self.predicate]
+    }
+
+    fn span(&self) -> ProgramSpan<'a> {
+        self.span
+    }
+
+    fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self>
+    where
+        Self: Sized + 'a,
+    {
+        let input_span = input.span;
+
+        context(
+            CONTEXT,
+            preceded(
+                tuple((
+                    Token::at,
+                    Token::directive_output,
+                    WSoC::parse_whitespace_comment,
+                    WSoC::parse,
+                )),
+                StructureTag::parse,
+            ),
+        )(input)
+        .map(|(rest, predicate)| {
+            let rest_span = rest.span;
+
+            (
+                rest,
+                Self {
+                    span: input_span.until_rest(&rest_span),
+                    predicate,
+                },
+            )
+        })
+    }
+
+    fn context(&self) -> ParserContext {
+        CONTEXT
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use nom::combinator::all_consuming;
+
+    use crate::parser::{
+        ast::{directive::output::Output, ProgramAST},
+        input::ParserInput,
+        ParserState,
+    };
+
+    #[test]
+    fn parse_output() {
+        let test = vec![("@output test", "test".to_string())];
+
+        for (input, expected) in test {
+            let parser_input = ParserInput::new(input, ParserState::default());
+            let result = all_consuming(Output::parse)(parser_input);
+
+            assert!(result.is_ok());
+
+            let result = result.unwrap();
+            assert_eq!(expected, result.1.predicate().to_string());
+        }
+    }
+}
diff --git a/nemo/src/parser/ast/directive/prefix.rs b/nemo/src/parser/ast/directive/prefix.rs
new file mode 100644
index 000000000..f4ccf2e26
--- /dev/null
+++ b/nemo/src/parser/ast/directive/prefix.rs
@@ -0,0 +1,119 @@
+//! This module defines the [Prefix] directive.
+
+use nom::sequence::{preceded, separated_pair, tuple};
+
+use crate::parser::{
+    ast::{comment::wsoc::WSoC, expression::basic::iri::Iri, token::Token, ProgramAST},
+    context::{context, ParserContext},
+    input::ParserInput,
+    span::ProgramSpan,
+    ParserResult,
+};
+
+/// Prefix directive
+#[derive(Debug)]
+pub struct Prefix<'a> {
+    /// [ProgramSpan] associated with this node
+    span: ProgramSpan<'a>,
+
+    /// The prefix
+    prefix: Token<'a>,
+    /// Its value
+    value: Iri<'a>,
+}
+
+impl<'a> Prefix<'a> {
+    /// Return the defined prefix.
+    pub fn prefix(&self) -> String {
+        self.prefix.to_string()
+    }
+
+    /// Return the value of the prefix.
+    pub fn value(&self) -> &Iri<'a> {
+        &self.value
+    }
+}
+
+const CONTEXT: ParserContext = ParserContext::Prefix;
+
+impl<'a> ProgramAST<'a> for Prefix<'a> {
+    fn children(&self) -> Vec<&dyn ProgramAST> {
+        self.value.children()
+    }
+
+    fn span(&self) -> ProgramSpan<'a> {
+        self.span
+    }
+
+    fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self>
+    where
+        Self: Sized + 'a,
+    {
+        let input_span = input.span;
+
+        context(
+            CONTEXT,
+            preceded(
+                tuple((
+                    Token::at,
+                    Token::directive_prefix,
+                    WSoC::parse_whitespace_comment,
+                    WSoC::parse,
+                )),
+                separated_pair(
+                    Token::name,
+                    tuple((WSoC::parse, Token::colon, WSoC::parse)),
+                    Iri::parse,
+                ),
+            ),
+        )(input)
+        .map(|(rest, (prefix, value))| {
+            let rest_span = rest.span;
+
+            (
+                rest,
+                Self {
+                    span: input_span.until_rest(&rest_span),
+                    prefix,
+                    value,
+                },
+            )
+        })
+    }
+
+    fn context(&self) -> ParserContext {
+        CONTEXT
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use nom::combinator::all_consuming;
+
+    use crate::parser::{
+        ast::{directive::prefix::Prefix, ProgramAST},
+        input::ParserInput,
+        ParserState,
+    };
+
+    #[test]
+    fn parse_prefix() {
+        let test = vec![(
+            "@prefix owl: <http://www.w3.org/2002/07/owl#>",
+            (
+                "owl".to_string(),
+                "http://www.w3.org/2002/07/owl#".to_string(),
+            ),
+        )];
+
+        for (input, expected) in test {
+            let parser_input = ParserInput::new(input, ParserState::default());
+            let result = all_consuming(Prefix::parse)(parser_input);
+
+            assert!(result.is_ok());
+
+            let result = result.unwrap();
+            assert_eq!(expected, (result.1.prefix(), result.1.value().content()));
+        }
+    }
+}
diff --git a/nemo/src/parser/ast/directive/unknown.rs b/nemo/src/parser/ast/directive/unknown.rs
new file mode 100644
index 000000000..d3a7d1991
--- /dev/null
+++ b/nemo/src/parser/ast/directive/unknown.rs
@@ -0,0 +1,112 @@
+//! This module defines the [UnknownDirective] directive.
+
+use nom::{
+    bytes::complete::is_not,
+    combinator::recognize,
+    sequence::{pair, preceded, separated_pair},
+};
+
+use crate::parser::{
+    ast::{comment::wsoc::WSoC, token::Token, ProgramAST},
+    context::{context, ParserContext},
+    input::ParserInput,
+    span::ProgramSpan,
+    ParserResult,
+};
+
+/// Unknown directive specified by a user
+#[derive(Debug)]
+pub struct UnknownDirective<'a> {
+    /// [ProgramSpan] associated with this node
+    span: ProgramSpan<'a>,
+
+    /// Name of the directive
+    name: Token<'a>,
+    /// Content
+    content: ProgramSpan<'a>,
+}
+
+impl<'a> UnknownDirective<'a> {
+    /// Return the name of the directive.
+    pub fn name(&self) -> String {
+        self.name.to_string()
+    }
+
+    /// Return the content of the directive.
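+    ///
+    /// For `@test something`, the name is `test` and the content is
+    /// `something`; a rough sketch mirroring the test below (`ignore`d since
+    /// it is an illustration added in editing, not a doctest from this commit):
+    ///
+    /// ```ignore
+    /// let input = ParserInput::new("@test something", ParserState::default());
+    /// let (_rest, directive) = UnknownDirective::parse(input).unwrap();
+    /// assert_eq!((directive.name(), directive.content()), ("test".to_string(), "something".to_string()));
+    /// ```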
+ pub fn content(&self) -> String { + self.content.0.to_string() + } +} + +const CONTEXT: ParserContext = ParserContext::UnknownDirective; + +impl<'a> ProgramAST<'a> for UnknownDirective<'a> { + fn children(&self) -> Vec<&dyn ProgramAST> { + Vec::default() + } + + fn span(&self) -> ProgramSpan<'a> { + self.span + } + + fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self> + where + Self: Sized + 'a, + { + let input_span = input.span; + + context( + CONTEXT, + separated_pair( + preceded(Token::at, Token::name), + pair(WSoC::parse_whitespace_comment, WSoC::parse), + recognize(is_not(".")), + ), + )(input) + .map(|(rest, (name, content))| { + let rest_span = rest.span; + + ( + rest, + Self { + span: input_span.until_rest(&rest_span), + name, + content: content.span, + }, + ) + }) + } + + fn context(&self) -> ParserContext { + CONTEXT + } +} + +#[cfg(test)] +mod test { + use nom::combinator::all_consuming; + + use crate::parser::{ + ast::{directive::unknown::UnknownDirective, ProgramAST}, + input::ParserInput, + ParserState, + }; + + #[test] + fn parse_unknown_directive() { + let test = vec![( + "@test something", + ("test".to_string(), "something".to_string()), + )]; + + for (input, expected) in test { + let parser_input = ParserInput::new(input, ParserState::default()); + let result = all_consuming(UnknownDirective::parse)(parser_input); + + assert!(result.is_ok()); + + let result = result.unwrap(); + assert_eq!(expected, (result.1.name(), result.1.content())); + } + } +} diff --git a/nemo/src/parser/ast/expression.rs b/nemo/src/parser/ast/expression.rs index f82c98352..78821bc59 100644 --- a/nemo/src/parser/ast/expression.rs +++ b/nemo/src/parser/ast/expression.rs @@ -2,13 +2,15 @@ pub mod basic; pub mod complex; -pub mod sequence; use basic::{ blank::Blank, boolean::Boolean, constant::Constant, number::Number, rdf_literal::RdfLiteral, string::StringLiteral, variable::Variable, }; -use complex::{atom::Atom, tuple::Tuple}; +use complex::{ + aggregation::Aggregation, arithmetic::Arithmetic, atom::Atom, infix::InfixExpression, map::Map, + negation::Negation, operation::Operation, tuple::Tuple, +}; use nom::{branch::alt, combinator::map}; use crate::parser::{ @@ -23,6 +25,10 @@ use super::ProgramAST; /// An expression that is the building block of rules. #[derive(Debug)] pub enum Expression<'a> { + /// Aggregation + Aggregation(Aggregation<'a>), + /// Arithmetic + Arithmetic(Arithmetic<'a>), /// Atom Atom(Atom<'a>), /// Blank @@ -31,8 +37,16 @@ pub enum Expression<'a> { Boolean(Boolean<'a>), /// Constant Constant(Constant<'a>), + /// Infix + Infix(InfixExpression<'a>), + /// Map + Map(Map<'a>), + /// Negation + Negation(Negation<'a>), /// Number Number(Number<'a>), + /// Operation + Operation(Operation<'a>), /// Rdf literal RdfLiteral(RdfLiteral<'a>), /// String @@ -47,43 +61,86 @@ impl<'a> Expression<'a> { /// Return the [ParserContext] of the underlying expression type. 
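+    ///
+    /// For example (cf. the test below), `"?variable"` parses to an expression
+    /// whose `context_type` is `ParserContext::Variable`, while `"(1 + 2)"`
+    /// yields `ParserContext::Arithmetic`.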
pub fn context_type(&self) -> ParserContext { match self { + Expression::Aggregation(expression) => expression.context(), + Expression::Arithmetic(expression) => expression.context(), Expression::Atom(expression) => expression.context(), Expression::Blank(expression) => expression.context(), Expression::Boolean(expression) => expression.context(), Expression::Constant(expression) => expression.context(), + Expression::Infix(expression) => expression.context(), + Expression::Map(expression) => expression.context(), Expression::Number(expression) => expression.context(), + Expression::Negation(expression) => expression.context(), + Expression::Operation(expression) => expression.context(), Expression::RdfLiteral(expression) => expression.context(), Expression::String(expression) => expression.context(), Expression::Tuple(expression) => expression.context(), Expression::Variable(expression) => expression.context(), } } + + /// Parse basic expressions. + pub fn parse_basic(input: ParserInput<'a>) -> ParserResult<'a, Self> { + alt(( + map(Blank::parse, Self::Blank), + map(Boolean::parse, Self::Boolean), + map(Constant::parse, Self::Constant), + map(Number::parse, Self::Number), + map(RdfLiteral::parse, Self::RdfLiteral), + map(StringLiteral::parse, Self::String), + map(Variable::parse, Self::Variable), + ))(input) + } + + /// Parse complex expressions, except arithmetic and infix. + pub fn parse_complex(input: ParserInput<'a>) -> ParserResult<'a, Self> { + alt(( + map(Tuple::parse, Self::Tuple), + map(Aggregation::parse, Self::Aggregation), + map(Operation::parse, Self::Operation), + map(Atom::parse, Self::Atom), + map(Map::parse, Self::Map), + map(Negation::parse, Self::Negation), + ))(input) + } } const CONTEXT: ParserContext = ParserContext::Expression; impl<'a> ProgramAST<'a> for Expression<'a> { fn children(&self) -> Vec<&dyn ProgramAST> { - match self { - Expression::Atom(expression) => expression.children(), - Expression::Blank(expression) => expression.children(), - Expression::Boolean(expression) => expression.children(), - Expression::Constant(expression) => expression.children(), - Expression::Number(expression) => expression.children(), - Expression::RdfLiteral(expression) => expression.children(), - Expression::String(expression) => expression.children(), - Expression::Tuple(expression) => expression.children(), - Expression::Variable(expression) => expression.children(), - } + vec![match self { + Expression::Aggregation(expression) => expression, + Expression::Arithmetic(expression) => expression, + Expression::Atom(expression) => expression, + Expression::Blank(expression) => expression, + Expression::Boolean(expression) => expression, + Expression::Constant(expression) => expression, + Expression::Infix(expression) => expression, + Expression::Map(expression) => expression, + Expression::Number(expression) => expression, + Expression::Negation(expression) => expression, + Expression::Operation(expression) => expression, + Expression::RdfLiteral(expression) => expression, + Expression::String(expression) => expression, + Expression::Tuple(expression) => expression, + Expression::Variable(expression) => expression, + }] } - fn span(&self) -> ProgramSpan { + fn span(&self) -> ProgramSpan<'a> { match self { + Expression::Aggregation(expression) => expression.span(), + Expression::Arithmetic(expression) => expression.span(), Expression::Atom(expression) => expression.span(), Expression::Blank(expression) => expression.span(), Expression::Boolean(expression) => expression.span(), 
Expression::Constant(expression) => expression.span(),
+            Expression::Infix(expression) => expression.span(),
+            Expression::Map(expression) => expression.span(),
             Expression::Number(expression) => expression.span(),
+            Expression::Negation(expression) => expression.span(),
+            Expression::Operation(expression) => expression.span(),
             Expression::RdfLiteral(expression) => expression.span(),
             Expression::String(expression) => expression.span(),
             Expression::Tuple(expression) => expression.span(),
@@ -98,15 +155,10 @@ impl<'a> ProgramAST<'a> for Expression<'a> {
         context(
             CONTEXT,
             alt((
-                map(Atom::parse, Self::Atom),
-                map(Tuple::parse, Self::Tuple),
-                map(Blank::parse, Self::Blank),
-                map(Boolean::parse, Self::Boolean),
-                map(Constant::parse, Self::Constant),
-                map(Number::parse, Self::Number),
-                map(RdfLiteral::parse, Self::RdfLiteral),
-                map(StringLiteral::parse, Self::String),
-                map(Variable::parse, Self::Variable),
+                Self::parse_complex,
+                map(InfixExpression::parse, Self::Infix),
+                map(Arithmetic::parse, Self::Arithmetic),
+                Self::parse_basic,
             )),
         )(input)
     }
@@ -122,41 +174,42 @@ mod test {
 
     use crate::parser::{
         ast::{expression::Expression, ProgramAST},
+        context::ParserContext,
         input::ParserInput,
         ParserState,
     };
 
     #[test]
-    fn parse_expression_tuple() {
-        let test = vec!["(1,2)"];
-
-        for input in test {
+    fn parse_expression() {
+        let test = vec![
+            ("#sum(1 + POW(?x, 2); ?y, ?z)", ParserContext::Aggregation),
+            ("(1 + 2)", ParserContext::Arithmetic),
+            ("test(?x, (1,), (1 + 2))", ParserContext::Atom),
+            ("_:12", ParserContext::Blank),
+            ("true", ParserContext::Boolean),
+            ("constant", ParserContext::Constant),
+            ("(1 + 2) = 2 + 1", ParserContext::Infix),
+            ("{a:1,b:POW(1, 2)}", ParserContext::Map),
+            ("12", ParserContext::Number),
+            ("~test(1)", ParserContext::Negation),
+            ("substr(\"string\", 1+?x)", ParserContext::Operation),
+            (
+                "\"true\"^^<http://www.w3.org/2001/XMLSchema#boolean>",
+                ParserContext::RdfLiteral,
+            ),
+            ("\"string\"", ParserContext::String),
+            ("(1,)", ParserContext::Tuple),
+            ("?variable", ParserContext::Variable),
+        ];
+
+        for (input, expect) in test {
             let parser_input = ParserInput::new(input, ParserState::default());
             let result = all_consuming(Expression::parse)(parser_input);
 
-            println!("{:?}", result);
-
-            assert!(result.is_ok());
-
-            let result = result.unwrap();
-            assert!(matches!(result.1, Expression::Tuple(_)));
-        }
-    }
-
-    #[test]
-    fn parse_expression_atom() {
-        let test = vec!["abc(1,2)"];
-
-        for input in test {
-            let parser_input = ParserInput::new(input, ParserState::default());
-            let result = all_consuming(Expression::parse)(parser_input);
-
-            println!("{:?}", result);
-
             assert!(result.is_ok());
 
             let result = result.unwrap();
-            assert!(matches!(result.1, Expression::Atom(_)));
+            assert_eq!(result.1.context_type(), expect);
         }
     }
 }
diff --git a/nemo/src/parser/ast/expression/basic/blank.rs b/nemo/src/parser/ast/expression/basic/blank.rs
index 4139b0b07..891a0117e 100644
--- a/nemo/src/parser/ast/expression/basic/blank.rs
+++ b/nemo/src/parser/ast/expression/basic/blank.rs
@@ -1,6 +1,6 @@
 //! This module defines [Blank].
-use nom::{branch::alt, sequence::pair}; +use nom::{branch::alt, sequence::preceded}; use crate::parser::{ ast::{token::Token, ProgramAST}, @@ -39,7 +39,7 @@ impl<'a> ProgramAST<'a> for Blank<'a> { Vec::default() } - fn span(&self) -> ProgramSpan { + fn span(&self) -> ProgramSpan<'a> { self.span } @@ -49,19 +49,21 @@ impl<'a> ProgramAST<'a> for Blank<'a> { { let input_span = input.span; - context(CONTEXT, pair(Token::blank_node_prefix, Self::parse_name))(input).map( - |(rest, (_, name))| { - let rest_span = rest.span; - - ( - rest, - Blank { - span: input_span.until_rest(&rest_span), - name, - }, - ) - }, - ) + context( + CONTEXT, + preceded(Token::blank_node_prefix, Self::parse_name), + )(input) + .map(|(rest, name)| { + let rest_span = rest.span; + + ( + rest, + Blank { + span: input_span.until_rest(&rest_span), + name, + }, + ) + }) } fn context(&self) -> ParserContext { diff --git a/nemo/src/parser/ast/expression/basic/boolean.rs b/nemo/src/parser/ast/expression/basic/boolean.rs index 2caf3e9fb..5a247be2e 100644 --- a/nemo/src/parser/ast/expression/basic/boolean.rs +++ b/nemo/src/parser/ast/expression/basic/boolean.rs @@ -48,7 +48,8 @@ impl<'a> Boolean<'a> { alt((Token::boolean_true, Token::boolean_false))(input).map(|(rest, result)| { ( rest, - BooleanValue::token(result.kind()).expect("unexpected token"), + BooleanValue::token(result.kind()) + .expect(&format!("unexpected token: {:?}", result.kind())), ) }) } @@ -61,7 +62,7 @@ impl<'a> ProgramAST<'a> for Boolean<'a> { Vec::default() } - fn span(&self) -> ProgramSpan { + fn span(&self) -> ProgramSpan<'a> { self.span } diff --git a/nemo/src/parser/ast/expression/basic/constant.rs b/nemo/src/parser/ast/expression/basic/constant.rs index 29633230c..6cdb8d945 100644 --- a/nemo/src/parser/ast/expression/basic/constant.rs +++ b/nemo/src/parser/ast/expression/basic/constant.rs @@ -48,7 +48,7 @@ impl<'a> ProgramAST<'a> for Constant<'a> { Vec::default() } - fn span(&self) -> ProgramSpan { + fn span(&self) -> ProgramSpan<'a> { self.span } diff --git a/nemo/src/parser/ast/expression/basic/iri.rs b/nemo/src/parser/ast/expression/basic/iri.rs index dad5d68df..9f43d28ab 100644 --- a/nemo/src/parser/ast/expression/basic/iri.rs +++ b/nemo/src/parser/ast/expression/basic/iri.rs @@ -1,7 +1,7 @@ //! 
This module defines [Iri] #![allow(missing_docs)] -use nom::sequence::tuple; +use nom::sequence::delimited; use crate::parser::{ ast::{token::Token, ProgramAST}, @@ -35,7 +35,7 @@ impl<'a> ProgramAST<'a> for Iri<'a> { Vec::default() } - fn span(&self) -> ProgramSpan { + fn span(&self) -> ProgramSpan<'a> { self.span } @@ -47,9 +47,9 @@ impl<'a> ProgramAST<'a> for Iri<'a> { context( CONTEXT, - tuple((Token::open_chevrons, Token::iri, Token::closed_chevrons)), + delimited(Token::open_chevrons, Token::iri, Token::closed_chevrons), )(input) - .map(|(rest, (_, content, _))| { + .map(|(rest, content)| { let rest_span = rest.span; ( diff --git a/nemo/src/parser/ast/expression/basic/number.rs b/nemo/src/parser/ast/expression/basic/number.rs index 01e724f5b..fe668dc54 100644 --- a/nemo/src/parser/ast/expression/basic/number.rs +++ b/nemo/src/parser/ast/expression/basic/number.rs @@ -4,7 +4,7 @@ use enum_assoc::Assoc; use nom::{ branch::alt, combinator::opt, - sequence::{pair, tuple}, + sequence::{pair, preceded, tuple}, }; use crate::parser::{ @@ -55,15 +55,15 @@ pub struct Number<'a> { span: ProgramSpan<'a>, /// Sign of the integer part - integer_sign: NumberSign, + _integer_sign: NumberSign, /// The integer part of the number - integer: Token<'a>, + _integer: Token<'a>, /// The fractional part of the number - fractional: Option>, + _fractional: Option>, /// Sign and exponent of the number - exponent: Option<(NumberSign, Token<'a>)>, + _exponent: Option<(NumberSign, Token<'a>)>, /// Type - type_marker: Option, + _type_marker: Option, } impl<'a> Number<'a> { @@ -72,7 +72,8 @@ impl<'a> Number<'a> { alt((Token::plus, Token::minus))(input).map(|(rest, sign)| { ( rest, - NumberSign::token(&sign.kind()).expect("unknown token"), + NumberSign::token(&sign.kind()) + .expect(&format!("unexpected token: {:?}", sign.kind())), ) }) } @@ -84,17 +85,16 @@ impl<'a> Number<'a> { /// Parse the fractional part of the number. fn parse_fractional(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>> { - pair(Token::dot, Token::digits)(input).map(|(rest, (_, result))| (rest, result)) + preceded(Token::dot, Token::digits)(input) } /// Parse the exponent part of the number. fn parse_exponent(input: ParserInput<'a>) -> ParserResult<'a, (NumberSign, Token<'a>)> { - tuple(( + preceded( alt((Token::exponent_lower, Token::exponent_upper)), - opt(Self::parse_sign), - Self::parse_integer, - ))(input) - .map(|(rest, (_, sign, integer))| (rest, (sign.unwrap_or_default(), integer))) + pair(opt(Self::parse_sign), Self::parse_integer), + )(input) + .map(|(rest, (sign, integer))| (rest, (sign.unwrap_or_default(), integer))) } /// Parse the type marker of the number. @@ -115,7 +115,7 @@ impl<'a> ProgramAST<'a> for Number<'a> { Vec::default() } - fn span(&self) -> ProgramSpan { + fn span(&self) -> ProgramSpan<'a> { self.span } @@ -143,11 +143,11 @@ impl<'a> ProgramAST<'a> for Number<'a> { rest, Number { span: input_span.until_rest(&rest_span), - integer_sign: integer_sign.unwrap_or_default(), - integer, - fractional, - exponent, - type_marker, + _integer_sign: integer_sign.unwrap_or_default(), + _integer: integer, + _fractional: fractional, + _exponent: exponent, + _type_marker: type_marker, }, ) }, diff --git a/nemo/src/parser/ast/expression/basic/rdf_literal.rs b/nemo/src/parser/ast/expression/basic/rdf_literal.rs index 15e7a4242..a5adcb1ea 100644 --- a/nemo/src/parser/ast/expression/basic/rdf_literal.rs +++ b/nemo/src/parser/ast/expression/basic/rdf_literal.rs @@ -1,7 +1,7 @@ //! 
This module defines [RdfLiteral] #![allow(missing_docs)] -use nom::sequence::tuple; +use nom::sequence::{separated_pair, tuple}; use crate::parser::{ ast::{token::Token, ProgramAST}, @@ -50,7 +50,7 @@ impl<'a> ProgramAST<'a> for RdfLiteral<'a> { Vec::default() } - fn span(&self) -> ProgramSpan { + fn span(&self) -> ProgramSpan<'a> { self.span } @@ -62,9 +62,9 @@ impl<'a> ProgramAST<'a> for RdfLiteral<'a> { context( CONTEXT, - tuple((Self::parse_content, Token::double_caret, Iri::parse)), + separated_pair(Self::parse_content, Token::double_caret, Iri::parse), )(input) - .map(|(rest, (content, _, tag))| { + .map(|(rest, (content, tag))| { let rest_span = rest.span; ( diff --git a/nemo/src/parser/ast/expression/basic/string.rs b/nemo/src/parser/ast/expression/basic/string.rs index 74c62b1ee..93615b112 100644 --- a/nemo/src/parser/ast/expression/basic/string.rs +++ b/nemo/src/parser/ast/expression/basic/string.rs @@ -3,7 +3,7 @@ use nom::{ combinator::opt, - sequence::{pair, tuple}, + sequence::{delimited, pair}, }; use crate::parser::{ @@ -39,8 +39,7 @@ impl<'a> StringLiteral<'a> { /// Parse the main part of the string. pub fn parse_string(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>> { - tuple((Token::quote, Token::string, Token::quote))(input) - .map(|(rest, (_, content, _))| (rest, content)) + delimited(Token::quote, Token::string, Token::quote)(input) } /// Parse the language tag of the string. @@ -56,7 +55,7 @@ impl<'a> ProgramAST<'a> for StringLiteral<'a> { Vec::default() } - fn span(&self) -> ProgramSpan { + fn span(&self) -> ProgramSpan<'a> { self.span } diff --git a/nemo/src/parser/ast/expression/basic/variable.rs b/nemo/src/parser/ast/expression/basic/variable.rs index 43354a5d4..aea97e761 100644 --- a/nemo/src/parser/ast/expression/basic/variable.rs +++ b/nemo/src/parser/ast/expression/basic/variable.rs @@ -82,7 +82,7 @@ impl<'a> ProgramAST<'a> for Variable<'a> { Vec::default() } - fn span(&self) -> ProgramSpan { + fn span(&self) -> ProgramSpan<'a> { self.span } diff --git a/nemo/src/parser/ast/expression/complex.rs b/nemo/src/parser/ast/expression/complex.rs index 83969c72f..9ea488770 100644 --- a/nemo/src/parser/ast/expression/complex.rs +++ b/nemo/src/parser/ast/expression/complex.rs @@ -1,4 +1,10 @@ //! This module defines complex expressions like tuples or maps. +pub mod aggregation; +pub mod arithmetic; pub mod atom; +pub mod infix; +pub mod map; +pub mod negation; +pub mod operation; pub mod tuple; diff --git a/nemo/src/parser/ast/expression/complex/aggregation.rs b/nemo/src/parser/ast/expression/complex/aggregation.rs new file mode 100644 index 000000000..60ac63d56 --- /dev/null +++ b/nemo/src/parser/ast/expression/complex/aggregation.rs @@ -0,0 +1,151 @@ +//! This module defines [Aggregation]. + +use nom::{ + combinator::opt, + sequence::{delimited, pair, preceded, tuple}, +}; + +use crate::{ + parser::{ + ast::{ + comment::wsoc::WSoC, expression::Expression, + sequence::simple::ExpressionSequenceSimple, tag::aggregation::AggregationTag, + token::Token, ProgramAST, + }, + context::{context, ParserContext}, + input::ParserInput, + span::ProgramSpan, + ParserResult, + }, + rule_model::components::term::aggregate::AggregateKind, +}; + +/// A known Aggregation applied to a series of [Expression]s. +/// +/// This has the same structure as an [Atom]. 
+#[derive(Debug)]
+pub struct Aggregation<'a> {
+    /// [ProgramSpan] associated with this node
+    span: ProgramSpan<'a>,
+
+    /// Type of aggregation
+    tag: AggregationTag<'a>,
+    /// Aggregate expression
+    aggregate: Box<Expression<'a>>,
+    /// List of distinct variables
+    distinct: Option<ExpressionSequenceSimple<'a>>,
+}
+
+impl<'a> Aggregation<'a> {
+    /// Return the aggregate expression
+    pub fn aggregate(&self) -> &Expression<'a> {
+        &self.aggregate
+    }
+
+    /// Return the expressions specifying the distinct variables
+    pub fn distinct(&self) -> impl Iterator<Item = &Expression<'a>> {
+        self.distinct.iter().flat_map(|distinct| distinct.iter())
+    }
+
+    /// Return the type of this Aggregation.
+    pub fn kind(&self) -> Option<AggregateKind> {
+        self.tag.operation()
+    }
+}
+
+const CONTEXT: ParserContext = ParserContext::Aggregation;
+
+impl<'a> ProgramAST<'a> for Aggregation<'a> {
+    fn children(&self) -> Vec<&dyn ProgramAST> {
+        let mut result: Vec<&dyn ProgramAST> = vec![];
+        result.push(&self.tag);
+        result.push(&*self.aggregate);
+
+        for expression in self.distinct() {
+            result.push(expression)
+        }
+
+        result
+    }
+
+    fn span(&self) -> ProgramSpan<'a> {
+        self.span
+    }
+
+    fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self>
+    where
+        Self: Sized + 'a,
+    {
+        let input_span = input.span;
+
+        context(
+            CONTEXT,
+            pair(
+                preceded(Token::hash, AggregationTag::parse),
+                delimited(
+                    pair(Token::open_parenthesis, WSoC::parse),
+                    pair(
+                        Expression::parse,
+                        opt(preceded(
+                            tuple((WSoC::parse, Token::semicolon, WSoC::parse)),
+                            ExpressionSequenceSimple::parse,
+                        )),
+                    ),
+                    pair(WSoC::parse, Token::closed_parenthesis),
+                ),
+            ),
+        )(input)
+        .map(|(rest, (tag, (aggregate, distinct)))| {
+            let rest_span = rest.span;
+
+            (
+                rest,
+                Self {
+                    span: input_span.until_rest(&rest_span),
+                    tag,
+                    aggregate: Box::new(aggregate),
+                    distinct,
+                },
+            )
+        })
+    }
+
+    fn context(&self) -> ParserContext {
+        CONTEXT
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use nom::combinator::all_consuming;
+
+    use crate::{
+        parser::{
+            ast::{expression::complex::aggregation::Aggregation, ProgramAST},
+            input::ParserInput,
+            ParserState,
+        },
+        rule_model::components::term::aggregate::AggregateKind,
+    };
+
+    #[test]
+    fn parse_aggregation() {
+        let test = vec![
+            ("#sum(?x)", (AggregateKind::SumOfNumbers, 0)),
+            ("#max(?x; ?y, ?z)", (AggregateKind::MaxNumber, 2)),
+        ];
+
+        for (input, expected) in test {
+            let parser_input = ParserInput::new(input, ParserState::default());
+            let result = all_consuming(Aggregation::parse)(parser_input);
+
+            assert!(result.is_ok());
+
+            let result = result.unwrap();
+            assert_eq!(
+                expected,
+                (result.1.kind().unwrap(), result.1.distinct().count())
+            );
+        }
+    }
+}
diff --git a/nemo/src/parser/ast/expression/complex/arithmetic.rs b/nemo/src/parser/ast/expression/complex/arithmetic.rs
new file mode 100644
index 000000000..7702801e9
--- /dev/null
+++ b/nemo/src/parser/ast/expression/complex/arithmetic.rs
@@ -0,0 +1,364 @@
+//! This module defines [Arithmetic].
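The arithmetic parser in the new file below implements the usual two-level precedence grammar (sums of products of factors) and then folds each operator chain left-associatively. A self-contained sketch of that folding step, with a hypothetical toy expression type standing in for the crate's `Expression` (assumption: left associativity is the intended reading, as the tests below suggest):

#[derive(Debug, PartialEq)]
enum Expr {
    Num(i64),
    Bin(char, Box<Expr>, Box<Expr>),
}

// Fold a chain "initial op1 e1 op2 e2 ..." into a left-leaning tree,
// mirroring what ArithmeticChain::fold does with spans omitted.
fn fold_chain(initial: Expr, sequence: Vec<(char, Expr)>) -> Expr {
    sequence.into_iter().fold(initial, |acc, (op, rhs)| {
        Expr::Bin(op, Box::new(acc), Box::new(rhs))
    })
}

fn main() {
    // "1 - 2 - 3" must parse as "(1 - 2) - 3", not "1 - (2 - 3)".
    let folded = fold_chain(
        Expr::Num(1),
        vec![('-', Expr::Num(2)), ('-', Expr::Num(3))],
    );
    assert_eq!(
        folded,
        Expr::Bin(
            '-',
            Box::new(Expr::Bin(
                '-',
                Box::new(Expr::Num(1)),
                Box::new(Expr::Num(2))
            )),
            Box::new(Expr::Num(3))
        ),
    );
}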
+#![allow(missing_docs)]
+
+use enum_assoc::Assoc;
+use nom::{
+    branch::alt,
+    multi::many0,
+    sequence::{delimited, pair, preceded, separated_pair},
+};
+use nom_supreme::error::{BaseErrorKind, Expectation};
+
+use crate::parser::{
+    ast::{
+        comment::wsoc::WSoC,
+        expression::Expression,
+        token::{Token, TokenKind},
+        ProgramAST,
+    },
+    context::{context, ParserContext},
+    error::ParserErrorTree,
+    input::ParserInput,
+    span::ProgramSpan,
+    ParserResult,
+};
+
+/// Types of arithmetic operations
+#[derive(Assoc, Debug, Copy, Clone, PartialEq, Eq)]
+#[func(pub fn token(token: TokenKind) -> Option<Self>)]
+pub enum ArithmeticOperation {
+    /// Addition
+    #[assoc(token = TokenKind::Plus)]
+    Addition,
+    /// Subtraction
+    #[assoc(token = TokenKind::Minus)]
+    Subtraction,
+    /// Multiplication
+    #[assoc(token = TokenKind::Star)]
+    Multiplication,
+    /// Division
+    #[assoc(token = TokenKind::Division)]
+    Division,
+}
+
+impl ArithmeticOperation {
+    /// Parse additive operation.
+    pub fn parse_additive<'a>(input: ParserInput<'a>) -> ParserResult<'a, Self> {
+        alt((Token::plus, Token::minus))(input).map(|(rest, result)| {
+            (
+                rest,
+                ArithmeticOperation::token(result.kind())
+                    .expect(&format!("unexpected token: {:?}", result.kind())),
+            )
+        })
+    }
+
+    /// Parse multiplicative operation.
+    pub fn parse_multiplicative<'a>(input: ParserInput<'a>) -> ParserResult<'a, Self> {
+        alt((Token::star, Token::division))(input).map(|(rest, result)| {
+            (
+                rest,
+                ArithmeticOperation::token(result.kind())
+                    .expect(&format!("unexpected token: {:?}", result.kind())),
+            )
+        })
+    }
+}
+
+/// Arithmetic expression on numbers
+#[derive(Debug)]
+pub struct Arithmetic<'a> {
+    /// [ProgramSpan] associated with this node
+    span: ProgramSpan<'a>,
+
+    /// Type of arithmetic operation
+    kind: ArithmeticOperation,
+    /// Left input
+    left: Box<Expression<'a>>,
+    /// Right input
+    right: Box<Expression<'a>>,
+}
+
+impl<'a> Arithmetic<'a> {
+    /// Return the kind of arithmetic operation.
+    pub fn kind(&self) -> ArithmeticOperation {
+        self.kind
+    }
+
+    /// Return the left part of the operation.
+    pub fn left(&self) -> &Expression<'a> {
+        &self.left
+    }
+
+    /// Return the right part of the operation.
+    pub fn right(&self) -> &Expression<'a> {
+        &self.right
+    }
+}
+
+#[derive(Debug)]
+struct ArithmeticChain<'a> {
+    initial: Expression<'a>,
+    sequence: Vec<(ArithmeticOperation, Expression<'a>)>,
+}
+
+impl<'a> ArithmeticChain<'a> {
+    fn fold(mut self, input_span: &ProgramSpan<'a>) -> Expression<'a> {
+        if self.sequence.is_empty() {
+            self.initial
+        } else {
+            let sequence_rest = self.sequence.split_off(1);
+            let sequence_first = self.sequence.remove(0);
+
+            let start = Arithmetic {
+                span: input_span.enclose(&self.initial.span(), &sequence_first.1.span()),
+                kind: sequence_first.0,
+                left: Box::new(self.initial),
+                right: Box::new(sequence_first.1),
+            };
+
+            Expression::Arithmetic(sequence_rest.into_iter().fold(
+                start,
+                |acc, (kind, expression)| Arithmetic {
+                    span: input_span.enclose(&acc.span, &expression.span()),
+                    kind,
+                    left: Box::new(Expression::Arithmetic(acc)),
+                    right: Box::new(expression),
+                },
+            ))
+        }
+    }
+}
+
+impl<'a> Arithmetic<'a> {
+    fn parse_non_arithmetic(input: ParserInput<'a>) -> ParserResult<'a, Expression<'a>> {
+        alt((Expression::parse_complex, Expression::parse_basic))(input)
+    }
+
+    /// Parse an expression enclosed in parenthesis.
+    fn parse_parenthesized_expression(input: ParserInput<'a>) -> ParserResult<'a, Expression<'a>> {
+        delimited(
+            pair(Token::open_parenthesis, WSoC::parse),
+            Expression::parse,
+            pair(WSoC::parse, Token::closed_parenthesis),
+        )(input)
+    }
+
+    /// Parse an arithmetic expression enclosed in parenthesis.
+    fn parse_parenthesized_arithmetic(input: ParserInput<'a>) -> ParserResult<'a, Self> {
+        delimited(
+            pair(Token::open_parenthesis, WSoC::parse),
+            Self::parse,
+            pair(WSoC::parse, Token::closed_parenthesis),
+        )(input)
+    }
+
+    /// Parse a factor: a non-arithmetic expression or a parenthesized expression.
+    fn parse_factor(input: ParserInput<'a>) -> ParserResult<'a, Expression<'a>> {
+        alt((
+            Self::parse_non_arithmetic,
+            Self::parse_parenthesized_expression,
+        ))(input)
+    }
+
+    /// Parse a product of factors connected by multiplicative operations.
+    fn parse_product(input: ParserInput<'a>) -> ParserResult<'a, ArithmeticChain<'a>> {
+        pair(
+            Self::parse_factor,
+            many0(preceded(
+                WSoC::parse,
+                separated_pair(
+                    ArithmeticOperation::parse_multiplicative,
+                    WSoC::parse,
+                    Self::parse_factor,
+                ),
+            )),
+        )(input)
+        .map(|(rest, (initial, sequence))| (rest, ArithmeticChain { initial, sequence }))
+    }
+
+    /// Parse a sum of products connected by additive operations.
+    fn parse_sum(input: ParserInput<'a>) -> ParserResult<'a, Expression<'a>> {
+        let input_span = input.span;
+
+        pair(
+            Self::parse_product,
+            many0(preceded(
+                WSoC::parse,
+                separated_pair(
+                    ArithmeticOperation::parse_additive,
+                    WSoC::parse,
+                    Self::parse_product,
+                ),
+            )),
+        )(input)
+        .map(|(rest, (initial, sequence))| {
+            (
+                rest,
+                ArithmeticChain {
+                    initial: initial.fold(&input_span),
+                    sequence: sequence
+                        .into_iter()
+                        .map(|(operation, chain)| (operation, chain.fold(&input_span)))
+                        .collect(),
+                }
+                .fold(&input_span),
+            )
+        })
+    }
+}
+
+const CONTEXT: ParserContext = ParserContext::Arithmetic;
+
+impl<'a> ProgramAST<'a> for Arithmetic<'a> {
+    fn children(&self) -> Vec<&dyn ProgramAST> {
+        todo!()
+    }
+
+    fn span(&self) -> ProgramSpan<'a> {
+        self.span
+    }
+
+    fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self>
+    where
+        Self: Sized + 'a,
+    {
+        let arithmetic_parser = |input: ParserInput<'a>| {
+            if let Ok((rest, expression)) = Self::parse_sum(input.clone()) {
+                if let Expression::Arithmetic(result) = expression {
+                    return Ok((rest, result));
+                }
+            }
+
+            Err(nom::Err::Error(ParserErrorTree::Base {
+                location: input,
+                kind: BaseErrorKind::Expected(Expectation::Tag("arithmetic expression")),
+            }))
+        };
+
+        
context(CONTEXT, arithmetic_parser)(input) + } + + fn context(&self) -> ParserContext { + CONTEXT + } +} + +#[cfg(test)] +mod test { + use nom::combinator::all_consuming; + + use crate::parser::{ + ast::{ + expression::{complex::arithmetic::Arithmetic, Expression}, + ProgramAST, + }, + input::ParserInput, + ParserState, + }; + + /// Count the number of expressions contained in an arithmetic expression + fn count_expression<'a>(expression: &Expression<'a>) -> usize { + if let Expression::Arithmetic(arithmetic) = expression { + count_expression(arithmetic.left()) + count_expression(arithmetic.right()) + } else { + 1 + } + } + + #[test] + fn parse_arithmetic() { + let test = vec![ + ("1 * 2", 2), + ("1 * 2 * ?y", 3), + ("(1 * 2)", 2), + ("1 * (2 / ?y)", 3), + ("(1 / 2) * ?y", 3), + ("1 + 2", 2), + ("1 + 2 + ?x", 3), + ("1 + 2 * (3 * ?y)", 4), + ("1 + (2 * 3) * ?y + 4", 5), + ("(1 + (2 * ((3 * ?y))))", 4), + // ("1 + 2 * POW(3, 4)", 3), + ]; + + for (input, expected) in test { + let parser_input = ParserInput::new(input, ParserState::default()); + let result = all_consuming(Arithmetic::parse)(parser_input); + + assert!(result.is_ok()); + + let result = result.unwrap(); + assert_eq!( + expected, + count_expression(&Expression::Arithmetic(result.1)) + ); + } + } +} diff --git a/nemo/src/parser/ast/expression/complex/atom.rs b/nemo/src/parser/ast/expression/complex/atom.rs index 84a63ff95..708850da4 100644 --- a/nemo/src/parser/ast/expression/complex/atom.rs +++ b/nemo/src/parser/ast/expression/complex/atom.rs @@ -1,19 +1,16 @@ //! This module defines [Atom]. -use nom::{ - combinator::opt, - sequence::{delimited, pair}, -}; +use nom::sequence::{delimited, pair}; use crate::parser::{ ast::{ - expression::{sequence::simple::ExpressionSequenceSimple, Expression}, - tag::Tag, - token::Token, - ProgramAST, + comment::wsoc::WSoC, expression::Expression, sequence::simple::ExpressionSequenceSimple, + tag::structure::StructureTag, token::Token, ProgramAST, }, context::{context, ParserContext}, + input::ParserInput, span::ProgramSpan, + ParserResult, }; /// A possibly tagged sequence of [Expression]s. @@ -23,7 +20,7 @@ pub struct Atom<'a> { span: ProgramSpan<'a>, /// Tag of this Atom - tag: Tag<'a>, + tag: StructureTag<'a>, /// List of underlying expressions expressions: ExpressionSequenceSimple<'a>, } @@ -35,7 +32,7 @@ impl<'a> Atom<'a> { } /// Return the tag of this atom. 
-    pub fn tag(&self) -> &Tag<'a> {
+    pub fn tag(&self) -> &StructureTag<'a> {
         &self.tag
     }
 }
@@ -44,7 +41,9 @@ const CONTEXT: ParserContext = ParserContext::Atom;
 
 impl<'a> ProgramAST<'a> for Atom<'a> {
     fn children(&self) -> Vec<&dyn ProgramAST> {
-        let mut result: Vec<&dyn ProgramAST> = vec![];
+        let mut result = Vec::<&dyn ProgramAST>::new();
+        result.push(&self.tag);
+
         for expression in &self.expressions {
             result.push(expression)
         }
@@ -52,11 +51,11 @@ impl<'a> ProgramAST<'a> for Atom<'a> {
         result
     }
 
-    fn span(&self) -> ProgramSpan {
+    fn span(&self) -> ProgramSpan<'a> {
         self.span
     }
 
-    fn parse(input: crate::parser::input::ParserInput<'a>) -> crate::parser::ParserResult<'a, Self>
+    fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self>
     where
         Self: Sized + 'a,
     {
@@ -65,11 +64,11 @@ impl<'a> ProgramAST<'a> for Atom<'a> {
         context(
             CONTEXT,
             pair(
-                Tag::parse,
+                StructureTag::parse,
                 delimited(
-                    pair(Token::open_parenthesis, opt(Token::whitespace)),
+                    pair(Token::open_parenthesis, WSoC::parse),
                     ExpressionSequenceSimple::parse,
-                    pair(opt(Token::whitespace), Token::closed_parenthesis),
+                    pair(WSoC::parse, Token::closed_parenthesis),
                 ),
             ),
         )(input)
diff --git a/nemo/src/parser/ast/expression/complex/infix.rs b/nemo/src/parser/ast/expression/complex/infix.rs
new file mode 100644
index 000000000..98911eeb3
--- /dev/null
+++ b/nemo/src/parser/ast/expression/complex/infix.rs
@@ -0,0 +1,190 @@
+//! This module defines [InfixExpression].
+#![allow(missing_docs)]
+
+use enum_assoc::Assoc;
+use nom::{
+    branch::alt,
+    combinator::map,
+    sequence::{delimited, tuple},
+};
+
+use crate::parser::{
+    ast::{
+        comment::wsoc::WSoC,
+        expression::Expression,
+        token::{Token, TokenKind},
+        ProgramAST,
+    },
+    context::{context, ParserContext},
+    input::ParserInput,
+    span::ProgramSpan,
+    ParserResult,
+};
+
+use super::{
+    aggregation::Aggregation, arithmetic::Arithmetic, atom::Atom, map::Map, negation::Negation,
+    operation::Operation, tuple::Tuple,
+};
+
+/// Types of infix expression connectives
+#[derive(Assoc, Debug, Copy, Clone, PartialEq, Eq)]
+#[func(pub fn token(token: TokenKind) -> Option<Self>)]
+pub enum InfixExpressionKind {
+    /// Equality
+    #[assoc(token = TokenKind::Equal)]
+    Equality,
+    /// Inequality
+    #[assoc(token = TokenKind::Unequal)]
+    Inequality,
+    /// Greater than or equal
+    #[assoc(token = TokenKind::GreaterEqual)]
+    GreaterEqual,
+    /// Greater than
+    #[assoc(token = TokenKind::Greater)]
+    Greater,
+    /// Less than or equal
+    #[assoc(token = TokenKind::LessEqual)]
+    LessEqual,
+    /// Less than
+    #[assoc(token = TokenKind::Less)]
+    Less,
+}
+
+/// Expressions connected by an infix operation
+#[derive(Debug)]
+pub struct InfixExpression<'a> {
+    /// [ProgramSpan] associated with this node
+    span: ProgramSpan<'a>,
+
+    /// Kind of infix expression
+    kind: InfixExpressionKind,
+    /// Left part of the expression
+    left: Box<Expression<'a>>,
+    /// Right part of the expression
+    right: Box<Expression<'a>>,
+}
+
+impl<'a> InfixExpression<'a> {
+    /// Return the pair of [Expression]s.
+    pub fn pair(&self) -> (&Expression<'a>, &Expression<'a>) {
+        (&self.left, &self.right)
+    }
+
+    /// Return the [InfixExpressionKind] of this expression.
+    pub fn kind(&self) -> InfixExpressionKind {
+        self.kind
+    }
+
+    /// Parse an [InfixExpressionKind].
+ fn parse_infix_kind(input: ParserInput<'a>) -> ParserResult<'a, InfixExpressionKind> { + alt(( + Token::equal, + Token::unequal, + Token::greater_equal, + Token::greater, + Token::less_equal, + Token::less, + ))(input) + .map(|(rest, result)| { + ( + rest, + InfixExpressionKind::token(result.kind()) + .expect(&format!("unexpected token: {:?}", result.kind())), + ) + }) + } + + /// Parse non-infix [Expression]s + pub fn parse_non_infix(input: ParserInput<'a>) -> ParserResult<'a, Expression<'a>> { + alt(( + map(Arithmetic::parse, Expression::Arithmetic), + map(Aggregation::parse, Expression::Aggregation), + map(Operation::parse, Expression::Operation), + map(Atom::parse, Expression::Atom), + map(Tuple::parse, Expression::Tuple), + map(Map::parse, Expression::Map), + map(Negation::parse, Expression::Negation), + Expression::parse_basic, + ))(input) + } +} + +const CONTEXT: ParserContext = ParserContext::Infix; + +impl<'a> ProgramAST<'a> for InfixExpression<'a> { + fn children(&self) -> Vec<&dyn ProgramAST> { + vec![&*self.left, &*self.right] + } + + fn span(&self) -> ProgramSpan<'a> { + self.span + } + + fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self> + where + Self: Sized + 'a, + { + let input_span = input.span; + + context( + CONTEXT, + tuple(( + Self::parse_non_infix, + delimited(WSoC::parse, Self::parse_infix_kind, WSoC::parse), + Self::parse_non_infix, + )), + )(input) + .map(|(rest, (left, kind, right))| { + let rest_span = rest.span; + + ( + rest, + Self { + span: input_span.until_rest(&rest_span), + kind, + left: Box::new(left), + right: Box::new(right), + }, + ) + }) + } + + fn context(&self) -> ParserContext { + CONTEXT + } +} + +#[cfg(test)] +mod test { + use nom::combinator::all_consuming; + + use crate::parser::{ + ast::{expression::complex::infix::InfixExpression, ProgramAST}, + input::ParserInput, + ParserState, + }; + + use super::InfixExpressionKind; + + #[test] + fn parse_infix() { + let test = vec![ + ("?x=7", InfixExpressionKind::Equality), + ("?x != ?y", InfixExpressionKind::Inequality), + ("?x < ?y", InfixExpressionKind::Less), + ("?x <= ?y", InfixExpressionKind::LessEqual), + ("?x > ?y", InfixExpressionKind::Greater), + ("?x >= ?y", InfixExpressionKind::GreaterEqual), + ]; + + for (input, expected) in test { + let parser_input = ParserInput::new(input, ParserState::default()); + let result = all_consuming(InfixExpression::parse)(parser_input); + + assert!(result.is_ok()); + + let result = result.unwrap(); + assert_eq!(expected, result.1.kind()); + } + } +} diff --git a/nemo/src/parser/ast/expression/complex/map.rs b/nemo/src/parser/ast/expression/complex/map.rs new file mode 100644 index 000000000..cdb403fbb --- /dev/null +++ b/nemo/src/parser/ast/expression/complex/map.rs @@ -0,0 +1,143 @@ +//! This module defines [Map]. + +use nom::{ + combinator::opt, + sequence::{delimited, pair, terminated}, +}; + +use crate::parser::{ + ast::{ + comment::wsoc::WSoC, expression::Expression, sequence::key_value::KeyValueSequence, + tag::structure::StructureTag, token::Token, ProgramAST, + }, + context::{context, ParserContext}, + input::ParserInput, + span::ProgramSpan, + ParserResult, +}; + +/// A possibly tagged sequence of [Expression]s. +#[derive(Debug)] +pub struct Map<'a> { + /// [ProgramSpan] associated with this node + span: ProgramSpan<'a>, + + /// Tag of this map, if it exists + tag: Option>, + /// List of key-value pairs + key_value: KeyValueSequence<'a>, +} + +impl<'a> Map<'a> { + /// Return an iterator over the underlying [Expression]s. 
+    pub fn key_value(&self) -> impl Iterator<Item = &(Expression<'a>, Expression<'a>)> {
+        self.key_value.iter()
+    }
+
+    /// Return the tag of this Map.
+    pub fn tag(&self) -> Option<&StructureTag<'a>> {
+        self.tag.as_ref()
+    }
+}
+
+const CONTEXT: ParserContext = ParserContext::Map;
+
+impl<'a> ProgramAST<'a> for Map<'a> {
+    fn children(&self) -> Vec<&dyn ProgramAST> {
+        let mut result: Vec<&dyn ProgramAST> = vec![];
+
+        if let Some(tag) = &self.tag {
+            result.push(tag)
+        }
+
+        for (key, value) in &self.key_value {
+            result.push(key);
+            result.push(value);
+        }
+
+        result
+    }
+
+    fn span(&self) -> ProgramSpan<'a> {
+        self.span
+    }
+
+    fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self>
+    where
+        Self: Sized + 'a,
+    {
+        let input_span = input.span;
+
+        context(
+            CONTEXT,
+            pair(
+                opt(terminated(StructureTag::parse, opt(WSoC::parse))),
+                delimited(
+                    pair(Token::open_brace, WSoC::parse),
+                    KeyValueSequence::parse,
+                    pair(WSoC::parse, Token::closed_brace),
+                ),
+            ),
+        )(input)
+        .map(|(rest, (tag, key_value))| {
+            let rest_span = rest.span;
+
+            (
+                rest,
+                Self {
+                    span: input_span.until_rest(&rest_span),
+                    tag,
+                    key_value,
+                },
+            )
+        })
+    }
+
+    fn context(&self) -> ParserContext {
+        CONTEXT
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use nom::combinator::all_consuming;
+
+    use crate::parser::{
+        ast::{expression::complex::map::Map, ProgramAST},
+        input::ParserInput,
+        ParserState,
+    };
+
+    #[test]
+    fn parse_map() {
+        let test = vec![
+            ("{?x: 7}", (None, 1)),
+            ("abc { ?x: 7 }", (Some("abc".to_string()), 1)),
+            (
+                "abc { ?x: 7, ?y: 12, ?z: 13 }",
+                (Some("abc".to_string()), 3),
+            ),
+            (
+                "abc { ?x : 7 , ?y : 13 , ?z : 15 }",
+                (Some("abc".to_string()), 3),
+            ),
+            ("{a:1, b: POW(1, 2)}", (None, 2)),
+        ];
+
+        for (input, expected) in test {
+            let parser_input = ParserInput::new(input, ParserState::default());
+            let result = all_consuming(Map::parse)(parser_input);
+
+            assert!(result.is_ok());
+
+            let result = result.unwrap();
+            assert_eq!(
+                expected,
+                (
+                    result.1.tag().as_ref().map(|tag| tag.to_string()),
+                    result.1.key_value().count()
+                )
+            );
+        }
+    }
+}
diff --git a/nemo/src/parser/ast/expression/complex/negation.rs b/nemo/src/parser/ast/expression/complex/negation.rs
new file mode 100644
index 000000000..fee98cdaa
--- /dev/null
+++ b/nemo/src/parser/ast/expression/complex/negation.rs
@@ -0,0 +1,88 @@
+//! This module defines [Negation].
+
+use nom::sequence::preceded;
+
+use crate::parser::{
+    ast::{expression::Expression, token::Token, ProgramAST},
+    context::{context, ParserContext},
+    input::ParserInput,
+    span::ProgramSpan,
+    ParserResult,
+};
+
+/// A negated [Expression].
+#[derive(Debug)]
+pub struct Negation<'a> {
+    /// [ProgramSpan] associated with this node
+    span: ProgramSpan<'a>,
+
+    /// The negated expression
+    expression: Box<Expression<'a>>,
+}
+
+impl<'a> Negation<'a> {
+    /// Return the negated [Expression].
+    pub fn expression(&self) -> &Expression<'a> {
+        &self.expression
+    }
+}
+
+const CONTEXT: ParserContext = ParserContext::Negation;
+
+impl<'a> ProgramAST<'a> for Negation<'a> {
+    fn children(&self) -> Vec<&dyn ProgramAST> {
+        vec![&*self.expression]
+    }
+
+    fn span(&self) -> ProgramSpan<'a> {
+        self.span
+    }
+
+    fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self>
+    where
+        Self: Sized + 'a,
+    {
+        let input_span = input.span;
+
+        context(CONTEXT, preceded(Token::tilde, Expression::parse))(input).map(
+            |(rest, expression)| {
+                let rest_span = rest.span;
+
+                (
+                    rest,
+                    Self {
+                        span: input_span.until_rest(&rest_span),
+                        expression: Box::new(expression),
+                    },
+                )
+            },
+        )
+    }
+
+    fn context(&self) -> ParserContext {
+        CONTEXT
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use nom::combinator::all_consuming;
+
+    use crate::parser::{
+        ast::{expression::complex::negation::Negation, ProgramAST},
+        input::ParserInput,
+        ParserState,
+    };
+
+    #[test]
+    fn parse_negation() {
+        let test = vec!["~a(?x)", "~abc(?x, ?y)"];
+
+        for input in test {
+            let parser_input = ParserInput::new(input, ParserState::default());
+            let result = all_consuming(Negation::parse)(parser_input);
+
+            assert!(result.is_ok());
+        }
+    }
+}
diff --git a/nemo/src/parser/ast/expression/complex/operation.rs b/nemo/src/parser/ast/expression/complex/operation.rs
new file mode 100644
index 000000000..1099e08b6
--- /dev/null
+++ b/nemo/src/parser/ast/expression/complex/operation.rs
@@ -0,0 +1,140 @@
+//! This module defines [Operation].
+
+use nom::sequence::{delimited, pair};
+
+use crate::{
+    parser::{
+        ast::{
+            comment::wsoc::WSoC, expression::Expression,
+            sequence::simple::ExpressionSequenceSimple, tag::operation::OperationTag, token::Token,
+            ProgramAST,
+        },
+        context::{context, ParserContext},
+        input::ParserInput,
+        span::ProgramSpan,
+        ParserResult,
+    },
+    rule_model::components::term::operation::operation_kind::OperationKind,
+};
+
+/// A known operation applied to a series of [Expression]s.
+///
+/// This has the same structure as an [Atom](super::atom::Atom).
+#[derive(Debug)]
+pub struct Operation<'a> {
+    /// [ProgramSpan] associated with this node
+    span: ProgramSpan<'a>,
+
+    /// Type of operation
+    tag: OperationTag<'a>,
+    /// List of underlying expressions
+    expressions: ExpressionSequenceSimple<'a>,
+}
+
+impl<'a> Operation<'a> {
+    /// Return an iterator over the underlying [Expression]s.
+    pub fn expressions(&self) -> impl Iterator<Item = &Expression<'a>> {
+        self.expressions.iter()
+    }
+
+    /// Return the type of this operation.
+ pub fn kind(&self) -> OperationKind { + self.tag.operation() + } +} + +const CONTEXT: ParserContext = ParserContext::Operation; + +impl<'a> ProgramAST<'a> for Operation<'a> { + fn children(&self) -> Vec<&dyn ProgramAST> { + let mut result = Vec::<&dyn ProgramAST>::new(); + result.push(&self.tag); + + for expression in &self.expressions { + result.push(expression) + } + + result + } + + fn span(&self) -> ProgramSpan<'a> { + self.span + } + + fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self> + where + Self: Sized + 'a, + { + let input_span = input.span; + + context( + CONTEXT, + pair( + OperationTag::parse, + delimited( + pair(Token::open_parenthesis, WSoC::parse), + ExpressionSequenceSimple::parse, + pair(WSoC::parse, Token::closed_parenthesis), + ), + ), + )(input) + .map(|(rest, (tag, expressions))| { + let rest_span = rest.span; + + ( + rest, + Self { + span: input_span.until_rest(&rest_span), + tag, + expressions, + }, + ) + }) + } + + fn context(&self) -> ParserContext { + CONTEXT + } +} + +#[cfg(test)] +mod test { + use nom::combinator::all_consuming; + use strum::IntoEnumIterator; + + use crate::{ + parser::{ + ast::{expression::complex::operation::Operation, ProgramAST}, + input::ParserInput, + ParserState, + }, + rule_model::components::term::operation::operation_kind::OperationKind, + }; + + #[test] + fn parse_operation() { + println!( + "{:?}", + OperationKind::iter() + .map(|kind| kind.name()) + .collect::>() + ); + + let test = vec![ + ("SUM(1)", OperationKind::NumericSum), + ("strlen(1)", OperationKind::StringLength), + ("StrRev( 1 )", OperationKind::StringReverse), + ("IsNumeric( 1 , 2 )", OperationKind::CheckIsNumeric), + ]; + + for (input, expected) in test { + let parser_input = ParserInput::new(input, ParserState::default()); + let result = all_consuming(Operation::parse)(parser_input); + + assert!(result.is_ok()); + + let result = result.unwrap(); + assert_eq!(expected, result.1.kind()); + } + } +} diff --git a/nemo/src/parser/ast/expression/complex/tuple.rs b/nemo/src/parser/ast/expression/complex/tuple.rs index e86396c50..4ae1d433e 100644 --- a/nemo/src/parser/ast/expression/complex/tuple.rs +++ b/nemo/src/parser/ast/expression/complex/tuple.rs @@ -7,12 +7,13 @@ use nom::{ use crate::parser::{ ast::{ - expression::{sequence::one::ExpressionSequenceOne, Expression}, - token::Token, - ProgramAST, + comment::wsoc::WSoC, expression::Expression, sequence::one::ExpressionSequenceOne, + token::Token, ProgramAST, }, context::{context, ParserContext}, + input::ParserInput, span::ProgramSpan, + ParserResult, }; /// A sequence of [Expression]s. 
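A recurring change in these hunks is that `opt(Token::whitespace)` gives way to `WSoC::parse`, short for "whitespace or comment", so layout and inline comments are skipped uniformly between tokens. A rough, self-contained approximation of such a combinator (not the crate's actual `WSoC` type; assumes nom 7 and `//` line comments):

use nom::{
    branch::alt,
    bytes::complete::{tag, take_till},
    character::complete::multispace1,
    combinator::value,
    multi::many0,
    sequence::pair,
    IResult,
};

// Consume any mix of whitespace runs and `// ...` line comments,
// succeeding (possibly on nothing) so it can sit between any two tokens.
fn wsoc(input: &str) -> IResult<&str, ()> {
    value(
        (),
        many0(alt((
            value((), multispace1),
            value((), pair(tag("//"), take_till(|c| c == '\n'))),
        ))),
    )(input)
}

fn main() {
    let (rest, _) = wsoc("  // comment\n  x").unwrap();
    assert_eq!(rest, "x");
}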
@@ -36,7 +37,8 @@ const CONTEXT: ParserContext = ParserContext::Tuple; impl<'a> ProgramAST<'a> for Tuple<'a> { fn children(&self) -> Vec<&dyn ProgramAST> { - let mut result: Vec<&dyn ProgramAST> = vec![]; + let mut result = Vec::<&dyn ProgramAST>::new(); + for expression in &self.expressions { result.push(expression) } @@ -44,11 +46,11 @@ impl<'a> ProgramAST<'a> for Tuple<'a> { result } - fn span(&self) -> ProgramSpan { + fn span(&self) -> ProgramSpan<'a> { self.span } - fn parse(input: crate::parser::input::ParserInput<'a>) -> crate::parser::ParserResult<'a, Self> + fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self> where Self: Sized + 'a, { @@ -57,16 +59,12 @@ impl<'a> ProgramAST<'a> for Tuple<'a> { context( CONTEXT, delimited( - pair(Token::open_parenthesis, opt(Token::whitespace)), + pair(Token::open_parenthesis, WSoC::parse), terminated( ExpressionSequenceOne::parse, - opt(tuple(( - opt(Token::whitespace), - Token::comma, - opt(Token::whitespace), - ))), + opt(tuple((WSoC::parse, Token::comma, WSoC::parse))), ), - pair(opt(Token::whitespace), Token::closed_parenthesis), + pair(WSoC::parse, Token::closed_parenthesis), ), )(input) .map(|(rest, expressions)| { diff --git a/nemo/src/parser/ast/expression/sequence/key_value.rs b/nemo/src/parser/ast/expression/sequence/key_value.rs deleted file mode 100644 index a0bc22eb2..000000000 --- a/nemo/src/parser/ast/expression/sequence/key_value.rs +++ /dev/null @@ -1 +0,0 @@ -//! This module defines \ No newline at end of file diff --git a/nemo/src/parser/ast/program.rs b/nemo/src/parser/ast/program.rs index 710d9d9d0..ec2d46f27 100644 --- a/nemo/src/parser/ast/program.rs +++ b/nemo/src/parser/ast/program.rs @@ -1,24 +1,46 @@ //! This module defines [Program]. +use nom::{ + combinator::opt, + multi::separated_list0, + sequence::{delimited, pair}, +}; + use crate::parser::{ context::{context, ParserContext}, input::ParserInput, + span::ProgramSpan, ParserResult, }; -use super::{rule::Rule, ProgramAST}; +use super::{ + comment::{toplevel::TopLevelComment, wsoc::WSoC}, + statement::Statement, + ProgramAST, +}; /// AST representation of a nemo program #[derive(Debug)] pub struct Program<'a> { - statements: Rule<'a>, + /// [ProgramSpan] associated with this node + span: ProgramSpan<'a>, + + /// Top level comment + comment: Option>, + /// Statements + statements: Vec>, } impl<'a> Program<'a> { + /// Return the top-level comment attached to this program, + /// if there is any + pub fn comment(&self) -> Option<&TopLevelComment<'a>> { + self.comment.as_ref() + } + /// Return an iterator of statements in the program. 
- pub fn statements(&self) -> &Rule<'a> { - // TODO: This is simply a rule now - &self.statements + pub fn statements(&self) -> impl Iterator> { + self.statements.iter() } } @@ -28,25 +50,92 @@ impl<'a> ProgramAST<'a> for Program<'a> { fn children(&self) -> Vec<&dyn ProgramAST> { // TODO: Fix this once we have statements let mut result = Vec::<&dyn ProgramAST>::new(); - result.push(&self.statements); + + if let Some(comment) = self.comment() { + result.push(comment); + } + + for statement in self.statements() { + result.push(statement); + } result } - fn span(&self) -> crate::parser::span::ProgramSpan { - // TODO: Fix this once we have statements - self.statements.span() + fn span(&self) -> ProgramSpan<'a> { + self.span } fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self> where Self: Sized + 'a, { - context(CONTEXT, Rule::parse)(input) - .map(|(rest, result)| (rest, Program { statements: result })) + let input_span = input.span; + + context( + CONTEXT, + pair( + opt(TopLevelComment::parse), + delimited( + WSoC::parse, + separated_list0(WSoC::parse, Statement::parse), + WSoC::parse, + ), + ), // pair( + // TopLevelComment::parse, + // WSoC::parse, + // // terminated(many0(preceded(WSoC::parse, Statement::parse)), WSoC::parse), + // ), + )(input) + .map(|(rest, (comment, statements))| { + let rest_span = rest.span; + + ( + rest, + Self { + span: input_span.until_rest(&rest_span), + comment, + statements, + }, + ) + }) } fn context(&self) -> ParserContext { CONTEXT } } + +#[cfg(test)] +mod test { + use nom::combinator::all_consuming; + + use crate::parser::{ast::ProgramAST, input::ParserInput, ParserState, Program}; + + #[test] + fn parse_program() { + let program = "//! Top-level comment\n\ + // Declarations:\n\ + @declare a(_: int, _: int) .\n\ + @declare b(_: int, _: int) .\n\ + /// A fact\n\ + a(1, 2) .\n\ + \n\ + // Rules:\n\ + \n\ + /// A rule\n\ + b(?y, ?x) :- a(?x, ?y) .\n\ + \n\ + // Some more comments + "; + + let parser_input = ParserInput::new(program, ParserState::default()); + let result = all_consuming(Program::parse)(parser_input); + + assert!(result.is_ok()); + + let result = result.unwrap(); + assert!(result.1.comment().is_some()); + assert_eq!(result.1.statements.len(), 4); + } +} diff --git a/nemo/src/parser/ast/rule.rs b/nemo/src/parser/ast/rule.rs index f55cc0307..3f1818ad2 100644 --- a/nemo/src/parser/ast/rule.rs +++ b/nemo/src/parser/ast/rule.rs @@ -1,8 +1,8 @@ //! This module defines [Rule]. use nom::{ - combinator::opt, - sequence::{pair, tuple}, + multi::many0, + sequence::{separated_pair, tuple}, }; use crate::parser::{ @@ -13,9 +13,8 @@ use crate::parser::{ }; use super::{ - expression::{sequence::simple::ExpressionSequenceSimple, Expression}, - token::Token, - ProgramAST, + attribute::Attribute, comment::wsoc::WSoC, expression::Expression, + sequence::simple::ExpressionSequenceSimple, token::Token, ProgramAST, }; /// A rule describing a logical implication @@ -24,6 +23,9 @@ pub struct Rule<'a> { /// [ProgramSpan] associated with this node span: ProgramSpan<'a>, + /// Attributes attached to this rule + attributes: Vec>, + /// Head of the rule head: ExpressionSequenceSimple<'a>, /// Body of the rule, @@ -40,6 +42,11 @@ impl<'a> Rule<'a> { pub fn body(&self) -> impl Iterator> { self.body.iter() } + + /// Return an iterator over the [Attribute]s attached to this rule. 
+ pub fn attributes(&self) -> impl Iterator> { + self.attributes.iter() + } } const CONTEXT: ParserContext = ParserContext::Rule; @@ -55,7 +62,7 @@ impl<'a> ProgramAST<'a> for Rule<'a> { result } - fn span(&self) -> ProgramSpan { + fn span(&self) -> ProgramSpan<'a> { self.span } @@ -68,19 +75,22 @@ impl<'a> ProgramAST<'a> for Rule<'a> { context( CONTEXT, tuple(( - ExpressionSequenceSimple::parse, - tuple((opt(Token::whitespace), Token::arrow, opt(Token::whitespace))), - ExpressionSequenceSimple::parse, - pair(opt(Token::whitespace), Token::dot), + many0(Attribute::parse), + (separated_pair( + ExpressionSequenceSimple::parse, + tuple((WSoC::parse, Token::arrow, WSoC::parse)), + ExpressionSequenceSimple::parse, + )), )), )(input) - .map(|(rest, (head, _, body, _))| { + .map(|(rest, (attributes, (head, body)))| { let rest_span = rest.span; ( rest, Self { span: input_span.until_rest(&rest_span), + attributes, head, body, }, @@ -106,16 +116,15 @@ mod test { #[test] fn parse_rule() { let test = vec![ - ("a(?x, ?y) :- b(?x, ?y) .", (1, 1)), - ("a(?x,?y), d(1), c(1) :- b(?x, ?y), c(1, 2).", (3, 2)), + ("a(?x, ?y) :- b(?x, ?y)", (1, 1)), + ("a(?x,?y), d(1), c(1) :- b(?x, ?y), c(1, 2)", (3, 2)), + ("#[name(\"test\")]\nresult(?x) :- test(?x)", (1, 1)), ]; for (input, expected) in test { let parser_input = ParserInput::new(input, ParserState::default()); let result = all_consuming(Rule::parse)(parser_input); - println!("{:?}", result); - assert!(result.is_ok()); let result = result.unwrap(); diff --git a/nemo/src/parser/ast/expression/sequence.rs b/nemo/src/parser/ast/sequence.rs similarity index 87% rename from nemo/src/parser/ast/expression/sequence.rs rename to nemo/src/parser/ast/sequence.rs index 583d3db63..f7ffe5252 100644 --- a/nemo/src/parser/ast/expression/sequence.rs +++ b/nemo/src/parser/ast/sequence.rs @@ -1,5 +1,6 @@ //! This module defines helper parsers for sequences of expressions. +pub mod declare; pub mod key_value; pub mod one; pub mod simple; diff --git a/nemo/src/parser/ast/sequence/declare.rs b/nemo/src/parser/ast/sequence/declare.rs new file mode 100644 index 000000000..d68c04cad --- /dev/null +++ b/nemo/src/parser/ast/sequence/declare.rs @@ -0,0 +1,130 @@ +//! This module defines [DeclareSequence]. + +use std::vec::IntoIter; + +use nom::{ + multi::separated_list1, + sequence::{separated_pair, tuple}, +}; + +use crate::parser::{ + ast::{ + comment::wsoc::WSoC, + tag::{datatype::DataTypeTag, parameter::ParameterName}, + token::Token, + ProgramAST, + }, + input::ParserInput, + span::ProgramSpan, + ParserResult, +}; + +/// Sequence of name-type declarations +#[derive(Debug)] +pub struct DeclareSequence<'a> { + /// [ProgramSpan] associated with this sequence + _span: ProgramSpan<'a>, + + /// List of name-type pairs + pairs: Vec<(ParameterName<'a>, DataTypeTag<'a>)>, +} + +impl<'a> DeclareSequence<'a> { + /// Return an iterator over the name-type pairs. + pub fn iter(&self) -> impl Iterator, DataTypeTag<'a>)> { + self.into_iter() + } + + /// Parse a single name-type pair + fn parse_name_type_pair( + input: ParserInput<'a>, + ) -> ParserResult<'a, (ParameterName<'a>, DataTypeTag<'a>)> { + separated_pair( + ParameterName::parse, + tuple((WSoC::parse, Token::colon, WSoC::parse)), + DataTypeTag::parse, + )(input) + } + + /// Parse a comma separated list of [Expression]s. 
+ pub fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self> { + let input_span = input.span; + + separated_list1( + tuple((WSoC::parse, Token::comma, WSoC::parse)), + Self::parse_name_type_pair, + )(input) + .map(|(rest, pairs)| { + let rest_span = rest.span; + + ( + rest, + Self { + _span: input_span.until_rest(&rest_span), + pairs, + }, + ) + }) + } +} + +impl<'a, 'b> IntoIterator for &'b DeclareSequence<'a> { + type Item = &'b (ParameterName<'a>, DataTypeTag<'a>); + type IntoIter = std::slice::Iter<'b, (ParameterName<'a>, DataTypeTag<'a>)>; + + fn into_iter(self) -> Self::IntoIter { + self.pairs.iter() + } +} + +impl<'a> IntoIterator for DeclareSequence<'a> { + type Item = (ParameterName<'a>, DataTypeTag<'a>); + type IntoIter = IntoIter<(ParameterName<'a>, DataTypeTag<'a>)>; + + fn into_iter(self) -> Self::IntoIter { + self.pairs.into_iter() + } +} + +#[cfg(test)] +mod test { + use nom::combinator::all_consuming; + + use crate::{ + parser::{ + ast::{sequence::declare::DeclareSequence, tag::parameter::Parameter}, + input::ParserInput, + ParserState, + }, + rule_model::components::datatype::DataType, + }; + + #[test] + fn parse_expression_sequence_simple() { + let test = vec![( + "_, test: string, _: int, name: any", + vec![ + (Parameter::Named("test".to_string()), DataType::String), + (Parameter::Unnamed, DataType::Integer), + (Parameter::Named("name".to_string()), DataType::Any), + ], + )]; + + for (input, expected) in test { + let parser_input = ParserInput::new(input, ParserState::default()); + let result = all_consuming(DeclareSequence::parse)(parser_input); + + assert!(result.is_ok()); + + let result = result.unwrap(); + assert_eq!( + expected, + result + .1 + .into_iter() + .map(|(name, datatype)| (name.parameter().clone(), datatype.data_type())) + .collect::>() + ); + } + } +} diff --git a/nemo/src/parser/ast/sequence/key_value.rs b/nemo/src/parser/ast/sequence/key_value.rs new file mode 100644 index 000000000..7c3d5b46f --- /dev/null +++ b/nemo/src/parser/ast/sequence/key_value.rs @@ -0,0 +1,114 @@ +//! This module defines [KeyValueSequence]. + +use std::vec::IntoIter; + +use nom::{ + multi::separated_list0, + sequence::{separated_pair, tuple}, +}; + +use crate::parser::{ + ast::{comment::wsoc::WSoC, expression::Expression, token::Token, ProgramAST}, + input::ParserInput, + span::ProgramSpan, + ParserResult, +}; + +/// Sequence of comma-delimited expressions +#[derive(Debug)] +pub struct KeyValueSequence<'a> { + /// [ProgramSpan] associated with this sequence + _span: ProgramSpan<'a>, + + /// List of key-value pairs + expressions: Vec<(Expression<'a>, Expression<'a>)>, +} + +impl<'a> KeyValueSequence<'a> { + /// Return an iterator over the [Expression] pairs. + pub fn iter(&self) -> impl Iterator, Expression<'a>)> { + self.into_iter() + } + + /// Parse a single key-value pair + fn parse_key_value_pair( + input: ParserInput<'a>, + ) -> ParserResult<'a, (Expression<'a>, Expression<'a>)> { + separated_pair( + Expression::parse, + tuple((WSoC::parse, Token::colon, WSoC::parse)), + Expression::parse, + )(input) + } + + /// Parse a comma separated list of [Expression]s. 
+ pub fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self> { + let input_span = input.span; + + separated_list0( + tuple((WSoC::parse, Token::comma, WSoC::parse)), + Self::parse_key_value_pair, + )(input) + .map(|(rest, expressions)| { + let rest_span = rest.span; + + ( + rest, + Self { + _span: input_span.until_rest(&rest_span), + expressions, + }, + ) + }) + } +} + +impl<'a, 'b> IntoIterator for &'b KeyValueSequence<'a> { + type Item = &'b (Expression<'a>, Expression<'a>); + type IntoIter = std::slice::Iter<'b, (Expression<'a>, Expression<'a>)>; + + fn into_iter(self) -> Self::IntoIter { + self.expressions.iter() + } +} + +impl<'a> IntoIterator for KeyValueSequence<'a> { + type Item = (Expression<'a>, Expression<'a>); + type IntoIter = IntoIter<(Expression<'a>, Expression<'a>)>; + + fn into_iter(self) -> Self::IntoIter { + self.expressions.into_iter() + } +} + +#[cfg(test)] +mod test { + use nom::combinator::all_consuming; + + use crate::parser::{ + ast::sequence::key_value::KeyValueSequence, input::ParserInput, ParserState, + }; + + #[test] + fn parse_expression_sequence_simple() { + let test = vec![ + ("", 0), + ("?x:3", 1), + ("?x: 7, ?y: ?z, ?z: 1", 3), + ("x:3, ?x:12, ?x : 7", 3), + ("x:3, ?x : 2, 2 : 5", 3), + ("x:3 , ?x : 12, 2: 1", 3), + ("x:POW(1,2)", 1), + ]; + + for (input, expected) in test { + let parser_input = ParserInput::new(input, ParserState::default()); + let result = all_consuming(KeyValueSequence::parse)(parser_input); + + assert!(result.is_ok()); + + let result = result.unwrap(); + assert_eq!(expected, result.1.into_iter().count()); + } + } +} diff --git a/nemo/src/parser/ast/expression/sequence/one.rs b/nemo/src/parser/ast/sequence/one.rs similarity index 82% rename from nemo/src/parser/ast/expression/sequence/one.rs rename to nemo/src/parser/ast/sequence/one.rs index e178d6e7b..9843e7a3e 100644 --- a/nemo/src/parser/ast/expression/sequence/one.rs +++ b/nemo/src/parser/ast/sequence/one.rs @@ -1,16 +1,11 @@ -//! This module defines +//! This module defines [ExpressionSequenceOne]. use std::vec::IntoIter; -use nom::{ - branch::alt, - combinator::{map, opt}, - multi::separated_list1, - sequence::tuple, -}; +use nom::{branch::alt, combinator::map, multi::separated_list1, sequence::tuple}; use crate::parser::{ - ast::{expression::Expression, token::Token, ProgramAST}, + ast::{comment::wsoc::WSoC, expression::Expression, token::Token, ProgramAST}, input::ParserInput, span::ProgramSpan, ParserResult, @@ -22,7 +17,7 @@ use crate::parser::{ #[derive(Debug)] pub struct ExpressionSequenceOne<'a> { /// [ProgramSpan] associated with this sequence - span: ProgramSpan<'a>, + _span: ProgramSpan<'a>, /// List of expressions expressions: Vec>, @@ -36,7 +31,7 @@ impl<'a> ExpressionSequenceOne<'a> { /// Parse a sequence of length one. 
fn parse_sequence_single(input: ParserInput<'a>) -> ParserResult<'a, Expression<'a>> { - tuple((Expression::parse, opt(Token::whitespace), Token::comma))(input) + tuple((Expression::parse, WSoC::parse, Token::comma))(input) .map(|(rest, (result, _, _))| (rest, result)) } @@ -44,9 +39,9 @@ impl<'a> ExpressionSequenceOne<'a> { fn parse_sequence(input: ParserInput<'a>) -> ParserResult<'a, Vec>> { tuple(( Expression::parse, - tuple((opt(Token::whitespace), Token::comma, opt(Token::whitespace))), + tuple((WSoC::parse, Token::comma, WSoC::parse)), separated_list1( - tuple((opt(Token::whitespace), Token::comma, opt(Token::whitespace))), + tuple((WSoC::parse, Token::comma, WSoC::parse)), Expression::parse, ), ))(input) @@ -72,7 +67,7 @@ impl<'a> ExpressionSequenceOne<'a> { ( rest, Self { - span: input_span.until_rest(&rest_span), + _span: input_span.until_rest(&rest_span), expressions, }, ) @@ -103,7 +98,7 @@ mod test { use nom::combinator::all_consuming; use crate::parser::{ - ast::expression::sequence::one::ExpressionSequenceOne, input::ParserInput, ParserState, + ast::sequence::one::ExpressionSequenceOne, input::ParserInput, ParserState, }; #[test] diff --git a/nemo/src/parser/ast/expression/sequence/simple.rs b/nemo/src/parser/ast/sequence/simple.rs similarity index 82% rename from nemo/src/parser/ast/expression/sequence/simple.rs rename to nemo/src/parser/ast/sequence/simple.rs index f81c51916..16ed32ba8 100644 --- a/nemo/src/parser/ast/expression/sequence/simple.rs +++ b/nemo/src/parser/ast/sequence/simple.rs @@ -1,11 +1,11 @@ -//! This module defines +//! This module defines [ExpressionSequenceSimple]. use std::vec::IntoIter; -use nom::{combinator::opt, multi::separated_list1, sequence::tuple}; +use nom::{multi::separated_list1, sequence::tuple}; use crate::parser::{ - ast::{expression::Expression, token::Token, ProgramAST}, + ast::{comment::wsoc::WSoC, expression::Expression, token::Token, ProgramAST}, input::ParserInput, span::ProgramSpan, ParserResult, @@ -15,7 +15,7 @@ use crate::parser::{ #[derive(Debug)] pub struct ExpressionSequenceSimple<'a> { /// [ProgramSpan] associated with this sequence - span: ProgramSpan<'a>, + _span: ProgramSpan<'a>, /// List of expressions expressions: Vec>, @@ -32,7 +32,7 @@ impl<'a> ExpressionSequenceSimple<'a> { let input_span = input.span; separated_list1( - tuple((opt(Token::whitespace), Token::comma, opt(Token::whitespace))), + tuple((WSoC::parse, Token::comma, WSoC::parse)), Expression::parse, )(input) .map(|(rest, expressions)| { @@ -41,7 +41,7 @@ impl<'a> ExpressionSequenceSimple<'a> { ( rest, Self { - span: input_span.until_rest(&rest_span), + _span: input_span.until_rest(&rest_span), expressions, }, ) @@ -72,8 +72,7 @@ mod test { use nom::combinator::all_consuming; use crate::parser::{ - ast::expression::sequence::simple::ExpressionSequenceSimple, input::ParserInput, - ParserState, + ast::sequence::simple::ExpressionSequenceSimple, input::ParserInput, ParserState, }; #[test] @@ -90,8 +89,6 @@ mod test { let parser_input = ParserInput::new(input, ParserState::default()); let result = all_consuming(ExpressionSequenceSimple::parse)(parser_input); - println!("{:?}", result); - assert!(result.is_ok()); let result = result.unwrap(); diff --git a/nemo/src/parser/ast/statement.rs b/nemo/src/parser/ast/statement.rs new file mode 100644 index 000000000..af40c1574 --- /dev/null +++ b/nemo/src/parser/ast/statement.rs @@ -0,0 +1,165 @@ +//! This module defines [Statement]. 
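A statement in the new grammar is an optional doc comment, followed by a fact, rule, or directive, terminated by a dot. A stripped-down, self-contained illustration of that shape (hypothetical toy parsers, not the crate's types; assumes nom 7):

use nom::{
    bytes::complete::{tag, take_till},
    character::complete::{char, line_ending, multispace0},
    combinator::opt,
    sequence::{delimited, pair, preceded, terminated},
    IResult,
};

// An optional `///` doc comment line attached to the statement.
fn doc_comment(input: &str) -> IResult<&str, &str> {
    terminated(preceded(tag("///"), take_till(|c| c == '\n')), line_ending)(input)
}

// A statement body (here: anything up to the dot) with its terminating dot.
fn statement(input: &str) -> IResult<&str, (Option<&str>, &str)> {
    pair(
        opt(doc_comment),
        delimited(multispace0, take_till(|c| c == '.'), char('.')),
    )(input)
}

fn main() {
    let (rest, (doc, body)) = statement("/// A fact\na(1, 2) .").unwrap();
    assert_eq!(rest, "");
    assert_eq!(doc, Some(" A fact"));
    assert_eq!(body.trim(), "a(1, 2)");
}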
+ +use nom::{ + branch::alt, + character::complete::line_ending, + combinator::{map, opt}, + sequence::{delimited, pair, terminated}, +}; + +use crate::parser::{ + context::{context, ParserContext}, + input::ParserInput, + span::ProgramSpan, + ParserResult, +}; + +use super::{ + comment::{doc::DocComment, wsoc::WSoC}, + directive::Directive, + expression::Expression, + rule::Rule, + token::Token, + ProgramAST, +}; + +/// Types of [Statement]s +#[derive(Debug)] +pub enum StatementKind<'a> { + /// Fact + Fact(Expression<'a>), + /// Rule + Rule(Rule<'a>), + /// Directive + Directive(Directive<'a>), +} + +impl<'a> StatementKind<'a> { + /// Return the [ParserContext] of the underlying statement. + pub fn context(&self) -> ParserContext { + match self { + StatementKind::Fact(statement) => statement.context(), + StatementKind::Rule(statement) => statement.context(), + StatementKind::Directive(statement) => statement.context(), + } + } + + /// Parse the [StatementKind]. + pub fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self> { + alt(( + map(Directive::parse, Self::Directive), + map(Rule::parse, Self::Rule), + map(Expression::parse, Self::Fact), + ))(input) + } +} + +/// Statement in a program +#[derive(Debug)] +pub struct Statement<'a> { + /// [ProgramSpan] associated with this node + span: ProgramSpan<'a>, + + /// Doc comment associated with this statement + comment: Option>, + /// The statement + statement: StatementKind<'a>, +} + +impl<'a> Statement<'a> { + /// Return the comment attached to this statement, + /// if there is any + pub fn comment(&self) -> Option<&DocComment<'a>> { + self.comment.as_ref() + } + + /// Return the [StatementKind]. + pub fn statement(&self) -> &StatementKind<'a> { + &self.statement + } +} + +const CONTEXT: ParserContext = ParserContext::Statement; + +impl<'a> ProgramAST<'a> for Statement<'a> { + fn children(&self) -> Vec<&dyn ProgramAST> { + vec![match &self.statement { + StatementKind::Fact(statement) => statement, + StatementKind::Rule(statement) => statement, + StatementKind::Directive(statement) => statement, + }] + } + + fn span(&self) -> ProgramSpan<'a> { + self.span + } + + fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self> + where + Self: Sized + 'a, + { + let input_span = input.span; + + context( + CONTEXT, + pair( + opt(terminated(DocComment::parse, line_ending)), + delimited( + WSoC::parse, + StatementKind::parse, + pair(WSoC::parse, Token::dot), + ), + ), + )(input) + .map(|(rest, (comment, statement))| { + let rest_span = rest.span; + + ( + rest, + Self { + span: input_span.until_rest(&rest_span), + comment, + statement, + }, + ) + }) + } + + fn context(&self) -> ParserContext { + CONTEXT + } +} + +#[cfg(test)] +mod test { + use nom::combinator::all_consuming; + + use crate::parser::{ + ast::{statement::Statement, ProgramAST}, + context::ParserContext, + input::ParserInput, + ParserState, + }; + + #[test] + fn parse_directive() { + let test = vec![ + ("/// A fact \n a(1, 2) .", ParserContext::Expression), + ("/// A rule \n a(1, 2) :- b(2, 1) .", ParserContext::Rule), + ( + "/// A directive \n \t@declare a(_: int, _: int) .", + ParserContext::Directive, + ), + ]; + + for (input, expect) in test { + let parser_input = ParserInput::new(input, ParserState::default()); + let result = all_consuming(Statement::parse)(parser_input); + + assert!(result.is_ok()); + + let result = result.unwrap(); + assert_eq!(result.1.statement.context(), expect); + } + } +} diff --git a/nemo/src/parser/ast/tag.rs b/nemo/src/parser/ast/tag.rs index 
349144a86..e804a78d5 100644
--- a/nemo/src/parser/ast/tag.rs
+++ b/nemo/src/parser/ast/tag.rs
@@ -1,69 +1,7 @@
-//! This module defines [Tag].
+//! This module defines tags used to name complex syntax elements.
 
-use nom::{branch::alt, combinator::map, sequence::tuple};
-
-use crate::parser::{input::ParserInput, ParserResult};
-
-use super::{expression::basic::iri::Iri, token::Token, ProgramAST};
-
-/// Tag used to give a name to complex expressions
-#[derive(Debug)]
-pub enum Tag<'a> {
-    /// Plain name
-    Plain(Token<'a>),
-    /// Prefixed name
-    Prefixed { prefix: Token<'a>, tag: Token<'a> },
-    /// Iri
-    Iri(Iri<'a>),
-}
-
-impl<'a> Tag<'a> {
-    /// Parse a [Tag].
-    pub fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self> {
-        alt((
-            map(
-                tuple((Token::name, Token::colon, Token::name)),
-                |(prefix, _, tag)| Self::Prefixed { prefix, tag },
-            ),
-            map(Token::name, Self::Plain),
-            map(Iri::parse, Self::Iri),
-        ))(input)
-    }
-
-    /// Return a string representation of the [Tag].
-    ///
-    /// Note that this does not resolve prefixes.
-    pub fn to_string(&self) -> String {
-        match self {
-            Tag::Plain(token) => token.to_string(),
-            Tag::Prefixed { prefix, tag } => format!("{}:{}", prefix.to_string(), tag.to_string()),
-            Tag::Iri(iri) => iri.content(),
-        }
-    }
-}
-
-#[cfg(test)]
-mod test {
-    use nom::combinator::all_consuming;
-
-    use crate::parser::{ast::tag::Tag, input::ParserInput, ParserState};
-
-    #[test]
-    fn parse_tag() {
-        let test = vec![
-            ("abc", "abc".to_string()),
-            ("abc:def", "abc:def".to_string()),
-            ("<http://example.com>", "http://example.com".to_string()),
-        ];
-
-        for (input, expected) in test {
-            let parser_input = ParserInput::new(input, ParserState::default());
-            let result = all_consuming(Tag::parse)(parser_input);
-
-            assert!(result.is_ok());
-
-            let result = result.unwrap();
-            assert_eq!(expected, result.1.to_string());
-        }
-    }
-}
+pub mod aggregation;
+pub mod datatype;
+pub mod operation;
+pub mod parameter;
+pub mod structure;
diff --git a/nemo/src/parser/ast/tag/aggregation.rs b/nemo/src/parser/ast/tag/aggregation.rs
new file mode 100644
index 000000000..ef64fd5a8
--- /dev/null
+++ b/nemo/src/parser/ast/tag/aggregation.rs
@@ -0,0 +1,122 @@
+//! This module defines [AggregationTag].
+
+use nom::{branch::alt, bytes::complete::tag_no_case, combinator::map};
+use nom_supreme::error::{BaseErrorKind, Expectation};
+use strum::IntoEnumIterator;
+
+use crate::{
+    parser::{
+        ast::{token::Token, ProgramAST},
+        context::{context, ParserContext},
+        error::ParserErrorTree,
+        input::ParserInput,
+        span::ProgramSpan,
+        ParserResult,
+    },
+    rule_model::components::term::aggregate::AggregateKind,
+};
+
+/// Tags that are used to identify aggregations
+#[derive(Debug)]
+pub struct AggregationTag<'a> {
+    /// [ProgramSpan] associated with this node
+    span: ProgramSpan<'a>,
+
+    /// Type of aggregation, if known
+    kind: Option<AggregateKind>,
+}
+
+impl<'a> AggregationTag<'a> {
+    /// Return the [AggregateKind] that was parsed, if it is known.
+    pub fn operation(&self) -> Option<AggregateKind> {
+        self.kind
+    }
+}
+
+const CONTEXT: ParserContext = ParserContext::AggregationTag;
+
+impl<'a> ProgramAST<'a> for AggregationTag<'a> {
+    fn children(&self) -> Vec<&dyn ProgramAST> {
+        Vec::default()
+    }
+
+    fn span(&self) -> ProgramSpan<'a> {
+        self.span
+    }
+
+    fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self>
+    where
+        Self: Sized + 'a,
+    {
+        let keyword_parser = |input: ParserInput<'a>| {
+            for operation in AggregateKind::iter() {
+                let result = tag_no_case::<&str, ParserInput<'_>, ParserErrorTree>(
+                    operation.name(),
+                )(input.clone());
+                if let Ok((rest, _matched)) = result {
+                    return Ok((rest, operation));
+                }
+            }
+            Err(nom::Err::Error(ParserErrorTree::Base {
+                location: input,
+                kind: BaseErrorKind::Expected(Expectation::Tag("aggregation name")),
+            }))
+        };
+
+        let input_span = input.span;
+
+        context(
+            CONTEXT,
+            alt((map(keyword_parser, Some), map(Token::name, |_| None))),
+        )(input)
+        .map(|(rest, kind)| {
+            let rest_span = rest.span;
+
+            (
+                rest,
+                Self {
+                    span: input_span.until_rest(&rest_span),
+                    kind,
+                },
+            )
+        })
+    }
+
+    fn context(&self) -> ParserContext {
+        CONTEXT
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use nom::combinator::all_consuming;
+
+    use crate::{
+        parser::{
+            ast::{tag::aggregation::AggregationTag, ProgramAST},
+            input::ParserInput,
+            ParserState,
+        },
+        rule_model::components::term::aggregate::AggregateKind,
+    };
+
+    #[test]
+    fn parse_tag() {
+        let test = vec![
+            ("sum", AggregateKind::SumOfNumbers),
+            ("COUNT", AggregateKind::CountValues),
+            ("Min", AggregateKind::MinNumber),
+            ("Max", AggregateKind::MaxNumber),
+        ];
+
+        for (input, expected) in test {
+            let parser_input = ParserInput::new(input, ParserState::default());
+            let result = all_consuming(AggregationTag::parse)(parser_input);
+
+            assert!(result.is_ok());
+
+            let result = result.unwrap();
+            assert_eq!(Some(expected), result.1.kind);
+        }
+    }
+}
diff --git a/nemo/src/parser/ast/tag/datatype.rs b/nemo/src/parser/ast/tag/datatype.rs
new file mode 100644
index 000000000..12562b36d
--- /dev/null
+++ b/nemo/src/parser/ast/tag/datatype.rs
@@ -0,0 +1,118 @@
+//! This module defines [DataTypeTag].
+
+use nom::bytes::complete::tag;
+use nom_supreme::error::{BaseErrorKind, Expectation};
+use strum::IntoEnumIterator;
+
+use crate::{
+    parser::{
+        ast::ProgramAST,
+        context::{context, ParserContext},
+        error::ParserErrorTree,
+        input::ParserInput,
+        span::ProgramSpan,
+        ParserResult,
+    },
+    rule_model::components::datatype::DataType,
+};
+
+/// Tags that are used to identify data types
+#[derive(Debug)]
+pub struct DataTypeTag<'a> {
+    /// [ProgramSpan] associated with this node
+    span: ProgramSpan<'a>,
+
+    /// Data type
+    data_type: DataType,
+}
+
+impl<'a> DataTypeTag<'a> {
+    /// Return the [DataType] that was parsed.
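+    /// Parsing only succeeds for known data type names, so a valid [DataType] is always present.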
+ pub fn data_type(&self) -> DataType { + self.data_type + } +} + +const CONTEXT: ParserContext = ParserContext::DataType; + +impl<'a> ProgramAST<'a> for DataTypeTag<'a> { + fn children(&self) -> Vec<&dyn ProgramAST> { + Vec::default() + } + + fn span(&self) -> ProgramSpan<'a> { + self.span + } + + fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self> + where + Self: Sized + 'a, + { + let keyword_parser = |input: ParserInput<'a>| { + for data_type in DataType::iter() { + let result = + tag::<&str, ParserInput<'_>, ParserErrorTree>(data_type.name())(input.clone()); + if let Ok((rest, _matched)) = result { + return Ok((rest, data_type)); + } + } + Err(nom::Err::Error(ParserErrorTree::Base { + location: input, + kind: BaseErrorKind::Expected(Expectation::Tag("data type")), + })) + }; + + let input_span = input.span; + + context(CONTEXT, keyword_parser)(input).map(|(rest, data_type)| { + let rest_span = rest.span; + + ( + rest, + Self { + span: input_span.until_rest(&rest_span), + data_type, + }, + ) + }) + } + + fn context(&self) -> ParserContext { + CONTEXT + } +} + +#[cfg(test)] +mod test { + use nom::combinator::all_consuming; + + use crate::{ + parser::{ + ast::{tag::datatype::DataTypeTag, ProgramAST}, + input::ParserInput, + ParserState, + }, + rule_model::components::datatype::DataType, + }; + + #[test] + fn parse_datatype() { + let test = vec![ + ("int", DataType::Integer), + ("float", DataType::Float), + ("double", DataType::Double), + ("string", DataType::String), + ("any", DataType::Any), + ]; + + for (input, expected) in test { + let parser_input = ParserInput::new(input, ParserState::default()); + let result = all_consuming(DataTypeTag::parse)(parser_input); + + assert!(result.is_ok()); + + let result = result.unwrap(); + assert_eq!(expected, result.1.data_type()); + } + } +} diff --git a/nemo/src/parser/ast/tag/operation.rs b/nemo/src/parser/ast/tag/operation.rs new file mode 100644 index 000000000..ef1a65727 --- /dev/null +++ b/nemo/src/parser/ast/tag/operation.rs @@ -0,0 +1,117 @@ +//! This module defines [OperationTag]. + +use nom::bytes::complete::tag_no_case; +use nom_supreme::error::{BaseErrorKind, Expectation}; +use strum::IntoEnumIterator; + +use crate::{ + parser::{ + ast::ProgramAST, + context::{context, ParserContext}, + error::ParserErrorTree, + input::ParserInput, + span::ProgramSpan, + ParserResult, + }, + rule_model::components::term::operation::operation_kind::OperationKind, +}; + +/// Tags that are used to identify operations +#[derive(Debug)] +pub struct OperationTag<'a> { + /// [ProgramSpan] associated with this node + span: ProgramSpan<'a>, + + /// Type of operation + kind: OperationKind, +} + +impl<'a> OperationTag<'a> { + /// Return the [OperationKind] that was parsed. 
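+    /// The name was matched case-insensitively against [OperationKind::name].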
+ pub fn operation(&self) -> OperationKind { + self.kind + } +} + +const CONTEXT: ParserContext = ParserContext::OperationTag; + +impl<'a> ProgramAST<'a> for OperationTag<'a> { + fn children(&self) -> Vec<&dyn ProgramAST> { + Vec::default() + } + + fn span(&self) -> ProgramSpan<'a> { + self.span + } + + fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self> + where + Self: Sized + 'a, + { + let keyword_parser = |input: ParserInput<'a>| { + for operation in OperationKind::iter() { + let result = tag_no_case::<&str, ParserInput<'_>, ParserErrorTree>( + operation.name(), + )(input.clone()); + if let Ok((rest, _matched)) = result { + return Ok((rest, operation)); + } + } + Err(nom::Err::Error(ParserErrorTree::Base { + location: input, + kind: BaseErrorKind::Expected(Expectation::Tag("operation name")), + })) + }; + + let input_span = input.span; + + context(CONTEXT, keyword_parser)(input).map(|(rest, kind)| { + let rest_span = rest.span; + + ( + rest, + Self { + span: input_span.until_rest(&rest_span), + kind, + }, + ) + }) + } + + fn context(&self) -> ParserContext { + CONTEXT + } +} + +#[cfg(test)] +mod test { + use nom::combinator::all_consuming; + + use crate::{ + parser::{ + ast::{tag::operation::OperationTag, ProgramAST}, + input::ParserInput, + ParserState, + }, + rule_model::components::term::operation::operation_kind::OperationKind, + }; + + #[test] + fn parse_tag() { + let test = vec![ + ("sum", OperationKind::NumericSum), + ("STRLEN", OperationKind::StringLength), + ("IsNumeric", OperationKind::CheckIsNumeric), + ]; + + for (input, expected) in test { + let parser_input = ParserInput::new(input, ParserState::default()); + let result = all_consuming(OperationTag::parse)(parser_input); + + assert!(result.is_ok()); + + let result = result.unwrap(); + assert_eq!(expected, result.1.kind); + } + } +} diff --git a/nemo/src/parser/ast/tag/parameter.rs b/nemo/src/parser/ast/tag/parameter.rs new file mode 100644 index 000000000..4ceea26dd --- /dev/null +++ b/nemo/src/parser/ast/tag/parameter.rs @@ -0,0 +1,110 @@ +//! This module defines [ParameterName]. + +use nom::{branch::alt, combinator::map}; + +use crate::parser::{ + ast::{token::Token, ProgramAST}, + context::{context, ParserContext}, + input::ParserInput, + span::ProgramSpan, + ParserResult, +}; + +/// Type of parameter +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Parameter { + /// Unnamed parameter + Unnamed, + /// Named parameter + Named(String), +} + +/// Tags that are used to give names to certain objects +#[derive(Debug)] +pub struct ParameterName<'a> { + /// [ProgramSpan] associated with this node + span: ProgramSpan<'a>, + + /// Parameter + parameter: Parameter, +} + +impl<'a> ParameterName<'a> { + /// Return the [Parameter] that was parsed. 
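+    /// An underscore parses to [Parameter::Unnamed]; any other name parses to [Parameter::Named].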
+    pub fn parameter(&self) -> &Parameter {
+        &self.parameter
+    }
+}
+
+const CONTEXT: ParserContext = ParserContext::DataType;
+
+impl<'a> ProgramAST<'a> for ParameterName<'a> {
+    fn children(&self) -> Vec<&dyn ProgramAST> {
+        Vec::default()
+    }
+
+    fn span(&self) -> ProgramSpan<'a> {
+        self.span
+    }
+
+    fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self>
+    where
+        Self: Sized + 'a,
+    {
+        let input_span = input.span;
+
+        context(
+            CONTEXT,
+            alt((
+                map(Token::underscore, |_| Parameter::Unnamed),
+                map(Token::name, |token| Parameter::Named(token.to_string())),
+            )),
+        )(input)
+        .map(|(rest, parameter)| {
+            let rest_span = rest.span;
+
+            (
+                rest,
+                Self {
+                    span: input_span.until_rest(&rest_span),
+                    parameter,
+                },
+            )
+        })
+    }
+
+    fn context(&self) -> ParserContext {
+        CONTEXT
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use nom::combinator::all_consuming;
+
+    use crate::parser::{
+        ast::{tag::parameter::ParameterName, ProgramAST},
+        input::ParserInput,
+        ParserState,
+    };
+
+    use super::Parameter;
+
+    #[test]
+    fn parse_datatype() {
+        let test = vec![
+            ("test", Parameter::Named("test".to_string())),
+            ("_", Parameter::Unnamed),
+        ];
+
+        for (input, expected) in test {
+            let parser_input = ParserInput::new(input, ParserState::default());
+            let result = all_consuming(ParameterName::parse)(parser_input);
+
+            assert!(result.is_ok());
+
+            let result = result.unwrap();
+            assert_eq!(&expected, result.1.parameter());
+        }
+    }
+}
diff --git a/nemo/src/parser/ast/tag/structure.rs b/nemo/src/parser/ast/tag/structure.rs
new file mode 100644
index 000000000..32a911512
--- /dev/null
+++ b/nemo/src/parser/ast/tag/structure.rs
@@ -0,0 +1,131 @@
+//! This module defines [StructureTag].
+
+use nom::{branch::alt, combinator::map, sequence::separated_pair};
+
+use crate::parser::{
+    ast::{expression::basic::iri::Iri, token::Token, ProgramAST},
+    context::{context, ParserContext},
+    input::ParserInput,
+    span::ProgramSpan,
+    ParserResult,
+};
+
+/// Types of [StructureTag]s
+#[derive(Debug)]
+pub enum StructureTagKind<'a> {
+    /// Plain name
+    Plain(Token<'a>),
+    /// Prefixed name
+    Prefixed { prefix: Token<'a>, tag: Token<'a> },
+    /// Iri
+    Iri(Iri<'a>),
+}
+
+/// Tags that are used to give a name to complex expressions
+#[derive(Debug)]
+pub struct StructureTag<'a> {
+    /// [ProgramSpan] associated with this node
+    span: ProgramSpan<'a>,
+
+    /// Type of [StructureTag]
+    kind: StructureTagKind<'a>,
+}
+
+impl<'a> StructureTag<'a> {
+    /// Return the type of structure tag.
+    pub fn kind(&self) -> &StructureTagKind<'a> {
+        &self.kind
+    }
+
+    /// Return a string representation of the [StructureTag].
+    ///
+    /// Note that this does not resolve prefixes.
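+    /// Prefixed names are rendered as `prefix::tag`.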
+    pub fn to_string(&self) -> String {
+        match &self.kind {
+            StructureTagKind::Plain(token) => token.to_string(),
+            StructureTagKind::Prefixed { prefix, tag } => {
+                format!("{}::{}", prefix.to_string(), tag.to_string())
+            }
+            StructureTagKind::Iri(iri) => iri.content(),
+        }
+    }
+}
+
+const CONTEXT: ParserContext = ParserContext::StructureTag;
+
+impl<'a> ProgramAST<'a> for StructureTag<'a> {
+    fn children(&self) -> Vec<&dyn ProgramAST> {
+        match self.kind() {
+            StructureTagKind::Plain(_token) => vec![],
+            StructureTagKind::Prefixed { prefix: _, tag: _ } => vec![],
+            StructureTagKind::Iri(iri) => iri.children(),
+        }
+    }
+
+    fn span(&self) -> ProgramSpan<'a> {
+        self.span
+    }
+
+    fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self>
+    where
+        Self: Sized + 'a,
+    {
+        let input_span = input.span;
+
+        context(
+            CONTEXT,
+            alt((
+                map(
+                    separated_pair(Token::name, Token::double_colon, Token::name),
+                    |(prefix, tag)| StructureTagKind::Prefixed { prefix, tag },
+                ),
+                map(Token::name, StructureTagKind::Plain),
+                map(Iri::parse, StructureTagKind::Iri),
+            )),
+        )(input)
+        .map(|(rest, kind)| {
+            let rest_span = rest.span;
+            (
+                rest,
+                Self {
+                    span: input_span.until_rest(&rest_span),
+                    kind,
+                },
+            )
+        })
+    }
+
+    fn context(&self) -> ParserContext {
+        CONTEXT
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use nom::combinator::all_consuming;
+
+    use crate::parser::{
+        ast::{tag::structure::StructureTag, ProgramAST},
+        input::ParserInput,
+        ParserState,
+    };
+
+    #[test]
+    fn parse_tag() {
+        let test = vec![
+            ("abc", "abc".to_string()),
+            ("abc::def", "abc::def".to_string()),
+            ("<http://example.com>", "http://example.com".to_string()),
+        ];
+
+        for (input, expected) in test {
+            let parser_input = ParserInput::new(input, ParserState::default());
+            let result = all_consuming(StructureTag::parse)(parser_input);
+
+            assert!(result.is_ok());
+
+            let result = result.unwrap();
+            assert_eq!(expected, result.1.to_string());
+        }
+    }
+}
diff --git a/nemo/src/parser/ast/token.rs b/nemo/src/parser/ast/token.rs
index af6634dd1..e0447680b 100644
--- a/nemo/src/parser/ast/token.rs
+++ b/nemo/src/parser/ast/token.rs
@@ -64,6 +64,12 @@ pub enum TokenKind {
     /// Colon
     #[assoc(name = ":")]
     Colon,
+    /// Double Colon
+    #[assoc(name = "::")]
+    DoubleColon,
+    /// Semicolon
+    #[assoc(name = ";")]
+    Semicolon,
     /// Greater than
     #[assoc(name = ">")]
     Greater,
@@ -79,6 +85,9 @@ pub enum TokenKind {
     /// Equal
     #[assoc(name = "=")]
     Equal,
+    /// Unequal
+    #[assoc(name = "!=")]
+    Unequal,
     /// Tilde, used for negation
     #[assoc(name = "~")]
     Tilde,
@@ -142,15 +151,39 @@ pub enum TokenKind {
     /// String
     #[assoc(name = "string")]
     String,
-    /// A comment (as single token)
-    #[assoc(name = "comment")]
+    /// Token marking a normal comment
+    #[assoc(name = "//")]
     Comment,
-    /// A doc comment attached to e.g. a rule
-    #[assoc(name = "doc-comment")]
+    /// Token marking the beginning of a closed comment
+    #[assoc(name = "/*")]
+    OpenComment,
+    /// Token marking the end of a closed comment
+    #[assoc(name = "*/")]
+    CloseComment,
+    /// Token marking a doc comment attached to e.g.
a rule + #[assoc(name = "///")] DocComment, - /// Toplevel comment describing the rule file - #[assoc(name = "top-level-comment")] + /// Token marking the top level comment + #[assoc(name = "//!")] TopLevelComment, + /// Token for the base directive + #[assoc(name = "base")] + BaseDirective, + /// Token for the declare directive + #[assoc(name = "declare")] + DeclareDirective, + /// Token for the export directive + #[assoc(name = "export")] + ExportDirective, + /// Token for the import directive + #[assoc(name = "import")] + ImportDirective, + /// Token for the output directive + #[assoc(name = "output")] + OutputDirective, + /// Token for the prefix directive + #[assoc(name = "prefix")] + PrefixDirective, /// White spaces #[assoc(name = "whitespace")] Whitespace, @@ -290,18 +323,21 @@ impl<'a> Token<'a> { string_token!(open_chevrons, TokenKind::OpenChevrons); string_token!(closed_chevrons, TokenKind::ClosedChevrons); string_token!(open_bracket, TokenKind::OpenBracket); - string_token!(closed_bracket, TokenKind::ClosedBrace); + string_token!(closed_bracket, TokenKind::ClosedBracket); string_token!(question_mark, TokenKind::QuestionMark); string_token!(exclamation_mark, TokenKind::ExclamationMark); string_token!(dot, TokenKind::Dot); string_token!(comma, TokenKind::Comma); string_token!(arrow, TokenKind::Arrow); string_token!(colon, TokenKind::Colon); + string_token!(double_colon, TokenKind::DoubleColon); + string_token!(semicolon, TokenKind::Semicolon); string_token!(greater, TokenKind::Greater); string_token!(greater_equal, TokenKind::GreaterEqual); string_token!(less, TokenKind::Less); string_token!(less_equal, TokenKind::LessEqual); string_token!(equal, TokenKind::Equal); + string_token!(unequal, TokenKind::Unequal); string_token!(tilde, TokenKind::Tilde); string_token!(double_caret, TokenKind::DoubleCaret); string_token!(hash, TokenKind::Hash); @@ -313,10 +349,21 @@ impl<'a> Token<'a> { string_token!(division, TokenKind::Division); string_token!(boolean_true, TokenKind::True); string_token!(boolean_false, TokenKind::False); + string_token!(comment, TokenKind::Comment); + string_token!(open_comment, TokenKind::OpenComment); + string_token!(close_comment, TokenKind::CloseComment); + string_token!(doc_comment, TokenKind::DocComment); + string_token!(toplevel_comment, TokenKind::TopLevelComment); string_token!(quote, TokenKind::Quote); string_token!(blank_node_prefix, TokenKind::BlankNodePrefix); string_token!(exponent_lower, TokenKind::ExponentLower); string_token!(exponent_upper, TokenKind::ExponentUpper); string_token!(type_marker_double, TokenKind::TypeMarkerDouble); string_token!(type_marker_float, TokenKind::TypeMarkerFloat); + string_token!(directive_base, TokenKind::BaseDirective); + string_token!(directive_declare, TokenKind::DeclareDirective); + string_token!(directive_export, TokenKind::ExportDirective); + string_token!(directive_import, TokenKind::ImportDirective); + string_token!(directive_output, TokenKind::OutputDirective); + string_token!(directive_prefix, TokenKind::PrefixDirective); } diff --git a/nemo/src/parser/context.rs b/nemo/src/parser/context.rs index c58c2995b..1ec938a2b 100644 --- a/nemo/src/parser/context.rs +++ b/nemo/src/parser/context.rs @@ -7,12 +7,15 @@ use nom_supreme::context::ContextError; use super::{ast::token::TokenKind, error::ParserErrorTree, ParserInput, ParserResult}; /// Context, in which a particular parse error occurred -#[derive(Assoc, Debug, Clone, Copy)] +#[derive(Assoc, Debug, Clone, Copy, PartialEq, Eq)] #[func(pub fn name(&self) -> 
&'static str)] pub enum ParserContext { /// Token #[assoc(name = _kind.name())] Token { kind: TokenKind }, + /// Data type + #[assoc(name = "datatype")] + DataType, /// Number #[assoc(name = "number")] Number, @@ -37,18 +40,84 @@ pub enum ParserContext { /// Boolean #[assoc(name = "boolean")] Boolean, + /// Attribute + #[assoc(name = "attribute")] + Attribute, + /// Base directive + #[assoc(name = "base directive")] + Base, + /// Declare directive + #[assoc(name = "declare directive")] + Declare, + /// Export directive + #[assoc(name = "export directive")] + Export, + /// Import directive + #[assoc(name = "import directive")] + Import, + /// Output directive + #[assoc(name = "output directive")] + Output, + /// Prefix directive + #[assoc(name = "prefix directive")] + Prefix, + /// Unknown directive + #[assoc(name = "directive")] + UnknownDirective, /// Expression #[assoc(name = "expression")] Expression, /// Tuple #[assoc(name = "tuple")] Tuple, + /// Map + #[assoc(name = "map")] + Map, + /// Arithmetic expression + #[assoc(name = "arithmetic expression")] + Arithmetic, /// Atom #[assoc(name = "atom")] Atom, + /// Tag + #[assoc(name = "tag")] + StructureTag, + /// Aggregate tag + #[assoc(name = "aggregate name")] + AggregationTag, + /// Operation tag + #[assoc(name = "operation name")] + OperationTag, + /// Operation + #[assoc(name = "operation")] + Operation, + /// Aggregation + #[assoc(name = "aggregation")] + Aggregation, + /// Negation + #[assoc(name = "negation")] + Negation, + /// Infix + #[assoc(name = "expression")] // TODO: Is there a better name? + Infix, + /// Comment + #[assoc(name = "comment")] + Comment, + /// Doc-comment + #[assoc(name = "doc-comment")] + DocComment, + /// Top-level comment + #[assoc(name = "top-level-comment")] + TopLevelComment, /// Rule #[assoc(name = "rule")] Rule, + /// Directive + #[assoc(name = "directive")] + Directive, + /// Statement + #[assoc(name = "statement")] + Statement, /// Program #[assoc(name = "program")] Program, diff --git a/nemo/src/parser/span.rs b/nemo/src/parser/span.rs index ceef435a6..c0c7d3e44 100644 --- a/nemo/src/parser/span.rs +++ b/nemo/src/parser/span.rs @@ -128,4 +128,18 @@ impl<'a> ProgramSpan<'a> { )) } } + /// TODO: Description and Specify safety conditions + /// Create a [ProgramSpan] that encloses the given [ProgramSpan]s. + /// TODO: Description and Specify safety conditions and verify that this is correct + pub fn enclose(&self, first: &Self, second: &Self) -> Self { + unsafe { + Self(LocatedSpan::new_from_raw_offset( + first.0.location_offset(), + first.0.location_line(), + &self.0 + [..(second.0.location_offset() + second.0.len() - first.0.location_offset())], + (), + )) + } + } } diff --git a/nemo/src/rule_model/components.rs b/nemo/src/rule_model/components.rs index 104f089e4..cfca4f2c7 100644 --- a/nemo/src/rule_model/components.rs +++ b/nemo/src/rule_model/components.rs @@ -3,6 +3,7 @@ #[macro_use] pub mod atom; pub mod base; +pub mod datatype; pub mod fact; pub mod import_export; pub mod literal; diff --git a/nemo/src/rule_model/components/datatype.rs b/nemo/src/rule_model/components/datatype.rs new file mode 100644 index 000000000..278087728 --- /dev/null +++ b/nemo/src/rule_model/components/datatype.rs @@ -0,0 +1,27 @@ +//! This module defines [DataType]. 
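+//!
+//! The variants mirror the data type name constants defined in [crate::rule_model::syntax::datatypes].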
+#![allow(missing_docs)]
+
+use enum_assoc::Assoc;
+use strum_macros::EnumIter;
+
+use crate::rule_model::syntax::datatypes;
+
+#[derive(Assoc, EnumIter, Debug, Copy, Clone, PartialEq, Eq)]
+#[func(pub fn name(&self) -> &'static str)]
+pub enum DataType {
+    /// 64bit integer number
+    #[assoc(name = datatypes::DATATYPE_INT)]
+    Integer,
+    /// 32bit floating point number
+    #[assoc(name = datatypes::DATATYPE_FLOAT)]
+    Float,
+    /// 64bit floating point number
+    #[assoc(name = datatypes::DATATYPE_DOUBLE)]
+    Double,
+    /// String
+    #[assoc(name = datatypes::DATATYPE_STRING)]
+    String,
+    /// Any data value
+    #[assoc(name = datatypes::DATATYPE_ANY)]
+    Any,
+}
diff --git a/nemo/src/rule_model/components/import_export/attributes.rs b/nemo/src/rule_model/components/import_export/attributes.rs
index d83f8e00f..159bb857c 100644
--- a/nemo/src/rule_model/components/import_export/attributes.rs
+++ b/nemo/src/rule_model/components/import_export/attributes.rs
@@ -10,18 +10,17 @@ use crate::rule_model::syntax::import_export::attributes;
 /// Supported attributes in import/export directives
 #[derive(Assoc, Debug, Copy, Clone, PartialEq, Eq, Hash)]
 #[func(pub fn name(&self) -> &'static str)]
-#[func(pub fn from_name(name: &str) -> Option<Self>)]
 pub enum ImportExportAttribute {
     /// Location of the file
     #[assoc(name = attributes::ATTRIBUTE_NAME_RESOURCE)]
     Resource,
-    /// Datatypes of the input relations
+    /// Data types of the input relations
     #[assoc(name = attributes::ATTRIBUTE_NAME_FORMAT)]
     Format,
     /// Base IRI
     #[assoc(name = attributes::ATTRIBUTE_NAME_BASE)]
     Base,
-    /// Delimiter used to seperate values
+    /// Delimiter used to separate values
     #[assoc(name = attributes::ATTRIBUTE_NAME_DSV_DELIMITER)]
     Delimiter,
     /// Compression format
diff --git a/nemo/src/rule_model/components/import_export/compression.rs b/nemo/src/rule_model/components/import_export/compression.rs
index 9ccc45d8f..2e7c9bcc3 100644
--- a/nemo/src/rule_model/components/import_export/compression.rs
+++ b/nemo/src/rule_model/components/import_export/compression.rs
@@ -10,7 +10,6 @@ use crate::rule_model::syntax::import_export::compression;
 /// Compression formats
 #[derive(Assoc, Debug, Copy, Clone, PartialEq, Eq)]
 #[func(pub fn name(&self) -> &'static str)]
-#[func(pub fn from_name(name: &str) -> Option<Self>)]
 pub enum CompressionFormat {
     /// No compression
     #[assoc(name = compression::VALUE_COMPRESSION_NONE)]
diff --git a/nemo/src/rule_model/components/import_export/file_formats.rs b/nemo/src/rule_model/components/import_export/file_formats.rs
index 51db58466..7d9c7ce99 100644
--- a/nemo/src/rule_model/components/import_export/file_formats.rs
+++ b/nemo/src/rule_model/components/import_export/file_formats.rs
@@ -22,7 +22,6 @@ pub(crate) enum AttributeRequirement {
 /// Supported file formats
 #[derive(Assoc, Debug, Copy, Clone, Eq, PartialEq, Hash)]
 #[func(pub fn name(&self) -> &'static str)]
-#[func(pub fn from_name(name: &str) -> Option<Self>)]
 #[func(pub fn extension(&self) -> &'static str)]
 #[func(pub fn attributes(&self) -> HashMap<ImportExportAttribute, AttributeRequirement>)]
 pub enum FileFormat {
diff --git a/nemo/src/rule_model/components/term/aggregate.rs b/nemo/src/rule_model/components/term/aggregate.rs
index 09214e1c7..2b44682fc 100644
--- a/nemo/src/rule_model/components/term/aggregate.rs
+++ b/nemo/src/rule_model/components/term/aggregate.rs
@@ -18,7 +18,6 @@ use super::{primitive::variable::Variable, Term};
 /// Aggregate operation on logical values
 #[derive(Assoc, EnumIter, Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
 #[func(pub fn name(&self) -> &'static str)]
-#[func(pub fn 
from_name(name: &str) -> Option<Self>)]
 pub enum AggregateKind {
     /// Count of distinct values
     #[assoc(name = aggregates::AGGREGATE_COUNT)]
diff --git a/nemo/src/rule_model/components/term/operation.rs b/nemo/src/rule_model/components/term/operation.rs
index 552c91b5a..eb5c17d7c 100644
--- a/nemo/src/rule_model/components/term/operation.rs
+++ b/nemo/src/rule_model/components/term/operation.rs
@@ -42,11 +42,6 @@ impl Operation {
         }
     }
 
-    /// Create a new [Operation] giving the string name of the operation.
-    pub fn new_from_name(operation: &str, subterms: Vec<Term>) -> Option<Self> {
-        Some(Self::new(OperationKind::from_name(operation)?, subterms))
-    }
-
     /// Check whether this operation has the form of an assignment of a variable to a term.
     /// If so return the variable and the term as a pair or `None` otherwise.
     ///
diff --git a/nemo/src/rule_model/components/term/operation/operation_kind.rs b/nemo/src/rule_model/components/term/operation/operation_kind.rs
index ea48ad193..5d3cccb04 100644
--- a/nemo/src/rule_model/components/term/operation/operation_kind.rs
+++ b/nemo/src/rule_model/components/term/operation/operation_kind.rs
@@ -41,7 +41,6 @@ impl OperationNumArguments {
 /// Supported operations
 #[derive(Assoc, EnumIter, Debug, Copy, Clone, PartialEq, Eq, Hash, PartialOrd)]
 #[func(pub fn name(&self) -> &'static str)]
-#[func(pub fn from_name(name: &str) -> Option<Self>)]
 #[func(pub fn num_arguments(&self) -> OperationNumArguments)]
 #[func(pub fn is_boolean(&self) -> bool)]
 pub enum OperationKind {
@@ -90,26 +89,26 @@ pub enum OperationKind {
     #[assoc(num_arguments = OperationNumArguments::Binary)]
     #[assoc(is_boolean = false)]
     NumericRemainder,
+    /// Numeric greater than or equals comparison
+    #[assoc(name = builtins::BUILTIN_GREATEREQ)]
+    #[assoc(num_arguments = OperationNumArguments::Binary)]
+    #[assoc(is_boolean = false)]
+    NumericGreaterthaneq,
     /// Numeric greater than comparison
     #[assoc(name = builtins::BUILTIN_GREATER)]
     #[assoc(num_arguments = OperationNumArguments::Binary)]
     #[assoc(is_boolean = false)]
     NumericGreaterthan,
-    /// Numeric greater than or equals comparison
-    #[assoc(name = builtins::BUILTIN_GREATEREQ)]
+    /// Numeric less than or equals comparison
+    #[assoc(name = builtins::BUILTIN_LESSEQ)]
     #[assoc(num_arguments = OperationNumArguments::Binary)]
     #[assoc(is_boolean = false)]
-    NumericGreaterthaneq,
+    NumericLessthaneq,
     /// Numeric less than comparison
     #[assoc(name = builtins::BUILTIN_LESS)]
     #[assoc(num_arguments = OperationNumArguments::Binary)]
     #[assoc(is_boolean = false)]
     NumericLessthan,
-    /// Numeric less than or equals comparison
-    #[assoc(name = builtins::BUILTIN_LESSQ)]
-    #[assoc(num_arguments = OperationNumArguments::Binary)]
-    #[assoc(is_boolean = false)]
-    NumericLessthaneq,
     /// Lexicographic comparison between strings
     #[assoc(name = builtins::BUILTIN_COMPARE)]
     #[assoc(num_arguments = OperationNumArguments::Binary)]
@@ -215,11 +214,6 @@ pub enum OperationKind {
     #[assoc(num_arguments = OperationNumArguments::Unary)]
     #[assoc(is_boolean = false)]
     LanguageTag,
-    /// Lexical value
-    #[assoc(name = builtins::BUILTIN_STR)]
-    #[assoc(num_arguments = OperationNumArguments::Unary)]
-    #[assoc(is_boolean = false)]
-    LexicalValue,
     /// Absolute value of a numeric value
     #[assoc(name = builtins::BUILTIN_ABS)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
     #[assoc(is_boolean = false)]
@@ -330,6 +324,11 @@ pub enum OperationKind {
     #[assoc(num_arguments = OperationNumArguments::Unary)]
     #[assoc(is_boolean = false)]
     StringConcatenation,
+    /// Lexical value
+    #[assoc(name = builtins::BUILTIN_STR)]
+    #[assoc(num_arguments = 
OperationNumArguments::Unary)]
+    #[assoc(is_boolean = false)]
+    LexicalValue,
 }
 
 impl OperationKind {
@@ -350,3 +349,32 @@ impl Display for OperationKind {
         write!(f, "{}", self.name())
     }
 }
+
+#[cfg(test)]
+mod test {
+    use strum::IntoEnumIterator;
+
+    use super::OperationKind;
+
+    #[test]
+    fn operation_order() {
+        // For the parsing to work correctly,
+        // every entry in `OperationKind`
+        // must be arranged in such a way that no operation name
+        // is the prefix of a subsequent operation name
+
+        let names = OperationKind::iter()
+            .map(|kind| kind.name())
+            .collect::<Vec<_>>();
+
+        for (name_index, name) in names.iter().enumerate() {
+            if name_index == names.len() - 1 {
+                break;
+            }
+
+            assert!(names[(name_index + 1)..]
+                .iter()
+                .all(|remaining| !remaining.starts_with(name)))
+        }
+    }
+}
diff --git a/nemo/src/rule_model/program.rs b/nemo/src/rule_model/program.rs
index bc17f0d36..f23c269a9 100644
--- a/nemo/src/rule_model/program.rs
+++ b/nemo/src/rule_model/program.rs
@@ -62,250 +62,250 @@ impl ProgramBuilder {
     }
 }
 
-impl Program {
-    /// Build a [Program] from an [ast::program::Program].
-    pub fn from_ast(ast_program: ast::program::Program) -> Self {
-        let mut program = Program::default();
-
-        for (_statement_index, statement) in ast_program.statements.iter().enumerate() {
-            match statement {
-                ast::statement::Statement::Directive(directive) => {
-                    program.ast_build_directive(directive);
-                }
-                ast::statement::Statement::Fact {
-                    span: _span,
-                    doc_comment: _doc_comment,
-                    fact: _atom,
-                    dot: _dot,
-                } => todo!(),
-                ast::statement::Statement::Rule { head, body, .. } => {
-                    program.ast_build_rule(head, body);
-                }
-                ast::statement::Statement::Comment(_) => todo!(),
-                ast::statement::Statement::Error(_) => todo!(),
-            }
-        }
-
-        program
-    }
-
-    fn ast_build_rule(
-        &mut self,
-        head: &ast::List,
-        body: &ast::List,
-    ) {
-        let origin = Origin::External(self.rules.len());
-
-        let mut rule_builder = RuleBuilder::default().origin(origin);
-
-        // TODO: Implement a normal iterator to avoid cloning
-        for (head_index, head_atom) in head.clone().into_iter().enumerate() {
-            let origin = Origin::External(head_index);
-            if let Literal::Positive(atom) = Self::ast_build_literal(origin, &head_atom) {
-                rule_builder.add_head_atom_mut(atom);
-            } else {
-                unreachable!("head must only contain positive atoms")
-            }
-        }
-
-        // TODO: Implement a normal iterator to avoid cloning
-        for (body_index, body_atom) in body.clone().into_iter().enumerate() {
-            let origin = Origin::External(body_index);
-            rule_builder.add_body_literal_mut(Self::ast_build_literal(origin, &body_atom));
-        }
-
-        self.rules.push(rule_builder.finalize());
-    }
-
-    fn ast_build_literal(origin: Origin, atom: &ast::atom::Atom) -> Literal {
-        match atom {
-            ast::atom::Atom::Positive(positive_atom) => {
-                Literal::Positive(Self::ast_build_atom(origin, positive_atom))
-            }
-            ast::atom::Atom::Negative {
-                atom: negative_atom,
-                ..
-            } => Literal::Negative(Self::ast_build_atom(origin, negative_atom)),
-            ast::atom::Atom::InfixAtom {
-                lhs,
-                operation,
-                rhs,
-                ..
- } => { - let left = Self::ast_build_inner_term(Origin::External(0), lhs); - let right = Self::ast_build_inner_term(Origin::External(1), rhs); - - Literal::Operation( - Operation::new_from_name(&operation.to_string(), vec![left, right]) - .expect("unkown infix operation"), - ) - } - ast::atom::Atom::Map(_) => { - // Return unsupported error - todo!() - } - } - } - - fn ast_build_atom(origin: Origin, atom: &ast::named_tuple::NamedTuple) -> Atom { - let predicate_name = atom.identifier.to_string(); - let subterms = match &atom.tuple.terms { - Some(terms) => terms.to_item_vec(), - None => vec![], - }; - - let mut translated_subterms = Vec::new(); - - for (term_index, subterm) in subterms.into_iter().enumerate() { - let origin = Origin::External(term_index); - translated_subterms.push(Self::ast_build_inner_term(origin, &subterm)); - } - - Atom::new(&predicate_name, translated_subterms).set_origin(origin) - } - - fn ast_build_inner_term(origin: Origin, term: &ast::term::Term) -> Term { - match term { - ast::term::Term::Primitive(primitive) => Self::ast_build_primitive(origin, primitive), - ast::term::Term::UniversalVariable(name) => Term::universal_variable(&name.to_string()), - ast::term::Term::ExistentialVariable(name) => { - Term::existential_variable(&name.to_string()) - } - ast::term::Term::Binary { - lhs, - operation, - rhs, - .. - } => { - let left = Self::ast_build_inner_term(Origin::External(0), lhs); - let right = Self::ast_build_inner_term(Origin::External(1), rhs); - - Term::Operation( - Operation::new_from_name(&operation.to_string(), vec![left, right]) - .expect("unrecognized binary operation"), - ) - } - ast::term::Term::Aggregation { - operation: _, - terms: _, - .. - } => { - todo!() - } - ast::term::Term::Tuple(tuple) => Self::ast_build_inner_tuple(origin, tuple), - ast::term::Term::NamedTuple(named_tuple) => { - Self::ast_build_inner_named_tuple(origin, named_tuple) - } - ast::term::Term::Map(_) => todo!(), - ast::term::Term::Blank(_) => todo!(), - } - .set_origin(origin) - } - - fn ast_build_primitive(origin: Origin, primitive: &ast::term::Primitive) -> Term { - match primitive { - ast::term::Primitive::Constant(value) => { - Term::ground(AnyDataValue::new_iri(value.to_string())) - } - ast::term::Primitive::PrefixedConstant { - span: _, - prefix: _, - colon: _, - constant: _, - } => todo!(), - ast::term::Primitive::Number { - span: _, - sign: _, - before: _, - dot: _, - after: _, - exponent: _, - } => todo!(), - ast::term::Primitive::String(string) => { - Term::ground(AnyDataValue::new_plain_string(string.to_string())) - } - ast::term::Primitive::Iri(iri) => Term::ground(AnyDataValue::new_iri(iri.to_string())), - ast::term::Primitive::RdfLiteral { string, iri, .. 
} => { - Term::ground(AnyDataValue::new_other(string.to_string(), iri.to_string())) - } - } - .set_origin(origin) - } - - fn ast_build_inner_tuple(_origin: Origin, tuple: &ast::tuple::Tuple) -> Term { - let subterms = match &tuple.terms { - Some(terms) => terms.to_item_vec(), - None => vec![], - }; - - let mut translated_subterms = Vec::new(); - - for (term_index, subterm) in subterms.into_iter().enumerate() { - let origin = Origin::External(term_index); - translated_subterms.push(Self::ast_build_inner_term(origin, &subterm)); - } - - Term::Tuple(Tuple::new(translated_subterms)) - } - - fn ast_build_inner_named_tuple( - _origin: Origin, - named_tuple: &ast::named_tuple::NamedTuple, - ) -> Term { - let subterms = match &named_tuple.tuple.terms { - Some(terms) => terms.to_item_vec(), - None => vec![], - }; - - let mut translated_subterms = Vec::new(); - - for (term_index, subterm) in subterms.into_iter().enumerate() { - let origin = Origin::External(term_index); - translated_subterms.push(Self::ast_build_inner_term(origin, &subterm)); - } - - let name = &named_tuple.identifier.to_string(); - match OperationKind::from_name(name) { - Some(kind) => Term::Operation(Operation::new(kind, translated_subterms)), - None => Term::FunctionTerm(FunctionTerm::new(name, translated_subterms)), - } - } - - fn ast_build_directive(&mut self, directive: &ast::directive::Directive) { - match directive { - ast::directive::Directive::Base { base_iri: _, .. } => { - // self.base = Some(Base::new(base_iri.to_string())); - // TODO: Set origin - } - ast::directive::Directive::Prefix { - span: _, - doc_comment: _, - prefix: _, - prefix_iri: _, - dot: _, - } => todo!(), - ast::directive::Directive::Import { - span: _, - doc_comment: _, - predicate: _, - arrow: _, - map: _, - dot: _, - } => todo!(), - ast::directive::Directive::Export { - span: _, - doc_comment: _, - predicate: _, - arrow: _, - map: _, - dot: _, - } => todo!(), - ast::directive::Directive::Output { - span: _, - doc_comment: _, - predicates: _, - dot: _, - } => todo!(), - } - } -} +// impl Program { +// /// Build a [Program] from an [ast::program::Program]. +// pub fn from_ast(ast_program: ast::program::Program) -> Self { +// let mut program = Program::default(); + +// for (_statement_index, statement) in ast_program.statements.iter().enumerate() { +// match statement { +// ast::statement::Statement::Directive(directive) => { +// program.ast_build_directive(directive); +// } +// ast::statement::Statement::Fact { +// span: _span, +// doc_comment: _doc_comment, +// fact: _atom, +// dot: _dot, +// } => todo!(), +// ast::statement::Statement::Rule { head, body, .. 
} => { +// program.ast_build_rule(head, body); +// } +// ast::statement::Statement::Comment(_) => todo!(), +// ast::statement::Statement::Error(_) => todo!(), +// } +// } + +// program +// } + +// fn ast_build_rule( +// &mut self, +// head: &ast::List, +// body: &ast::List, +// ) { +// let origin = Origin::External(self.rules.len()); + +// let mut rule_builder = RuleBuilder::default().origin(origin); + +// // TODO: Implement a normal iterator to avoid cloning +// for (head_index, head_atom) in head.clone().into_iter().enumerate() { +// let origin = Origin::External(head_index); +// if let Literal::Positive(atom) = Self::ast_build_literal(origin, &head_atom) { +// rule_builder.add_head_atom_mut(atom); +// } else { +// unreachable!("head must only contain positive atoms") +// } +// } + +// // TODO: Implement a normal iterator to avoid cloning +// for (body_index, body_atom) in body.clone().into_iter().enumerate() { +// let origin = Origin::External(body_index); +// rule_builder.add_body_literal_mut(Self::ast_build_literal(origin, &body_atom)); +// } + +// self.rules.push(rule_builder.finalize()); +// } + +// fn ast_build_literal(origin: Origin, atom: &ast::atom::Atom) -> Literal { +// match atom { +// ast::atom::Atom::Positive(positive_atom) => { +// Literal::Positive(Self::ast_build_atom(origin, positive_atom)) +// } +// ast::atom::Atom::Negative { +// atom: negative_atom, +// .. +// } => Literal::Negative(Self::ast_build_atom(origin, negative_atom)), +// ast::atom::Atom::InfixAtom { +// lhs, +// operation, +// rhs, +// .. +// } => { +// let left = Self::ast_build_inner_term(Origin::External(0), lhs); +// let right = Self::ast_build_inner_term(Origin::External(1), rhs); + +// Literal::Operation( +// Operation::new_from_name(&operation.to_string(), vec![left, right]) +// .expect("unkown infix operation"), +// ) +// } +// ast::atom::Atom::Map(_) => { +// // Return unsupported error +// todo!() +// } +// } +// } + +// fn ast_build_atom(origin: Origin, atom: &ast::named_tuple::NamedTuple) -> Atom { +// let predicate_name = atom.identifier.to_string(); +// let subterms = match &atom.tuple.terms { +// Some(terms) => terms.to_item_vec(), +// None => vec![], +// }; + +// let mut translated_subterms = Vec::new(); + +// for (term_index, subterm) in subterms.into_iter().enumerate() { +// let origin = Origin::External(term_index); +// translated_subterms.push(Self::ast_build_inner_term(origin, &subterm)); +// } + +// Atom::new(&predicate_name, translated_subterms).set_origin(origin) +// } + +// fn ast_build_inner_term(origin: Origin, term: &ast::term::Term) -> Term { +// match term { +// ast::term::Term::Primitive(primitive) => Self::ast_build_primitive(origin, primitive), +// ast::term::Term::UniversalVariable(name) => Term::universal_variable(&name.to_string()), +// ast::term::Term::ExistentialVariable(name) => { +// Term::existential_variable(&name.to_string()) +// } +// ast::term::Term::Binary { +// lhs, +// operation, +// rhs, +// .. +// } => { +// let left = Self::ast_build_inner_term(Origin::External(0), lhs); +// let right = Self::ast_build_inner_term(Origin::External(1), rhs); + +// Term::Operation( +// Operation::new_from_name(&operation.to_string(), vec![left, right]) +// .expect("unrecognized binary operation"), +// ) +// } +// ast::term::Term::Aggregation { +// operation: _, +// terms: _, +// .. 
+// } => { +// todo!() +// } +// ast::term::Term::Tuple(tuple) => Self::ast_build_inner_tuple(origin, tuple), +// ast::term::Term::NamedTuple(named_tuple) => { +// Self::ast_build_inner_named_tuple(origin, named_tuple) +// } +// ast::term::Term::Map(_) => todo!(), +// ast::term::Term::Blank(_) => todo!(), +// } +// .set_origin(origin) +// } + +// fn ast_build_primitive(origin: Origin, primitive: &ast::term::Primitive) -> Term { +// match primitive { +// ast::term::Primitive::Constant(value) => { +// Term::ground(AnyDataValue::new_iri(value.to_string())) +// } +// ast::term::Primitive::PrefixedConstant { +// span: _, +// prefix: _, +// colon: _, +// constant: _, +// } => todo!(), +// ast::term::Primitive::Number { +// span: _, +// sign: _, +// before: _, +// dot: _, +// after: _, +// exponent: _, +// } => todo!(), +// ast::term::Primitive::String(string) => { +// Term::ground(AnyDataValue::new_plain_string(string.to_string())) +// } +// ast::term::Primitive::Iri(iri) => Term::ground(AnyDataValue::new_iri(iri.to_string())), +// ast::term::Primitive::RdfLiteral { string, iri, .. } => { +// Term::ground(AnyDataValue::new_other(string.to_string(), iri.to_string())) +// } +// } +// .set_origin(origin) +// } + +// fn ast_build_inner_tuple(_origin: Origin, tuple: &ast::tuple::Tuple) -> Term { +// let subterms = match &tuple.terms { +// Some(terms) => terms.to_item_vec(), +// None => vec![], +// }; + +// let mut translated_subterms = Vec::new(); + +// for (term_index, subterm) in subterms.into_iter().enumerate() { +// let origin = Origin::External(term_index); +// translated_subterms.push(Self::ast_build_inner_term(origin, &subterm)); +// } + +// Term::Tuple(Tuple::new(translated_subterms)) +// } + +// fn ast_build_inner_named_tuple( +// _origin: Origin, +// named_tuple: &ast::named_tuple::NamedTuple, +// ) -> Term { +// let subterms = match &named_tuple.tuple.terms { +// Some(terms) => terms.to_item_vec(), +// None => vec![], +// }; + +// let mut translated_subterms = Vec::new(); + +// for (term_index, subterm) in subterms.into_iter().enumerate() { +// let origin = Origin::External(term_index); +// translated_subterms.push(Self::ast_build_inner_term(origin, &subterm)); +// } + +// let name = &named_tuple.identifier.to_string(); +// match OperationKind::from_name(name) { +// Some(kind) => Term::Operation(Operation::new(kind, translated_subterms)), +// None => Term::FunctionTerm(FunctionTerm::new(name, translated_subterms)), +// } +// } + +// fn ast_build_directive(&mut self, directive: &ast::directive::Directive) { +// match directive { +// ast::directive::Directive::Base { base_iri: _, .. 
} => { +// // self.base = Some(Base::new(base_iri.to_string())); +// // TODO: Set origin +// } +// ast::directive::Directive::Prefix { +// span: _, +// doc_comment: _, +// prefix: _, +// prefix_iri: _, +// dot: _, +// } => todo!(), +// ast::directive::Directive::Import { +// span: _, +// doc_comment: _, +// predicate: _, +// arrow: _, +// map: _, +// dot: _, +// } => todo!(), +// ast::directive::Directive::Export { +// span: _, +// doc_comment: _, +// predicate: _, +// arrow: _, +// map: _, +// dot: _, +// } => todo!(), +// ast::directive::Directive::Output { +// span: _, +// doc_comment: _, +// predicates: _, +// dot: _, +// } => todo!(), +// } +// } +// } diff --git a/nemo/src/rule_model/syntax.rs b/nemo/src/rule_model/syntax.rs index af21b4583..c6a0042ee 100644 --- a/nemo/src/rule_model/syntax.rs +++ b/nemo/src/rule_model/syntax.rs @@ -4,4 +4,5 @@ pub(crate) mod aggregates; pub(crate) mod builtins; +pub(crate) mod datatypes; pub(crate) mod import_export; diff --git a/nemo/src/rule_model/syntax/builtins.rs b/nemo/src/rule_model/syntax/builtins.rs index 4389690c2..3ee8e1907 100644 --- a/nemo/src/rule_model/syntax/builtins.rs +++ b/nemo/src/rule_model/syntax/builtins.rs @@ -11,7 +11,7 @@ pub(crate) const BUILTIN_GREATEREQ: &str = "GREATEREQ"; /// Check if a numeric value is smaller than another pub(crate) const BUILTIN_LESS: &str = "LESS"; /// Check if a numeric value is smaller or equal to another -pub(crate) const BUILTIN_LESSQ: &str = "LESSEQ"; +pub(crate) const BUILTIN_LESSEQ: &str = "LESSEQ"; /// Check if value is an integer pub(crate) const BUILTIN_IS_INTEGER: &str = "isInteger"; /// Check if value is a 32bit floating point number diff --git a/nemo/src/rule_model/syntax/datatypes.rs b/nemo/src/rule_model/syntax/datatypes.rs new file mode 100644 index 000000000..dbe02911c --- /dev/null +++ b/nemo/src/rule_model/syntax/datatypes.rs @@ -0,0 +1,12 @@ +//! This module defines constants relating to the data types recognized by nemo. + +/// Can represent values of any type +pub(crate) const DATATYPE_ANY: &str = "any"; +/// Represents string values +pub(crate) const DATATYPE_STRING: &str = "string"; +/// Represents 64bit integer values +pub(crate) const DATATYPE_INT: &str = "int"; +/// Represents 64bit floating-point values +pub(crate) const DATATYPE_DOUBLE: &str = "double"; +/// Represents 32bit floating-point values +pub(crate) const DATATYPE_FLOAT: &str = "float"; diff --git a/nemo/src/rule_model/syntax/import_export.rs b/nemo/src/rule_model/syntax/import_export.rs index 0c062e31d..61c4f4440 100644 --- a/nemo/src/rule_model/syntax/import_export.rs +++ b/nemo/src/rule_model/syntax/import_export.rs @@ -3,4 +3,3 @@ pub(crate) mod attributes; pub(crate) mod compression; pub(crate) mod file_formats; -pub(crate) mod value_formats; diff --git a/nemo/src/rule_model/syntax/import_export/value_formats.rs b/nemo/src/rule_model/syntax/import_export/value_formats.rs deleted file mode 100644 index 2542366e8..000000000 --- a/nemo/src/rule_model/syntax/import_export/value_formats.rs +++ /dev/null @@ -1,22 +0,0 @@ -//! This module defines constants relating to the value formats accepted by import and export directives. - -/// The name of the general, best-effort value format. Importers/exporters suporting this format will usually -/// accept "any" input value and interpret it in the most natural way. Likewise, any value should be writable -/// in this format. -pub(crate) const VALUE_FORMAT_ANY: &str = "any"; -/// The name of the value format that interprets all values as plain strings. 
Importers/exporters suporting this
-/// format will usually accept any input value and interpret it as strings in the most literal way. Only strings
-/// can be written in this format.
-pub(crate) const VALUE_FORMAT_STRING: &str = "string";
-/// The name of the value format that interprets values as integers whenever possible. Importers/exporters suporting
-/// this format will usually only accept input values that are formatted like integers. Conversely, only integer values
-/// can be written in this format.
-pub(crate) const VALUE_FORMAT_INT: &str = "int";
-/// The name of the value format that interprets values as double-precision floating point numbers whenever possible.
-/// Importers/exporters suporting this format will usually only accept input values that are formatted like decimal numbers,
-/// integers, or floating-point numbers in scientific notation. Conversely, only double values
-/// can be written in this format.
-pub(crate) const VALUE_FORMAT_DOUBLE: &str = "double";
-/// The name of the special value format that indicates that a vlaue should be ignored altogether.
-/// The respective column/parameter will be skiped in reading/writing.
-pub(crate) const VALUE_FORMAT_SKIP: &str = "skip";
diff --git a/nemo/src/rule_model/translation.rs b/nemo/src/rule_model/translation.rs
index caf8149b3..ec3522f0e 100644
--- a/nemo/src/rule_model/translation.rs
+++ b/nemo/src/rule_model/translation.rs
@@ -124,33 +124,35 @@ impl<'a> ASTProgramTranslation<'a> {
     ) -> Result<Program, ProgramErrorReport<'a>> {
         let mut program_builder = ProgramBuilder::default();
 
-        for (statement_index, rule) in vec![ast.statements()].into_iter().enumerate() {
-            let origin = Origin::External(statement_index);
-
-            match self.build_rule(origin, rule) {
-                Ok(new_rule) => program_builder.add_rule(new_rule),
-                Err(translation_error) => self
-                    .errors
-                    .push(ProgramError::TranslationError(translation_error)),
-            }
-        }
-
-        self.errors.extend(
-            self.validation_error_builder
-                .finalize()
-                .into_iter()
-                .map(ProgramError::ValidationError),
-        );
-
-        if self.errors.is_empty() {
-            Ok(program_builder.finalize())
-        } else {
-            Err(ProgramErrorReport {
-                input: self.input,
-                label: self.input_label,
-                errors: self.errors,
-            })
-        }
+        // for (statement_index, rule) in vec![ast.statements()].into_iter().enumerate() {
+        //     let origin = Origin::External(statement_index);
+
+        //     match self.build_rule(origin, rule) {
+        //         Ok(new_rule) => program_builder.add_rule(new_rule),
+        //         Err(translation_error) => self
+        //             .errors
+        //             .push(ProgramError::TranslationError(translation_error)),
+        //     }
+        // }
+
+        // self.errors.extend(
+        //     self.validation_error_builder
+        //         .finalize()
+        //         .into_iter()
+        //         .map(ProgramError::ValidationError),
+        // );
+
+        // if self.errors.is_empty() {
+        //     Ok(program_builder.finalize())
+        // } else {
+        //     Err(ProgramErrorReport {
+        //         input: self.input,
+        //         label: self.input_label,
+        //         errors: self.errors,
+        //     })
+        // }
+
+        todo!()
     }
 
     fn build_rule(
@@ -246,6 +248,7 @@ impl<'a> ASTProgramTranslation<'a> {
         expression: &ast::expression::Expression,
     ) -> Result {
         Ok(match expression {
+            ast::expression::Expression::Arithmetic(_) => todo!(),
             ast::expression::Expression::Atom(atom) => todo!(),
             ast::expression::Expression::Blank(blank) => todo!(),
             ast::expression::Expression::Boolean(boolean) => todo!(),
@@ -289,13 +292,21 @@ impl<'a> ASTProgramTranslation<'a> {
                 }
             }
         },
+            ast::expression::Expression::Aggregation(_) => todo!(),
+            ast::expression::Expression::Infix(_) => todo!(),
+            ast::expression::Expression::Map(_) => todo!(),
+            
ast::expression::Expression::Negation(_) => todo!(), + ast::expression::Expression::Operation(_) => todo!(), } .set_origin(origin)) } - fn resolve_tag(&self, tag: &ast::tag::Tag<'a>) -> Result { - Ok(match tag { - ast::tag::Tag::Plain(token) => { + fn resolve_tag( + &self, + tag: &ast::tag::structure::StructureTag<'a>, + ) -> Result { + Ok(match tag.kind() { + ast::tag::structure::StructureTagKind::Plain(token) => { let token_string = token.to_string(); if let Some(base) = &self.base { @@ -304,7 +315,7 @@ impl<'a> ASTProgramTranslation<'a> { token_string } } - ast::tag::Tag::Prefixed { prefix, tag } => { + ast::tag::structure::StructureTagKind::Prefixed { prefix, tag } => { if let Some(expanded_prefix) = self.prefix_mapping.get(&prefix.to_string()) { format!("{expanded_prefix}{}", tag.to_string()) } else { @@ -315,7 +326,7 @@ impl<'a> ASTProgramTranslation<'a> { )); } } - ast::tag::Tag::Iri(iri) => iri.content(), + ast::tag::structure::StructureTagKind::Iri(iri) => iri.content(), }) } } From a38574f7dbac4fda0f2dd028be894894a874e897 Mon Sep 17 00:00:00 2001 From: Alex Ivliev Date: Sun, 21 Jul 2024 20:02:53 +0200 Subject: [PATCH 131/214] Reimplement error recovery (WIP) --- nemo-cli/src/example.rls | 1 - nemo-cli/src/main.rs | 3 + nemo/src/parser/ast/attribute.rs | 4 +- nemo/src/parser/ast/directive.rs | 43 +++++++- nemo/src/parser/ast/directive/unknown.rs | 50 ++++++++- nemo/src/parser/ast/program.rs | 41 +++++-- nemo/src/parser/ast/token.rs | 43 ++++++-- nemo/src/parser/error.rs | 130 ++++++++++++++++++----- nemo/src/parser/span.rs | 2 +- 9 files changed, 265 insertions(+), 52 deletions(-) delete mode 100644 nemo-cli/src/example.rls diff --git a/nemo-cli/src/example.rls b/nemo-cli/src/example.rls deleted file mode 100644 index bd9c18b8d..000000000 --- a/nemo-cli/src/example.rls +++ /dev/null @@ -1 +0,0 @@ -abc(?x, ?test, ?testt) :- p(?x, ?test) . \ No newline at end of file diff --git a/nemo-cli/src/main.rs b/nemo-cli/src/main.rs index 8cd65d6ab..473a473c9 100644 --- a/nemo-cli/src/main.rs +++ b/nemo-cli/src/main.rs @@ -179,6 +179,9 @@ fn run(mut cli: CliApp) -> Result<(), Error> { } }; + println!("Parsing successful"); + std::process::exit(0); + let program = match rule_model::translation::ASTProgramTranslation::initialize( &rules_content, rules.to_string_lossy().to_string(), diff --git a/nemo/src/parser/ast/attribute.rs b/nemo/src/parser/ast/attribute.rs index 50799b8df..2083fc310 100644 --- a/nemo/src/parser/ast/attribute.rs +++ b/nemo/src/parser/ast/attribute.rs @@ -1,7 +1,7 @@ //! This module defines [Attribute]. use nom::{ - character::complete::newline, + character::complete::line_ending, sequence::{delimited, pair, terminated, tuple}, }; @@ -56,7 +56,7 @@ impl<'a> ProgramAST<'a> for Attribute<'a> { Atom::parse, pair(WSoC::parse, Token::closed_bracket), ), - newline, + line_ending, ), )(input) .map(|(rest, content)| { diff --git a/nemo/src/parser/ast/directive.rs b/nemo/src/parser/ast/directive.rs index 7b49decc9..cab1debe7 100644 --- a/nemo/src/parser/ast/directive.rs +++ b/nemo/src/parser/ast/directive.rs @@ -1,12 +1,15 @@ //! This module defines [Directive]s. 
+#![allow(missing_docs)]
 
 use base::Base;
 use declare::Declare;
+use enum_assoc::Assoc;
 use export::Export;
 use import::Import;
 use nom::{branch::alt, combinator::map};
 use output::Output;
 use prefix::Prefix;
+use strum_macros::EnumIter;
 use unknown::UnknownDirective;
 
 use crate::parser::{
@@ -16,7 +19,7 @@ use crate::parser::{
     ParserResult,
 };
 
-use super::ProgramAST;
+use super::{token::TokenKind, ProgramAST};
 
 pub mod base;
 pub mod declare;
@@ -26,22 +29,56 @@ pub mod output;
 pub mod prefix;
 pub mod unknown;
 
-/// Type of directives
-#[derive(Debug)]
+/// Types of directives
+#[derive(Debug, Assoc, EnumIter, Clone, Copy, PartialEq, Eq)]
+#[func(pub fn token(&self) -> Option<TokenKind>)]
+pub enum DirectiveKind {
+    /// Base
+    #[assoc(token = TokenKind::BaseDirective)]
+    Base,
+    /// Declare
+    #[assoc(token = TokenKind::DeclareDirective)]
+    Declare,
+    /// Export
+    #[assoc(token = TokenKind::ExportDirective)]
+    Export,
+    /// Import
+    #[assoc(token = TokenKind::ImportDirective)]
+    Import,
+    /// Output
+    #[assoc(token = TokenKind::OutputDirective)]
+    Output,
+    /// Prefix
+    #[assoc(token = TokenKind::PrefixDirective)]
+    Prefix,
+    /// Unknown
+    Unknown,
+}
+
+/// Directive
+#[derive(Assoc, Debug)]
+#[func(pub fn kind(&self) -> DirectiveKind)]
 pub enum Directive<'a> {
     /// Base
+    #[assoc(kind = DirectiveKind::Base)]
     Base(Base<'a>),
     /// Declare
+    #[assoc(kind = DirectiveKind::Declare)]
     Declare(Declare<'a>),
     /// Export
+    #[assoc(kind = DirectiveKind::Export)]
     Export(Export<'a>),
     /// Import
+    #[assoc(kind = DirectiveKind::Import)]
     Import(Import<'a>),
     /// Output
+    #[assoc(kind = DirectiveKind::Output)]
     Output(Output<'a>),
    /// Prefix
+    #[assoc(kind = DirectiveKind::Prefix)]
     Prefix(Prefix<'a>),
     /// Unknown
+    #[assoc(kind = DirectiveKind::Unknown)]
     Unknown(UnknownDirective<'a>),
 }
diff --git a/nemo/src/parser/ast/directive/unknown.rs b/nemo/src/parser/ast/directive/unknown.rs
index d3a7d1991..f8f58b3a3 100644
--- a/nemo/src/parser/ast/directive/unknown.rs
+++ b/nemo/src/parser/ast/directive/unknown.rs
@@ -5,15 +5,20 @@ use nom::{
     combinator::recognize,
     sequence::{pair, preceded, separated_pair},
 };
+use nom_supreme::error::{BaseErrorKind, Expectation};
+use strum::IntoEnumIterator;
 
 use crate::parser::{
     ast::{comment::wsoc::WSoC, token::Token, ProgramAST},
     context::{context, ParserContext},
+    error::ParserErrorTree,
     input::ParserInput,
     span::ProgramSpan,
     ParserResult,
 };
 
+use super::DirectiveKind;
+
 /// Unknown directive specified by a user
 #[derive(Debug)]
 pub struct UnknownDirective<'a> {
@@ -36,6 +41,33 @@ impl<'a> UnknownDirective<'a> {
     pub fn content(&self) -> String {
         self.content.0.to_string()
     }
+
+    /// Parse the name of the directive.
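+    /// Succeeds only if the parsed name does not match the token of any known [DirectiveKind].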
+ pub fn parse_unknown(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>> { + let keyword_parser = |input: ParserInput<'a>| { + if let Ok((rest, matched)) = Token::name(input.clone()) { + let mut is_known = false; + + for directive in DirectiveKind::iter().map(|kind| kind.token()).flatten() { + if matched.to_string() == directive.name() { + is_known = true; + break; + } + } + + if !is_known { + return Ok((rest, matched)); + } + } + + Err(nom::Err::Error(ParserErrorTree::Base { + location: input, + kind: BaseErrorKind::Expected(Expectation::Tag("known directive")), + })) + }; + + keyword_parser(input) + } } const CONTEXT: ParserContext = ParserContext::UnknownDirective; @@ -58,7 +90,7 @@ impl<'a> ProgramAST<'a> for UnknownDirective<'a> { context( CONTEXT, separated_pair( - preceded(Token::at, Token::name), + preceded(Token::at, Self::parse_unknown), pair(WSoC::parse_whitespace_comment, WSoC::parse), recognize(is_not(".")), ), @@ -108,5 +140,21 @@ mod test { let result = result.unwrap(); assert_eq!(expected, (result.1.name(), result.1.content())); } + + let known_directives = vec![ + "@base something", + "@declare something", + "@import something", + "@export something", + "@prefix something", + "@output something", + ]; + + for input in known_directives { + let parser_input = ParserInput::new(input, ParserState::default()); + let result = all_consuming(UnknownDirective::parse)(parser_input); + + assert!(result.is_err()); + } } } diff --git a/nemo/src/parser/ast/program.rs b/nemo/src/parser/ast/program.rs index ec2d46f27..150e6a6a3 100644 --- a/nemo/src/parser/ast/program.rs +++ b/nemo/src/parser/ast/program.rs @@ -8,6 +8,7 @@ use nom::{ use crate::parser::{ context::{context, ParserContext}, + error::{recover, report_error}, input::ParserInput, span::ProgramSpan, ParserResult, @@ -78,14 +79,10 @@ impl<'a> ProgramAST<'a> for Program<'a> { opt(TopLevelComment::parse), delimited( WSoC::parse, - separated_list0(WSoC::parse, Statement::parse), + separated_list0(WSoC::parse, recover(report_error(Statement::parse))), WSoC::parse, ), - ), // pair( - // TopLevelComment::parse, - // WSoC::parse, - // // terminated(many0(preceded(WSoC::parse, Statement::parse)), WSoC::parse), - // ), + ), )(input) .map(|(rest, (comment, statements))| { let rest_span = rest.span; @@ -95,7 +92,7 @@ impl<'a> ProgramAST<'a> for Program<'a> { Self { span: input_span.until_rest(&rest_span), comment, - statements, + statements: statements.into_iter().flatten().collect(), }, ) }) @@ -138,4 +135,34 @@ mod test { assert!(result.1.comment().is_some()); assert_eq!(result.1.statements.len(), 4); } + + #[test] + fn parser_recover() { + let program = "//! 
Top-level comment\n\ + // Declarations:\n\ + @declare oops a(_: int, _: int) .\n\ + @declare b(_: int, _: int) .\n\ + /// A fact\n\ + a(1, 2) \n\ + \n\ + // Rules:\n\ + \n\ + /// A rule\n\ + b(?y, ?x) <- a(?x, ?y) .\n\ + \n\ + c(?y, ?x) :- a(?x, ?y) .\n\ + // Some more comments + "; + + let parser_input = ParserInput::new(program, ParserState::default()); + let result = Program::parse(parser_input.clone()) + .expect("This should not fail") + .1; + + println!("{:?}", result.statements); + + assert!(result.comment.is_some()); + assert_eq!(result.statements.len(), 2); + // assert_eq!(parser_input.state.errors.borrow().len(), 3); + } } diff --git a/nemo/src/parser/ast/token.rs b/nemo/src/parser/ast/token.rs index e0447680b..4598ad251 100644 --- a/nemo/src/parser/ast/token.rs +++ b/nemo/src/parser/ast/token.rs @@ -187,9 +187,9 @@ pub enum TokenKind { /// White spaces #[assoc(name = "whitespace")] Whitespace, - /// End of file - #[assoc(name = "end-of-file")] - EndOfFile, + /// Double new line + #[assoc(name = "double newline")] + DoubleNewline, /// Token that captures errors #[assoc(name = "error")] Error, @@ -241,7 +241,7 @@ impl<'a> Token<'a> { } /// Parse [TokenKind::Name]. - pub fn name(input: ParserInput<'a>) -> ParserResult<'a, Token> { + pub fn name(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>> { context( ParserContext::token(TokenKind::Name), recognize(pair( @@ -261,7 +261,7 @@ impl<'a> Token<'a> { } /// Parse [TokenKind::Iri]. - pub fn iri(input: ParserInput<'a>) -> ParserResult<'a, Token> { + pub fn iri(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>> { is_not("> \n")(input).map(|(rest, result)| { ( rest, @@ -274,7 +274,7 @@ impl<'a> Token<'a> { } /// Parse [TokenKind::String]. - pub fn string(input: ParserInput<'a>) -> ParserResult<'a, Token> { + pub fn string(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>> { is_not("\"")(input).map(|(rest, result)| { ( rest, @@ -287,7 +287,7 @@ impl<'a> Token<'a> { } /// Parse [TokenKind::Digits]. - pub fn digits(input: ParserInput<'a>) -> ParserResult<'a, Token> { + pub fn digits(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>> { context(ParserContext::token(TokenKind::Digits), digit1)(input).map( |(rest_input, result)| { ( @@ -302,20 +302,45 @@ impl<'a> Token<'a> { } /// Parse [TokenKind::Whitespace]. - pub fn whitespace(input: ParserInput<'a>) -> ParserResult<'a, Token> { + pub fn whitespace(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>> { context(ParserContext::token(TokenKind::Whitespace), multispace1)(input).map( |(rest_input, result)| { ( rest_input, Token { span: result.span, - kind: TokenKind::Digits, + kind: TokenKind::Whitespace, }, ) }, ) } + /// Parse [TokenKind::DoubleNewline]. + pub fn double_newline(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>> { + context( + ParserContext::token(TokenKind::DoubleNewline), + alt((tag("\n\n"), tag("\r\n\r\n"), tag("\r\r"))), + )(input) + .map(|(rest_input, result)| { + ( + rest_input, + Token { + span: result.span, + kind: TokenKind::DoubleNewline, + }, + ) + }) + } + + /// Create [TokenKind::Error]. + pub fn error(span: ProgramSpan<'a>) -> Token<'a> { + Token { + span, + kind: TokenKind::Error, + } + } + string_token!(open_parenthesis, TokenKind::OpenParenthesis); string_token!(closed_parenthesis, TokenKind::ClosedParenthesis); string_token!(open_brace, TokenKind::OpenBrace); diff --git a/nemo/src/parser/error.rs b/nemo/src/parser/error.rs index d446da394..9f032d315 100644 --- a/nemo/src/parser/error.rs +++ b/nemo/src/parser/error.rs @@ -1,9 +1,21 @@ //! 
This module defines the error type that is returned when the parser is unsuccessful.
 
-use nom::Parser;
+use nom::{
+    branch::alt,
+    bytes::complete::{take_until, take_while},
+    character::complete::line_ending,
+    combinator::map,
+    sequence::{preceded, terminated},
+    Parser,
+};
 use nom_supreme::error::{GenericErrorTree, StackContext};
 
-use super::{context::ParserContext, span::CharacterPosition, ParserInput, ParserResult};
+use super::{
+    ast::{statement::Statement, token::Token},
+    context::ParserContext,
+    span::CharacterPosition,
+    ParserInput, ParserResult,
+};
 
 /// Error tree used by nom parser
 pub type ParserErrorTree<'a> = GenericErrorTree<
@@ -22,32 +34,67 @@ pub(crate) struct ParserError {
     pub(crate) context: Vec<ParserContext>,
 }
 
-// fn recover<'a, E>(
-//     mut parser: impl Parser<Input<'a, 's>, Statement<'a>, E>,
-//     error_msg: impl ToString,
-//     context: Context,
-//     _errors: ParserState<'s>,
-// ) -> impl FnMut(Input<'a, 's>) -> IResult<Input<'a, 's>, Statement<'a>, E> {
-//     move |input: Input<'a, 's>| match parser.parse(input) {
-//         Ok(result) => Ok(result),
-//         Err(err) if input.input.is_empty() => Err(err),
-//         Err(nom::Err::Error(_)) | Err(nom::Err::Failure(_)) => {
-//             let _err = Error {
-//                 pos: Position {
-//                     offset: input.input.location_offset(),
-//                     line: input.input.location_line(),
-//                     column: input.input.get_utf8_column() as u32,
-//                 },
-//                 msg: error_msg.to_string(),
-//                 context: vec![context],
-//             };
-//             // errors.report_error(err);
-//             let (rest_input, span) = skip_to_statement_end::<ErrorTree<Input<'a, 's>>>(input);
-//             Ok((rest_input, Statement::Error(span)))
-//         }
-//         Err(err) => Err(err),
-//     }
-// }
+/// Skip a statement, returning an error token.
+pub(crate) fn skip_statement<'a>(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>> {
+    let input_span = input.span;
+
+    let until_double_newline = map(
+        alt((
+            preceded(take_until("\n\n"), Token::double_newline),
+            preceded(take_until("\r\n\r\n"), Token::double_newline),
+            preceded(take_until("\r\r"), Token::double_newline),
+        )),
+        move |token| Token::error(input_span.enclose(&input_span, &token.span())),
+    );
+    let until_dot_newline = map(
+        alt((
+            preceded(take_until(".\n"), terminated(Token::dot, line_ending)),
+            preceded(take_until(".\r\n"), terminated(Token::dot, line_ending)),
+            preceded(take_until(".\r"), terminated(Token::dot, line_ending)),
+        )),
+        move |token| Token::error(input_span.enclose(&input_span, &token.span())),
+    );
+    let until_eof = map(take_while(|_| true), move |_| Token::error(input_span));
+
+    alt((until_dot_newline, until_double_newline, until_eof))(input)
+}
+
+pub(crate) fn recover<'a>(
+    mut parser: impl Parser<ParserInput<'a>, Statement<'a>, ParserErrorTree<'a>>,
+) -> impl FnMut(ParserInput<'a>) -> ParserResult<'a, Option<Statement<'a>>> {
+    move |input: ParserInput<'a>| match parser.parse(input.clone()) {
+        Ok((rest, statement)) => Ok((rest, Some(statement))),
+        Err(err) if input.span.0.is_empty() => Err(err),
+        Err(nom::Err::Error(_)) | Err(nom::Err::Failure(_)) => {
+            let (rest_input, _span) = skip_statement(input).expect("this parser cannot fail");
+            Ok((rest_input, None))
+        }
+        Err(err) => Err(err),
+    }
+}
+
+pub(crate) fn report_error<'a>(
+    mut parser: impl Parser<ParserInput<'a>, Statement<'a>, ParserErrorTree<'a>>,
+) -> impl FnMut(ParserInput<'a>) -> ParserResult<'a, Statement<'a>> {
+    move |input| match parser.parse(input.clone()) {
+        Ok(result) => Ok(result),
+        Err(e) => {
+            if input.span.0.is_empty() {
+                return Err(e);
+            };
+            match &e {
+                nom::Err::Incomplete(_) => (),
+                nom::Err::Error(err) | nom::Err::Failure(err) => {
+                    let (_deepest_pos, errors) = get_deepest_errors(err);
+                    for error in errors {
input.state.report_error(error); + } + } + }; + Err(e) + } + } +} /// Function to translate an [ParserErrorTree] returned by the nom parser /// into a [ParserError] that can be displayed to the user. @@ -168,3 +215,30 @@ fn get_deepest_errors<'a, 's>(e: &'a ParserErrorTree<'a>) -> (CharacterPosition, } } } + +#[cfg(test)] +mod test { + use crate::parser::{error::skip_statement, input::ParserInput, ParserState}; + + #[test] + fn skip_to_statement_end() { + let test = vec![ + ( + "some text ending in newline", + "some text ending in newline".to_string(), + ), + ("some text.\n More text", "some text.".to_string()), + ("some text\n\n More text", "some text\n\n".to_string()), + ]; + + for (input, expected) in test { + let parser_input = ParserInput::new(input, ParserState::default()); + let result = skip_statement(parser_input); + + assert!(result.is_ok()); + + let result = result.unwrap(); + assert_eq!(expected, result.1.to_string()); + } + } +} diff --git a/nemo/src/parser/span.rs b/nemo/src/parser/span.rs index c0c7d3e44..6fee3eddf 100644 --- a/nemo/src/parser/span.rs +++ b/nemo/src/parser/span.rs @@ -128,7 +128,7 @@ impl<'a> ProgramSpan<'a> { )) } } - /// TODO: Description and Specify safety conditions + /// Create a [ProgramSpan] that encloses the given [ProgramSpan]s. /// TODO: Description and Specify safety conditions and verify that this is correct pub fn enclose(&self, first: &Self, second: &Self) -> Self { From 105479af349d8f29b0c828b867a540a94e607d0b Mon Sep 17 00:00:00 2001 From: Alex Ivliev Date: Wed, 24 Jul 2024 00:33:25 +0200 Subject: [PATCH 132/214] Rework origin system --- nemo-cli/src/main.rs | 8 +- nemo-python/src/lib.rs | 14 +- nemo/src/parser.rs | 9 +- nemo/src/parser/ast.rs | 22 +- .../ast/expression/complex/operation.rs | 7 - nemo/src/parser/ast/program.rs | 9 +- nemo/src/parser/ast/statement.rs | 12 +- nemo/src/rule_model/components/rule.rs | 27 +- nemo/src/rule_model/error.rs | 285 ++++++++++++++---- nemo/src/rule_model/origin.rs | 4 +- nemo/src/rule_model/translation.rs | 221 ++++++-------- 11 files changed, 356 insertions(+), 262 deletions(-) diff --git a/nemo-cli/src/main.rs b/nemo-cli/src/main.rs index 473a473c9..54ee753fa 100644 --- a/nemo-cli/src/main.rs +++ b/nemo-cli/src/main.rs @@ -174,14 +174,11 @@ fn run(mut cli: CliApp) -> Result<(), Error> { { Ok(program) => program, Err(report) => { - report.eprint(report.build_reports(Color::Red))?; + report.eprint(report.build_reports())?; std::process::exit(1); } }; - println!("Parsing successful"); - std::process::exit(0); - let program = match rule_model::translation::ASTProgramTranslation::initialize( &rules_content, rules.to_string_lossy().to_string(), @@ -190,11 +187,12 @@ fn run(mut cli: CliApp) -> Result<(), Error> { { Ok(program) => program, Err(report) => { - report.eprint(report.build_reports(&program_ast, Color::Red))?; + report.eprint(report.build_reports().into_iter())?; std::process::exit(1); } }; + println!("Parsing successful"); std::process::exit(0); // let mut program = parse_program(rules_content)?; diff --git a/nemo-python/src/lib.rs b/nemo-python/src/lib.rs index 61adac8b2..66fa7ddb2 100644 --- a/nemo-python/src/lib.rs +++ b/nemo-python/src/lib.rs @@ -53,18 +53,16 @@ struct NemoProgram(nemo::model::Program); #[pyfunction] fn load_file(file: String) -> PyResult { let contents = read_to_string(file)?; - let ast = nemo::io::parser::parse_program_str(&contents).py_res()?; - let program = nemo::rule_model::program::Program::from_ast(ast); - let program = todo!("update NemoProgram to use the new rule 
model"); - Ok(NemoProgram(program)) + load_string(contents) } #[pyfunction] fn load_string(rules: String) -> PyResult { - let ast = nemo::io::parser::parse_program_str(&rules).py_res()?; - let program = nemo::rule_model::program::Program::from_ast(ast); - let program = todo!("update NemoProgram to use the new rule model"); - Ok(NemoProgram(program)) + // let ast = nemo::io::parser::parse_program_str(&rules).py_res()?; + // let program = nemo::rule_model::program::Program::from_ast(ast); + // let program = todo!("update NemoProgram to use the new rule model"); + // Ok(NemoProgram(program)) + todo!() } #[pymethods] diff --git a/nemo/src/parser.rs b/nemo/src/parser.rs index 51b2c9135..149fe4d90 100644 --- a/nemo/src/parser.rs +++ b/nemo/src/parser.rs @@ -47,7 +47,7 @@ pub struct Parser<'a> { /// Contains all errors that occurred during parsing #[derive(Debug)] pub struct ParserErrorReport<'a> { - /// Reference to the text that is going to be parser + /// Reference to the text that is going to be parsed input: &'a str, /// Label of the input text, usually a path of the input file label: String, @@ -72,10 +72,7 @@ impl<'a> ParserErrorReport<'a> { } /// Build a [Report] for each error. - pub fn build_reports( - &'a self, - color_error: Color, - ) -> impl Iterator)>> { + pub fn build_reports(&'a self) -> impl Iterator)>> { self.errors.iter().map(move |error| { let message = format!("expected `{}`", error.context[0].name()); @@ -84,7 +81,7 @@ impl<'a> ParserErrorReport<'a> { .with_label( Label::new((self.label.clone(), error.position.range())) .with_message(message) - .with_color(color_error), + .with_color(Color::Red), ) .finish() }) diff --git a/nemo/src/parser/ast.rs b/nemo/src/parser/ast.rs index 4da79a46a..ddb1b32ca 100644 --- a/nemo/src/parser/ast.rs +++ b/nemo/src/parser/ast.rs @@ -11,12 +11,12 @@ pub mod statement; pub mod tag; pub mod token; -use crate::rule_model::origin::Origin; +use std::fmt::Debug; use super::{context::ParserContext, span::ProgramSpan, ParserInput, ParserResult}; /// Trait implemented by nodes in the abstract syntax tree -pub trait ProgramAST<'a>: Sync { +pub trait ProgramAST<'a>: Debug + Sync { /// Return all children of this node. fn children(&self) -> Vec<&dyn ProgramAST>; @@ -30,22 +30,4 @@ pub trait ProgramAST<'a>: Sync { /// Return [ParserContext] indicating the type of node. fn context(&self) -> ParserContext; - - /// Locate a node from a stack of [Origin]s. 
- fn locate(&'a self, origin_stack: &[Origin]) -> Option<&'a dyn ProgramAST<'a>> - where - Self: Sized + 'a, - { - let mut current_node: &dyn ProgramAST = self; - - for origin in origin_stack { - if let &Origin::External(index) = origin { - current_node = *current_node.children().get(index)?; - } else { - return None; - } - } - - Some(current_node) - } } diff --git a/nemo/src/parser/ast/expression/complex/operation.rs b/nemo/src/parser/ast/expression/complex/operation.rs index 1099e08b6..0203e27f4 100644 --- a/nemo/src/parser/ast/expression/complex/operation.rs +++ b/nemo/src/parser/ast/expression/complex/operation.rs @@ -113,13 +113,6 @@ mod test { #[test] fn parse_operation() { - println!( - "{:?}", - OperationKind::iter() - .map(|kind| kind.name()) - .collect::>() - ); - let test = vec![ ("SUM(1)", OperationKind::NumericSum), ("strlen(1)", OperationKind::StringLength), diff --git a/nemo/src/parser/ast/program.rs b/nemo/src/parser/ast/program.rs index 150e6a6a3..3da33b2a4 100644 --- a/nemo/src/parser/ast/program.rs +++ b/nemo/src/parser/ast/program.rs @@ -2,8 +2,8 @@ use nom::{ combinator::opt, - multi::separated_list0, - sequence::{delimited, pair}, + multi::many0, + sequence::{delimited, pair, preceded}, }; use crate::parser::{ @@ -79,7 +79,10 @@ impl<'a> ProgramAST<'a> for Program<'a> { opt(TopLevelComment::parse), delimited( WSoC::parse, - separated_list0(WSoC::parse, recover(report_error(Statement::parse))), + many0(preceded( + WSoC::parse, + recover(report_error(Statement::parse)), + )), WSoC::parse, ), ), diff --git a/nemo/src/parser/ast/statement.rs b/nemo/src/parser/ast/statement.rs index af40c1574..f917619c1 100644 --- a/nemo/src/parser/ast/statement.rs +++ b/nemo/src/parser/ast/statement.rs @@ -63,7 +63,7 @@ pub struct Statement<'a> { /// Doc comment associated with this statement comment: Option>, /// The statement - statement: StatementKind<'a>, + kind: StatementKind<'a>, } impl<'a> Statement<'a> { @@ -74,8 +74,8 @@ impl<'a> Statement<'a> { } /// Return the [StatementKind]. 
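With `locate` removed from the trait, the only generic way left to inspect the
tree is through `children()`; a hedged sketch of a depth-first walk against the
trait as it now stands (the printing is illustrative only):

    fn walk(node: &dyn ProgramAST<'_>, depth: usize) {
        // Print the node kind, indented by tree depth.
        println!("{:indent$}{}", "", node.context().name(), indent = 2 * depth);
        for child in node.children() {
            walk(child, depth + 1);
        }
    }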
-    pub fn statement(&self) -> &StatementKind<'a> {
-        &self.statement
+    pub fn kind(&self) -> &StatementKind<'a> {
+        &self.kind
     }
 }
 
@@ -83,7 +83,7 @@ const CONTEXT: ParserContext = ParserContext::Statement;
 
 impl<'a> ProgramAST<'a> for Statement<'a> {
     fn children(&self) -> Vec<&dyn ProgramAST> {
-        vec![match &self.statement {
+        vec![match &self.kind {
             StatementKind::Fact(statement) => statement,
             StatementKind::Rule(statement) => statement,
             StatementKind::Directive(statement) => statement,
@@ -119,7 +119,7 @@ impl<'a> ProgramAST<'a> for Statement<'a> {
                     Self {
                         span: input_span.until_rest(&rest_span),
                         comment,
-                        statement,
+                        kind: statement,
                     },
                 )
             })
@@ -159,7 +159,7 @@ mod test {
             assert!(result.is_ok());
 
             let result = result.unwrap();
-            assert_eq!(result.1.statement.context(), expect);
+            assert_eq!(result.1.kind.context(), expect);
         }
     }
 }
diff --git a/nemo/src/rule_model/components/rule.rs b/nemo/src/rule_model/components/rule.rs
index 546583ba4..89bfe0ad5 100644
--- a/nemo/src/rule_model/components/rule.rs
+++ b/nemo/src/rule_model/components/rule.rs
@@ -186,8 +186,6 @@ impl ProgramComponent for Rule {
         let safe_variables = self.safe_variables();
 
         for atom in &self.head {
-            builder.push_origin(atom.origin().clone());
-
             for term in atom.subterms() {
                 if let Term::Primitive(Primitive::Variable(head_variable)) = term {
                     if !safe_variables.contains(head_variable) {
@@ -195,7 +193,12 @@ impl ProgramComponent for Rule {
                             .name()
                             .expect("anonymous variables not allowed in the head");
 
-                        let hint = if let Some(closest_option) = find_best_similarity(
+                        let info = builder.report_error(
+                            head_variable.origin().clone(),
+                            ValidationErrorKind::HeadUnsafe(head_variable.clone()),
+                        );
+
+                        if let Some(closest_option) = find_best_similarity(
                             head_variable_name.clone(),
                             &safe_variables
                                 .iter()
@@ -206,27 +209,15 @@ impl ProgramComponent for Rule {
                             && closest_option.0.len() > 2
                             && closest_option.1 > 0.75
                         {
-                            vec![Hint::SimilarExists {
+                            info.add_hint(Hint::SimilarExists {
                                 kind: "variable".to_string(),
                                 name: closest_option.0,
-                            }]
-                        } else {
-                            vec![]
+                            });
                         }
-                        } else {
-                            vec![]
-                        };
-
-                        builder.report_error(
-                            head_variable.origin(),
-                            ValidationErrorKind::HeadUnsafe(head_variable.clone()),
-                            hint,
-                        );
+                        }
                     }
                 }
             }
-
-            builder.pop_origin();
         }
 
         Ok(())
diff --git a/nemo/src/rule_model/error.rs b/nemo/src/rule_model/error.rs
index 1ab31f50a..e371732d9 100644
--- a/nemo/src/rule_model/error.rs
+++ b/nemo/src/rule_model/error.rs
@@ -5,28 +5,161 @@ pub mod hint;
 pub mod translation_error;
 pub mod validation_error;
 
-use std::fmt::Display;
+use std::{
+    fmt::{Debug, Display},
+    ops::Range,
+};
 
+use ariadne::{Color, Label, ReportBuilder};
 use hint::Hint;
 use translation_error::TranslationErrorKind;
 use validation_error::ValidationErrorKind;
 
-use crate::parser::{
-    ast::ProgramAST,
-    span::{CharacterRange, ProgramSpan},
-};
+use crate::parser::span::{CharacterRange, ProgramSpan};
 
 use super::origin::Origin;
 
+/// Types of [ComplexError]
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum ComplexErrorKind {
+    /// Error
+    Error,
+    /// Warning
+    Warning,
+}
+
+/// Types of [ComplexErrorLabel]s
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum ComplexErrorLabelKind {
+    /// Error
+    Error,
+    /// Warning
+    Warning,
+    /// Information
+    Information,
+}
+
+/// Label of a [ComplexError]
+#[derive(Debug)]
+pub struct ComplexErrorLabel<Reference>
+where
+    Reference: Debug,
+{
+    /// Kind of label
+    kind: ComplexErrorLabelKind,
+    /// Reference to the source code
+    reference: Reference,
+    /// Message
+    message: String,
+}
+
+/// Complex error that attaches additional information to an error
+#[derive(Debug)]
+pub struct ComplexError<Reference>
+where
+    Reference: Debug,
+{
+    /// Type of error
+    pub kind: ComplexErrorKind,
+    /// Where this error occurred
+    pub reference: Reference,
+    /// Labels
+    pub labels: Vec<ComplexErrorLabel<Reference>>,
+    /// Hints
+    pub hints: Vec<Hint>,
+}
+
+impl<Reference> ComplexError<Reference>
+where
+    Reference: Debug,
+{
+    /// Check whether this is an error (and not a warning).
+    pub fn is_error(&self) -> bool {
+        matches!(self.kind, ComplexErrorKind::Error)
+    }
+
+    /// Create a new error.
+    pub fn new_error(reference: Reference) -> Self {
+        Self {
+            kind: ComplexErrorKind::Error,
+            reference,
+            labels: Vec::default(),
+            hints: Vec::default(),
+        }
+    }
+
+    /// Create a new warning.
+    pub fn new_warning(reference: Reference) -> Self {
+        Self {
+            kind: ComplexErrorKind::Warning,
+            reference,
+            labels: Vec::default(),
+            hints: Vec::default(),
+        }
+    }
+
+    /// Add a new label to the error.
+    pub fn add_label(
+        &mut self,
+        kind: ComplexErrorLabelKind,
+        reference: Reference,
+        message: String,
+    ) -> &mut Self {
+        self.labels.push(ComplexErrorLabel {
+            kind,
+            reference,
+            message,
+        });
+
+        self
+    }
+
+    /// Add a new hint to the error.
+    pub fn add_hint(&mut self, hint: Hint) -> &mut Self {
+        self.hints.push(hint);
+
+        self
+    }
+
+    /// Add this information to a [ReportBuilder].
+    pub fn report<'a, Translation>(
+        &self,
+        mut report: ReportBuilder<'a, (String, Range<usize>)>,
+        source_label: String,
+        translation: Translation,
+    ) -> ReportBuilder<'a, (String, Range<usize>)>
+    where
+        Translation: Fn(&Reference) -> Range<usize>,
+    {
+        for label in &self.labels {
+            let color = match label.kind {
+                ComplexErrorLabelKind::Error => Color::Red,
+                ComplexErrorLabelKind::Warning => Color::Yellow,
+                ComplexErrorLabelKind::Information => Color::Blue,
+            };
+
+            report = report.with_label(
+                Label::new((source_label.clone(), translation(&label.reference)))
+                    .with_message(label.message.clone())
+                    .with_color(color),
+            );
+        }
+
+        for hint in &self.hints {
+            report = report.with_help(hint.message());
+        }
+
+        report
+    }
+}
+
 /// Error that occurs during validation of a program.
 #[derive(Debug)]
 pub struct ValidationError {
     /// The kind of error
     kind: ValidationErrorKind,
-    /// Stack of [Origin] from which the original AST node can be derived
-    origin_stack: Vec<Origin>,
-    /// List of hints
-    hints: Vec<Hint>,
+    /// Additional information
+    info: ComplexError<Origin>,
 }
 
 impl Display for ValidationError {
@@ -38,43 +171,33 @@ impl Display for ValidationError {
 /// Builder for [ValidationError]
 #[derive(Debug, Default)]
 pub struct ValidationErrorBuilder {
-    /// Current stack of [Origin]
-    origin_stack: Vec<Origin>,
     /// Current stack of [ValidationError]s
-    error_stack: Vec<ValidationError>,
+    errors: Vec<ValidationError>,
 }
 
 impl ValidationErrorBuilder {
-    /// Push an [Origin] onto the stack.
-    pub fn push_origin(&mut self, origin: Origin) {
-        self.origin_stack.push(origin);
-    }
-
-    /// Pop the origin stack.
-    pub fn pop_origin(&mut self) {
-        self.origin_stack.pop();
-    }
-
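Everything that previously lived in separate `hints` and `origin_stack` fields
now hangs off a single `ComplexError`. A hedged usage sketch of the builder
methods defined above (not part of the patch; the label text is invented, and
`Origin` is `Copy`):

    fn example(origin: Origin) -> ComplexError<Origin> {
        // Start from a plain error, then attach context to it.
        let mut error = ComplexError::new_error(origin);
        error.add_label(
            ComplexErrorLabelKind::Error,
            origin,
            "something is wrong here".to_string(),
        );
        error.add_hint(Hint::AnonymousVariables);
        error
    }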
     /// Add a new error.
     pub fn report_error(
         &mut self,
-        origin: &Origin,
-        error_kind: ValidationErrorKind,
-        hints: Vec<Hint>,
-    ) {
-        let mut origin_stack = self.origin_stack.clone();
-        origin_stack.push(origin.clone());
+        origin: Origin,
+        kind: ValidationErrorKind,
+    ) -> &mut ComplexError<Origin> {
+        let message = kind.to_string();
+
+        self.errors.push(ValidationError {
+            kind,
+            info: ComplexError::new_error(origin),
+        });
 
-        self.error_stack.push(ValidationError {
-            kind: error_kind,
-            origin_stack,
-            hints,
-        })
+        let info = &mut self.errors.last_mut().expect("error was just added").info;
+        info.add_label(ComplexErrorLabelKind::Error, origin, message);
+
+        info
     }
 
     /// Finish building and return a list of [ValidationError]s.
     pub fn finalize(self) -> Vec<ValidationError> {
-        self.error_stack
+        self.errors
     }
 }
 
@@ -83,20 +206,44 @@ impl ValidationErrorBuilder {
 pub struct TranslationError {
     /// The type of error that occurred
     kind: TranslationErrorKind,
-    /// Range signifying the program part that should be highlighted
-    range: CharacterRange,
-    /// List of hints
-    hints: Vec<Hint>,
+    /// Additional information
+    info: ComplexError<CharacterRange>,
 }
 
 impl TranslationError {
     /// Create a new [TranslationError] from a given [ProgramSpan].
-    pub fn new<'a>(span: ProgramSpan<'a>, kind: TranslationErrorKind, hints: Vec<Hint>) -> Self {
-        Self {
+    pub fn new<'a>(span: ProgramSpan<'a>, kind: TranslationErrorKind) -> Self {
+        let message = kind.to_string();
+
+        let mut result = Self {
             kind,
-            range: span.range(),
-            hints,
-        }
+            info: ComplexError::new_error(span.range()),
+        };
+
+        result
+            .info
+            .add_label(ComplexErrorLabelKind::Error, span.range(), message);
+
+        result
+    }
+
+    /// Add a new label to the error.
+    pub fn add_label(
+        mut self,
+        kind: ComplexErrorLabelKind,
+        range: CharacterRange,
+        message: String,
+    ) -> Self {
+        self.info.add_label(kind, range, message);
+
+        self
+    }
+
+    /// Add a new hint to the error.
+    pub fn add_hint(mut self, hint: Hint) -> Self {
+        self.info.add_hint(hint);
+
+        self
     }
 }
 
-    /// Return the [CharacterRange] associated with this error.
-    pub fn range<'a, Node: ProgramAST<'a>>(&self, ast: &'a Node) -> CharacterRange {
-        match self {
-            ProgramError::TranslationError(error) => error.range,
-            ProgramError::ValidationError(error) => ast
-                .locate(&error.origin_stack)
-                .expect("invalid origin")
-                .span()
-                .range(),
-        }
-    }
-
     /// Return the error code of the message.
     pub fn error_code(&self) -> usize {
         match self {
@@ -139,23 +274,39 @@ impl ProgramError {
         }
     }
 
-    /// Return an optional note that may be attached to the error.
-    pub fn note(&self) -> Option<String> {
+    /// Return the range indicating where the error occurred.
+    pub fn range<Translation>(&self, translation: Translation) -> Range<usize>
+    where
+        Translation: Fn(&Origin) -> Range<usize>,
+    {
         match self {
-            ProgramError::TranslationError(error) => error.kind.note(),
-            ProgramError::ValidationError(error) => error.kind.note(),
+            ProgramError::TranslationError(error) => error.info.reference.range(),
+            ProgramError::ValidationError(error) => translation(&error.info.reference),
         }
-        .map(|note| note.to_string())
     }
 
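Callers now receive the stored `ComplexError` back from `report_error` and
attach hints to it directly, as `Rule::validate` does above. A hedged sketch of
that flow (error kind and hint contents are placeholders):

    fn report_with_hint(
        builder: &mut ValidationErrorBuilder,
        origin: Origin,
        kind: ValidationErrorKind,
    ) {
        // Store the error, then chain a suggestion onto it.
        builder.report_error(origin, kind).add_hint(Hint::SimilarExists {
            kind: "variable".to_string(),
            name: "x".to_string(),
        });
    }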
-    /// Return a list of hints that fit the error message.
-    pub fn hints(&self) -> Vec<String> {
+    pub fn report<'a, Translation>(
+        &'a self,
+        mut report: ReportBuilder<'a, (String, Range<usize>)>,
+        source_label: String,
+        translation: Translation,
+    ) -> ReportBuilder<'a, (String, Range<usize>)>
+    where
+        Translation: Fn(&Origin) -> Range<usize>,
+    {
+        report = report
+            .with_code(self.error_code())
+            .with_message(self.message());
+
         match self {
-            ProgramError::TranslationError(error) => &error.hints,
-            ProgramError::ValidationError(error) => &error.hints,
+            ProgramError::TranslationError(error) => {
+                error
+                    .info
+                    .report(report, source_label, |range| range.range())
+            }
+            ProgramError::ValidationError(error) => {
+                error.info.report(report, source_label, translation)
+            }
         }
-        .iter()
-        .map(|hint| hint.message().to_string())
-        .collect()
     }
 }
diff --git a/nemo/src/rule_model/origin.rs b/nemo/src/rule_model/origin.rs
index 994ae7d52..09b6b006a 100644
--- a/nemo/src/rule_model/origin.rs
+++ b/nemo/src/rule_model/origin.rs
@@ -1,9 +1,11 @@
 //! This module defines
 
+use std::hash::Hash;
+
 pub(crate) type ExternalReference = usize;
 
 /// Origin of a program component
-#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
 pub enum Origin {
     /// Component was created via a constructor
     Created,
diff --git a/nemo/src/rule_model/translation.rs b/nemo/src/rule_model/translation.rs
index ec3522f0e..51a75e040 100644
--- a/nemo/src/rule_model/translation.rs
+++ b/nemo/src/rule_model/translation.rs
@@ -29,6 +29,9 @@ pub struct ASTProgramTranslation<'a> {
     /// Label of the input file
     input_label: String,
 
+    /// Mapping of [Origin] to [ProgramAST] nodes
+    origin_map: HashMap<Origin, &'a dyn ProgramAST<'a>>,
+
     /// Prefix mapping
     prefix_mapping: HashMap<String, String>,
     /// Base
@@ -47,12 +50,22 @@ impl<'a> ASTProgramTranslation<'a> {
         Self {
             input,
             input_label,
+            origin_map: HashMap::new(),
             prefix_mapping: HashMap::new(),
             base: None,
             validation_error_builder: ValidationErrorBuilder::default(),
             errors: Vec::default(),
         }
     }
+
+    /// Register a [ProgramAST] so that it can be associated with and later referenced by
+    /// the returned [Origin].
+    pub fn register_node(&mut self, node: &'a dyn ProgramAST<'a>) -> Origin {
+        let new_origin = Origin::External(self.origin_map.len());
+        self.origin_map.insert(new_origin, node);
+
+        new_origin
+    }
 }
 
 /// Report of all [ProgramError]s occurred
@@ -63,6 +76,8 @@ pub struct ProgramErrorReport<'a> {
     input: &'a str,
     /// Label of the input file
     label: String,
+    /// Mapping of [Origin] to [ProgramAST] nodes
+    origin_map: HashMap<Origin, &'a dyn ProgramAST<'a>>,
 
     /// Errors
     errors: Vec<ProgramError>,
 }
 
 impl<'a> ProgramErrorReport<'a> {
     /// Print the given reports.
-    pub fn eprint<'s, ReportIterator>(
-        &'s self,
-        reports: ReportIterator,
-    ) -> Result<(), std::io::Error>
+    pub fn eprint<'s, ReportIterator>(&self, reports: ReportIterator) -> Result<(), std::io::Error>
     where
-        ReportIterator: Iterator<Item = Report<'s, (String, Range<usize>)>>,
+        ReportIterator: Iterator<Item = Report<'s, (String, Range<usize>)>>,
     {
         for report in reports {
             report.eprint((self.label.clone(), Source::from(self.input)))?;
         }
 
         Ok(())
     }
 
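Resolving an `Origin` back to a source range goes through the map that
`register_node` fills; a hedged sketch of the lookup performed by the
translation closure below (the `expect` message is illustrative):

    fn origin_range(
        origin_map: &std::collections::HashMap<Origin, &dyn ProgramAST<'_>>,
        origin: &Origin,
    ) -> std::ops::Range<usize> {
        origin_map
            .get(origin)
            .expect("every reported Origin was registered first")
            .span() // AST span
            .range() // CharacterRange
            .range() // byte range for ariadne
    }

     /// Build a [Report] for each error.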
- pub fn build_reports( - &'a self, - ast: &'a ast::program::Program<'a>, - color_error: Color, - ) -> impl Iterator)>> { - self.errors.iter().map(move |error| { - let range = error.range(ast); - - let mut report = - Report::build(ReportKind::Error, self.label.clone(), range.start.offset) - .with_code(error.error_code()) - .with_message(error.message()) - .with_label( - Label::new((self.label.clone(), range.range())) - .with_message(error.message()) - .with_color(color_error), - ); - if let Some(note) = error.note() { - report = report.with_note(note); - } - if !error.hints().is_empty() { - for hint in error.hints() { - report = report.with_help(hint); - } - } - - report.finish() - }) + pub fn build_reports(&self) -> Vec)>> { + self.errors + .iter() + .map(move |error| { + let translation = |origin: &Origin| { + self.origin_map + .get(origin) + .expect("map must contain origin") + .span() + .range() + .range() + }; + + let mut report = Report::build( + ReportKind::Error, + self.label.clone(), + error.range(translation).start, + ); + + report = error.report(report, self.label.clone(), translation); + + report.finish() + }) + .collect() } } @@ -120,132 +128,105 @@ impl<'a> ASTProgramTranslation<'a> { /// Translate the given [ProgramAST] into a [Program]. pub fn translate( mut self, - ast: &ast::program::Program<'a>, + ast: &'a ast::program::Program<'a>, ) -> Result> { let mut program_builder = ProgramBuilder::default(); - // for (statement_index, rule) in vec![ast.statements()].into_iter().enumerate() { - // let origin = Origin::External(statement_index); - - // match self.build_rule(origin, rule) { - // Ok(new_rule) => program_builder.add_rule(new_rule), - // Err(translation_error) => self - // .errors - // .push(ProgramError::TranslationError(translation_error)), - // } - // } - - // self.errors.extend( - // self.validation_error_builder - // .finalize() - // .into_iter() - // .map(ProgramError::ValidationError), - // ); - - // if self.errors.is_empty() { - // Ok(program_builder.finalize()) - // } else { - // Err(ProgramErrorReport { - // input: self.input, - // label: self.input_label, - // errors: self.errors, - // }) - // } - - todo!() + for statement in ast.statements() { + match statement.kind() { + ast::statement::StatementKind::Fact(_) => todo!(), + ast::statement::StatementKind::Rule(rule) => match self.build_rule(rule) { + Ok(new_rule) => program_builder.add_rule(new_rule), + Err(translation_error) => self + .errors + .push(ProgramError::TranslationError(translation_error)), + }, + ast::statement::StatementKind::Directive(_) => todo!(), + } + } + + self.errors.extend( + self.validation_error_builder + .finalize() + .into_iter() + .map(ProgramError::ValidationError), + ); + + if self.errors.is_empty() { + Ok(program_builder.finalize()) + } else { + Err(ProgramErrorReport { + input: self.input, + label: self.input_label, + errors: self.errors, + origin_map: self.origin_map, + }) + } } - fn build_rule( - &mut self, - origin: Origin, - rule: &ast::rule::Rule<'a>, - ) -> Result { - self.validation_error_builder.push_origin(origin); - let mut rule_builder = RuleBuilder::default().origin(origin); + fn build_rule(&mut self, rule: &'a ast::rule::Rule<'a>) -> Result { + let mut rule_builder = RuleBuilder::default().origin(self.register_node(rule)); - let mut expression_counter: usize = 0; for expression in rule.head() { - let origin_expression = Origin::External(expression_counter); - rule_builder.add_head_atom_mut(self.build_head_atom(origin_expression, expression)?); - - 
expression_counter += 1; + rule_builder.add_head_atom_mut(self.build_head_atom(expression)?); } for expression in rule.body() { - let origin_expression = Origin::External(expression_counter); - rule_builder - .add_body_literal_mut(self.build_body_literal(origin_expression, expression)?); - - expression_counter += 1; + rule_builder.add_body_literal_mut(self.build_body_literal(expression)?); } - let rule = rule_builder.finalize().set_origin(origin); + let rule = rule_builder.finalize(); let _ = rule.validate(&mut self.validation_error_builder); - self.validation_error_builder.pop_origin(); Ok(rule) } fn build_body_literal( &mut self, - origin: Origin, - head: &ast::expression::Expression<'a>, + head: &'a ast::expression::Expression<'a>, ) -> Result { - self.validation_error_builder.push_origin(origin); - let result = if let ast::expression::Expression::Atom(atom) = head { let mut subterms = Vec::new(); - for (expression_index, expression) in atom.expressions().enumerate() { - let term_origin = Origin::External(expression_index); - subterms.push(self.build_inner_term(term_origin, expression)?); + for expression in atom.expressions() { + subterms.push(self.build_inner_term(expression)?); } - Literal::Positive(Atom::new(&self.resolve_tag(atom.tag())?, subterms)) + Literal::Positive( + Atom::new(&self.resolve_tag(atom.tag())?, subterms) + .set_origin(self.register_node(atom)), + ) } else { - return Err(TranslationError::new( - head.span(), - TranslationErrorKind::HeadNonAtom(head.context_type().name().to_string()), - vec![], - )); + todo!() } - .set_origin(origin); + .set_origin(self.register_node(head)); - self.validation_error_builder.pop_origin(); Ok(result) } fn build_head_atom( &mut self, - origin: Origin, - head: &ast::expression::Expression<'a>, + head: &'a ast::expression::Expression<'a>, ) -> Result { - self.validation_error_builder.push_origin(origin); - let result = if let ast::expression::Expression::Atom(atom) = head { let mut subterms = Vec::new(); - for (expression_index, expression) in atom.expressions().enumerate() { - let term_origin = Origin::External(expression_index); - subterms.push(self.build_inner_term(term_origin, expression)?); + for expression in atom.expressions() { + subterms.push(self.build_inner_term(expression)?); } - Atom::new(&self.resolve_tag(atom.tag())?, subterms) + Atom::new(&self.resolve_tag(atom.tag())?, subterms).set_origin(self.register_node(atom)) } else { return Err(TranslationError::new( head.span(), TranslationErrorKind::HeadNonAtom(head.context_type().name().to_string()), - vec![], )); - } - .set_origin(origin); + }; - self.validation_error_builder.pop_origin(); Ok(result) } fn build_inner_term( - &self, - origin: Origin, - expression: &ast::expression::Expression, + &mut self, + expression: &'a ast::expression::Expression, ) -> Result { Ok(match expression { ast::expression::Expression::Arithmetic(_) => todo!(), @@ -261,33 +242,33 @@ impl<'a> ASTProgramTranslation<'a> { ast::expression::basic::variable::VariableType::Universal => { if let Some(variable_name) = variable.name() { Term::universal_variable(&variable_name) + .set_origin(self.register_node(variable)) } else { return Err(TranslationError::new( variable.span(), TranslationErrorKind::UnnamedVariable, - vec![Hint::AnonymousVariables], - )); + ) + .add_hint(Hint::AnonymousVariables)); } } ast::expression::basic::variable::VariableType::Existential => { if let Some(variable_name) = variable.name() { Term::existential_variable(&variable_name) + .set_origin(self.register_node(variable)) } else { 
return Err(TranslationError::new( variable.span(), TranslationErrorKind::UnnamedVariable, - vec![], )); } } ast::expression::basic::variable::VariableType::Anonymous => { if variable.name().is_none() { - Term::anonymous_variable() + Term::anonymous_variable().set_origin(self.register_node(variable)) } else { return Err(TranslationError::new( variable.span(), TranslationErrorKind::NamedAnonymous(variable.span().0.to_string()), - vec![], )); } } @@ -297,13 +278,12 @@ impl<'a> ASTProgramTranslation<'a> { ast::expression::Expression::Map(_) => todo!(), ast::expression::Expression::Negation(_) => todo!(), ast::expression::Expression::Operation(_) => todo!(), - } - .set_origin(origin)) + }) } fn resolve_tag( &self, - tag: &ast::tag::structure::StructureTag<'a>, + tag: &'a ast::tag::structure::StructureTag<'a>, ) -> Result { Ok(match tag.kind() { ast::tag::structure::StructureTagKind::Plain(token) => { @@ -322,7 +302,6 @@ impl<'a> ASTProgramTranslation<'a> { return Err(TranslationError::new( prefix.span(), TranslationErrorKind::UnknownPrefix(prefix.to_string()), - vec![], )); } } From 3ecf659848f9d0c74bfc4450ffd25f9084d5d9ef Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Fri, 26 Jul 2024 13:31:25 +0200 Subject: [PATCH 133/214] Rename ProgramSpan to Span --- nemo/src/parser/ast.rs | 4 ++-- nemo/src/parser/ast/attribute.rs | 6 +++--- nemo/src/parser/ast/comment/closed.rs | 8 ++++---- nemo/src/parser/ast/comment/doc.rs | 8 ++++---- nemo/src/parser/ast/comment/line.rs | 8 ++++---- nemo/src/parser/ast/comment/toplevel.rs | 8 ++++---- nemo/src/parser/ast/comment/wsoc.rs | 4 ++-- nemo/src/parser/ast/directive.rs | 4 ++-- nemo/src/parser/ast/directive/base.rs | 6 +++--- nemo/src/parser/ast/directive/declare.rs | 6 +++--- nemo/src/parser/ast/directive/export.rs | 6 +++--- nemo/src/parser/ast/directive/import.rs | 6 +++--- nemo/src/parser/ast/directive/output.rs | 6 +++--- nemo/src/parser/ast/directive/prefix.rs | 6 +++--- nemo/src/parser/ast/directive/unknown.rs | 8 ++++---- nemo/src/parser/ast/expression.rs | 4 ++-- nemo/src/parser/ast/expression/basic/blank.rs | 6 +++--- nemo/src/parser/ast/expression/basic/boolean.rs | 6 +++--- nemo/src/parser/ast/expression/basic/constant.rs | 6 +++--- nemo/src/parser/ast/expression/basic/iri.rs | 6 +++--- nemo/src/parser/ast/expression/basic/number.rs | 6 +++--- .../src/parser/ast/expression/basic/rdf_literal.rs | 6 +++--- nemo/src/parser/ast/expression/basic/string.rs | 6 +++--- nemo/src/parser/ast/expression/basic/variable.rs | 6 +++--- .../parser/ast/expression/complex/aggregation.rs | 6 +++--- .../parser/ast/expression/complex/arithmetic.rs | 8 ++++---- nemo/src/parser/ast/expression/complex/atom.rs | 6 +++--- nemo/src/parser/ast/expression/complex/infix.rs | 6 +++--- nemo/src/parser/ast/expression/complex/map.rs | 6 +++--- nemo/src/parser/ast/expression/complex/negation.rs | 6 +++--- .../src/parser/ast/expression/complex/operation.rs | 6 +++--- nemo/src/parser/ast/expression/complex/tuple.rs | 6 +++--- nemo/src/parser/ast/program.rs | 6 +++--- nemo/src/parser/ast/rule.rs | 6 +++--- nemo/src/parser/ast/sequence/declare.rs | 4 ++-- nemo/src/parser/ast/sequence/key_value.rs | 4 ++-- nemo/src/parser/ast/sequence/one.rs | 4 ++-- nemo/src/parser/ast/sequence/simple.rs | 4 ++-- nemo/src/parser/ast/statement.rs | 6 +++--- nemo/src/parser/ast/tag/aggregation.rs | 6 +++--- nemo/src/parser/ast/tag/datatype.rs | 6 +++--- nemo/src/parser/ast/tag/operation.rs | 6 +++--- nemo/src/parser/ast/tag/parameter.rs | 6 +++--- nemo/src/parser/ast/tag/structure.rs | 6 +++--- 
nemo/src/parser/ast/token.rs | 8 ++++---- nemo/src/parser/input.rs | 14 +++++++------- nemo/src/parser/span.rs | 6 +++--- nemo/src/rule_model/error.rs | 4 ++-- 48 files changed, 146 insertions(+), 146 deletions(-) diff --git a/nemo/src/parser/ast.rs b/nemo/src/parser/ast.rs index ddb1b32ca..48535d649 100644 --- a/nemo/src/parser/ast.rs +++ b/nemo/src/parser/ast.rs @@ -13,7 +13,7 @@ pub mod token; use std::fmt::Debug; -use super::{context::ParserContext, span::ProgramSpan, ParserInput, ParserResult}; +use super::{context::ParserContext, span::Span, ParserInput, ParserResult}; /// Trait implemented by nodes in the abstract syntax tree pub trait ProgramAST<'a>: Debug + Sync { @@ -21,7 +21,7 @@ pub trait ProgramAST<'a>: Debug + Sync { fn children(&self) -> Vec<&dyn ProgramAST>; /// Return the region of text this node originates from. - fn span(&self) -> ProgramSpan<'a>; + fn span(&self) -> Span<'a>; /// Parse the given input into this type of node fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self> diff --git a/nemo/src/parser/ast/attribute.rs b/nemo/src/parser/ast/attribute.rs index 2083fc310..855a2dd05 100644 --- a/nemo/src/parser/ast/attribute.rs +++ b/nemo/src/parser/ast/attribute.rs @@ -8,7 +8,7 @@ use nom::{ use crate::parser::{ context::{context, ParserContext}, input::ParserInput, - span::ProgramSpan, + span::Span, ParserResult, }; @@ -18,7 +18,7 @@ use super::{comment::wsoc::WSoC, expression::complex::atom::Atom, token::Token, #[derive(Debug)] pub struct Attribute<'a> { /// [ProgramSpan] associated with this node - span: ProgramSpan<'a>, + span: Span<'a>, /// [Atom] containing the content of the directive content: Atom<'a>, @@ -38,7 +38,7 @@ impl<'a> ProgramAST<'a> for Attribute<'a> { vec![self.content()] } - fn span(&self) -> ProgramSpan<'a> { + fn span(&self) -> Span<'a> { self.span } diff --git a/nemo/src/parser/ast/comment/closed.rs b/nemo/src/parser/ast/comment/closed.rs index 854609ef7..f30604d2b 100644 --- a/nemo/src/parser/ast/comment/closed.rs +++ b/nemo/src/parser/ast/comment/closed.rs @@ -9,7 +9,7 @@ use crate::parser::{ }, context::{context, ParserContext}, input::ParserInput, - span::ProgramSpan, + span::Span, ParserResult, }; @@ -17,10 +17,10 @@ use crate::parser::{ #[derive(Debug)] pub struct ClosedComment<'a> { /// [ProgramSpan] associated with this comment - span: ProgramSpan<'a>, + span: Span<'a>, /// Part of the comment that contains the content - content: ProgramSpan<'a>, + content: Span<'a>, } const CONTEXT: ParserContext = ParserContext::Comment; @@ -37,7 +37,7 @@ impl<'a> ProgramAST<'a> for ClosedComment<'a> { Vec::default() } - fn span(&self) -> ProgramSpan<'a> { + fn span(&self) -> Span<'a> { self.span } diff --git a/nemo/src/parser/ast/comment/doc.rs b/nemo/src/parser/ast/comment/doc.rs index bfab5ca89..a64de865b 100644 --- a/nemo/src/parser/ast/comment/doc.rs +++ b/nemo/src/parser/ast/comment/doc.rs @@ -11,7 +11,7 @@ use crate::parser::{ ast::{token::Token, ProgramAST}, context::{context, ParserContext}, input::ParserInput, - span::ProgramSpan, + span::Span, ParserResult, }; @@ -19,10 +19,10 @@ use crate::parser::{ #[derive(Debug)] pub struct DocComment<'a> { /// [ProgramSpan] associated with this comment - span: ProgramSpan<'a>, + span: Span<'a>, /// Each line of the comment - content: Vec>, + content: Vec>, } impl<'a> DocComment<'a> { @@ -41,7 +41,7 @@ impl<'a> ProgramAST<'a> for DocComment<'a> { Vec::default() } - fn span(&self) -> ProgramSpan<'a> { + fn span(&self) -> Span<'a> { self.span } diff --git a/nemo/src/parser/ast/comment/line.rs 
b/nemo/src/parser/ast/comment/line.rs index aa80e95c3..658d5dbc9 100644 --- a/nemo/src/parser/ast/comment/line.rs +++ b/nemo/src/parser/ast/comment/line.rs @@ -10,7 +10,7 @@ use crate::parser::{ ast::{token::Token, ProgramAST}, context::{context, ParserContext}, input::ParserInput, - span::ProgramSpan, + span::Span, ParserResult, }; @@ -18,10 +18,10 @@ use crate::parser::{ #[derive(Debug)] pub struct LineComment<'a> { /// [ProgramSpan] associated with this comment - span: ProgramSpan<'a>, + span: Span<'a>, /// Part of the comment that contains the content - content: ProgramSpan<'a>, + content: Span<'a>, } const CONTEXT: ParserContext = ParserContext::Comment; @@ -38,7 +38,7 @@ impl<'a> ProgramAST<'a> for LineComment<'a> { Vec::default() } - fn span(&self) -> ProgramSpan<'a> { + fn span(&self) -> Span<'a> { self.span } diff --git a/nemo/src/parser/ast/comment/toplevel.rs b/nemo/src/parser/ast/comment/toplevel.rs index 469582210..e5c249c41 100644 --- a/nemo/src/parser/ast/comment/toplevel.rs +++ b/nemo/src/parser/ast/comment/toplevel.rs @@ -11,7 +11,7 @@ use crate::parser::{ ast::{token::Token, ProgramAST}, context::{context, ParserContext}, input::ParserInput, - span::ProgramSpan, + span::Span, ParserResult, }; @@ -19,10 +19,10 @@ use crate::parser::{ #[derive(Debug)] pub struct TopLevelComment<'a> { /// [ProgramSpan] associated with this comment - span: ProgramSpan<'a>, + span: Span<'a>, /// Each line of the comment - content: Vec>, + content: Vec>, } const CONTEXT: ParserContext = ParserContext::TopLevelComment; @@ -42,7 +42,7 @@ impl<'a> ProgramAST<'a> for TopLevelComment<'a> { Vec::default() } - fn span(&self) -> ProgramSpan<'a> { + fn span(&self) -> Span<'a> { self.span } diff --git a/nemo/src/parser/ast/comment/wsoc.rs b/nemo/src/parser/ast/comment/wsoc.rs index 2d3342452..3715459f0 100644 --- a/nemo/src/parser/ast/comment/wsoc.rs +++ b/nemo/src/parser/ast/comment/wsoc.rs @@ -10,7 +10,7 @@ use nom::{ use crate::parser::{ ast::{token::Token, ProgramAST}, input::ParserInput, - span::ProgramSpan, + span::Span, ParserResult, }; @@ -29,7 +29,7 @@ pub enum WhiteSpaceComment<'a> { #[derive(Debug)] pub struct WSoC<'a> { /// [ProgramSpan] associated with this comment - _span: ProgramSpan<'a>, + _span: Span<'a>, /// comments comments: Vec>, } diff --git a/nemo/src/parser/ast/directive.rs b/nemo/src/parser/ast/directive.rs index cab1debe7..dadc789d4 100644 --- a/nemo/src/parser/ast/directive.rs +++ b/nemo/src/parser/ast/directive.rs @@ -15,7 +15,7 @@ use unknown::UnknownDirective; use crate::parser::{ context::{context, ParserContext}, input::ParserInput, - span::ProgramSpan, + span::Span, ParserResult, }; @@ -112,7 +112,7 @@ impl<'a> ProgramAST<'a> for Directive<'a> { }] } - fn span(&self) -> ProgramSpan<'a> { + fn span(&self) -> Span<'a> { match self { Directive::Base(directive) => directive.span(), Directive::Declare(directive) => directive.span(), diff --git a/nemo/src/parser/ast/directive/base.rs b/nemo/src/parser/ast/directive/base.rs index 1448ddb1a..114f2fb87 100644 --- a/nemo/src/parser/ast/directive/base.rs +++ b/nemo/src/parser/ast/directive/base.rs @@ -6,7 +6,7 @@ use crate::parser::{ ast::{comment::wsoc::WSoC, expression::basic::iri::Iri, token::Token, ProgramAST}, context::{context, ParserContext}, input::ParserInput, - span::ProgramSpan, + span::Span, ParserResult, }; @@ -14,7 +14,7 @@ use crate::parser::{ #[derive(Debug)] pub struct Base<'a> { /// [ProgramSpan] associated with this node - span: ProgramSpan<'a>, + span: Span<'a>, /// The global prefix iri: Iri<'a>, @@ -34,7 +34,7 
@@ impl<'a> ProgramAST<'a> for Base<'a> { vec![&self.iri] } - fn span(&self) -> ProgramSpan<'a> { + fn span(&self) -> Span<'a> { self.span } diff --git a/nemo/src/parser/ast/directive/declare.rs b/nemo/src/parser/ast/directive/declare.rs index 09618c600..8199d0077 100644 --- a/nemo/src/parser/ast/directive/declare.rs +++ b/nemo/src/parser/ast/directive/declare.rs @@ -13,7 +13,7 @@ use crate::{ }, context::{context, ParserContext}, input::ParserInput, - span::ProgramSpan, + span::Span, ParserResult, }, rule_model::components::datatype::DataType, @@ -23,7 +23,7 @@ use crate::{ #[derive(Debug)] pub struct Declare<'a> { /// [ProgramSpan] associated with this node - span: ProgramSpan<'a>, + span: Span<'a>, /// Predicate this statement applies to predicate: StructureTag<'a>, @@ -62,7 +62,7 @@ impl<'a> ProgramAST<'a> for Declare<'a> { result } - fn span(&self) -> ProgramSpan<'a> { + fn span(&self) -> Span<'a> { self.span } diff --git a/nemo/src/parser/ast/directive/export.rs b/nemo/src/parser/ast/directive/export.rs index a7d200221..f78c6d0ce 100644 --- a/nemo/src/parser/ast/directive/export.rs +++ b/nemo/src/parser/ast/directive/export.rs @@ -9,7 +9,7 @@ use crate::parser::{ }, context::{context, ParserContext}, input::ParserInput, - span::ProgramSpan, + span::Span, ParserResult, }; @@ -17,7 +17,7 @@ use crate::parser::{ #[derive(Debug)] pub struct Export<'a> { /// [ProgramSpan] associated with this node - span: ProgramSpan<'a>, + span: Span<'a>, /// Predicate that is being exported predicate: StructureTag<'a>, @@ -62,7 +62,7 @@ impl<'a> ProgramAST<'a> for Export<'a> { vec![&self.predicate, &self.instructions] } - fn span(&self) -> ProgramSpan<'a> { + fn span(&self) -> Span<'a> { self.span } diff --git a/nemo/src/parser/ast/directive/import.rs b/nemo/src/parser/ast/directive/import.rs index dac2ffe0a..1b4621144 100644 --- a/nemo/src/parser/ast/directive/import.rs +++ b/nemo/src/parser/ast/directive/import.rs @@ -9,7 +9,7 @@ use crate::parser::{ }, context::{context, ParserContext}, input::ParserInput, - span::ProgramSpan, + span::Span, ParserResult, }; @@ -17,7 +17,7 @@ use crate::parser::{ #[derive(Debug)] pub struct Import<'a> { /// [ProgramSpan] associated with this node - span: ProgramSpan<'a>, + span: Span<'a>, /// Predicate that is being Imported predicate: StructureTag<'a>, @@ -62,7 +62,7 @@ impl<'a> ProgramAST<'a> for Import<'a> { vec![&self.predicate, &self.instructions] } - fn span(&self) -> ProgramSpan<'a> { + fn span(&self) -> Span<'a> { self.span } diff --git a/nemo/src/parser/ast/directive/output.rs b/nemo/src/parser/ast/directive/output.rs index 3e01d4f24..71ae107e9 100644 --- a/nemo/src/parser/ast/directive/output.rs +++ b/nemo/src/parser/ast/directive/output.rs @@ -6,7 +6,7 @@ use crate::parser::{ ast::{comment::wsoc::WSoC, tag::structure::StructureTag, token::Token, ProgramAST}, context::{context, ParserContext}, input::ParserInput, - span::ProgramSpan, + span::Span, ParserResult, }; @@ -14,7 +14,7 @@ use crate::parser::{ #[derive(Debug)] pub struct Output<'a> { /// [ProgramSpan] associated with this node - span: ProgramSpan<'a>, + span: Span<'a>, /// The predicate predicate: StructureTag<'a>, @@ -34,7 +34,7 @@ impl<'a> ProgramAST<'a> for Output<'a> { vec![&self.predicate] } - fn span(&self) -> ProgramSpan<'a> { + fn span(&self) -> Span<'a> { self.span } diff --git a/nemo/src/parser/ast/directive/prefix.rs b/nemo/src/parser/ast/directive/prefix.rs index f4ccf2e26..3954cbb09 100644 --- a/nemo/src/parser/ast/directive/prefix.rs +++ b/nemo/src/parser/ast/directive/prefix.rs @@ 
-6,7 +6,7 @@ use crate::parser::{ ast::{comment::wsoc::WSoC, expression::basic::iri::Iri, token::Token, ProgramAST}, context::{context, ParserContext}, input::ParserInput, - span::ProgramSpan, + span::Span, ParserResult, }; @@ -14,7 +14,7 @@ use crate::parser::{ #[derive(Debug)] pub struct Prefix<'a> { /// [ProgramSpan] associated with this node - span: ProgramSpan<'a>, + span: Span<'a>, /// The prefix prefix: Token<'a>, @@ -41,7 +41,7 @@ impl<'a> ProgramAST<'a> for Prefix<'a> { self.value.children() } - fn span(&self) -> ProgramSpan<'a> { + fn span(&self) -> Span<'a> { self.span } diff --git a/nemo/src/parser/ast/directive/unknown.rs b/nemo/src/parser/ast/directive/unknown.rs index f8f58b3a3..37ca30130 100644 --- a/nemo/src/parser/ast/directive/unknown.rs +++ b/nemo/src/parser/ast/directive/unknown.rs @@ -13,7 +13,7 @@ use crate::parser::{ context::{context, ParserContext}, error::ParserErrorTree, input::ParserInput, - span::ProgramSpan, + span::Span, ParserResult, }; @@ -23,12 +23,12 @@ use super::DirectiveKind; #[derive(Debug)] pub struct UnknownDirective<'a> { /// [ProgramSpan] associated with this node - span: ProgramSpan<'a>, + span: Span<'a>, /// Name of the directive name: Token<'a>, /// Content - content: ProgramSpan<'a>, + content: Span<'a>, } impl<'a> UnknownDirective<'a> { @@ -77,7 +77,7 @@ impl<'a> ProgramAST<'a> for UnknownDirective<'a> { Vec::default() } - fn span(&self) -> ProgramSpan<'a> { + fn span(&self) -> Span<'a> { self.span } diff --git a/nemo/src/parser/ast/expression.rs b/nemo/src/parser/ast/expression.rs index 78821bc59..07fb2c745 100644 --- a/nemo/src/parser/ast/expression.rs +++ b/nemo/src/parser/ast/expression.rs @@ -16,7 +16,7 @@ use nom::{branch::alt, combinator::map}; use crate::parser::{ context::{context, ParserContext}, input::ParserInput, - span::ProgramSpan, + span::Span, ParserResult, }; @@ -128,7 +128,7 @@ impl<'a> ProgramAST<'a> for Expression<'a> { }] } - fn span(&self) -> ProgramSpan<'a> { + fn span(&self) -> Span<'a> { match self { Expression::Aggregation(expression) => expression.span(), Expression::Arithmetic(expression) => expression.span(), diff --git a/nemo/src/parser/ast/expression/basic/blank.rs b/nemo/src/parser/ast/expression/basic/blank.rs index 891a0117e..c87d53d50 100644 --- a/nemo/src/parser/ast/expression/basic/blank.rs +++ b/nemo/src/parser/ast/expression/basic/blank.rs @@ -6,7 +6,7 @@ use crate::parser::{ ast::{token::Token, ProgramAST}, context::{context, ParserContext}, input::ParserInput, - span::ProgramSpan, + span::Span, ParserResult, }; @@ -14,7 +14,7 @@ use crate::parser::{ #[derive(Debug)] pub struct Blank<'a> { /// [ProgramSpan] associated with this node - span: ProgramSpan<'a>, + span: Span<'a>, /// Name of the blank node name: Token<'a>, @@ -39,7 +39,7 @@ impl<'a> ProgramAST<'a> for Blank<'a> { Vec::default() } - fn span(&self) -> ProgramSpan<'a> { + fn span(&self) -> Span<'a> { self.span } diff --git a/nemo/src/parser/ast/expression/basic/boolean.rs b/nemo/src/parser/ast/expression/basic/boolean.rs index 5a247be2e..b8f7c2d64 100644 --- a/nemo/src/parser/ast/expression/basic/boolean.rs +++ b/nemo/src/parser/ast/expression/basic/boolean.rs @@ -11,7 +11,7 @@ use crate::parser::{ }, context::{context, ParserContext}, input::ParserInput, - span::ProgramSpan, + span::Span, ParserResult, }; @@ -31,7 +31,7 @@ pub enum BooleanValue { #[derive(Debug)] pub struct Boolean<'a> { /// [ProgramSpan] associated with this node - span: ProgramSpan<'a>, + span: Span<'a>, /// Value value: BooleanValue, @@ -62,7 +62,7 @@ impl<'a> 
ProgramAST<'a> for Boolean<'a> { Vec::default() } - fn span(&self) -> ProgramSpan<'a> { + fn span(&self) -> Span<'a> { self.span } diff --git a/nemo/src/parser/ast/expression/basic/constant.rs b/nemo/src/parser/ast/expression/basic/constant.rs index 6cdb8d945..34098bf80 100644 --- a/nemo/src/parser/ast/expression/basic/constant.rs +++ b/nemo/src/parser/ast/expression/basic/constant.rs @@ -6,7 +6,7 @@ use crate::parser::{ ast::{token::Token, ProgramAST}, context::{context, ParserContext}, input::ParserInput, - span::ProgramSpan, + span::Span, ParserResult, }; @@ -25,7 +25,7 @@ enum ConstantKind<'a> { #[derive(Debug)] pub struct Constant<'a> { /// [ProgramSpan] associated with this node - span: ProgramSpan<'a>, + span: Span<'a>, /// The constant constant: ConstantKind<'a>, @@ -48,7 +48,7 @@ impl<'a> ProgramAST<'a> for Constant<'a> { Vec::default() } - fn span(&self) -> ProgramSpan<'a> { + fn span(&self) -> Span<'a> { self.span } diff --git a/nemo/src/parser/ast/expression/basic/iri.rs b/nemo/src/parser/ast/expression/basic/iri.rs index 9f43d28ab..a77db8130 100644 --- a/nemo/src/parser/ast/expression/basic/iri.rs +++ b/nemo/src/parser/ast/expression/basic/iri.rs @@ -7,7 +7,7 @@ use crate::parser::{ ast::{token::Token, ProgramAST}, context::{context, ParserContext}, input::ParserInput, - span::ProgramSpan, + span::Span, ParserResult, }; @@ -15,7 +15,7 @@ use crate::parser::{ #[derive(Debug)] pub struct Iri<'a> { /// [ProgramSpan] associated with this node - span: ProgramSpan<'a>, + span: Span<'a>, /// Part of the Iri that is the content content: Token<'a>, @@ -35,7 +35,7 @@ impl<'a> ProgramAST<'a> for Iri<'a> { Vec::default() } - fn span(&self) -> ProgramSpan<'a> { + fn span(&self) -> Span<'a> { self.span } diff --git a/nemo/src/parser/ast/expression/basic/number.rs b/nemo/src/parser/ast/expression/basic/number.rs index fe668dc54..3b43957e3 100644 --- a/nemo/src/parser/ast/expression/basic/number.rs +++ b/nemo/src/parser/ast/expression/basic/number.rs @@ -13,7 +13,7 @@ use crate::parser::{ ProgramAST, }, context::{context, ParserContext}, - span::ProgramSpan, + span::Span, ParserInput, ParserResult, }; @@ -52,7 +52,7 @@ enum NumberSign { #[derive(Debug)] pub struct Number<'a> { /// [ProgramSpan] associated with this node - span: ProgramSpan<'a>, + span: Span<'a>, /// Sign of the integer part _integer_sign: NumberSign, @@ -115,7 +115,7 @@ impl<'a> ProgramAST<'a> for Number<'a> { Vec::default() } - fn span(&self) -> ProgramSpan<'a> { + fn span(&self) -> Span<'a> { self.span } diff --git a/nemo/src/parser/ast/expression/basic/rdf_literal.rs b/nemo/src/parser/ast/expression/basic/rdf_literal.rs index a5adcb1ea..cd6c75496 100644 --- a/nemo/src/parser/ast/expression/basic/rdf_literal.rs +++ b/nemo/src/parser/ast/expression/basic/rdf_literal.rs @@ -7,7 +7,7 @@ use crate::parser::{ ast::{token::Token, ProgramAST}, context::{context, ParserContext}, input::ParserInput, - span::ProgramSpan, + span::Span, ParserResult, }; @@ -17,7 +17,7 @@ use super::iri::Iri; #[derive(Debug)] pub struct RdfLiteral<'a> { /// [ProgramSpan] associated with this node - span: ProgramSpan<'a>, + span: Span<'a>, /// Content part rdf literal content: Token<'a>, @@ -50,7 +50,7 @@ impl<'a> ProgramAST<'a> for RdfLiteral<'a> { Vec::default() } - fn span(&self) -> ProgramSpan<'a> { + fn span(&self) -> Span<'a> { self.span } diff --git a/nemo/src/parser/ast/expression/basic/string.rs b/nemo/src/parser/ast/expression/basic/string.rs index 93615b112..7e10e861c 100644 --- a/nemo/src/parser/ast/expression/basic/string.rs +++ 
b/nemo/src/parser/ast/expression/basic/string.rs @@ -10,7 +10,7 @@ use crate::parser::{ ast::{token::Token, ProgramAST}, context::{context, ParserContext}, input::ParserInput, - span::ProgramSpan, + span::Span, ParserResult, }; @@ -18,7 +18,7 @@ use crate::parser::{ #[derive(Debug)] pub struct StringLiteral<'a> { /// [ProgramSpan] associated with this node - span: ProgramSpan<'a>, + span: Span<'a>, /// Part of the string that is the content content: Token<'a>, @@ -55,7 +55,7 @@ impl<'a> ProgramAST<'a> for StringLiteral<'a> { Vec::default() } - fn span(&self) -> ProgramSpan<'a> { + fn span(&self) -> Span<'a> { self.span } diff --git a/nemo/src/parser/ast/expression/basic/variable.rs b/nemo/src/parser/ast/expression/basic/variable.rs index aea97e761..f11d14a4c 100644 --- a/nemo/src/parser/ast/expression/basic/variable.rs +++ b/nemo/src/parser/ast/expression/basic/variable.rs @@ -11,7 +11,7 @@ use crate::parser::{ }, context::{context, ParserContext}, input::ParserInput, - span::ProgramSpan, + span::Span, ParserResult, }; @@ -34,7 +34,7 @@ pub enum VariableType { #[derive(Debug)] pub struct Variable<'a> { /// [ProgramSpan] associated with this node - span: ProgramSpan<'a>, + span: Span<'a>, /// Type of variable kind: VariableType, @@ -82,7 +82,7 @@ impl<'a> ProgramAST<'a> for Variable<'a> { Vec::default() } - fn span(&self) -> ProgramSpan<'a> { + fn span(&self) -> Span<'a> { self.span } diff --git a/nemo/src/parser/ast/expression/complex/aggregation.rs b/nemo/src/parser/ast/expression/complex/aggregation.rs index 60ac63d56..069493939 100644 --- a/nemo/src/parser/ast/expression/complex/aggregation.rs +++ b/nemo/src/parser/ast/expression/complex/aggregation.rs @@ -14,7 +14,7 @@ use crate::{ }, context::{context, ParserContext}, input::ParserInput, - span::ProgramSpan, + span::Span, ParserResult, }, rule_model::components::term::aggregate::AggregateKind, @@ -26,7 +26,7 @@ use crate::{ #[derive(Debug)] pub struct Aggregation<'a> { /// [ProgramSpan] associated with this node - span: ProgramSpan<'a>, + span: Span<'a>, /// Type of Aggregation, tag: AggregationTag<'a>, @@ -68,7 +68,7 @@ impl<'a> ProgramAST<'a> for Aggregation<'a> { result } - fn span(&self) -> ProgramSpan<'a> { + fn span(&self) -> Span<'a> { self.span } diff --git a/nemo/src/parser/ast/expression/complex/arithmetic.rs b/nemo/src/parser/ast/expression/complex/arithmetic.rs index 7702801e9..8b16932a2 100644 --- a/nemo/src/parser/ast/expression/complex/arithmetic.rs +++ b/nemo/src/parser/ast/expression/complex/arithmetic.rs @@ -23,7 +23,7 @@ use crate::parser::{ context::{context, ParserContext}, error::ParserErrorTree, input::ParserInput, - span::ProgramSpan, + span::Span, ParserResult, }; @@ -75,7 +75,7 @@ impl ArithmeticOperation { #[derive(Debug)] pub struct Arithmetic<'a> { /// [ProgramSpan] associated with this node - span: ProgramSpan<'a>, + span: Span<'a>, /// Type of arithmetic operation kind: ArithmeticOperation, @@ -109,7 +109,7 @@ struct ArithmeticChain<'a> { } impl<'a> ArithmeticChain<'a> { - fn fold(mut self, input_span: &ProgramSpan<'a>) -> Expression<'a> { + fn fold(mut self, input_span: &Span<'a>) -> Expression<'a> { if self.sequence.is_empty() { self.initial } else { @@ -255,7 +255,7 @@ impl<'a> ProgramAST<'a> for Arithmetic<'a> { todo!() } - fn span(&self) -> ProgramSpan<'a> { + fn span(&self) -> Span<'a> { self.span } diff --git a/nemo/src/parser/ast/expression/complex/atom.rs b/nemo/src/parser/ast/expression/complex/atom.rs index 708850da4..c2d644799 100644 --- a/nemo/src/parser/ast/expression/complex/atom.rs +++ 
b/nemo/src/parser/ast/expression/complex/atom.rs @@ -9,7 +9,7 @@ use crate::parser::{ }, context::{context, ParserContext}, input::ParserInput, - span::ProgramSpan, + span::Span, ParserResult, }; @@ -17,7 +17,7 @@ use crate::parser::{ #[derive(Debug)] pub struct Atom<'a> { /// [ProgramSpan] associated with this node - span: ProgramSpan<'a>, + span: Span<'a>, /// Tag of this Atom tag: StructureTag<'a>, @@ -51,7 +51,7 @@ impl<'a> ProgramAST<'a> for Atom<'a> { result } - fn span(&self) -> ProgramSpan<'a> { + fn span(&self) -> Span<'a> { self.span } diff --git a/nemo/src/parser/ast/expression/complex/infix.rs b/nemo/src/parser/ast/expression/complex/infix.rs index 98911eeb3..79be90530 100644 --- a/nemo/src/parser/ast/expression/complex/infix.rs +++ b/nemo/src/parser/ast/expression/complex/infix.rs @@ -17,7 +17,7 @@ use crate::parser::{ }, context::{context, ParserContext}, input::ParserInput, - span::ProgramSpan, + span::Span, ParserResult, }; @@ -54,7 +54,7 @@ pub enum InfixExpressionKind { #[derive(Debug)] pub struct InfixExpression<'a> { /// [ProgramSpan] associated with this node - span: ProgramSpan<'a>, + span: Span<'a>, /// Kind of infix expression kind: InfixExpressionKind, @@ -116,7 +116,7 @@ impl<'a> ProgramAST<'a> for InfixExpression<'a> { vec![&*self.left, &*self.right] } - fn span(&self) -> ProgramSpan<'a> { + fn span(&self) -> Span<'a> { self.span } diff --git a/nemo/src/parser/ast/expression/complex/map.rs b/nemo/src/parser/ast/expression/complex/map.rs index cdb403fbb..05c25bd14 100644 --- a/nemo/src/parser/ast/expression/complex/map.rs +++ b/nemo/src/parser/ast/expression/complex/map.rs @@ -12,7 +12,7 @@ use crate::parser::{ }, context::{context, ParserContext}, input::ParserInput, - span::ProgramSpan, + span::Span, ParserResult, }; @@ -20,7 +20,7 @@ use crate::parser::{ #[derive(Debug)] pub struct Map<'a> { /// [ProgramSpan] associated with this node - span: ProgramSpan<'a>, + span: Span<'a>, /// Tag of this map, if it exists tag: Option>, @@ -58,7 +58,7 @@ impl<'a> ProgramAST<'a> for Map<'a> { result } - fn span(&self) -> ProgramSpan<'a> { + fn span(&self) -> Span<'a> { self.span } diff --git a/nemo/src/parser/ast/expression/complex/negation.rs b/nemo/src/parser/ast/expression/complex/negation.rs index fee98cdaa..c727ec6b2 100644 --- a/nemo/src/parser/ast/expression/complex/negation.rs +++ b/nemo/src/parser/ast/expression/complex/negation.rs @@ -6,7 +6,7 @@ use crate::parser::{ ast::{expression::Expression, token::Token, ProgramAST}, context::{context, ParserContext}, input::ParserInput, - span::ProgramSpan, + span::Span, ParserResult, }; @@ -14,7 +14,7 @@ use crate::parser::{ #[derive(Debug)] pub struct Negation<'a> { /// [ProgramSpan] associated with this node - span: ProgramSpan<'a>, + span: Span<'a>, /// The negated expression expression: Box>, @@ -34,7 +34,7 @@ impl<'a> ProgramAST<'a> for Negation<'a> { vec![&*self.expression] } - fn span(&self) -> ProgramSpan<'a> { + fn span(&self) -> Span<'a> { self.span } diff --git a/nemo/src/parser/ast/expression/complex/operation.rs b/nemo/src/parser/ast/expression/complex/operation.rs index 0203e27f4..d5f7710ac 100644 --- a/nemo/src/parser/ast/expression/complex/operation.rs +++ b/nemo/src/parser/ast/expression/complex/operation.rs @@ -11,7 +11,7 @@ use crate::{ }, context::{context, ParserContext}, input::ParserInput, - span::ProgramSpan, + span::Span, ParserResult, }, rule_model::components::term::operation::operation_kind::OperationKind, @@ -23,7 +23,7 @@ use crate::{ #[derive(Debug)] pub struct Operation<'a> { /// 
[ProgramSpan] associated with this node - span: ProgramSpan<'a>, + span: Span<'a>, /// Type of operation tag: OperationTag<'a>, @@ -57,7 +57,7 @@ impl<'a> ProgramAST<'a> for Operation<'a> { result } - fn span(&self) -> ProgramSpan<'a> { + fn span(&self) -> Span<'a> { self.span } diff --git a/nemo/src/parser/ast/expression/complex/tuple.rs b/nemo/src/parser/ast/expression/complex/tuple.rs index 4ae1d433e..3f42b21a5 100644 --- a/nemo/src/parser/ast/expression/complex/tuple.rs +++ b/nemo/src/parser/ast/expression/complex/tuple.rs @@ -12,7 +12,7 @@ use crate::parser::{ }, context::{context, ParserContext}, input::ParserInput, - span::ProgramSpan, + span::Span, ParserResult, }; @@ -20,7 +20,7 @@ use crate::parser::{ #[derive(Debug)] pub struct Tuple<'a> { /// [ProgramSpan] associated with this node - span: ProgramSpan<'a>, + span: Span<'a>, /// List of underlying expressions expressions: ExpressionSequenceOne<'a>, @@ -46,7 +46,7 @@ impl<'a> ProgramAST<'a> for Tuple<'a> { result } - fn span(&self) -> ProgramSpan<'a> { + fn span(&self) -> Span<'a> { self.span } diff --git a/nemo/src/parser/ast/program.rs b/nemo/src/parser/ast/program.rs index 3da33b2a4..4ae0dbdeb 100644 --- a/nemo/src/parser/ast/program.rs +++ b/nemo/src/parser/ast/program.rs @@ -10,7 +10,7 @@ use crate::parser::{ context::{context, ParserContext}, error::{recover, report_error}, input::ParserInput, - span::ProgramSpan, + span::Span, ParserResult, }; @@ -24,7 +24,7 @@ use super::{ #[derive(Debug)] pub struct Program<'a> { /// [ProgramSpan] associated with this node - span: ProgramSpan<'a>, + span: Span<'a>, /// Top level comment comment: Option>, @@ -63,7 +63,7 @@ impl<'a> ProgramAST<'a> for Program<'a> { result } - fn span(&self) -> ProgramSpan<'a> { + fn span(&self) -> Span<'a> { self.span } diff --git a/nemo/src/parser/ast/rule.rs b/nemo/src/parser/ast/rule.rs index 3f1818ad2..8e2008329 100644 --- a/nemo/src/parser/ast/rule.rs +++ b/nemo/src/parser/ast/rule.rs @@ -8,7 +8,7 @@ use nom::{ use crate::parser::{ context::{context, ParserContext}, input::ParserInput, - span::ProgramSpan, + span::Span, ParserResult, }; @@ -21,7 +21,7 @@ use super::{ #[derive(Debug)] pub struct Rule<'a> { /// [ProgramSpan] associated with this node - span: ProgramSpan<'a>, + span: Span<'a>, /// Attributes attached to this rule attributes: Vec>, @@ -62,7 +62,7 @@ impl<'a> ProgramAST<'a> for Rule<'a> { result } - fn span(&self) -> ProgramSpan<'a> { + fn span(&self) -> Span<'a> { self.span } diff --git a/nemo/src/parser/ast/sequence/declare.rs b/nemo/src/parser/ast/sequence/declare.rs index d68c04cad..c6be8b6b1 100644 --- a/nemo/src/parser/ast/sequence/declare.rs +++ b/nemo/src/parser/ast/sequence/declare.rs @@ -15,7 +15,7 @@ use crate::parser::{ ProgramAST, }, input::ParserInput, - span::ProgramSpan, + span::Span, ParserResult, }; @@ -23,7 +23,7 @@ use crate::parser::{ #[derive(Debug)] pub struct DeclareSequence<'a> { /// [ProgramSpan] associated with this sequence - _span: ProgramSpan<'a>, + _span: Span<'a>, /// List of name-type pairs pairs: Vec<(ParameterName<'a>, DataTypeTag<'a>)>, diff --git a/nemo/src/parser/ast/sequence/key_value.rs b/nemo/src/parser/ast/sequence/key_value.rs index 7c3d5b46f..68738397c 100644 --- a/nemo/src/parser/ast/sequence/key_value.rs +++ b/nemo/src/parser/ast/sequence/key_value.rs @@ -10,7 +10,7 @@ use nom::{ use crate::parser::{ ast::{comment::wsoc::WSoC, expression::Expression, token::Token, ProgramAST}, input::ParserInput, - span::ProgramSpan, + span::Span, ParserResult, }; @@ -18,7 +18,7 @@ use crate::parser::{ 
#[derive(Debug)] pub struct KeyValueSequence<'a> { /// [ProgramSpan] associated with this sequence - _span: ProgramSpan<'a>, + _span: Span<'a>, /// List of key-value pairs expressions: Vec<(Expression<'a>, Expression<'a>)>, diff --git a/nemo/src/parser/ast/sequence/one.rs b/nemo/src/parser/ast/sequence/one.rs index 9843e7a3e..d88f29772 100644 --- a/nemo/src/parser/ast/sequence/one.rs +++ b/nemo/src/parser/ast/sequence/one.rs @@ -7,7 +7,7 @@ use nom::{branch::alt, combinator::map, multi::separated_list1, sequence::tuple} use crate::parser::{ ast::{comment::wsoc::WSoC, expression::Expression, token::Token, ProgramAST}, input::ParserInput, - span::ProgramSpan, + span::Span, ParserResult, }; @@ -17,7 +17,7 @@ use crate::parser::{ #[derive(Debug)] pub struct ExpressionSequenceOne<'a> { /// [ProgramSpan] associated with this sequence - _span: ProgramSpan<'a>, + _span: Span<'a>, /// List of expressions expressions: Vec>, diff --git a/nemo/src/parser/ast/sequence/simple.rs b/nemo/src/parser/ast/sequence/simple.rs index 16ed32ba8..fbb4a4c8b 100644 --- a/nemo/src/parser/ast/sequence/simple.rs +++ b/nemo/src/parser/ast/sequence/simple.rs @@ -7,7 +7,7 @@ use nom::{multi::separated_list1, sequence::tuple}; use crate::parser::{ ast::{comment::wsoc::WSoC, expression::Expression, token::Token, ProgramAST}, input::ParserInput, - span::ProgramSpan, + span::Span, ParserResult, }; @@ -15,7 +15,7 @@ use crate::parser::{ #[derive(Debug)] pub struct ExpressionSequenceSimple<'a> { /// [ProgramSpan] associated with this sequence - _span: ProgramSpan<'a>, + _span: Span<'a>, /// List of expressions expressions: Vec>, diff --git a/nemo/src/parser/ast/statement.rs b/nemo/src/parser/ast/statement.rs index f917619c1..4e73978f2 100644 --- a/nemo/src/parser/ast/statement.rs +++ b/nemo/src/parser/ast/statement.rs @@ -10,7 +10,7 @@ use nom::{ use crate::parser::{ context::{context, ParserContext}, input::ParserInput, - span::ProgramSpan, + span::Span, ParserResult, }; @@ -58,7 +58,7 @@ impl<'a> StatementKind<'a> { #[derive(Debug)] pub struct Statement<'a> { /// [ProgramSpan] associated with this node - span: ProgramSpan<'a>, + span: Span<'a>, /// Doc comment associated with this statement comment: Option>, @@ -90,7 +90,7 @@ impl<'a> ProgramAST<'a> for Statement<'a> { }] } - fn span(&self) -> ProgramSpan<'a> { + fn span(&self) -> Span<'a> { self.span } diff --git a/nemo/src/parser/ast/tag/aggregation.rs b/nemo/src/parser/ast/tag/aggregation.rs index ef64fd5a8..19c7105a9 100644 --- a/nemo/src/parser/ast/tag/aggregation.rs +++ b/nemo/src/parser/ast/tag/aggregation.rs @@ -10,7 +10,7 @@ use crate::{ context::{context, ParserContext}, error::ParserErrorTree, input::ParserInput, - span::ProgramSpan, + span::Span, ParserResult, }, rule_model::components::term::aggregate::AggregateKind, @@ -20,7 +20,7 @@ use crate::{ #[derive(Debug)] pub struct AggregationTag<'a> { /// [ProgramSpan] associated with this node - span: ProgramSpan<'a>, + span: Span<'a>, /// Type of aggregation, if known kind: Option, @@ -40,7 +40,7 @@ impl<'a> ProgramAST<'a> for AggregationTag<'a> { Vec::default() } - fn span(&self) -> ProgramSpan<'a> { + fn span(&self) -> Span<'a> { self.span } diff --git a/nemo/src/parser/ast/tag/datatype.rs b/nemo/src/parser/ast/tag/datatype.rs index 12562b36d..98bfe5808 100644 --- a/nemo/src/parser/ast/tag/datatype.rs +++ b/nemo/src/parser/ast/tag/datatype.rs @@ -10,7 +10,7 @@ use crate::{ context::{context, ParserContext}, error::ParserErrorTree, input::ParserInput, - span::ProgramSpan, + span::Span, ParserResult, }, 
rule_model::components::datatype::DataType, @@ -20,7 +20,7 @@ use crate::{ #[derive(Debug)] pub struct DataTypeTag<'a> { /// [ProgramSpan] associated with this node - span: ProgramSpan<'a>, + span: Span<'a>, /// Data type data_type: DataType, @@ -40,7 +40,7 @@ impl<'a> ProgramAST<'a> for DataTypeTag<'a> { Vec::default() } - fn span(&self) -> ProgramSpan<'a> { + fn span(&self) -> Span<'a> { self.span } diff --git a/nemo/src/parser/ast/tag/operation.rs b/nemo/src/parser/ast/tag/operation.rs index ef1a65727..d060fa8c8 100644 --- a/nemo/src/parser/ast/tag/operation.rs +++ b/nemo/src/parser/ast/tag/operation.rs @@ -10,7 +10,7 @@ use crate::{ context::{context, ParserContext}, error::ParserErrorTree, input::ParserInput, - span::ProgramSpan, + span::Span, ParserResult, }, rule_model::components::term::operation::operation_kind::OperationKind, @@ -20,7 +20,7 @@ use crate::{ #[derive(Debug)] pub struct OperationTag<'a> { /// [ProgramSpan] associated with this node - span: ProgramSpan<'a>, + span: Span<'a>, /// Type of operation kind: OperationKind, @@ -40,7 +40,7 @@ impl<'a> ProgramAST<'a> for OperationTag<'a> { Vec::default() } - fn span(&self) -> ProgramSpan<'a> { + fn span(&self) -> Span<'a> { self.span } diff --git a/nemo/src/parser/ast/tag/parameter.rs b/nemo/src/parser/ast/tag/parameter.rs index 4ceea26dd..0a5aa545b 100644 --- a/nemo/src/parser/ast/tag/parameter.rs +++ b/nemo/src/parser/ast/tag/parameter.rs @@ -6,7 +6,7 @@ use crate::parser::{ ast::{token::Token, ProgramAST}, context::{context, ParserContext}, input::ParserInput, - span::ProgramSpan, + span::Span, ParserResult, }; @@ -23,7 +23,7 @@ pub enum Parameter { #[derive(Debug)] pub struct ParameterName<'a> { /// [ProgramSpan] associated with this node - span: ProgramSpan<'a>, + span: Span<'a>, /// Parameter parameter: Parameter, @@ -43,7 +43,7 @@ impl<'a> ProgramAST<'a> for ParameterName<'a> { Vec::default() } - fn span(&self) -> ProgramSpan<'a> { + fn span(&self) -> Span<'a> { self.span } diff --git a/nemo/src/parser/ast/tag/structure.rs b/nemo/src/parser/ast/tag/structure.rs index 32a911512..11001b15e 100644 --- a/nemo/src/parser/ast/tag/structure.rs +++ b/nemo/src/parser/ast/tag/structure.rs @@ -6,7 +6,7 @@ use crate::parser::{ ast::{expression::basic::iri::Iri, token::Token, ProgramAST}, context::{context, ParserContext}, input::ParserInput, - span::ProgramSpan, + span::Span, ParserResult, }; @@ -25,7 +25,7 @@ pub enum StructureTagKind<'a> { #[derive(Debug)] pub struct StructureTag<'a> { /// [ProgramSpan] associated with this node - span: ProgramSpan<'a>, + span: Span<'a>, /// Type of [StructureTag] kind: StructureTagKind<'a>, @@ -62,7 +62,7 @@ impl<'a> ProgramAST<'a> for StructureTag<'a> { } } - fn span(&self) -> ProgramSpan<'a> { + fn span(&self) -> Span<'a> { self.span } diff --git a/nemo/src/parser/ast/token.rs b/nemo/src/parser/ast/token.rs index 4598ad251..8b172af4a 100644 --- a/nemo/src/parser/ast/token.rs +++ b/nemo/src/parser/ast/token.rs @@ -14,7 +14,7 @@ use nom::{ use crate::parser::{ context::{context, ParserContext}, - span::ProgramSpan, + span::Span, ParserInput, ParserResult, }; @@ -200,7 +200,7 @@ pub enum TokenKind { #[derive(Debug)] pub struct Token<'a> { /// [ProgramSpan] associated with this node - span: ProgramSpan<'a>, + span: Span<'a>, /// The kind of token kind: TokenKind, @@ -231,7 +231,7 @@ macro_rules! string_token { impl<'a> Token<'a> { /// Return the [ProgramSpan] of this token. 
- pub fn span(&self) -> ProgramSpan<'a> { + pub fn span(&self) -> Span<'a> { self.span } @@ -334,7 +334,7 @@ impl<'a> Token<'a> { } /// Create [TokenKind::Error]. - pub fn error(span: ProgramSpan<'a>) -> Token<'a> { + pub fn error(span: Span<'a>) -> Token<'a> { Token { span, kind: TokenKind::Error, diff --git a/nemo/src/parser/input.rs b/nemo/src/parser/input.rs index 728af9e66..0dc893e51 100644 --- a/nemo/src/parser/input.rs +++ b/nemo/src/parser/input.rs @@ -7,12 +7,12 @@ use nom::{ }; use nom_locate::LocatedSpan; -use super::{span::ProgramSpan, ParserState}; +use super::{span::Span, ParserState}; /// Input to a nom parser function #[derive(Debug, Clone)] pub struct ParserInput<'a> { - pub(crate) span: ProgramSpan<'a>, + pub(crate) span: Span<'a>, pub(crate) state: ParserState, } @@ -20,7 +20,7 @@ impl<'a> ParserInput<'a> { /// Create a new [ParserInput] from a string slice. pub fn new(input: &'a str, state: ParserState) -> Self { Self { - span: ProgramSpan(LocatedSpan::new(input)), + span: Span(LocatedSpan::new(input)), state, } } @@ -106,7 +106,7 @@ impl<'a> InputIter for ParserInput<'a> { impl InputTake for ParserInput<'_> { fn take(&self, count: usize) -> Self { Self { - span: ProgramSpan(self.span.0.take(count)), + span: Span(self.span.0.take(count)), state: self.state.clone(), } } @@ -115,11 +115,11 @@ impl InputTake for ParserInput<'_> { let (first, second) = self.span.0.take_split(count); ( Self { - span: ProgramSpan(first), + span: Span(first), state: self.state.clone(), }, Self { - span: ProgramSpan(second), + span: Span(second), state: self.state.clone(), }, ) @@ -207,7 +207,7 @@ where { fn slice(&self, range: R) -> Self { ParserInput { - span: ProgramSpan(self.span.0.slice(range)), + span: Span(self.span.0.slice(range)), state: self.state.clone(), } } diff --git a/nemo/src/parser/span.rs b/nemo/src/parser/span.rs index 6fee3eddf..9ddb4465e 100644 --- a/nemo/src/parser/span.rs +++ b/nemo/src/parser/span.rs @@ -70,15 +70,15 @@ impl CharacterRange { /// Maker for a region of text within a string slice #[derive(Debug, Clone, Copy)] -pub struct ProgramSpan<'a>(pub(crate) LocatedSpan<&'a str>); +pub struct Span<'a>(pub(crate) LocatedSpan<&'a str>); -impl<'a> From> for ProgramSpan<'a> { +impl<'a> From> for Span<'a> { fn from(value: LocatedSpan<&'a str>) -> Self { Self(value) } } -impl<'a> ProgramSpan<'a> { +impl<'a> Span<'a> { /// Compute the [CharacterRange] for this region of text. pub fn range(&self) -> CharacterRange { let start = CharacterPosition { diff --git a/nemo/src/rule_model/error.rs b/nemo/src/rule_model/error.rs index e371732d9..014314c36 100644 --- a/nemo/src/rule_model/error.rs +++ b/nemo/src/rule_model/error.rs @@ -15,7 +15,7 @@ use hint::Hint; use translation_error::TranslationErrorKind; use validation_error::ValidationErrorKind; -use crate::parser::span::{CharacterRange, ProgramSpan}; +use crate::parser::span::{CharacterRange, Span}; use super::origin::Origin; @@ -212,7 +212,7 @@ pub struct TranslationError { impl TranslationError { /// Create a new [TranslationError] from a given [ProgramSPan]. 
- pub fn new<'a>(span: ProgramSpan<'a>, kind: TranslationErrorKind) -> Self { + pub fn new<'a>(span: Span<'a>, kind: TranslationErrorKind) -> Self { let message = kind.to_string(); let mut result = Self { From ce8d6429d8628fe87e38cf4d3fef9a71c91ab293 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Tue, 30 Jul 2024 12:07:44 +0200 Subject: [PATCH 134/214] Add unified syntax file --- nemo/src/lib.rs | 1 + nemo/src/syntax.rs | 384 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 385 insertions(+) create mode 100644 nemo/src/syntax.rs diff --git a/nemo/src/lib.rs b/nemo/src/lib.rs index 4677dc557..ecf384c66 100644 --- a/nemo/src/lib.rs +++ b/nemo/src/lib.rs @@ -27,6 +27,7 @@ pub mod api; pub mod error; pub mod io; pub mod parser; +pub mod syntax; pub mod execution; pub mod model; diff --git a/nemo/src/syntax.rs b/nemo/src/syntax.rs new file mode 100644 index 000000000..378cbf817 --- /dev/null +++ b/nemo/src/syntax.rs @@ -0,0 +1,384 @@ +#![allow(unused)] // only temporary +//! The Syntax constants for the nemo language. +//! Every utilisation of syntax (e.g. parsing or formatting values to +//! string representation) has to reference the constants defined +//! in this module and must not use strings directly. + +/// The token used to terminate statements. +/// Although comments often get treated as statements, they don't get +/// terminated with this token. +/// The terminated statements are directives, facts and rules. +pub const STATEMENT_DELIMITER: &str = "."; +/// The token used to separate elements in a sequence. +pub const SEQUENCE_SEPARATOR: &str = ","; + +pub mod directive { + //! This module contains the syntax definitions for directives. + + /// The token identifying a directive keyword. + pub const INDICATOR_TOKEN: &str = "@"; + + /// The string used in the keyword for the base directive. + pub const BASE: &str = "base"; + + /// The string used in the keyword for the prefix directive. + pub const PREFIX: &str = "prefix"; + + /// The token used to separate prefix and name + pub const NAMESPACE_SEPARATOR: &str = "::"; + + /// The string used in the keyword for the import directive. + pub const IMPORT: &str = "import"; + + /// The token used to assign an import configuration to a predicate. + pub const IMPORT_ASSIGNMENT: &str = ":-"; + + /// The string used in the keyword for the export directive. + pub const EXPORT: &str = "export"; + + /// The token used to assign an export configuration to a predicate. + pub const EXPORT_ASSIGNMENT: &str = ":-"; + + /// The string used in the keyword for the declare directive. + pub const DECLARE: &str = "declare"; + + /// Separator for name datatype pairs in declare directives. + pub const NAME_DATATYPE_SEPARATOR: &str = ":"; + + /// The token used to separate the name and the datatype. + pub const DECLARE_ASSIGNMENT: &str = ":"; + + /// The string used in the keyword for the output directive. + pub const OUTPUT: &str = "output"; +} + +pub mod rule { + //! This module contains the syntax definitions for rules. + + /// The token separating the rule head from the rule body. + pub const ARROW: &str = ":-"; + /// The opening token for attributes. + pub const OPEN_ATTRIBUTE: &str = "#["; + /// The closing token for attributes. + pub const CLOSE_ATTRIBUTE: &str = "]"; +} +pub mod fact { + //! This module contains the syntax definitions for facts. +} + +pub mod expression { + //! This module contains the syntax definitions for expressions + + /// Syntax for atoms + pub mod atom { + /// Token for opening [Atom] term sequence. 
+        pub const OPEN: &str = "(";
+        /// Token to close [Atom] term sequence.
+        pub const CLOSE: &str = ")";
+        /// Token to negate an atom
+        pub const NEG: &str = "~";
+    }
+
+    /// Syntax for aggregates
+    pub mod aggregate {
+        /// Aggregate indicator
+        pub const INDICATOR: &str = "#";
+        /// Opening delimiter
+        pub const OPEN: &str = "(";
+        /// Closing delimiter
+        pub const CLOSE: &str = ")";
+    }
+
+    /// Syntax for variables
+    pub mod variable {
+        /// Indicator for universal variables
+        pub const UNIVERSAL_INDICATOR: &str = "?";
+        /// Indicator for existential variables
+        pub const EXISTENTIAL_INDICATOR: &str = "!";
+    }
+
+    /// Syntax for operations
+    pub mod operation {
+        /// Opening delimiter for argument list
+        pub const OPEN: &str = "(";
+        /// Closing delimiter for argument list
+        pub const CLOSE: &str = ")";
+    }
+}
+
+pub mod comment {
+    //! This module contains the syntax definitions for comments.
+
+    /// The token identifying top level documentation comments.
+    pub const TOP_LEVEL: &str = "//!";
+    /// The token identifying documentation comments.
+    pub const DOC_COMMENT: &str = "///";
+    /// The token identifying normal comments.
+    pub const COMMENT: &str = "//";
+    /// The token that makes four slashes parse as a normal comment and not a doc comment.
+    pub const COMMENT_LONG: &str = "////";
+    /// The continuation of the comment syntax.
+    pub(crate) const COMMENT_EXT: &str = "/";
+    /// The opening token for closed comments.
+    pub const CLOSED_OPEN: &str = "/*";
+    /// The closing token for closed comments.
+    pub const CLOSED_CLOSE: &str = "*/";
+}
+
+pub mod operator {
+    //! This module defines mathematical operators.
+
+    /// Greater than operation
+    pub const GREATER: &str = ">";
+    /// Greater or equal operation
+    pub const GREATER_EQUAL: &str = ">=";
+    /// Less than operation
+    pub const LESS: &str = "<";
+    /// Less or equal operation
+    pub const LESS_EQUAL: &str = "<=";
+    /// Is equal operation
+    pub const EQUAL: &str = "=";
+    /// Is not equal operation
+    pub const UNEQUAL: &str = "!=";
+    /// Addition
+    pub const PLUS: &str = "+";
+    /// Subtraction
+    pub const MINUS: &str = "-";
+    /// Multiplication
+    pub const MUL: &str = "*";
+    /// Division
+    pub const DIV: &str = "/";
+}
+
+pub mod builtin {
+    //! Collection of all builtin functions and aggregates.
+
+    /// This module contains all strings of the supported builtin functions.
+    pub mod function {
+        /// Check if two values are equal to each other
+        pub(crate) const EQUAL: &str = "EQUAL";
+        /// Check if two values are not equal to each other
+        pub(crate) const UNEQUAL: &str = "UNEQUAL";
+        /// Check if a numeric value is greater than another
+        pub(crate) const GREATER: &str = "GREATER";
+        /// Check if a numeric value is greater or equal to another
+        pub(crate) const GREATEREQ: &str = "GREATEREQ";
+        /// Check if a numeric value is smaller than another
+        pub(crate) const LESS: &str = "LESS";
+        /// Check if a numeric value is smaller or equal to another
+        pub(crate) const LESSEQ: &str = "LESSEQ";
+        /// Check if value is an integer
+        pub(crate) const IS_INTEGER: &str = "isInteger";
+        /// Check if value is a 32bit floating point number
+        pub(crate) const IS_FLOAT: &str = "isFloat";
+        /// Check if value is a 64bit floating point number
+        pub(crate) const IS_DOUBLE: &str = "isDouble";
+        /// Check if value is an IRI
+        pub(crate) const IS_IRI: &str = "isIri";
+        /// Check if value is numeric
+        pub(crate) const IS_NUMERIC: &str = "isNumeric";
+        /// Check if value is null
+        pub(crate) const IS_NULL: &str = "isNull";
+        /// Check if value is a string
+        pub(crate) const IS_STRING: &str = "isString";
+        /// Compute the absolute value of a number
+        pub(crate) const ABS: &str = "ABS";
+        /// Compute the square root of a number
+        pub(crate) const SQRT: &str = "SQRT";
+        /// Logical negation of a boolean value
+        pub(crate) const NOT: &str = "NOT";
+        /// String representation of a value
+        pub(crate) const FULLSTR: &str = "fullStr";
+        /// Lexical value
+        pub(crate) const STR: &str = "STR";
+        /// Compute the sine of a value
+        pub(crate) const SIN: &str = "SIN";
+        /// Compute the cosine of a value
+        pub(crate) const COS: &str = "COS";
+        /// Compute the tangent of a value
+        pub(crate) const TAN: &str = "TAN";
+        /// Compute the length of a string
+        pub(crate) const STRLEN: &str = "STRLEN";
+        /// Compute the reverse of a string value
+        pub(crate) const STRREV: &str = "STRREV";
+        /// Replace characters in strings with their upper case version
+        pub(crate) const UCASE: &str = "UCASE";
+        /// Replace characters in strings with their lower case version
+        pub(crate) const LCASE: &str = "LCASE";
+        /// Round a value to the nearest integer
+        pub(crate) const ROUND: &str = "ROUND";
+        /// Round up to the nearest integer
+        pub(crate) const CEIL: &str = "CEIL";
+        /// Round down to the nearest integer
+        pub(crate) const FLOOR: &str = "FLOOR";
+        /// Return the datatype of the value
+        pub(crate) const DATATYPE: &str = "DATATYPE";
+        /// Return the language tag of the value
+        pub(crate) const LANG: &str = "LANG";
+        /// Convert the value to an integer
+        pub(crate) const INT: &str = "INT";
+        /// Convert the value to a 64bit floating point number
+        pub(crate) const DOUBLE: &str = "DOUBLE";
+        /// Convert the value to a 32bit floating point number
+        pub(crate) const FLOAT: &str = "FLOAT";
+        /// Compute the logarithm of the numerical value
+        pub(crate) const LOGARITHM: &str = "LOG";
+        /// Raise the numerical value to a power
+        pub(crate) const POW: &str = "POW";
+        /// Compare two string values
+        pub(crate) const COMPARE: &str = "COMPARE";
+        /// Check if one string value is contained in another
+        pub(crate) const CONTAINS: &str = "CONTAINS";
+        /// Return a substring of a given string value
+        pub(crate) const SUBSTR: &str = "SUBSTR";
+        /// Check if a string starts with a certain string
+        pub(crate) const STRSTARTS: &str = "STRSTARTS";
+        /// Check if a string ends with a certain string
+        pub(crate) const STRENDS: &str = "STRENDS";
"STRENDS"; + /// Return the first part of a string split by some other string + pub(crate) const STRBEFORE: &str = "STRBEFORE"; + /// Return the second part of a string split by some other string + pub(crate) const STRAFTER: &str = "STRAFTER"; + /// Compute the remainder of two numerical values + pub(crate) const REM: &str = "REM"; + /// Compute the and on the bit representation of integer values + pub(crate) const BITAND: &str = "BITAND"; + /// Compute the or on the bit representation of integer values + pub(crate) const BITOR: &str = "BITOR"; + /// Compute the exclusive or on the bit representation of integer values + pub(crate) const BITXOR: &str = "BITXOR"; + /// Compute the maximum of numeric values + pub(crate) const MAX: &str = "MAX"; + /// Compute the minimum of numeric values + pub(crate) const MIN: &str = "MIN"; + /// Compute the lukasiewicz norm of numeric values + pub(crate) const LUKA: &str = "LUKA"; + /// Compute the sum of numerical values + pub(crate) const SUM: &str = "SUM"; + /// Compute the product of numerical values + pub(crate) const PRODUCT: &str = "PROD"; + /// Compute the difference between to numeric values + pub(crate) const SUBTRACTION: &str = "MINUS"; + /// Compute the quotient of two numeric values + pub(crate) const DIVISION: &str = "DIV"; + /// Compute the multiplicative inverse of a numeric value + pub(crate) const INVERSE: &str = "INVERSE"; + /// Compute the logical and between boolean values + pub(crate) const AND: &str = "AND"; + /// Compute the logical or between boolean values + pub(crate) const OR: &str = "OR"; + /// Compute the concatenation of string values + pub(crate) const CONCAT: &str = "CONCAT"; + } + + /// This module contains all strings of the supported builtin aggregates. + pub mod aggregate { + /// Compute the sum of a list of numbers + pub(crate) const SUM: &str = "sum"; + /// Count the number of values + pub(crate) const COUNT: &str = "count"; + /// Return the minimum value + pub(crate) const MIN: &str = "min"; + /// Return the maximum value + pub(crate) const MAX: &str = "max"; + } +} +pub mod datatypes { + //! This module defines the syntax for all supported datatypes. + + /// Can represent values of any type + pub const ANY: &str = "any"; + /// Represents string values + pub const STRING: &str = "string"; + /// Represents 64bit integer values + pub const INT: &str = "int"; + /// Represents 64bit floating-point values + pub const DOUBLE: &str = "double"; + /// Represents 32bit floating-point values + pub const FLOAT: &str = "float"; +} + +pub mod datavalues { + //! This module defines the syntax for datavalues. + pub use nemo_physical::datavalues::syntax::boolean; + pub use nemo_physical::datavalues::syntax::iri; + pub use nemo_physical::datavalues::syntax::map; + pub use nemo_physical::datavalues::syntax::string; + pub use nemo_physical::datavalues::syntax::tuple; + pub use nemo_physical::datavalues::syntax::RDF_DATATYPE_INDICATOR; + + /// Anonymous values such as variables or names + pub const ANONYMOUS: &str = "_"; + /// Dot for decimal numbers + pub const DOT: &str = "."; +} + +pub mod import_export { + //! This module defines the import/export configuration options. + + pub mod attribute { + //! This module defines all the keys + /// Name of the attribute for specifying the resource in import/export directives. + pub const RESOURCE: &str = "resource"; + /// Name of the attribute for specifying the format in import/export directives. 
+        pub const FORMAT: &str = "format";
+        /// Name of the attribute for specifying a base IRI in import/export directives.
+        pub const BASE: &str = "base";
+        /// Name of the attribute for specifying a delimiter in import/export directives for delimiter-separated values format.
+        pub const DSV_DELIMITER: &str = "delimiter";
+        /// Name of the attribute for specifying the compression in import/export directives.
+        pub const COMPRESSION: &str = "compression";
+        /// Name of the attribute for specifying the limit in import/export directives.
+        pub const LIMIT: &str = "limit";
+        // compression
+        /// The name of the compression format that means "no compression".
+        pub const VALUE_COMPRESSION_NONE: &str = "none";
+        /// The name of the gzip compression format.
+        pub const VALUE_COMPRESSION_GZIP: &str = "gzip";
+    }
+
+    pub mod file_format {
+        //! All the "predicate names" used in the maps in import/export directives.
+
+        /// The "predicate name" used for the CSV format in import/export directives.
+        pub const CSV: &str = "csv";
+        /// The "predicate name" used for the DSV format in import/export directives.
+        pub const DSV: &str = "dsv";
+        /// The "predicate name" used for the TSV format in import/export directives.
+        pub const TSV: &str = "tsv";
+        /// The "predicate name" used for the generic RDF format in import/export directives.
+        pub const RDF_UNSPECIFIED: &str = "rdf";
+        /// The "predicate name" used for the N-Triples format in import/export directives.
+        pub const RDF_NTRIPLES: &str = "ntriples";
+        /// The "predicate name" used for the N-Quads format in import/export directives.
+        pub const RDF_NQUADS: &str = "nquads";
+        /// The "predicate name" used for the Turtle format in import/export directives.
+        pub const RDF_TURTLE: &str = "turtle";
+        /// The "predicate name" used for the TriG format in import/export directives.
+        pub const RDF_TRIG: &str = "trig";
+        /// The "predicate name" used for the RDF/XML format in import/export directives.
+        pub const RDF_XML: &str = "rdfxml";
+        /// The "predicate name" used for the JSON format in import/export directives.
+        pub const JSON: &str = "json";
+
+        // file extensions
+        /// The file extension used for CSV files
+        pub(crate) const EXTENSION_CSV: &str = "csv";
+        /// The file extension used for TSV files
+        pub(crate) const EXTENSION_TSV: &str = "tsv";
+        /// The file extension used for DSV files
+        pub(crate) const EXTENSION_DSV: &str = "dsv";
+        /// The file extension used for N-Triples files
+        pub(crate) const EXTENSION_RDF_NTRIPLES: &str = "nt";
+        /// The file extension used for N-Quads files
+        pub(crate) const EXTENSION_RDF_NQUADS: &str = "nq";
+        /// The file extension used for Turtle files
+        pub(crate) const EXTENSION_RDF_TURTLE: &str = "ttl";
+        /// The file extension used for TriG files
+        pub(crate) const EXTENSION_RDF_TRIG: &str = "trig";
+        /// The file extension used for RDF/XML files
+        pub(crate) const EXTENSION_RDF_XML: &str = "rdf";
+        /// The file extension used for JSON files
+        pub(crate) const EXTENSION_JSON: &str = "json";
+    }
+}

From f1a68054b37c7e4292c32d20568197ad8b0b015e Mon Sep 17 00:00:00 2001
From: Jakob Steinberg
Date: Tue, 30 Jul 2024 12:07:44 +0200
Subject: [PATCH 135/214] Use syntax constants instead of str values

---
 .../src/datavalues/boolean_datavalue.rs       | 22 +++++--
 nemo-physical/src/datavalues/map_datavalue.rs | 21 +++----
 .../src/datavalues/other_datavalue.rs         |  4 +-
 nemo-physical/src/datavalues/syntax.rs        | 60 ++++++++++++++-----
 .../src/datavalues/tuple_datavalue.rs         | 17 +++---
 5 files changed, 81 insertions(+), 43 deletions(-)

diff --git a/nemo-physical/src/datavalues/boolean_datavalue.rs b/nemo-physical/src/datavalues/boolean_datavalue.rs
index 1501019c1..53192839d 100644
--- a/nemo-physical/src/datavalues/boolean_datavalue.rs
+++ b/nemo-physical/src/datavalues/boolean_datavalue.rs
@@ -1,6 +1,12 @@
 //! This module provides implementations [DataValue]s that represent boolean values.
 
-use super::{DataValue, ValueDomain};
+use super::{
+    syntax::{
+        boolean::{FALSE, TRUE},
+        RDF_DATATYPE_INDICATOR,
+    },
+    DataValue, ValueDomain,
+};
 
 /// Physical representation of a boolean value
 #[repr(transparent)]
@@ -21,9 +27,9 @@ impl DataValue for BooleanDataValue {
 
     fn lexical_value(&self) -> String {
         if self.0 {
-            "true".to_string()
+            TRUE.to_string()
         } else {
-            "false".to_string()
+            FALSE.to_string()
         }
     }
@@ -41,9 +47,15 @@ impl DataValue for BooleanDataValue {
 
     fn canonical_string(&self) -> String {
         if self.0 {
-            "\"true\"^^<".to_string() + &self.datatype_iri() + ">"
+            format!(
+                "\"{TRUE}\"{RDF_DATATYPE_INDICATOR}<{}>",
+                &self.datatype_iri()
+            )
         } else {
-            "\"false\"^^<".to_string() + &self.datatype_iri() + ">"
+            format!(
+                "\"{FALSE}\"{RDF_DATATYPE_INDICATOR}<{}>",
+                &self.datatype_iri()
+            )
         }
     }
 }
diff --git a/nemo-physical/src/datavalues/map_datavalue.rs b/nemo-physical/src/datavalues/map_datavalue.rs
index e09092135..17014cb10 100644
--- a/nemo-physical/src/datavalues/map_datavalue.rs
+++ b/nemo-physical/src/datavalues/map_datavalue.rs
@@ -3,10 +3,7 @@
 
 use std::collections::BTreeMap;
 
-use super::{
-    syntax::{DELIM_MAP_CLOSE, DELIM_MAP_OPEN, MAP_ASSIGN, MAP_SEPARATOR},
-    AnyDataValue, DataValue, IriDataValue, ValueDomain,
-};
+use super::{syntax::map, AnyDataValue, DataValue, IriDataValue, ValueDomain};
 
 /// Physical representation of a finite map on [DataValue]s.
 ///
@@ -57,16 +54,16 @@ impl DataValue for MapDataValue {
             .iter()
             .map(|v| {
                 DataValue::canonical_string(v.0)
-                    + MAP_ASSIGN
+                    + map::KEY_VALUE_ASSIGN
                    + DataValue::canonical_string(v.1).as_str()
             })
-            .intersperse(MAP_SEPARATOR.to_string())
+            .intersperse(map::SEPARATOR.to_string())
             .collect::<String>();
 
         if let Some(iri) = self.label() {
-            iri.canonical_string() + DELIM_MAP_OPEN + pairs.as_str() + DELIM_MAP_CLOSE
+            iri.canonical_string() + map::OPEN + pairs.as_str() + map::CLOSE
         } else {
-            DELIM_MAP_OPEN.to_string() + pairs.as_str() + DELIM_MAP_CLOSE
+            map::OPEN.to_string() + pairs.as_str() + map::CLOSE
         }
     }
@@ -117,19 +114,19 @@ impl std::fmt::Display for MapDataValue {
         if let Some(iri) = self.label() {
             iri.fmt(f)?;
         }
-        f.write_str(DELIM_MAP_OPEN)?;
+        f.write_str(map::OPEN)?;
 
         let mut first = true;
         for (key, value) in self.pairs.iter() {
             if first {
                 first = false;
             } else {
-                f.write_str(MAP_SEPARATOR)?;
+                f.write_str(map::SEPARATOR)?;
             }
             key.fmt(f)?;
-            f.write_str(MAP_ASSIGN)?;
+            f.write_str(map::KEY_VALUE_ASSIGN)?;
             value.fmt(f)?;
         }
-        f.write_str(DELIM_MAP_CLOSE)
+        f.write_str(map::CLOSE)
     }
 }
diff --git a/nemo-physical/src/datavalues/other_datavalue.rs b/nemo-physical/src/datavalues/other_datavalue.rs
index a98292fad..aaf6acb28 100644
--- a/nemo-physical/src/datavalues/other_datavalue.rs
+++ b/nemo-physical/src/datavalues/other_datavalue.rs
@@ -1,7 +1,7 @@
 //! This module provides implementations [DataValue]s that represent datavalues for
 //! which we have no specific handling.
 
-use super::{DataValue, ValueDomain};
+use super::{syntax::RDF_DATATYPE_INDICATOR, DataValue, ValueDomain};
 
 /// Physical representation of arbitrary datavalues using two Strings, one
 /// for the lexical value and one for the datatype IRI.
@@ -31,7 +31,7 @@ impl DataValue for OtherDataValue {
 
     fn canonical_string(&self) -> String {
         super::datavalue::quote_string(self.0.as_str())
-            + "^^"
+            + RDF_DATATYPE_INDICATOR
            + &super::datavalue::quote_iri(self.1.as_str())
     }
 }
diff --git a/nemo-physical/src/datavalues/syntax.rs b/nemo-physical/src/datavalues/syntax.rs
index 293f1adbb..2c0fdf3fd 100644
--- a/nemo-physical/src/datavalues/syntax.rs
+++ b/nemo-physical/src/datavalues/syntax.rs
@@ -2,20 +2,52 @@
 //! These are kept in one location, since they are required in various
 //! places related to parsing and display.
 
-/// Opening delimiter for tuples.
-pub const DELIM_TUPLE_OPEN: &str = "(";
-/// Closing delimiter for tuples.
-pub const DELIM_TUPLE_CLOSE: &str = ")";
-/// Opening delimiter for tuples.
-pub const DELIM_MAP_OPEN: &str = "{";
-/// Closing delimiter for tuples.
-pub const DELIM_MAP_CLOSE: &str = "}";
-/// Assignment operator for maps.
-pub const MAP_ASSIGN: &str = "=";
-/// Separator for key-value pairs in maps
-pub const MAP_SEPARATOR: &str = ",";
-/// Separator for elements of tuples
-pub const TUPLE_SEPARATOR: &str = ",";
+/// This module defines the syntax for tuple values
+pub mod tuple {
+    /// Opening delimiter for tuples.
+    pub const OPEN: &str = "(";
+    /// Closing delimiter for tuples.
+    pub const CLOSE: &str = ")";
+    /// Separator for elements of tuples
+    pub const SEPARATOR: &str = ",";
+}
+
+/// This module defines the syntax for map values
+pub mod map {
+    /// Opening delimiter for maps.
+    pub const OPEN: &str = "{";
+    /// Closing delimiter for maps.
+    pub const CLOSE: &str = "}";
+    /// Assignment operator for maps.
+ pub const KEY_VALUE_ASSIGN: &str = ":"; + /// Separator for key-value pairs in maps + pub const SEPARATOR: &str = ","; +} + +/// This module defines the boolean values +pub mod boolean { + /// True + pub const TRUE: &str = "true"; + /// False + pub const FALSE: &str = "false"; +} + +/// This module defines the syntax for IRIs +pub mod iri { + /// Opening delimiter for IRIs + pub const OPEN: &str = "<"; + /// Closing delimiter for IRIs + pub const CLOSE: &str = ">"; +} + +/// This module defines the syntax for strings +pub mod string { + /// Language tag indicator after strings + pub const LANG_TAG: &str = "@"; +} + +/// RDF datatype indicator +pub const RDF_DATATYPE_INDICATOR: &str = "^^"; /// Initial part of IRI in all XML Schema types: pub const XSD_PREFIX: &str = "http://www.w3.org/2001/XMLSchema#"; diff --git a/nemo-physical/src/datavalues/tuple_datavalue.rs b/nemo-physical/src/datavalues/tuple_datavalue.rs index 8eb975ee8..c9a62df5c 100644 --- a/nemo-physical/src/datavalues/tuple_datavalue.rs +++ b/nemo-physical/src/datavalues/tuple_datavalue.rs @@ -3,10 +3,7 @@ use std::sync::Arc; -use super::{ - syntax::{DELIM_TUPLE_CLOSE, DELIM_TUPLE_OPEN, TUPLE_SEPARATOR}, - AnyDataValue, DataValue, IriDataValue, ValueDomain, -}; +use super::{syntax::tuple, AnyDataValue, DataValue, IriDataValue, ValueDomain}; /// Physical representation of a fixed-length tuple of [DataValue]s. #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] @@ -49,13 +46,13 @@ impl DataValue for TupleDataValue { .iter() .map(DataValue::canonical_string) //.by_ref() - .intersperse(TUPLE_SEPARATOR.to_string()) + .intersperse(tuple::SEPARATOR.to_string()) .collect::(); if let Some(iri) = self.label() { - iri.canonical_string() + DELIM_TUPLE_OPEN + values.as_str() + DELIM_TUPLE_CLOSE + iri.canonical_string() + tuple::OPEN + values.as_str() + tuple::CLOSE } else { - DELIM_TUPLE_OPEN.to_string() + values.as_str() + DELIM_TUPLE_CLOSE + tuple::OPEN.to_string() + values.as_str() + tuple::CLOSE } } @@ -100,17 +97,17 @@ impl std::fmt::Display for TupleDataValue { if let Some(iri) = self.label() { iri.fmt(f)?; } - f.write_str(DELIM_TUPLE_OPEN)?; + f.write_str(tuple::OPEN)?; let mut first = true; for v in self.values.iter() { if first { first = false; } else { - f.write_str(TUPLE_SEPARATOR)?; + f.write_str(tuple::SEPARATOR)?; } v.fmt(f)?; } - f.write_str(DELIM_TUPLE_CLOSE) + f.write_str(tuple::CLOSE) } } From 1c32885a7d16cc4da201e67e6651a36797b5c3b0 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Tue, 30 Jul 2024 12:23:55 +0200 Subject: [PATCH 136/214] Change syntax constant source to new syntax file --- nemo/src/rule_model/components/datatype.rs | 12 +- .../components/import_export/attributes.rs | 14 +-- .../components/import_export/compression.rs | 6 +- .../components/import_export/file_formats.rs | 42 +++---- .../rule_model/components/term/aggregate.rs | 20 +-- .../term/operation/operation_kind.rs | 116 +++++++++--------- nemo/src/rule_model/syntax.rs | 8 -- nemo/src/rule_model/syntax/aggregates.rs | 10 -- nemo/src/rule_model/syntax/builtins.rs | 116 ------------------ nemo/src/rule_model/syntax/datatypes.rs | 12 -- nemo/src/rule_model/syntax/import_export.rs | 5 - .../syntax/import_export/attributes.rs | 14 --- .../syntax/import_export/compression.rs | 6 - .../syntax/import_export/file_formats.rs | 41 ------- nemo/src/rule_model/translation.rs | 20 +-- 15 files changed, 117 insertions(+), 325 deletions(-) delete mode 100644 nemo/src/rule_model/syntax.rs delete mode 100644 nemo/src/rule_model/syntax/aggregates.rs delete mode 
100644 nemo/src/rule_model/syntax/builtins.rs delete mode 100644 nemo/src/rule_model/syntax/datatypes.rs delete mode 100644 nemo/src/rule_model/syntax/import_export.rs delete mode 100644 nemo/src/rule_model/syntax/import_export/attributes.rs delete mode 100644 nemo/src/rule_model/syntax/import_export/compression.rs delete mode 100644 nemo/src/rule_model/syntax/import_export/file_formats.rs diff --git a/nemo/src/rule_model/components/datatype.rs b/nemo/src/rule_model/components/datatype.rs index 278087728..60dfc12e8 100644 --- a/nemo/src/rule_model/components/datatype.rs +++ b/nemo/src/rule_model/components/datatype.rs @@ -4,24 +4,24 @@ use enum_assoc::Assoc; use strum_macros::EnumIter; -use crate::rule_model::syntax::datatypes; +use crate::syntax::datatypes; #[derive(Assoc, EnumIter, Debug, Copy, Clone, PartialEq, Eq)] #[func(pub fn name(&self) -> &'static str)] pub enum DataType { /// 64bit integer number - #[assoc(name = datatypes::DATATYPE_INT)] + #[assoc(name = datatypes::INT)] Integer, /// 32bit floating point number - #[assoc(name = datatypes::DATATYPE_FLOAT)] + #[assoc(name = datatypes::FLOAT)] Float, /// 64bit floating point number - #[assoc(name = datatypes::DATATYPE_DOUBLE)] + #[assoc(name = datatypes::DOUBLE)] Double, /// String - #[assoc(name = datatypes::DATATYPE_STRING)] + #[assoc(name = datatypes::STRING)] String, /// Any data value - #[assoc(name = datatypes::DATATYPE_ANY)] + #[assoc(name = datatypes::ANY)] Any, } diff --git a/nemo/src/rule_model/components/import_export/attributes.rs b/nemo/src/rule_model/components/import_export/attributes.rs index 159bb857c..65dbba5c8 100644 --- a/nemo/src/rule_model/components/import_export/attributes.rs +++ b/nemo/src/rule_model/components/import_export/attributes.rs @@ -5,29 +5,29 @@ use std::{fmt::Display, hash::Hash}; use enum_assoc::Assoc; -use crate::rule_model::syntax::import_export::attributes; +use crate::syntax::import_export::attribute; /// Supported attributes in import/export directives #[derive(Assoc, Debug, Copy, Clone, PartialEq, Eq, Hash)] #[func(pub fn name(&self) -> &'static str)] pub enum ImportExportAttribute { /// Location of the file - #[assoc(name = attributes::ATTRIBUTE_NAME_RESOURCE)] + #[assoc(name = attribute::RESOURCE)] Resource, /// Data types of the input relations - #[assoc(name = attributes::ATTRIBUTE_NAME_FORMAT)] + #[assoc(name = attribute::FORMAT)] Format, /// Base IRI - #[assoc(name = attributes::ATTRIBUTE_NAME_BASE)] + #[assoc(name = attribute::BASE)] Base, /// Delimiter used to separate values - #[assoc(name = attributes::ATTRIBUTE_NAME_DSV_DELIMITER)] + #[assoc(name = attribute::DSV_DELIMITER)] Delimiter, /// Compression format - #[assoc(name = attributes::ATTRIBUTE_NAME_COMPRESSION)] + #[assoc(name = attribute::COMPRESSION)] Compression, /// Limit import/export to first n number of facts - #[assoc(name = attributes::ATTRIBUTE_NAME_LIMIT)] + #[assoc(name = attribute::LIMIT)] Limit, } diff --git a/nemo/src/rule_model/components/import_export/compression.rs b/nemo/src/rule_model/components/import_export/compression.rs index 2e7c9bcc3..bd1b3b9f5 100644 --- a/nemo/src/rule_model/components/import_export/compression.rs +++ b/nemo/src/rule_model/components/import_export/compression.rs @@ -5,17 +5,17 @@ use std::fmt::Display; use enum_assoc::Assoc; -use crate::rule_model::syntax::import_export::compression; +use crate::syntax::import_export::attribute; /// Compression formats #[derive(Assoc, Debug, Copy, Clone, PartialEq, Eq)] #[func(pub fn name(&self) -> &'static str)] pub enum CompressionFormat { /// 
No compression - #[assoc(name = compression::VALUE_COMPRESSION_NONE)] + #[assoc(name = attribute::VALUE_COMPRESSION_NONE)] None, /// GZip compression - #[assoc(name = compression::VALUE_COMPRESSION_GZIP)] + #[assoc(name = attribute::VALUE_COMPRESSION_GZIP)] GZip, } diff --git a/nemo/src/rule_model/components/import_export/file_formats.rs b/nemo/src/rule_model/components/import_export/file_formats.rs index 7d9c7ce99..e4ae11364 100644 --- a/nemo/src/rule_model/components/import_export/file_formats.rs +++ b/nemo/src/rule_model/components/import_export/file_formats.rs @@ -5,9 +5,9 @@ use std::{collections::HashMap, fmt::Display}; use enum_assoc::Assoc; -use crate::rule_model::{ - components::import_export::attributes::ImportExportAttribute, - syntax::import_export::file_formats, +use crate::{ + rule_model::components::import_export::attributes::ImportExportAttribute, + syntax::import_export::file_format, }; /// Marks whether a an attribute is required or optional @@ -26,64 +26,64 @@ pub(crate) enum AttributeRequirement { #[func(pub fn attributes(&self) -> HashMap)] pub enum FileFormat { /// Comma-separated values - #[assoc(name = file_formats::FILE_FORMAT_CSV)] - #[assoc(extension = file_formats::EXTENSION_CSV)] + #[assoc(name = file_format::CSV)] + #[assoc(extension = file_format::EXTENSION_CSV)] #[assoc(attributes = HashMap::from([ (ImportExportAttribute::Resource, AttributeRequirement::Required) ]))] CSV, /// Delimiter-separated values - #[assoc(name = file_formats::FILE_FORMAT_DSV)] - #[assoc(extension = file_formats::EXTENSION_DSV)] + #[assoc(name = file_format::DSV)] + #[assoc(extension = file_format::EXTENSION_DSV)] #[assoc(attributes = HashMap::from([ (ImportExportAttribute::Resource, AttributeRequirement::Required) ]))] DSV, /// Tab-separated values - #[assoc(name = file_formats::FILE_FORMAT_TSV)] - #[assoc(extension = file_formats::EXTENSION_TSV)] + #[assoc(name = file_format::TSV)] + #[assoc(extension = file_format::EXTENSION_TSV)] #[assoc(attributes = HashMap::from([ (ImportExportAttribute::Resource, AttributeRequirement::Required) ]))] TSV, /// JSON objects - #[assoc(name = file_formats::FILE_FORMAT_JSON)] - #[assoc(extension = file_formats::EXTENSION_JSON)] + #[assoc(name = file_format::JSON)] + #[assoc(extension = file_format::EXTENSION_JSON)] #[assoc(attributes = HashMap::from([ (ImportExportAttribute::Resource, AttributeRequirement::Required) ]))] JSON, /// RDF 1.1 N-Triples - #[assoc(name = file_formats::FILE_FORMAT_RDF_NTRIPLES)] - #[assoc(extension = file_formats::EXTENSION_RDF_NTRIPLES)] + #[assoc(name = file_format::RDF_NTRIPLES)] + #[assoc(extension = file_format::EXTENSION_RDF_NTRIPLES)] #[assoc(attributes = HashMap::from([ (ImportExportAttribute::Resource, AttributeRequirement::Required) ]))] NTriples, /// RDF 1.1 N-Quads - #[assoc(name = file_formats::FILE_FORMAT_RDF_NQUADS)] - #[assoc(extension = file_formats::EXTENSION_RDF_NQUADS)] + #[assoc(name = file_format::RDF_NQUADS)] + #[assoc(extension = file_format::EXTENSION_RDF_NQUADS)] #[assoc(attributes = HashMap::from([ (ImportExportAttribute::Resource, AttributeRequirement::Required) ]))] NQuads, /// RDF 1.1 Turtle - #[assoc(name = file_formats::FILE_FORMAT_RDF_TURTLE)] - #[assoc(extension = file_formats::EXTENSION_RDF_TURTLE)] + #[assoc(name = file_format::RDF_TURTLE)] + #[assoc(extension = file_format::EXTENSION_RDF_TURTLE)] #[assoc(attributes = HashMap::from([ (ImportExportAttribute::Resource, AttributeRequirement::Required) ]))] Turtle, /// RDF 1.1 RDF/XML - #[assoc(name = file_formats::FILE_FORMAT_RDF_XML)] - 
#[assoc(extension = file_formats::EXTENSION_RDF_XML)] + #[assoc(name = file_format::RDF_XML)] + #[assoc(extension = file_format::EXTENSION_RDF_XML)] #[assoc(attributes = HashMap::from([ (ImportExportAttribute::Resource, AttributeRequirement::Required) ]))] RDFXML, /// RDF 1.1 TriG - #[assoc(name = file_formats::FILE_FORMAT_RDF_TRIG)] - #[assoc(extension = file_formats::EXTENSION_RDF_TRIG)] + #[assoc(name = file_format::RDF_TRIG)] + #[assoc(extension = file_format::EXTENSION_RDF_TRIG)] #[assoc(attributes = HashMap::from([ (ImportExportAttribute::Resource, AttributeRequirement::Required) ]))] diff --git a/nemo/src/rule_model/components/term/aggregate.rs b/nemo/src/rule_model/components/term/aggregate.rs index 2b44682fc..4135e61dd 100644 --- a/nemo/src/rule_model/components/term/aggregate.rs +++ b/nemo/src/rule_model/components/term/aggregate.rs @@ -6,11 +6,13 @@ use std::{fmt::Display, hash::Hash}; use enum_assoc::Assoc; use strum_macros::EnumIter; -use crate::rule_model::{ - components::{IterableVariables, ProgramComponent}, - error::ValidationErrorBuilder, - origin::Origin, - syntax::aggregates, +use crate::{ + rule_model::{ + components::{IterableVariables, ProgramComponent}, + error::ValidationErrorBuilder, + origin::Origin, + }, + syntax::builtin::aggregate, }; use super::{primitive::variable::Variable, Term}; @@ -20,16 +22,16 @@ use super::{primitive::variable::Variable, Term}; #[func(pub fn name(&self) -> &'static str)] pub enum AggregateKind { /// Count of distinct values - #[assoc(name = aggregates::AGGREGATE_COUNT)] + #[assoc(name = aggregate::COUNT)] CountValues, /// Minimum numerical value - #[assoc(name = aggregates::AGGREGATE_MIN)] + #[assoc(name = aggregate::MIN)] MinNumber, /// Maximum numerical value - #[assoc(name = aggregates::AGGREGATE_MAX)] + #[assoc(name = aggregate::MAX)] MaxNumber, /// Sum of numerical values - #[assoc(name = aggregates::AGGREGATE_SUM)] + #[assoc(name = aggregate::SUM)] SumOfNumbers, } diff --git a/nemo/src/rule_model/components/term/operation/operation_kind.rs b/nemo/src/rule_model/components/term/operation/operation_kind.rs index 5d3cccb04..a77206da5 100644 --- a/nemo/src/rule_model/components/term/operation/operation_kind.rs +++ b/nemo/src/rule_model/components/term/operation/operation_kind.rs @@ -6,7 +6,7 @@ use std::fmt::Display; use enum_assoc::Assoc; use strum_macros::EnumIter; -use crate::rule_model::syntax::builtins; +use crate::syntax::builtin::function; /// Number of arguments supported by an operation #[derive(Debug)] @@ -45,287 +45,287 @@ impl OperationNumArguments { #[func(pub fn is_boolean(&self) -> bool)] pub enum OperationKind { /// Equality - #[assoc(name = builtins::BUILTIN_EQUAL)] + #[assoc(name = function::EQUAL)] #[assoc(num_arguments = OperationNumArguments::Binary)] #[assoc(is_boolean = true)] Equal, /// Inequality - #[assoc(name = builtins::BUILTIN_UNEQUAL)] + #[assoc(name = function::UNEQUAL)] #[assoc(num_arguments = OperationNumArguments::Binary)] #[assoc(is_boolean = true)] Unequals, /// Sum of numeric values - #[assoc(name = builtins::BUILTIN_SUM)] + #[assoc(name = function::SUM)] #[assoc(num_arguments = OperationNumArguments::Arbitrary)] #[assoc(is_boolean = false)] NumericSum, /// Subtraction between two numeric values - #[assoc(name = builtins::BUILTIN_SUBTRACTION)] + #[assoc(name = function::SUBTRACTION)] #[assoc(num_arguments = OperationNumArguments::Binary)] #[assoc(is_boolean = false)] NumericSubtraction, /// Product of numeric values - #[assoc(name = builtins::BUILTIN_PRODUCT)] + #[assoc(name = function::PRODUCT)] 
     #[assoc(num_arguments = OperationNumArguments::Arbitrary)]
     #[assoc(is_boolean = false)]
     NumericProduct,
     /// Division between two numeric values
-    #[assoc(name = builtins::BUILTIN_DIVISION)]
+    #[assoc(name = function::DIVISION)]
     #[assoc(num_arguments = OperationNumArguments::Binary)]
     #[assoc(is_boolean = false)]
     NumericDivision,
     /// Logarithm of a numeric value to some numeric base
     #[assoc(num_arguments = OperationNumArguments::Binary)]
-    #[assoc(name = builtins::BUILTIN_LOGARITHM)]
+    #[assoc(name = function::LOGARITHM)]
     #[assoc(is_boolean = false)]
     NumericLogarithm,
     /// Numeric value raised to another numeric value
-    #[assoc(name = builtins::BUILTIN_POW)]
+    #[assoc(name = function::POW)]
     #[assoc(num_arguments = OperationNumArguments::Binary)]
     #[assoc(is_boolean = false)]
     NumericPower,
     /// Remainder of a division between two numeric values
-    #[assoc(name = builtins::BUILTIN_REM)]
+    #[assoc(name = function::REM)]
     #[assoc(num_arguments = OperationNumArguments::Binary)]
     #[assoc(is_boolean = false)]
     NumericRemainder,
     /// Numeric greater than or equals comparison
-    #[assoc(name = builtins::BUILTIN_GREATEREQ)]
+    #[assoc(name = function::GREATEREQ)]
     #[assoc(num_arguments = OperationNumArguments::Binary)]
     #[assoc(is_boolean = false)]
     NumericGreaterthaneq,
     /// Numeric greater than comparison
-    #[assoc(name = builtins::BUILTIN_GREATER)]
+    #[assoc(name = function::GREATER)]
     #[assoc(num_arguments = OperationNumArguments::Binary)]
     #[assoc(is_boolean = false)]
     NumericGreaterthan,
     /// Numeric less than or equals comparison
-    #[assoc(name = builtins::BUILTIN_LESSEQ)]
+    #[assoc(name = function::LESSEQ)]
     #[assoc(num_arguments = OperationNumArguments::Binary)]
     #[assoc(is_boolean = false)]
     NumericLessthaneq,
     /// Numeric less than comparison
-    #[assoc(name = builtins::BUILTIN_LESS)]
+    #[assoc(name = function::LESS)]
     #[assoc(num_arguments = OperationNumArguments::Binary)]
     #[assoc(is_boolean = false)]
     NumericLessthan,
     /// Lexicographic comparison between strings
-    #[assoc(name = builtins::BUILTIN_COMPARE)]
+    #[assoc(name = function::COMPARE)]
     #[assoc(num_arguments = OperationNumArguments::Binary)]
     #[assoc(is_boolean = false)]
     StringCompare,
     /// Check whether string is contained in another, corresponding to SPARQL function CONTAINS.
-    #[assoc(name = builtins::BUILTIN_CONTAINS)]
+    #[assoc(name = function::CONTAINS)]
     #[assoc(num_arguments = OperationNumArguments::Binary)]
     #[assoc(is_boolean = true)]
     StringContains,
     /// String starting at some start position
-    #[assoc(name = builtins::BUILTIN_SUBSTR)]
+    #[assoc(name = function::SUBSTR)]
     #[assoc(num_arguments = OperationNumArguments::Choice(vec![OperationNumArguments::Binary, OperationNumArguments::Ternary]))]
     #[assoc(is_boolean = false)]
     StringSubstring,
     /// First part of a string split by some other string
-    #[assoc(name = builtins::BUILTIN_STRBEFORE)]
+    #[assoc(name = function::STRBEFORE)]
     #[assoc(num_arguments = OperationNumArguments::Binary)]
     #[assoc(is_boolean = false)]
     StringBefore,
     /// Second part of a string split by some other string
-    #[assoc(name = builtins::BUILTIN_STRAFTER)]
+    #[assoc(name = function::STRAFTER)]
     #[assoc(num_arguments = OperationNumArguments::Binary)]
     #[assoc(is_boolean = false)]
     StringAfter,
     /// Whether string starts with a certain string
-    #[assoc(name = builtins::BUILTIN_STRSTARTS)]
+    #[assoc(name = function::STRSTARTS)]
     #[assoc(num_arguments = OperationNumArguments::Binary)]
     #[assoc(is_boolean = true)]
     StringStarts,
     /// Whether string ends with a certain string
-    #[assoc(name = builtins::BUILTIN_STRENDS)]
+    #[assoc(name = function::STRENDS)]
     #[assoc(num_arguments = OperationNumArguments::Binary)]
     #[assoc(is_boolean = true)]
     StringEnds,
     /// Boolean negation
-    #[assoc(name = builtins::BUILTIN_NOT)]
+    #[assoc(name = function::NOT)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
     #[assoc(is_boolean = true)]
     BooleanNegation,
     /// Cast to double
-    #[assoc(name = builtins::BUILTIN_DOUBLE)]
+    #[assoc(name = function::DOUBLE)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
     #[assoc(is_boolean = false)]
     CastToDouble,
     /// Cast to float
-    #[assoc(name = builtins::BUILTIN_FLOAT)]
+    #[assoc(name = function::FLOAT)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
     #[assoc(is_boolean = false)]
     CastToFloat,
     /// Cast to integer
-    #[assoc(name = builtins::BUILTIN_INT)]
+    #[assoc(name = function::INT)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
     #[assoc(is_boolean = false)]
     CastToInteger,
     /// Canonical string representation of a value
-    #[assoc(name = builtins::BUILTIN_FULLSTR)]
+    #[assoc(name = function::FULLSTR)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
     #[assoc(is_boolean = false)]
     CanonicalString,
     /// Check if value is an integer
-    #[assoc(name = builtins::BUILTIN_IS_INTEGER)]
+    #[assoc(name = function::IS_INTEGER)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
     #[assoc(is_boolean = true)]
     CheckIsInteger,
     /// Check if value is a float
-    #[assoc(name = builtins::BUILTIN_IS_FLOAT)]
+    #[assoc(name = function::IS_FLOAT)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
     #[assoc(is_boolean = true)]
     CheckIsFloat,
     /// Check if value is a double
-    #[assoc(name = builtins::BUILTIN_IS_DOUBLE)]
+    #[assoc(name = function::IS_DOUBLE)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
     #[assoc(is_boolean = true)]
     CheckIsDouble,
     /// Check if value is an iri
-    #[assoc(name = builtins::BUILTIN_IS_IRI)]
+    #[assoc(name = function::IS_IRI)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
     #[assoc(is_boolean = true)]
     CheckIsIri,
     /// Check if value is numeric
-    #[assoc(name = builtins::BUILTIN_IS_NUMERIC)]
+    #[assoc(name = function::IS_NUMERIC)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
     #[assoc(is_boolean = true)]
     CheckIsNumeric,
     /// Check if value is a null
-    #[assoc(name = builtins::BUILTIN_IS_NULL)]
+    #[assoc(name = function::IS_NULL)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
     #[assoc(is_boolean = true)]
     CheckIsNull,
     /// Check if value is a string
-    #[assoc(name = builtins::BUILTIN_IS_STRING)]
+    #[assoc(name = function::IS_STRING)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
     #[assoc(is_boolean = true)]
     CheckIsString,
     /// Get datatype of a value
-    #[assoc(name = builtins::BUILTIN_DATATYPE)]
+    #[assoc(name = function::DATATYPE)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
     #[assoc(is_boolean = false)]
     Datatype,
     /// Get language tag of a language-tagged string
-    #[assoc(name = builtins::BUILTIN_LANG)]
+    #[assoc(name = function::LANG)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
     #[assoc(is_boolean = false)]
     LanguageTag,
     /// Absolute value of a numeric value
-    #[assoc(name = builtins::BUILTIN_ABS)]
+    #[assoc(name = function::ABS)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
     #[assoc(is_boolean = false)]
     NumericAbsolute,
     /// Cosine of a numeric value
-    #[assoc(name = builtins::BUILTIN_COS)]
+    #[assoc(name = function::COS)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
     #[assoc(is_boolean = false)]
     NumericCosine,
     /// Rounding up of a numeric value
-    #[assoc(name = builtins::BUILTIN_CEIL)]
+    #[assoc(name = function::CEIL)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
     #[assoc(is_boolean = false)]
     NumericCeil,
     /// Rounding down of a numeric value
-    #[assoc(name = builtins::BUILTIN_FLOOR)]
+    #[assoc(name = function::FLOOR)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
     #[assoc(is_boolean = false)]
     NumericFloor,
     /// Additive inverse of a numeric value
-    #[assoc(name = builtins::BUILTIN_INVERSE)]
+    #[assoc(name = function::INVERSE)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
     #[assoc(is_boolean = false)]
     NumericNegation,
     /// Rounding of a numeric value
-    #[assoc(name = builtins::BUILTIN_ROUND)]
+    #[assoc(name = function::ROUND)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
     #[assoc(is_boolean = false)]
     NumericRound,
     /// Sine of a numeric value
-    #[assoc(name = builtins::BUILTIN_SIN)]
+    #[assoc(name = function::SIN)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
     #[assoc(is_boolean = false)]
     NumericSine,
     /// Square root of a numeric value
-    #[assoc(name = builtins::BUILTIN_SQRT)]
+    #[assoc(name = function::SQRT)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
     #[assoc(is_boolean = false)]
     NumericSquareroot,
     /// Tangent of a numeric value
-    #[assoc(name = builtins::BUILTIN_TAN)]
+    #[assoc(name = function::TAN)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
     #[assoc(is_boolean = false)]
     NumericTangent,
     /// Length of a string value
-    #[assoc(name = builtins::BUILTIN_STRLEN)]
+    #[assoc(name = function::STRLEN)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
     #[assoc(is_boolean = false)]
     StringLength,
     /// Reverse of a string value
-    #[assoc(name = builtins::BUILTIN_STRREV)]
+    #[assoc(name = function::STRREV)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
     #[assoc(is_boolean = false)]
     StringReverse,
     /// String converted to lowercase letters
-    #[assoc(name = builtins::BUILTIN_LCASE)]
+    #[assoc(name = function::LCASE)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
     #[assoc(is_boolean = false)]
     StringLowercase,
     /// String converted to uppercase letters
-    #[assoc(name = builtins::BUILTIN_UCASE)]
+    #[assoc(name = function::UCASE)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
     #[assoc(is_boolean = false)]
     StringUppercase,
     /// Bitwise and operation
-    #[assoc(name = builtins::BUILTIN_BITAND)]
+    #[assoc(name = function::BITAND)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
     #[assoc(is_boolean = false)]
     BitAnd,
     /// Bitwise or operation
-    #[assoc(name = builtins::BUILTIN_BITOR)]
+    #[assoc(name = function::BITOR)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
     #[assoc(is_boolean = false)]
     BitOr,
     /// Bitwise xor operation
-    #[assoc(name = builtins::BUILTIN_BITXOR)]
+    #[assoc(name = function::BITXOR)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
     #[assoc(is_boolean = false)]
     BitXor,
     /// Conjunction of boolean values
-    #[assoc(name = builtins::BUILTIN_AND)]
+    #[assoc(name = function::AND)]
     #[assoc(num_arguments = OperationNumArguments::Arbitrary)]
     #[assoc(is_boolean = true)]
     BooleanConjunction,
     /// Disjunction of boolean values
-    #[assoc(name = builtins::BUILTIN_OR)]
+    #[assoc(name = function::OR)]
     #[assoc(num_arguments = OperationNumArguments::Arbitrary)]
     #[assoc(is_boolean = true)]
     BooleanDisjunction,
     /// Minimum of numeric values
-    #[assoc(name = builtins::BUILTIN_MIN)]
+    #[assoc(name = function::MIN)]
     #[assoc(num_arguments = OperationNumArguments::Arbitrary)]
     #[assoc(is_boolean = false)]
     NumericMinimum,
     /// Maximum of numeric values
-    #[assoc(name = builtins::BUILTIN_MAX)]
+    #[assoc(name = function::MAX)]
     #[assoc(num_arguments = OperationNumArguments::Arbitrary)]
     #[assoc(is_boolean = false)]
     NumericMaximum,
     /// Lukasiewicz norm of numeric values
-    #[assoc(name = builtins::BUILTIN_LUKA)]
+    #[assoc(name = function::LUKA)]
     #[assoc(num_arguments = OperationNumArguments::Arbitrary)]
     #[assoc(is_boolean = false)]
     NumericLukasiewicz,
     /// Concatenation of two string values, corresponding to SPARQL function CONCAT.
-    #[assoc(name = builtins::BUILTIN_CONCAT)]
+    #[assoc(name = function::CONCAT)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
     #[assoc(is_boolean = false)]
     StringConcatenation,
     /// Lexical value
-    #[assoc(name = builtins::BUILTIN_STR)]
+    #[assoc(name = function::STR)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
     #[assoc(is_boolean = false)]
     LexicalValue,
diff --git a/nemo/src/rule_model/syntax.rs b/nemo/src/rule_model/syntax.rs
deleted file mode 100644
index c6a0042ee..000000000
--- a/nemo/src/rule_model/syntax.rs
+++ /dev/null
@@ -1,8 +0,0 @@
-//! Constants for strings that are relevant to the syntax of rules.
-//! These are kept in one location, since they are required in various
-//! places related to parsing and display.
-
-pub(crate) mod aggregates;
-pub(crate) mod builtins;
-pub(crate) mod datatypes;
-pub(crate) mod import_export;
diff --git a/nemo/src/rule_model/syntax/aggregates.rs b/nemo/src/rule_model/syntax/aggregates.rs
deleted file mode 100644
index 262b6b540..000000000
--- a/nemo/src/rule_model/syntax/aggregates.rs
+++ /dev/null
@@ -1,10 +0,0 @@
-//! This module contains constants relating to aggregate names.
-
-/// Compute the sum of a list of numbers
-pub(crate) const AGGREGATE_SUM: &str = "sum";
-/// Count the number of values
-pub(crate) const AGGREGATE_COUNT: &str = "count";
-/// Return the minimum value
-pub(crate) const AGGREGATE_MIN: &str = "min";
-/// Return the maximum value
-pub(crate) const AGGREGATE_MAX: &str = "max";
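The `function::*` names that the renamed attributes above point to live in a new crate-wide syntax module that is not part of this patch. As a rough sketch only (module path and visibility are assumptions; the string values are taken from the deleted `builtins.rs` below):

```rust
// Hypothetical sketch of the crate-wide replacement for
// `rule_model::syntax::builtins`; only three of the constants are shown.
pub mod function {
    /// Compute the quotient of two numeric values
    pub const DIVISION: &str = "DIV";
    /// Compute the logarithm of the numerical value
    pub const LOGARITHM: &str = "LOG";
    /// Raise the numerical value to a power
    pub const POW: &str = "POW";
}
```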
diff --git a/nemo/src/rule_model/syntax/builtins.rs b/nemo/src/rule_model/syntax/builtins.rs
deleted file mode 100644
index 3ee8e1907..000000000
--- a/nemo/src/rule_model/syntax/builtins.rs
+++ /dev/null
@@ -1,116 +0,0 @@
-//! This module contains constants relating to the builtin functions that are supported.
-
-/// Check if two values are equal to each other
-pub(crate) const BUILTIN_EQUAL: &str = "EQUAL";
-/// Check if two values are not equal to each other
-pub(crate) const BUILTIN_UNEQUAL: &str = "UNEQUAL";
-/// Check if a numeric value is greater than another
-pub(crate) const BUILTIN_GREATER: &str = "GREATER";
-/// Check if a numeric value is greater or equal to another
-pub(crate) const BUILTIN_GREATEREQ: &str = "GREATEREQ";
-/// Check if a numeric value is smaller than another
-pub(crate) const BUILTIN_LESS: &str = "LESS";
-/// Check if a numeric value is smaller or equal to another
-pub(crate) const BUILTIN_LESSEQ: &str = "LESSEQ";
-/// Check if value is an integer
-pub(crate) const BUILTIN_IS_INTEGER: &str = "isInteger";
-/// Check if value is a 32bit floating point number
-pub(crate) const BUILTIN_IS_FLOAT: &str = "isFloat";
-/// Check if value is a 64bit floating point number
-pub(crate) const BUILTIN_IS_DOUBLE: &str = "isDouble";
-/// Check if value is an iri
-pub(crate) const BUILTIN_IS_IRI: &str = "isIri";
-/// Check if value is numeric
-pub(crate) const BUILTIN_IS_NUMERIC: &str = "isNumeric";
-/// Check if value is null
-pub(crate) const BUILTIN_IS_NULL: &str = "isNull";
-/// Check if value is string
-pub(crate) const BUILTIN_IS_STRING: &str = "isString";
-/// Compute the absolute value of a number
-pub(crate) const BUILTIN_ABS: &str = "ABS";
-/// Compute the square root of a number
-pub(crate) const BUILTIN_SQRT: &str = "SQRT";
-/// Logical negation of a boolean value
-pub(crate) const BUILTIN_NOT: &str = "NOT";
-/// String representation of a value
-pub(crate) const BUILTIN_FULLSTR: &str = "fullStr";
-/// Lexical value
-pub(crate) const BUILTIN_STR: &str = "STR";
-/// Compute the sine of a value
-pub(crate) const BUILTIN_SIN: &str = "SIN";
-/// Compute the cosine of a value
-pub(crate) const BUILTIN_COS: &str = "COS";
-/// Compute the tangent of a value
-pub(crate) const BUILTIN_TAN: &str = "TAN";
-/// Compute the length of a string
-pub(crate) const BUILTIN_STRLEN: &str = "STRLEN";
-/// Compute the reverse of a string value
-pub(crate) const BUILTIN_STRREV: &str = "STRREV";
-/// Replace characters in strings with their upper case version
-pub(crate) const BUILTIN_UCASE: &str = "UCASE";
-/// Replace characters in strings with their lower case version
-pub(crate) const BUILTIN_LCASE: &str = "LCASE";
-/// Round a value to the nearest integer
-pub(crate) const BUILTIN_ROUND: &str = "ROUND";
-/// Round up to the nearest integer
-pub(crate) const BUILTIN_CEIL: &str = "CEIL";
-/// Round down to the nearest integer
-pub(crate) const BUILTIN_FLOOR: &str = "FLOOR";
-/// Return the datatype of the value
-pub(crate) const BUILTIN_DATATYPE: &str = "DATATYPE";
-/// Return the language tag of the value
-pub(crate) const BUILTIN_LANG: &str = "LANG";
-/// Convert the value to an integer
-pub(crate) const BUILTIN_INT: &str = "INT";
-/// Convert the value to a 64bit floating point number
-pub(crate) const BUILTIN_DOUBLE: &str = "DOUBLE";
-/// Convert the value to a 32bit floating point number
-pub(crate) const BUILTIN_FLOAT: &str = "FLOAT";
-/// Compute the logarithm of the numerical value
-pub(crate) const BUILTIN_LOGARITHM: &str = "LOG";
-/// Raise the numerical value to a power
-pub(crate) const BUILTIN_POW: &str = "POW";
-/// Compare two string values
-pub(crate) const BUILTIN_COMPARE: &str = "COMPARE";
-/// Check if one string value is contained in another
-pub(crate) const BUILTIN_CONTAINS: &str = "CONTAINS";
-/// Return a substring of a given string value
-pub(crate) const BUILTIN_SUBSTR: &str = "SUBSTR";
-/// Check if a string starts with a certain string
-pub(crate) const BUILTIN_STRSTARTS: &str = "STRSTARTS";
-/// Check if a string ends with a certain string
-pub(crate) const BUILTIN_STRENDS: &str = "STRENDS";
-/// Return the first part of a string split by some other string
-pub(crate) const BUILTIN_STRBEFORE: &str = "STRBEFORE";
-/// Return the second part of a string split by some other string
-pub(crate) const BUILTIN_STRAFTER: &str = "STRAFTER";
-/// Compute the remainder of two numerical values
-pub(crate) const BUILTIN_REM: &str = "REM";
-/// Compute the and on the bit representation of integer values
-pub(crate) const BUILTIN_BITAND: &str = "BITAND";
-/// Compute the or on the bit representation of integer values
-pub(crate) const BUILTIN_BITOR: &str = "BITOR";
-/// Compute the exclusive or on the bit representation of integer values
-pub(crate) const BUILTIN_BITXOR: &str = "BITXOR";
-/// Compute the maximum of numeric values
-pub(crate) const BUILTIN_MAX: &str = "MAX";
-/// Compute the minimum of numeric values
-pub(crate) const BUILTIN_MIN: &str = "MIN";
-/// Compute the lukasiewicz norm of numeric values
-pub(crate) const BUILTIN_LUKA: &str = "LUKA";
-/// Compute the sum of numerical values
-pub(crate) const BUILTIN_SUM: &str = "SUM";
-/// Compute the product of numerical values
-pub(crate) const BUILTIN_PRODUCT: &str = "PROD";
-/// Compute the difference between two numeric values
-pub(crate) const BUILTIN_SUBTRACTION: &str = "MINUS";
-/// Compute the quotient of two numeric values
-pub(crate) const BUILTIN_DIVISION: &str = "DIV";
-/// Compute the multiplicative inverse of a numeric value
-pub(crate) const BUILTIN_INVERSE: &str = "INVERSE";
-/// Compute the logical and between boolean values
-pub(crate) const BUILTIN_AND: &str = "AND";
-/// Compute the logical or between boolean values
-pub(crate) const BUILTIN_OR: &str = "OR";
-/// Compute the concatenation of string values
-pub(crate) const BUILTIN_CONCAT: &str = "CONCAT";
diff --git a/nemo/src/rule_model/syntax/datatypes.rs b/nemo/src/rule_model/syntax/datatypes.rs
deleted file mode 100644
index dbe02911c..000000000
--- a/nemo/src/rule_model/syntax/datatypes.rs
+++ /dev/null
@@ -1,12 +0,0 @@
-//! This module defines constants relating to the data types recognized by nemo.
-
-/// Can represent values of any type
-pub(crate) const DATATYPE_ANY: &str = "any";
-/// Represents string values
-pub(crate) const DATATYPE_STRING: &str = "string";
-/// Represents 64bit integer values
-pub(crate) const DATATYPE_INT: &str = "int";
-/// Represents 64bit floating-point values
-pub(crate) const DATATYPE_DOUBLE: &str = "double";
-/// Represents 32bit floating-point values
-pub(crate) const DATATYPE_FLOAT: &str = "float";
diff --git a/nemo/src/rule_model/syntax/import_export.rs b/nemo/src/rule_model/syntax/import_export.rs
deleted file mode 100644
index 61c4f4440..000000000
--- a/nemo/src/rule_model/syntax/import_export.rs
+++ /dev/null
@@ -1,5 +0,0 @@
-//! This module defines constants relating to import and export directives.
-
-pub(crate) mod attributes;
-pub(crate) mod compression;
-pub(crate) mod file_formats;
diff --git a/nemo/src/rule_model/syntax/import_export/attributes.rs b/nemo/src/rule_model/syntax/import_export/attributes.rs
deleted file mode 100644
index c36ad9a38..000000000
--- a/nemo/src/rule_model/syntax/import_export/attributes.rs
+++ /dev/null
@@ -1,14 +0,0 @@
-//! This module contains constants relating to accepted attributes
-
-/// Name of the attribute for specifying the resource in import/export directives.
-pub(crate) const ATTRIBUTE_NAME_RESOURCE: &str = "resource";
-/// Name of the attribute for specifying the format in import/export directives.
-pub(crate) const ATTRIBUTE_NAME_FORMAT: &str = "format";
-/// Name of the attribute for specifying a base IRI in import/export directives.
-pub(crate) const ATTRIBUTE_NAME_BASE: &str = "base";
-/// Name of the attribute for specifying a delimiter in import/export directives for delimiter-separated values format.
-pub(crate) const ATTRIBUTE_NAME_DSV_DELIMITER: &str = "delimiter";
-/// Name of the attribute for specifying the compression in import/export directives.
-pub(crate) const ATTRIBUTE_NAME_COMPRESSION: &str = "compression";
-/// Name of the attribute for specifying the limit in import/export directives.
-pub(crate) const ATTRIBUTE_NAME_LIMIT: &str = "limit";
diff --git a/nemo/src/rule_model/syntax/import_export/compression.rs b/nemo/src/rule_model/syntax/import_export/compression.rs
deleted file mode 100644
index 4ef00ebbc..000000000
--- a/nemo/src/rule_model/syntax/import_export/compression.rs
+++ /dev/null
@@ -1,6 +0,0 @@
-//! This module contains constants relating to compression formats accepted in import/export statements.
-
-/// The name of the compression format that means "no compression".
-pub(crate) const VALUE_COMPRESSION_NONE: &str = "none";
-/// The name of the gzip compression format.
-pub(crate) const VALUE_COMPRESSION_GZIP: &str = "gzip";
diff --git a/nemo/src/rule_model/syntax/import_export/file_formats.rs b/nemo/src/rule_model/syntax/import_export/file_formats.rs
deleted file mode 100644
index dc60e916f..000000000
--- a/nemo/src/rule_model/syntax/import_export/file_formats.rs
+++ /dev/null
@@ -1,41 +0,0 @@
-//! This module defines constants relating to the supported file formats.
-
-/// The "predicate name" used for the CSV format in import/export directives.
-pub(crate) const FILE_FORMAT_CSV: &str = "csv";
-/// The "predicate name" used for the DSV format in import/export directives.
-pub(crate) const FILE_FORMAT_DSV: &str = "dsv";
-/// The "predicate name" used for the TSV format in import/export directives.
-pub(crate) const FILE_FORMAT_TSV: &str = "tsv";
-/// The "predicate name" used for the generic RDF format in import/export directives.
-pub(crate) const FILE_FORMAT_RDF_UNSPECIFIED: &str = "rdf";
-/// The "predicate name" used for the Ntriples format in import/export directives.
-pub(crate) const FILE_FORMAT_RDF_NTRIPLES: &str = "ntriples";
-/// The "predicate name" used for the NQuads format in import/export directives.
-pub(crate) const FILE_FORMAT_RDF_NQUADS: &str = "nquads";
-/// The "predicate name" used for the Turtle format in import/export directives.
-pub(crate) const FILE_FORMAT_RDF_TURTLE: &str = "turtle";
-/// The "predicate name" used for the TriG format in import/export directives.
-pub(crate) const FILE_FORMAT_RDF_TRIG: &str = "trig";
-/// The "predicate name" used for the RDF/XML format in import/export directives.
-pub(crate) const FILE_FORMAT_RDF_XML: &str = "rdfxml";
-/// The "predicate name" used for the json format in import/export directives.
-pub(crate) const FILE_FORMAT_JSON: &str = "json";
-
-/// The file extension used for CSV files
-pub(crate) const EXTENSION_CSV: &str = "csv";
-/// The file extension used for TSV files
-pub(crate) const EXTENSION_TSV: &str = "csv";
-/// The file extension used for DSV files
-pub(crate) const EXTENSION_DSV: &str = "csv";
-/// The file extension used for Ntriples files
-pub(crate) const EXTENSION_RDF_NTRIPLES: &str = "nt";
-/// The file extension used for NQuads files
-pub(crate) const EXTENSION_RDF_NQUADS: &str = "nq";
-/// The file extension used for Turtle files
-pub(crate) const EXTENSION_RDF_TURTLE: &str = "ttl";
-/// The file extension used for TriG files
-pub(crate) const EXTENSION_RDF_TRIG: &str = "trig";
-/// The file extension used for RDF/XML files
-pub(crate) const EXTENSION_RDF_XML: &str = "rdf";
-/// The file extension used for json files
-pub(crate) const EXTENSION_JSON: &str = "json";
diff --git a/nemo/src/rule_model/translation.rs b/nemo/src/rule_model/translation.rs
index 51a75e040..d80784966 100644
--- a/nemo/src/rule_model/translation.rs
+++ b/nemo/src/rule_model/translation.rs
@@ -133,15 +133,17 @@ impl<'a> ASTProgramTranslation<'a> {
         let mut program_builder = ProgramBuilder::default();
 
         for statement in ast.statements() {
-            match statement.kind() {
-                ast::statement::StatementKind::Fact(_) => todo!(),
-                ast::statement::StatementKind::Rule(rule) => match self.build_rule(rule) {
-                    Ok(new_rule) => program_builder.add_rule(new_rule),
-                    Err(translation_error) => self
-                        .errors
-                        .push(ProgramError::TranslationError(translation_error)),
-                },
-                ast::statement::StatementKind::Directive(_) => todo!(),
+            if let Some(statement) = statement {
+                match statement.kind() {
+                    ast::statement::StatementKind::Fact(_) => todo!(),
+                    ast::statement::StatementKind::Rule(rule) => match self.build_rule(rule) {
+                        Ok(new_rule) => program_builder.add_rule(new_rule),
+                        Err(translation_error) => self
+                            .errors
+                            .push(ProgramError::TranslationError(translation_error)),
+                    },
+                    ast::statement::StatementKind::Directive(_) => todo!(),
+                }
             }
         }
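The `if let Some(statement)` guard above exists because, after the parser refactor later in this series, `ast.statements()` yields `Option<Statement>`: a statement that fails to parse is recovered as `None` so that translation can continue with the remaining statements. A minimal stand-alone sketch of that recovery idea (this helper is hypothetical, not the project's actual `recover` combinator):

```rust
// Hedged sketch: degrade a failed parse to None after reporting it,
// so the caller's loop can keep going instead of aborting.
fn recover_statement<T, E: std::fmt::Debug>(parsed: Result<T, E>) -> Option<T> {
    match parsed {
        Ok(statement) => Some(statement),
        Err(error) => {
            eprintln!("recovered from parse error: {error:?}");
            None
        }
    }
}
```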
From b0abb82d2bf19cea0e8b03f21b55e1babd82fa1b Mon Sep 17 00:00:00 2001
From: Jakob Steinberg
Date: Tue, 30 Jul 2024 12:27:58 +0200
Subject: [PATCH 137/214] Remove syntax module from rule model in favour of
 new crate wide syntax module

---
 nemo/src/rule_model.rs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/nemo/src/rule_model.rs b/nemo/src/rule_model.rs
index bc061e0f8..0d830b668 100644
--- a/nemo/src/rule_model.rs
+++ b/nemo/src/rule_model.rs
@@ -4,7 +4,6 @@
 pub mod util;
 
 pub(crate) mod origin;
-pub(crate) mod syntax;
 
 pub mod components;
 pub mod error;

From 15f3b6a73497398729d9bfd1b1dfdb7c9292fae1 Mon Sep 17 00:00:00 2001
From: Jakob Steinberg
Date: Tue, 30 Jul 2024 12:29:17 +0200
Subject: [PATCH 138/214] Refactor Parser

---
 nemo/src/parser/ast.rs                                |  30 +-
 nemo/src/parser/ast/attribute.rs                      |  11 +-
 nemo/src/parser/ast/comment/closed.rs                 |   4 +
 nemo/src/parser/ast/comment/doc.rs                    |  27 +-
 nemo/src/parser/ast/comment/line.rs                   |  19 +-
 nemo/src/parser/ast/comment/toplevel.rs               |  25 +-
 nemo/src/parser/ast/comment/wsoc.rs                   |  37 +-
 nemo/src/parser/ast/directive.rs                      |  12 +
 nemo/src/parser/ast/directive/base.rs                 |   9 +-
 nemo/src/parser/ast/directive/declare.rs              |  70 ++-
 nemo/src/parser/ast/directive/export.rs               |  38 +-
 nemo/src/parser/ast/directive/import.rs               |  38 +-
 nemo/src/parser/ast/directive/output.rs               |  24 +-
 nemo/src/parser/ast/directive/prefix.rs               |  29 +-
 nemo/src/parser/ast/directive/unknown.rs              |  34 +-
 nemo/src/parser/ast/expression.rs                     |  12 +-
 nemo/src/parser/ast/expression/basic/iri.rs           |   2 +-
 nemo/src/parser/ast/expression/basic/string.rs        |   2 +-
 nemo/src/parser/ast/expression/basic/variable.rs      |  10 +-
 nemo/src/parser/ast/expression/complex/aggregation.rs |   7 +-
 nemo/src/parser/ast/expression/complex/arithmetic.rs  | 138 ++---
 nemo/src/parser/ast/expression/complex/atom.rs        |   4 +-
 nemo/src/parser/ast/expression/complex/infix.rs       |   2 +-
 nemo/src/parser/ast/expression/complex/map.rs         |  24 +-
 nemo/src/parser/ast/expression/complex/operation.rs   |  14 +-
 .../complex/parenthesised_expression.rs               |  89 ++++
 nemo/src/parser/ast/expression/complex/tuple.rs       |  24 +-
 nemo/src/parser/ast/program.rs                        |  46 +-
 nemo/src/parser/ast/rule.rs                           |  14 +-
 nemo/src/parser/ast/sequence.rs                       | 173 ++++++
 nemo/src/parser/ast/sequence/declare.rs               |  93 ++--
 nemo/src/parser/ast/sequence/key_value.rs             |  89 ++--
 nemo/src/parser/ast/sequence/one.rs                   |   6 +-
 nemo/src/parser/ast/sequence/simple.rs                |   2 +-
 nemo/src/parser/ast/statement.rs                      |  41 +-
 nemo/src/parser/ast/tag/datatype.rs                   |   2 +-
 nemo/src/parser/ast/tag/parameter.rs                  |   2 +-
 nemo/src/parser/ast/tag/structure.rs                  |   2 +-
 nemo/src/parser/ast/token.rs                          | 503 +++++++++++++-----
 nemo/src/parser/context.rs                            |  16 +-
 40 files changed, 1146 insertions(+), 578 deletions(-)
 create mode 100644 nemo/src/parser/ast/expression/complex/parenthesised_expression.rs

diff --git a/nemo/src/parser/ast.rs b/nemo/src/parser/ast.rs
index 48535d649..9685a68a6 100644
--- a/nemo/src/parser/ast.rs
+++ b/nemo/src/parser/ast.rs
@@ -14,11 +14,12 @@ pub mod token;
 use std::fmt::Debug;
 
 use super::{context::ParserContext, span::Span, ParserInput, ParserResult};
+use ascii_tree::Tree;
 
 /// Trait implemented by nodes in the abstract syntax tree
 pub trait ProgramAST<'a>: Debug + Sync {
     /// Return all children of this node.
-    fn children(&self) -> Vec<&dyn ProgramAST>;
+    fn children(&'a self) -> Vec<&'a dyn ProgramAST>;
 
     /// Return the region of text this node originates from.
     fn span(&self) -> Span<'a>;
@@ -31,3 +32,30 @@ pub trait ProgramAST<'a>: Debug + Sync {
     /// Return [ParserContext] indicating the type of node.
     fn context(&self) -> ParserContext;
 }
+
+pub(crate) fn ast_to_ascii_tree<'a>(node: &'a dyn ProgramAST<'a>) -> Tree {
+    let mut vec = Vec::new();
+    for child in node.children() {
+        vec.push(ast_to_ascii_tree(child));
+    }
+    let colour = if node.children().is_empty() {
+        "\x1b[91m"
+    } else {
+        "\x1b[92m"
+    };
+    let fragment = *node.span().0.fragment();
+    let str = if fragment.len() > 60 {
+        format!("{:?}[…]", &fragment[0..60])
+    } else {
+        format!("{:?}", fragment)
+    };
+    Tree::Node(
+        format!(
+            "{} \x1b[34m@{}:{} {colour}{str}\x1b[0m",
+            node.context().name().to_string(),
+            node.span().0.location_line(),
+            node.span().0.get_utf8_column()
+        ),
+        vec,
+    )
+}
diff --git a/nemo/src/parser/ast/attribute.rs b/nemo/src/parser/ast/attribute.rs
index 855a2dd05..a90961907 100644
--- a/nemo/src/parser/ast/attribute.rs
+++ b/nemo/src/parser/ast/attribute.rs
@@ -1,9 +1,6 @@
 //! This module defines [Attribute].
 
-use nom::{
-    character::complete::line_ending,
-    sequence::{delimited, pair, terminated, tuple},
-};
+use nom::sequence::{delimited, pair, terminated, tuple};
 
 use crate::parser::{
     context::{context, ParserContext},
@@ -52,11 +49,11 @@ impl<'a> ProgramAST<'a> for Attribute<'a> {
             CONTEXT,
             terminated(
                 delimited(
-                    tuple((Token::hash, Token::open_bracket, WSoC::parse)),
+                    tuple((Token::open_attribute, WSoC::parse)),
                     Atom::parse,
-                    pair(WSoC::parse, Token::closed_bracket),
+                    pair(WSoC::parse, Token::close_attribute),
                 ),
-                line_ending,
+                WSoC::parse,
             ),
         )(input)
         .map(|(rest, content)| {
diff --git a/nemo/src/parser/ast/comment/closed.rs b/nemo/src/parser/ast/comment/closed.rs
index f30604d2b..a20c60331 100644
--- a/nemo/src/parser/ast/comment/closed.rs
+++ b/nemo/src/parser/ast/comment/closed.rs
@@ -26,6 +26,8 @@ pub struct ClosedComment<'a> {
 const CONTEXT: ParserContext = ParserContext::Comment;
 
 impl<'a> ClosedComment<'a> {
+    // NOTE: Should this return a &str, so that the consumer can decide whether to turn it into an
+    // owned value or not?
     /// Return the content of the comment
     pub fn content(&self) -> String {
        self.content.0.to_string()
@@ -50,6 +52,8 @@ impl<'a> ProgramAST<'a> for ClosedComment<'a> {
         context(
             CONTEXT,
             delimited(
+                // NOTE: With this, nested comments are not allowed (won't get parsed
+                // correctly).
                 Token::open_comment,
                 take_until(TokenKind::CloseComment.name()),
                 Token::close_comment,
diff --git a/nemo/src/parser/ast/comment/doc.rs b/nemo/src/parser/ast/comment/doc.rs
index a64de865b..8dc3bdca6 100644
--- a/nemo/src/parser/ast/comment/doc.rs
+++ b/nemo/src/parser/ast/comment/doc.rs
@@ -1,10 +1,11 @@
 //! This module defines [DocComment].
 
 use nom::{
+    branch::alt,
     character::complete::{line_ending, not_line_ending},
-    combinator::opt,
-    multi::separated_list1,
-    sequence::{pair, preceded},
+    combinator::eof,
+    multi::many1,
+    sequence::tuple,
 };
 
 use crate::parser::{
@@ -53,17 +54,19 @@ impl<'a> ProgramAST<'a> for DocComment<'a> {
 
         context(
             CONTEXT,
-            separated_list1(
-                line_ending,
-                preceded(
-                    pair(Token::doc_comment, opt(Token::whitespace)),
-                    not_line_ending,
-                ),
-            ),
+            many1(tuple((
+                Token::space0,
+                Token::doc_comment,
+                not_line_ending,
+                alt((line_ending, eof)),
+            ))),
         )(input)
         .map(|(rest, result)| {
             let rest_span = rest.span;
-            let content = result.into_iter().map(|comment| comment.span).collect();
+            let content = result
+                .into_iter()
+                .map(|(_, _, comment, _)| comment.span)
+                .collect();
 
             (
                 rest,
@@ -95,11 +98,13 @@ mod test {
         let test = vec![
             ("/// my comment", 1),
             ("///my comment\r\n/// my other comment", 2),
+            ("///my comment\r\n   /// my other comment", 2),
         ];
 
         for (input, expected) in test {
             let parser_input = ParserInput::new(input, ParserState::default());
             let result = all_consuming(DocComment::parse)(parser_input);
+            dbg!(&result);
 
             assert!(result.is_ok());
diff --git a/nemo/src/parser/ast/comment/line.rs b/nemo/src/parser/ast/comment/line.rs
index 658d5dbc9..e4eff8c9b 100644
--- a/nemo/src/parser/ast/comment/line.rs
+++ b/nemo/src/parser/ast/comment/line.rs
@@ -1,9 +1,10 @@
 //! This module defines [LineComment].
 
 use nom::{
-    character::complete::not_line_ending,
-    combinator::opt,
-    sequence::{pair, preceded},
+    branch::alt,
+    character::complete::{line_ending, not_line_ending},
+    combinator::eof,
+    sequence::tuple,
 };
 
 use crate::parser::{
@@ -50,12 +51,9 @@ impl<'a> ProgramAST<'a> for LineComment<'a> {
 
         context(
             CONTEXT,
-            preceded(
-                pair(Token::comment, opt(Token::whitespace)),
-                not_line_ending,
-            ),
+            tuple((Token::comment, not_line_ending, alt((line_ending, eof)))),
         )(input)
-        .map(|(rest, content)| {
+        .map(|(rest, (_, content, _))| {
             let rest_span = rest.span;
 
             (
@@ -86,13 +84,16 @@ mod test {
     #[test]
     fn parse_line_comment() {
         let test = vec![
-            ("// my comment", "my comment".to_string()),
+            ("// my comment", " my comment".to_string()),
             ("//my comment", "my comment".to_string()),
+            ("// \tmy comment\n", " \tmy comment".to_string()),
+            ("//// my comment", " my comment".to_string()),
         ];
 
         for (input, expected) in test {
             let parser_input = ParserInput::new(input, ParserState::default());
             let result = all_consuming(LineComment::parse)(parser_input);
+            dbg!(&result);
 
             assert!(result.is_ok());
diff --git a/nemo/src/parser/ast/comment/toplevel.rs b/nemo/src/parser/ast/comment/toplevel.rs
index e5c249c41..1b9453141 100644
--- a/nemo/src/parser/ast/comment/toplevel.rs
+++ b/nemo/src/parser/ast/comment/toplevel.rs
@@ -1,10 +1,11 @@
 //! This module defines [TopLevelComment].
 
 use nom::{
+    branch::alt,
     character::complete::{line_ending, not_line_ending},
-    combinator::opt,
-    multi::separated_list1,
-    sequence::{pair, preceded},
+    combinator::eof,
+    multi::many1,
+    sequence::tuple,
 };
 
 use crate::parser::{
@@ -54,17 +55,19 @@ impl<'a> ProgramAST<'a> for TopLevelComment<'a> {
 
         context(
             CONTEXT,
-            separated_list1(
-                line_ending,
-                preceded(
-                    pair(Token::toplevel_comment, opt(Token::whitespace)),
-                    not_line_ending,
-                ),
-            ),
+            many1(tuple((
+                Token::space0,
+                Token::toplevel_comment,
+                not_line_ending,
+                alt((line_ending, eof)),
+            ))),
         )(input)
         .map(|(rest, result)| {
             let rest_span = rest.span;
-            let content = result.into_iter().map(|result| result.span).collect();
+            let content = result
+                .into_iter()
+                .map(|(_, _, result, _)| result.span)
+                .collect();
 
             (
                 rest,
diff --git a/nemo/src/parser/ast/comment/wsoc.rs b/nemo/src/parser/ast/comment/wsoc.rs
index 3715459f0..15eb2818e 100644
--- a/nemo/src/parser/ast/comment/wsoc.rs
+++ b/nemo/src/parser/ast/comment/wsoc.rs
@@ -8,17 +8,19 @@ use nom::{
 };
 
 use crate::parser::{
-    ast::{token::Token, ProgramAST},
+    ast::{
+        comment::{closed::ClosedComment, line::LineComment},
+        token::Token,
+        ProgramAST,
+    },
     input::ParserInput,
     span::Span,
     ParserResult,
 };
 
-use super::{closed::ClosedComment, line::LineComment};
-
 /// Type of comment that can appear in any "whit-space position"
 #[derive(Debug)]
-pub enum WhiteSpaceComment<'a> {
+pub enum CommentType<'a> {
     /// Line comment
     Line(LineComment<'a>),
     /// Closed comment
@@ -31,33 +33,32 @@ pub struct WSoC<'a> {
     /// [ProgramSpan] associated with this comment
     _span: Span<'a>,
     /// comments
-    comments: Vec<WhiteSpaceComment<'a>>,
+    comments: Vec<CommentType<'a>>,
 }
 
 impl<'a> WSoC<'a> {
     /// Return comments contained within this object.
-    pub fn comments(&self) -> &Vec<WhiteSpaceComment<'a>> {
+    pub fn comments(&self) -> &Vec<CommentType<'a>> {
         &self.comments
     }
 
-    /// Parse one or more white-spaces optionally followed by a comment.
-    pub fn parse_whitespace_comment(
-        input: ParserInput<'a>,
-    ) -> ParserResult<'a, Option<WhiteSpaceComment<'a>>> {
-        preceded(
-            Token::whitespace,
-            opt(alt((
-                map(LineComment::parse, WhiteSpaceComment::Line),
-                map(ClosedComment::parse, WhiteSpaceComment::Closed),
-            ))),
-        )(input)
+    fn parse_whitespace(input: ParserInput<'a>) -> ParserResult<'a, Option<CommentType<'a>>> {
+        Token::whitespace(input).map(|(rest, _)| (rest, None))
+    }
+
+    fn parse_comment(input: ParserInput<'a>) -> ParserResult<'a, Option<CommentType<'a>>> {
+        alt((
+            map(LineComment::parse, CommentType::Line),
+            map(ClosedComment::parse, CommentType::Closed),
+        ))(input)
+        .map(|(rest, comment)| (rest, Some(comment)))
     }
 
     /// Parse whitespace or comments.
     pub fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self> {
         let input_span = input.span;
 
-        many0(Self::parse_whitespace_comment)(input).map(|(rest, comments)| {
+        many0(alt((WSoC::parse_whitespace, WSoC::parse_comment)))(input).map(|(rest, comments)| {
            let rest_span = rest.span;
 
             (
diff --git a/nemo/src/parser/ast/directive.rs b/nemo/src/parser/ast/directive.rs
index dadc789d4..3c0a4bcb3 100644
--- a/nemo/src/parser/ast/directive.rs
+++ b/nemo/src/parser/ast/directive.rs
@@ -168,6 +168,18 @@ mod test {
             ("@output test", ParserContext::Output),
             ("@prefix test: ", ParserContext::Prefix),
             ("@test something", ParserContext::UnknownDirective),
+            ("@basetest ", ParserContext::UnknownDirective),
+            ("@declaretest test(a:int)", ParserContext::UnknownDirective),
+            (
+                "@exporttest test :- csv {}",
+                ParserContext::UnknownDirective,
+            ),
+            (
+                "@importtest test :- csv {}",
+                ParserContext::UnknownDirective,
+            ),
+            ("@outputtest test", ParserContext::UnknownDirective),
+            ("@prefixtest test: ", ParserContext::UnknownDirective),
         ];
 
         for (input, expect) in test {
diff --git a/nemo/src/parser/ast/directive/base.rs b/nemo/src/parser/ast/directive/base.rs
index 114f2fb87..e8345c30a 100644
--- a/nemo/src/parser/ast/directive/base.rs
+++ b/nemo/src/parser/ast/directive/base.rs
@@ -25,6 +25,10 @@ impl<'a> Base<'a> {
     pub fn iri(&self) -> &Iri<'a> {
         &self.iri
     }
+
+    pub fn parse_body(input: ParserInput<'a>) -> ParserResult<'a, Iri<'a>> {
+        Iri::parse(input)
+    }
 }
 
 const CONTEXT: ParserContext = ParserContext::Base;
@@ -48,12 +52,11 @@ impl<'a> ProgramAST<'a> for Base<'a> {
             CONTEXT,
             preceded(
                 tuple((
-                    Token::at,
+                    Token::directive_indicator,
                     Token::directive_base,
-                    WSoC::parse_whitespace_comment,
                     WSoC::parse,
                 )),
-                Iri::parse,
+                Self::parse_body,
             ),
         )(input)
        .map(|(rest, iri)| {
diff --git a/nemo/src/parser/ast/directive/declare.rs b/nemo/src/parser/ast/directive/declare.rs
index 8199d0077..842bc8b88 100644
--- a/nemo/src/parser/ast/directive/declare.rs
+++ b/nemo/src/parser/ast/directive/declare.rs
@@ -1,22 +1,19 @@
 //! This module defines the [Declare] directive.
 
-use nom::sequence::{delimited, pair, preceded, tuple};
+use nom::sequence::{delimited, pair, preceded, separated_pair, tuple};
 
-use crate::{
-    parser::{
-        ast::{
-            comment::wsoc::WSoC,
-            sequence::declare::DeclareSequence,
-            tag::{parameter::Parameter, structure::StructureTag},
-            token::Token,
-            ProgramAST,
-        },
-        context::{context, ParserContext},
-        input::ParserInput,
-        span::Span,
-        ParserResult,
-    },
-    rule_model::components::datatype::DataType,
-};
+use crate::parser::{
+    ast::{
+        comment::wsoc::WSoC,
+        sequence::{declare::NameTypePair, Sequence},
+        tag::structure::StructureTag,
+        token::Token,
+        ProgramAST,
+    },
+    context::{context, ParserContext},
+    input::ParserInput,
+    span::Span,
+    ParserResult,
+};
 
 /// Declare directive, associating atom positions with names and data types
 #[derive(Debug)]
 pub struct Declare<'a> {
     /// Predicate this statement applies to
     predicate: StructureTag<'a>,
     /// The declaration
-    declaration: DeclareSequence<'a>,
+    declaration: Sequence<'a, NameTypePair<'a>>,
 }
 
 impl<'a> Declare<'a> {
@@ -38,12 +35,22 @@ impl<'a> Declare<'a> {
     }
 
     /// Return an iterator over the name-type pairs.
-    pub fn name_type_pairs(&self) -> impl Iterator<Item = (Parameter<'a>, DataType)> + '_ {
-        self.declaration
-            .iter()
-            .map(|(parameter_name, tag_datatype)| {
-                (parameter_name.parameter().clone(), tag_datatype.data_type())
-            })
+    pub fn name_type_pairs(&self) -> impl Iterator<Item = NameTypePair<'a>> + '_ {
+        self.declaration.clone().into_iter()
+    }
+
+    pub fn parse_body(
+        input: ParserInput<'a>,
+    ) -> ParserResult<'a, (StructureTag<'a>, Sequence<'a, NameTypePair<'a>>)> {
+        separated_pair(
+            StructureTag::parse,
+            WSoC::parse,
+            delimited(
+                pair(Token::atom_open, WSoC::parse),
+                Sequence::<NameTypePair>::parse,
+                pair(WSoC::parse, Token::atom_close),
+            ),
+        )(input)
     }
 }
 
@@ -54,9 +61,8 @@ impl<'a> ProgramAST<'a> for Declare<'a> {
         let mut result = Vec::<&dyn ProgramAST>::new();
         result.push(&self.predicate);
 
-        for (parameter, data_type) in self.declaration.iter() {
-            result.push(parameter);
-            result.push(data_type);
+        for pair in self.declaration.iter() {
+            result.push(pair);
         }
 
         result
@@ -76,19 +82,11 @@ impl<'a> ProgramAST<'a> for Declare<'a> {
             CONTEXT,
             preceded(
                 tuple((
-                    Token::at,
+                    Token::directive_indicator,
                     Token::directive_declare,
-                    WSoC::parse_whitespace_comment,
                     WSoC::parse,
                 )),
-                pair(
-                    StructureTag::parse,
-                    delimited(
-                        tuple((WSoC::parse, Token::open_parenthesis, WSoC::parse)),
-                        DeclareSequence::parse,
-                        tuple((WSoC::parse, Token::closed_parenthesis, WSoC::parse)),
-                    ),
-                ),
+                Self::parse_body,
             ),
         )(input)
         .map(|(rest, (predicate, declaration))| {
diff --git a/nemo/src/parser/ast/directive/export.rs b/nemo/src/parser/ast/directive/export.rs
index f78c6d0ce..35a41e9fd 100644
--- a/nemo/src/parser/ast/directive/export.rs
+++ b/nemo/src/parser/ast/directive/export.rs
@@ -1,6 +1,6 @@
 //! This module defines the [Export] directive.
 
-use nom::sequence::{preceded, separated_pair, tuple};
+use nom::sequence::tuple;
 
 use crate::parser::{
     ast::{
@@ -36,22 +36,15 @@ impl<'a> Export<'a> {
         &self.instructions
     }
 
-    /// Parse the left part of the export directive.
-    fn parse_left_part(input: ParserInput<'a>) -> ParserResult<'a, StructureTag<'a>> {
-        preceded(
-            tuple((
-                Token::at,
-                Token::directive_export,
-                WSoC::parse_whitespace_comment,
-                WSoC::parse,
-            )),
+    pub fn parse_body(input: ParserInput<'a>) -> ParserResult<'a, (StructureTag<'a>, Map<'a>)> {
+        tuple((
             StructureTag::parse,
-        )(input)
-    }
-
-    /// Parse the right part of the export directive.
-    fn parse_right_part(input: ParserInput<'a>) -> ParserResult<'a, Map<'a>> {
-        Map::parse(input)
+            WSoC::parse,
+            Token::export_assignment,
+            WSoC::parse,
+            Map::parse,
+        ))(input)
+        .map(|(rest, (predicate, _, _, _, instructions))| (rest, (predicate, instructions)))
     }
 }
 
@@ -74,13 +67,14 @@ impl<'a> ProgramAST<'a> for Export<'a> {
 
         context(
             CONTEXT,
-            separated_pair(
-                Self::parse_left_part,
-                tuple((WSoC::parse, Token::arrow, WSoC::parse)),
-                Self::parse_right_part,
-            ),
+            tuple((
+                Token::directive_indicator,
+                Token::directive_export,
+                WSoC::parse,
+                Self::parse_body,
+            )),
         )(input)
-        .map(|(rest, (predicate, instructions))| {
+        .map(|(rest, (_, _, _, (predicate, instructions)))| {
             let rest_span = rest.span;
 
             (
diff --git a/nemo/src/parser/ast/directive/import.rs b/nemo/src/parser/ast/directive/import.rs
index 1b4621144..3a0a68229 100644
--- a/nemo/src/parser/ast/directive/import.rs
+++ b/nemo/src/parser/ast/directive/import.rs
@@ -1,6 +1,6 @@
 //! This module defines the [Import] directive.
 
-use nom::sequence::{preceded, separated_pair, tuple};
+use nom::sequence::tuple;
 
 use crate::parser::{
     ast::{
@@ -36,22 +36,15 @@ impl<'a> Import<'a> {
         &self.instructions
     }
 
-    /// Parse the left part of the import directive.
-    fn parse_left_part(input: ParserInput<'a>) -> ParserResult<'a, StructureTag<'a>> {
-        preceded(
-            tuple((
-                Token::at,
-                Token::directive_import,
-                WSoC::parse_whitespace_comment,
-                WSoC::parse,
-            )),
+    pub fn parse_body(input: ParserInput<'a>) -> ParserResult<'a, (StructureTag<'a>, Map<'a>)> {
+        tuple((
            StructureTag::parse,
-        )(input)
-    }
-
-    /// Parse the right part of the import directive.
-    fn parse_right_part(input: ParserInput<'a>) -> ParserResult<'a, Map<'a>> {
-        Map::parse(input)
+            WSoC::parse,
+            Token::import_assignment,
+            WSoC::parse,
+            Map::parse,
+        ))(input)
+        .map(|(rest, (predicate, _, _, _, instructions))| (rest, (predicate, instructions)))
     }
 }
 
@@ -74,13 +67,14 @@ impl<'a> ProgramAST<'a> for Import<'a> {
 
         context(
             CONTEXT,
-            separated_pair(
-                Self::parse_left_part,
-                tuple((WSoC::parse, Token::arrow, WSoC::parse)),
-                Self::parse_right_part,
-            ),
+            tuple((
+                Token::directive_indicator,
+                Token::directive_import,
+                WSoC::parse,
+                Self::parse_body,
+            )),
         )(input)
-        .map(|(rest, (predicate, instructions))| {
+        .map(|(rest, (_, _, _, (predicate, instructions)))| {
             let rest_span = rest.span;
 
             (
diff --git a/nemo/src/parser/ast/directive/output.rs b/nemo/src/parser/ast/directive/output.rs
index 71ae107e9..7d321ff76 100644
--- a/nemo/src/parser/ast/directive/output.rs
+++ b/nemo/src/parser/ast/directive/output.rs
@@ -3,7 +3,10 @@
 use nom::sequence::{preceded, tuple};
 
 use crate::parser::{
-    ast::{comment::wsoc::WSoC, tag::structure::StructureTag, token::Token, ProgramAST},
+    ast::{
+        comment::wsoc::WSoC, sequence::Sequence, tag::structure::StructureTag, token::Token,
+        ProgramAST,
+    },
     context::{context, ParserContext},
     input::ParserInput,
     span::Span,
@@ -16,14 +19,18 @@ pub struct Output<'a> {
     /// [ProgramSpan] associated with this node
     span: Span<'a>,
 
-    /// The predicate
-    predicate: StructureTag<'a>,
+    /// A sequence of predicates
+    predicate: Sequence<'a, StructureTag<'a>>,
 }
 
 impl<'a> Output<'a> {
     /// Return the output predicate.
-    pub fn predicate(&self) -> &StructureTag<'a> {
-        &self.predicate
+    pub fn predicate(&self) -> Vec<&StructureTag<'a>> {
+        self.predicate.iter().collect()
+    }
+
+    pub fn parse_body(input: ParserInput<'a>) -> ParserResult<'a, Sequence<'a, StructureTag<'a>>> {
+        Sequence::<StructureTag>::parse(input)
     }
 }
 
@@ -48,12 +55,11 @@ impl<'a> ProgramAST<'a> for Output<'a> {
             CONTEXT,
             preceded(
                 tuple((
-                    Token::at,
+                    Token::directive_indicator,
                     Token::directive_output,
-                    WSoC::parse_whitespace_comment,
                     WSoC::parse,
                 )),
-                StructureTag::parse,
+                Self::parse_body,
             ),
         )(input)
         .map(|(rest, predicate)| {
@@ -95,7 +101,7 @@ mod test {
             assert!(result.is_ok());
 
             let result = result.unwrap();
-            assert_eq!(expected, result.1.predicate().to_string());
+            assert_eq!(expected, result.1.predicate()[0].to_string());
         }
     }
 }
diff --git a/nemo/src/parser/ast/directive/prefix.rs b/nemo/src/parser/ast/directive/prefix.rs
index 3954cbb09..24949d818 100644
--- a/nemo/src/parser/ast/directive/prefix.rs
+++ b/nemo/src/parser/ast/directive/prefix.rs
@@ -19,7 +19,7 @@ pub struct Prefix<'a> {
     /// The prefix
     prefix: Token<'a>,
     /// Its value
-    value: Iri<'a>,
+    iri: Iri<'a>,
 }
 
 impl<'a> Prefix<'a> {
@@ -29,8 +29,16 @@ impl<'a> Prefix<'a> {
     }
 
     /// Return the value of the prefix.
-    pub fn value(&self) -> &Iri<'a> {
-        &self.value
+    pub fn iri(&self) -> &Iri<'a> {
+        &self.iri
+    }
+
+    pub fn parse_body(input: ParserInput<'a>) -> ParserResult<'a, (Token<'a>, Iri<'a>)> {
+        separated_pair(
+            Token::name,
+            tuple((WSoC::parse, Token::namespace_separator, WSoC::parse)),
+            Iri::parse,
+        )(input)
     }
 }
 
@@ -38,7 +46,7 @@ const CONTEXT: ParserContext = ParserContext::Prefix;
 
 impl<'a> ProgramAST<'a> for Prefix<'a> {
     fn children(&self) -> Vec<&dyn ProgramAST> {
-        self.value.children()
+        vec![&self.iri]
     }
 
     fn span(&self) -> Span<'a> {
@@ -55,16 +63,11 @@ impl<'a> ProgramAST<'a> for Prefix<'a> {
             CONTEXT,
             preceded(
                 tuple((
-                    Token::at,
+                    Token::directive_indicator,
                     Token::directive_prefix,
-                    WSoC::parse_whitespace_comment,
                     WSoC::parse,
                 )),
-                separated_pair(
-                    Token::name,
-                    tuple((WSoC::parse, Token::colon, WSoC::parse)),
-                    Iri::parse,
-                ),
+                Self::parse_body,
             ),
         )(input)
         .map(|(rest, (prefix, value))| {
@@ -75,7 +78,7 @@
                 Self {
                     span: input_span.until_rest(&rest_span),
                     prefix,
-                    value,
+                    iri: value,
                 },
             )
         })
@@ -113,7 +116,7 @@ mod test {
             assert!(result.is_ok());
 
             let result = result.unwrap();
-            assert_eq!(expected, (result.1.prefix(), result.1.value().content()));
+            assert_eq!(expected, (result.1.prefix(), result.1.iri().content()));
         }
     }
 }
diff --git a/nemo/src/parser/ast/directive/unknown.rs b/nemo/src/parser/ast/directive/unknown.rs
index 37ca30130..ad6e71844 100644
--- a/nemo/src/parser/ast/directive/unknown.rs
+++ b/nemo/src/parser/ast/directive/unknown.rs
@@ -3,7 +3,7 @@
 use nom::{
     bytes::complete::is_not,
     combinator::recognize,
-    sequence::{pair, preceded, separated_pair},
+    sequence::{preceded, separated_pair},
 };
 use nom_supreme::error::{BaseErrorKind, Expectation};
 use strum::IntoEnumIterator;
@@ -90,8 +90,15 @@ impl<'a> ProgramAST<'a> for UnknownDirective<'a> {
         context(
             CONTEXT,
             separated_pair(
-                preceded(Token::at, Self::parse_unknown),
-                pair(WSoC::parse_whitespace_comment, WSoC::parse),
+                preceded(Token::directive_indicator, Self::parse_unknown),
+                WSoC::parse,
+                // FIXME: Rework error recovery, because this recognises an `.` in an IRI,
+                // e.g. in `@baseerror
+                //          ^
+                // That means that content == "" which
+                // will also produce an error.
+                // NOTE: Maybe we could try to parse the "body" of the other directives and if
+                // one succeeds give a hint what directive could be the correct one.
                 recognize(is_not(".")),
             ),
         )(input)
@@ -157,4 +164,25 @@ mod test {
             assert!(result.is_err());
         }
     }
+
+    #[test]
+    fn error_recovery() {
+        let test = [(
+            "@test .",
+            ("test", " "),
+        )];
+
+        for (input, expected) in test {
+            let parser_input = ParserInput::new(input, ParserState::default());
+            let result = UnknownDirective::parse(parser_input);
+
+            assert!(result.is_ok());
+
+            let result = result.unwrap();
+            assert_eq!(
+                expected,
+                (result.1.name().as_ref(), result.1.content().as_ref())
+            );
+        }
+    }
 }
diff --git a/nemo/src/parser/ast/expression.rs b/nemo/src/parser/ast/expression.rs
index 07fb2c745..252977007 100644
--- a/nemo/src/parser/ast/expression.rs
+++ b/nemo/src/parser/ast/expression.rs
@@ -95,12 +95,12 @@ impl<'a> Expression<'a> {
     /// Parse complex expressions, except arithmetic and infix.
     pub fn parse_complex(input: ParserInput<'a>) -> ParserResult<'a, Self> {
         alt((
-            map(Tuple::parse, Self::Tuple),
             map(Aggregation::parse, Self::Aggregation),
             map(Operation::parse, Self::Operation),
             map(Atom::parse, Self::Atom),
             map(Map::parse, Self::Map),
             map(Negation::parse, Self::Negation),
+            map(Tuple::parse, Self::Tuple),
         ))(input)
     }
 }
@@ -155,9 +155,9 @@ impl<'a> ProgramAST<'a> for Expression<'a> {
         context(
             CONTEXT,
             alt((
+                map(Arithmetic::parse, Self::Arithmetic),
                 Self::parse_complex,
                 map(InfixExpression::parse, Self::Infix),
-                map(Arithmetic::parse, Self::Arithmetic),
                 Self::parse_basic,
             )),
         )(input)
@@ -212,4 +212,12 @@ mod test {
             assert_eq!(result.1.context_type(), expect);
         }
     }
+
+    #[test]
+    fn complex_expression() {
+        let input = "?distance = SQRT(POW(?Xp - ?Xr, 2.0) + POW(?Yp - ?Yr, 2.0))";
+        let parser_input = ParserInput::new(input, ParserState::default());
+        let result = Expression::parse(parser_input);
+        assert!(result.is_ok());
+    }
 }
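The reshuffled `alt` branches above matter because nom's `alt` is ordered choice: it returns the result of the first branch that succeeds on a prefix of the input. Placing `Arithmetic::parse` before `parse_complex` lets an input like `SQRT(...) + ...` be consumed as one arithmetic expression instead of stopping after the leading operation. A toy illustration of the pitfall (not project code):

```rust
use nom::{branch::alt, bytes::complete::tag, IResult};

// With the longer alternative first, "abc" parses as "ab"; if the branches
// were swapped, the shorter "a" would win and "bc" would be left over.
fn ordered_choice(input: &str) -> IResult<&str, &str> {
    alt((tag("ab"), tag("a")))(input)
}

fn main() {
    assert_eq!(ordered_choice("abc"), Ok(("c", "ab")));
}
```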
diff --git a/nemo/src/parser/ast/expression/basic/iri.rs b/nemo/src/parser/ast/expression/basic/iri.rs
index a77db8130..307d46474 100644
--- a/nemo/src/parser/ast/expression/basic/iri.rs
+++ b/nemo/src/parser/ast/expression/basic/iri.rs
@@ -47,7 +47,7 @@ impl<'a> ProgramAST<'a> for Iri<'a> {
 
         context(
             CONTEXT,
-            delimited(Token::open_chevrons, Token::iri, Token::closed_chevrons),
+            delimited(Token::open_iri, Token::iri, Token::close_iri),
         )(input)
         .map(|(rest, content)| {
             let rest_span = rest.span;
diff --git a/nemo/src/parser/ast/expression/basic/string.rs b/nemo/src/parser/ast/expression/basic/string.rs
index 7e10e861c..72be5e755 100644
--- a/nemo/src/parser/ast/expression/basic/string.rs
+++ b/nemo/src/parser/ast/expression/basic/string.rs
@@ -44,7 +44,7 @@ impl<'a> StringLiteral<'a> {
 
     /// Parse the language tag of the string.
     pub fn parse_language_tag(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>> {
-        pair(Token::at, Token::name)(input).map(|(rest, (_, tag))| (rest, tag))
+        pair(Token::lang_tag_indicator, Token::name)(input).map(|(rest, (_, tag))| (rest, tag))
     }
 }
diff --git a/nemo/src/parser/ast/expression/basic/variable.rs b/nemo/src/parser/ast/expression/basic/variable.rs
index f11d14a4c..83384444f 100644
--- a/nemo/src/parser/ast/expression/basic/variable.rs
+++ b/nemo/src/parser/ast/expression/basic/variable.rs
@@ -20,13 +20,13 @@ use crate::parser::{
 #[func(pub fn token(token: TokenKind) -> Option<Self>)]
 pub enum VariableType {
     /// Universal variable
-    #[assoc(token = TokenKind::QuestionMark)]
+    #[assoc(token = TokenKind::UniversalIndicator)]
     Universal,
     /// Existential variable
-    #[assoc(token = TokenKind::ExclamationMark)]
+    #[assoc(token = TokenKind::ExistentialIndicator)]
     Existential,
     /// Anonymous variable
-    #[assoc(token = TokenKind::Underscore)]
+    #[assoc(token = TokenKind::AnonVal)]
     Anonymous,
 }
@@ -57,8 +57,8 @@ impl<'a> Variable<'a> {
     /// Parse the variable prefix
     fn parse_variable_prefix(input: ParserInput<'a>) -> ParserResult<'a, VariableType> {
         alt((
-            Token::question_mark,
-            Token::exclamation_mark,
+            Token::universal_indicator,
+            Token::existential_indicator,
             Token::underscore,
         ))(input)
         .map(|(rest, result)| {
diff --git a/nemo/src/parser/ast/expression/complex/aggregation.rs b/nemo/src/parser/ast/expression/complex/aggregation.rs
index 069493939..6f4a83bc7 100644
--- a/nemo/src/parser/ast/expression/complex/aggregation.rs
+++ b/nemo/src/parser/ast/expression/complex/aggregation.rs
@@ -81,17 +81,18 @@
         context(
             CONTEXT,
             pair(
-                preceded(Token::hash, AggregationTag::parse),
+                preceded(Token::aggregate_indicator, AggregationTag::parse),
                 delimited(
-                    pair(Token::open_parenthesis, WSoC::parse),
+                    pair(Token::aggregate_open, WSoC::parse),
                     pair(
                         Expression::parse,
                         opt(preceded(
+                            // TODO: What is the semicolon for?
                            tuple((WSoC::parse, Token::semicolon, WSoC::parse)),
                             ExpressionSequenceSimple::parse,
                         )),
                     ),
-                    pair(WSoC::parse, Token::closed_parenthesis),
+                    pair(WSoC::parse, Token::aggregate_close),
                 ),
             ),
         )(input)
diff --git a/nemo/src/parser/ast/expression/complex/arithmetic.rs b/nemo/src/parser/ast/expression/complex/arithmetic.rs
index 8b16932a2..de47c9044 100644
--- a/nemo/src/parser/ast/expression/complex/arithmetic.rs
+++ b/nemo/src/parser/ast/expression/complex/arithmetic.rs
@@ -1,22 +1,20 @@
 //! This module defines [Arithmetic].
 #![allow(missing_docs)]
 
+use ascii_tree::write_tree;
 use enum_assoc::Assoc;
 use nom::{
     branch::alt,
-    combinator::map,
-    multi::{many0, many1},
-    sequence::{delimited, pair, preceded, separated_pair, tuple},
+    multi::many0,
+    sequence::{delimited, pair, preceded, separated_pair},
 };
 use nom_supreme::error::{BaseErrorKind, Expectation};
 
 use crate::parser::{
     ast::{
+        ast_to_ascii_tree,
         comment::wsoc::WSoC,
-        expression::{
-            basic::{number::Number, variable::Variable},
-            Expression,
-        },
+        expression::Expression,
         token::{Token, TokenKind},
         ProgramAST,
     },
@@ -27,8 +25,6 @@ use crate::parser::{
     ParserResult,
 };
 
-use super::operation::Operation;
-
 /// Types of arithmetic operations
 #[derive(Assoc, Debug, Copy, Clone, PartialEq, Eq)]
 #[func(pub fn token(token: TokenKind) -> Option<Self>)]
@@ -40,7 +36,7 @@ pub enum ArithmeticOperation {
     #[assoc(token = TokenKind::Minus)]
     Subtraction,
     /// Multiplication
-    #[assoc(token = TokenKind::Star)]
+    #[assoc(token = TokenKind::Multiplication)]
     Multiplication,
     /// Division
     #[assoc(token = TokenKind::Division)]
     Division,
 }
@@ -100,6 +96,19 @@ impl<'a> Arithmetic<'a> {
     pub fn right(&self) -> &Expression<'a> {
         &self.right
     }
+
+    /// Return a formatted ascii tree to pretty print the AST
+    pub fn ascii_tree(&self) -> String {
+        let mut output = String::new();
+        write_tree(&mut output, &ast_to_ascii_tree(self)).unwrap();
+        format!("{output}")
+    }
+}
+
+impl std::fmt::Display for Arithmetic<'_> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}", self.ascii_tree())
+    }
 }
 
 #[derive(Debug)]
@@ -143,67 +152,33 @@ impl<'a> Arithmetic<'a> {
 
     /// Parse an expression enclosed in parenthesis.
     fn parse_parenthesized_expression(input: ParserInput<'a>) -> ParserResult<'a, Expression<'a>> {
-        delimited(
-            pair(Token::open_parenthesis, WSoC::parse),
-            Expression::parse,
-            pair(WSoC::parse, Token::closed_parenthesis),
-        )(input)
-    }
-
-    /// Parse an arithmetic expression enclosed in parenthesis.
-    fn parse_parenthesized_arithmetic(input: ParserInput<'a>) -> ParserResult<'a, Self> {
         delimited(
             pair(Token::open_parenthesis, WSoC::parse),
             Self::parse,
             pair(WSoC::parse, Token::closed_parenthesis),
         )(input)
+        .map(|(rest, arithmetic_expr)| (rest, Expression::Arithmetic(arithmetic_expr)))
     }
 
+    // /// Parse an arithmetic expression enclosed in parenthesis.
+    // fn parse_parenthesized_arithmetic(input: ParserInput<'a>) -> ParserResult<'a, Self> {
+    //     delimited(
+    //         pair(Token::open_parenthesis, WSoC::parse),
+    //         Self::parse,
+    //         pair(WSoC::parse, Token::closed_parenthesis),
+    //     )(input)
+    // }
+
     /// Parse factor.
     fn parse_factor(input: ParserInput<'a>) -> ParserResult<'a, Expression<'a>> {
         alt((
             Self::parse_non_arithmetic,
             Self::parse_parenthesized_expression,
         ))(input)
-
-        // let input_span = input.span;
-
-        // alt((
-        //     map(
-        //         tuple((
-        //             Self::parse_non_arithmetic,
-        //             delimited(
-        //                 WSoC::parse,
-        //                 ArithmeticOperation::parse_multiplicative,
-        //                 WSoC::parse,
-        //             ),
-        //             Self::parse_non_arithmetic,
-        //         )),
-        //         |(left, kind, right)| (Box::new(left), kind, Box::new(right)),
-        //     ),
-        //     map(Self::parse_parenthesized, |arithmetic| {
-        //         (arithmetic.left, arithmetic.kind, arithmetic.right)
-        //     }),
-        // ))(input)
-        // .map(|(rest, (left, kind, right))| {
-        //     let rest_span = rest.span;
-
-        //     (
-        //         rest,
-        //         Self {
-        //             span: input_span.until_rest(&rest_span),
-        //             kind,
-        //             left,
-        //             right,
-        //         },
-        //     )
-        // })
     }
 
     ///
     fn parse_product(input: ParserInput<'a>) -> ParserResult<'a, ArithmeticChain<'a>> {
-        let input_span = input.span;
-
         pair(
             Self::parse_factor,
             many0(preceded(
@@ -252,7 +227,7 @@ const CONTEXT: ParserContext = ParserContext::Arithmetic;
 
 impl<'a> ProgramAST<'a> for Arithmetic<'a> {
     fn children(&self) -> Vec<&dyn ProgramAST> {
-        todo!()
+        vec![&*self.left, &*self.right]
     }
 
     fn span(&self) -> Span<'a> {
@@ -263,32 +238,6 @@
     fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self>
     where
         Self: Sized + 'a,
     {
-        // let input_span = input.span;
-
-        // context(
-        //     CONTEXT,
-        //     pair(
-        //         StructureTag::parse,
-        //         delimited(
-        //             pair(Token::open_parenthesis, WSoC::parse),
-        //             ExpressionSequenceSimple::parse,
-        //             pair(WSoC::parse, Token::closed_parenthesis),
-        //         ),
-        //     ),
-        // )(input)
-        // .map(|(rest, (tag, expressions))| {
-        //     let rest_span = rest.span;
-
-        //     (
-        //         rest,
-        //         Self {
-        //             span: input_span.until_rest(&rest_span),
-        //             tag,
-        //             expressions,
-        //         },
-        //     )
-        // })
-
         let arithmetic_parser = |input: ParserInput<'a>| {
             if let Ok((rest, expression)) = Self::parse_sum(input.clone()) {
                 if let Expression::Arithmetic(result) = expression {
@@ -325,10 +274,11 @@ mod test {
 
     /// Count the number of expressions contained in an arithmetic expression
     fn count_expression<'a>(expression: &Expression<'a>) -> usize {
-        if let Expression::Arithmetic(arithmetic) = expression {
-            count_expression(arithmetic.left()) + count_expression(arithmetic.right())
-        } else {
-            1
+        match expression {
+            Expression::Arithmetic(arithmetic) => {
+                count_expression(arithmetic.left()) + count_expression(arithmetic.right())
+            }
+            _ => 1,
         }
     }
 
@@ -337,22 +287,24 @@
         let test = vec![
             ("1 * 2", 2),
             ("1 * 2 * ?y", 3),
-            ("(1 * 2)", 2),
-            ("1 * (2 / ?y)", 3),
+            ("1 * (2 / ?y)", 3), // FIXME: Span has missing `)`
             ("(1 / 2) * ?y", 3),
             ("1 + 2", 2),
             ("1 + 2 + ?x", 3),
-            ("1 + 2 * (3 * ?y)", 4),
-            ("1 + (2 * 3) * ?y + 4", 5),
-            ("(1 + (2 * ((3 * ?y))))", 4),
-            // ("1 + 2 * POW(3, 4)", 3),
+            ("1 + 2 * (3 * ?y)", 4), // FIXME: This test produces weird spans
+            ("1 + (2 * 3) * ?y + 4", 5), // FIXME: Here the spans are also wrong
+            ("1 + (2 * ((3 * ?y)))", 4),
+            ("1 + 2 * POW(3, 4)", 3), // FIXME: The same
+            ("2 * (((18 + 3)))", 3),
         ];
 
         for (input, expected) in test {
             let parser_input = ParserInput::new(input, ParserState::default());
             let result = all_consuming(Arithmetic::parse)(parser_input);
-
-            assert!(result.is_ok());
+            match &result {
+                Ok((_, ast)) => println!("{ast}"),
+                Err(_) => assert!(false),
+            }
 
             let result = result.unwrap();
             assert_eq!(
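`parse_product` and `parse_sum` each parse a first factor plus a `many0` list of `(operator, factor)` pairs, which `ArithmeticChain` then folds into a tree. Folding that list from the left yields the usual left associativity, so `1 - 2 - 3` groups as `(1 - 2) - 3`. A simplified sketch of such a fold (plain integers stand in for AST nodes; this is not the project's actual `fold` implementation):

```rust
// Hedged sketch of a left-associative fold over an operator chain.
fn fold_left(first: i64, rest: Vec<(char, i64)>) -> i64 {
    rest.into_iter().fold(first, |left, (op, right)| match op {
        '+' => left + right,
        '-' => left - right,
        _ => unreachable!("sketch only covers additive operators"),
    })
}

fn main() {
    assert_eq!(fold_left(1, vec![('-', 2), ('-', 3)]), -4); // (1 - 2) - 3
}
```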
a/nemo/src/parser/ast/expression/complex/atom.rs +++ b/nemo/src/parser/ast/expression/complex/atom.rs @@ -66,9 +66,9 @@ impl<'a> ProgramAST<'a> for Atom<'a> { pair( StructureTag::parse, delimited( - pair(Token::open_parenthesis, WSoC::parse), + pair(Token::atom_open, WSoC::parse), ExpressionSequenceSimple::parse, - pair(WSoC::parse, Token::closed_parenthesis), + pair(WSoC::parse, Token::atom_close), ), ), )(input) diff --git a/nemo/src/parser/ast/expression/complex/infix.rs b/nemo/src/parser/ast/expression/complex/infix.rs index 79be90530..e25767a2b 100644 --- a/nemo/src/parser/ast/expression/complex/infix.rs +++ b/nemo/src/parser/ast/expression/complex/infix.rs @@ -97,9 +97,9 @@ impl<'a> InfixExpression<'a> { /// Parse non-infix [Expression]s pub fn parse_non_infix(input: ParserInput<'a>) -> ParserResult<'a, Expression<'a>> { alt(( + map(Operation::parse, Expression::Operation), map(Arithmetic::parse, Expression::Arithmetic), map(Aggregation::parse, Expression::Aggregation), - map(Operation::parse, Expression::Operation), map(Atom::parse, Expression::Atom), map(Tuple::parse, Expression::Tuple), map(Map::parse, Expression::Map), diff --git a/nemo/src/parser/ast/expression/complex/map.rs b/nemo/src/parser/ast/expression/complex/map.rs index 05c25bd14..2f0b8c600 100644 --- a/nemo/src/parser/ast/expression/complex/map.rs +++ b/nemo/src/parser/ast/expression/complex/map.rs @@ -7,8 +7,11 @@ use nom::{ use crate::parser::{ ast::{ - comment::wsoc::WSoC, expression::Expression, sequence::key_value::KeyValueSequence, - tag::structure::StructureTag, token::Token, ProgramAST, + comment::wsoc::WSoC, + sequence::{key_value::KeyValuePair, Sequence}, + tag::structure::StructureTag, + token::Token, + ProgramAST, }, context::{context, ParserContext}, input::ParserInput, @@ -25,12 +28,12 @@ pub struct Map<'a> { /// Tag of this map, if it exists tag: Option>, /// List of key-value pairs - key_value: KeyValueSequence<'a>, + key_value: Sequence<'a, KeyValuePair<'a>>, } impl<'a> Map<'a> { /// Return an iterator over the underlying [Expression]s. 
- pub fn key_value(&self) -> impl Iterator, Expression<'a>)> { + pub fn key_value(&self) -> impl Iterator { self.key_value.iter() } @@ -50,10 +53,10 @@ impl<'a> ProgramAST<'a> for Map<'a> { result.push(tag) } - for (key, value) in &self.key_value { - result.push(key); - result.push(value); + for pair in &self.key_value { + result.push(pair); } + // result.push(&key_value); result } @@ -73,9 +76,9 @@ impl<'a> ProgramAST<'a> for Map<'a> { pair( opt(terminated(StructureTag::parse, opt(WSoC::parse))), delimited( - pair(Token::open_brace, WSoC::parse), - KeyValueSequence::parse, - pair(WSoC::parse, Token::closed_brace), + pair(Token::map_open, WSoC::parse), + Sequence::::parse, + pair(WSoC::parse, Token::map_close), ), ), )(input) @@ -122,6 +125,7 @@ mod test { (Some("abc".to_string()), 3), ), ("{a:1, b: POW(1, 2)}", (None, 2)), + ("{a:b, c:d,}", (None, 2)), ]; for (input, expected) in test { diff --git a/nemo/src/parser/ast/expression/complex/operation.rs b/nemo/src/parser/ast/expression/complex/operation.rs index d5f7710ac..c029c48b8 100644 --- a/nemo/src/parser/ast/expression/complex/operation.rs +++ b/nemo/src/parser/ast/expression/complex/operation.rs @@ -5,9 +5,8 @@ use nom::sequence::{delimited, pair}; use crate::{ parser::{ ast::{ - comment::wsoc::WSoC, expression::Expression, - sequence::simple::ExpressionSequenceSimple, tag::operation::OperationTag, token::Token, - ProgramAST, + comment::wsoc::WSoC, expression::Expression, sequence::Sequence, + tag::operation::OperationTag, token::Token, ProgramAST, }, context::{context, ParserContext}, input::ParserInput, @@ -28,7 +27,7 @@ pub struct Operation<'a> { /// Type of operation tag: OperationTag<'a>, /// List of underlying expressions - expressions: ExpressionSequenceSimple<'a>, + expressions: Sequence<'a, Expression<'a>>, } impl<'a> Operation<'a> { @@ -72,9 +71,9 @@ impl<'a> ProgramAST<'a> for Operation<'a> { pair( OperationTag::parse, delimited( - pair(Token::open_parenthesis, WSoC::parse), - ExpressionSequenceSimple::parse, - pair(WSoC::parse, Token::closed_parenthesis), + pair(Token::operation_open, WSoC::parse), + Sequence::::parse, + pair(WSoC::parse, Token::operation_close), ), ), )(input) @@ -100,7 +99,6 @@ impl<'a> ProgramAST<'a> for Operation<'a> { #[cfg(test)] mod test { use nom::combinator::all_consuming; - use strum::IntoEnumIterator; use crate::{ parser::{ diff --git a/nemo/src/parser/ast/expression/complex/parenthesised_expression.rs b/nemo/src/parser/ast/expression/complex/parenthesised_expression.rs new file mode 100644 index 000000000..85fe2ab43 --- /dev/null +++ b/nemo/src/parser/ast/expression/complex/parenthesised_expression.rs @@ -0,0 +1,89 @@ +use nom::sequence::{delimited, pair}; + +use crate::parser::{ + ast::{comment::wsoc::WSoC, expression::Expression, token::Token, ProgramAST}, + context::ParserContext, + input::ParserInput, + span::Span, + ParserResult, +}; + +#[derive(Debug)] +pub struct ParenthesisedExpression<'a> { + span: Span<'a>, + expression: Expression<'a>, +} + +impl<'a> ParenthesisedExpression<'a> { + pub fn expression(&self) -> &Expression { + &self.expression + } + + pub fn parse_expression(input: ParserInput<'a>) -> ParserResult<'a, Expression<'a>> { + Self::parse(input).map(|(rest, paren_expr)| (rest, paren_expr.expression)) + } +} + +const CONTEXT: ParserContext = ParserContext::ParenthesisedExpression; + +impl<'a> ProgramAST<'a> for ParenthesisedExpression<'a> { + fn children(&'a self) -> Vec<&'a dyn ProgramAST> { + vec![&self.expression] + } + + fn span(&self) -> Span<'a> { + self.span + } + + 
/// Parse an expression enclosed in parenthesis. + fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self> + where + Self: Sized + 'a, + { + let input_span = input.span; + delimited( + pair(Token::open_parenthesis, WSoC::parse), + Expression::parse, + pair(WSoC::parse, Token::closed_parenthesis), + )(input) + .map(|(rest, expression)| { + let rest_span = rest.span; + ( + rest, + ParenthesisedExpression { + span: input_span.until_rest(&rest_span), + expression, + }, + ) + }) + } + + fn context(&self) -> ParserContext { + CONTEXT + } +} + +#[cfg(test)] +mod test { + use nom::combinator::all_consuming; + + use crate::parser::ParserState; + + use super::*; + + #[test] + fn paren_expr() { + let test = ["(1 * 2)"]; + + for input in test { + let parser_input = ParserInput::new(input, ParserState::default()); + let result = all_consuming(Expression::parse)(parser_input); + dbg!(&result); + + assert!(result.is_ok()); + + // let result = result.unwrap(); + // assert_eq!(result.1.context_type(), expect); + } + } +} diff --git a/nemo/src/parser/ast/expression/complex/tuple.rs b/nemo/src/parser/ast/expression/complex/tuple.rs index 3f42b21a5..4a7d13ab7 100644 --- a/nemo/src/parser/ast/expression/complex/tuple.rs +++ b/nemo/src/parser/ast/expression/complex/tuple.rs @@ -1,14 +1,10 @@ //! This module defines [Tuple]. -use nom::{ - combinator::opt, - sequence::{delimited, pair, terminated, tuple}, -}; +use nom::sequence::{delimited, pair}; use crate::parser::{ ast::{ - comment::wsoc::WSoC, expression::Expression, sequence::one::ExpressionSequenceOne, - token::Token, ProgramAST, + comment::wsoc::WSoC, expression::Expression, sequence::Sequence, token::Token, ProgramAST, }, context::{context, ParserContext}, input::ParserInput, @@ -23,7 +19,7 @@ pub struct Tuple<'a> { span: Span<'a>, /// List of underlying expressions - expressions: ExpressionSequenceOne<'a>, + expressions: Sequence<'a, Expression<'a>>, } impl<'a> Tuple<'a> { @@ -59,12 +55,9 @@ impl<'a> ProgramAST<'a> for Tuple<'a> { context( CONTEXT, delimited( - pair(Token::open_parenthesis, WSoC::parse), - terminated( - ExpressionSequenceOne::parse, - opt(tuple((WSoC::parse, Token::comma, WSoC::parse))), - ), - pair(WSoC::parse, Token::closed_parenthesis), + pair(Token::tuple_open, WSoC::parse), + Sequence::parse_with_first_trailing, + pair(WSoC::parse, Token::tuple_close), ), )(input) .map(|(rest, expressions)| { @@ -103,11 +96,16 @@ mod test { ("( 1 ,)", 1), ("( 1 , 2 )", 2), ("( 1 , 2 ,)", 2), + ("( 1, 2, 3 )", 3), + ("(1,2,3,)", 3), ]; for (input, expected) in test { let parser_input = ParserInput::new(input, ParserState::default()); let result = all_consuming(Tuple::parse)(parser_input); + if result.is_err() { + dbg!(&result); + } assert!(result.is_ok()); diff --git a/nemo/src/parser/ast/program.rs b/nemo/src/parser/ast/program.rs index 4ae0dbdeb..4eb5ed5ec 100644 --- a/nemo/src/parser/ast/program.rs +++ b/nemo/src/parser/ast/program.rs @@ -1,12 +1,15 @@ //! This module defines [Program]. +use ascii_tree::write_tree; + use nom::{ combinator::opt, multi::many0, - sequence::{delimited, pair, preceded}, + sequence::{delimited, pair}, }; use crate::parser::{ + ast::ast_to_ascii_tree, context::{context, ParserContext}, error::{recover, report_error}, input::ParserInput, @@ -29,7 +32,7 @@ pub struct Program<'a> { /// Top level comment comment: Option<TopLevelComment<'a>>, /// Statements - statements: Vec<Statement<'a>>, + statements: Vec<Option<Statement<'a>>>, } impl<'a> Program<'a> { @@ -40,9 +43,16 @@ impl<'a> Program<'a> { } /// Return an iterator of statements in the program.
- pub fn statements(&self) -> impl Iterator<Item = &Statement<'a>> { + pub fn statements(&self) -> impl Iterator<Item = &Option<Statement<'a>>> { self.statements.iter() } + + /// Return a formatted ASCII tree to pretty-print the AST + pub fn ascii_tree(&self) -> String { + let mut output = String::new(); + write_tree(&mut output, &ast_to_ascii_tree(self)).unwrap(); + output + } } const CONTEXT: ParserContext = ParserContext::Program; @@ -57,7 +67,11 @@ impl<'a> ProgramAST<'a> for Program<'a> { } for statement in self.statements() { - result.push(statement); + if let Some(s) = statement { + result.push(s); + } else { + result.push(statement); + } } result @@ -77,25 +91,29 @@ CONTEXT, pair( opt(TopLevelComment::parse), - delimited( + many0(delimited( WSoC::parse, - many0(preceded( - WSoC::parse, - recover(report_error(Statement::parse)), - )), + recover(report_error(Statement::parse)), WSoC::parse, - ), + )), ), )(input) .map(|(rest, (comment, statements))| { let rest_span = rest.span; + // TODO: Remove if debug info is no longer needed + if !rest_span.0.is_empty() { + println!( + "\x1b[91mUNPARSED INPUT:\x1b[0m {:?}\n", + rest.span.0.fragment() + ); + } ( rest, Self { span: input_span.until_rest(&rest_span), comment, - statements: statements.into_iter().flatten().collect(), + statements, }, ) }) @@ -106,6 +124,12 @@ } } +impl std::fmt::Display for Program<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.ascii_tree()) + } +} + #[cfg(test)] mod test { use nom::combinator::all_consuming; diff --git a/nemo/src/parser/ast/rule.rs b/nemo/src/parser/ast/rule.rs index 8e2008329..d648d9494 100644 --- a/nemo/src/parser/ast/rule.rs +++ b/nemo/src/parser/ast/rule.rs @@ -13,8 +13,8 @@ use crate::parser::{ }; use super::{ - attribute::Attribute, comment::wsoc::WSoC, expression::Expression, - sequence::simple::ExpressionSequenceSimple, token::Token, ProgramAST, + attribute::Attribute, comment::wsoc::WSoC, expression::Expression, sequence::Sequence, + token::Token, ProgramAST, }; /// A rule describing a logical implication @@ -27,9 +27,9 @@ pub struct Rule<'a> { attributes: Vec<Attribute<'a>>, /// Head of the rule - head: ExpressionSequenceSimple<'a>, + head: Sequence<'a, Expression<'a>>, /// Body of the rule, - body: ExpressionSequenceSimple<'a>, + body: Sequence<'a, Expression<'a>>, } impl<'a> Rule<'a> { @@ -77,9 +77,9 @@ impl<'a> ProgramAST<'a> for Rule<'a> { tuple(( many0(Attribute::parse), (separated_pair( - ExpressionSequenceSimple::parse, - tuple((WSoC::parse, Token::arrow, WSoC::parse)), - ExpressionSequenceSimple::parse, + Sequence::<Expression>::parse, + tuple((WSoC::parse, Token::rule_arrow, WSoC::parse)), + Sequence::<Expression>::parse, )), )), )(input) diff --git a/nemo/src/parser/ast/sequence.rs b/nemo/src/parser/ast/sequence.rs index f7ffe5252..24b233055 100644 --- a/nemo/src/parser/ast/sequence.rs +++ b/nemo/src/parser/ast/sequence.rs @@ -4,3 +4,176 @@ pub mod declare; pub mod key_value; pub mod one; pub mod simple; + +use std::vec::IntoIter; + +use nom::{ + combinator::opt, + multi::separated_list1, + sequence::{terminated, tuple}, +}; + +use crate::parser::{ + ast::{comment::wsoc::WSoC, token::Token, ProgramAST}, + context::ParserContext, + input::ParserInput, + span::Span, + ParserResult, +}; + +const CONTEXT: ParserContext = ParserContext::Sequence; + +/// Sequence of comma-delimited AST nodes.
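+/// +/// A sketch of the accepted shapes (taken from the tests in this module): `1, 2` and +/// `1, 2,` both parse into a two-element sequence via [Self::parse], while +/// [Self::parse_with_first_trailing] additionally requires a separator after the +/// first element, as in `1,` or `1, 2`.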
+#[derive(Debug, Clone)] +pub struct Sequence<'a, T> { + /// [Span] associated with this sequence + _span: Span<'a>, + + /// The elements of the sequence + elements: Vec<T>, +} + +impl<'a, T> Sequence<'a, T> { + /// Return an iterator over the elements. + pub fn iter(&self) -> impl Iterator<Item = &T> { + self.into_iter() + } +} + +impl<'a, T: ProgramAST<'a> + 'a> Sequence<'a, T> { + /// Parse one element with a trailing [SequenceSeparator](crate::parser::ast::token::TokenKind::SequenceSeparator) + pub fn parse_first_trailing(input: ParserInput<'a>) -> ParserResult<'a, Self> { + let input_span = input.span; + tuple((T::parse, WSoC::parse, Token::seq_sep))(input).map(|(rest, (t, _, _))| { + let rest_span = rest.span; + ( + rest, + Self { + _span: input_span.until_rest(&rest_span), + elements: vec![t], + }, + ) + }) + } + + /// Parse a sequence, where the first element must have a trailing separator and after + /// that zero or more elements. + pub fn parse_with_first_trailing(input: ParserInput<'a>) -> ParserResult<'a, Self> { + let input_span = input.span; + tuple((Self::parse_first_trailing, WSoC::parse, Self::parse))(input).map( + |(rest, (first, _, rest_seq))| { + let rest_span = rest.span; + let mut first_vec = first.elements; + let mut rest_vec = rest_seq.elements; + first_vec.append(&mut rest_vec); + ( + rest, + Self { + _span: input_span.until_rest(&rest_span), + elements: first_vec, + }, + ) + }, + ) + } + + /// The same as [Self::parse], but it must return at least one element. + pub fn parse1(input: ParserInput<'a>) -> ParserResult<'a, Self> { + let input_span = input.span; + terminated( + separated_list1(tuple((WSoC::parse, Token::seq_sep, WSoC::parse)), T::parse), + opt(tuple((WSoC::parse, Token::seq_sep, WSoC::parse))), + )(input) + .map(|(rest, vec)| { + let rest_span = rest.span; + ( + rest, + Sequence { + _span: input_span.until_rest(&rest_span), + elements: vec, + }, + ) + }) + } +} + +impl<'a, T: std::fmt::Debug + Sync + ProgramAST<'a>> ProgramAST<'a> for Sequence<'a, T> { + fn children(&'a self) -> Vec<&'a dyn ProgramAST> { + let mut vec: Vec<&dyn ProgramAST> = Vec::new(); + for elem in &self.elements { + vec.push(elem); + } + vec + } + + fn span(&self) -> Span<'a> { + self._span + } + + fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self> + where + Self: Sized + 'a, + { + let input_span = input.span; + opt(terminated( + separated_list1(tuple((WSoC::parse, Token::seq_sep, WSoC::parse)), T::parse), + opt(tuple((WSoC::parse, Token::seq_sep, WSoC::parse))), + ))(input) + .map(|(rest, vec)| { + let rest_span = rest.span; + ( + rest, + Sequence { + _span: input_span.until_rest(&rest_span), + elements: vec.unwrap_or(Vec::new()), + }, + ) + }) + } + + fn context(&self) -> ParserContext { + CONTEXT + } +} + +impl<'a, 'b, T> IntoIterator for &'b Sequence<'a, T> { + type Item = &'b T; + type IntoIter = std::slice::Iter<'b, T>; + + fn into_iter(self) -> Self::IntoIter { + self.elements.iter() + } +} + +impl<'a, T> IntoIterator for Sequence<'a, T> { + type Item = T; + type IntoIter = IntoIter<T>; + + fn into_iter(self) -> Self::IntoIter { + self.elements.into_iter() + } +} + +#[cfg(test)] +mod test { + use nom::combinator::all_consuming; + + use crate::parser::{ast::expression::basic::number::Number, ParserState}; + + use super::*; + + #[test] + fn with_trailing() { + let test = [ + "1,", "1 ,", "1,2", "1 ,2", "1, 2", "1 , 2", "1,2,", "1 ,2,", "1, 2,", "1 ,2 ,", + "1 , 2,", "1 , 2 ,", + ]; + + for input in test { + let parser_input = ParserInput::new(input, ParserState::default()); + let
result = all_consuming(Sequence::<Number>::parse_with_first_trailing)(parser_input); + + assert!(result.is_ok()); + } + } +} diff --git a/nemo/src/parser/ast/sequence/declare.rs b/nemo/src/parser/ast/sequence/declare.rs index c6be8b6b1..0fe376ffd 100644 --- a/nemo/src/parser/ast/sequence/declare.rs +++ b/nemo/src/parser/ast/sequence/declare.rs @@ -1,11 +1,6 @@ //! This module defines [DeclareSequence]. -use std::vec::IntoIter; - -use nom::{ - multi::separated_list1, - sequence::{separated_pair, tuple}, -}; +use nom::sequence::{separated_pair, tuple}; use crate::parser::{ ast::{ @@ -14,75 +9,56 @@ use crate::parser::{ token::Token, ProgramAST, }, + context::ParserContext, input::ParserInput, span::Span, ParserResult, }; -/// Sequence of name-type declarations -#[derive(Debug)] -pub struct DeclareSequence<'a> { - /// [ProgramSpan] associated with this sequence - _span: Span<'a>, +const CONTEXT: ParserContext = ParserContext::DeclareNameTypePair; - /// List of name-type pairs - pairs: Vec<(ParameterName<'a>, DataTypeTag<'a>)>, +/// A pair of a name and a data type. +#[derive(Debug, Clone)] +pub struct NameTypePair<'a> { + _span: Span<'a>, + name: ParameterName<'a>, + datatype: DataTypeTag<'a>, } -impl<'a> DeclareSequence<'a> { - /// Return an iterator over the name-type pairs. - pub fn iter(&self) -> impl Iterator<Item = &(ParameterName<'a>, DataTypeTag<'a>)> { - self.into_iter() +impl<'a> ProgramAST<'a> for NameTypePair<'a> { + fn children(&'a self) -> Vec<&'a dyn ProgramAST> { + vec![&self.name, &self.datatype] } - /// Parse a single name-type pair - fn parse_name_type_pair( - input: ParserInput<'a>, - ) -> ParserResult<'a, (ParameterName<'a>, DataTypeTag<'a>)> { - separated_pair( - ParameterName::parse, - tuple((WSoC::parse, Token::colon, WSoC::parse)), - DataTypeTag::parse, - )(input) + fn span(&self) -> Span<'a> { + self._span } - /// Parse a comma separated list of [Expression]s.
- pub fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self> { + fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self> + where + Self: Sized + 'a, + { let input_span = input.span; - - separated_list1( - tuple((WSoC::parse, Token::comma, WSoC::parse)), - Self::parse_name_type_pair, + separated_pair( + ParameterName::parse, + tuple((WSoC::parse, Token::name_datatype_separator, WSoC::parse)), + DataTypeTag::parse, )(input) - .map(|(rest, pairs)| { + .map(|(rest, (name, datatype))| { let rest_span = rest.span; - ( rest, - Self { + NameTypePair { _span: input_span.until_rest(&rest_span), - pairs, + name, + datatype, }, ) }) } -} - -impl<'a, 'b> IntoIterator for &'b DeclareSequence<'a> { - type Item = &'b (ParameterName<'a>, DataTypeTag<'a>); - type IntoIter = std::slice::Iter<'b, (ParameterName<'a>, DataTypeTag<'a>)>; - - fn into_iter(self) -> Self::IntoIter { - self.pairs.iter() - } -} - -impl<'a> IntoIterator for DeclareSequence<'a> { - type Item = (ParameterName<'a>, DataTypeTag<'a>); - type IntoIter = IntoIter<(ParameterName<'a>, DataTypeTag<'a>)>; - fn into_iter(self) -> Self::IntoIter { - self.pairs.into_iter() + fn context(&self) -> ParserContext { + CONTEXT } } @@ -92,7 +68,11 @@ mod test { use crate::{ parser::{ - ast::{sequence::declare::DeclareSequence, tag::parameter::Parameter}, + ast::{ + sequence::{declare::NameTypePair, Sequence}, + tag::parameter::Parameter, + ProgramAST, + }, input::ParserInput, ParserState, }, @@ -112,7 +92,7 @@ for (input, expected) in test { let parser_input = ParserInput::new(input, ParserState::default()); - let result = all_consuming(DeclareSequence::parse)(parser_input); + let result = all_consuming(Sequence::<NameTypePair>::parse)(parser_input); assert!(result.is_ok()); @@ -122,7 +102,10 @@ result .1 .into_iter() - .map(|(name, datatype)| (name.parameter().clone(), datatype.data_type())) + .map(|NameTypePair { name, datatype, .. }| ( + name.parameter().clone(), + datatype.data_type() + )) .collect::<Vec<_>>() ); } diff --git a/nemo/src/parser/ast/sequence/key_value.rs b/nemo/src/parser/ast/sequence/key_value.rs index 68738397c..46e8069ae 100644 --- a/nemo/src/parser/ast/sequence/key_value.rs +++ b/nemo/src/parser/ast/sequence/key_value.rs @@ -1,83 +1,57 @@ //! This module defines [KeyValueSequence]. -use std::vec::IntoIter; - -use nom::{ - multi::separated_list0, - sequence::{separated_pair, tuple}, -}; +use nom::sequence::{separated_pair, tuple}; use crate::parser::{ ast::{comment::wsoc::WSoC, expression::Expression, token::Token, ProgramAST}, + context::ParserContext, input::ParserInput, span::Span, ParserResult, }; -/// Sequence of comma-delimited expressions +/// Pairs of Expressions, separated by [KEY_VALUE_ASSIGN][nemo_physical::datavalues::syntax::map::KEY_VALUE_ASSIGN] #[derive(Debug)] -pub struct KeyValueSequence<'a> { - /// [ProgramSpan] associated with this sequence - _span: Span<'a>, - - /// List of key-value pairs - expressions: Vec<(Expression<'a>, Expression<'a>)>, +pub struct KeyValuePair<'a> { + span: Span<'a>, + key: Expression<'a>, + value: Expression<'a>, } -impl<'a> KeyValueSequence<'a> { - /// Return an iterator over the [Expression] pairs.
- pub fn iter(&self) -> impl Iterator<Item = &(Expression<'a>, Expression<'a>)> { - self.into_iter() +impl<'a> ProgramAST<'a> for KeyValuePair<'a> { + fn children(&'a self) -> Vec<&'a dyn ProgramAST> { + vec![&self.key, &self.value] } - /// Parse a single key-value pair - fn parse_key_value_pair( - input: ParserInput<'a>, - ) -> ParserResult<'a, (Expression<'a>, Expression<'a>)> { - separated_pair( - Expression::parse, - tuple((WSoC::parse, Token::colon, WSoC::parse)), - Expression::parse, - )(input) + fn span(&self) -> Span<'a> { + self.span } - /// Parse a comma separated list of [Expression]s. - pub fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self> { + fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self> + where + Self: Sized + 'a, + { let input_span = input.span; - - separated_list0( - tuple((WSoC::parse, Token::comma, WSoC::parse)), - Self::parse_key_value_pair, + separated_pair( + Expression::parse, + tuple((WSoC::parse, Token::k_v_assignment, WSoC::parse)), + Expression::parse, )(input) - .map(|(rest, expressions)| { + .map(|(rest, (key, value))| { let rest_span = rest.span; - ( rest, - Self { - _span: input_span.until_rest(&rest_span), - expressions, + KeyValuePair { + span: input_span.until_rest(&rest_span), + key, + value, }, ) }) } -} - -impl<'a, 'b> IntoIterator for &'b KeyValueSequence<'a> { - type Item = &'b (Expression<'a>, Expression<'a>); - type IntoIter = std::slice::Iter<'b, (Expression<'a>, Expression<'a>)>; - - fn into_iter(self) -> Self::IntoIter { - self.expressions.iter() - } -} - -impl<'a> IntoIterator for KeyValueSequence<'a> { - type Item = (Expression<'a>, Expression<'a>); - type IntoIter = IntoIter<(Expression<'a>, Expression<'a>)>; - fn into_iter(self) -> Self::IntoIter { - self.expressions.into_iter() + fn context(&self) -> ParserContext { + ParserContext::KeyValuePair } } @@ -86,7 +60,12 @@ mod test { use nom::combinator::all_consuming; use crate::parser::{ - ast::sequence::key_value::KeyValueSequence, input::ParserInput, ParserState, + ast::{ + sequence::{key_value::KeyValuePair, Sequence}, + ProgramAST, + }, + input::ParserInput, + ParserState, }; #[test] @@ -103,7 +82,7 @@ for (input, expected) in test { let parser_input = ParserInput::new(input, ParserState::default()); - let result = all_consuming(KeyValueSequence::parse)(parser_input); + let result = all_consuming(Sequence::<KeyValuePair>::parse)(parser_input); assert!(result.is_ok()); diff --git a/nemo/src/parser/ast/sequence/one.rs b/nemo/src/parser/ast/sequence/one.rs index d88f29772..a63901244 100644 --- a/nemo/src/parser/ast/sequence/one.rs +++ b/nemo/src/parser/ast/sequence/one.rs @@ -31,7 +31,7 @@ impl<'a> ExpressionSequenceOne<'a> { /// Parse a sequence of length one.
fn parse_sequence_single(input: ParserInput<'a>) -> ParserResult<'a, Expression<'a>> { - tuple((Expression::parse, WSoC::parse, Token::comma))(input) + tuple((Expression::parse, WSoC::parse, Token::seq_sep))(input) .map(|(rest, (result, _, _))| (rest, result)) } @@ -39,9 +39,9 @@ impl<'a> ExpressionSequenceOne<'a> { fn parse_sequence(input: ParserInput<'a>) -> ParserResult<'a, Vec<Expression<'a>>> { tuple(( Expression::parse, - tuple((WSoC::parse, Token::comma, WSoC::parse)), + tuple((WSoC::parse, Token::seq_sep, WSoC::parse)), separated_list1( - tuple((WSoC::parse, Token::comma, WSoC::parse)), + tuple((WSoC::parse, Token::seq_sep, WSoC::parse)), Expression::parse, ), ))(input) diff --git a/nemo/src/parser/ast/sequence/simple.rs b/nemo/src/parser/ast/sequence/simple.rs index fbb4a4c8b..d6c8e86df 100644 --- a/nemo/src/parser/ast/sequence/simple.rs +++ b/nemo/src/parser/ast/sequence/simple.rs @@ -32,7 +32,7 @@ impl<'a> ExpressionSequenceSimple<'a> { let input_span = input.span; separated_list1( - tuple((WSoC::parse, Token::comma, WSoC::parse)), + tuple((WSoC::parse, Token::seq_sep, WSoC::parse)), Expression::parse, )(input) .map(|(rest, expressions)| { diff --git a/nemo/src/parser/ast/statement.rs b/nemo/src/parser/ast/statement.rs index 4e73978f2..7e7a2220f 100644 --- a/nemo/src/parser/ast/statement.rs +++ b/nemo/src/parser/ast/statement.rs @@ -2,10 +2,10 @@ use nom::{ branch::alt, - character::complete::line_ending, combinator::{map, opt}, - sequence::{delimited, pair, terminated}, + sequence::{pair, terminated}, }; +use nom_locate::LocatedSpan; use crate::parser::{ context::{context, ParserContext}, @@ -103,12 +103,8 @@ impl<'a> ProgramAST<'a> for Statement<'a> { context( CONTEXT, pair( - opt(terminated(DocComment::parse, line_ending)), - delimited( - WSoC::parse, - StatementKind::parse, - pair(WSoC::parse, Token::dot), - ), + opt(DocComment::parse), + terminated(StatementKind::parse, pair(WSoC::parse, Token::dot)), ), )(input) .map(|(rest, (comment, statement))| { @@ -142,9 +138,12 @@ mod test { }; #[test] - fn parse_directive() { + fn parse_statement() { let test = vec![ - ("/// A fact \n a(1, 2) .", ParserContext::Expression), + ( + "/// A fact\n/// with a multiline doc comment.
\n a(1, 2) .", + ParserContext::Expression, + ), ("/// A rule \n a(1, 2) :- b(2, 1) .", ParserContext::Rule), ( "/// A directive \n \t@declare a(_: int, _: int) .", @@ -163,3 +162,25 @@ mod test { } } } + +// TODO: Remove this when the debug error statement printing in the ast is no longer needed +impl<'a> ProgramAST<'a> for Option<Statement<'a>> { + fn children(&'a self) -> Vec<&'a dyn ProgramAST> { + vec![] + } + + fn span(&self) -> Span<'a> { + Span(LocatedSpan::new("ERROR!")) + } + + fn parse(_input: ParserInput<'a>) -> ParserResult<'a, Self> + where + Self: Sized + 'a, + { + todo!() + } + + fn context(&self) -> ParserContext { + ParserContext::Statement + } +} diff --git a/nemo/src/parser/ast/tag/datatype.rs b/nemo/src/parser/ast/tag/datatype.rs index 98bfe5808..5e300c241 100644 --- a/nemo/src/parser/ast/tag/datatype.rs +++ b/nemo/src/parser/ast/tag/datatype.rs @@ -17,7 +17,7 @@ use crate::{ }; /// Tags that are used to identify operations -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct DataTypeTag<'a> { /// [ProgramSpan] associated with this node span: Span<'a>, diff --git a/nemo/src/parser/ast/tag/parameter.rs b/nemo/src/parser/ast/tag/parameter.rs index 0a5aa545b..0ad7c3474 100644 --- a/nemo/src/parser/ast/tag/parameter.rs +++ b/nemo/src/parser/ast/tag/parameter.rs @@ -20,7 +20,7 @@ pub enum Parameter { } /// Tags that are used to give names to certain objects -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct ParameterName<'a> { /// [ProgramSpan] associated with this node span: Span<'a>, diff --git a/nemo/src/parser/ast/tag/structure.rs b/nemo/src/parser/ast/tag/structure.rs index 11001b15e..07c5ec14d 100644 --- a/nemo/src/parser/ast/tag/structure.rs +++ b/nemo/src/parser/ast/tag/structure.rs @@ -76,7 +76,7 @@ impl<'a> ProgramAST<'a> for StructureTag<'a> { CONTEXT, alt(( map( - separated_pair(Token::name, Token::double_colon, Token::name), + separated_pair(Token::name, Token::namespace_separator, Token::name), |(prefix, tag)| StructureTagKind::Prefixed { prefix, tag }, ), map(Token::name, StructureTagKind::Plain), diff --git a/nemo/src/parser/ast/token.rs b/nemo/src/parser/ast/token.rs index 8b172af4a..beba27a8e 100644 --- a/nemo/src/parser/ast/token.rs +++ b/nemo/src/parser/ast/token.rs @@ -5,122 +5,146 @@ use enum_assoc::Assoc; use nom::{ branch::alt, - bytes::complete::{is_not, tag}, - character::complete::{alpha1, alphanumeric1, digit1, multispace1}, - combinator::{map, recognize}, + bytes::complete::{is_a, is_not, tag}, + character::complete::{alpha1, alphanumeric1, digit1, multispace1, space0, space1}, + combinator::{map, opt, recognize, verify}, multi::many0, sequence::pair, }; -use crate::parser::{ - context::{context, ParserContext}, - span::Span, - ParserInput, ParserResult, +use crate::{ + parser::{ + context::{context, ParserContext}, + span::Span, + ParserInput, ParserResult, + }, + syntax::{ + self, comment, + datavalues::{self, boolean, iri, map, string, tuple, RDF_DATATYPE_INDICATOR}, + directive, + expression::{aggregate, atom, operation, variable}, + operator, rule, + }, }; /// Enumeration of all accepted kinds of [Token]s #[derive(Assoc, Debug, Clone, Copy, PartialEq, Eq)] #[func(pub fn name(&self) -> &'static str)] pub enum TokenKind { - /// Question mark, used to mark universal variables - #[assoc(name = "?")] - QuestionMark, - /// Exclamation mark, used to mark existential variables - #[assoc(name = "!")] - ExclamationMark, - /// Open parenthesis + /// Opening parenthesis for parenthesised arithmetic terms #[assoc(name = "(")] OpenParenthesis, - /// Closed
parenthesis + /// Closing parenthesis for parenthesised arithmetic terms #[assoc(name = ")")] ClosedParenthesis, - /// Open bracket - #[assoc(name = "[")] - OpenBracket, - /// Closed bracket - #[assoc(name = "]")] - ClosedBracket, - /// Open brace - #[assoc(name = "{")] - OpenBrace, - /// Closed brace - #[assoc(name = "}")] - ClosedBrace, - /// Open Chevrons - #[assoc(name = "<")] - OpenChevrons, - /// Closed Chevrons - #[assoc(name = ">")] - ClosedChevrons, - /// Dot - #[assoc(name = ".")] - Dot, - /// Comma - #[assoc(name = ",")] - Comma, - /// Arrow, used to separate rules - #[assoc(name = ":-")] - Arrow, - /// Colon - #[assoc(name = ":")] - Colon, - /// Double Colon - #[assoc(name = "::")] - DoubleColon, - /// Semicolon + /// Opening delimiter for maps + #[assoc(name = map::OPEN)] + MapOpen, + /// Closing delimiter for maps + #[assoc(name = map::CLOSE)] + MapClose, + /// Opening delimiter for operations + #[assoc(name = operation::OPEN)] + OperationOpen, + /// Closing delimiter for operations + #[assoc(name = operation::CLOSE)] + OperationClose, + /// Opening delimiter for tuples + #[assoc(name = tuple::OPEN)] + TupleOpen, + /// Closing delimiter for tuples + #[assoc(name = tuple::CLOSE)] + TupleClose, + /// Semicolon, used in aggregate expressions to separate distinct variables #[assoc(name = ";")] Semicolon, - /// Greater than - #[assoc(name = ">")] + /// [UNIVERSAL_INDICATOR](variable::UNIVERSAL_INDICATOR), used to mark universal variables + #[assoc(name = variable::UNIVERSAL_INDICATOR)] + UniversalIndicator, + /// [EXISTENTIAL_INDICATOR](variable::EXISTENTIAL_INDICATOR), used to mark existential variables + #[assoc(name = variable::EXISTENTIAL_INDICATOR)] + ExistentialIndicator, + /// Opening delimiter for term sequence of atoms + #[assoc(name = atom::OPEN)] + AtomOpen, + /// Closing delimiter for term sequence of atoms + #[assoc(name = atom::CLOSE)] + AtomClose, + /// Opening delimiter for IRIs + #[assoc(name = iri::OPEN)] + IriOpen, + /// Closing delimiter for IRIs + #[assoc(name = iri::CLOSE)] + IriClose, + /// Separator for namespaces as defined in [NAMESPACE_SEPARATOR](directive::NAMESPACE_SEPARATOR) + #[assoc(name = directive::NAMESPACE_SEPARATOR)] + NamespaceSeparator, + /// Sequence separator as defined in [SEQUENCE_SEPARATOR](syntax::SEQUENCE_SEPARATOR) + #[assoc(name = syntax::SEQUENCE_SEPARATOR)] + SequenceSeparator, + /// Map key value assignment as defined in [KEY_VALUE_ASSIGN](map::KEY_VALUE_ASSIGN) + #[assoc(name = map::KEY_VALUE_ASSIGN)] + KeyValueAssignment, + /// Arrow, used to separate rules as defined in [ARROW](rule::ARROW) + #[assoc(name = rule::ARROW)] + RuleArrow, + /// Greater than as defined in [GREATER](operator::GREATER) + #[assoc(name = operator::GREATER)] Greater, - /// Greater than or equal - #[assoc(name = ">=")] + /// Greater than or equal as defined in [GREATER_EQUAL](operator::GREATER_EQUAL) + #[assoc(name = operator::GREATER_EQUAL)] GreaterEqual, - /// Less than - #[assoc(name = "<")] + /// Less than as defined in [LESS](operator::LESS) + #[assoc(name = operator::LESS)] Less, - /// Less than or equal - #[assoc(name = "<=")] + /// Less than or equal as defined in [LESS_EQUAL](operator::LESS_EQUAL) + #[assoc(name = operator::LESS_EQUAL)] LessEqual, - /// Equal - #[assoc(name = "=")] + /// Equal as defined in [EQUAL](operator::EQUAL) + #[assoc(name = operator::EQUAL)] Equal, - /// Unequal - #[assoc(name = "!=")] + /// Unequal as defined in [UNEQUAL](operator::UNEQUAL) + #[assoc(name = operator::UNEQUAL)] Unequal, /// Tilde, used for negation - #[assoc(name = "~")] - Tilde, +
#[assoc(name = atom::NEG)] + Neg, /// Double caret - #[assoc(name = "^^")] - DoubleCaret, - /// Hash, used in front of aggregates - #[assoc(name = "#")] - Hash, - /// Underscore, used for anonymous variables - #[assoc(name = "_")] - Underscore, - /// At, used to indicate directives - #[assoc(name = "@")] - At, - /// Plus - #[assoc(name = "+")] + #[assoc(name = RDF_DATATYPE_INDICATOR)] + RdfDatatypeIndicator, + /// Hash, used in front of aggregates as defined in [INDICATOR](aggregate::INDICATOR) + #[assoc(name = aggregate::INDICATOR)] + AggregateIndicator, + /// Aggregate open + #[assoc(name = aggregate::OPEN)] + AggregateOpen, + /// Aggregate close + #[assoc(name = aggregate::CLOSE)] + AggregateClose, + /// Underscore, used for anonymous values as defined in [ANONYMOUS](datavalues::ANONYMOUS) + #[assoc(name = datavalues::ANONYMOUS)] + AnonVal, + /// Plus as defined in [PLUS](operator::PLUS) + #[assoc(name = operator::PLUS)] Plus, - /// Minus - #[assoc(name = "-")] + /// Minus as defined in [MINUS](operator::MINUS) + #[assoc(name = operator::MINUS)] Minus, - /// Star - #[assoc(name = "*")] - Star, - /// Division - #[assoc(name = "/")] + /// Star as defined in [MUL](operator::MUL) + #[assoc(name = operator::MUL)] + Multiplication, + /// Division as defined in [DIV](operator::DIV) + #[assoc(name = operator::DIV)] Division, /// True - #[assoc(name = "true")] + #[assoc(name = boolean::TRUE)] True, /// False - #[assoc(name = "false")] + #[assoc(name = boolean::FALSE)] False, + /// Dot for numbers + #[assoc(name = datavalues::DOT)] + Dot, /// Quote #[assoc(name = "\"")] Quote, @@ -151,40 +175,65 @@ pub enum TokenKind { /// String #[assoc(name = "string")] String, - /// Token marking a normal comment - #[assoc(name = "//")] + /// Token marking language tag + #[assoc(name = string::LANG_TAG)] + LangTagIndicator, + /// Token marking a normal comment as defined in [COMMENT](comment::COMMENT) + #[assoc(name = comment::COMMENT)] Comment, - /// Token marking the beginning of a closed comment - #[assoc(name = "/*")] + /// Token marking the beginning of a closed comment (can be multiple lines) as defined + /// in [CLOSED_OPEN](comment::CLOSED_OPEN) + #[assoc(name = comment::CLOSED_OPEN)] OpenComment, - /// Token marking the beginning of a closed comment - #[assoc(name = "*/")] + /// Token marking the beginning of a closed comment (can be multiple lines) as defined + /// in [CLOSED_CLOSE](comment::CLOSED_CLOSE) + #[assoc(name = comment::CLOSED_CLOSE)] CloseComment, - /// Token marking a doc comment attached to e.g. a rule - #[assoc(name = "///")] + /// Token marking a doc comment attached to e.g. 
a rule as defined in [DOC_COMMENT](comment::DOC_COMMENT) + #[assoc(name = comment::DOC_COMMENT)] DocComment, - /// Token marking the top level comment - #[assoc(name = "//!")] + /// Token marking the top level comment as defined in [TOP_LEVEL](comment::TOP_LEVEL) + #[assoc(name = comment::TOP_LEVEL)] TopLevelComment, + /// Directive keyword indicator as defined in [INDICATOR_TOKEN](directive::INDICATOR_TOKEN) + #[assoc(name = directive::INDICATOR_TOKEN)] + DirectiveIndicator, /// Token for the base directive - #[assoc(name = "base")] + #[assoc(name = directive::BASE)] BaseDirective, /// Token for the declare directive - #[assoc(name = "declare")] + #[assoc(name = directive::DECLARE)] DeclareDirective, /// Token for the export directive - #[assoc(name = "export")] + #[assoc(name = directive::EXPORT)] ExportDirective, /// Token for the import directive - #[assoc(name = "import")] + #[assoc(name = directive::IMPORT)] ImportDirective, /// Token for the output directive - #[assoc(name = "output")] + #[assoc(name = directive::OUTPUT)] OutputDirective, /// Token for the prefix directive - #[assoc(name = "prefix")] + #[assoc(name = directive::PREFIX)] PrefixDirective, - /// White spaces + /// Token for the import assignment + #[assoc(name = directive::IMPORT_ASSIGNMENT)] + ImportAssignment, + /// Token for the export assignment + #[assoc(name = directive::EXPORT_ASSIGNMENT)] + ExportAssignment, + /// Separator between a parameter name and its datatype + #[assoc(name = directive::NAME_DATATYPE_SEPARATOR)] + NameDatatypeSeparator, + /// Opening token for attributes + #[assoc(name = rule::OPEN_ATTRIBUTE)] + OpenAttribute, + /// Closing token for attributes + #[assoc(name = rule::CLOSE_ATTRIBUTE)] + CloseAttribute, + /// Space (space, tab) + #[assoc(name = "space")] + Space, + /// White spaces (space, tab, newlines) #[assoc(name = "whitespace")] Whitespace, /// Double new line @@ -301,6 +350,32 @@ impl<'a> Token<'a> { ) } + /// Parse [TokenKind::Space], zero or more + pub fn space0(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>> { + context(ParserContext::token(TokenKind::Space), space0)(input).map(|(rest, result)| { + ( + rest, + Token { + span: result.span, + kind: TokenKind::Space, + }, + ) + }) + } + + /// Parse [TokenKind::Space], one or more + pub fn space1(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>> { + context(ParserContext::token(TokenKind::Space), space1)(input).map(|(rest, result)| { + ( + rest, + Token { + span: result.span, + kind: TokenKind::Space, + }, + ) + }) + } + /// Parse [TokenKind::Whitespace]. pub fn whitespace(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>> { context(ParserContext::token(TokenKind::Whitespace), multispace1)(input).map( |(rest, result)| { @@ -333,6 +408,143 @@ impl<'a> Token<'a> { }) } + /// Parse [TokenKind::BaseDirective]. + pub fn directive_base(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>> { + context( + ParserContext::token(TokenKind::BaseDirective), + // Parsed via `Self::name` with `verify` instead of `tag("base")`: with `tag`, + // an input like `@baseerror` would match `base` and leave `error` as the rest, + // causing a misleading error. The desired behaviour is that `baseerror` is + // matched as a whole and produces an [UnknownDirective].
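+ // (Illustrative inputs, matching the test files in this series: `@base` lexes as + // a BaseDirective token, while `@basetest` must fail here so that the whole name + // can be reported as an unknown directive.)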
+ verify(Self::name, |tag| *tag.span.0.fragment() == directive::BASE), + )(input) + .map(|(rest, result)| { + ( + rest, + Token { + span: result.span, + kind: TokenKind::BaseDirective, + }, + ) + }) + } + + pub fn directive_declare(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>> { + context( + ParserContext::token(TokenKind::DeclareDirective), + // The reasoning behind using `verify` is the same as in the `directive_base` function. + verify(Self::name, |tag| { + *tag.span.0.fragment() == directive::DECLARE + }), + )(input) + .map(|(rest, result)| { + ( + rest, + Token { + span: result.span, + kind: TokenKind::DeclareDirective, + }, + ) + }) + } + pub fn directive_export(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>> { + context( + ParserContext::token(TokenKind::ExportDirective), + // The reasoning behind using `verify` is the same as in the `directive_base` function. + verify(Self::name, |tag| { + *tag.span.0.fragment() == directive::EXPORT + }), + )(input) + .map(|(rest, result)| { + ( + rest, + Token { + span: result.span, + kind: TokenKind::ExportDirective, + }, + ) + }) + } + pub fn directive_import(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>> { + context( + ParserContext::token(TokenKind::ImportDirective), + // The reasoning behind using `verify` is the same as in the `directive_base` function. + verify(Self::name, |tag| { + *tag.span.0.fragment() == directive::IMPORT + }), + )(input) + .map(|(rest, result)| { + ( + rest, + Token { + span: result.span, + kind: TokenKind::ImportDirective, + }, + ) + }) + } + pub fn directive_output(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>> { + context( + ParserContext::token(TokenKind::OutputDirective), + // The reasoning behind using `verify` is the same as in the `directive_base` function. + verify(Self::name, |tag| { + *tag.span.0.fragment() == directive::OUTPUT + }), + )(input) + .map(|(rest, result)| { + ( + rest, + Token { + span: result.span, + kind: TokenKind::OutputDirective, + }, + ) + }) + } + pub fn directive_prefix(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>> { + context( + ParserContext::token(TokenKind::PrefixDirective), + // The reasoning behind using `verify` is the same as in the `directive_base` function. + verify(Self::name, |tag| { + *tag.span.0.fragment() == directive::PREFIX + }), + )(input) + .map(|(rest, result)| { + ( + rest, + Token { + span: result.span, + kind: TokenKind::PrefixDirective, + }, + ) + }) + } + + pub fn comment(input: ParserInput<'a>) -> ParserResult<'a, Token> { + context( + ParserContext::token(TokenKind::Comment), + verify( + alt(( + recognize(pair( + tag(comment::COMMENT_LONG), + opt(is_a(comment::COMMENT_EXT)), + )), + tag(comment::DOC_COMMENT), + tag(comment::COMMENT), + )), + |result: &ParserInput| *result.span.0.fragment() != comment::DOC_COMMENT, + ), + )(input) + .map(|(rest, result)| { + ( + rest, + Token { + span: result.span, + kind: TokenKind::Comment, + }, + ) + }) + } + /// Create [TokenKind::Error]. 
pub fn error(span: Span<'a>) -> Token<'a> { Token { @@ -341,21 +553,14 @@ impl<'a> Token<'a> { } } + string_token!(directive_indicator, TokenKind::DirectiveIndicator); string_token!(open_parenthesis, TokenKind::OpenParenthesis); string_token!(closed_parenthesis, TokenKind::ClosedParenthesis); - string_token!(open_brace, TokenKind::OpenBrace); - string_token!(closed_brace, TokenKind::ClosedBrace); - string_token!(open_chevrons, TokenKind::OpenChevrons); - string_token!(closed_chevrons, TokenKind::ClosedChevrons); - string_token!(open_bracket, TokenKind::OpenBracket); - string_token!(closed_bracket, TokenKind::ClosedBracket); - string_token!(question_mark, TokenKind::QuestionMark); - string_token!(exclamation_mark, TokenKind::ExclamationMark); + string_token!(open_iri, TokenKind::IriOpen); + string_token!(close_iri, TokenKind::IriClose); string_token!(dot, TokenKind::Dot); - string_token!(comma, TokenKind::Comma); - string_token!(arrow, TokenKind::Arrow); - string_token!(colon, TokenKind::Colon); - string_token!(double_colon, TokenKind::DoubleColon); + string_token!(seq_sep, TokenKind::SequenceSeparator); + string_token!(arrow, TokenKind::RuleArrow); string_token!(semicolon, TokenKind::Semicolon); string_token!(greater, TokenKind::Greater); string_token!(greater_equal, TokenKind::GreaterEqual); @@ -363,18 +568,18 @@ impl<'a> Token<'a> { string_token!(less_equal, TokenKind::LessEqual); string_token!(equal, TokenKind::Equal); string_token!(unequal, TokenKind::Unequal); - string_token!(tilde, TokenKind::Tilde); - string_token!(double_caret, TokenKind::DoubleCaret); - string_token!(hash, TokenKind::Hash); - string_token!(underscore, TokenKind::Underscore); - string_token!(at, TokenKind::At); + string_token!(tilde, TokenKind::Neg); + string_token!(double_caret, TokenKind::RdfDatatypeIndicator); + string_token!(aggregate_indicator, TokenKind::AggregateIndicator); + string_token!(aggregate_open, TokenKind::AggregateOpen); + string_token!(aggregate_close, TokenKind::AggregateClose); + string_token!(underscore, TokenKind::AnonVal); string_token!(plus, TokenKind::Plus); string_token!(minus, TokenKind::Minus); - string_token!(star, TokenKind::Star); + string_token!(star, TokenKind::Multiplication); string_token!(division, TokenKind::Division); string_token!(boolean_true, TokenKind::True); string_token!(boolean_false, TokenKind::False); - string_token!(comment, TokenKind::Comment); string_token!(open_comment, TokenKind::OpenComment); string_token!(close_comment, TokenKind::CloseComment); string_token!(doc_comment, TokenKind::DocComment); @@ -385,10 +590,54 @@ impl<'a> Token<'a> { string_token!(exponent_upper, TokenKind::ExponentUpper); string_token!(type_marker_double, TokenKind::TypeMarkerDouble); string_token!(type_marker_float, TokenKind::TypeMarkerFloat); - string_token!(directive_base, TokenKind::BaseDirective); - string_token!(directive_declare, TokenKind::DeclareDirective); - string_token!(directive_export, TokenKind::ExportDirective); - string_token!(directive_import, TokenKind::ImportDirective); - string_token!(directive_output, TokenKind::OutputDirective); - string_token!(directive_prefix, TokenKind::PrefixDirective); + string_token!(import_assignment, TokenKind::ImportAssignment); + string_token!(export_assignment, TokenKind::ExportAssignment); + string_token!(k_v_assignment, TokenKind::KeyValueAssignment); + string_token!(atom_open, TokenKind::AtomOpen); + string_token!(atom_close, TokenKind::AtomClose); + string_token!(map_open, TokenKind::MapOpen); + string_token!(map_close, 
TokenKind::MapClose); + string_token!(operation_open, TokenKind::OperationOpen); + string_token!(operation_close, TokenKind::OperationClose); + string_token!(tuple_open, TokenKind::TupleOpen); + string_token!(tuple_close, TokenKind::TupleClose); + string_token!(namespace_separator, TokenKind::NamespaceSeparator); + string_token!(open_attribute, TokenKind::OpenAttribute); + string_token!(close_attribute, TokenKind::CloseAttribute); + string_token!(rule_arrow, TokenKind::RuleArrow); + string_token!(universal_indicator, TokenKind::UniversalIndicator); + string_token!(existential_indicator, TokenKind::ExistentialIndicator); + string_token!(lang_tag_indicator, TokenKind::LangTagIndicator); + string_token!(name_datatype_separator, TokenKind::NameDatatypeSeparator); +} + +#[cfg(test)] +mod test { + use nom::combinator::all_consuming; + + use crate::parser::ParserState; + + use super::*; + + #[test] + fn comment() { + let test = [ + ("/", Err("/")), + ("//", Ok("//")), + ("///", Err("///")), + ("////", Ok("////")), + ("/////", Ok("/////")), + ("///////////////", Ok("///////////////")), + ]; + + for (input, expected) in test { + let parser_input = ParserInput::new(input, ParserState::default()); + let result = all_consuming(Token::comment)(parser_input); + + match result { + Ok(_) => assert_eq!(result.is_ok(), expected.is_ok()), + Err(_) => assert_eq!(result.is_err(), expected.is_err()), + } + } + } } diff --git a/nemo/src/parser/context.rs b/nemo/src/parser/context.rs index 1ec938a2b..f8e74d9e3 100644 --- a/nemo/src/parser/context.rs +++ b/nemo/src/parser/context.rs @@ -62,17 +62,29 @@ pub enum ParserContext { #[assoc(name = "prefix directive")] Prefix, /// Unknown directive - #[assoc(name = "directive")] + #[assoc(name = "unknown directive")] UnknownDirective, + /// Name type pairs in declare directive + #[assoc(name = "name type pair")] + DeclareNameTypePair, /// Expression #[assoc(name = "expression")] Expression, + /// Parenthesised expression + #[assoc(name = "parenthesised expression")] + ParenthesisedExpression, /// Tuple #[assoc(name = "tuple")] Tuple, /// Map #[assoc(name = "map")] Map, + /// Key value pairs in maps + #[assoc(name = "key value pair")] + KeyValuePair, + /// Sequence + #[assoc(name = "sequence")] + Sequence, /// Arithmetic expression #[assoc(name = "arithmetic expression")] Arithmetic, @@ -98,7 +110,7 @@ pub enum ParserContext { #[assoc(name = "negation")] Negation, /// Infix - #[assoc(name = "expression")] // TODO: Is there a better name? + #[assoc(name = "expression")] // TODO: Is there a better name? -- "infix expression"? Infix, /// Comment #[assoc(name = "comment")] From 3a9e6e86c7fde5bc9d3b41622b711e677a3be784 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Tue, 30 Jul 2024 12:30:38 +0200 Subject: [PATCH 139/214] Change visibility --- nemo/src/io/lexer.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs index eca3cff25..bb8d66398 100644 --- a/nemo/src/io/lexer.rs +++ b/nemo/src/io/lexer.rs @@ -1,4 +1,7 @@ //! Lexical tokenization of rulewerk-style rules. 
+// FIXME: temporary, because this module probably will get removed +#![allow(unused)] +#![allow(missing_docs)] use std::{cell::RefCell, ops::Range}; @@ -7,7 +10,7 @@ use nom::{ branch::alt, bytes::complete::{is_not, tag, take, take_till}, character::complete::{alpha1, alphanumeric1, digit1, line_ending, multispace0, multispace1}, - combinator::{all_consuming, cut, map, opt, recognize}, + combinator::{cut, map, opt, recognize}, error::ParseError, multi::{many0, many1}, sequence::{delimited, pair, tuple}, @@ -18,7 +21,7 @@ use nom_supreme::{context::ContextError, error::GenericErrorTree}; use tower_lsp::lsp_types::SymbolKind; #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub(crate) enum Context { +pub enum Context { Tag(&'static str), Exponent, Punctuations, From a39e45bd44ebecf781f6fbeca91311a01cc22fe0 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Tue, 30 Jul 2024 12:31:31 +0200 Subject: [PATCH 140/214] Add debug printing of the AST --- nemo-cli/src/main.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/nemo-cli/src/main.rs b/nemo-cli/src/main.rs index 54ee753fa..fb462f143 100644 --- a/nemo-cli/src/main.rs +++ b/nemo-cli/src/main.rs @@ -178,6 +178,7 @@ fn run(mut cli: CliApp) -> Result<(), Error> { std::process::exit(1); } }; + log::debug!("AST:\n{program_ast}"); let program = match rule_model::translation::ASTProgramTranslation::initialize( &rules_content, From 5efe1f78a765d4a5e1b9c687d43104140db6a884 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Tue, 30 Jul 2024 12:32:09 +0200 Subject: [PATCH 141/214] Add temporary test files --- testfile.rls | 17 +++-- testfile1.rls | 172 ++++++++++++++++++++++++++++++++++++++++++++++++++ testfile2.rls | 6 +- testfile3.rls | 10 +++ 4 files changed, 198 insertions(+), 7 deletions(-) create mode 100644 testfile1.rls create mode 100644 testfile3.rls diff --git a/testfile.rls b/testfile.rls index 07e551c67..1df8ea366 100644 --- a/testfile.rls +++ b/testfile.rls @@ -1,10 +1,19 @@ +/// base directive @base . +/// prefix directive @prefix rdfs: . -@import sourceA :- csv { resource = "sources/dataA.csv" } . +///import directive +@import sourceA :- csv { resource : "sources/dataA.csv" } . +/// export directive @export a :- csv {} . -@output a . -% Facts: +/// output directive +@output a, b, c . +/// declare directive +@declare father(_:any, _:any). +@declare mother ( child : any , mother : any ) . + +// Facts: father(alice, bob). mother(bob, carla). father(bob, darius). @@ -14,7 +23,7 @@ mother(, ). father(, ). mother(, ). -% Rules: +// Rules: parent(?X, ?Y) :- mother(?X, ?Y). parent(?X, ?Y) :- father(?X, ?Y). parent( ?X , ?Y ) :- ~sibling( ?X , ?Y ) . diff --git a/testfile1.rls b/testfile1.rls new file mode 100644 index 000000000..a2c0ef447 --- /dev/null +++ b/testfile1.rls @@ -0,0 +1,172 @@ +//!This ruleset normalises an EL ontology in OWL/RDF encoding. +//!Unsupported OWL EL features include: oneOf, allDisjoint. +//!The encoding used for property chains is also slightly antique. +//! +//!The ruleset computes facts for the following predicates: +//! nf:isMainClass(?C): ?C is an "interesting" class (not just an auxiliary class expression) +//! nf:isSubClass(?C): ?C occurs in a subclass position (i.e., negatively) +//! nf:conj(?C,?D1,?D2): ?C is the conjunction of ?D1 and ?D2 +//! nf:exists(?C,?P,?D): ?C is the existential restriction of property ?P with values from ?D +//! nf:subClassOf(?C,?D): ?C is syntactically specified to be a subclass of ?D +//! nf:subPropChain(?S1,?S2,?R): there was a role chain axiom ?S1 o ?S2 -> ?R +//! 
nf:subProp(?R,?S): ?R is a subproperty of ?S (directly or indirectly) +//! +//!Also, this program implements EL reasoning based on the (optimized) encoding described here: +//!https://link.springer.com/article/10.1007/s10817-013-9296-3 +//!The description of rules at the bottom reference the names from Figure 3. +//! +//!The reasoning is applied to the normalised ontologies. + +@prefix nf: . +@prefix inf: . +@prefix owl: . +@prefix rdf: . +@prefix sct: . +@prefix xsd: . +@prefix rdfs: . +@prefix genid: . + +@import TRIPLE :- rdf { resource : "galen-el.nt.gz" } . + +// ↓================ Errors ================↓ +// The constant names with prefixes don't get parsed properly +///Mark classes: +ClassObject(owl::someValuesFrom) . +ClassObject(rdf::first) . +ClassObject(rdfs::subClassOf) . +ClassObject(owl::equivalentClass) . +ClassSubject(rdfs::subClassOf) . +ClassSubject(owl::equivalentClass) . +// ↑================ Errors ================↑ +class(?O) :- TRIPLE(?X, ?P, ?O), ClassObject(?P) . +class(?X) :- TRIPLE(?X, ?P, ?O), ClassSubject(?P) . + +//Distinguish auxiliary class expressions from primary classes: + +// ↓================ Errors ================↓ +/// Mark auxiliary existential role restrictions: +synEx(?Y,?P,?X), auxClass(?X) :- TRIPLE(?X, owl:someValuesFrom, ?Y), TRIPLE(?X, owl:onProperty, ?P) . + +//Mark auxiliary conjunctions: + +next(?L1,?L2) :- TRIPLE(?L1,rdf:rest,?L2) . + first(?L1) :- TRIPLE(?X, owl:intersectionOf, ?L1) . +// ↑================ Errors ================↑ +nonfirst(?L2) :- first(?L1), next(?L1,?L2) . +nonfirst(?L2) :- nonfirst(?L1), next(?L1,?L2) . + last(?Ln) :- next(?Ln,rdf:nil) . + nonlast(?L) :- next(?L,?Ln), last(?Ln) . + nonlast(?L1) :- next(?L1,?L2), nonlast(?L2) . + in(?L,?C) :- TRIPLE(?L,rdf:first,?C) . + +//Mark conjunctions: +synConj(?X,?C1,?C2), auxClass(?X) :- + TRIPLE(?X, owl:intersectionOf, ?L1), next(?L1,?L2), last(?L2), in(?L1,?C1), in(?L2,?C2) . +synConj(?X,?C1,?L2), auxClass(?X) :- + TRIPLE(?X, owl:intersectionOf, ?L1), next(?L1,?L2), nonlast(?L2), in(?L1,?C1) . +synConj(?L1,?C1,?L2), auxClass(?L1) :- + nonfirst(?L1), next(?L1,?L2), nonlast(?L2), in(?L1,?C1) . +synConj(?L1,?C1,?C2), auxClass(?L1) :- + nonfirst(?L1), next(?L1,?L2), last(?L2), in(?L1,?C1), in(?L2,?C2) . + +//The other classes are "main classes" that are not normalised: +nf:isMainClass(?X) :- class(?X), ~auxClass(?X) . + +//Normalise auxiliary nested class expressions: + repOf(?X,?X) :- nf:isMainClass(?X) . //keep main classes unchanged + synExRep(?X,?P,?Rep) :- synEx(?Y,?P,?X), repOf(?Y,?Rep) . +nf:exists(!New,?P,?Rep) :- synExRep(?X,?P,?Rep) . + repOf(?X,?N) :- synExRep(?X,?P,?Rep), nf:exists(?N,?P,?Rep) . + // nf:exists(!New,?P,?Rep) :- synEx(?Y,?P,?X), repOf(?Y,?Rep) . + // repOf(?X,?N) :- synEx(?Y,?P), repOf(?Y,?Rep), nf:exists(?N,?P,?Rep) . +nf:conj(!New,?R1,?R2) :- synConj(?X,?C1,?C2), repOf(?C1,?R1), repOf(?C2,?R2) . + repOf(?X,?N) :- synConj(?X,?C1,?C2), repOf(?C1,?R1), repOf(?C2,?R2), nf:conj(?N,?R1,?R2) . + + +///Extract old-style property chains: +nf:subPropChain(?S,?T,?R), nf:subProp(?R,?R) :- + TRIPLE(?L,rdfs:subPropertyOf,?R), TRIPLE(?L,owl:propertyChain,?L1), + in(?L1,?S), next(?L1,?L2), in(?L2,?T) . + +///Initialise subsumption axioms: + prepareSco(?X,?Y) :- TRIPLE(?X, rdfs:subClassOf, ?Y) . + prepareSco(?X,?Y), prepareSco(?Y,?X) :- TRIPLE(?X, owl:equivalentClass, ?Y) . +nf:subClassOf(?RX,?RY), nf:isSubClass(?RX) :- prepareSco(?X,?Y), repOf(?X,?RX), repOf(?Y,?RY) . 
+ +///Initialise disjointness: +nf:subClassOf(!C,owl:Nothing), nf:conj(!C,?X,?Y), nf:isSubClass(!C), nf:isSubClass(?X), nf:isSubClass(?Y) + :- TRIPLE(?X,owl:disjointWith,?Y) . + +///Mark classes in subclass position recursively: + nf:isSubClass(?D) :- nf:exists(?C, ?P, ?D), nf:isSubClass(?C) . +nf:isSubClass(?C1), nf:isSubClass(?C2) :- nf:conj(?X, ?C1, ?C2), nf:isSubClass(?X) . + +///Precompute role hierarchy: +directSubProp(?R,?S) :- TRIPLE(?R,rdfs:subPropertyOf,?S) . +//Initialise role hierarchy only for roles in subclass positions: +nf:subProp(?P,?P) :- nf:exists(?C,?P,?D), nf:isSubClass(?C) . +nf:subProp(?R,?T) :- nf:subProp(?R,?S), directSubProp(?S,?T) . + +//Inference rules + +//Start classification for all named classes +inf:init(?C) :- nf:isMainClass(?C) . +//R_init +inf:init(?C) :- inf:ex(?E, ?R, ?C) . + +//R_0: Every class is a sub class of itself +inf:subClassOf(?C, ?C) :- inf:init(?C) . +//R_\top: Every class is a sub class of owl:Thing +inf:subClassOf(?C, "") :- nf:isMainClass(?C) . + +//R_\sqcap^-: +// If ?C is contained in the intersection of ?D1 and ?D1, +// then ?C is contained in ?D1 and ?C is contained in ?D2. +inf:subClassOf(?C,?D1), inf:subClassOf(?C,?D2) :- inf:subClassOf(?C,?Y), nf:conj(?Y,?D1,?D2) . + +//R_\sqcap^+: +// If ?C is contained in ?D1 and ?D2, +// then ?C is contained in the intersection ?I of ?D1 ?D2 +inf:subClassOf(?C, ?I) :- + inf:subClassOf(?C, ?D1), inf:subClassOf(?C, ?D2), + nf:conj(?I, ?D1, ?D2), nf:isSubClass(?I) . + +//R_\exists^-: +// If every ?Y has an R-relation to a ?C +// and every ?E is a ?Y, +// then every ?E has an ?R-relation to a ?C +inf:ex(?E, ?R, ?C) :- inf:subClassOf(?E, ?Y), nf:exists(?Y, ?R, ?C) . + +//R_\exists^+: +// If every ?E has an ?R-relation to a ?C, +// and every ?C is a ?D, +// and ?R is a subproperty of ?S, +// then every ?E has an ?S-relation to a ?D +// (i.e. every ?E is an ?Y where ?Y is the class of +// of individuals that have an ?S-relation to a ?D) +inf:subClassOf(?E, ?Y) :- + inf:ex(?E, ?R, ?C), inf:subClassOf(?C, ?D), nf:subProp(?R, ?S), + nf:exists(?Y, ?S, ?D), nf:isSubClass(?Y) . + +//R_\circ: +// If ?E has an ?R1-relation to a ?C and ?C has an ?R2-relation to a ?D, +// and ?R1 is a subproperty of ?S1 and ?R2 is a subproperty of ?S2 +// then ?E has an ?S-relation to a ?D +inf:ex(?E, ?S, ?D) :- + inf:ex(?E, ?R1, ?C), inf:ex(?C, ?R2, ?D), + nf:subProp(?R1, ?S1), nf:subProp(?R2, ?S2), + nf:subPropChain(?S1, ?S2, ?S) . + +//R_\sqsubseteq: Transitive closure of the subclass-of relation +inf:subClassOf(?C,?E) :- inf:subClassOf(?C,?D), nf:subClassOf(?D,?E) . + +//R_\bot: If every ?E has an R-relation to a ?C but ?C is empty, then ?E is also empty +inf:subClassOf(?E, "") :- + inf:ex(?E,?R,?C), inf:subClassOf(?C,"") . + +///Extract final results for main classes + +mainSubClassOf(?A,?B) :- + inf:subClassOf(?A,?B), nf:isMainClass(?A), nf:isMainClass(?B) . + +@export mainSubClassOf :- csv{compression:"gzip"} . diff --git a/testfile2.rls b/testfile2.rls index 95963d67b..e576fb8d5 100644 --- a/testfile2.rls +++ b/testfile2.rls @@ -1,12 +1,12 @@ -% Facts: -father( % father predicate means, that 'alice has father bob' +// Facts: +father( // father predicate means, that 'alice has father bob' alice, bob). mother bob, carla). father(bob darius). mother(alice, carla . -% Rules: +// Rules: parent(?X, ?Y) :- mother(?X, ?Y) parent(?X, ?Y) :- father(?X, ?Y). parent( ?X , ?Y :- ~sibling( ?X , ?Y ) . 
diff --git a/testfile3.rls b/testfile3.rls new file mode 100644 index 000000000..e72804564 --- /dev/null +++ b/testfile3.rls @@ -0,0 +1,10 @@ +//! This is just a testfile. +//! This testfile is used to check the behaviour of the parser. + +//// normal comment. + +/// @base . +/// @basetest . +#[whohoo(my, first, attribute)] +#[but(can, it, parse, 2)] // this attribute is only there for testing purposes +head(?x) :- body(?x, constant). From d81993a40977164bdd362d6804267d65413eb72b Mon Sep 17 00:00:00 2001 From: Alex Ivliev Date: Wed, 31 Jul 2024 11:12:33 +0200 Subject: [PATCH 142/214] Translate AST to logical rule model --- nemo-physical/src/datavalues/any_datavalue.rs | 14 +- nemo-physical/src/datavalues/errors.rs | 8 +- nemo/src/parser/ast/directive/prefix.rs | 5 + nemo/src/parser/ast/directive/unknown.rs | 5 + .../src/parser/ast/expression/basic/number.rs | 98 +++++- .../ast/expression/complex/aggregation.rs | 5 + nemo/src/parser/ast/program.rs | 2 +- nemo/src/parser/ast/tag/aggregation.rs | 5 + nemo/src/parser/span.rs | 16 + nemo/src/rule_model/components/atom.rs | 16 +- nemo/src/rule_model/components/fact.rs | 12 +- .../components/import_export/file_formats.rs | 12 +- nemo/src/rule_model/components/term.rs | 22 +- .../rule_model/components/term/aggregate.rs | 35 +- .../rule_model/components/term/function.rs | 11 +- nemo/src/rule_model/components/term/map.rs | 24 +- nemo/src/rule_model/error.rs | 27 +- nemo/src/rule_model/error/hint.rs | 8 + nemo/src/rule_model/error/hint/similar.rs | 10 + nemo/src/rule_model/error/info.rs | 19 ++ .../src/rule_model/error/translation_error.rs | 79 ++++- nemo/src/rule_model/program.rs | 301 ++---------------- nemo/src/rule_model/translation.rs | 172 +++------- nemo/src/rule_model/translation/basic.rs | 9 + .../src/rule_model/translation/basic/blank.rs | 20 ++ .../rule_model/translation/basic/boolean.rs | 23 ++ .../rule_model/translation/basic/constant.rs | 20 ++ .../rule_model/translation/basic/number.rs | 51 +++ nemo/src/rule_model/translation/basic/rdf.rs | 25 ++ .../rule_model/translation/basic/string.rs | 24 ++ create mode 100644 nemo/src/rule_model/error/hint/similar.rs create mode 100644 nemo/src/rule_model/error/info.rs create mode 100644 nemo/src/rule_model/translation/basic.rs create mode 100644 nemo/src/rule_model/translation/basic/blank.rs create mode 100644 nemo/src/rule_model/translation/basic/boolean.rs create mode 100644 nemo/src/rule_model/translation/basic/constant.rs create mode 100644 nemo/src/rule_model/translation/basic/number.rs create mode 100644 nemo/src/rule_model/translation/basic/rdf.rs create mode 100644 nemo/src/rule_model/translation/basic/string.rs
create mode 100644 nemo/src/rule_model/translation/basic/variable.rs create mode 100644 nemo/src/rule_model/translation/complex.rs create mode 100644 nemo/src/rule_model/translation/complex/aggregation.rs create mode 100644 nemo/src/rule_model/translation/complex/arithmetic.rs create mode 100644 nemo/src/rule_model/translation/complex/function.rs create mode 100644 nemo/src/rule_model/translation/complex/infix.rs create mode 100644 nemo/src/rule_model/translation/complex/map.rs create mode 100644 nemo/src/rule_model/translation/complex/operation.rs create mode 100644 nemo/src/rule_model/translation/complex/tuple.rs create mode 100644 nemo/src/rule_model/translation/directive.rs create mode 100644 nemo/src/rule_model/translation/directive/base.rs create mode 100644 nemo/src/rule_model/translation/directive/declare.rs create mode 100644 nemo/src/rule_model/translation/directive/import_export.rs create mode 100644 nemo/src/rule_model/translation/directive/output.rs create mode 100644 nemo/src/rule_model/translation/directive/prefix.rs create mode 100644 nemo/src/rule_model/translation/directive/unknown.rs create mode 100644 nemo/src/rule_model/translation/rule.rs diff --git a/nemo-physical/src/datavalues/any_datavalue.rs b/nemo-physical/src/datavalues/any_datavalue.rs index 30d37e255..691790e1d 100644 --- a/nemo-physical/src/datavalues/any_datavalue.rs +++ b/nemo-physical/src/datavalues/any_datavalue.rs @@ -150,22 +150,24 @@ impl AnyDataValue { if let Some(value) = dict.id_to_datavalue(usize::try_from(id).unwrap()) { Ok(value) } else { - Err(DataValueCreationError::InternalError(Box::new( + Err(DataValueCreationError::InternalError( InternalDataValueCreationError::DictionaryIdNotFound( usize::try_from(id).unwrap(), - ), - ))) + ) + .to_string(), + )) } } StorageValueT::Id64(id) => { if let Some(value) = dict.id_to_datavalue(usize::try_from(id).unwrap()) { Ok(value) } else { - Err(DataValueCreationError::InternalError(Box::new( + Err(DataValueCreationError::InternalError( InternalDataValueCreationError::DictionaryIdNotFound( usize::try_from(id).unwrap(), - ), - ))) + ) + .to_string(), + )) } } StorageValueT::Int64(num) => Ok(AnyDataValue::new_integer_from_i64(num)), diff --git a/nemo-physical/src/datavalues/errors.rs b/nemo-physical/src/datavalues/errors.rs index dca9bf90b..5ed7cac6f 100644 --- a/nemo-physical/src/datavalues/errors.rs +++ b/nemo-physical/src/datavalues/errors.rs @@ -6,7 +6,7 @@ use thiserror::Error; /// Potential errors encountered when trying to construct [super::DataValue]s. #[allow(variant_size_differences)] -#[derive(Error, Debug)] +#[derive(Error, Debug, Clone)] pub enum DataValueCreationError { /// Error for floating point numbers that are not finite #[error("floating point number must represent a finite value (no infinity, no NaN)")] @@ -45,8 +45,8 @@ pub enum DataValueCreationError { }, /// Generic error for issues that should not arise when using the public API (and should maybe never arise /// if the crate works as intended) - #[error("internal error when trying to create a datavalue: {0}")] - InternalError(Box), + #[error("internal error when trying to create a datavalue {0}")] + InternalError(String), } impl PartialEq for DataValueCreationError { @@ -88,7 +88,7 @@ impl PartialEq for DataValueCreationError { /// Conceivable internal errors that we distinguish. These should not surface in /// normal operation. 
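Aside on the error changes in this file: switching InternalError's payload from a boxed error to String is what makes the derive(Clone) just below possible, assuming the boxed payload was a trait object (which has no Clone impl). A minimal self-contained sketch of that trade-off; the names here are stand-ins, not the nemo types:

    // Stringifying the source error at construction keeps the message
    // while letting the whole enum derive Clone.
    #[derive(Debug, Clone)]
    enum CreationError {
        Internal(String), // was: Internal(Box<dyn std::error::Error>), which is not Clone
    }

    fn main() {
        let source = std::fmt::Error; // stands in for the original inner error
        let err = CreationError::Internal(source.to_string());
        let _copy = err.clone(); // now possible
        println!("{err:?}");
    }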
-#[derive(Error, Debug)] +#[derive(Error, Debug, Clone)] pub(crate) enum InternalDataValueCreationError { /// Error when retrieving a value from the dictionary #[error("could not recover DataValue from dictionary: id {0} not found")] diff --git a/nemo/src/parser/ast/directive/prefix.rs b/nemo/src/parser/ast/directive/prefix.rs index f4ccf2e26..b729cfa0f 100644 --- a/nemo/src/parser/ast/directive/prefix.rs +++ b/nemo/src/parser/ast/directive/prefix.rs @@ -28,6 +28,11 @@ impl<'a> Prefix<'a> { self.prefix.to_string() } + /// Return the [Token] containing the prefix + pub fn prefix_token(&self) -> &Token<'a> { + &self.prefix + } + /// Return the value of the prefix. pub fn value(&self) -> &Iri<'a> { &self.value diff --git a/nemo/src/parser/ast/directive/unknown.rs b/nemo/src/parser/ast/directive/unknown.rs index f8f58b3a3..43d07f702 100644 --- a/nemo/src/parser/ast/directive/unknown.rs +++ b/nemo/src/parser/ast/directive/unknown.rs @@ -37,6 +37,11 @@ impl<'a> UnknownDirective<'a> { self.name.to_string() } + /// Return the token containing the name of the directive. + pub fn name_token(&self) -> &Token<'a> { + &self.name + } + /// Return the content of the directive. pub fn content(&self) -> String { self.content.0.to_string() diff --git a/nemo/src/parser/ast/expression/basic/number.rs b/nemo/src/parser/ast/expression/basic/number.rs index fe668dc54..a01906808 100644 --- a/nemo/src/parser/ast/expression/basic/number.rs +++ b/nemo/src/parser/ast/expression/basic/number.rs @@ -21,7 +21,7 @@ use crate::parser::{ #[derive(Assoc, Debug, Clone, Copy, PartialEq, Eq)] #[func(pub fn token(token: &TokenKind) -> Option)] #[func(pub fn print(&self) -> &'static str)] -enum NumberTypeMarker { +pub enum NumberTypeMarker { /// Marks a number as a 32-bit floating point number #[assoc(token = TokenKind::TypeMarkerFloat)] #[assoc(print = "f")] @@ -55,18 +55,94 @@ pub struct Number<'a> { span: ProgramSpan<'a>, /// Sign of the integer part - _integer_sign: NumberSign, + integer_sign: NumberSign, /// The integer part of the number - _integer: Token<'a>, + integer: Token<'a>, /// The fractional part of the number - _fractional: Option>, + fractional: Option>, /// Sign and exponent of the number - _exponent: Option<(NumberSign, Token<'a>)>, + exponent: Option<(NumberSign, Token<'a>)>, /// Type - _type_marker: Option, + type_marker: Option, +} + +/// Value of [Number] +#[derive(Debug)] +pub enum NumberValue { + /// Integer value + Integer(i64), + /// 32-bit floating point value + Float(f32), + /// 64-bit floating point value + Double(f64), + /// Value doesn't fit into the above types + Large(String), } impl<'a> Number<'a> { + /// Return whether the number contains a fractional part. + pub fn is_fractional(&self) -> bool { + self.fractional.is_some() + } + + /// Return whether the number contains an exponential part. + pub fn is_exponential(&self) -> bool { + self.exponent.is_some() + } + + /// Return the [NumberTypeMarker] of this number. + pub fn type_marker(&self) -> Option { + self.type_marker + } + + /// Recreate the number string without the type marker. 
+ fn number_string(&self) -> String { + let integer = format!( + "{}{}", + self.integer_sign.print(), + self.integer.span().0.to_string() + ); + + let fractional = if let Some(fractional) = &self.fractional { + format!(".{}", fractional.span().0.to_string()) + } else { + String::default() + }; + + let exponent = if let Some((sign, exponent)) = &self.exponent { + format!("e{}{}", sign.print(), exponent.span().0.to_string()) + } else { + String::default() + }; + + format!("{}{}{}", integer, fractional, exponent) + } + + /// Return the value of this number, represented as a [NumberValue]. + pub fn value(&self) -> NumberValue { + let string = self.number_string(); + + if let Ok(integer) = str::parse::(&string) { + return NumberValue::Integer(integer); + } + + if let Some(NumberTypeMarker::Float) = self.type_marker { + if let Ok(float) = str::parse::(&string) { + return NumberValue::Float(float); + } + } + + if let Ok(double) = str::parse::(&string) { + return NumberValue::Double(double); + } + + if let Ok(float) = str::parse::(&string) { + return NumberValue::Float(float); + } + + NumberValue::Large(string) + } + /// Parse the sign of the number fn parse_sign(input: ParserInput<'a>) -> ParserResult<'a, NumberSign> { alt((Token::plus, Token::minus))(input).map(|(rest, sign)| { @@ -143,11 +219,11 @@ impl<'a> ProgramAST<'a> for Number<'a> { rest, Number { span: input_span.until_rest(&rest_span), - _integer_sign: integer_sign.unwrap_or_default(), - _integer: integer, - _fractional: fractional, - _exponent: exponent, - _type_marker: type_marker, + integer_sign: integer_sign.unwrap_or_default(), + integer, + fractional, + exponent, + type_marker, }, ) }, diff --git a/nemo/src/parser/ast/expression/complex/aggregation.rs b/nemo/src/parser/ast/expression/complex/aggregation.rs index 60ac63d56..c07c5b30b 100644 --- a/nemo/src/parser/ast/expression/complex/aggregation.rs +++ b/nemo/src/parser/ast/expression/complex/aggregation.rs @@ -42,6 +42,11 @@ impl<'a> Aggregation<'a> { &self.aggregate } + /// Return the tag that specifies the aggregate operation. + pub fn tag(&self) -> &AggregationTag<'a> { + &self.tag + } + /// Return the expressions specifying the distinct variables pub fn distinct(&self) -> impl Iterator> { self.distinct.iter().flat_map(|distinct| distinct.iter()) diff --git a/nemo/src/parser/ast/program.rs b/nemo/src/parser/ast/program.rs index 3da33b2a4..5995a364f 100644 --- a/nemo/src/parser/ast/program.rs +++ b/nemo/src/parser/ast/program.rs @@ -166,6 +166,6 @@ mod test { assert!(result.comment.is_some()); assert_eq!(result.statements.len(), 2); - // assert_eq!(parser_input.state.errors.borrow().len(), 3); + assert_eq!(parser_input.state.errors.borrow().len(), 2); } } diff --git a/nemo/src/parser/ast/tag/aggregation.rs b/nemo/src/parser/ast/tag/aggregation.rs index ef64fd5a8..c19aa8271 100644 --- a/nemo/src/parser/ast/tag/aggregation.rs +++ b/nemo/src/parser/ast/tag/aggregation.rs @@ -31,6 +31,11 @@ impl<'a> AggregationTag<'a> { pub fn operation(&self) -> Option { self.kind } + + /// Return a string representation of the content of this tag. + pub fn content(&self) -> String { + self.span.0.to_string() + } } const CONTEXT: ParserContext = ParserContext::AggregationTag; diff --git a/nemo/src/parser/span.rs b/nemo/src/parser/span.rs index 6fee3eddf..5eef16e6b 100644 --- a/nemo/src/parser/span.rs +++ b/nemo/src/parser/span.rs @@ -142,4 +142,20 @@ impl<'a> ProgramSpan<'a> { )) } } + + /// Return a [ProgramSpan] that points to the beginning. 
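The value() method above tries representations from narrowest to widest. A condensed, self-contained restatement of that fallback order (the real method first rebuilds the lexical form via number_string, and additionally retries f32 before giving up):

    #[derive(Debug, PartialEq)]
    enum Value {
        Integer(i64),
        Float(f32),
        Double(f64),
        Large(String),
    }

    fn classify(lexical: &str, float_marker: bool) -> Value {
        // Keep the literal in the narrowest type that parses.
        if let Ok(integer) = lexical.parse::<i64>() {
            return Value::Integer(integer);
        }
        if float_marker {
            if let Ok(float) = lexical.parse::<f32>() {
                return Value::Float(float);
            }
        }
        if let Ok(double) = lexical.parse::<f64>() {
            return Value::Double(double);
        }
        Value::Large(lexical.to_string())
    }

    fn main() {
        assert_eq!(classify("42", false), Value::Integer(42));
        assert_eq!(classify("-3.5", false), Value::Double(-3.5));
        assert_eq!(classify("2.5", true), Value::Float(2.5));
    }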
+ pub fn beginning(&self) -> Self { + unsafe { + if self.0.is_empty() { + self.clone() + } else { + Self(LocatedSpan::new_from_raw_offset( + self.0.location_offset(), + self.0.location_line(), + &self.0[0..1], + (), + )) + } + } + } } diff --git a/nemo/src/rule_model/components/atom.rs b/nemo/src/rule_model/components/atom.rs index 156c2e007..4409ca88e 100644 --- a/nemo/src/rule_model/components/atom.rs +++ b/nemo/src/rule_model/components/atom.rs @@ -53,14 +53,24 @@ impl Atom { } } + /// Return the predicate of this atom. + pub fn predicate(&self) -> Tag { + self.predicate.clone() + } + /// Return an iterator over the subterms of this atom. pub fn subterms(&self) -> impl Iterator { self.terms.iter() } - /// Return an mutable iterator over the subterms of this atom. - pub fn subterms_mut(&mut self) -> impl Iterator { - self.terms.iter_mut() + /// Return the number of subterms in this atom. + pub fn len(&self) -> usize { + self.terms.len() + } + + /// Return whether this atom contains no subterms. + pub fn is_empty(&self) -> bool { + self.len() == 0 } } diff --git a/nemo/src/rule_model/components/fact.rs b/nemo/src/rule_model/components/fact.rs index ed6b24b3a..5e06a575b 100644 --- a/nemo/src/rule_model/components/fact.rs +++ b/nemo/src/rule_model/components/fact.rs @@ -4,7 +4,7 @@ use std::{fmt::Display, hash::Hash}; use crate::rule_model::{error::ValidationErrorBuilder, origin::Origin}; -use super::{term::Term, ProgramComponent, Tag}; +use super::{atom::Atom, term::Term, ProgramComponent, Tag}; /// A (ground) fact #[derive(Debug, Clone, Eq)] @@ -45,6 +45,16 @@ impl Fact { } } +impl From for Fact { + fn from(value: Atom) -> Self { + Self { + origin: value.origin().clone(), + predicate: value.predicate(), + terms: value.subterms().cloned().collect(), + } + } +} + impl Display for Fact { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.write_fmt(format_args!("{}(", self.predicate))?; diff --git a/nemo/src/rule_model/components/import_export/file_formats.rs b/nemo/src/rule_model/components/import_export/file_formats.rs index 7d9c7ce99..30cc78505 100644 --- a/nemo/src/rule_model/components/import_export/file_formats.rs +++ b/nemo/src/rule_model/components/import_export/file_formats.rs @@ -4,6 +4,7 @@ use std::{collections::HashMap, fmt::Display}; use enum_assoc::Assoc; +use strum_macros::EnumIter; use crate::rule_model::{ components::import_export::attributes::ImportExportAttribute, @@ -20,7 +21,7 @@ pub(crate) enum AttributeRequirement { } /// Supported file formats -#[derive(Assoc, Debug, Copy, Clone, Eq, PartialEq, Hash)] +#[derive(Assoc, EnumIter, Debug, Copy, Clone, Eq, PartialEq, Hash)] #[func(pub fn name(&self) -> &'static str)] #[func(pub fn extension(&self) -> &'static str)] #[func(pub fn attributes(&self) -> HashMap)] @@ -90,6 +91,15 @@ pub enum FileFormat { TriG, } +/// List of RDF [FileFormat]s +pub const FILE_FORMATS_RDF: &'static [FileFormat] = &[ + FileFormat::NQuads, + FileFormat::NTriples, + FileFormat::RDFXML, + FileFormat::TriG, + FileFormat::Turtle, +]; + impl Display for FileFormat { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}", self.name()) diff --git a/nemo/src/rule_model/components/term.rs b/nemo/src/rule_model/components/term.rs index 19bbd5969..b7d18c835 100644 --- a/nemo/src/rule_model/components/term.rs +++ b/nemo/src/rule_model/components/term.rs @@ -15,6 +15,7 @@ pub mod tuple; use std::fmt::{Debug, Display}; +use aggregate::Aggregate; use function::FunctionTerm; use map::Map; use 
nemo_physical::datavalues::AnyDataValue; @@ -40,6 +41,8 @@ use super::{IterableVariables, ProgramComponent}; pub enum Term { /// Unstructured, primitive term Primitive(Primitive), + /// Aggregate + Aggregate(Aggregate), /// Abstract function over a list of terms FunctionTerm(FunctionTerm), /// Map of terms @@ -156,6 +159,12 @@ impl From for Term { } } +impl From for Term { + fn from(value: Aggregate) -> Self { + Self::Aggregate(value) + } +} + impl Display for Term { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { @@ -164,6 +173,7 @@ impl Display for Term { Term::Map(term) => write!(f, "{}", term), Term::Operation(term) => write!(f, "{}", term), Term::Tuple(term) => write!(f, "{}", term), + Term::Aggregate(term) => write!(f, "{}", term), } } } @@ -183,6 +193,7 @@ impl ProgramComponent for Term { Term::Map(map) => map.origin(), Term::Operation(operation) => operation.origin(), Term::Tuple(tuple) => tuple.origin(), + Term::Aggregate(aggregate) => aggregate.origin(), } } @@ -196,6 +207,7 @@ impl ProgramComponent for Term { Term::Map(map) => Term::Map(map.set_origin(origin)), Term::Operation(operation) => Term::Operation(operation.set_origin(origin)), Term::Tuple(tuple) => Term::Tuple(tuple.set_origin(origin)), + Term::Aggregate(aggregate) => Term::Aggregate(aggregate.set_origin(origin)), } } @@ -214,6 +226,7 @@ impl IterableVariables for Term { let mut iter_map = None; let mut iter_operation = None; let mut iter_tuple = None; + let mut iter_aggregate = None; match self { Term::Primitive(primitive) => iter_primitive = Some(primitive.variables()), @@ -221,6 +234,7 @@ impl IterableVariables for Term { Term::Map(map) => iter_map = Some(map.variables()), Term::Operation(operation) => iter_operation = Some(operation.variables()), Term::Tuple(tuple) => iter_tuple = Some(tuple.variables()), + Term::Aggregate(aggregate) => iter_aggregate = Some(aggregate.variables()), } Box::new( @@ -230,7 +244,8 @@ impl IterableVariables for Term { .chain(iter_function.into_iter().flatten()) .chain(iter_map.into_iter().flatten()) .chain(iter_operation.into_iter().flatten()) - .chain(iter_tuple.into_iter().flatten()), + .chain(iter_tuple.into_iter().flatten()) + .chain(iter_aggregate.into_iter().flatten()), ) } @@ -240,6 +255,7 @@ impl IterableVariables for Term { let mut iter_map = None; let mut iter_operation = None; let mut iter_tuple = None; + let mut iter_aggregate = None; match self { Term::Primitive(primitive) => iter_primitive = Some(primitive.variables_mut()), @@ -247,6 +263,7 @@ impl IterableVariables for Term { Term::Map(map) => iter_map = Some(map.variables_mut()), Term::Operation(operation) => iter_operation = Some(operation.variables_mut()), Term::Tuple(tuple) => iter_tuple = Some(tuple.variables_mut()), + Term::Aggregate(aggregate) => iter_aggregate = Some(aggregate.variables_mut()), } Box::new( @@ -256,7 +273,8 @@ impl IterableVariables for Term { .chain(iter_function.into_iter().flatten()) .chain(iter_map.into_iter().flatten()) .chain(iter_operation.into_iter().flatten()) - .chain(iter_tuple.into_iter().flatten()), + .chain(iter_tuple.into_iter().flatten()) + .chain(iter_aggregate.into_iter().flatten()), ) } } diff --git a/nemo/src/rule_model/components/term/aggregate.rs b/nemo/src/rule_model/components/term/aggregate.rs index 2b44682fc..74c5fd07e 100644 --- a/nemo/src/rule_model/components/term/aggregate.rs +++ b/nemo/src/rule_model/components/term/aggregate.rs @@ -51,7 +51,7 @@ pub struct Aggregate { /// Type of aggregate operation kind: AggregateKind, /// 
Expression over which to aggregate - aggregate: Term, + aggregate: Box, /// Distinct variables distinct: Vec, } @@ -66,7 +66,7 @@ impl Aggregate { Self { origin: Origin::default(), kind, - aggregate, + aggregate: Box::new(aggregate), distinct: distinct.into_iter().collect(), } } @@ -102,6 +102,21 @@ impl Aggregate { ) -> Self { Self::new(AggregateKind::MaxNumber, aggregate, distinct) } + + /// Return a reference to aggregated term. + pub fn aggregate_term(&self) -> &Term { + &self.aggregate + } + + /// Return the kind of aggregate. + pub fn kind(&self) -> AggregateKind { + self.kind + } + + /// Return a iterator over the distinct variables + pub fn distinct(&self) -> impl Iterator { + self.distinct.iter() + } } impl Display for Aggregate { @@ -136,6 +151,20 @@ impl Hash for Aggregate { } } +impl PartialOrd for Aggregate { + fn partial_cmp(&self, other: &Self) -> Option { + match self.kind.partial_cmp(&other.kind) { + Some(core::cmp::Ordering::Equal) => {} + ord => return ord, + } + match self.aggregate.partial_cmp(&other.aggregate) { + Some(core::cmp::Ordering::Equal) => {} + ord => return ord, + } + self.distinct.partial_cmp(&other.distinct) + } +} + impl ProgramComponent for Aggregate { fn parse(_string: &str) -> Result where @@ -156,7 +185,7 @@ impl ProgramComponent for Aggregate { self } - fn validate(&self, builder: &mut ValidationErrorBuilder) -> Result<(), ()> + fn validate(&self, _builder: &mut ValidationErrorBuilder) -> Result<(), ()> where Self: Sized, { diff --git a/nemo/src/rule_model/components/term/function.rs b/nemo/src/rule_model/components/term/function.rs index 00871002b..bbf36ead3 100644 --- a/nemo/src/rule_model/components/term/function.rs +++ b/nemo/src/rule_model/components/term/function.rs @@ -54,9 +54,14 @@ impl FunctionTerm { self.terms.iter() } - /// Return an mutable iterator over the subterms of this function terms. - pub fn subterms_mut(&mut self) -> impl Iterator { - self.terms.iter_mut() + /// Return the number of subterms contains in this function term. + pub fn len(&self) -> usize { + self.terms.len() + } + + /// Return whether this function terms contains no subterms. + pub fn is_empty(&self) -> bool { + self.len() == 0 } } diff --git a/nemo/src/rule_model/components/term/map.rs b/nemo/src/rule_model/components/term/map.rs index 3188ade7f..e06bfae59 100644 --- a/nemo/src/rule_model/components/term/map.rs +++ b/nemo/src/rule_model/components/term/map.rs @@ -1,4 +1,4 @@ -//! This module defines [Map] +//! This module defines [Map]. use std::{fmt::Display, hash::Hash}; @@ -44,6 +44,26 @@ impl Map { map: map.into_iter().collect(), } } + + /// Return the tag of this map. + pub fn tag(&self) -> Option { + self.tag.clone() + } + + /// Return an iterator over the key value pairs in this map. + pub fn key_value(&self) -> impl Iterator { + self.map.iter() + } + + /// Return the number of entries in this map. + pub fn len(&self) -> usize { + self.map.len() + } + + /// Return whether this map is empty. + pub fn is_empty(&self) -> bool { + self.len() == 0 + } } impl Display for Map { @@ -54,7 +74,7 @@ impl Display for Map { ))?; for (term_index, (key, value)) in self.map.iter().enumerate() { - f.write_fmt(format_args!("{} = {}", key, value))?; + f.write_fmt(format_args!("{}: {}", key, value))?; if term_index < self.map.len() - 1 { f.write_str(", ")?; diff --git a/nemo/src/rule_model/error.rs b/nemo/src/rule_model/error.rs index e371732d9..fd0fa38de 100644 --- a/nemo/src/rule_model/error.rs +++ b/nemo/src/rule_model/error.rs @@ -2,6 +2,7 @@ //! 
while working with nemo programs.
 
 pub mod hint;
+pub mod info;
 pub mod translation_error;
 pub mod validation_error;
@@ -99,16 +100,16 @@ where
     }
 
     /// Add a new label to the error.
-    pub fn add_label(
+    pub fn add_label(
         &mut self,
         kind: ComplexErrorLabelKind,
         reference: Reference,
-        message: String,
+        message: Message,
     ) -> &mut Self {
         self.labels.push(ComplexErrorLabel {
             kind: kind,
             reference,
-            message,
+            message: message.to_string(),
         });
 
         self
@@ -135,7 +136,7 @@ where
             let color = match label.kind {
                 ComplexErrorLabelKind::Error => Color::Red,
                 ComplexErrorLabelKind::Warning => Color::Yellow,
-                ComplexErrorLabelKind::Information => Color::Blue,
+                ComplexErrorLabelKind::Information => Color::BrightBlue,
             };
 
             report = report.with_label(
@@ -228,11 +229,11 @@ impl TranslationError {
     }
 
     /// Add a new label to the error.
-    pub fn add_label(
+    pub fn add_label(
        mut self,
         kind: ComplexErrorLabelKind,
         range: CharacterRange,
-        message: String,
+        message: Message,
     ) -> Self {
         self.info.add_label(kind, range, message);
 
@@ -285,6 +286,16 @@ impl ProgramError {
         }
     }
 
+    /// Return the note attached to this error, if it exists.
+    pub fn note(&self) -> Option<String> {
+        match self {
+            ProgramError::TranslationError(error) => error.kind.note(),
+            ProgramError::ValidationError(error) => error.kind.note(),
+        }
+        .map(|string| string.to_string())
+    }
+
+    /// Append the information of this error to a [ReportBuilder].
     pub fn report<'a, Translation>(
         &'a self,
         mut report: ReportBuilder<'a, (String, Range<usize>)>,
@@ -298,6 +309,10 @@ impl ProgramError {
             .with_code(self.error_code())
             .with_message(self.message());
 
+        if let Some(note) = self.note() {
+            report = report.with_note(note);
+        }
+
         match self {
             ProgramError::TranslationError(error) => {
                 error
diff --git a/nemo/src/rule_model/error/hint.rs b/nemo/src/rule_model/error/hint.rs
index dc3e69f8b..c54fa1807 100644
--- a/nemo/src/rule_model/error/hint.rs
+++ b/nemo/src/rule_model/error/hint.rs
@@ -1,6 +1,8 @@
 //! This module defines [Hint]
 #![allow(missing_docs)]
 
+pub(crate) mod similar;
+
 use enum_assoc::Assoc;
 
 /// Hints for error messages
@@ -12,3 +14,9 @@ pub enum Hint {
     #[assoc(message = format!("similar {} exists: `{}`", _kind, _name))]
     SimilarExists { kind: String, name: String },
 }
+
+impl std::fmt::Display for Hint {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        self.message().fmt(f)
+    }
+}
diff --git a/nemo/src/rule_model/error/hint/similar.rs b/nemo/src/rule_model/error/hint/similar.rs
new file mode 100644
index 000000000..53be7176f
--- /dev/null
+++ b/nemo/src/rule_model/error/hint/similar.rs
@@ -0,0 +1,10 @@
+//! This module defines a helper function for computing similar strings.
+
+use super::Hint;
+
+impl Hint {
+    /// Checks whether a similar string exists in a collection of source strings
+    pub fn similar() -> Option<Hint> {
+        todo!()
+    }
+}
diff --git a/nemo/src/rule_model/error/info.rs b/nemo/src/rule_model/error/info.rs
new file mode 100644
index 000000000..b6aea309b
--- /dev/null
+++ b/nemo/src/rule_model/error/info.rs
@@ -0,0 +1,19 @@
+//! This module defines [Info].
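The point of the two add_label changes and the new Display impl for Hint above, in miniature: label messages become generic, so a Hint can be handed to the error APIs directly instead of being formatted by every caller first. A self-contained model (SimilarExists stands in for Hint::SimilarExists; the exact trait bound used in nemo is not visible in this hunk):

    use std::fmt;

    struct SimilarExists {
        kind: String,
        name: String,
    }

    impl fmt::Display for SimilarExists {
        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            write!(f, "similar {} exists: `{}`", self.kind, self.name)
        }
    }

    // Mirrors the relaxed signature: any Display value works as a message.
    fn add_label(labels: &mut Vec<String>, message: impl fmt::Display) {
        labels.push(message.to_string());
    }

    fn main() {
        let mut labels = Vec::new();
        add_label(&mut labels, "plain strings still work");
        add_label(
            &mut labels,
            SimilarExists { kind: "predicate".to_string(), name: "parent".to_string() },
        );
        assert_eq!(labels[1], "similar predicate exists: `parent`");
    }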
+#![allow(missing_docs)] + +use enum_assoc::Assoc; + +/// Infos for error messages +#[derive(Assoc, Debug, Copy, Clone)] +#[func(pub fn message(&self) -> String)] +pub enum Info { + /// First definition occurred somewhere + #[assoc(message = format!("first definition occurred here"))] + FirstDefinition, +} + +impl std::fmt::Display for Info { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.message().fmt(f) + } +} diff --git a/nemo/src/rule_model/error/translation_error.rs b/nemo/src/rule_model/error/translation_error.rs index f7e6db7b2..0f611ab80 100644 --- a/nemo/src/rule_model/error/translation_error.rs +++ b/nemo/src/rule_model/error/translation_error.rs @@ -2,6 +2,7 @@ #![allow(missing_docs)] use enum_assoc::Assoc; +use nemo_physical::datavalues::DataValueCreationError; use thiserror::Error; /// Types of errors that occur @@ -11,24 +12,92 @@ use thiserror::Error; #[func(pub fn note(&self) -> Option<&'static str>)] #[func(pub fn code(&self) -> usize)] pub enum TranslationErrorKind { - /// A negated atom was used in the head of a rule + /// A non-atom was used in the head of a rule #[error(r#"{0} used in rule head"#)] #[assoc(note = "rule head must only use atoms")] #[assoc(code = 101)] HeadNonAtom(String), + /// A non-literal was used in the body of a rule + #[error(r#"{0} used in rule body"#)] + #[assoc(note = "rule body must only use literals or boolean operations")] + #[assoc(code = 102)] + BodyNonLiteral(String), /// An undefined prefix was used #[error(r#"unknown prefix: `{0}`"#)] #[assoc(note = "prefix must be defined using @prefix")] - #[assoc(code = 102)] - UnknownPrefix(String), + #[assoc(code = 103)] + PrefixUnknown(String), /// Unnamed non-anonymous variable #[error(r#"unnamed variable"#)] #[assoc(note = "variables starting with ? or ! 
must have a name")]
-    #[assoc(code = 103)]
+    #[assoc(code = 104)]
     UnnamedVariable,
     /// Named non-anonymous variable
     #[error(r#"anonymous variable with name: `{0}`"#)]
     #[assoc(note = "anonymous variables cannot have a name")]
-    #[assoc(code = 104)]
+    #[assoc(code = 105)]
     NamedAnonymous(String),
+    /// Negation of a non-atom
+    #[error(r#"found negated {0}"#)]
+    #[assoc(note = "negation can only be applied to atoms")]
+    #[assoc(code = 106)]
+    NegatedNonAtom(String),
+    /// Error while creating an AnyDataValue term
+    #[error(transparent)]
+    #[assoc(code = 107)]
+    DataValueCreationError(DataValueCreationError),
+    /// Unknown aggregation
+    #[error(r#"unknown aggregation: `{0}`"#)]
+    #[assoc(note = "supported aggregates are sum, count, min, and max")]
+    #[assoc(code = 108)]
+    AggregationUnknown(String),
+    /// Distinct non-variable
+    #[error(r#"expected variable, found {0}"#)]
+    #[assoc(note = "the aggregation term is followed by a list of distinct variables")]
+    #[assoc(code = 109)]
+    AggregationDistinctNonVariable(String),
+    /// Infix expression as inner term
+    #[error(r#"comparison not allowed within an atom"#)]
+    #[assoc(code = 110)]
+    InnerExpressionInfix,
+    /// Negation as inner term
+    #[error(r#"negation not allowed within an atom"#)]
+    #[assoc(code = 111)]
+    InnerExpressionNegation,
+    /// Unknown directive
+    #[error(r#"unknown directive: `{0}`"#)]
+    #[assoc(code = 112)]
+    DirectiveUnknown(String),
+    /// Base was redefined
+    #[error(r#"base has been redefined"#)]
+    #[assoc(note = "program may only contain one @base statement")]
+    #[assoc(code = 113)]
+    BaseRedefinition,
+    /// Prefix was redefined
+    #[error(r#"prefix has been redefined"#)]
+    #[assoc(code = 114)]
+    PrefixRedefinition,
+    /// Unknown file format
+    #[error(r#"unknown file format: `{0}`"#)]
+    #[assoc(code = 115)]
+    FileFormatUnknown(String),
+    /// Missing file format
+    #[error("missing file format")]
+    #[assoc(code = 116)]
+    FileFormatMissing,
+    /// RDF import/export without a file extension
+    #[error("no file extension specified")]
+    #[assoc(note = "rdf imports/exports must have file extension nt, nq, ttl, trig, or rdf.")]
+    #[assoc(code = 117)]
+    RdfUnspecifiedMissingExtension,
+    /// RDF import/export with an unknown file extension
+    #[error("`{0}` is not an rdf format")]
+    #[assoc(note = "rdf imports/exports must have file extension nt, nq, ttl, trig, or rdf.")]
+    #[assoc(code = 118)]
+    RdfUnspecifiedUnknownExtension(String),
+
+    /// Unsupported: Declare statements
+    #[error(r#"declare statements are currently unsupported"#)]
+    #[assoc(code = 899)]
+    UnsupportedDeclare,
 }
diff --git a/nemo/src/rule_model/program.rs b/nemo/src/rule_model/program.rs
index f23c269a9..41da6ddda 100644
--- a/nemo/src/rule_model/program.rs
+++ b/nemo/src/rule_model/program.rs
@@ -1,25 +1,10 @@
 //! This module defines [Program].
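For readers unfamiliar with the enum_assoc derive used throughout this enum: #[func] declares a lookup method and each #[assoc] attribute supplies that variant's return value. Roughly the hand-written equivalent, sketched for two of the variants above:

    enum Kind {
        HeadNonAtom,
        PrefixUnknown,
    }

    impl Kind {
        fn code(&self) -> usize {
            match self {
                Kind::HeadNonAtom => 101,
                Kind::PrefixUnknown => 103,
            }
        }

        fn note(&self) -> Option<&'static str> {
            match self {
                Kind::HeadNonAtom => Some("rule head must only use atoms"),
                Kind::PrefixUnknown => Some("prefix must be defined using @prefix"),
            }
        }
    }

    fn main() {
        assert_eq!(Kind::PrefixUnknown.code(), 103);
        assert_eq!(Kind::HeadNonAtom.note(), Some("rule head must only use atoms"));
    }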
-use nemo_physical::datavalues::AnyDataValue; - -use crate::{io::parser::ast, rule_model::components::term::tuple::Tuple}; - -use super::{ - components::{ - atom::Atom, - fact::Fact, - import_export::{ExportDirective, ImportDirective}, - literal::Literal, - output::Output, - rule::{Rule, RuleBuilder}, - term::{ - function::FunctionTerm, - operation::{operation_kind::OperationKind, Operation}, - Term, - }, - ProgramComponent, - }, - origin::Origin, +use super::components::{ + fact::Fact, + import_export::{ExportDirective, ImportDirective}, + output::Output, + rule::Rule, }; /// Representation of a nemo program @@ -40,272 +25,38 @@ pub struct Program { /// Builder for [Program]s #[derive(Debug, Default)] pub struct ProgramBuilder { - /// Rules - rules: Vec, + /// The constructed program + program: Program, } impl ProgramBuilder { /// Finish building and return a [Program]. pub fn finalize(self) -> Program { - Program { - imports: Vec::default(), - exports: Vec::default(), - rules: self.rules, - facts: Vec::default(), - outputs: Vec::default(), - } + self.program } /// Add a [Rule]. pub fn add_rule(&mut self, rule: Rule) { - self.rules.push(rule) + self.program.rules.push(rule) } -} - -// impl Program { -// /// Build a [Program] from an [ast::program::Program]. -// pub fn from_ast(ast_program: ast::program::Program) -> Self { -// let mut program = Program::default(); - -// for (_statement_index, statement) in ast_program.statements.iter().enumerate() { -// match statement { -// ast::statement::Statement::Directive(directive) => { -// program.ast_build_directive(directive); -// } -// ast::statement::Statement::Fact { -// span: _span, -// doc_comment: _doc_comment, -// fact: _atom, -// dot: _dot, -// } => todo!(), -// ast::statement::Statement::Rule { head, body, .. } => { -// program.ast_build_rule(head, body); -// } -// ast::statement::Statement::Comment(_) => todo!(), -// ast::statement::Statement::Error(_) => todo!(), -// } -// } - -// program -// } - -// fn ast_build_rule( -// &mut self, -// head: &ast::List, -// body: &ast::List, -// ) { -// let origin = Origin::External(self.rules.len()); - -// let mut rule_builder = RuleBuilder::default().origin(origin); - -// // TODO: Implement a normal iterator to avoid cloning -// for (head_index, head_atom) in head.clone().into_iter().enumerate() { -// let origin = Origin::External(head_index); -// if let Literal::Positive(atom) = Self::ast_build_literal(origin, &head_atom) { -// rule_builder.add_head_atom_mut(atom); -// } else { -// unreachable!("head must only contain positive atoms") -// } -// } - -// // TODO: Implement a normal iterator to avoid cloning -// for (body_index, body_atom) in body.clone().into_iter().enumerate() { -// let origin = Origin::External(body_index); -// rule_builder.add_body_literal_mut(Self::ast_build_literal(origin, &body_atom)); -// } - -// self.rules.push(rule_builder.finalize()); -// } - -// fn ast_build_literal(origin: Origin, atom: &ast::atom::Atom) -> Literal { -// match atom { -// ast::atom::Atom::Positive(positive_atom) => { -// Literal::Positive(Self::ast_build_atom(origin, positive_atom)) -// } -// ast::atom::Atom::Negative { -// atom: negative_atom, -// .. -// } => Literal::Negative(Self::ast_build_atom(origin, negative_atom)), -// ast::atom::Atom::InfixAtom { -// lhs, -// operation, -// rhs, -// .. 
-// } => { -// let left = Self::ast_build_inner_term(Origin::External(0), lhs); -// let right = Self::ast_build_inner_term(Origin::External(1), rhs); - -// Literal::Operation( -// Operation::new_from_name(&operation.to_string(), vec![left, right]) -// .expect("unkown infix operation"), -// ) -// } -// ast::atom::Atom::Map(_) => { -// // Return unsupported error -// todo!() -// } -// } -// } - -// fn ast_build_atom(origin: Origin, atom: &ast::named_tuple::NamedTuple) -> Atom { -// let predicate_name = atom.identifier.to_string(); -// let subterms = match &atom.tuple.terms { -// Some(terms) => terms.to_item_vec(), -// None => vec![], -// }; -// let mut translated_subterms = Vec::new(); - -// for (term_index, subterm) in subterms.into_iter().enumerate() { -// let origin = Origin::External(term_index); -// translated_subterms.push(Self::ast_build_inner_term(origin, &subterm)); -// } - -// Atom::new(&predicate_name, translated_subterms).set_origin(origin) -// } - -// fn ast_build_inner_term(origin: Origin, term: &ast::term::Term) -> Term { -// match term { -// ast::term::Term::Primitive(primitive) => Self::ast_build_primitive(origin, primitive), -// ast::term::Term::UniversalVariable(name) => Term::universal_variable(&name.to_string()), -// ast::term::Term::ExistentialVariable(name) => { -// Term::existential_variable(&name.to_string()) -// } -// ast::term::Term::Binary { -// lhs, -// operation, -// rhs, -// .. -// } => { -// let left = Self::ast_build_inner_term(Origin::External(0), lhs); -// let right = Self::ast_build_inner_term(Origin::External(1), rhs); - -// Term::Operation( -// Operation::new_from_name(&operation.to_string(), vec![left, right]) -// .expect("unrecognized binary operation"), -// ) -// } -// ast::term::Term::Aggregation { -// operation: _, -// terms: _, -// .. -// } => { -// todo!() -// } -// ast::term::Term::Tuple(tuple) => Self::ast_build_inner_tuple(origin, tuple), -// ast::term::Term::NamedTuple(named_tuple) => { -// Self::ast_build_inner_named_tuple(origin, named_tuple) -// } -// ast::term::Term::Map(_) => todo!(), -// ast::term::Term::Blank(_) => todo!(), -// } -// .set_origin(origin) -// } - -// fn ast_build_primitive(origin: Origin, primitive: &ast::term::Primitive) -> Term { -// match primitive { -// ast::term::Primitive::Constant(value) => { -// Term::ground(AnyDataValue::new_iri(value.to_string())) -// } -// ast::term::Primitive::PrefixedConstant { -// span: _, -// prefix: _, -// colon: _, -// constant: _, -// } => todo!(), -// ast::term::Primitive::Number { -// span: _, -// sign: _, -// before: _, -// dot: _, -// after: _, -// exponent: _, -// } => todo!(), -// ast::term::Primitive::String(string) => { -// Term::ground(AnyDataValue::new_plain_string(string.to_string())) -// } -// ast::term::Primitive::Iri(iri) => Term::ground(AnyDataValue::new_iri(iri.to_string())), -// ast::term::Primitive::RdfLiteral { string, iri, .. 
} => { -// Term::ground(AnyDataValue::new_other(string.to_string(), iri.to_string())) -// } -// } -// .set_origin(origin) -// } - -// fn ast_build_inner_tuple(_origin: Origin, tuple: &ast::tuple::Tuple) -> Term { -// let subterms = match &tuple.terms { -// Some(terms) => terms.to_item_vec(), -// None => vec![], -// }; - -// let mut translated_subterms = Vec::new(); - -// for (term_index, subterm) in subterms.into_iter().enumerate() { -// let origin = Origin::External(term_index); -// translated_subterms.push(Self::ast_build_inner_term(origin, &subterm)); -// } - -// Term::Tuple(Tuple::new(translated_subterms)) -// } - -// fn ast_build_inner_named_tuple( -// _origin: Origin, -// named_tuple: &ast::named_tuple::NamedTuple, -// ) -> Term { -// let subterms = match &named_tuple.tuple.terms { -// Some(terms) => terms.to_item_vec(), -// None => vec![], -// }; - -// let mut translated_subterms = Vec::new(); + /// Add a [Fact]. + pub fn add_fact(&mut self, fact: Fact) { + self.program.facts.push(fact); + } -// for (term_index, subterm) in subterms.into_iter().enumerate() { -// let origin = Origin::External(term_index); -// translated_subterms.push(Self::ast_build_inner_term(origin, &subterm)); -// } + /// Add a [ImportDirective]. + pub fn add_import(&mut self, import: ImportDirective) { + self.program.imports.push(import); + } -// let name = &named_tuple.identifier.to_string(); -// match OperationKind::from_name(name) { -// Some(kind) => Term::Operation(Operation::new(kind, translated_subterms)), -// None => Term::FunctionTerm(FunctionTerm::new(name, translated_subterms)), -// } -// } + /// Add a [ExportDirective]. + pub fn add_export(&mut self, export: ExportDirective) { + self.program.exports.push(export); + } -// fn ast_build_directive(&mut self, directive: &ast::directive::Directive) { -// match directive { -// ast::directive::Directive::Base { base_iri: _, .. } => { -// // self.base = Some(Base::new(base_iri.to_string())); -// // TODO: Set origin -// } -// ast::directive::Directive::Prefix { -// span: _, -// doc_comment: _, -// prefix: _, -// prefix_iri: _, -// dot: _, -// } => todo!(), -// ast::directive::Directive::Import { -// span: _, -// doc_comment: _, -// predicate: _, -// arrow: _, -// map: _, -// dot: _, -// } => todo!(), -// ast::directive::Directive::Export { -// span: _, -// doc_comment: _, -// predicate: _, -// arrow: _, -// map: _, -// dot: _, -// } => todo!(), -// ast::directive::Directive::Output { -// span: _, -// doc_comment: _, -// predicates: _, -// dot: _, -// } => todo!(), -// } -// } -// } + /// Add a [Output]. + pub fn add_output(&mut self, output: Output) { + self.program.outputs.push(output); + } +} diff --git a/nemo/src/rule_model/translation.rs b/nemo/src/rule_model/translation.rs index 51a75e040..378ff162d 100644 --- a/nemo/src/rule_model/translation.rs +++ b/nemo/src/rule_model/translation.rs @@ -1,18 +1,21 @@ //! This module defines [ASTProgramTranslation]. 
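Why translate() below makes two passes over the statements: directives that define names (@base, @prefix) must take effect before any statement that uses those names, wherever they appear in the file. The scheme in miniature (self-contained sketch; the real translation additionally records the defining AST node for error reporting):

    use std::collections::HashMap;

    enum Statement {
        DefinePrefix(&'static str, &'static str),
        UseTag(&'static str),
    }

    fn main() {
        // `ex:head` appears before the @prefix that defines `ex`.
        let statements = [
            Statement::UseTag("ex:head"),
            Statement::DefinePrefix("ex", "http://example.org/"),
        ];

        // Pass 1: collect every name definition.
        let mut prefixes = HashMap::new();
        for statement in &statements {
            if let Statement::DefinePrefix(prefix, iri) = statement {
                prefixes.insert(*prefix, *iri);
            }
        }

        // Pass 2: resolve uses; source order no longer matters.
        for statement in &statements {
            if let Statement::UseTag(tag) = statement {
                let (prefix, local) = tag.split_once(':').expect("prefixed tag");
                println!("{}{}", prefixes[prefix], local);
            }
        }
    }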
+pub(crate) mod basic; +pub(crate) mod complex; +pub(crate) mod directive; +pub(crate) mod rule; + use std::{collections::HashMap, ops::Range}; -use ariadne::{Color, Label, Report, ReportKind, Source}; +use ariadne::{Report, ReportKind, Source}; use crate::{ parser::ast::{self, ProgramAST}, - rule_model::{ - components::rule::RuleBuilder, error::hint::Hint, origin::Origin, program::ProgramBuilder, - }, + rule_model::{origin::Origin, program::ProgramBuilder}, }; use super::{ - components::{atom::Atom, literal::Literal, rule::Rule, term::Term, ProgramComponent}, + components::fact::Fact, error::{ translation_error::TranslationErrorKind, ProgramError, TranslationError, ValidationErrorBuilder, @@ -33,11 +36,13 @@ pub struct ASTProgramTranslation<'a> { origin_map: HashMap>, /// Prefix mapping - prefix_mapping: HashMap, + prefix_mapping: HashMap)>, /// Base - base: Option, + base: Option<(String, &'a ast::directive::base::Base<'a>)>, - /// Builder for [ValidationError]s + /// Builder for the [Program]s + program_builder: ProgramBuilder, + /// Builder for validation errors validation_error_builder: ValidationErrorBuilder, /// Errors @@ -55,6 +60,7 @@ impl<'a> ASTProgramTranslation<'a> { base: None, validation_error_builder: ValidationErrorBuilder::default(), errors: Vec::default(), + program_builder: ProgramBuilder::default(), } } @@ -130,18 +136,33 @@ impl<'a> ASTProgramTranslation<'a> { mut self, ast: &'a ast::program::Program<'a>, ) -> Result> { - let mut program_builder = ProgramBuilder::default(); + // First, handle all directives + for statement in ast.statements() { + if let ast::statement::StatementKind::Directive(directive) = statement.kind() { + if let Err(error) = self.handle_define_directive(directive) { + self.errors.push(ProgramError::TranslationError(error)); + } + } + } + // Now handle facts and rules for statement in ast.statements() { match statement.kind() { - ast::statement::StatementKind::Fact(_) => todo!(), + ast::statement::StatementKind::Fact(fact) => match self.build_head_atom(fact) { + Ok(atom) => self.program_builder.add_fact(Fact::from(atom)), + Err(error) => self.errors.push(ProgramError::TranslationError(error)), + }, ast::statement::StatementKind::Rule(rule) => match self.build_rule(rule) { - Ok(new_rule) => program_builder.add_rule(new_rule), + Ok(new_rule) => self.program_builder.add_rule(new_rule), Err(translation_error) => self .errors .push(ProgramError::TranslationError(translation_error)), }, - ast::statement::StatementKind::Directive(_) => todo!(), + ast::statement::StatementKind::Directive(directive) => { + if let Err(error) = self.handle_use_directive(directive) { + self.errors.push(ProgramError::TranslationError(error)); + } + } } } @@ -153,7 +174,7 @@ impl<'a> ASTProgramTranslation<'a> { ); if self.errors.is_empty() { - Ok(program_builder.finalize()) + Ok(self.program_builder.finalize()) } else { Err(ProgramErrorReport { input: self.input, @@ -164,123 +185,8 @@ impl<'a> ASTProgramTranslation<'a> { } } - fn build_rule(&mut self, rule: &'a ast::rule::Rule<'a>) -> Result { - let mut rule_builder = RuleBuilder::default().origin(self.register_node(rule)); - - for expression in rule.head() { - rule_builder.add_head_atom_mut(self.build_head_atom(expression)?); - } - - for expression in rule.body() { - rule_builder.add_body_literal_mut(self.build_body_literal(expression)?); - } - - let rule = rule_builder.finalize(); - - let _ = rule.validate(&mut self.validation_error_builder); - Ok(rule) - } - - fn build_body_literal( - &mut self, - head: &'a 
ast::expression::Expression<'a>, - ) -> Result { - let result = if let ast::expression::Expression::Atom(atom) = head { - let mut subterms = Vec::new(); - for expression in atom.expressions() { - subterms.push(self.build_inner_term(expression)?); - } - - Literal::Positive( - Atom::new(&self.resolve_tag(atom.tag())?, subterms) - .set_origin(self.register_node(atom)), - ) - } else { - todo!() - } - .set_origin(self.register_node(head)); - - Ok(result) - } - - fn build_head_atom( - &mut self, - head: &'a ast::expression::Expression<'a>, - ) -> Result { - let result = if let ast::expression::Expression::Atom(atom) = head { - let mut subterms = Vec::new(); - for expression in atom.expressions() { - subterms.push(self.build_inner_term(expression)?); - } - - Atom::new(&self.resolve_tag(atom.tag())?, subterms).set_origin(self.register_node(atom)) - } else { - return Err(TranslationError::new( - head.span(), - TranslationErrorKind::HeadNonAtom(head.context_type().name().to_string()), - )); - }; - - Ok(result) - } - - fn build_inner_term( - &mut self, - expression: &'a ast::expression::Expression, - ) -> Result { - Ok(match expression { - ast::expression::Expression::Arithmetic(_) => todo!(), - ast::expression::Expression::Atom(atom) => todo!(), - ast::expression::Expression::Blank(blank) => todo!(), - ast::expression::Expression::Boolean(boolean) => todo!(), - ast::expression::Expression::Constant(constant) => todo!(), - ast::expression::Expression::Number(number) => todo!(), - ast::expression::Expression::RdfLiteral(rdf_literal) => todo!(), - ast::expression::Expression::String(string) => todo!(), - ast::expression::Expression::Tuple(tuple) => todo!(), - ast::expression::Expression::Variable(variable) => match variable.kind() { - ast::expression::basic::variable::VariableType::Universal => { - if let Some(variable_name) = variable.name() { - Term::universal_variable(&variable_name) - .set_origin(self.register_node(variable)) - } else { - return Err(TranslationError::new( - variable.span(), - TranslationErrorKind::UnnamedVariable, - ) - .add_hint(Hint::AnonymousVariables)); - } - } - ast::expression::basic::variable::VariableType::Existential => { - if let Some(variable_name) = variable.name() { - Term::existential_variable(&variable_name) - .set_origin(self.register_node(variable)) - } else { - return Err(TranslationError::new( - variable.span(), - TranslationErrorKind::UnnamedVariable, - )); - } - } - ast::expression::basic::variable::VariableType::Anonymous => { - if variable.name().is_none() { - Term::anonymous_variable().set_origin(self.register_node(variable)) - } else { - return Err(TranslationError::new( - variable.span(), - TranslationErrorKind::NamedAnonymous(variable.span().0.to_string()), - )); - } - } - }, - ast::expression::Expression::Aggregation(_) => todo!(), - ast::expression::Expression::Infix(_) => todo!(), - ast::expression::Expression::Map(_) => todo!(), - ast::expression::Expression::Negation(_) => todo!(), - ast::expression::Expression::Operation(_) => todo!(), - }) - } - + /// Recreate the name from a [ast::tag::structure::StructureTag] + /// by resolving prefixes or bases. 
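The resolution rule implemented by resolve_tag below, restated as a self-contained, string-level sketch (in nemo the parser has already split plain and prefixed tags, and the error carries a span rather than a string):

    use std::collections::HashMap;

    fn resolve(
        tag: &str,
        base: Option<&str>,
        prefixes: &HashMap<String, String>,
    ) -> Result<String, String> {
        match tag.split_once(':') {
            // `ex:p` style tags require a declared prefix.
            Some((prefix, local)) => prefixes
                .get(prefix)
                .map(|iri| format!("{iri}{local}"))
                .ok_or_else(|| format!("unknown prefix: `{prefix}`")),
            // Plain tags are expanded against @base, when one was declared.
            None => Ok(match base {
                Some(base) => format!("{base}{tag}"),
                None => tag.to_string(),
            }),
        }
    }

    fn main() {
        let prefixes = HashMap::from([("ex".to_string(), "http://example.org/".to_string())]);
        assert_eq!(resolve("ex:p", None, &prefixes).unwrap(), "http://example.org/p");
        assert_eq!(resolve("p", Some("http://base/"), &prefixes).unwrap(), "http://base/p");
        assert!(resolve("foo:p", None, &prefixes).is_err());
    }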
fn resolve_tag(
         &self,
         tag: &'a ast::tag::structure::StructureTag<'a>,
     ) -> Result<String, TranslationError> {
         Ok(match tag.kind() {
             ast::tag::structure::StructureTagKind::Plain(token) => {
                 let token_string = token.to_string();
 
-                if let Some(base) = &self.base {
+                if let Some((base, _)) = &self.base {
                     format!("{base}{token_string}")
                 } else {
                     token_string
                 }
             }
             ast::tag::structure::StructureTagKind::Prefixed { prefix, tag } => {
-                if let Some(expanded_prefix) = self.prefix_mapping.get(&prefix.to_string()) {
+                if let Some((expanded_prefix, _)) = self.prefix_mapping.get(&prefix.to_string()) {
                     format!("{expanded_prefix}{}", tag.to_string())
                 } else {
                     return Err(TranslationError::new(
                         prefix.span(),
-                        TranslationErrorKind::UnknownPrefix(prefix.to_string()),
+                        TranslationErrorKind::PrefixUnknown(prefix.to_string()),
                     ));
                 }
             }
diff --git a/nemo/src/rule_model/translation/basic.rs b/nemo/src/rule_model/translation/basic.rs
new file mode 100644
index 000000000..71a37f688
--- /dev/null
+++ b/nemo/src/rule_model/translation/basic.rs
@@ -0,0 +1,9 @@
+//! This module contains functions for translating basic AST expressions.
+
+pub(crate) mod blank;
+pub(crate) mod boolean;
+pub(crate) mod constant;
+pub(crate) mod number;
+pub(crate) mod rdf;
+pub(crate) mod string;
+pub(crate) mod variable;
diff --git a/nemo/src/rule_model/translation/basic/blank.rs b/nemo/src/rule_model/translation/basic/blank.rs
new file mode 100644
index 000000000..dc423c657
--- /dev/null
+++ b/nemo/src/rule_model/translation/basic/blank.rs
@@ -0,0 +1,20 @@
+//! This module contains a function to create a blank term
+//! from the corresponding ast node.
+
+use nemo_physical::datavalues::AnyDataValue;
+
+use crate::parser::ast;
+
+use crate::rule_model::{error::TranslationError, translation::ASTProgramTranslation};
+
+impl<'a> ASTProgramTranslation<'a> {
+    /// Create a blank term from the corresponding AST node.
+    pub(crate) fn build_blank(
+        &mut self,
+        blank: &'a ast::expression::basic::blank::Blank,
+    ) -> Result<AnyDataValue, TranslationError> {
+        let blank_string = format!("{}/{}", self.input_label, blank.name());
+
+        Ok(AnyDataValue::new_iri(blank_string))
+    }
+}
diff --git a/nemo/src/rule_model/translation/basic/boolean.rs b/nemo/src/rule_model/translation/basic/boolean.rs
new file mode 100644
index 000000000..60203aa6f
--- /dev/null
+++ b/nemo/src/rule_model/translation/basic/boolean.rs
@@ -0,0 +1,23 @@
+//! This module contains a function to create a boolean term
+//! from the corresponding ast node.
+
+use nemo_physical::datavalues::AnyDataValue;
+
+use crate::parser::ast;
+
+use crate::rule_model::{error::TranslationError, translation::ASTProgramTranslation};
+
+impl<'a> ASTProgramTranslation<'a> {
+    /// Create a boolean term from the corresponding AST node.
+    pub(crate) fn build_boolean(
+        &mut self,
+        boolean: &'a ast::expression::basic::boolean::Boolean,
+    ) -> Result<AnyDataValue, TranslationError> {
+        let truth = match boolean.value() {
+            ast::expression::basic::boolean::BooleanValue::False => false,
+            ast::expression::basic::boolean::BooleanValue::True => true,
+        };
+
+        Ok(AnyDataValue::new_boolean(truth))
+    }
+}
diff --git a/nemo/src/rule_model/translation/basic/constant.rs b/nemo/src/rule_model/translation/basic/constant.rs
new file mode 100644
index 000000000..1f56edcaf
--- /dev/null
+++ b/nemo/src/rule_model/translation/basic/constant.rs
@@ -0,0 +1,20 @@
+//! This module contains a function to create an iri constant
+//! from the corresponding ast node.
+ +use nemo_physical::datavalues::AnyDataValue; + +use crate::parser::ast; + +use crate::rule_model::{error::TranslationError, translation::ASTProgramTranslation}; + +impl<'a> ASTProgramTranslation<'a> { + /// Create a constant term from the corresponding AST node. + pub(crate) fn build_constant( + &mut self, + constant: &'a ast::expression::basic::constant::Constant, + ) -> Result { + let name = constant.name(); + + Ok(AnyDataValue::new_iri(name)) + } +} diff --git a/nemo/src/rule_model/translation/basic/number.rs b/nemo/src/rule_model/translation/basic/number.rs new file mode 100644 index 000000000..d0cc9ee5e --- /dev/null +++ b/nemo/src/rule_model/translation/basic/number.rs @@ -0,0 +1,51 @@ +//! This module contains a function to create a number term +//! from the corresponding ast node. + +use nemo_physical::datavalues::AnyDataValue; + +use crate::parser::ast; + +use crate::parser::ast::expression::basic::number::NumberTypeMarker; +use crate::rule_model::{error::TranslationError, translation::ASTProgramTranslation}; + +impl<'a> ASTProgramTranslation<'a> { + /// Create a number term from the corresponding AST node. + pub(crate) fn build_number( + &mut self, + number: &'a ast::expression::basic::number::Number, + ) -> Result { + Ok(match number.value() { + ast::expression::basic::number::NumberValue::Integer(integer) => { + AnyDataValue::new_integer_from_i64(integer) + } + ast::expression::basic::number::NumberValue::Float(float) => { + AnyDataValue::new_float_from_f32(float).expect("NaN and infinity are not parsed") + } + ast::expression::basic::number::NumberValue::Double(double) => { + AnyDataValue::new_double_from_f64(double).expect("NaN and infinity are not parsed") + } + ast::expression::basic::number::NumberValue::Large(large) => { + let datatype_iri = if number.is_exponential() { + match number.type_marker() { + Some(NumberTypeMarker::Float) => "xsd:float", + Some(NumberTypeMarker::Double) | None => "xsd:double", + } + } else { + match number.type_marker() { + Some(NumberTypeMarker::Float) => "xsd:float", + Some(NumberTypeMarker::Double) => "xsd:double", + None => { + if number.is_fractional() { + "xsd:decimal" + } else { + "xsd:integer" + } + } + } + }; + + AnyDataValue::new_other(large, String::from(datatype_iri)) + } + }) + } +} diff --git a/nemo/src/rule_model/translation/basic/rdf.rs b/nemo/src/rule_model/translation/basic/rdf.rs new file mode 100644 index 000000000..54ad4fe79 --- /dev/null +++ b/nemo/src/rule_model/translation/basic/rdf.rs @@ -0,0 +1,25 @@ +//! This module contains a function that converts an rdf literal AST node +//! into its corresponding logical representation. + +use nemo_physical::datavalues::AnyDataValue; + +use crate::parser::ast::{self, ProgramAST}; + +use crate::rule_model::error::translation_error::TranslationErrorKind; +use crate::rule_model::{error::TranslationError, translation::ASTProgramTranslation}; + +impl<'a> ASTProgramTranslation<'a> { + /// Create a term the corresponding rdf AST node. 
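The Large branch in build_number above is the only place where the lexical form survives unparsed; the xsd datatype it is tagged with follows this decision table (a self-contained restatement of the match, e.g. an integer literal too long for i64 becomes xsd:integer):

    #[derive(Clone, Copy)]
    enum Marker {
        Float,
        Double,
    }

    fn large_datatype(exponential: bool, fractional: bool, marker: Option<Marker>) -> &'static str {
        match (exponential, marker) {
            (_, Some(Marker::Float)) => "xsd:float",
            (true, _) => "xsd:double",
            (false, Some(Marker::Double)) => "xsd:double",
            (false, None) => {
                if fractional {
                    "xsd:decimal"
                } else {
                    "xsd:integer"
                }
            }
        }
    }

    fn main() {
        assert_eq!(large_datatype(false, false, None), "xsd:integer");
        assert_eq!(large_datatype(false, true, None), "xsd:decimal");
        assert_eq!(large_datatype(true, false, None), "xsd:double");
    }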
+ pub(crate) fn build_rdf( + &mut self, + rdf: &'a ast::expression::basic::rdf_literal::RdfLiteral, + ) -> Result { + match AnyDataValue::new_from_typed_literal(rdf.content(), rdf.tag()) { + Ok(data_value) => Ok(data_value), + Err(error) => Err(TranslationError::new( + rdf.span(), + TranslationErrorKind::DataValueCreationError(error), + )), + } + } +} diff --git a/nemo/src/rule_model/translation/basic/string.rs b/nemo/src/rule_model/translation/basic/string.rs new file mode 100644 index 000000000..a9c1a1830 --- /dev/null +++ b/nemo/src/rule_model/translation/basic/string.rs @@ -0,0 +1,24 @@ +//! This module contains a function to create a string term +//! from the corresponding ast node. + +use nemo_physical::datavalues::AnyDataValue; + +use crate::parser::ast; + +use crate::rule_model::{error::TranslationError, translation::ASTProgramTranslation}; + +impl<'a> ASTProgramTranslation<'a> { + /// Create a string term from the corresponding AST node. + pub(crate) fn build_string( + &mut self, + string: &'a ast::expression::basic::string::StringLiteral, + ) -> Result { + let value = if let Some(language_tag) = string.language_tag() { + AnyDataValue::new_language_tagged_string(string.content(), language_tag) + } else { + AnyDataValue::new_plain_string(string.content()) + }; + + Ok(value) + } +} diff --git a/nemo/src/rule_model/translation/basic/variable.rs b/nemo/src/rule_model/translation/basic/variable.rs new file mode 100644 index 000000000..cbf84bce8 --- /dev/null +++ b/nemo/src/rule_model/translation/basic/variable.rs @@ -0,0 +1,51 @@ +//! This module contains a function to obtain a variable from the corresponding AST node. + +use crate::parser::ast::{self, ProgramAST}; + +use crate::rule_model::components::term::primitive::variable::Variable; +use crate::rule_model::components::ProgramComponent; +use crate::rule_model::error::hint::Hint; +use crate::rule_model::error::translation_error::TranslationErrorKind; +use crate::rule_model::{error::TranslationError, translation::ASTProgramTranslation}; + +impl<'a> ASTProgramTranslation<'a> { + /// Create a variable term from the corresponding AST node. + pub(crate) fn build_variable( + &mut self, + variable: &'a ast::expression::basic::variable::Variable, + ) -> Result { + Ok(match variable.kind() { + ast::expression::basic::variable::VariableType::Universal => { + if let Some(variable_name) = variable.name() { + Variable::universal(&variable_name).set_origin(self.register_node(variable)) + } else { + return Err(TranslationError::new( + variable.span(), + TranslationErrorKind::UnnamedVariable, + ) + .add_hint(Hint::AnonymousVariables)); + } + } + ast::expression::basic::variable::VariableType::Existential => { + if let Some(variable_name) = variable.name() { + Variable::existential(&variable_name).set_origin(self.register_node(variable)) + } else { + return Err(TranslationError::new( + variable.span(), + TranslationErrorKind::UnnamedVariable, + )); + } + } + ast::expression::basic::variable::VariableType::Anonymous => { + if variable.name().is_none() { + Variable::anonymous().set_origin(self.register_node(variable)) + } else { + return Err(TranslationError::new( + variable.span(), + TranslationErrorKind::NamedAnonymous(variable.span().0.to_string()), + )); + } + } + }) + } +} diff --git a/nemo/src/rule_model/translation/complex.rs b/nemo/src/rule_model/translation/complex.rs new file mode 100644 index 000000000..493478289 --- /dev/null +++ b/nemo/src/rule_model/translation/complex.rs @@ -0,0 +1,9 @@ +//! 
This module contains functions for translating complex AST expressions.
+
+pub(crate) mod aggregation;
+pub(crate) mod arithmetic;
+pub(crate) mod function;
+pub(crate) mod infix;
+pub(crate) mod map;
+pub(crate) mod operation;
+pub(crate) mod tuple;
diff --git a/nemo/src/rule_model/translation/complex/aggregation.rs b/nemo/src/rule_model/translation/complex/aggregation.rs
new file mode 100644
index 000000000..e0d0f12a7
--- /dev/null
+++ b/nemo/src/rule_model/translation/complex/aggregation.rs
@@ -0,0 +1,42 @@
+//! This module contains a function to create an aggregation term
+//! from the corresponding ast node.
+
+use crate::parser::ast::{self, ProgramAST};
+
+use crate::rule_model::components::term::aggregate::Aggregate;
+use crate::rule_model::error::translation_error::TranslationErrorKind;
+use crate::rule_model::{error::TranslationError, translation::ASTProgramTranslation};
+
+impl<'a> ASTProgramTranslation<'a> {
+    /// Create an aggregation term from the corresponding AST node.
+    pub(crate) fn build_aggregation(
+        &mut self,
+        aggregation: &'a ast::expression::complex::aggregation::Aggregation,
+    ) -> Result<Aggregate, TranslationError> {
+        let kind = if let Some(kind) = aggregation.kind() {
+            kind
+        } else {
+            return Err(TranslationError::new(
+                aggregation.tag().span(),
+                TranslationErrorKind::AggregationUnknown(aggregation.tag().content()),
+            ));
+        };
+
+        let aggregate = self.build_inner_term(aggregation.aggregate())?;
+        let mut distinct = Vec::new();
+        for expression in aggregation.distinct() {
+            if let ast::expression::Expression::Variable(variable) = expression {
+                distinct.push(self.build_variable(variable)?);
+            } else {
+                return Err(TranslationError::new(
+                    expression.span(),
+                    TranslationErrorKind::AggregationDistinctNonVariable(
+                        expression.context_type().name().to_string(),
+                    ),
+                ));
+            }
+        }
+
+        Ok(Aggregate::new(kind, aggregate, distinct))
+    }
+}
diff --git a/nemo/src/rule_model/translation/complex/arithmetic.rs b/nemo/src/rule_model/translation/complex/arithmetic.rs
new file mode 100644
index 000000000..46fd8b923
--- /dev/null
+++ b/nemo/src/rule_model/translation/complex/arithmetic.rs
@@ -0,0 +1,38 @@
+//! This module contains a function to create an arithmetic term
+//! from the corresponding ast node.
+
+use crate::parser::ast;
+
+use crate::rule_model::components::term::operation::operation_kind::OperationKind;
+use crate::rule_model::components::term::operation::Operation;
+use crate::rule_model::{error::TranslationError, translation::ASTProgramTranslation};
+
+impl<'a> ASTProgramTranslation<'a> {
+    /// Create an arithmetic term from the corresponding AST node.
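For orientation, the shape that build_aggregation above produces for an aggregation expression such as `#max(?x; ?y)`, with local stand-ins for the rule-model types (the real Aggregate stores a Term, Variables, and an origin, and MaxNumber is the only kind name visible in this patch):

    #[derive(Debug)]
    enum AggregateKind {
        MaxNumber, // nemo also has kinds for count, min, and sum
    }

    #[derive(Debug)]
    struct Aggregate {
        kind: AggregateKind,
        aggregate: String,     // the aggregated term, e.g. `?x`
        distinct: Vec<String>, // distinct variables listed after the `;`
    }

    fn main() {
        let translated = Aggregate {
            kind: AggregateKind::MaxNumber,
            aggregate: "?x".to_string(),
            distinct: vec!["?y".to_string()],
        };
        println!("{translated:?}");
    }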
+ pub(crate) fn build_arithmetic( + &mut self, + arithmetic: &'a ast::expression::complex::arithmetic::Arithmetic, + ) -> Result { + let kind = match arithmetic.kind() { + ast::expression::complex::arithmetic::ArithmeticOperation::Addition => { + OperationKind::NumericSum + } + ast::expression::complex::arithmetic::ArithmeticOperation::Subtraction => { + OperationKind::NumericSubtraction + } + ast::expression::complex::arithmetic::ArithmeticOperation::Multiplication => { + OperationKind::NumericProduct + } + ast::expression::complex::arithmetic::ArithmeticOperation::Division => { + OperationKind::NumericDivision + } + }; + + let subterms = vec![ + self.build_inner_term(arithmetic.left())?, + self.build_inner_term(arithmetic.right())?, + ]; + + Ok(Operation::new(kind, subterms)) + } +} diff --git a/nemo/src/rule_model/translation/complex/function.rs b/nemo/src/rule_model/translation/complex/function.rs new file mode 100644 index 000000000..11c73dce2 --- /dev/null +++ b/nemo/src/rule_model/translation/complex/function.rs @@ -0,0 +1,23 @@ +//! This module contains a function to create a function term +//! from the corresponding ast node. + +use crate::parser::ast; + +use crate::rule_model::components::term::function::FunctionTerm; +use crate::rule_model::{error::TranslationError, translation::ASTProgramTranslation}; + +impl<'a> ASTProgramTranslation<'a> { + /// Create a function term from the corresponding AST node. + pub(crate) fn build_function( + &mut self, + function: &'a ast::expression::complex::atom::Atom, + ) -> Result { + let name = self.resolve_tag(function.tag())?; + let mut subterms = Vec::new(); + for expression in function.expressions() { + subterms.push(self.build_inner_term(expression)?); + } + + Ok(FunctionTerm::new(&name, subterms)) + } +} diff --git a/nemo/src/rule_model/translation/complex/infix.rs b/nemo/src/rule_model/translation/complex/infix.rs new file mode 100644 index 000000000..7584e06df --- /dev/null +++ b/nemo/src/rule_model/translation/complex/infix.rs @@ -0,0 +1,41 @@ +//! This module contains a function to create an operation +//! from an infix ast node. + +use crate::parser::ast; + +use crate::rule_model::components::term::operation::operation_kind::OperationKind; +use crate::rule_model::components::term::operation::Operation; +use crate::rule_model::{error::TranslationError, translation::ASTProgramTranslation}; + +impl<'a> ASTProgramTranslation<'a> { + /// Create an [Operation] from an infix AST node. 
+    pub(crate) fn build_infix(
+        &mut self,
+        infix: &'a ast::expression::complex::infix::InfixExpression,
+    ) -> Result<Operation, TranslationError> {
+        let kind = match infix.kind() {
+            ast::expression::complex::infix::InfixExpressionKind::Equality => OperationKind::Equal,
+            ast::expression::complex::infix::InfixExpressionKind::Inequality => {
+                OperationKind::Unequals
+            }
+            ast::expression::complex::infix::InfixExpressionKind::GreaterEqual => {
+                OperationKind::NumericGreaterthaneq
+            }
+            ast::expression::complex::infix::InfixExpressionKind::Greater => {
+                OperationKind::NumericGreaterthan
+            }
+            ast::expression::complex::infix::InfixExpressionKind::LessEqual => {
+                OperationKind::NumericLessthaneq
+            }
+            ast::expression::complex::infix::InfixExpressionKind::Less => {
+                OperationKind::NumericLessthan
+            }
+        };
+
+        let (left, right) = infix.pair();
+
+        let subterms = vec![self.build_inner_term(left)?, self.build_inner_term(right)?];
+
+        Ok(Operation::new(kind, subterms))
+    }
+}
diff --git a/nemo/src/rule_model/translation/complex/map.rs b/nemo/src/rule_model/translation/complex/map.rs
new file mode 100644
index 000000000..a1fa43624
--- /dev/null
+++ b/nemo/src/rule_model/translation/complex/map.rs
@@ -0,0 +1,28 @@
+//! This module contains a function to create a map term
+//! from the corresponding ast node.
+
+use crate::parser::ast;
+
+use crate::rule_model::components::term::map::Map;
+use crate::rule_model::{error::TranslationError, translation::ASTProgramTranslation};
+
+impl<'a> ASTProgramTranslation<'a> {
+    /// Create a map term from the corresponding AST node.
+    pub(crate) fn build_map(
+        &mut self,
+        map: &'a ast::expression::complex::map::Map,
+    ) -> Result<Map, TranslationError> {
+        let mut subterms = Vec::new();
+        for (key, value) in map.key_value() {
+            let key = self.build_inner_term(key)?;
+            let value = self.build_inner_term(value)?;
+
+            subterms.push((key, value));
+        }
+
+        Ok(match map.tag() {
+            Some(tag) => Map::new(&self.resolve_tag(tag)?, subterms),
+            None => Map::new_unnamed(subterms),
+        })
+    }
+}
diff --git a/nemo/src/rule_model/translation/complex/operation.rs b/nemo/src/rule_model/translation/complex/operation.rs
new file mode 100644
index 000000000..4f58abc1c
--- /dev/null
+++ b/nemo/src/rule_model/translation/complex/operation.rs
@@ -0,0 +1,23 @@
+//! This module contains a function to create an operation term
+//! from the corresponding ast node.
+
+use crate::parser::ast;
+
+use crate::rule_model::components::term::operation::Operation;
+use crate::rule_model::{error::TranslationError, translation::ASTProgramTranslation};
+
+impl<'a> ASTProgramTranslation<'a> {
+    /// Create an operation term from the corresponding AST node.
+    pub(crate) fn build_operation(
+        &mut self,
+        operation: &'a ast::expression::complex::operation::Operation,
+    ) -> Result<Operation, TranslationError> {
+        let kind = operation.kind();
+        let mut subterms = Vec::new();
+        for expression in operation.expressions() {
+            subterms.push(self.build_inner_term(expression)?);
+        }
+
+        Ok(Operation::new(kind, subterms))
+    }
+}
diff --git a/nemo/src/rule_model/translation/complex/tuple.rs b/nemo/src/rule_model/translation/complex/tuple.rs
new file mode 100644
index 000000000..69b9050b1
--- /dev/null
+++ b/nemo/src/rule_model/translation/complex/tuple.rs
@@ -0,0 +1,22 @@
+//! This module contains a function to create a tuple term
+//! from the corresponding ast node.
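+//!
+//! For example, a tuple expression such as `(1, 2, 3)` is translated into a
+//! tuple term whose subterms are the translations of the three components
+//! (an illustrative sketch, not a doctest).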
+
+use crate::parser::ast;
+
+use crate::rule_model::components::term::tuple::Tuple;
+use crate::rule_model::{error::TranslationError, translation::ASTProgramTranslation};
+
+impl<'a> ASTProgramTranslation<'a> {
+    /// Create a tuple term from the corresponding AST node.
+    pub(crate) fn build_tuple(
+        &mut self,
+        tuple: &'a ast::expression::complex::tuple::Tuple,
+    ) -> Result<Tuple, TranslationError> {
+        let mut subterms = Vec::new();
+        for expression in tuple.expressions() {
+            subterms.push(self.build_inner_term(expression)?);
+        }
+
+        Ok(Tuple::new(subterms))
+    }
+}
diff --git a/nemo/src/rule_model/translation/directive.rs b/nemo/src/rule_model/translation/directive.rs
new file mode 100644
index 000000000..0ec77263e
--- /dev/null
+++ b/nemo/src/rule_model/translation/directive.rs
@@ -0,0 +1,46 @@
+//! This module contains functions for translating directive ast nodes.
+
+use crate::{parser::ast, rule_model::error::TranslationError};
+
+use super::ASTProgramTranslation;
+
+pub(crate) mod base;
+pub(crate) mod declare;
+pub(crate) mod import_export;
+pub(crate) mod output;
+pub(crate) mod prefix;
+pub(crate) mod unknown;
+
+impl<'a> ASTProgramTranslation<'a> {
+    /// Handle directive nodes that define names.
+    pub fn handle_define_directive(
+        &mut self,
+        directive: &'a ast::directive::Directive,
+    ) -> Result<(), TranslationError> {
+        match directive {
+            ast::directive::Directive::Base(base) => self.handle_base(base),
+            ast::directive::Directive::Prefix(prefix) => self.handle_prefix(prefix),
+            ast::directive::Directive::Declare(declare) => self.handle_declare(declare),
+            ast::directive::Directive::Export(_)
+            | ast::directive::Directive::Import(_)
+            | ast::directive::Directive::Output(_)
+            | ast::directive::Directive::Unknown(_) => Ok(()),
+        }
+    }
+
+    /// Handle directive nodes that may use defined names.
+    pub fn handle_use_directive(
+        &mut self,
+        directive: &'a ast::directive::Directive,
+    ) -> Result<(), TranslationError> {
+        match directive {
+            ast::directive::Directive::Export(export) => self.handle_export(export),
+            ast::directive::Directive::Import(import) => self.handle_import(import),
+            ast::directive::Directive::Output(output) => self.handle_output(output),
+            ast::directive::Directive::Unknown(unknown) => self.handle_unknown_directive(unknown),
+            ast::directive::Directive::Base(_)
+            | ast::directive::Directive::Declare(_)
+            | ast::directive::Directive::Prefix(_) => Ok(()),
+        }
+    }
+}
diff --git a/nemo/src/rule_model/translation/directive/base.rs b/nemo/src/rule_model/translation/directive/base.rs
new file mode 100644
index 000000000..1e0908ba3
--- /dev/null
+++ b/nemo/src/rule_model/translation/directive/base.rs
@@ -0,0 +1,35 @@
+//! This module contains a function for handling base statements.
+
+use crate::{
+    parser::ast::{self, ProgramAST},
+    rule_model::{
+        error::{
+            info::Info, translation_error::TranslationErrorKind, ComplexErrorLabelKind,
+            TranslationError,
+        },
+        translation::ASTProgramTranslation,
+    },
+};
+
+impl<'a> ASTProgramTranslation<'a> {
+    /// Handle a base ast node.
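+    ///
+    /// The IRI of the first `@base` directive is recorded; any subsequent
+    /// `@base` is reported as a [TranslationErrorKind::BaseRedefinition]
+    /// error with a label pointing back at the first definition.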
+    pub fn handle_base(
+        &mut self,
+        base: &'a ast::directive::base::Base,
+    ) -> Result<(), TranslationError> {
+        if let Some((_, first_base)) = &self.base {
+            return Err(
+                TranslationError::new(base.span(), TranslationErrorKind::BaseRedefinition)
+                    .add_label(
+                        ComplexErrorLabelKind::Information,
+                        first_base.span().range(),
+                        Info::FirstDefinition,
+                    ),
+            );
+        }
+
+        self.base = Some((base.iri().content(), base));
+
+        Ok(())
+    }
+}
diff --git a/nemo/src/rule_model/translation/directive/declare.rs b/nemo/src/rule_model/translation/directive/declare.rs
new file mode 100644
index 000000000..cb7749639
--- /dev/null
+++ b/nemo/src/rule_model/translation/directive/declare.rs
@@ -0,0 +1,22 @@
+//! This module contains a function for handling declare statements.
+
+use crate::{
+    parser::ast::{self, ProgramAST},
+    rule_model::{
+        error::{translation_error::TranslationErrorKind, TranslationError},
+        translation::ASTProgramTranslation,
+    },
+};
+
+impl<'a> ASTProgramTranslation<'a> {
+    /// Handle a declare ast node.
+    pub fn handle_declare(
+        &mut self,
+        declare: &'a ast::directive::declare::Declare,
+    ) -> Result<(), TranslationError> {
+        Err(TranslationError::new(
+            declare.span(),
+            TranslationErrorKind::UnsupportedDeclare,
+        ))
+    }
+}
diff --git a/nemo/src/rule_model/translation/directive/import_export.rs b/nemo/src/rule_model/translation/directive/import_export.rs
new file mode 100644
index 000000000..bc24776d5
--- /dev/null
+++ b/nemo/src/rule_model/translation/directive/import_export.rs
@@ -0,0 +1,132 @@
+//! This module contains a function for handling import/export statements.
+
+use std::path::Path;
+
+use strum::IntoEnumIterator;
+
+use crate::{
+    parser::ast::{self, ProgramAST},
+    rule_model::{
+        components::{
+            import_export::{
+                file_formats::{FileFormat, FILE_FORMATS_RDF},
+                ExportDirective, ImportDirective,
+            },
+            ProgramComponent, Tag,
+        },
+        error::{translation_error::TranslationErrorKind, TranslationError},
+        syntax::import_export::file_formats::FILE_FORMAT_RDF_UNSPECIFIED,
+        translation::ASTProgramTranslation,
+    },
+};
+
+impl<'a> ASTProgramTranslation<'a> {
+    /// Find the extension given for this import/export statement.
+    fn import_export_extension(
+        &self,
+        map: &'a ast::expression::complex::map::Map,
+    ) -> Option<(String, &'a ast::expression::Expression<'a>)> {
+        for (key, value) in map.key_value() {
+            if let ast::expression::Expression::Constant(constant) = key {
+                if &constant.name() == "resource" {
+                    if let ast::expression::Expression::String(string) = value {
+                        return Some((
+                            Path::new(&string.content())
+                                .extension()?
+                                .to_owned()
+                                .into_string()
+                                .ok()?,
+                            value,
+                        ));
+                    }
+                }
+            }
+        }
+
+        None
+    }
+
+    /// Find the [FileFormat] associated with the given import/export map.
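+    ///
+    /// If the map is tagged with the unspecified RDF format, the file
+    /// extension of the `resource` value selects among [FILE_FORMATS_RDF];
+    /// otherwise the tag is matched against the known [FileFormat] names,
+    /// and a missing or unknown tag is reported as an error.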
+    fn import_export_format(
+        &self,
+        map: &'a ast::expression::complex::map::Map,
+    ) -> Result<FileFormat, TranslationError> {
+        if let Some(structure_tag) = map.tag() {
+            let format_tag = structure_tag.to_string();
+
+            if format_tag.to_ascii_lowercase() == FILE_FORMAT_RDF_UNSPECIFIED {
+                let extension = self.import_export_extension(map);
+
+                if let Some((extension, origin)) = extension {
+                    for &rdf_format in FILE_FORMATS_RDF {
+                        if extension.to_ascii_lowercase()
+                            == rdf_format.extension().to_ascii_lowercase()
+                        {
+                            return Ok(rdf_format);
+                        }
+                    }
+
+                    Err(TranslationError::new(
+                        origin.span(),
+                        TranslationErrorKind::RdfUnspecifiedUnknownExtension(extension),
+                    ))
+                } else {
+                    Err(TranslationError::new(
+                        map.span().beginning(),
+                        TranslationErrorKind::RdfUnspecifiedMissingExtension,
+                    ))
+                }
+            } else {
+                for format in FileFormat::iter() {
+                    if format_tag.to_ascii_lowercase() == format.name().to_ascii_lowercase() {
+                        return Ok(format);
+                    }
+                }
+
+                Err(TranslationError::new(
+                    structure_tag.span(),
+                    TranslationErrorKind::FileFormatUnknown(structure_tag.to_string()),
+                ))
+            }
+        } else {
+            Err(TranslationError::new(
+                map.span().beginning(),
+                TranslationErrorKind::FileFormatMissing,
+            ))
+        }
+    }
+
+    /// Handle an import ast node.
+    pub fn handle_import(
+        &mut self,
+        import: &'a ast::directive::import::Import,
+    ) -> Result<(), TranslationError> {
+        let predicate = Tag::new(self.resolve_tag(import.predicate())?);
+        let attributes = self.build_map(import.instructions())?;
+        let file_format = self.import_export_format(import.instructions())?;
+
+        let import_directive = ImportDirective::new(predicate, file_format, attributes);
+        let _ = import_directive.validate(&mut self.validation_error_builder);
+
+        self.program_builder.add_import(import_directive);
+
+        Ok(())
+    }
+
+    /// Handle an export ast node.
+    pub fn handle_export(
+        &mut self,
+        export: &'a ast::directive::export::Export,
+    ) -> Result<(), TranslationError> {
+        let predicate = Tag::new(self.resolve_tag(export.predicate())?);
+        let attributes = self.build_map(export.instructions())?;
+        let file_format = self.import_export_format(export.instructions())?;
+
+        let export_directive = ExportDirective::new(predicate, file_format, attributes);
+        let _ = export_directive.validate(&mut self.validation_error_builder);
+
+        self.program_builder.add_export(export_directive);
+
+        Ok(())
+    }
+}
diff --git a/nemo/src/rule_model/translation/directive/output.rs b/nemo/src/rule_model/translation/directive/output.rs
new file mode 100644
index 000000000..03606681d
--- /dev/null
+++ b/nemo/src/rule_model/translation/directive/output.rs
@@ -0,0 +1,23 @@
+//! This module contains a function for handling output statements.
+
+use crate::{
+    parser::ast::{self},
+    rule_model::{
+        components::{output::Output, Tag},
+        error::TranslationError,
+        translation::ASTProgramTranslation,
+    },
+};
+
+impl<'a> ASTProgramTranslation<'a> {
+    /// Handle an output ast node.
+    pub fn handle_output(
+        &mut self,
+        output: &'a ast::directive::output::Output,
+    ) -> Result<(), TranslationError> {
+        let predicate = Tag::new(self.resolve_tag(output.predicate())?);
+        self.program_builder.add_output(Output::new(predicate));
+
+        Ok(())
+    }
+}
diff --git a/nemo/src/rule_model/translation/directive/prefix.rs b/nemo/src/rule_model/translation/directive/prefix.rs
new file mode 100644
index 000000000..6faeecf16
--- /dev/null
+++ b/nemo/src/rule_model/translation/directive/prefix.rs
@@ -0,0 +1,42 @@
+//! This module contains a function for handling prefix statements.
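+//!
+//! For example, `@prefix ex: <http://example.org/>` registers a mapping from
+//! `ex` to the given IRI, while redefining an already known prefix is
+//! reported as an error (an illustrative sketch of the intended usage).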
+
+use std::collections::hash_map::Entry;
+
+use crate::{
+    parser::ast::{self},
+    rule_model::{
+        error::{
+            info::Info, translation_error::TranslationErrorKind, ComplexErrorLabelKind,
+            TranslationError,
+        },
+        translation::ASTProgramTranslation,
+    },
+};
+
+impl<'a> ASTProgramTranslation<'a> {
+    /// Handle a prefix ast node.
+    pub fn handle_prefix(
+        &mut self,
+        prefix: &'a ast::directive::prefix::Prefix,
+    ) -> Result<(), TranslationError> {
+        match self.prefix_mapping.entry(prefix.prefix()) {
+            Entry::Occupied(entry) => {
+                let (_, prefix_first) = entry.get();
+                return Err(TranslationError::new(
+                    prefix.prefix_token().span(),
+                    TranslationErrorKind::PrefixRedefinition,
+                )
+                .add_label(
+                    ComplexErrorLabelKind::Information,
+                    prefix_first.prefix_token().span().range(),
+                    Info::FirstDefinition,
+                ));
+            }
+            Entry::Vacant(entry) => {
+                entry.insert((prefix.value().content(), prefix));
+            }
+        }
+
+        Ok(())
+    }
+}
diff --git a/nemo/src/rule_model/translation/directive/unknown.rs b/nemo/src/rule_model/translation/directive/unknown.rs
new file mode 100644
index 000000000..2ffc3be07
--- /dev/null
+++ b/nemo/src/rule_model/translation/directive/unknown.rs
@@ -0,0 +1,22 @@
+//! This module contains a function for handling unknown directive statements.
+
+use crate::{
+    parser::ast::{self},
+    rule_model::{
+        error::{translation_error::TranslationErrorKind, TranslationError},
+        translation::ASTProgramTranslation,
+    },
+};
+
+impl<'a> ASTProgramTranslation<'a> {
+    /// Handle an unknown directive ast node.
+    pub fn handle_unknown_directive(
+        &mut self,
+        unknown: &'a ast::directive::unknown::UnknownDirective,
+    ) -> Result<(), TranslationError> {
+        Err(TranslationError::new(
+            unknown.name_token().span(),
+            TranslationErrorKind::DirectiveUnknown(unknown.name()),
+        ))
+    }
+}
diff --git a/nemo/src/rule_model/translation/rule.rs b/nemo/src/rule_model/translation/rule.rs
new file mode 100644
index 000000000..7c7d6c973
--- /dev/null
+++ b/nemo/src/rule_model/translation/rule.rs
@@ -0,0 +1,172 @@
+//! This module contains functions for creating a [Rule] from the corresponding ast node.
+
+use crate::{
+    parser::ast::{self, ProgramAST},
+    rule_model::{
+        components::{
+            atom::Atom,
+            literal::Literal,
+            rule::{Rule, RuleBuilder},
+            term::Term,
+            ProgramComponent,
+        },
+        error::{translation_error::TranslationErrorKind, TranslationError},
+    },
+};
+
+use super::ASTProgramTranslation;
+
+impl<'a> ASTProgramTranslation<'a> {
+    /// Create a [Rule] from the corresponding AST node.
+    pub(crate) fn build_rule(
+        &mut self,
+        rule: &'a ast::rule::Rule<'a>,
+    ) -> Result<Rule, TranslationError> {
+        let mut rule_builder = RuleBuilder::default().origin(self.register_node(rule));
+
+        for expression in rule.head() {
+            rule_builder.add_head_atom_mut(self.build_head_atom(expression)?);
+        }
+
+        for expression in rule.body() {
+            rule_builder.add_body_literal_mut(self.build_body_literal(expression)?);
+        }
+
+        let rule = rule_builder.finalize();
+
+        let _ = rule.validate(&mut self.validation_error_builder);
+        Ok(rule)
+    }
+
+    /// Create a body [Literal] from the corresponding ast node.
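+    ///
+    /// Atoms become positive literals, negated atoms become negative
+    /// literals, and infix expressions as well as operations become
+    /// operation literals; any other expression is rejected with a
+    /// [TranslationErrorKind::BodyNonLiteral] error.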
+    fn build_body_literal(
+        &mut self,
+        body: &'a ast::expression::Expression<'a>,
+    ) -> Result<Literal, TranslationError> {
+        let result = match body {
+            ast::expression::Expression::Atom(atom) => {
+                let mut subterms = Vec::new();
+                for expression in atom.expressions() {
+                    subterms.push(self.build_inner_term(expression)?);
+                }
+
+                Literal::Positive(
+                    Atom::new(&self.resolve_tag(atom.tag())?, subterms)
+                        .set_origin(self.register_node(atom)),
+                )
+            }
+            ast::expression::Expression::Negation(negated) => {
+                let atom = if let ast::expression::Expression::Atom(atom) = negated.expression() {
+                    atom
+                } else {
+                    return Err(TranslationError::new(
+                        negated.span(),
+                        TranslationErrorKind::NegatedNonAtom(
+                            negated.expression().context_type().name().to_string(),
+                        ),
+                    ));
+                };
+
+                let mut subterms = Vec::new();
+                for expression in atom.expressions() {
+                    subterms.push(self.build_inner_term(expression)?);
+                }
+
+                Literal::Negative(
+                    Atom::new(&self.resolve_tag(atom.tag())?, subterms)
+                        .set_origin(self.register_node(atom)),
+                )
+            }
+            ast::expression::Expression::Infix(infix) => Literal::Operation(
+                self.build_infix(infix)?
+                    .set_origin(self.register_node(infix)),
+            ),
+            ast::expression::Expression::Operation(operation) => {
+                let result = self.build_operation(operation)?;
+
+                Literal::Operation(result.set_origin(self.register_node(operation)))
+            }
+            _ => {
+                return Err(TranslationError::new(
+                    body.span(),
+                    TranslationErrorKind::BodyNonLiteral(body.context_type().name().to_string()),
+                ))
+            }
+        }
+        .set_origin(self.register_node(body));
+
+        Ok(result)
+    }
+
+    /// Create a head [Atom] from the corresponding ast node.
+    pub(crate) fn build_head_atom(
+        &mut self,
+        head: &'a ast::expression::Expression<'a>,
+    ) -> Result<Atom, TranslationError> {
+        let result = if let ast::expression::Expression::Atom(atom) = head {
+            let mut subterms = Vec::new();
+            for expression in atom.expressions() {
+                subterms.push(self.build_inner_term(expression)?);
+            }
+
+            Atom::new(&self.resolve_tag(atom.tag())?, subterms).set_origin(self.register_node(atom))
+        } else {
+            return Err(TranslationError::new(
+                head.span(),
+                TranslationErrorKind::HeadNonAtom(head.context_type().name().to_string()),
+            ));
+        };
+
+        Ok(result)
+    }
+
+    /// Create a [Term] that occurs within a head atom or body literal.
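+    ///
+    /// Each expression variant is dispatched to its dedicated `build_*`
+    /// function; negations and infix expressions are not valid in this
+    /// position and are reported as errors.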
+    pub(crate) fn build_inner_term(
+        &mut self,
+        expression: &'a ast::expression::Expression,
+    ) -> Result<Term, TranslationError> {
+        match expression {
+            ast::expression::Expression::Arithmetic(arithmetic) => {
+                self.build_arithmetic(arithmetic).map(Term::from)
+            }
+            ast::expression::Expression::Atom(function) => {
+                self.build_function(function).map(Term::from)
+            }
+            ast::expression::Expression::Blank(blank) => self.build_blank(blank).map(Term::from),
+            ast::expression::Expression::Boolean(boolean) => {
+                self.build_boolean(boolean).map(Term::from)
+            }
+            ast::expression::Expression::Constant(constant) => {
+                self.build_constant(constant).map(Term::from)
+            }
+            ast::expression::Expression::Number(number) => {
+                self.build_number(number).map(Term::from)
+            }
+            ast::expression::Expression::RdfLiteral(rdf_literal) => {
+                self.build_rdf(rdf_literal).map(Term::from)
+            }
+            ast::expression::Expression::String(string) => {
+                self.build_string(string).map(Term::from)
+            }
+            ast::expression::Expression::Tuple(tuple) => self.build_tuple(tuple).map(Term::from),
+            ast::expression::Expression::Variable(variable) => {
+                self.build_variable(variable).map(Term::from)
+            }
+            ast::expression::Expression::Aggregation(aggregation) => {
+                self.build_aggregation(aggregation).map(Term::from)
+            }
+            ast::expression::Expression::Map(map) => self.build_map(map).map(Term::from),
+            ast::expression::Expression::Operation(operation) => {
+                self.build_operation(operation).map(Term::from)
+            }
+            ast::expression::Expression::Negation(negation) => Err(TranslationError::new(
+                negation.span(),
+                TranslationErrorKind::InnerExpressionNegation,
+            )),
+            ast::expression::Expression::Infix(infix) => Err(TranslationError::new(
+                infix.span(),
+                TranslationErrorKind::InnerExpressionInfix,
+            )),
+        }
+    }
+}
From 62d2c88a7c1941b45f3e24256d123d6b8da7e31c Mon Sep 17 00:00:00 2001
From: Alex Ivliev
Date: Wed, 31 Jul 2024 11:42:42 +0200
Subject: [PATCH 143/214] Show parser errors

---
 nemo/src/parser.rs | 32 ++++++++++++++++++--------------
 1 file changed, 18 insertions(+), 14 deletions(-)

diff --git a/nemo/src/parser.rs b/nemo/src/parser.rs
index 149fe4d90..5c9fb1835 100644
--- a/nemo/src/parser.rs
+++ b/nemo/src/parser.rs
@@ -102,19 +102,23 @@ impl<'a> Parser<'a> {
     pub fn parse(self) -> Result<Program<'a>, ParserErrorReport<'a>> {
         let parser_input = ParserInput::new(&self.input, self.state.clone());
 
-        let error_tree = match transform_error_tree(Program::parse)(parser_input) {
-            Ok((_input, program)) => return Ok(program),
-            Err(error_tree) => error_tree,
-        };
-
-        drop(error_tree);
-
-        Err(ParserErrorReport {
-            input: self.input,
-            label: self.label,
-            errors: Rc::try_unwrap(self.state.errors)
-                .expect("there should only be one owner now")
-                .into_inner(),
-        })
+        let (_, program) = Program::parse(parser_input).expect("parsing should always succeed");
+
+        if self.state.errors.borrow().is_empty() {
+            Ok(program)
+        } else {
+            Err(ParserErrorReport {
+                input: self.input,
+                label: self.label,
+                errors: Rc::try_unwrap(self.state.errors)
+                    .expect("there should only be one owner now")
+                    .into_inner(),
+            })
+        }
+
+        // let error_tree = match transform_error_tree(Program::parse)(parser_input) {
+        //     Ok((_input, program)) => return Ok(program),
+        //     Err(error_tree) => error_tree,
+        // };
     }
 }
From 23e16b60df6c0c7b5c1759ecd799a305c04bc339 Mon Sep 17 00:00:00 2001
From: Alex Ivliev
Date: Wed, 31 Jul 2024 15:09:16 +0200
Subject: [PATCH 144/214] Change [ProgramSpan] in comments to [Span]

---
 nemo/src/parser/ast/comment/closed.rs   | 2 +-
 nemo/src/parser/ast/comment/doc.rs      | 2 +-
nemo/src/parser/ast/comment/line.rs | 2 +- nemo/src/parser/ast/comment/toplevel.rs | 2 +- nemo/src/parser/ast/comment/wsoc.rs | 2 +- nemo/src/parser/ast/sequence/one.rs | 2 +- nemo/src/parser/ast/sequence/simple.rs | 2 +- nemo/src/parser/ast/token.rs | 2 +- nemo/src/parser/span.rs | 4 ++-- nemo/src/rule_model/error.rs | 2 +- 10 files changed, 11 insertions(+), 11 deletions(-) diff --git a/nemo/src/parser/ast/comment/closed.rs b/nemo/src/parser/ast/comment/closed.rs index a20c60331..638635ed8 100644 --- a/nemo/src/parser/ast/comment/closed.rs +++ b/nemo/src/parser/ast/comment/closed.rs @@ -16,7 +16,7 @@ use crate::parser::{ /// Closed comment #[derive(Debug)] pub struct ClosedComment<'a> { - /// [ProgramSpan] associated with this comment + /// [Span] associated with this comment span: Span<'a>, /// Part of the comment that contains the content diff --git a/nemo/src/parser/ast/comment/doc.rs b/nemo/src/parser/ast/comment/doc.rs index 8dc3bdca6..322f8b8ef 100644 --- a/nemo/src/parser/ast/comment/doc.rs +++ b/nemo/src/parser/ast/comment/doc.rs @@ -19,7 +19,7 @@ use crate::parser::{ /// Doc comment that is attached to e.g. rules #[derive(Debug)] pub struct DocComment<'a> { - /// [ProgramSpan] associated with this comment + /// [Span] associated with this comment span: Span<'a>, /// Each line of the comment diff --git a/nemo/src/parser/ast/comment/line.rs b/nemo/src/parser/ast/comment/line.rs index e4eff8c9b..82fb2e0f8 100644 --- a/nemo/src/parser/ast/comment/line.rs +++ b/nemo/src/parser/ast/comment/line.rs @@ -18,7 +18,7 @@ use crate::parser::{ /// Line comment #[derive(Debug)] pub struct LineComment<'a> { - /// [ProgramSpan] associated with this comment + /// [Span] associated with this comment span: Span<'a>, /// Part of the comment that contains the content diff --git a/nemo/src/parser/ast/comment/toplevel.rs b/nemo/src/parser/ast/comment/toplevel.rs index 1b9453141..94f5b12c5 100644 --- a/nemo/src/parser/ast/comment/toplevel.rs +++ b/nemo/src/parser/ast/comment/toplevel.rs @@ -19,7 +19,7 @@ use crate::parser::{ /// Doc comment that is attached to e.g. 
rules #[derive(Debug)] pub struct TopLevelComment<'a> { - /// [ProgramSpan] associated with this comment + /// [Span] associated with this comment span: Span<'a>, /// Each line of the comment diff --git a/nemo/src/parser/ast/comment/wsoc.rs b/nemo/src/parser/ast/comment/wsoc.rs index 8538d1c45..17a8a965e 100644 --- a/nemo/src/parser/ast/comment/wsoc.rs +++ b/nemo/src/parser/ast/comment/wsoc.rs @@ -25,7 +25,7 @@ pub enum CommentType<'a> { /// Represents a series of whitespaces or comments #[derive(Debug)] pub struct WSoC<'a> { - /// [ProgramSpan] associated with this comment + /// [Span] associated with this comment _span: Span<'a>, /// comments comments: Vec>, diff --git a/nemo/src/parser/ast/sequence/one.rs b/nemo/src/parser/ast/sequence/one.rs index a63901244..1f6a6b8f0 100644 --- a/nemo/src/parser/ast/sequence/one.rs +++ b/nemo/src/parser/ast/sequence/one.rs @@ -16,7 +16,7 @@ use crate::parser::{ /// A sequence of one must be followed by a comma #[derive(Debug)] pub struct ExpressionSequenceOne<'a> { - /// [ProgramSpan] associated with this sequence + /// [Span] associated with this sequence _span: Span<'a>, /// List of expressions diff --git a/nemo/src/parser/ast/sequence/simple.rs b/nemo/src/parser/ast/sequence/simple.rs index d6c8e86df..b2e213e07 100644 --- a/nemo/src/parser/ast/sequence/simple.rs +++ b/nemo/src/parser/ast/sequence/simple.rs @@ -14,7 +14,7 @@ use crate::parser::{ /// Sequence of comma-delimited expressions #[derive(Debug)] pub struct ExpressionSequenceSimple<'a> { - /// [ProgramSpan] associated with this sequence + /// [Span] associated with this sequence _span: Span<'a>, /// List of expressions diff --git a/nemo/src/parser/ast/token.rs b/nemo/src/parser/ast/token.rs index d0b736c90..2475dc571 100644 --- a/nemo/src/parser/ast/token.rs +++ b/nemo/src/parser/ast/token.rs @@ -283,7 +283,7 @@ macro_rules! string_token { } impl<'a> Token<'a> { - /// Return the [ProgramSpan] of this token. + /// Return the [Span] of this token. pub fn span(&self) -> Span<'a> { self.span } diff --git a/nemo/src/parser/span.rs b/nemo/src/parser/span.rs index f834b7f68..0a4ce2761 100644 --- a/nemo/src/parser/span.rs +++ b/nemo/src/parser/span.rs @@ -129,7 +129,7 @@ impl<'a> Span<'a> { } } - /// Create a [ProgramSpan] that encloses the given [ProgramSpan]s. + /// Create a [Span] that encloses the given [Span]s. /// TODO: Description and Specify safety conditions and verify that this is correct pub fn enclose(&self, first: &Self, second: &Self) -> Self { unsafe { @@ -143,7 +143,7 @@ impl<'a> Span<'a> { } } - /// Return a [ProgramSpan] that points to the beginning. + /// Return a [Span] that points to the beginning. pub fn beginning(&self) -> Self { unsafe { if self.0.is_empty() { diff --git a/nemo/src/rule_model/error.rs b/nemo/src/rule_model/error.rs index 720ccc88f..07f411f2b 100644 --- a/nemo/src/rule_model/error.rs +++ b/nemo/src/rule_model/error.rs @@ -212,7 +212,7 @@ pub struct TranslationError { } impl TranslationError { - /// Create a new [TranslationError] from a given [ProgramSPan]. + /// Create a new [TranslationError] from a given [Span]. 
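+    ///
+    /// A construction sketch (assuming a `span` for the offending node):
+    /// `TranslationError::new(span, TranslationErrorKind::UnsupportedDeclare)`.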
pub fn new<'a>(span: Span<'a>, kind: TranslationErrorKind) -> Self { let message = kind.to_string(); From 71d79100afd69f6b73be3d4410cb2c61ee835feb Mon Sep 17 00:00:00 2001 From: Alex Ivliev Date: Fri, 2 Aug 2024 12:22:51 +0200 Subject: [PATCH 145/214] Validate main program components --- nemo-physical/src/error.rs | 3 - nemo/src/rule_model/components.rs | 41 ++-- nemo/src/rule_model/components/atom.rs | 41 +++- nemo/src/rule_model/components/base.rs | 73 ------ nemo/src/rule_model/components/fact.rs | 31 ++- .../rule_model/components/import_export.rs | 2 +- nemo/src/rule_model/components/literal.rs | 23 +- nemo/src/rule_model/components/output.rs | 6 +- nemo/src/rule_model/components/rule.rs | 217 +++++++++++++++--- nemo/src/rule_model/components/tag.rs | 85 +++++++ nemo/src/rule_model/components/term.rs | 66 +++++- .../rule_model/components/term/aggregate.rs | 39 +++- .../rule_model/components/term/function.rs | 40 +++- nemo/src/rule_model/components/term/map.rs | 22 +- .../rule_model/components/term/operation.rs | 36 ++- .../term/operation/operation_kind.rs | 159 +++++++------ .../rule_model/components/term/primitive.rs | 10 + .../components/term/primitive/ground.rs | 28 ++- .../components/term/primitive/variable.rs | 22 +- .../term/primitive/variable/existential.rs | 11 +- .../term/primitive/variable/universal.rs | 13 +- nemo/src/rule_model/components/term/tuple.rs | 22 +- .../rule_model/components/term/value_type.rs | 43 ++++ nemo/src/rule_model/error/hint.rs | 2 +- nemo/src/rule_model/error/hint/similar.rs | 32 ++- nemo/src/rule_model/error/info.rs | 3 + nemo/src/rule_model/error/validation_error.rs | 55 +++-- nemo/src/rule_model/translation.rs | 11 +- .../rule_model/translation/basic/variable.rs | 6 +- .../translation/complex/aggregation.rs | 15 +- .../translation/complex/arithmetic.rs | 15 +- .../translation/complex/function.rs | 14 +- .../rule_model/translation/complex/infix.rs | 15 +- .../src/rule_model/translation/complex/map.rs | 16 +- .../translation/complex/operation.rs | 13 +- .../rule_model/translation/complex/tuple.rs | 12 +- .../translation/directive/import_export.rs | 19 +- .../translation/directive/output.rs | 7 +- nemo/src/rule_model/translation/rule.rs | 36 +-- 39 files changed, 954 insertions(+), 350 deletions(-) delete mode 100644 nemo/src/rule_model/components/base.rs create mode 100644 nemo/src/rule_model/components/tag.rs create mode 100644 nemo/src/rule_model/components/term/value_type.rs diff --git a/nemo-physical/src/error.rs b/nemo-physical/src/error.rs index e8a5c76cd..7f0547baf 100644 --- a/nemo-physical/src/error.rs +++ b/nemo-physical/src/error.rs @@ -67,9 +67,6 @@ pub enum ReadingError { /// Error-Collection for all the possible Errors occurring in this crate #[derive(Error, Debug)] pub enum Error { - /// Permutation shall be sorted, but the input data is of different length - #[error("an invalid number of aggregated variables was provided: {0}")] - InvalidAggregatedVariableCount(usize), /// Permutation shall be sorted, but the input data is of different length #[error("the provided data-structures do not have the same length: {0:?}")] PermutationSortLen(Vec), diff --git a/nemo/src/rule_model/components.rs b/nemo/src/rule_model/components.rs index cfca4f2c7..ac1552c0e 100644 --- a/nemo/src/rule_model/components.rs +++ b/nemo/src/rule_model/components.rs @@ -1,14 +1,15 @@ //! This module defines the logical components that make up a program. 
#[macro_use]
+
 pub mod atom;
-pub mod base;
 pub mod datatype;
 pub mod fact;
 pub mod import_export;
 pub mod literal;
 pub mod output;
 pub mod rule;
+pub mod tag;
 pub mod term;
 
 use std::fmt::{Debug, Display};
@@ -20,26 +21,24 @@ use super::{
     origin::Origin,
 };
 
-/// Name of a term
-#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
-pub struct Tag(String);
-
-impl Tag {
-    /// Create a new [Tag].
-    pub fn new(name: String) -> Self {
-        Self(name)
-    }
-
-    /// Validate term name.
-    pub fn is_valid(&self) -> bool {
-        !self.0.is_empty()
-    }
-}
-
-impl Display for Tag {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        f.write_str(&self.0)
-    }
+/// TODO: Think whether this is needed
+/// Types of [ProgramComponent]s
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+pub enum ProgramComponentKind {
+    /// Variable
+    Variable,
+    /// Primitive ground term
+    PrimitiveGround,
+    /// Map
+    Map,
+    /// Tuple
+    Tuple,
+    /// Operation
+    Operation,
+    /// Function term
+    FunctionTerm,
+    /// Atom
+    Atom,
 }
 
 /// Trait implemented by objects that are part of the logical rule model of the nemo language.
diff --git a/nemo/src/rule_model/components/atom.rs b/nemo/src/rule_model/components/atom.rs
index 4409ca88e..f34cb7f01 100644
--- a/nemo/src/rule_model/components/atom.rs
+++ b/nemo/src/rule_model/components/atom.rs
@@ -3,13 +3,14 @@
 use std::{fmt::Display, hash::Hash};
 
 use crate::rule_model::{
-    error::{ValidationError, ValidationErrorBuilder},
+    error::{validation_error::ValidationErrorKind, ValidationError, ValidationErrorBuilder},
     origin::Origin,
 };
 
 use super::{
+    tag::Tag,
     term::{primitive::variable::Variable, Term},
-    IterableVariables, ProgramComponent, Tag,
+    IterableVariables, ProgramComponent,
 };
 
 /// Atom
@@ -33,22 +34,28 @@ pub struct Atom {
 macro_rules! atom {
     // Base case: no elements
     ($name:tt) => {
-        crate::rule_model::component::atom::Atom::new($name, Vec::new())
+        crate::rule_model::components::atom::Atom::new(
+            crate::rule_model::components::tag::Tag::from($name),
+            Vec::new()
+        )
     };
     // Recursive case: handle each term, separated by commas
    ($name:tt; $($tt:tt)*) => {{
         let mut terms = Vec::new();
         term_list!(terms; $($tt)*);
-        crate::rule_model::components::atom::Atom::new($name, terms)
+        crate::rule_model::components::atom::Atom::new(
+            crate::rule_model::components::tag::Tag::from($name),
+            terms
+        )
     }};
 }
 
 impl Atom {
     /// Create a new [Atom].
-    pub fn new<Terms: IntoIterator<Item = Term>>(predicate: &str, subterms: Terms) -> Self {
+    pub fn new<Terms: IntoIterator<Item = Term>>(predicate: Tag, subterms: Terms) -> Self {
         Self {
             origin: Origin::Created,
-            predicate: Tag::new(predicate.to_string()),
+            predicate,
             terms: subterms.into_iter().collect(),
         }
     }
@@ -58,8 +65,8 @@ impl Atom {
         self.predicate.clone()
     }
 
-    /// Return an iterator over the subterms of this atom.
-    pub fn subterms(&self) -> impl Iterator<Item = &Term> {
+    /// Return an iterator over the arguments of this atom.
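+    ///
+    /// For example, the atom built by `atom!("p"; 12, ?v)` has two
+    /// arguments (a sketch using the `atom!` macro defined above).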
+ pub fn arguments(&self) -> impl Iterator { self.terms.iter() } @@ -130,10 +137,20 @@ impl ProgramComponent for Atom { Self: Sized, { if !self.predicate.is_valid() { - todo!() + builder.report_error( + self.predicate.origin().clone(), + ValidationErrorKind::InvalidTermTag(self.predicate.to_string()), + ); } - for term in self.subterms() { + if self.is_empty() { + builder.report_error( + self.origin.clone(), + ValidationErrorKind::UnsupportedAtomEmpty, + ); + } + + for term in self.arguments() { term.validate(builder)?; } @@ -158,9 +175,9 @@ mod test { #[test] fn atom_basic() { let variable = Variable::universal("u"); - let function = atom!("p"; 12, variable, !e, "abc", ?v); + let atom = atom!("p"; 12, variable, !e, "abc", ?v); - let variables = function.variables().cloned().collect::>(); + let variables = atom.variables().cloned().collect::>(); assert_eq!( variables, vec![ diff --git a/nemo/src/rule_model/components/base.rs b/nemo/src/rule_model/components/base.rs deleted file mode 100644 index a005bbc9e..000000000 --- a/nemo/src/rule_model/components/base.rs +++ /dev/null @@ -1,73 +0,0 @@ -//! This module defines [Base] - -use std::{fmt::Display, hash::Hash}; - -use crate::rule_model::{error::ValidationErrorBuilder, origin::Origin}; - -use super::ProgramComponent; - -/// Global prefix -#[derive(Debug, Clone, Eq)] -pub struct Base { - /// Origin of this component - origin: Origin, - - /// Prefix - base: String, -} - -impl Base { - /// Create a new [Base] - pub fn new(base: String) -> Self { - Self { - origin: Origin::default(), - base, - } - } -} - -impl Display for Base { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "@base {} .", self.base) - } -} - -impl PartialEq for Base { - fn eq(&self, other: &Self) -> bool { - self.base == other.base - } -} - -impl Hash for Base { - fn hash(&self, state: &mut H) { - self.base.hash(state); - } -} - -impl ProgramComponent for Base { - fn parse(_string: &str) -> Result - where - Self: Sized, - { - todo!() - } - - fn origin(&self) -> &Origin { - &self.origin - } - - fn set_origin(mut self, origin: Origin) -> Self - where - Self: Sized, - { - self.origin = origin; - self - } - - fn validate(&self, builder: &mut ValidationErrorBuilder) -> Result<(), ()> - where - Self: Sized, - { - todo!() - } -} diff --git a/nemo/src/rule_model/components/fact.rs b/nemo/src/rule_model/components/fact.rs index 5e06a575b..bc405d9ee 100644 --- a/nemo/src/rule_model/components/fact.rs +++ b/nemo/src/rule_model/components/fact.rs @@ -2,9 +2,12 @@ use std::{fmt::Display, hash::Hash}; -use crate::rule_model::{error::ValidationErrorBuilder, origin::Origin}; +use crate::rule_model::{ + error::{validation_error::ValidationErrorKind, ValidationErrorBuilder}, + origin::Origin, +}; -use super::{atom::Atom, term::Term, ProgramComponent, Tag}; +use super::{atom::Atom, tag::Tag, term::Term, IterableVariables, ProgramComponent}; /// A (ground) fact #[derive(Debug, Clone, Eq)] @@ -14,7 +17,6 @@ pub struct Fact { /// Predicate of the fact predicate: Tag, - /// List of [Term]s terms: Vec, } @@ -50,7 +52,7 @@ impl From for Fact { Self { origin: value.origin().clone(), predicate: value.predicate(), - terms: value.subterms().cloned().collect(), + terms: value.arguments().cloned().collect(), } } } @@ -108,6 +110,25 @@ impl ProgramComponent for Fact { where Self: Sized, { - todo!() + if !self.predicate.is_valid() { + builder.report_error( + self.predicate.origin().clone(), + ValidationErrorKind::InvalidTermTag(self.predicate.to_string()), + ); + } + + for 
term in self.subterms() {
+            if let Some(variable) = term.variables().next() {
+                builder.report_error(
+                    variable.origin().clone(),
+                    ValidationErrorKind::FactNonGround,
+                );
+                continue;
+            }
+
+            term.validate(builder)?;
+        }
+
+        Ok(())
     }
 }
diff --git a/nemo/src/rule_model/components/import_export.rs b/nemo/src/rule_model/components/import_export.rs
index 13eaefd74..c500ee3d7 100644
--- a/nemo/src/rule_model/components/import_export.rs
+++ b/nemo/src/rule_model/components/import_export.rs
@@ -11,7 +11,7 @@ use file_formats::FileFormat;
 
 use crate::rule_model::{error::ValidationErrorBuilder, origin::Origin};
 
-use super::{term::map::Map, ProgramComponent, Tag};
+use super::{tag::Tag, term::map::Map, ProgramComponent};
 
 /// An import/export specification. This object captures all information that is typically
 /// present in an import or export directive in a Nemo program, including the main format,
diff --git a/nemo/src/rule_model/components/literal.rs b/nemo/src/rule_model/components/literal.rs
index e82e84f3d..b36e82696 100644
--- a/nemo/src/rule_model/components/literal.rs
+++ b/nemo/src/rule_model/components/literal.rs
@@ -4,7 +4,11 @@ use std::{fmt::Display, hash::Hash};
 
 use crate::rule_model::error::{ValidationError, ValidationErrorBuilder};
 
-use super::{atom::Atom, term::operation::Operation, ProgramComponent};
+use super::{
+    atom::Atom,
+    term::{operation::Operation, Term},
+    ProgramComponent,
+};
 
 /// Literal
 ///
@@ -21,6 +25,17 @@ pub enum Literal {
     Operation(Operation),
 }
 
+impl Literal {
+    /// Return an iterator over the arguments contained in this literal.
+    pub fn arguments(&self) -> Box<dyn Iterator<Item = &Term> + '_> {
+        match self {
+            Literal::Positive(literal) => Box::new(literal.arguments()),
+            Literal::Negative(literal) => Box::new(literal.arguments()),
+            Literal::Operation(literal) => Box::new(literal.arguments()),
+        }
+    }
+}
+
 impl Display for Literal {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
@@ -62,6 +77,10 @@ impl ProgramComponent for Literal {
     where
         Self: Sized,
     {
-        todo!()
+        match self {
+            Literal::Positive(literal) => literal.validate(builder),
+            Literal::Negative(literal) => literal.validate(builder),
+            Literal::Operation(literal) => literal.validate(builder),
+        }
     }
 }
diff --git a/nemo/src/rule_model/components/output.rs b/nemo/src/rule_model/components/output.rs
index 028871d84..f35229a83 100644
--- a/nemo/src/rule_model/components/output.rs
+++ b/nemo/src/rule_model/components/output.rs
@@ -4,7 +4,7 @@ use std::{fmt::Display, hash::Hash};
 
 use crate::rule_model::{error::ValidationErrorBuilder, origin::Origin};
 
-use super::{ProgramComponent, Tag};
+use super::{tag::Tag, ProgramComponent};
 
 /// Output directive
 ///
@@ -66,10 +66,10 @@ impl ProgramComponent for Output {
         self
     }
 
-    fn validate(&self, builder: &mut ValidationErrorBuilder) -> Result<(), ()>
+    fn validate(&self, _builder: &mut ValidationErrorBuilder) -> Result<(), ()>
     where
         Self: Sized,
     {
-        todo!()
+        Ok(())
     }
 }
diff --git a/nemo/src/rule_model/components/rule.rs b/nemo/src/rule_model/components/rule.rs
index 89bfe0ad5..10e064d45 100644
--- a/nemo/src/rule_model/components/rule.rs
+++ b/nemo/src/rule_model/components/rule.rs
@@ -1,11 +1,12 @@
-//! This module defines [Rule] and [RuleBuilder]
+//! This module defines [Rule] and [RuleBuilder].
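+//!
+//! During validation, every universal variable used in the head must be
+//! *safe*, that is, bound by a positive body literal or derivable from safe
+//! variables through an assignment in the body.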
use std::{collections::HashSet, fmt::Display, hash::Hash};
 
-use similar_string::find_best_similarity;
-
 use crate::rule_model::{
-    error::{hint::Hint, validation_error::ValidationErrorKind, ValidationErrorBuilder},
+    error::{
+        hint::Hint, info::Info, validation_error::ValidationErrorKind, ComplexErrorLabelKind,
+        ValidationErrorBuilder,
+    },
     origin::Origin,
 };
@@ -92,9 +93,11 @@ impl Rule {
 
         for literal in &self.body {
             if let Literal::Positive(atom) = literal {
-                for term in atom.subterms() {
+                for term in atom.arguments() {
                     if let Term::Primitive(Primitive::Variable(variable)) = term {
-                        result.insert(variable);
+                        if variable.is_universal() && variable.name().is_some() {
+                            result.insert(variable);
+                        }
                     }
                 }
             }
@@ -106,7 +109,10 @@ impl Rule {
         for literal in &self.body {
             if let Literal::Operation(operation) = literal {
                 if let Some((variable, term)) = operation.variable_assignment() {
-                    if term.variables().all(|variable| result.contains(variable)) {
+                    if variable.is_universal()
+                        && variable.name().is_some()
+                        && term.variables().all(|variable| result.contains(variable))
+                    {
                         result.insert(variable);
                     }
                 }
@@ -120,6 +126,90 @@ impl Rule {
 
         result
     }
+
+    /// Check whether a term appearing in the rule head only contains
+    /// supported constructs, returning whether it contains an aggregate.
+    fn validate_term_head(builder: &mut ValidationErrorBuilder, term: &Term) -> Result<bool, ()> {
+        if term.is_map() || term.is_tuple() || term.is_function() {
+            builder.report_error(
+                term.origin().clone(),
+                ValidationErrorKind::UnsupportedComplexTerm,
+            );
+            return Err(());
+        }
+
+        let mut first_aggregate = term.is_aggregate();
+
+        for subterm in term.arguments() {
+            let contains_aggregate = Self::validate_term_head(builder, subterm)?;
+
+            if contains_aggregate && first_aggregate {
+                builder.report_error(
+                    subterm.origin().clone(),
+                    ValidationErrorKind::UnsupportedAggregateMultiple,
+                );
+
+                return Err(());
+            }
+
+            first_aggregate |= contains_aggregate;
+        }
+
+        Ok(first_aggregate)
+    }
+
+    /// Check whether a term appearing in the rule body only contains
+    /// supported constructs; existential variables, aggregates, unsafe or
+    /// anonymous operation variables, and complex terms are reported as errors.
+    fn validate_term_body(
+        builder: &mut ValidationErrorBuilder,
+        term: &Term,
+        safe_variables: &HashSet<&Variable>,
+    ) -> Result<(), ()> {
+        if let Term::Primitive(Primitive::Variable(Variable::Existential(existential))) = term {
+            builder.report_error(
+                existential.origin().clone(),
+                ValidationErrorKind::BodyExistential(Variable::Existential(existential.clone())),
+            );
+            return Err(());
+        }
+
+        if term.is_aggregate() {
+            builder.report_error(term.origin().clone(), ValidationErrorKind::BodyAggregate);
+            return Err(());
+        }
+
+        if term.is_operation() {
+            for operation_variable in term.variables() {
+                if operation_variable.name().is_none() {
+                    builder.report_error(
+                        operation_variable.origin().clone(),
+                        ValidationErrorKind::OperationAnonymous,
+                    );
+                    return Err(());
+                }
+
+                if !safe_variables.contains(operation_variable) {
+                    builder.report_error(
+                        operation_variable.origin().clone(),
+                        ValidationErrorKind::OperationUnsafe(operation_variable.clone()),
+                    );
+                    return Err(());
+                }
+            }
+        }
+
+        if term.is_map() || term.is_tuple() || term.is_function() {
+            builder.report_error(
+                term.origin().clone(),
+                ValidationErrorKind::UnsupportedComplexTerm,
+            );
+            return Err(());
+        }
+
+        for subterm in term.arguments() {
+            Self::validate_term_body(builder, subterm, safe_variables)?;
+        }
+
+        Ok(())
+    }
 }
 
 impl Display for Rule {
@@ -184,42 +274,111 @@ impl ProgramComponent for Rule {
         Self: Sized,
     {
         let safe_variables = self.safe_variables();
+        let is_existential = self
+            .head()
+            .iter()
+            .flat_map(|atom| atom.variables())
+            .any(|variable| variable.is_existential());
+
+        for atom in self.head() {
+            atom.validate(builder)?;
+
+            let mut 
contains_aggregate = false; + for term in atom.arguments() { + if let Ok(aggregate) = Self::validate_term_head(builder, term) { + if aggregate && contains_aggregate { + builder.report_error( + term.origin().clone(), + ValidationErrorKind::UnsupportedAggregateMultiple, + ); + } - for atom in &self.head { - for term in atom.subterms() { - if let Term::Primitive(Primitive::Variable(head_variable)) = term { - if !safe_variables.contains(head_variable) { - let head_variable_name = head_variable - .name() - .expect("anonymous variables not allowed in the head"); + if aggregate && is_existential { + builder.report_error( + term.origin().clone(), + ValidationErrorKind::UnsupportedAggregatesAndExistentials, + ); + } + contains_aggregate |= aggregate; + } + } + + for variable in atom.variables() { + if let Some(variable_name) = variable.name() { + if !safe_variables.contains(variable) { let info = builder.report_error( - head_variable.origin().clone(), - ValidationErrorKind::HeadUnsafe(head_variable.clone()), + variable.origin().clone(), + ValidationErrorKind::HeadUnsafe(variable.clone()), ); - - if let Some(closest_option) = find_best_similarity( - head_variable_name.clone(), + if let Some(hint) = Hint::similar( + "variable", + variable_name, &safe_variables .iter() - .filter_map(|variable| variable.name()) + .flat_map(|variable| variable.name()) .collect::>(), ) { - if head_variable_name.len() > 2 - && closest_option.0.len() > 2 - && closest_option.1 > 0.75 - { - info.add_hint(Hint::SimilarExists { - kind: "variable".to_string(), - name: closest_option.0, - }); - } + info.add_hint(hint); } + + return Err(()); } + } else { + builder.report_error( + variable.origin().clone(), + ValidationErrorKind::HeadAnonymous, + ); + return Err(()); } } } + let mut negative_variables = HashSet::<&Variable>::new(); + + for literal in self.body() { + literal.validate(builder)?; + + for term in literal.arguments() { + let _ = Self::validate_term_body(builder, term, &safe_variables); + } + + let mut current_negative_variables = HashSet::<&Variable>::new(); + if let Literal::Negative(negative) = literal { + for negative_subterm in negative.arguments() { + if let Term::Primitive(Primitive::Variable(variable)) = negative_subterm { + if !safe_variables.contains(variable) { + current_negative_variables.insert(variable); + } + } + } + } + + for repeated_variable in current_negative_variables.intersection(&negative_variables) { + let first_use = negative_variables + .get(repeated_variable) + .expect("value is contained in the intersection"); + let repeated_use = current_negative_variables + .get(repeated_variable) + .expect("value is contained in the intersection"); + + builder + .report_error( + repeated_use.origin().clone(), + ValidationErrorKind::MultipleNegativeLiteralsUnsafe( + (*repeated_use).clone(), + ), + ) + .add_label( + ComplexErrorLabelKind::Information, + first_use.origin().clone(), + Info::FirstUse, + ); + } + + negative_variables.extend(current_negative_variables); + } + Ok(()) } } diff --git a/nemo/src/rule_model/components/tag.rs b/nemo/src/rule_model/components/tag.rs new file mode 100644 index 000000000..0f21d8294 --- /dev/null +++ b/nemo/src/rule_model/components/tag.rs @@ -0,0 +1,85 @@ +//! This module defines [Tag]. + +use std::{fmt::Display, hash::Hash}; + +use crate::rule_model::origin::Origin; + +/// Name of a term or predicate +#[derive(Debug, Clone, Eq)] +pub struct Tag { + /// Origin of this component. 
+ origin: Origin, + /// Content of this tag + tag: String, +} + +impl Tag { + /// Create a new [Tag]. + pub fn new(name: String) -> Self { + Self { + origin: Origin::Created, + tag: name, + } + } + + /// Validate term name. + pub fn is_valid(&self) -> bool { + !self.tag.starts_with("__") + } + + /// Return the [Origin] associated with this tag. + pub fn origin(&self) -> &Origin { + &self.origin + } + + /// Set the [Origin]. + pub fn set_origin(mut self, origin: Origin) -> Self + where + Self: Sized, + { + self.origin = origin; + self + } +} + +impl Display for Tag { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(&self.tag) + } +} + +impl PartialEq for Tag { + fn eq(&self, other: &Self) -> bool { + self.tag == other.tag + } +} + +impl PartialOrd for Tag { + fn partial_cmp(&self, other: &Self) -> Option { + self.tag.partial_cmp(&other.tag) + } +} + +impl Hash for Tag { + fn hash(&self, state: &mut H) { + self.tag.hash(state); + } +} + +impl From for Tag { + fn from(value: String) -> Self { + Self { + origin: Origin::Created, + tag: value, + } + } +} + +impl From<&str> for Tag { + fn from(value: &str) -> Self { + Self { + origin: Origin::Created, + tag: value.to_string(), + } + } +} diff --git a/nemo/src/rule_model/components/term.rs b/nemo/src/rule_model/components/term.rs index b7d18c835..98bab72c2 100644 --- a/nemo/src/rule_model/components/term.rs +++ b/nemo/src/rule_model/components/term.rs @@ -1,5 +1,7 @@ //! This module defines [Term]. +pub mod value_type; + #[macro_use] pub mod aggregate; #[macro_use] @@ -26,6 +28,7 @@ use primitive::{ Primitive, }; use tuple::Tuple; +use value_type::ValueType; use crate::rule_model::{ error::{ValidationError, ValidationErrorBuilder}, @@ -73,6 +76,60 @@ impl Term { pub fn ground(value: AnyDataValue) -> Self { Self::Primitive(Primitive::Ground(GroundTerm::new(value))) } + + /// Return the value type of this term. + pub fn value_type(&self) -> ValueType { + match self { + Term::Primitive(term) => term.value_type(), + Term::Aggregate(term) => term.value_type(), + Term::FunctionTerm(term) => term.value_type(), + Term::Map(term) => term.value_type(), + Term::Operation(term) => term.value_type(), + Term::Tuple(term) => term.value_type(), + } + } + + /// Return whether this term is a primitive term. + pub fn is_primitive(&self) -> bool { + matches!(self, Term::Primitive(_)) + } + + /// Return whether this term is an aggregate. + pub fn is_aggregate(&self) -> bool { + matches!(self, Term::Aggregate(_)) + } + + /// Return whether this term is a function term. + pub fn is_function(&self) -> bool { + matches!(self, Term::FunctionTerm(_)) + } + + /// Return whether this term is a map. + pub fn is_map(&self) -> bool { + matches!(self, Term::Map(_)) + } + + /// Return whether this term is a operation. + pub fn is_operation(&self) -> bool { + matches!(self, Term::Operation(_)) + } + + /// Return whether this term is a tuple. + pub fn is_tuple(&self) -> bool { + matches!(self, Term::Tuple(_)) + } + + /// Return an iterator over the arguments to this term. 
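+    ///
+    /// Primitive terms have no arguments; an aggregate yields its single
+    /// aggregated term; a map yields its keys and values interleaved.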
+ pub fn arguments(&self) -> Box + '_> { + match self { + Term::Primitive(_) => Box::new(None.into_iter()), + Term::Aggregate(term) => Box::new(Some(term.aggregate_term()).into_iter()), + Term::FunctionTerm(term) => Box::new(term.arguments()), + Term::Map(term) => Box::new(term.key_value().flat_map(|(key, value)| vec![key, value])), + Term::Operation(term) => Box::new(term.arguments()), + Term::Tuple(term) => Box::new(term.arguments()), + } + } } impl From for Term { @@ -215,7 +272,14 @@ impl ProgramComponent for Term { where Self: Sized, { - todo!() + match self { + Term::Primitive(term) => term.validate(builder), + Term::Aggregate(term) => term.validate(builder), + Term::FunctionTerm(term) => term.validate(builder), + Term::Map(term) => term.validate(builder), + Term::Operation(term) => term.validate(builder), + Term::Tuple(term) => term.validate(builder), + } } } diff --git a/nemo/src/rule_model/components/term/aggregate.rs b/nemo/src/rule_model/components/term/aggregate.rs index 9fee6f819..fa9ceb3a6 100644 --- a/nemo/src/rule_model/components/term/aggregate.rs +++ b/nemo/src/rule_model/components/term/aggregate.rs @@ -1,4 +1,4 @@ -//! This module defines [Aggregate] +//! This module defines [Aggregate]. #![allow(missing_docs)] use std::{fmt::Display, hash::Hash}; @@ -9,29 +9,38 @@ use strum_macros::EnumIter; use crate::{ rule_model::{ components::{IterableVariables, ProgramComponent}, - error::ValidationErrorBuilder, + error::{validation_error::ValidationErrorKind, ValidationErrorBuilder}, origin::Origin, }, syntax::builtin::aggregate, }; -use super::{primitive::variable::Variable, Term}; +use super::{primitive::variable::Variable, value_type::ValueType, Term}; /// Aggregate operation on logical values #[derive(Assoc, EnumIter, Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] #[func(pub fn name(&self) -> &'static str)] +#[func(pub fn value_type(&self) -> ValueType)] +#[func(pub fn input_type(&self) -> Option)] pub enum AggregateKind { /// Count of distinct values #[assoc(name = aggregate::COUNT)] + #[assoc(value_type = ValueType::Number)] CountValues, /// Minimum numerical value #[assoc(name = aggregate::MIN)] + #[assoc(value_type = ValueType::Number)] + #[assoc(input_type = ValueType::Number)] MinNumber, /// Maximum numerical value #[assoc(name = aggregate::MAX)] + #[assoc(value_type = ValueType::Number)] + #[assoc(input_type = ValueType::Number)] MaxNumber, /// Sum of numerical values #[assoc(name = aggregate::SUM)] + #[assoc(value_type = ValueType::Number)] + #[assoc(input_type = ValueType::Number)] SumOfNumbers, } @@ -105,6 +114,11 @@ impl Aggregate { Self::new(AggregateKind::MaxNumber, aggregate, distinct) } + /// Return the value type of this term. + pub fn value_type(&self) -> ValueType { + self.kind.value_type() + } + /// Return a reference to aggregated term. 
pub fn aggregate_term(&self) -> &Term { &self.aggregate @@ -187,11 +201,26 @@ impl ProgramComponent for Aggregate { self } - fn validate(&self, _builder: &mut ValidationErrorBuilder) -> Result<(), ()> + fn validate(&self, builder: &mut ValidationErrorBuilder) -> Result<(), ()> where Self: Sized, { - todo!() + let input_type = self.aggregate.value_type(); + if let Some(expected_type) = self.kind.input_type() { + if input_type != expected_type { + builder.report_error( + self.aggregate.origin().clone(), + ValidationErrorKind::AggregateInvalidValueType { + found: input_type.name().to_string(), + expected: expected_type.name().to_string(), + }, + ); + + return Err(()); + } + } + + Ok(()) } } diff --git a/nemo/src/rule_model/components/term/function.rs b/nemo/src/rule_model/components/term/function.rs index bbf36ead3..aca2bc756 100644 --- a/nemo/src/rule_model/components/term/function.rs +++ b/nemo/src/rule_model/components/term/function.rs @@ -1,14 +1,14 @@ -//! This module defines [FunctionTerm] +//! This module defines [FunctionTerm]. use std::{fmt::Display, hash::Hash}; use crate::rule_model::{ - components::{IterableVariables, ProgramComponent, Tag}, - error::{ValidationError, ValidationErrorBuilder}, + components::{tag::Tag, IterableVariables, ProgramComponent}, + error::{validation_error::ValidationErrorKind, ValidationError, ValidationErrorBuilder}, origin::Origin, }; -use super::{primitive::variable::Variable, Term}; +use super::{primitive::variable::Variable, value_type::ValueType, Term}; /// Function term /// @@ -29,28 +29,37 @@ pub struct FunctionTerm { macro_rules! function { // Base case: no elements ($name:tt) => { - crate::rule_model::component::term::function::FunctionTerm::new($name, Vec::new()) + crate::rule_model::components::term::function::FunctionTerm::new( + crate::rule_model::components::tag::Tag::from($name), Vec::new() + ) }; // Recursive case: handle each term, separated by commas ($name:tt; $($tt:tt)*) => {{ let mut terms = Vec::new(); term_list!(terms; $($tt)*); - crate::rule_model::components::term::function::FunctionTerm::new($name,terms) + crate::rule_model::components::term::function::FunctionTerm::new( + crate::rule_model::components::tag::Tag::from($name), terms + ) }}; } impl FunctionTerm { /// Create a new [FunctionTerm]. - pub fn new>(name: &str, subterms: Terms) -> Self { + pub fn new>(tag: Tag, subterms: Terms) -> Self { Self { origin: Origin::Created, - tag: Tag::new(name.to_string()), + tag, terms: subterms.into_iter().collect(), } } - /// Return an iterator over the subterms of this function term. - pub fn subterms(&self) -> impl Iterator { + /// Return the value type of this term. + pub fn value_type(&self) -> ValueType { + ValueType::FunctionTerm + } + + /// Return an iterator over the arguments of this function term. + pub fn arguments(&self) -> impl Iterator { self.terms.iter() } @@ -129,13 +138,20 @@ impl ProgramComponent for FunctionTerm { Self: Sized, { if !self.tag.is_valid() { - todo!() + builder.report_error( + self.tag.origin().clone(), + ValidationErrorKind::InvalidTermTag(self.tag.to_string()), + ); } - for term in self.subterms() { + for term in self.arguments() { term.validate(builder)? 
} + if self.is_empty() { + builder.report_error(self.origin.clone(), ValidationErrorKind::FunctionTermEmpty); + } + Ok(()) } } diff --git a/nemo/src/rule_model/components/term/map.rs b/nemo/src/rule_model/components/term/map.rs index e06bfae59..ecf35547b 100644 --- a/nemo/src/rule_model/components/term/map.rs +++ b/nemo/src/rule_model/components/term/map.rs @@ -3,12 +3,12 @@ use std::{fmt::Display, hash::Hash}; use crate::rule_model::{ - components::{IterableVariables, ProgramComponent, Tag}, + components::{tag::Tag, IterableVariables, ProgramComponent}, error::ValidationErrorBuilder, origin::Origin, }; -use super::{primitive::variable::Variable, Term}; +use super::{primitive::variable::Variable, value_type::ValueType, Term}; /// Map /// @@ -45,6 +45,11 @@ impl Map { } } + /// Return the value type of this term. + pub fn value_type(&self) -> ValueType { + ValueType::Map + } + /// Return the tag of this map. pub fn tag(&self) -> Option { self.tag.clone() @@ -70,7 +75,9 @@ impl Display for Map { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.write_fmt(format_args!( "{}{{", - self.tag.as_ref().map_or("", |tag| &tag.0) + self.tag + .as_ref() + .map_or(String::default(), |tag| tag.to_string()) ))?; for (term_index, (key, value)) in self.map.iter().enumerate() { @@ -113,7 +120,7 @@ impl ProgramComponent for Map { } fn origin(&self) -> &Origin { - todo!() + &self.origin } fn set_origin(mut self, origin: Origin) -> Self @@ -128,7 +135,12 @@ impl ProgramComponent for Map { where Self: Sized, { - todo!() + for (key, value) in self.key_value() { + key.validate(builder)?; + value.validate(builder)?; + } + + Ok(()) } } diff --git a/nemo/src/rule_model/components/term/operation.rs b/nemo/src/rule_model/components/term/operation.rs index eb5c17d7c..f07a9f935 100644 --- a/nemo/src/rule_model/components/term/operation.rs +++ b/nemo/src/rule_model/components/term/operation.rs @@ -8,12 +8,13 @@ use operation_kind::OperationKind; use crate::rule_model::{ components::{IterableVariables, ProgramComponent}, - error::ValidationErrorBuilder, + error::{validation_error::ValidationErrorKind, ValidationErrorBuilder}, origin::Origin, }; use super::{ primitive::{variable::Variable, Primitive}, + value_type::ValueType, Term, }; @@ -42,6 +43,21 @@ impl Operation { } } + /// Return an iterator over the arguments of this operation. + pub fn arguments(&self) -> impl Iterator { + self.subterms.iter() + } + + /// Return the [OperationKind] of this operation. + pub fn kind(&self) -> OperationKind { + self.kind + } + + /// Return the value type of this term. + pub fn value_type(&self) -> ValueType { + self.kind.return_type() + } + /// Check whether this operation has the form of an assignment of a variable to a term. /// If so return the variable and the term as a pair or `None` otherwise. 
/// @@ -199,7 +215,23 @@ impl ProgramComponent for Operation { where Self: Sized, { - todo!() + if !self.kind.num_arguments().validate(self.subterms.len()) { + builder.report_error( + self.origin.clone(), + ValidationErrorKind::OperationArgumentNumber { + used: self.subterms.len(), + expected: self.kind.num_arguments().to_string(), + }, + ); + + return Err(()); + } + + for argument in self.arguments() { + argument.validate(builder)?; + } + + Ok(()) } } diff --git a/nemo/src/rule_model/components/term/operation/operation_kind.rs b/nemo/src/rule_model/components/term/operation/operation_kind.rs index a77206da5..1a23e97de 100644 --- a/nemo/src/rule_model/components/term/operation/operation_kind.rs +++ b/nemo/src/rule_model/components/term/operation/operation_kind.rs @@ -6,7 +6,7 @@ use std::fmt::Display; use enum_assoc::Assoc; use strum_macros::EnumIter; -use crate::syntax::builtin::function; +use crate::{rule_model::components::term::value_type::ValueType, syntax::builtin::function}; /// Number of arguments supported by an operation #[derive(Debug)] @@ -16,11 +16,11 @@ pub(crate) enum OperationNumArguments { /// Operation requires two arguments Binary, /// Operation requires three arguments - Ternary, + _Ternary, /// Operation supports arbitrary many arguments (including zero) Arbitrary, /// Operation supports arguments that satisfy one of the given requirements - Choice(Vec), + Choice(Vec), } impl OperationNumArguments { @@ -29,11 +29,38 @@ impl OperationNumArguments { match self { OperationNumArguments::Unary => num_arguments == 1, OperationNumArguments::Binary => num_arguments == 2, - OperationNumArguments::Ternary => num_arguments == 3, + OperationNumArguments::_Ternary => num_arguments == 3, OperationNumArguments::Arbitrary => true, - OperationNumArguments::Choice(choice) => { - choice.iter().any(|num| num.validate(num_arguments)) - } + OperationNumArguments::Choice(choice) => choice.iter().any(|&num| num == num_arguments), + } + } +} + +impl Display for OperationNumArguments { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + OperationNumArguments::Unary => write!(f, "1"), + OperationNumArguments::Binary => write!(f, "2"), + OperationNumArguments::_Ternary => write!(f, "3"), + OperationNumArguments::Arbitrary => write!(f, ""), + OperationNumArguments::Choice(choice) => match choice.len() { + 0 => write!(f, "0"), + 1 => write!(f, "{}", choice[0]), + 2 => write!(f, "{} or {}", choice[0], choice[1]), + _ => { + for (index, value) in choice.iter().enumerate() { + write!(f, "{}", value)?; + + if index < choice.len() - 2 { + write!(f, ", ")?; + } else if index == choice.len() - 2 { + write!(f, ", or ")?; + } + } + + Ok(()) + } + }, } } } @@ -42,292 +69,292 @@ impl OperationNumArguments { #[derive(Assoc, EnumIter, Debug, Copy, Clone, PartialEq, Eq, Hash, PartialOrd)] #[func(pub fn name(&self) -> &'static str)] #[func(pub fn num_arguments(&self) -> OperationNumArguments)] -#[func(pub fn is_boolean(&self) -> bool)] +#[func(pub fn return_type(&self) -> ValueType)] pub enum OperationKind { /// Equality #[assoc(name = function::EQUAL)] #[assoc(num_arguments = OperationNumArguments::Binary)] - #[assoc(is_boolean = true)] + #[assoc(return_type = ValueType::Boolean)] Equal, /// Inequality #[assoc(name = function::UNEQUAL)] #[assoc(num_arguments = OperationNumArguments::Binary)] - #[assoc(is_boolean = true)] + #[assoc(return_type = ValueType::Boolean)] Unequals, /// Sum of numeric values #[assoc(name = function::SUM)] #[assoc(num_arguments = 
OperationNumArguments::Arbitrary)]
-    #[assoc(is_boolean = false)]
+    #[assoc(return_type = ValueType::Number)]
     NumericSum,
     /// Subtraction between two numeric values
     #[assoc(name = function::SUBTRACTION)]
     #[assoc(num_arguments = OperationNumArguments::Binary)]
-    #[assoc(is_boolean = false)]
+    #[assoc(return_type = ValueType::Number)]
     NumericSubtraction,
     /// Product of numeric values
     #[assoc(name = function::PRODUCT)]
     #[assoc(num_arguments = OperationNumArguments::Arbitrary)]
-    #[assoc(is_boolean = false)]
+    #[assoc(return_type = ValueType::Number)]
     NumericProduct,
     /// Division between two numeric values
     #[assoc(name = function::DIVISION)]
     #[assoc(num_arguments = OperationNumArguments::Binary)]
-    #[assoc(is_boolean = false)]
+    #[assoc(return_type = ValueType::Number)]
     NumericDivision,
     /// Logarithm of a numeric value to some numeric base
     #[assoc(num_arguments = OperationNumArguments::Binary)]
     #[assoc(name = function::LOGARITHM)]
-    #[assoc(is_boolean = false)]
+    #[assoc(return_type = ValueType::Number)]
     NumericLogarithm,
     /// Numeric value raised to another numeric value
     #[assoc(name = function::POW)]
     #[assoc(num_arguments = OperationNumArguments::Binary)]
-    #[assoc(is_boolean = false)]
+    #[assoc(return_type = ValueType::Number)]
     NumericPower,
     /// Remainder of a division between two numeric values
     #[assoc(name = function::REM)]
     #[assoc(num_arguments = OperationNumArguments::Binary)]
-    #[assoc(is_boolean = false)]
+    #[assoc(return_type = ValueType::Number)]
     NumericRemainder,
     /// Numeric greater than or equals comparison
     #[assoc(name = function::GREATEREQ)]
     #[assoc(num_arguments = OperationNumArguments::Binary)]
-    #[assoc(is_boolean = false)]
+    #[assoc(return_type = ValueType::Boolean)]
     NumericGreaterthaneq,
     /// Numeric greater than comparison
     #[assoc(name = function::GREATER)]
     #[assoc(num_arguments = OperationNumArguments::Binary)]
-    #[assoc(is_boolean = false)]
+    #[assoc(return_type = ValueType::Boolean)]
     NumericGreaterthan,
     /// Numeric less than or equals comparison
     #[assoc(name = function::LESSEQ)]
     #[assoc(num_arguments = OperationNumArguments::Binary)]
-    #[assoc(is_boolean = false)]
+    #[assoc(return_type = ValueType::Boolean)]
     NumericLessthaneq,
     /// Numeric less than comparison
     #[assoc(name = function::LESS)]
     #[assoc(num_arguments = OperationNumArguments::Binary)]
-    #[assoc(is_boolean = false)]
+    #[assoc(return_type = ValueType::Boolean)]
     NumericLessthan,
     /// Lexicographic comparison between strings
     #[assoc(name = function::COMPARE)]
     #[assoc(num_arguments = OperationNumArguments::Binary)]
-    #[assoc(is_boolean = false)]
+    #[assoc(return_type = ValueType::Number)]
     StringCompare,
     /// Check whether string is contained in another, corresponding to SPARQL function CONTAINS.
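+    /// For example, `CONTAINS("abcd", "bc")` evaluates to `true`.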
#[assoc(name = function::CONTAINS)] #[assoc(num_arguments = OperationNumArguments::Binary)] - #[assoc(is_boolean = true)] + #[assoc(return_type = ValueType::Boolean)] StringContains, /// String starting at some start position #[assoc(name = function::SUBSTR)] - #[assoc(num_arguments = OperationNumArguments::Choice(vec![OperationNumArguments::Binary, OperationNumArguments::Ternary]))] - #[assoc(is_boolean = false)] + #[assoc(num_arguments = OperationNumArguments::Choice(vec![2, 3]))] + #[assoc(return_type = ValueType::String)] StringSubstring, /// First part of a string split by some other string #[assoc(name = function::STRBEFORE)] #[assoc(num_arguments = OperationNumArguments::Binary)] - #[assoc(is_boolean = false)] + #[assoc(return_type = ValueType::String)] StringBefore, /// Second part of a string split by some other string #[assoc(name = function::STRAFTER)] #[assoc(num_arguments = OperationNumArguments::Binary)] - #[assoc(is_boolean = false)] + #[assoc(return_type = ValueType::String)] StringAfter, /// Whether string starts with a certain string #[assoc(name = function::STRSTARTS)] #[assoc(num_arguments = OperationNumArguments::Binary)] - #[assoc(is_boolean = true)] + #[assoc(return_type = ValueType::Boolean)] StringStarts, /// Whether string ends with a certain string #[assoc(name = function::STRENDS)] #[assoc(num_arguments = OperationNumArguments::Binary)] - #[assoc(is_boolean = true)] + #[assoc(return_type = ValueType::Boolean)] StringEnds, /// Boolean negation #[assoc(name = function::NOT)] #[assoc(num_arguments = OperationNumArguments::Unary)] - #[assoc(is_boolean = true)] + #[assoc(return_type = ValueType::Boolean)] BooleanNegation, /// Cast to double #[assoc(name = function::DOUBLE)] #[assoc(num_arguments = OperationNumArguments::Unary)] - #[assoc(is_boolean = false)] + #[assoc(return_type = ValueType::Number)] CastToDouble, /// Cast to float #[assoc(name = function::FLOAT)] #[assoc(num_arguments = OperationNumArguments::Unary)] - #[assoc(is_boolean = false)] + #[assoc(return_type = ValueType::Number)] CastToFloat, /// Cast to integer #[assoc(name = function::INT)] #[assoc(num_arguments = OperationNumArguments::Unary)] - #[assoc(is_boolean = false)] + #[assoc(return_type = ValueType::Number)] CastToInteger, /// Canonical string representation of a value #[assoc(name = function::FULLSTR)] #[assoc(num_arguments = OperationNumArguments::Unary)] - #[assoc(is_boolean = false)] + #[assoc(return_type = ValueType::String)] CanonicalString, /// Check if value is an integer #[assoc(name = function::IS_INTEGER)] #[assoc(num_arguments = OperationNumArguments::Unary)] - #[assoc(is_boolean = true)] + #[assoc(return_type = ValueType::Boolean)] CheckIsInteger, /// Check if value is a float #[assoc(name = function::IS_FLOAT)] #[assoc(num_arguments = OperationNumArguments::Unary)] - #[assoc(is_boolean = true)] + #[assoc(return_type = ValueType::Boolean)] CheckIsFloat, /// Check if value is a double #[assoc(name = function::IS_DOUBLE)] #[assoc(num_arguments = OperationNumArguments::Unary)] - #[assoc(is_boolean = true)] + #[assoc(return_type = ValueType::Boolean)] CheckIsDouble, /// Check if value is an iri #[assoc(name = function::IS_IRI)] #[assoc(num_arguments = OperationNumArguments::Unary)] - #[assoc(is_boolean = true)] + #[assoc(return_type = ValueType::Boolean)] CheckIsIri, /// Check if value is numeric #[assoc(name = function::IS_NUMERIC)] #[assoc(num_arguments = OperationNumArguments::Unary)] - #[assoc(is_boolean = true)] + #[assoc(return_type = ValueType::Boolean)] CheckIsNumeric, /// 
Check if value is a null
     #[assoc(name = function::IS_NULL)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
-    #[assoc(is_boolean = true)]
+    #[assoc(return_type = ValueType::Boolean)]
     CheckIsNull,
     /// Check if value is a string
     #[assoc(name = function::IS_STRING)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
-    #[assoc(is_boolean = true)]
+    #[assoc(return_type = ValueType::Boolean)]
     CheckIsString,
     /// Get datatype of a value
     #[assoc(name = function::DATATYPE)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
-    #[assoc(is_boolean = false)]
+    #[assoc(return_type = ValueType::Constant)]
     Datatype,
     /// Get language tag of a language-tagged string
     #[assoc(name = function::LANG)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
-    #[assoc(is_boolean = false)]
+    #[assoc(return_type = ValueType::String)]
     LanguageTag,
     /// Absolute value of a numeric value
     #[assoc(name = function::ABS)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
-    #[assoc(is_boolean = false)]
+    #[assoc(return_type = ValueType::Number)]
     NumericAbsolute,
     /// Cosine of a numeric value
     #[assoc(name = function::COS)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
-    #[assoc(is_boolean = false)]
+    #[assoc(return_type = ValueType::Number)]
     NumericCosine,
     /// Rounding up of a numeric value
     #[assoc(name = function::CEIL)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
-    #[assoc(is_boolean = false)]
+    #[assoc(return_type = ValueType::Number)]
     NumericCeil,
     /// Rounding down of a numeric value
     #[assoc(name = function::FLOOR)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
-    #[assoc(is_boolean = false)]
+    #[assoc(return_type = ValueType::Number)]
     NumericFloor,
     /// Additive inverse of a numeric value
     #[assoc(name = function::INVERSE)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
-    #[assoc(is_boolean = false)]
+    #[assoc(return_type = ValueType::Number)]
     NumericNegation,
     /// Rounding of a numeric value
     #[assoc(name = function::ROUND)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
-    #[assoc(is_boolean = false)]
+    #[assoc(return_type = ValueType::Number)]
     NumericRound,
     /// Sine of a numeric value
     #[assoc(name = function::SIN)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
-    #[assoc(is_boolean = false)]
+    #[assoc(return_type = ValueType::Number)]
     NumericSine,
     /// Square root of a numeric value
     #[assoc(name = function::SQRT)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
-    #[assoc(is_boolean = false)]
+    #[assoc(return_type = ValueType::Number)]
     NumericSquareroot,
     /// Tangent of a numeric value
     #[assoc(name = function::TAN)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
-    #[assoc(is_boolean = false)]
+    #[assoc(return_type = ValueType::Number)]
     NumericTangent,
     /// Length of a string value
     #[assoc(name = function::STRLEN)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
-    #[assoc(is_boolean = false)]
+    #[assoc(return_type = ValueType::Number)]
     StringLength,
     /// Reverse of a string value
     #[assoc(name = function::STRREV)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
-    #[assoc(is_boolean = false)]
+    #[assoc(return_type = ValueType::String)]
     StringReverse,
     /// String converted to lowercase letters
     #[assoc(name = function::LCASE)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
-    #[assoc(is_boolean = false)]
+    #[assoc(return_type = ValueType::String)]
     StringLowercase,
     /// String converted to uppercase letters
     #[assoc(name = function::UCASE)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
-    #[assoc(is_boolean = false)]
+    #[assoc(return_type = ValueType::String)]
     StringUppercase,
     /// Bitwise and operation
     #[assoc(name = function::BITAND)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
-    #[assoc(is_boolean = false)]
+    #[assoc(return_type = ValueType::Number)]
     BitAnd,
     /// Bitwise or operation
     #[assoc(name = function::BITOR)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
-    #[assoc(is_boolean = false)]
+    #[assoc(return_type = ValueType::Number)]
     BitOr,
     /// Bitwise xor operation
     #[assoc(name = function::BITXOR)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
-    #[assoc(is_boolean = false)]
+    #[assoc(return_type = ValueType::Number)]
     BitXor,
     /// Conjunction of boolean values
     #[assoc(name = function::AND)]
     #[assoc(num_arguments = OperationNumArguments::Arbitrary)]
-    #[assoc(is_boolean = true)]
+    #[assoc(return_type = ValueType::Boolean)]
     BooleanConjunction,
     /// Disjunction of boolean values
     #[assoc(name = function::OR)]
     #[assoc(num_arguments = OperationNumArguments::Arbitrary)]
-    #[assoc(is_boolean = true)]
+    #[assoc(return_type = ValueType::Boolean)]
     BooleanDisjunction,
     /// Minimum of numeric values
     #[assoc(name = function::MIN)]
     #[assoc(num_arguments = OperationNumArguments::Arbitrary)]
-    #[assoc(is_boolean = false)]
+    #[assoc(return_type = ValueType::Number)]
     NumericMinimum,
     /// Maximum of numeric values
     #[assoc(name = function::MAX)]
     #[assoc(num_arguments = OperationNumArguments::Arbitrary)]
-    #[assoc(is_boolean = false)]
+    #[assoc(return_type = ValueType::Number)]
     NumericMaximum,
     /// Lukasiewicz norm of numeric values
     #[assoc(name = function::LUKA)]
     #[assoc(num_arguments = OperationNumArguments::Arbitrary)]
-    #[assoc(is_boolean = false)]
+    #[assoc(return_type = ValueType::Number)]
     NumericLukasiewicz,
     /// Concatenation of two string values, corresponding to SPARQL function CONCAT.
     #[assoc(name = function::CONCAT)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
-    #[assoc(is_boolean = false)]
+    #[assoc(return_type = ValueType::String)]
     StringConcatenation,
     /// Lexical value
     #[assoc(name = function::STR)]
     #[assoc(num_arguments = OperationNumArguments::Unary)]
-    #[assoc(is_boolean = false)]
+    #[assoc(return_type = ValueType::String)]
     LexicalValue,
 }
diff --git a/nemo/src/rule_model/components/term/primitive.rs b/nemo/src/rule_model/components/term/primitive.rs
index d24a11bdc..83c02cfa8 100644
--- a/nemo/src/rule_model/components/term/primitive.rs
+++ b/nemo/src/rule_model/components/term/primitive.rs
@@ -15,6 +15,8 @@ use crate::rule_model::{
     origin::Origin,
 };
 
+use super::value_type::ValueType;
+
 /// Primitive term
 ///
 /// Represents basic, indivisible values, which can either be [GroundTerm]s or [Variable]s.
@@ -32,6 +34,14 @@ impl Primitive {
     /// Check whether this term is ground.
     pub fn is_ground(&self) -> bool {
         matches!(self, Self::Ground(_))
     }
+
+    /// Return the value type of this term.
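+    /// A [Variable] is typed as [ValueType::Any], since its value is only
+    /// fixed during reasoning; a ground term reports its concrete type.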
+ pub fn value_type(&self) -> ValueType { + match self { + Primitive::Variable(_) => ValueType::Any, + Primitive::Ground(term) => term.value_type(), + } + } } impl From for Primitive { diff --git a/nemo/src/rule_model/components/term/primitive/ground.rs b/nemo/src/rule_model/components/term/primitive/ground.rs index a8a68415e..e5e75ba06 100644 --- a/nemo/src/rule_model/components/term/primitive/ground.rs +++ b/nemo/src/rule_model/components/term/primitive/ground.rs @@ -2,10 +2,10 @@ use std::{fmt::Display, hash::Hash}; -use nemo_physical::datavalues::{AnyDataValue, IriDataValue}; +use nemo_physical::datavalues::{AnyDataValue, DataValue, IriDataValue, ValueDomain}; use crate::rule_model::{ - components::ProgramComponent, + components::{term::value_type::ValueType, ProgramComponent}, error::{ValidationError, ValidationErrorBuilder}, origin::Origin, }; @@ -30,6 +30,28 @@ impl GroundTerm { value, } } + + /// Return the value type of this term. + pub fn value_type(&self) -> ValueType { + match self.value.value_domain() { + ValueDomain::Float + | ValueDomain::Double + | ValueDomain::UnsignedLong + | ValueDomain::NonNegativeLong + | ValueDomain::UnsignedInt + | ValueDomain::NonNegativeInt + | ValueDomain::Long + | ValueDomain::Int => ValueType::Number, + ValueDomain::PlainString => ValueType::String, + ValueDomain::LanguageTaggedString => ValueType::LanguageString, + ValueDomain::Iri => ValueType::Constant, + ValueDomain::Tuple => ValueType::Tuple, + ValueDomain::Map => ValueType::Map, + ValueDomain::Boolean => ValueType::Boolean, + ValueDomain::Null => ValueType::Null, + ValueDomain::Other => ValueType::Other, + } + } } impl From for GroundTerm { @@ -118,7 +140,7 @@ impl ProgramComponent for GroundTerm { self } - fn validate(&self, builder: &mut ValidationErrorBuilder) -> Result<(), ()> + fn validate(&self, _builder: &mut ValidationErrorBuilder) -> Result<(), ()> where Self: Sized, { diff --git a/nemo/src/rule_model/components/term/primitive/variable.rs b/nemo/src/rule_model/components/term/primitive/variable.rs index 3209ee546..4b29523ce 100644 --- a/nemo/src/rule_model/components/term/primitive/variable.rs +++ b/nemo/src/rule_model/components/term/primitive/variable.rs @@ -27,7 +27,7 @@ impl VariableName { /// Validate variable name. pub fn is_valid(&self) -> bool { - !self.0.is_empty() + !self.0.starts_with("__") } } @@ -72,6 +72,16 @@ impl Variable { Variable::Existential(variable) => Some(variable.name()), } } + + /// Return whether this is a universal variable. + pub fn is_universal(&self) -> bool { + matches!(self, Variable::Universal(_)) + } + + /// Return whether this is an existential variable. 
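+    /// The anonymous variable `_` counts as universal, so this
+    /// returns `false` for it.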
+ pub fn is_existential(&self) -> bool { + matches!(self, Variable::Existential(_)) + } } impl From for Variable { @@ -125,14 +135,8 @@ impl ProgramComponent for Variable { Self: Sized, { match &self { - Variable::Universal(universal) => { - universal.validate(builder)?; - } - Variable::Existential(existential) => { - existential.validate(builder)?; - } + Variable::Universal(universal) => universal.validate(builder), + Variable::Existential(existential) => existential.validate(builder), } - - Ok(()) } } diff --git a/nemo/src/rule_model/components/term/primitive/variable/existential.rs b/nemo/src/rule_model/components/term/primitive/variable/existential.rs index f5cdac4d9..45f2420ce 100644 --- a/nemo/src/rule_model/components/term/primitive/variable/existential.rs +++ b/nemo/src/rule_model/components/term/primitive/variable/existential.rs @@ -4,7 +4,7 @@ use std::{fmt::Display, hash::Hash}; use crate::rule_model::{ components::ProgramComponent, - error::{ValidationError, ValidationErrorBuilder}, + error::{validation_error::ValidationErrorKind, ValidationError, ValidationErrorBuilder}, origin::Origin, }; @@ -85,6 +85,13 @@ impl ProgramComponent for ExistentialVariable { where Self: Sized, { - todo!() + if !self.name.is_valid() { + builder.report_error( + self.origin, + ValidationErrorKind::InvalidVariableName(self.name()), + ); + } + + Ok(()) } } diff --git a/nemo/src/rule_model/components/term/primitive/variable/universal.rs b/nemo/src/rule_model/components/term/primitive/variable/universal.rs index 97b309ad0..215156718 100644 --- a/nemo/src/rule_model/components/term/primitive/variable/universal.rs +++ b/nemo/src/rule_model/components/term/primitive/variable/universal.rs @@ -4,7 +4,7 @@ use std::{fmt::Display, hash::Hash}; use crate::rule_model::{ components::ProgramComponent, - error::{ValidationError, ValidationErrorBuilder}, + error::{validation_error::ValidationErrorKind, ValidationError, ValidationErrorBuilder}, origin::Origin, }; @@ -105,6 +105,15 @@ impl ProgramComponent for UniversalVariable { where Self: Sized, { - todo!() + if let Some(name) = &self.name { + if !name.is_valid() { + builder.report_error( + self.origin, + ValidationErrorKind::InvalidVariableName(name.0.clone()), + ); + } + } + + Ok(()) } } diff --git a/nemo/src/rule_model/components/term/tuple.rs b/nemo/src/rule_model/components/term/tuple.rs index 980074d48..941060970 100644 --- a/nemo/src/rule_model/components/term/tuple.rs +++ b/nemo/src/rule_model/components/term/tuple.rs @@ -8,7 +8,7 @@ use crate::rule_model::{ origin::Origin, }; -use super::{primitive::variable::Variable, Term}; +use super::{primitive::variable::Variable, value_type::ValueType, Term}; /// Tuple /// @@ -27,7 +27,7 @@ pub struct Tuple { macro_rules! tuple { // Base case: no elements () => { - crate::rule_model::component::term::tuple::Tuple::new(Vec::new()) + crate::rule_model::components::term::tuple::Tuple::new(Vec::new()) }; // Recursive case: handle each term, separated by commas ($($tt:tt)*) => {{ @@ -45,6 +45,16 @@ impl Tuple { terms: terms.into_iter().collect(), } } + + /// Return the value type of this term. + pub fn value_type(&self) -> ValueType { + ValueType::Tuple + } + + /// Return an iterator over the arguments of this tuple. 
+    pub fn arguments(&self) -> impl Iterator<Item = &Term> {
+        self.terms.iter()
+    }
 }
 
 impl Display for Tuple {
@@ -90,7 +100,7 @@ impl ProgramComponent for Tuple {
     }
 
     fn origin(&self) -> &Origin {
-        todo!()
+        &self.origin
     }
 
     fn set_origin(mut self, origin: Origin) -> Self
@@ -105,7 +115,11 @@ impl ProgramComponent for Tuple {
     where
         Self: Sized,
     {
-        todo!()
+        for term in self.arguments() {
+            term.validate(builder)?;
+        }
+
+        Ok(())
     }
 }
diff --git a/nemo/src/rule_model/components/term/value_type.rs b/nemo/src/rule_model/components/term/value_type.rs
new file mode 100644
index 000000000..979699cde
--- /dev/null
+++ b/nemo/src/rule_model/components/term/value_type.rs
@@ -0,0 +1,43 @@
+//! This module defines [ValueType].
+#![allow(missing_docs)]
+
+use enum_assoc::Assoc;
+
+/// Potential value types of terms
+#[derive(Assoc, Debug, Clone, Copy, PartialEq, Eq)]
+#[func(pub fn name(&self) -> &'static str)]
+pub enum ValueType {
+    /// Boolean
+    #[assoc(name = "boolean")]
+    Boolean,
+    /// Number
+    #[assoc(name = "number")]
+    Number,
+    /// String
+    #[assoc(name = "string")]
+    String,
+    /// Language string
+    #[assoc(name = "lang-string")]
+    LanguageString,
+    /// Constant
+    #[assoc(name = "constant")]
+    Constant,
+    /// Null
+    #[assoc(name = "null")]
+    Null,
+    /// Map
+    #[assoc(name = "map")]
+    Map,
+    /// Tuple
+    #[assoc(name = "tuple")]
+    Tuple,
+    /// Function term
+    #[assoc(name = "function term")]
+    FunctionTerm,
+    /// Other
+    #[assoc(name = "other")]
+    Other,
+    /// Any
+    #[assoc(name = "any")]
+    Any,
+}
diff --git a/nemo/src/rule_model/error/hint.rs b/nemo/src/rule_model/error/hint.rs
index c54fa1807..26dbc9969 100644
--- a/nemo/src/rule_model/error/hint.rs
+++ b/nemo/src/rule_model/error/hint.rs
@@ -11,7 +11,7 @@ use enum_assoc::Assoc;
 pub enum Hint {
     #[assoc(message = "unnamed universal variables may be expressed with an underscore `_`".to_string())]
     AnonymousVariables,
-    #[assoc(message = format!("similar {} exists: `{}`", _kind, _name))]
+    #[assoc(message = format!("a {} with a similar name exists: `{}`", _kind, _name))]
     SimilarExists { kind: String, name: String },
 }
diff --git a/nemo/src/rule_model/error/hint/similar.rs b/nemo/src/rule_model/error/hint/similar.rs
index 53be7176f..9818a64d1 100644
--- a/nemo/src/rule_model/error/hint/similar.rs
+++ b/nemo/src/rule_model/error/hint/similar.rs
@@ -1,10 +1,34 @@
-//! This module defines a helper function for computin g
+//! This module defines a helper function for obtaining a [Hint]
+//! that points the user to a similar string that exists in a collection of source strings.
+
+use similar_string::find_best_similarity;
 
 use super::Hint;
 
+const SIMILARITY_MIN_LENGTH: usize = 3;
+const SIMILARITY_THRESHOLD: f64 = 0.8;
+
 impl Hint {
-    /// Checks whether a similar string exist in a collection of source strings
-    pub fn similar() -> Option<Hint> {
-        todo!()
+    /// Checks whether a similar string exists in a collection of source strings.
+    /// Returns the most similar string, if there is one.
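+    ///
+    /// For example, `Hint::similar("predicate", "ancestors", &["ancestor", "parent"])`
+    /// might suggest `ancestor`, provided the similarity threshold is met.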
+    pub fn similar(
+        kind: &str,
+        target: impl AsRef<str>,
+        options: &[impl AsRef<str>],
+    ) -> Option<Hint> {
+        if target.as_ref().len() < SIMILARITY_MIN_LENGTH {
+            return None;
+        }
+
+        let (best, confidence) = find_best_similarity(target, options)?;
+
+        if best.len() >= SIMILARITY_MIN_LENGTH && confidence >= SIMILARITY_THRESHOLD {
+            return Some(Hint::SimilarExists {
+                kind: kind.to_string(),
+                name: best,
+            });
+        }
+
+        None
     }
 }
diff --git a/nemo/src/rule_model/error/info.rs b/nemo/src/rule_model/error/info.rs
index b6aea309b..b75fb1b2e 100644
--- a/nemo/src/rule_model/error/info.rs
+++ b/nemo/src/rule_model/error/info.rs
@@ -10,6 +10,9 @@ pub enum Info {
     /// First definition occurred somewhere
     #[assoc(message = format!("first definition occurred here"))]
     FirstDefinition,
+    /// First use occurred somewhere
+    #[assoc(message = format!("first use occurred here"))]
+    FirstUse,
 }
 
 impl std::fmt::Display for Info {
diff --git a/nemo/src/rule_model/error/validation_error.rs b/nemo/src/rule_model/error/validation_error.rs
index cc0f1679c..9c59b15e1 100644
--- a/nemo/src/rule_model/error/validation_error.rs
+++ b/nemo/src/rule_model/error/validation_error.rs
@@ -4,9 +4,7 @@
 use enum_assoc::Assoc;
 use thiserror::Error;
 
-use crate::rule_model::components::term::{
-    aggregate::Aggregate, primitive::variable::Variable, Term,
-};
+use crate::rule_model::components::term::primitive::variable::Variable;
 
 /// Types of errors that occur while building the logical rule model
 #[derive(Assoc, Error, Debug)]
@@ -29,7 +27,10 @@ pub enum ValidationErrorKind {
     #[assoc(code = 203)]
     HeadAnonymous,
     /// Operation with unsafe variable
-    #[error(r#"unsafe variable used in computation: `{0}`"#)]
+    #[error(r#"unsafe variable used in operation: `{0}`"#)]
+    #[assoc(
+        note = "every universal variable used in an operation must occur at a safe position in the body"
+    )]
     #[assoc(code = 204)]
     OperationUnsafe(Variable),
     /// Unsafe variable used in multiple negative literals
@@ -37,43 +38,63 @@ pub enum ValidationErrorKind {
     #[assoc(code = 205)]
     MultipleNegativeLiteralsUnsafe(Variable),
     /// Aggregate is used in body
-    #[error(r#"aggregate used in rule body: `{0}`"#)]
+    #[error(r#"aggregate used in rule body"#)]
     #[assoc(code = 206)]
-    BodyAggregate(Aggregate),
+    BodyAggregate,
     /// A variable is both universally and existentially quantified
     #[error(r#"variable is both universal and existential: `{0}`"#)]
     #[assoc(code = 207)]
     VariableMultipleQuantifiers(String),
     /// Fact contains non-ground term
-    #[error(r#"non-ground term used in fact: `{0}`"#)]
+    #[error(r#"non-ground term used in fact"#)]
     #[assoc(code = 208)]
-    FactNonGround(Term),
+    FactNonGround,
     /// Invalid variable name was used
-    #[assoc(code = 209)]
     #[error(r#"variable name is invalid: `{0}`"#)]
+    #[assoc(code = 209)]
+    #[assoc(note = "variable names may not start with double underscore")]
     InvalidVariableName(String),
-    /// Invalid tag was used
+    /// Invalid function term name was used
+    #[error(r#"function name is invalid: `{0}`"#)]
     #[assoc(code = 210)]
-    #[error(r#"tag is invalid: `{0}`"#)]
+    #[assoc(note = "function names may not start with double underscore")]
     InvalidTermTag(String),
     /// Invalid predicate name was used
+    #[error(r#"predicate name is invalid: `{0}`"#)]
     #[assoc(code = 211)]
-    #[error(r#"predicate name is invalid: `{0}"#)]
+    #[assoc(note = "predicate names may not start with double underscore")]
     InvalidPredicateName(String),
+    /// Invalid value type for aggregate
+    #[error(r#"used aggregate term of type `{found}`, expected `{expected}`"#)]
+    #[assoc(code = 212)]
+    AggregateInvalidValueType { found: String, expected: String },
+    /// Empty function term
+    #[error(r#"function term without arguments"#)]
+    #[assoc(code = 213)]
+    FunctionTermEmpty,
+    /// Wrong number of arguments for function
+    #[error(r#"operation used with {used} arguments, expected {expected}"#)]
+    #[assoc(code = 214)]
+    OperationArgumentNumber { used: usize, expected: String },
+    /// Anonymous variable used in operation
+    #[error(r#"anonymous variable used in operation"#)]
+    #[assoc(code = 215)]
+    OperationAnonymous,
+
     /// Unsupported feature: Multiple aggregates in one rule
     #[error(r#"multiple aggregates in one rule is currently unsupported"#)]
     #[assoc(code = 999)]
-    AggregateMultiple,
+    UnsupportedAggregateMultiple,
     /// Unsupported feature: Aggregates combined with existential rules
-    #[error(r#"aggregates and existential variables in one rule is currently unsupported"#)]
+    #[error(r#"aggregates in existential rules are currently unsupported"#)]
     #[assoc(code = 998)]
-    AggregatesAndExistentials,
+    UnsupportedAggregatesAndExistentials,
     /// Atom used without any arguments
     #[assoc(code = 997)]
     #[error(r#"atoms without arguments are currently unsupported"#)]
-    AtomNoArguments,
+    UnsupportedAtomEmpty,
     /// Non-primitive terms are currently unsupported
     #[assoc(code = 996)]
     #[error(r#"complex terms are currently unsupported"#)]
-    ComplexTerm,
+    UnsupportedComplexTerm,
 }
diff --git a/nemo/src/rule_model/translation.rs b/nemo/src/rule_model/translation.rs
index 378ff162d..bc113ea91 100644
--- a/nemo/src/rule_model/translation.rs
+++ b/nemo/src/rule_model/translation.rs
@@ -15,7 +15,7 @@ use crate::{
 };
 
 use super::{
-    components::fact::Fact,
+    components::{fact::Fact, ProgramComponent},
     error::{
         translation_error::TranslationErrorKind, ProgramError, TranslationError,
         ValidationErrorBuilder,
@@ -72,6 +72,15 @@ impl<'a> ASTProgramTranslation<'a> {
 
         new_origin
     }
+
+    /// Register a [ProgramComponent]
+    pub fn register_component<Component: ProgramComponent>(
+        &mut self,
+        component: Component,
+        node: &'a dyn ProgramAST<'a>,
+    ) -> Component {
+        component.set_origin(self.register_node(node))
+    }
 }
 
 /// Report of all [ProgramError]s occurred
diff --git a/nemo/src/rule_model/translation/basic/variable.rs b/nemo/src/rule_model/translation/basic/variable.rs
index cbf84bce8..fe389b781 100644
--- a/nemo/src/rule_model/translation/basic/variable.rs
+++ b/nemo/src/rule_model/translation/basic/variable.rs
@@ -14,7 +14,7 @@ impl<'a> ASTProgramTranslation<'a> {
         &mut self,
         variable: &'a ast::expression::basic::variable::Variable,
     ) -> Result<Variable, TranslationError> {
-        Ok(match variable.kind() {
+        let result = match variable.kind() {
             ast::expression::basic::variable::VariableType::Universal => {
                 if let Some(variable_name) = variable.name() {
                     Variable::universal(&variable_name).set_origin(self.register_node(variable))
@@ -46,6 +46,8 @@ impl<'a> ASTProgramTranslation<'a> {
                     ));
                 }
             }
-        })
+        };
+
+        Ok(self.register_component(result, variable))
     }
 }
diff --git a/nemo/src/rule_model/translation/complex/aggregation.rs b/nemo/src/rule_model/translation/complex/aggregation.rs
index e0d0f12a7..ae153b59d 100644
--- a/nemo/src/rule_model/translation/complex/aggregation.rs
+++ b/nemo/src/rule_model/translation/complex/aggregation.rs
@@ -1,11 +1,14 @@
 //! This module contains a function to create an aggregation term
 //! from the corresponding ast node.
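+//!
+//! Like the other translation helpers, the component built here is passed
+//! through `register_component`, so that its origin points back to the AST
+//! node it was created from.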
-use crate::parser::ast::{self, ProgramAST};
-
-use crate::rule_model::components::term::aggregate::Aggregate;
-use crate::rule_model::error::translation_error::TranslationErrorKind;
-use crate::rule_model::{error::TranslationError, translation::ASTProgramTranslation};
+use crate::{
+    parser::ast::{self, ProgramAST},
+    rule_model::{
+        components::term::aggregate::Aggregate,
+        error::{translation_error::TranslationErrorKind, TranslationError},
+        translation::ASTProgramTranslation,
+    },
+};
 
 impl<'a> ASTProgramTranslation<'a> {
     /// Create an aggregation term from the corresponding AST node.
@@ -37,6 +40,6 @@ impl<'a> ASTProgramTranslation<'a> {
             }
         }
 
-        Ok(Aggregate::new(kind, aggregate, distinct))
+        Ok(self.register_component(Aggregate::new(kind, aggregate, distinct), aggregation))
     }
 }
diff --git a/nemo/src/rule_model/translation/complex/arithmetic.rs b/nemo/src/rule_model/translation/complex/arithmetic.rs
index 46fd8b923..530e85314 100644
--- a/nemo/src/rule_model/translation/complex/arithmetic.rs
+++ b/nemo/src/rule_model/translation/complex/arithmetic.rs
@@ -1,11 +1,14 @@
 //! This module contains a function to create an arithmetic term
 //! from the corresponding ast node.
 
-use crate::parser::ast;
-
-use crate::rule_model::components::term::operation::operation_kind::OperationKind;
-use crate::rule_model::components::term::operation::Operation;
-use crate::rule_model::{error::TranslationError, translation::ASTProgramTranslation};
+use crate::{
+    parser::ast::{self},
+    rule_model::{
+        components::term::operation::{operation_kind::OperationKind, Operation},
+        error::TranslationError,
+        translation::ASTProgramTranslation,
+    },
+};
 
 impl<'a> ASTProgramTranslation<'a> {
     /// Create an arithmetic term from the corresponding AST node.
@@ -33,6 +36,6 @@ impl<'a> ASTProgramTranslation<'a> {
             self.build_inner_term(arithmetic.right())?,
         ];
 
-        Ok(Operation::new(kind, subterms))
+        Ok(self.register_component(Operation::new(kind, subterms), arithmetic))
     }
 }
diff --git a/nemo/src/rule_model/translation/complex/function.rs b/nemo/src/rule_model/translation/complex/function.rs
index 11c73dce2..fbf97674a 100644
--- a/nemo/src/rule_model/translation/complex/function.rs
+++ b/nemo/src/rule_model/translation/complex/function.rs
@@ -1,10 +1,12 @@
 //! This module contains a function to create a function term
 //! from the corresponding ast node.
 
-use crate::parser::ast;
-
+use crate::rule_model::components::tag::Tag;
 use crate::rule_model::components::term::function::FunctionTerm;
-use crate::rule_model::{error::TranslationError, translation::ASTProgramTranslation};
+use crate::{
+    parser::ast::{self},
+    rule_model::{error::TranslationError, translation::ASTProgramTranslation},
+};
 
 impl<'a> ASTProgramTranslation<'a> {
     /// Create a function term from the corresponding AST node.
@@ -12,12 +14,14 @@ impl<'a> ASTProgramTranslation<'a> {
         &mut self,
         function: &'a ast::expression::complex::atom::Atom,
     ) -> Result<FunctionTerm, TranslationError> {
-        let name = self.resolve_tag(function.tag())?;
+        let tag = Tag::from(self.resolve_tag(function.tag())?)
+ .set_origin(self.register_node(function.tag())); + let mut subterms = Vec::new(); for expression in function.expressions() { subterms.push(self.build_inner_term(expression)?); } - Ok(FunctionTerm::new(&name, subterms)) + Ok(self.register_component(FunctionTerm::new(tag, subterms), function)) } } diff --git a/nemo/src/rule_model/translation/complex/infix.rs b/nemo/src/rule_model/translation/complex/infix.rs index 7584e06df..0d88e6f42 100644 --- a/nemo/src/rule_model/translation/complex/infix.rs +++ b/nemo/src/rule_model/translation/complex/infix.rs @@ -1,11 +1,14 @@ //! This module contains a function to create an operation //! from an infix ast node. -use crate::parser::ast; - -use crate::rule_model::components::term::operation::operation_kind::OperationKind; -use crate::rule_model::components::term::operation::Operation; -use crate::rule_model::{error::TranslationError, translation::ASTProgramTranslation}; +use crate::{ + parser::ast::{self}, + rule_model::{ + components::term::operation::{operation_kind::OperationKind, Operation}, + error::TranslationError, + translation::ASTProgramTranslation, + }, +}; impl<'a> ASTProgramTranslation<'a> { /// Create an [Operation] from an infix AST node. @@ -36,6 +39,6 @@ impl<'a> ASTProgramTranslation<'a> { let subterms = vec![self.build_inner_term(left)?, self.build_inner_term(right)?]; - Ok(Operation::new(kind, subterms)) + Ok(self.register_component(Operation::new(kind, subterms), infix)) } } diff --git a/nemo/src/rule_model/translation/complex/map.rs b/nemo/src/rule_model/translation/complex/map.rs index a1fa43624..52086439f 100644 --- a/nemo/src/rule_model/translation/complex/map.rs +++ b/nemo/src/rule_model/translation/complex/map.rs @@ -1,10 +1,12 @@ //! This module contains a function to create a map term //! from the corresponding ast node. -use crate::parser::ast; - -use crate::rule_model::components::term::map::Map; -use crate::rule_model::{error::TranslationError, translation::ASTProgramTranslation}; +use crate::{ + parser::ast, + rule_model::{ + components::term::map::Map, error::TranslationError, translation::ASTProgramTranslation, + }, +}; impl<'a> ASTProgramTranslation<'a> { /// Create a map term from the corresponding AST node. @@ -20,9 +22,11 @@ impl<'a> ASTProgramTranslation<'a> { subterms.push((key, value)); } - Ok(match map.tag() { + let result = match map.tag() { Some(tag) => Map::new(&self.resolve_tag(tag)?, subterms), None => Map::new_unnamed(subterms), - }) + }; + + Ok(self.register_component(result, map)) } } diff --git a/nemo/src/rule_model/translation/complex/operation.rs b/nemo/src/rule_model/translation/complex/operation.rs index 4f58abc1c..f95fc3f69 100644 --- a/nemo/src/rule_model/translation/complex/operation.rs +++ b/nemo/src/rule_model/translation/complex/operation.rs @@ -1,10 +1,13 @@ //! This module contains a function to create an operation term //! from the corresponding ast node. -use crate::parser::ast; - -use crate::rule_model::components::term::operation::Operation; -use crate::rule_model::{error::TranslationError, translation::ASTProgramTranslation}; +use crate::{ + parser::ast::{self}, + rule_model::{ + components::term::operation::Operation, error::TranslationError, + translation::ASTProgramTranslation, + }, +}; impl<'a> ASTProgramTranslation<'a> { /// Create an operation term from the corresponding AST node. 
@@ -18,6 +21,6 @@ impl<'a> ASTProgramTranslation<'a> { subterms.push(self.build_inner_term(expression)?); } - Ok(Operation::new(kind, subterms)) + Ok(self.register_component(Operation::new(kind, subterms), operation)) } } diff --git a/nemo/src/rule_model/translation/complex/tuple.rs b/nemo/src/rule_model/translation/complex/tuple.rs index 69b9050b1..7c2861b7e 100644 --- a/nemo/src/rule_model/translation/complex/tuple.rs +++ b/nemo/src/rule_model/translation/complex/tuple.rs @@ -1,10 +1,12 @@ //! This module contains a function to create a tuple term //! from the corresponding ast node. -use crate::parser::ast; - -use crate::rule_model::components::term::tuple::Tuple; -use crate::rule_model::{error::TranslationError, translation::ASTProgramTranslation}; +use crate::{ + parser::ast, + rule_model::{ + components::term::tuple::Tuple, error::TranslationError, translation::ASTProgramTranslation, + }, +}; impl<'a> ASTProgramTranslation<'a> { /// Create a tuple term from the corresponding AST node. @@ -17,6 +19,6 @@ impl<'a> ASTProgramTranslation<'a> { subterms.push(self.build_inner_term(expression)?); } - Ok(Tuple::new(subterms)) + Ok(self.register_component(Tuple::new(subterms), tuple)) } } diff --git a/nemo/src/rule_model/translation/directive/import_export.rs b/nemo/src/rule_model/translation/directive/import_export.rs index 21bfac8e9..b84dc924c 100644 --- a/nemo/src/rule_model/translation/directive/import_export.rs +++ b/nemo/src/rule_model/translation/directive/import_export.rs @@ -12,7 +12,8 @@ use crate::{ file_formats::{FileFormat, FILE_FORMATS_RDF}, ExportDirective, ImportDirective, }, - ProgramComponent, Tag, + tag::Tag, + ProgramComponent, }, error::{translation_error::TranslationErrorKind, TranslationError}, translation::ASTProgramTranslation, @@ -101,11 +102,15 @@ impl<'a> ASTProgramTranslation<'a> { &mut self, import: &'a ast::directive::import::Import, ) -> Result<(), TranslationError> { - let predicate = Tag::new(self.resolve_tag(import.predicate())?); + let predicate = Tag::new(self.resolve_tag(import.predicate())?) + .set_origin(self.register_node(import.predicate())); let attributes = self.build_map(import.instructions())?; let file_format = self.import_export_format(import.instructions())?; - let import_directive = ImportDirective::new(predicate, file_format, attributes); + let import_directive = self.register_component( + ImportDirective::new(predicate, file_format, attributes), + import, + ); let _ = import_directive.validate(&mut self.validation_error_builder); self.program_builder.add_import(import_directive); @@ -118,11 +123,15 @@ impl<'a> ASTProgramTranslation<'a> { &mut self, export: &'a ast::directive::export::Export, ) -> Result<(), TranslationError> { - let predicate = Tag::new(self.resolve_tag(export.predicate())?); + let predicate = Tag::new(self.resolve_tag(export.predicate())?) 
+ .set_origin(self.register_node(export.predicate())); let attributes = self.build_map(export.instructions())?; let file_format = self.import_export_format(export.instructions())?; - let export_directive = ExportDirective::new(predicate, file_format, attributes); + let export_directive = self.register_component( + ExportDirective::new(predicate, file_format, attributes), + export, + ); let _ = export_directive.validate(&mut self.validation_error_builder); self.program_builder.add_export(export_directive); diff --git a/nemo/src/rule_model/translation/directive/output.rs b/nemo/src/rule_model/translation/directive/output.rs index 9c6b4f035..85c26f372 100644 --- a/nemo/src/rule_model/translation/directive/output.rs +++ b/nemo/src/rule_model/translation/directive/output.rs @@ -1,9 +1,9 @@ //! This module contains a function for handling output statements. use crate::{ - parser::ast::{self}, + parser::ast, rule_model::{ - components::{output::Output, Tag}, + components::{output::Output, tag::Tag}, error::TranslationError, translation::ASTProgramTranslation, }, @@ -16,7 +16,8 @@ impl<'a> ASTProgramTranslation<'a> { output: &'a ast::directive::output::Output, ) -> Result<(), TranslationError> { for predicate in output.predicates() { - let tag = Tag::new(self.resolve_tag(predicate)?); + let tag = + Tag::new(self.resolve_tag(predicate)?).set_origin(self.register_node(predicate)); self.program_builder.add_output(Output::new(tag)); } diff --git a/nemo/src/rule_model/translation/rule.rs b/nemo/src/rule_model/translation/rule.rs index 7c7d6c973..40669531e 100644 --- a/nemo/src/rule_model/translation/rule.rs +++ b/nemo/src/rule_model/translation/rule.rs @@ -7,6 +7,7 @@ use crate::{ atom::Atom, literal::Literal, rule::{Rule, RuleBuilder}, + tag::Tag, term::Term, ProgramComponent, }, @@ -45,15 +46,15 @@ impl<'a> ASTProgramTranslation<'a> { ) -> Result { let result = match body { ast::expression::Expression::Atom(atom) => { + let predicate = Tag::from(self.resolve_tag(atom.tag())?) + .set_origin(self.register_node(atom.tag())); + let mut subterms = Vec::new(); for expression in atom.expressions() { subterms.push(self.build_inner_term(expression)?); } - Literal::Positive( - Atom::new(&self.resolve_tag(atom.tag())?, subterms) - .set_origin(self.register_node(atom)), - ) + Literal::Positive(self.register_component(Atom::new(predicate, subterms), atom)) } ast::expression::Expression::Negation(negated) => { let atom = if let ast::expression::Expression::Atom(atom) = negated.expression() { @@ -67,24 +68,20 @@ impl<'a> ASTProgramTranslation<'a> { )); }; + let predicate = Tag::from(self.resolve_tag(atom.tag())?) + .set_origin(self.register_node(atom.tag())); let mut subterms = Vec::new(); for expression in atom.expressions() { subterms.push(self.build_inner_term(expression)?); } - Literal::Negative( - Atom::new(&self.resolve_tag(atom.tag())?, subterms) - .set_origin(self.register_node(atom)), - ) + Literal::Negative(self.register_component(Atom::new(predicate, subterms), atom)) + } + ast::expression::Expression::Infix(infix) => { + Literal::Operation(self.build_infix(infix)?) } - ast::expression::Expression::Infix(infix) => Literal::Operation( - self.build_infix(infix)? - .set_origin(self.register_node(infix)), - ), ast::expression::Expression::Operation(operation) => { - let result = self.build_operation(operation)?; - - Literal::Operation(result.set_origin(self.register_node(operation))) + Literal::Operation(self.build_operation(operation)?) 
} _ => { return Err(TranslationError::new( @@ -104,12 +101,14 @@ impl<'a> ASTProgramTranslation<'a> { head: &'a ast::expression::Expression<'a>, ) -> Result { let result = if let ast::expression::Expression::Atom(atom) = head { + let predicate = + Tag::from(self.resolve_tag(atom.tag())?).set_origin(self.register_node(atom.tag())); let mut subterms = Vec::new(); for expression in atom.expressions() { subterms.push(self.build_inner_term(expression)?); } - Atom::new(&self.resolve_tag(atom.tag())?, subterms).set_origin(self.register_node(atom)) + self.register_component(Atom::new(predicate, subterms), atom) } else { return Err(TranslationError::new( head.span(), @@ -125,7 +124,7 @@ impl<'a> ASTProgramTranslation<'a> { &mut self, expression: &'a ast::expression::Expression, ) -> Result { - match expression { + Ok(match expression { ast::expression::Expression::Arithmetic(arithmetic) => { self.build_arithmetic(arithmetic).map(Term::from) } @@ -167,6 +166,7 @@ impl<'a> ASTProgramTranslation<'a> { infix.span(), TranslationErrorKind::InnerExpressionInfix, )), - } + }? + .set_origin(self.register_node(expression))) } } From d9c334e503e0b0f316d9a275a4279caed18c261c Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Wed, 14 Aug 2024 10:18:06 +0200 Subject: [PATCH 146/214] Include erroneous program in err return variant when an AST has errors --- nemo-cli/src/main.rs | 3 ++- nemo/src/parser.rs | 19 ++++++++------ nemo/src/parser/ast.rs | 1 + nemo/src/parser/ast/program.rs | 2 +- nemo/src/parser/ast/statement.rs | 42 +++++++++--------------------- nemo/src/parser/error.rs | 21 +++++++++++---- nemo/src/rule_model/translation.rs | 3 +++ 7 files changed, 46 insertions(+), 45 deletions(-) diff --git a/nemo-cli/src/main.rs b/nemo-cli/src/main.rs index fb462f143..8ca9ad393 100644 --- a/nemo-cli/src/main.rs +++ b/nemo-cli/src/main.rs @@ -173,7 +173,8 @@ fn run(mut cli: CliApp) -> Result<(), Error> { .parse() { Ok(program) => program, - Err(report) => { + Err((program, report)) => { + println!("{program}"); report.eprint(report.build_reports())?; std::process::exit(1); } diff --git a/nemo/src/parser.rs b/nemo/src/parser.rs index 709699559..b937c1a55 100644 --- a/nemo/src/parser.rs +++ b/nemo/src/parser.rs @@ -99,7 +99,7 @@ impl<'a> Parser<'a> { } /// Parse the input - pub fn parse(self) -> Result, ParserErrorReport<'a>> { + pub fn parse(self) -> Result, (Program<'a>, ParserErrorReport<'a>)> { let parser_input = ParserInput::new(&self.input, self.state.clone()); let (_, program) = Program::parse(parser_input).expect("parsing should always succeed"); @@ -107,13 +107,16 @@ impl<'a> Parser<'a> { if self.state.errors.borrow().is_empty() { Ok(program) } else { - Err(ParserErrorReport { - input: self.input, - label: self.label, - errors: Rc::try_unwrap(self.state.errors) - .expect("there should only be one owner now") - .into_inner(), - }) + Err(( + program, + ParserErrorReport { + input: self.input, + label: self.label, + errors: Rc::try_unwrap(self.state.errors) + .expect("there should only be one owner now") + .into_inner(), + }, + )) } // let error_tree = match transform_error_tree(Program::parse)(parser_input) { diff --git a/nemo/src/parser/ast.rs b/nemo/src/parser/ast.rs index 9685a68a6..874411847 100644 --- a/nemo/src/parser/ast.rs +++ b/nemo/src/parser/ast.rs @@ -15,6 +15,7 @@ use std::fmt::Debug; use super::{context::ParserContext, span::Span, ParserInput, ParserResult}; use ascii_tree::Tree; +use token::Token; /// Trait implemented by nodes in the abstract syntax tree pub trait ProgramAST<'a>: Debug + Sync 
{
diff --git a/nemo/src/parser/ast/program.rs b/nemo/src/parser/ast/program.rs
index 6ba9235e0..38f7728b7 100644
--- a/nemo/src/parser/ast/program.rs
+++ b/nemo/src/parser/ast/program.rs
@@ -108,7 +108,7 @@ impl<'a> ProgramAST<'a> for Program<'a> {
                 Self {
                     span: input_span.until_rest(&rest_span),
                     comment,
-                    statements: statements.into_iter().flatten().collect::<Vec<_>>(),
+                    statements,
                 },
             )
         })
diff --git a/nemo/src/parser/ast/statement.rs b/nemo/src/parser/ast/statement.rs
index 5e93d42e7..ffe1820a8 100644
--- a/nemo/src/parser/ast/statement.rs
+++ b/nemo/src/parser/ast/statement.rs
@@ -32,6 +32,8 @@ pub enum StatementKind<'a> {
     Rule(Rule<'a>),
     /// Directive
     Directive(Directive<'a>),
+    /// This represents a statement that has an error that could not be recovered in a child node.
+    Error(Token<'a>),
 }
 
 impl<'a> StatementKind<'a> {
@@ -41,6 +43,7 @@ impl<'a> StatementKind<'a> {
             StatementKind::Fact(statement) => statement.context(),
             StatementKind::Rule(statement) => statement.context(),
             StatementKind::Directive(statement) => statement.context(),
+            StatementKind::Error(_statement) => todo!(),
         }
     }
 
@@ -58,12 +61,12 @@ pub struct Statement<'a> {
     /// [Span] associated with this node
-    span: Span<'a>,
+    pub(crate) span: Span<'a>,
 
     /// Doc comment associated with this statement
-    comment: Option<Token<'a>>,
+    pub(crate) comment: Option<Token<'a>>,
     /// The statement
-    kind: StatementKind<'a>,
+    pub(crate) kind: StatementKind<'a>,
 }
 
 impl<'a> Statement<'a> {
@@ -83,11 +86,12 @@ const CONTEXT: ParserContext = ParserContext::Statement;
 
 impl<'a> ProgramAST<'a> for Statement<'a> {
     fn children(&self) -> Vec<&dyn ProgramAST> {
-        vec![match &self.kind {
-            StatementKind::Fact(statement) => statement,
-            StatementKind::Rule(statement) => statement,
-            StatementKind::Directive(statement) => statement,
-        }]
+        match &self.kind {
+            StatementKind::Fact(statement) => vec![statement],
+            StatementKind::Rule(statement) => vec![statement],
+            StatementKind::Directive(statement) => vec![statement],
+            StatementKind::Error(_) => vec![],
+        }
     }
 
     fn span(&self) -> Span<'a> {
@@ -162,25 +166,3 @@ mod test {
         }
     }
 }
-
-// TODO: Remove this when the debug error statement printing in the ast is no longer needed
-impl<'a> ProgramAST<'a> for Option<Statement<'a>> {
-    fn children(&'a self) -> Vec<&'a dyn ProgramAST> {
-        vec![]
-    }
-
-    fn span(&self) -> Span<'a> {
-        Span(LocatedSpan::new("ERROR!"))
-    }
-
-    fn parse(_input: ParserInput<'a>) -> ParserResult<'a, Self>
-    where
-        Self: Sized + 'a,
-    {
-        todo!()
-    }
-
-    fn context(&self) -> ParserContext {
-        ParserContext::Statement
-    }
-}
diff --git a/nemo/src/parser/error.rs b/nemo/src/parser/error.rs
index 9f032d315..c9ed8460d 100644
--- a/nemo/src/parser/error.rs
+++ b/nemo/src/parser/error.rs
@@ -11,7 +11,10 @@ use nom::{
 use nom_supreme::error::{GenericErrorTree, StackContext};
 
 use super::{
-    ast::{statement::Statement, token::Token},
+    ast::{
+        statement::{Statement, StatementKind},
+        token::Token,
+    },
     context::ParserContext,
     span::CharacterPosition,
     ParserInput, ParserResult,
@@ -46,6 +49,7 @@ pub(crate) fn skip_statement<'a>(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>>
         )),
         move |token| Token::error(input_span.enclose(&input_span, &token.span())),
     );
+    // TODO: Should additional whitespace be allowed between the dot and the newline?
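+    // Skips ahead to the next dot that closes the statement, consuming
+    // the dot together with its trailing line ending.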
let until_dot_newline = map( alt(( preceded(take_until(".\n"), terminated(Token::dot, line_ending)), @@ -61,13 +65,20 @@ pub(crate) fn skip_statement<'a>(input: ParserInput<'a>) -> ParserResult<'a, Tok pub(crate) fn recover<'a>( mut parser: impl Parser, Statement<'a>, ParserErrorTree<'a>>, -) -> impl FnMut(ParserInput<'a>) -> ParserResult>> { +) -> impl FnMut(ParserInput<'a>) -> ParserResult> { move |input: ParserInput<'a>| match parser.parse(input.clone()) { - Ok((rest, statement)) => Ok((rest, Some(statement))), + Ok((rest, statement)) => Ok((rest, statement)), Err(err) if input.span.0.is_empty() => Err(err), Err(nom::Err::Error(_)) | Err(nom::Err::Failure(_)) => { - let (rest_input, _span) = skip_statement(input).expect("this parser cannot fail"); - Ok((rest_input, None)) + let (rest_input, token) = skip_statement(input).expect("this parser cannot fail"); + Ok(( + rest_input, + Statement { + span: token.span(), + comment: None, + kind: StatementKind::Error(token), + }, + )) } Err(err) => Err(err), } diff --git a/nemo/src/rule_model/translation.rs b/nemo/src/rule_model/translation.rs index bc113ea91..a68992841 100644 --- a/nemo/src/rule_model/translation.rs +++ b/nemo/src/rule_model/translation.rs @@ -172,6 +172,9 @@ impl<'a> ASTProgramTranslation<'a> { self.errors.push(ProgramError::TranslationError(error)); } } + ast::statement::StatementKind::Error(_token) => { + todo!("Should faulty statements get ignored?") + } } } From 09d3ad24037cb39ea80b3c5d175fc7ee609ed2c4 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Fri, 23 Aug 2024 09:36:42 +0200 Subject: [PATCH 147/214] Fix wasm bindings --- nemo-wasm/src/lib.rs | 4 +- nemo/src/io/parser.rs | 4661 +++++++++++++++++----------------- nemo/src/io/parser/iri.rs | 2 +- nemo/src/io/parser/sparql.rs | 7 +- nemo/src/io/parser/turtle.rs | 6 +- 5 files changed, 2374 insertions(+), 2306 deletions(-) diff --git a/nemo-wasm/src/lib.rs b/nemo-wasm/src/lib.rs index b5d7e8301..b4e1b6919 100644 --- a/nemo-wasm/src/lib.rs +++ b/nemo-wasm/src/lib.rs @@ -13,8 +13,8 @@ use nemo::execution::tracing::trace::ExecutionTraceTree; use nemo::execution::ExecutionEngine; use nemo::io::compression_format::CompressionFormat; -use nemo::io::parser::parse_fact; -use nemo::io::parser::parse_program; +use nemo::io::parser::old::parse_fact; +use nemo::io::parser::old::parse_program; use nemo::io::resource_providers::{ResourceProvider, ResourceProviders}; use nemo::io::ImportManager; use nemo::model::Atom; diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs index a3e5d61e4..4ae877b50 100644 --- a/nemo/src/io/parser.rs +++ b/nemo/src/io/parser.rs @@ -1,27 +1,5 @@ //! A parser for rulewerk-style rules. 
-// use std::{cell::RefCell, collections::HashMap, fmt::Debug}; - -// use crate::{ -// error::Error, -// io::parser::types::{ArithmeticOperator, BodyExpression}, -// model::*, -// }; -// use nemo_physical::datavalues::{ -// AnyDataValue, DataValueCreationError, MapDataValue, TupleDataValue, -// }; -// use nom::{ -// branch::alt, -// bytes::complete::{is_not, tag}, -// character::complete::{alpha1, digit1, multispace1, satisfy}, -// combinator::{all_consuming, cut, map, map_res, opt, recognize, value}, -// multi::{many0, many1, separated_list0, separated_list1}, -// sequence::{delimited, pair, preceded, separated_pair, terminated, tuple}, -// Err, -// }; - -// use macros::traced; - pub mod ast; pub(crate) mod types; @@ -35,2409 +13,2498 @@ use ast::term::{Exponent, Primitive, Term}; use ast::tuple::Tuple; use ast::{List, Position, Wsoc}; use types::Input; -// use types::{ConstraintOperator, IntermediateResult, Span}; -// pub(crate) mod iri; -// pub(crate) mod rfc5234; -// pub(crate) mod sparql; -// pub(crate) mod turtle; -// pub use types::{span_from_str, LocatedParseError, ParseError, ParseResult}; pub use types::LocatedParseError; -// /// Parse a program in the given `input`-String and return a [Program]. -// /// -// /// The program will be parsed and checked for unsupported features. -// /// -// /// # Error -// /// Returns an appropriate [Error] variant on parsing and feature check issues. -// pub fn parse_program(input: impl AsRef) -> Result { -// let program = all_input_consumed(RuleParser::new().parse_program())(input.as_ref())?; -// Ok(program) -// } +pub(crate) mod iri; +pub(crate) mod rfc5234; +pub(crate) mod sparql; +pub(crate) mod turtle; -// /// Parse a single fact in the given `input`-String and return a [Program]. -// /// -// /// The program will be parsed and checked for unsupported features. -// /// -// /// # Error -// /// Returns an appropriate [Error] variant on parsing and feature check issues. -// pub fn parse_fact(mut input: String) -> Result { -// input += "."; -// let fact = all_input_consumed(RuleParser::new().parse_fact())(input.as_str())?; -// Ok(fact) -// } +pub mod old { + //! FIXME: remove when new parser is in use and wasm binds can get updated + use super::{iri, rfc5234, sparql, turtle}; -// /// A combinator to add tracing to the parser. -// /// [fun] is an identifier for the parser and [parser] is the actual parser. -// #[inline(always)] -// fn traced<'a, T, P>( -// fun: &'static str, -// mut parser: P, -// ) -> impl FnMut(Span<'a>) -> IntermediateResult<'a, T> -// where -// T: Debug, -// P: FnMut(Span<'a>) -> IntermediateResult<'a, T>, -// { -// move |input| { -// log::trace!(target: "parser", "{fun}({input:?})"); -// let result = parser(input); -// log::trace!(target: "parser", "{fun}({input:?}) -> {result:?}"); -// result -// } -// } + use std::{cell::RefCell, collections::HashMap, fmt::Debug}; -// /// A combinator that makes sure all input has been consumed. 
-// pub fn all_input_consumed<'a, T: 'a>( -// parser: impl FnMut(Span<'a>) -> IntermediateResult<'a, T> + 'a, -// ) -> impl FnMut(&'a str) -> Result + 'a { -// let mut p = all_consuming(parser); -// move |input| { -// let input = Span::new(input); -// p(input).map(|(_, result)| result).map_err(|e| match e { -// Err::Incomplete(e) => ParseError::MissingInput(match e { -// nom::Needed::Unknown => "expected an unknown amount of further input".to_string(), -// nom::Needed::Size(size) => format!("expected at least {size} more bytes"), -// }) -// .at(input), -// Err::Error(e) | Err::Failure(e) => e, -// }) -// } -// } + use crate::{ + error::Error, + io::parser::types::{ArithmeticOperator, BodyExpression}, + model::*, + }; + use nemo_physical::datavalues::{ + AnyDataValue, DataValueCreationError, MapDataValue, TupleDataValue, + }; + use nom::{ + branch::alt, + bytes::complete::{is_not, tag}, + character::complete::{alpha1, digit1, multispace1, satisfy}, + combinator::{all_consuming, cut, map, map_res, opt, recognize, value}, + multi::{many0, many1, separated_list0, separated_list1}, + sequence::{delimited, pair, preceded, separated_pair, terminated, tuple}, + Err, + }; -// /// A combinator that recognises a comment, starting at a `%` -// /// character and ending at the end of the line. -// pub fn comment(input: Span) -> IntermediateResult<()> { -// alt(( -// value((), pair(tag("%"), is_not("\n\r"))), -// // a comment that immediately precedes the end of the line – -// // this must come after the normal line comment above -// value((), tag("%")), -// ))(input) -// } + use macros::traced; -// /// A combinator that recognises an arbitrary amount of whitespace and -// /// comments. -// pub fn multispace_or_comment0(input: Span) -> IntermediateResult<()> { -// value((), many0(alt((value((), multispace1), comment))))(input) -// } + pub use super::types::{span_from_str, LocatedParseError, ParseError, ParseResult}; + use super::types::{ConstraintOperator, IntermediateResult, Span}; -// /// A combinator that recognises any non-empty amount of whitespace -// /// and comments. -// pub fn multispace_or_comment1(input: Span) -> IntermediateResult<()> { -// value((), many1(alt((value((), multispace1), comment))))(input) -// } + /// Parse a program in the given `input`-String and return a [Program]. + /// + /// The program will be parsed and checked for unsupported features. + /// + /// # Error + /// Returns an appropriate [Error] variant on parsing and feature check issues. + pub fn parse_program(input: impl AsRef) -> Result { + let program = all_input_consumed(RuleParser::new().parse_program())(input.as_ref())?; + Ok(program) + } -// /// A combinator that modifies the associated error. -// pub fn map_error<'a, T: 'a>( -// mut parser: impl FnMut(Span<'a>) -> IntermediateResult<'a, T> + 'a, -// mut error: impl FnMut() -> ParseError + 'a, -// ) -> impl FnMut(Span<'a>) -> IntermediateResult<'a, T> + 'a { -// move |input| { -// parser(input).map_err(|e| match e { -// Err::Incomplete(_) => e, -// Err::Error(context) => { -// let mut err = error().at(input); -// err.append(context); -// Err::Error(err) -// } -// Err::Failure(context) => { -// let mut err = error().at(input); -// err.append(context); -// Err::Failure(err) -// } -// }) -// } -// } + /// Parse a single fact in the given `input`-String and return a [Program]. + /// + /// The program will be parsed and checked for unsupported features. + /// + /// # Error + /// Returns an appropriate [Error] variant on parsing and feature check issues. 
+ pub fn parse_fact(mut input: String) -> Result { + input += "."; + let fact = all_input_consumed(RuleParser::new().parse_fact())(input.as_str())?; + Ok(fact) + } -// /// A combinator that creates a parser for a specific token. -// pub fn token<'a>(token: &'a str) -> impl FnMut(Span<'a>) -> IntermediateResult> { -// map_error(tag(token), || ParseError::ExpectedToken(token.to_string())) -// } + /// A combinator to add tracing to the parser. + /// [fun] is an identifier for the parser and [parser] is the actual parser. + #[inline(always)] + fn traced<'a, T, P>( + fun: &'static str, + mut parser: P, + ) -> impl FnMut(Span<'a>) -> IntermediateResult<'a, T> + where + T: Debug, + P: FnMut(Span<'a>) -> IntermediateResult<'a, T>, + { + move |input| { + log::trace!(target: "parser", "{fun}({input:?})"); + let result = parser(input); + log::trace!(target: "parser", "{fun}({input:?}) -> {result:?}"); + result + } + } -// /// A combinator that creates a parser for a specific token, -// /// surrounded by whitespace or comments. -// pub fn space_delimited_token<'a>( -// token: &'a str, -// ) -> impl FnMut(Span<'a>) -> IntermediateResult> { -// map_error( -// delimited(multispace_or_comment0, tag(token), multispace_or_comment0), -// || ParseError::ExpectedToken(token.to_string()), -// ) -// } + /// A combinator that makes sure all input has been consumed. + pub fn all_input_consumed<'a, T: 'a>( + parser: impl FnMut(Span<'a>) -> IntermediateResult<'a, T> + 'a, + ) -> impl FnMut(&'a str) -> Result + 'a { + let mut p = all_consuming(parser); + move |input| { + let input = Span::new(input); + p(input).map(|(_, result)| result).map_err(|e| match e { + Err::Incomplete(e) => ParseError::MissingInput(match e { + nom::Needed::Unknown => { + "expected an unknown amount of further input".to_string() + } + nom::Needed::Size(size) => format!("expected at least {size} more bytes"), + }) + .at(input), + Err::Error(e) | Err::Failure(e) => e, + }) + } + } -// /// Expand a prefix. -// fn resolve_prefix<'a>( -// prefixes: &'a HashMap<&'a str, &'a str>, -// prefix: &'a str, -// ) -> Result<&'a str, ParseError> { -// prefixes -// .get(prefix) -// .copied() -// .ok_or_else(|| ParseError::UndeclaredPrefix(prefix.to_string())) -// } + /// A combinator that recognises a comment, starting at a `%` + /// character and ending at the end of the line. + pub fn comment(input: Span) -> IntermediateResult<()> { + alt(( + value((), pair(tag("%"), is_not("\n\r"))), + // a comment that immediately precedes the end of the line – + // this must come after the normal line comment above + value((), tag("%")), + ))(input) + } -// /// Expand a prefixed name. -// fn resolve_prefixed_name( -// prefixes: &HashMap<&str, &str>, -// name: sparql::Name, -// ) -> Result { -// match name { -// sparql::Name::IriReference(iri) => Ok(iri.to_string()), -// sparql::Name::PrefixedName { prefix, local } => { -// resolve_prefix(prefixes, prefix).map(|iri| format!("{iri}{local}")) -// } -// sparql::Name::BlankNode(label) => Ok(format!("_:{label}")), -// } -// } + /// A combinator that recognises an arbitrary amount of whitespace and + /// comments. + pub fn multispace_or_comment0(input: Span) -> IntermediateResult<()> { + value((), many0(alt((value((), multispace1), comment))))(input) + } -// /// Resolve prefixes in a [turtle::RdfLiteral]. 
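As `parse_program` and `parse_fact` show, the public entry points are thin wrappers: `parse_fact` appends the terminating dot itself, and `all_input_consumed` turns a `Span`-based nom parser into a plain function over `&str` that also rejects trailing input. A sketch of calling the combinator directly (the input text is illustrative):

    let parser = RuleParser::new();
    let mut parse = all_input_consumed(parser.parse_program());
    // fails with a LocatedParseError on a syntax error or leftover input
    let program = parse("p(1). q(?x) :- p(?x).").expect("valid program");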
-// fn resolve_prefixed_rdf_literal( -// prefixes: &HashMap<&str, &str>, -// literal: turtle::RdfLiteral, -// ) -> Result { -// match literal { -// turtle::RdfLiteral::LanguageString { value, tag } => Ok( -// AnyDataValue::new_language_tagged_string(value.to_string(), tag.to_string()), -// ), -// turtle::RdfLiteral::DatatypeValue { value, datatype } => { -// AnyDataValue::new_from_typed_literal( -// value.to_string(), -// resolve_prefixed_name(prefixes, datatype) -// .expect("prefix should have been registered during parsing"), -// ) -// } -// } -// } + /// A combinator that recognises any non-empty amount of whitespace + /// and comments. + pub fn multispace_or_comment1(input: Span) -> IntermediateResult<()> { + value((), many1(alt((value((), multispace1), comment))))(input) + } -// #[traced("parser")] -// pub(crate) fn parse_bare_name(input: Span<'_>) -> IntermediateResult> { -// map_error( -// recognize(pair( -// alpha1, -// opt(many1(satisfy(|c| { -// ['0'..='9', 'a'..='z', 'A'..='Z', '-'..='-', '_'..='_'] -// .iter() -// .any(|range| range.contains(&c)) -// }))), -// )), -// || ParseError::ExpectedBareName, -// )(input) -// } + /// A combinator that modifies the associated error. + pub fn map_error<'a, T: 'a>( + mut parser: impl FnMut(Span<'a>) -> IntermediateResult<'a, T> + 'a, + mut error: impl FnMut() -> ParseError + 'a, + ) -> impl FnMut(Span<'a>) -> IntermediateResult<'a, T> + 'a { + move |input| { + parser(input).map_err(|e| match e { + Err::Incomplete(_) => e, + Err::Error(context) => { + let mut err = error().at(input); + err.append(context); + Err::Error(err) + } + Err::Failure(context) => { + let mut err = error().at(input); + err.append(context); + Err::Failure(err) + } + }) + } + } -// #[traced("parser")] -// fn parse_simple_name(input: Span<'_>) -> IntermediateResult> { -// map_error( -// recognize(pair( -// alpha1, -// opt(preceded( -// many0(tag(" ")), -// separated_list1( -// many1(tag(" ")), -// many1(satisfy(|c| { -// ['0'..='9', 'a'..='z', 'A'..='Z', '_'..='_'] -// .iter() -// .any(|range| range.contains(&c)) -// })), -// ), -// )), -// )), -// || ParseError::ExpectedBareName, -// )(input) -// } + /// A combinator that creates a parser for a specific token. + pub fn token<'a>(token: &'a str) -> impl FnMut(Span<'a>) -> IntermediateResult> { + map_error(tag(token), || ParseError::ExpectedToken(token.to_string())) + } -// /// Parse an IRI representing a constant. -// fn parse_iri_constant<'a>( -// prefixes: &'a RefCell>, -// ) -> impl FnMut(Span<'a>) -> IntermediateResult<'a, AnyDataValue> { -// map_error( -// move |input| { -// let (remainder, name) = traced( -// "parse_iri_constant", -// alt(( -// map(sparql::iriref, |name| sparql::Name::IriReference(&name)), -// sparql::prefixed_name, -// sparql::blank_node_label, -// map(parse_bare_name, |name| sparql::Name::IriReference(&name)), -// )), -// )(input)?; - -// let resolved = resolve_prefixed_name(&prefixes.borrow(), name) -// .map_err(|e| Err::Failure(e.at(input)))?; - -// Ok((remainder, AnyDataValue::new_iri(resolved))) -// }, -// || ParseError::ExpectedIriConstant, -// ) -// } + /// A combinator that creates a parser for a specific token, + /// surrounded by whitespace or comments. 
+ pub fn space_delimited_token<'a>( + token: &'a str, + ) -> impl FnMut(Span<'a>) -> IntermediateResult> { + map_error( + delimited(multispace_or_comment0, tag(token), multispace_or_comment0), + || ParseError::ExpectedToken(token.to_string()), + ) + } -// fn parse_constant_term<'a>( -// prefixes: &'a RefCell>, -// ) -> impl FnMut(Span<'a>) -> IntermediateResult<'a, AnyDataValue> { -// traced( -// "parse_constant_term", -// alt(( -// parse_iri_constant(prefixes), -// turtle::numeric_literal, -// map_res(turtle::rdf_literal, move |literal| { -// resolve_prefixed_rdf_literal(&prefixes.borrow(), literal) -// }), -// map(turtle::string, move |literal| { -// AnyDataValue::new_plain_string(literal.to_string()) -// }), -// )), -// ) -// } + /// Expand a prefix. + fn resolve_prefix<'a>( + prefixes: &'a HashMap<&'a str, &'a str>, + prefix: &'a str, + ) -> Result<&'a str, ParseError> { + prefixes + .get(prefix) + .copied() + .ok_or_else(|| ParseError::UndeclaredPrefix(prefix.to_string())) + } -// /// Parse a ground term. -// pub fn parse_ground_term<'a>( -// prefixes: &'a RefCell>, -// ) -> impl FnMut(Span<'a>) -> IntermediateResult<'a, PrimitiveTerm> { -// traced( -// "parse_ground_term", -// map_error( -// map(parse_constant_term(prefixes), PrimitiveTerm::GroundTerm), -// || ParseError::ExpectedGroundTerm, -// ), -// ) -// } + /// Expand a prefixed name. + fn resolve_prefixed_name( + prefixes: &HashMap<&str, &str>, + name: sparql::Name, + ) -> Result { + match name { + sparql::Name::IriReference(iri) => Ok(iri.to_string()), + sparql::Name::PrefixedName { prefix, local } => { + resolve_prefix(prefixes, prefix).map(|iri| format!("{iri}{local}")) + } + sparql::Name::BlankNode(label) => Ok(format!("_:{label}")), + } + } -// /// The main parser. Holds a hash map for -// /// prefixes, as well as the base IRI. -// #[derive(Debug, Default)] -// pub struct RuleParser<'a> { -// /// The base IRI, if set. -// base: RefCell>, -// /// A map from Prefixes to IRIs. -// prefixes: RefCell>, -// /// Number counting up for generating distinct wildcards. -// wildcard_generator: RefCell, -// } + /// Resolve prefixes in a [turtle::RdfLiteral]. + fn resolve_prefixed_rdf_literal( + prefixes: &HashMap<&str, &str>, + literal: turtle::RdfLiteral, + ) -> Result { + match literal { + turtle::RdfLiteral::LanguageString { value, tag } => Ok( + AnyDataValue::new_language_tagged_string(value.to_string(), tag.to_string()), + ), + turtle::RdfLiteral::DatatypeValue { value, datatype } => { + AnyDataValue::new_from_typed_literal( + value.to_string(), + resolve_prefixed_name(prefixes, datatype) + .expect("prefix should have been registered during parsing"), + ) + } + } + } -// impl<'a> RuleParser<'a> { -// /// Construct a new [RuleParser]. -// pub fn new() -> Self { -// Default::default() -// } + #[traced("parser")] + pub(crate) fn parse_bare_name(input: Span<'_>) -> IntermediateResult> { + map_error( + recognize(pair( + alpha1, + opt(many1(satisfy(|c| { + ['0'..='9', 'a'..='z', 'A'..='Z', '-'..='-', '_'..='_'] + .iter() + .any(|range| range.contains(&c)) + }))), + )), + || ParseError::ExpectedBareName, + )(input) + } -// fn parse_complex_constant_term( -// &'a self, -// ) -> impl FnMut(Span<'a>) -> IntermediateResult<'a, AnyDataValue> { -// traced( -// "parse_complex_constant_term", -// // Note: The explicit |s| in the cases below is important to enable proper type -// // reasoning in rust. Without it, unresolved opaque types appear in the recursion. 
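To make the resolution rules above concrete: an IRI reference passes through unchanged, a prefixed name is the registered IRI concatenated with the local part, and a blank-node label keeps its `_:` form. An illustrative check (whether the stored key carries a trailing colon depends on `sparql::pname_ns`):

    let mut prefixes = HashMap::new();
    prefixes.insert("ex", "http://example.org/");
    // sparql::Name::PrefixedName { prefix: "ex", local: "p" }
    // then resolves to "http://example.org/p"
    assert_eq!(resolve_prefix(&prefixes, "ex").unwrap(), "http://example.org/");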
-// alt(( -// parse_constant_term(&self.prefixes), -// map(|s| self.parse_tuple_literal()(s), AnyDataValue::from), -// map(|s| self.parse_map_literal()(s), AnyDataValue::from), -// )), -// ) -// } + #[traced("parser")] + fn parse_simple_name(input: Span<'_>) -> IntermediateResult> { + map_error( + recognize(pair( + alpha1, + opt(preceded( + many0(tag(" ")), + separated_list1( + many1(tag(" ")), + many1(satisfy(|c| { + ['0'..='9', 'a'..='z', 'A'..='Z', '_'..='_'] + .iter() + .any(|range| range.contains(&c)) + })), + ), + )), + )), + || ParseError::ExpectedBareName, + )(input) + } -// /// Parse the dot that ends declarations, optionally surrounded by spaces. -// fn parse_dot(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult> { -// traced("parse_dot", space_delimited_token(".")) -// } + /// Parse an IRI representing a constant. + fn parse_iri_constant<'a>( + prefixes: &'a RefCell>, + ) -> impl FnMut(Span<'a>) -> IntermediateResult<'a, AnyDataValue> { + map_error( + move |input| { + let (remainder, name) = traced( + "parse_iri_constant", + alt(( + map(sparql::iriref, |name| sparql::Name::IriReference(&name)), + sparql::prefixed_name, + sparql::blank_node_label, + map(parse_bare_name, |name| sparql::Name::IriReference(&name)), + )), + )(input)?; -// /// Parse a comma, optionally surrounded by spaces. -// fn parse_comma(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult> { -// traced("parse_comma", space_delimited_token(",")) -// } + let resolved = resolve_prefixed_name(&prefixes.borrow(), name) + .map_err(|e| Err::Failure(e.at(input)))?; -// /// Parse an equality sign, optionally surrounded by spaces. -// fn parse_equals(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult> { -// traced("parse_equals", space_delimited_token("=")) -// } + Ok((remainder, AnyDataValue::new_iri(resolved))) + }, + || ParseError::ExpectedIriConstant, + ) + } -// /// Parse a negation sign (`~`), optionally surrounded by spaces. -// fn parse_not(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult> { -// traced("parse_not", space_delimited_token("~")) -// } + fn parse_constant_term<'a>( + prefixes: &'a RefCell>, + ) -> impl FnMut(Span<'a>) -> IntermediateResult<'a, AnyDataValue> { + traced( + "parse_constant_term", + alt(( + parse_iri_constant(prefixes), + turtle::numeric_literal, + map_res(turtle::rdf_literal, move |literal| { + resolve_prefixed_rdf_literal(&prefixes.borrow(), literal) + }), + map(turtle::string, move |literal| { + AnyDataValue::new_plain_string(literal.to_string()) + }), + )), + ) + } -// /// Parse an arrow (`:-`), optionally surrounded by spaces. -// fn parse_arrow(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult> { -// traced("parse_arrow", space_delimited_token(":-")) -// } + /// Parse a ground term. + pub fn parse_ground_term<'a>( + prefixes: &'a RefCell>, + ) -> impl FnMut(Span<'a>) -> IntermediateResult<'a, PrimitiveTerm> { + traced( + "parse_ground_term", + map_error( + map(parse_constant_term(prefixes), PrimitiveTerm::GroundTerm), + || ParseError::ExpectedGroundTerm, + ), + ) + } -// /// Parse an opening parenthesis, optionally surrounded by spaces. -// fn parse_open_parenthesis(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult> { -// traced("parse_open_parenthesis", space_delimited_token("(")) -// } + /// The main parser. Holds a hash map for + /// prefixes, as well as the base IRI. + #[derive(Debug, Default)] + pub struct RuleParser<'a> { + /// The base IRI, if set. + base: RefCell>, + /// A map from Prefixes to IRIs. 
+        prefixes: RefCell<HashMap<&'a str, &'a str>>,
+        /// Number counting up for generating distinct wildcards.
+        wildcard_generator: RefCell<usize>,
+    }
+
+    impl<'a> RuleParser<'a> {
+        /// Construct a new [RuleParser].
+        pub fn new() -> Self {
+            Default::default()
+        }
+
+        fn parse_complex_constant_term(
+            &'a self,
+        ) -> impl FnMut(Span<'a>) -> IntermediateResult<'a, AnyDataValue> {
+            traced(
+                "parse_complex_constant_term",
+                // Note: The explicit |s| in the cases below is important to enable proper type
+                // reasoning in rust. Without it, unresolved opaque types appear in the recursion.
+                alt((
+                    parse_constant_term(&self.prefixes),
+                    map(|s| self.parse_tuple_literal()(s), AnyDataValue::from),
+                    map(|s| self.parse_map_literal()(s), AnyDataValue::from),
+                )),
+            )
+        }
+
+        /// Parse the dot that ends declarations, optionally surrounded by spaces.
+        fn parse_dot(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult<Span<'a>> {
+            traced("parse_dot", space_delimited_token("."))
+        }
+
+        /// Parse a comma, optionally surrounded by spaces.
+        fn parse_comma(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult<Span<'a>> {
+            traced("parse_comma", space_delimited_token(","))
+        }
+
+        /// Parse an equality sign, optionally surrounded by spaces.
+ fn parse_equals(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult> { + traced("parse_equals", space_delimited_token("=")) + } -// fn parse_prefix(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult> { -// traced( -// "parse_prefix", -// map_error( -// move |input| { -// let (remainder, (prefix, iri)) = delimited( -// terminated(token("@prefix"), cut(multispace_or_comment1)), -// cut(tuple(( -// cut(terminated(sparql::pname_ns, multispace_or_comment1)), -// cut(sparql::iriref), -// ))), -// cut(self.parse_dot()), -// )(input)?; - -// log::debug!(target: "parser", r#"parse_prefix: got prefix "{prefix}" for iri "{iri}""#); -// if self.prefixes.borrow_mut().insert(&prefix, &iri).is_some() { -// Err(Err::Failure( -// ParseError::RedeclaredPrefix(prefix.to_string()).at(input), -// )) -// } else { -// Ok((remainder, prefix)) -// } -// }, -// || ParseError::ExpectedPrefixDeclaration, -// ), -// ) -// } + /// Parse a negation sign (`~`), optionally surrounded by spaces. + fn parse_not(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult> { + traced("parse_not", space_delimited_token("~")) + } -// /// Parse a data source declaration. -// /// This is a backwards compatibility feature for Rulewerk syntax. Nemo normally uses -// /// `@import` instead of `@source`. The difference in `@source` is that (1) a predicate -// /// arity is given in brackets after the predicate name, (2) the import predicate names -// /// are one of `load-csv`, `load-tsv`, `load-rdf`, and `sparql`, with the only parameter -// /// being the file name or IRI to be loaded. -// fn parse_source(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { -// traced( -// "parse_source", -// map_error( -// move |input| { -// let (remainder, (predicate, arity)) = preceded( -// terminated(token("@source"), cut(multispace_or_comment1)), -// cut(self.parse_qualified_predicate_name()), -// )(input)?; - -// let (remainder, datasource): (_, Result<_, ParseError>) = cut(delimited( -// delimited(multispace_or_comment0, token(":"), multispace_or_comment1), -// alt(( -// map( -// delimited( -// preceded(token("load-csv"), cut(self.parse_open_parenthesis())), -// turtle::string, -// self.parse_close_parenthesis(), -// ), -// |filename| { -// let attributes = MapDataValue::from_iter([ -// ( -// AnyDataValue::new_iri( -// PARAMETER_NAME_RESOURCE.to_string(), -// ), -// AnyDataValue::new_plain_string(filename.to_string()), -// ), -// ( -// AnyDataValue::new_iri( -// PARAMETER_NAME_FORMAT.to_string(), -// ), -// TupleDataValue::from_iter( -// vec![VALUE_FORMAT_ANY; arity] -// .iter() -// .map(|format| { -// AnyDataValue::new_plain_string( -// (*format).to_string(), -// ) -// }) -// .collect::>(), -// ) -// .into(), -// ), -// ]); -// Ok(ImportDirective::from(ImportExportDirective { -// predicate: predicate.clone(), -// format: FileFormat::CSV, -// attributes, -// })) -// }, -// ), -// map( -// delimited( -// preceded(token("load-tsv"), cut(self.parse_open_parenthesis())), -// turtle::string, -// self.parse_close_parenthesis(), -// ), -// |filename| { -// let attributes = MapDataValue::from_iter([ -// ( -// AnyDataValue::new_iri( -// PARAMETER_NAME_RESOURCE.to_string(), -// ), -// AnyDataValue::new_plain_string(filename.to_string()), -// ), -// ( -// AnyDataValue::new_iri( -// PARAMETER_NAME_FORMAT.to_string(), -// ), -// TupleDataValue::from_iter( -// vec![VALUE_FORMAT_ANY; arity] -// .iter() -// .map(|format| { -// AnyDataValue::new_plain_string( -// (*format).to_string(), -// ) -// }) -// .collect::>(), -// ) -// .into(), -// ), -// ]); 
-// Ok(ImportDirective::from(ImportExportDirective { -// predicate: predicate.clone(), -// format: FileFormat::TSV, -// attributes, -// })) -// }, -// ), -// map( -// delimited( -// preceded(token("load-rdf"), cut(self.parse_open_parenthesis())), -// turtle::string, -// self.parse_close_parenthesis(), -// ), -// |filename| { -// let mut attribute_pairs = vec![ -// ( -// AnyDataValue::new_iri( -// PARAMETER_NAME_RESOURCE.to_string(), -// ), -// AnyDataValue::new_plain_string(filename.to_string()), -// ), -// ( -// AnyDataValue::new_iri( -// PARAMETER_NAME_FORMAT.to_string(), -// ), -// TupleDataValue::from_iter( -// vec![VALUE_FORMAT_ANY; arity] -// .iter() -// .map(|format| { -// AnyDataValue::new_plain_string( -// (*format).to_string(), -// ) -// }) -// .collect::>(), -// ) -// .into(), -// ), -// ]; -// if let Some(base) = self.base() { -// attribute_pairs.push(( -// AnyDataValue::new_iri(PARAMETER_NAME_BASE.to_string()), -// AnyDataValue::new_iri(base.to_string()), -// )); -// } - -// let attributes = MapDataValue::from_iter(attribute_pairs); - -// Ok(ImportDirective::from(ImportExportDirective { -// predicate: predicate.clone(), -// format: FileFormat::RDF(RdfVariant::Unspecified), -// attributes, -// })) -// }, -// ), -// map( -// delimited( -// preceded(token("sparql"), cut(self.parse_open_parenthesis())), -// tuple(( -// self.parse_iri_identifier(), -// delimited( -// self.parse_comma(), -// turtle::string, -// self.parse_comma(), -// ), -// turtle::string, -// )), -// self.parse_close_parenthesis(), -// ), -// |(_endpoint, _projection, _query)| { -// Err(ParseError::UnsupportedSparqlSource(predicate.clone().0)) -// }, -// ), -// )), -// cut(self.parse_dot()), -// ))( -// remainder -// )?; - -// let spec = datasource.map_err(|e| Err::Failure(e.at(input)))?; - -// Ok((remainder, spec)) -// }, -// || ParseError::ExpectedDataSourceDeclaration, -// ), -// ) -// } + /// Parse an arrow (`:-`), optionally surrounded by spaces. + fn parse_arrow(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult> { + traced("parse_arrow", space_delimited_token(":-")) + } -// /// Parse an output directive. -// fn parse_output_directive(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { -// traced( -// "parse_output", -// map_error( -// delimited( -// terminated(token("@output"), cut(multispace_or_comment1)), -// cut(map_res::<_, _, _, _, Error, _, _>( -// self.parse_iri_like_identifier(), -// Ok, -// )), -// cut(self.parse_dot()), -// ), -// || ParseError::ExpectedOutputDeclaration, -// ), -// ) -// } + /// Parse an opening parenthesis, optionally surrounded by spaces. + fn parse_open_parenthesis( + &'a self, + ) -> impl FnMut(Span<'a>) -> IntermediateResult> { + traced("parse_open_parenthesis", space_delimited_token("(")) + } -// /// Parse an entry in a [MapDataValue], i.e., am [AnyDataValue]--[AnyDataValue] pair. -// fn parse_map_entry( -// &'a self, -// ) -> impl FnMut(Span<'a>) -> IntermediateResult<(AnyDataValue, AnyDataValue)> { -// traced( -// "parse_map_entry", -// separated_pair( -// self.parse_complex_constant_term(), -// self.parse_equals(), -// map(self.parse_complex_constant_term(), |term| term), -// ), -// ) -// } + /// Parse a closing parenthesis, optionally surrounded by spaces. + fn parse_close_parenthesis( + &'a self, + ) -> impl FnMut(Span<'a>) -> IntermediateResult> { + traced("parse_close_parenthesis", space_delimited_token(")")) + } -// /// Parse a ground map literal. 
-// fn parse_map_literal(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { -// traced( -// "parse_map_literal", -// delimited( -// self.parse_open_brace(), -// map( -// separated_list0(self.parse_comma(), self.parse_map_entry()), -// MapDataValue::from_iter, -// ), -// self.parse_close_brace(), -// ), -// ) -// } + /// Matches an opening parenthesis, + /// then gets an object from the parser, + /// and finally matches an closing parenthesis. + pub fn parenthesised<'b, O, F>( + &'a self, + parser: F, + ) -> impl FnMut(Span<'a>) -> IntermediateResult + where + O: Debug + 'a, + F: FnMut(Span<'a>) -> IntermediateResult + 'a, + { + traced( + "parenthesised", + map_error( + delimited( + self.parse_open_parenthesis(), + parser, + self.parse_close_parenthesis(), + ), + || ParseError::ExpectedParenthesisedExpression, + ), + ) + } -// /// Parse a ground tuple literal. -// pub fn parse_tuple_literal( -// &'a self, -// ) -> impl FnMut(Span<'a>) -> IntermediateResult { -// traced( -// "parse_tuple_literal", -// delimited( -// self.parse_open_parenthesis(), -// map( -// separated_list0(self.parse_comma(), self.parse_complex_constant_term()), -// TupleDataValue::from_iter, -// ), -// self.parse_close_parenthesis(), -// ), -// ) -// } + /// Parse an opening brace, optionally surrounded by spaces. + fn parse_open_brace(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult> { + traced("parse_open_brace", space_delimited_token("{")) + } -// /// Parse a file format name. -// fn parse_file_format(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { -// traced("parse_file_format", move |input| { -// let (remainder, format) = -// map_res(alpha1, |format: Span<'a>| match *format.fragment() { -// FILE_FORMAT_CSV => Ok(FileFormat::CSV), -// FILE_FORMAT_DSV => Ok(FileFormat::DSV), -// FILE_FORMAT_TSV => Ok(FileFormat::TSV), -// FILE_FORMAT_RDF_UNSPECIFIED => Ok(FileFormat::RDF(RdfVariant::Unspecified)), -// FILE_FORMAT_RDF_NTRIPLES => Ok(FileFormat::RDF(RdfVariant::NTriples)), -// FILE_FORMAT_RDF_NQUADS => Ok(FileFormat::RDF(RdfVariant::NQuads)), -// FILE_FORMAT_RDF_TURTLE => Ok(FileFormat::RDF(RdfVariant::Turtle)), -// FILE_FORMAT_RDF_TRIG => Ok(FileFormat::RDF(RdfVariant::TriG)), -// FILE_FORMAT_RDF_XML => Ok(FileFormat::RDF(RdfVariant::RDFXML)), -// FILE_FORMAT_JSON => Ok(FileFormat::JSON), -// _ => Err(ParseError::FileFormatError(format.fragment().to_string())), -// })(input)?; - -// Ok((remainder, format)) -// }) -// } + /// Parse a closing brace, optionally surrounded by spaces. + fn parse_close_brace(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult> { + traced("parse_close_brace", space_delimited_token("}")) + } -// /// Parse an import/export specification. -// fn parse_import_export_spec( -// &'a self, -// ) -> impl FnMut(Span<'a>) -> IntermediateResult { -// traced("parse_import_export_spec", move |input| { -// let (remainder, predicate) = self.parse_iri_like_identifier()(input)?; -// let (remainder, format) = delimited( -// space_delimited_token(":-"), -// self.parse_file_format(), -// multispace_or_comment0, -// )(remainder)?; -// let (remainder, attributes) = self.parse_map_literal()(remainder)?; -// Ok(( -// remainder, -// ImportExportDirective { -// predicate, -// format, -// attributes, -// }, -// )) -// }) -// } + /// Parse a base declaration. 
+        fn parse_base(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult<Identifier> {
+            traced(
+                "parse_base",
+                map_error(
+                    move |input| {
+                        let (remainder, base) = delimited(
+                            terminated(token("@base"), cut(multispace_or_comment1)),
+                            cut(sparql::iriref),
+                            cut(self.parse_dot()),
+                        )(input)?;
+
+                        log::debug!(target: "parser", r#"parse_base: set new base: "{base}""#);
+                        *self.base.borrow_mut() = Some(&base);
+
+                        Ok((remainder, Identifier(base.to_string())))
+                    },
+                    || ParseError::ExpectedBaseDeclaration,
+                ),
+            )
+        }
+
+        fn parse_prefix(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult<Span<'a>> {
+            traced(
+                "parse_prefix",
+                map_error(
+                    move |input| {
+                        let (remainder, (prefix, iri)) = delimited(
+                            terminated(token("@prefix"), cut(multispace_or_comment1)),
+                            cut(tuple((
+                                cut(terminated(sparql::pname_ns, multispace_or_comment1)),
+                                cut(sparql::iriref),
+                            ))),
+                            cut(self.parse_dot()),
+                        )(input)?;
+
+                        log::debug!(target: "parser", r#"parse_prefix: got prefix "{prefix}" for iri "{iri}""#);
+                        if self.prefixes.borrow_mut().insert(&prefix, &iri).is_some() {
+                            Err(Err::Failure(
+                                ParseError::RedeclaredPrefix(prefix.to_string()).at(input),
+                            ))
+                        } else {
+                            Ok((remainder, prefix))
+                        }
+                    },
+                    || ParseError::ExpectedPrefixDeclaration,
+                ),
+            )
+        }
+
+        /// Parse a data source declaration.
+        /// This is a backwards compatibility feature for Rulewerk syntax. Nemo normally uses
+        /// `@import` instead of `@source`. The difference in `@source` is that (1) a predicate
+        /// arity is given in brackets after the predicate name, (2) the import predicate names
+        /// are one of `load-csv`, `load-tsv`, `load-rdf`, and `sparql`, with the only parameter
+        /// being the file name or IRI to be loaded.
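For reference, the legacy declarations this parser accepts look as follows (predicate and file names illustrative); each is lowered to the same attribute map an `@import` would produce:

    // @source p[2] : load-csv("data.csv") .
    // @source triples[3] : load-rdf("data.nt.gz") .
    // roughly the modern equivalent of the first line:
    // @import p :- csv { resource = "data.csv" } .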
+ fn parse_source(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { + traced( + "parse_source", + map_error( + move |input| { + let (remainder, (predicate, arity)) = preceded( + terminated(token("@source"), cut(multispace_or_comment1)), + cut(self.parse_qualified_predicate_name()), + )(input)?; + + let (remainder, datasource): (_, Result<_, ParseError>) = cut(delimited( + delimited(multispace_or_comment0, token(":"), multispace_or_comment1), + alt(( + map( + delimited( + preceded( + token("load-csv"), + cut(self.parse_open_parenthesis()), + ), + turtle::string, + self.parse_close_parenthesis(), + ), + |filename| { + let attributes = MapDataValue::from_iter([ + ( + AnyDataValue::new_iri( + PARAMETER_NAME_RESOURCE.to_string(), + ), + AnyDataValue::new_plain_string( + filename.to_string(), + ), + ), + ( + AnyDataValue::new_iri( + PARAMETER_NAME_FORMAT.to_string(), + ), + TupleDataValue::from_iter( + vec![VALUE_FORMAT_ANY; arity] + .iter() + .map(|format| { + AnyDataValue::new_plain_string( + (*format).to_string(), + ) + }) + .collect::>(), + ) + .into(), + ), + ]); + Ok(ImportDirective::from(ImportExportDirective { + predicate: predicate.clone(), + format: FileFormat::CSV, + attributes, + })) + }, + ), + map( + delimited( + preceded( + token("load-tsv"), + cut(self.parse_open_parenthesis()), + ), + turtle::string, + self.parse_close_parenthesis(), + ), + |filename| { + let attributes = MapDataValue::from_iter([ + ( + AnyDataValue::new_iri( + PARAMETER_NAME_RESOURCE.to_string(), + ), + AnyDataValue::new_plain_string( + filename.to_string(), + ), + ), + ( + AnyDataValue::new_iri( + PARAMETER_NAME_FORMAT.to_string(), + ), + TupleDataValue::from_iter( + vec![VALUE_FORMAT_ANY; arity] + .iter() + .map(|format| { + AnyDataValue::new_plain_string( + (*format).to_string(), + ) + }) + .collect::>(), + ) + .into(), + ), + ]); + Ok(ImportDirective::from(ImportExportDirective { + predicate: predicate.clone(), + format: FileFormat::TSV, + attributes, + })) + }, + ), + map( + delimited( + preceded( + token("load-rdf"), + cut(self.parse_open_parenthesis()), + ), + turtle::string, + self.parse_close_parenthesis(), + ), + |filename| { + let mut attribute_pairs = vec![ + ( + AnyDataValue::new_iri( + PARAMETER_NAME_RESOURCE.to_string(), + ), + AnyDataValue::new_plain_string( + filename.to_string(), + ), + ), + ( + AnyDataValue::new_iri( + PARAMETER_NAME_FORMAT.to_string(), + ), + TupleDataValue::from_iter( + vec![VALUE_FORMAT_ANY; arity] + .iter() + .map(|format| { + AnyDataValue::new_plain_string( + (*format).to_string(), + ) + }) + .collect::>(), + ) + .into(), + ), + ]; + if let Some(base) = self.base() { + attribute_pairs.push(( + AnyDataValue::new_iri( + PARAMETER_NAME_BASE.to_string(), + ), + AnyDataValue::new_iri(base.to_string()), + )); + } + + let attributes = MapDataValue::from_iter(attribute_pairs); + + Ok(ImportDirective::from(ImportExportDirective { + predicate: predicate.clone(), + format: FileFormat::RDF(RdfVariant::Unspecified), + attributes, + })) + }, + ), + map( + delimited( + preceded( + token("sparql"), + cut(self.parse_open_parenthesis()), + ), + tuple(( + self.parse_iri_identifier(), + delimited( + self.parse_comma(), + turtle::string, + self.parse_comma(), + ), + turtle::string, + )), + self.parse_close_parenthesis(), + ), + |(_endpoint, _projection, _query)| { + Err(ParseError::UnsupportedSparqlSource( + predicate.clone().0, + )) + }, + ), + )), + cut(self.parse_dot()), + ))( + remainder + )?; + + let spec = datasource.map_err(|e| Err::Failure(e.at(input)))?; + + Ok((remainder, 
spec)) + }, + || ParseError::ExpectedDataSourceDeclaration, + ), + ) + } -// /// Parse a statement. -// fn parse_statement(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { -// traced( -// "parse_statement", -// map_error( -// alt(( -// map(self.parse_fact(), Statement::Fact), -// map(self.parse_rule(), Statement::Rule), -// )), -// || ParseError::ExpectedStatement, -// ), -// ) -// } + /// Parse an output directive. + fn parse_output_directive( + &'a self, + ) -> impl FnMut(Span<'a>) -> IntermediateResult { + traced( + "parse_output", + map_error( + delimited( + terminated(token("@output"), cut(multispace_or_comment1)), + cut(map_res::<_, _, _, _, Error, _, _>( + self.parse_iri_like_identifier(), + Ok, + )), + cut(self.parse_dot()), + ), + || ParseError::ExpectedOutputDeclaration, + ), + ) + } -// /// Parse a fact. -// fn parse_fact(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { -// traced( -// "parse_fact", -// map_error( -// move |input| { -// let (remainder, (predicate, terms)) = terminated( -// pair( -// self.parse_iri_like_identifier(), -// self.parenthesised(separated_list1( -// self.parse_comma(), -// parse_ground_term(&self.prefixes), -// )), -// ), -// self.parse_dot(), -// )(input)?; - -// let predicate_name = predicate.name(); -// log::trace!(target: "parser", "found fact {predicate_name}({terms:?})"); - -// // We do not allow complex term trees in facts for now -// let terms = terms.into_iter().map(Term::Primitive).collect(); - -// Ok((remainder, Fact(Atom::new(predicate, terms)))) -// }, -// || ParseError::ExpectedFact, -// ), -// ) -// } + /// Parse an entry in a [MapDataValue], i.e., am [AnyDataValue]--[AnyDataValue] pair. + fn parse_map_entry( + &'a self, + ) -> impl FnMut(Span<'a>) -> IntermediateResult<(AnyDataValue, AnyDataValue)> { + traced( + "parse_map_entry", + separated_pair( + self.parse_complex_constant_term(), + self.parse_equals(), + map(self.parse_complex_constant_term(), |term| term), + ), + ) + } -// /// Parse an IRI identifier, e.g. for predicate names. -// fn parse_iri_identifier(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { -// map_error( -// move |input| { -// let (remainder, name) = traced( -// "parse_iri_identifier", -// alt(( -// map(sparql::iriref, |name| sparql::Name::IriReference(&name)), -// sparql::prefixed_name, -// sparql::blank_node_label, -// )), -// )(input)?; - -// Ok(( -// remainder, -// Identifier( -// resolve_prefixed_name(&self.prefixes.borrow(), name) -// .map_err(|e| Err::Failure(e.at(input)))?, -// ), -// )) -// }, -// || ParseError::ExpectedIriIdentifier, -// ) -// } + /// Parse a ground map literal. + fn parse_map_literal(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { + traced( + "parse_map_literal", + delimited( + self.parse_open_brace(), + map( + separated_list0(self.parse_comma(), self.parse_map_entry()), + MapDataValue::from_iter, + ), + self.parse_close_brace(), + ), + ) + } -// /// Parse an IRI-like identifier. -// /// -// /// This is being used for: -// /// * predicate names -// /// * built-in functions in term trees -// fn parse_iri_like_identifier( -// &'a self, -// ) -> impl FnMut(Span<'a>) -> IntermediateResult { -// traced( -// "parse_iri_like_identifier", -// map_error( -// alt(( -// self.parse_iri_identifier(), -// self.parse_bare_iri_like_identifier(), -// )), -// || ParseError::ExpectedIriLikeIdentifier, -// ), -// ) -// } + /// Parse a ground tuple literal. 
+ pub fn parse_tuple_literal( + &'a self, + ) -> impl FnMut(Span<'a>) -> IntermediateResult { + traced( + "parse_tuple_literal", + delimited( + self.parse_open_parenthesis(), + map( + separated_list0(self.parse_comma(), self.parse_complex_constant_term()), + TupleDataValue::from_iter, + ), + self.parse_close_parenthesis(), + ), + ) + } -// /// Parse a qualified predicate name – currently, this is a -// /// predicate name together with its arity. -// /// -// /// FIXME: Obsolete. Can be removed in the future. -// fn parse_qualified_predicate_name( -// &'a self, -// ) -> impl FnMut(Span<'a>) -> IntermediateResult<(Identifier, usize)> { -// traced( -// "parse_qualified_predicate_name", -// pair( -// self.parse_iri_like_identifier(), -// preceded( -// multispace_or_comment0, -// delimited( -// token("["), -// cut(map_res(digit1, |number: Span<'a>| number.parse::())), -// cut(token("]")), -// ), -// ), -// ), -// ) -// } + /// Parse a file format name. + fn parse_file_format(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { + traced("parse_file_format", move |input| { + let (remainder, format) = + map_res(alpha1, |format: Span<'a>| match *format.fragment() { + FILE_FORMAT_CSV => Ok(FileFormat::CSV), + FILE_FORMAT_DSV => Ok(FileFormat::DSV), + FILE_FORMAT_TSV => Ok(FileFormat::TSV), + FILE_FORMAT_RDF_UNSPECIFIED => Ok(FileFormat::RDF(RdfVariant::Unspecified)), + FILE_FORMAT_RDF_NTRIPLES => Ok(FileFormat::RDF(RdfVariant::NTriples)), + FILE_FORMAT_RDF_NQUADS => Ok(FileFormat::RDF(RdfVariant::NQuads)), + FILE_FORMAT_RDF_TURTLE => Ok(FileFormat::RDF(RdfVariant::Turtle)), + FILE_FORMAT_RDF_TRIG => Ok(FileFormat::RDF(RdfVariant::TriG)), + FILE_FORMAT_RDF_XML => Ok(FileFormat::RDF(RdfVariant::RDFXML)), + FILE_FORMAT_JSON => Ok(FileFormat::JSON), + _ => Err(ParseError::FileFormatError(format.fragment().to_string())), + })(input)?; + + Ok((remainder, format)) + }) + } -// /// Parse an IRI-like identifier (e.g. a predicate name) that is not an IRI. -// fn parse_bare_iri_like_identifier( -// &'a self, -// ) -> impl FnMut(Span<'a>) -> IntermediateResult { -// traced("parse_bare_iri_like_identifier", move |input| { -// let (remainder, name) = parse_bare_name(input)?; + /// Parse an import/export specification. + fn parse_import_export_spec( + &'a self, + ) -> impl FnMut(Span<'a>) -> IntermediateResult { + traced("parse_import_export_spec", move |input| { + let (remainder, predicate) = self.parse_iri_like_identifier()(input)?; + let (remainder, format) = delimited( + space_delimited_token(":-"), + self.parse_file_format(), + multispace_or_comment0, + )(remainder)?; + let (remainder, attributes) = self.parse_map_literal()(remainder)?; + Ok(( + remainder, + ImportExportDirective { + predicate, + format, + attributes, + }, + )) + }) + } -// Ok((remainder, Identifier(name.to_string()))) -// }) -// } + /// Parse an import directive. + fn parse_import(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { + traced( + "parse_import", + delimited( + terminated(token("@import"), multispace_or_comment1), + cut(map(self.parse_import_export_spec(), ImportDirective::from)), + cut(self.parse_dot()), + ), + ) + } -// /// Parse a rule. 
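Taken together, an import/export specification is a predicate name, `:-`, a format recognised by `parse_file_format`, and a map literal of attributes; `parse_export` below reuses the same shape (names illustrative):

    // @import edge :- csv { resource = "edges.csv" } .
    // @export reachable :- csv { resource = "reachable.csv" } .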
-// fn parse_rule(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { -// traced( -// "parse_rule", -// map_error( -// move |input| { -// let (remainder, (head, body)) = pair( -// terminated( -// separated_list1(self.parse_comma(), self.parse_atom()), -// self.parse_arrow(), -// ), -// cut(terminated( -// separated_list1(self.parse_comma(), self.parse_body_expression()), -// self.parse_dot(), -// )), -// )(input)?; - -// log::trace!(target: "parser", r#"found rule "{head:?}" :- "{body:?}""#); - -// let literals = body -// .iter() -// .filter_map(|expr| match expr { -// BodyExpression::Literal(l) => Some(l.clone()), -// _ => None, -// }) -// .collect(); -// let constraints = body -// .into_iter() -// .filter_map(|expr| match expr { -// BodyExpression::Constraint(c) => Some(c), -// _ => None, -// }) -// .collect(); -// Ok(( -// remainder, -// Rule::new_validated(head, literals, constraints) -// .map_err(|e| Err::Failure(e.at(input)))?, -// )) -// }, -// || ParseError::ExpectedRule, -// ), -// ) -// } + /// Parse an export directive. + fn parse_export(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { + traced( + "parse_export", + delimited( + terminated(token("@export"), multispace_or_comment1), + cut(map(self.parse_import_export_spec(), ExportDirective::from)), + cut(self.parse_dot()), + ), + ) + } -// /// Parse an atom. -// fn parse_atom(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { -// traced( -// "parse_atom", -// map_error( -// move |input| { -// let (remainder, predicate) = self.parse_iri_like_identifier()(input)?; -// let (remainder, terms) = delimited( -// self.parse_open_parenthesis(), -// cut(separated_list1(self.parse_comma(), self.parse_term())), -// cut(self.parse_close_parenthesis()), -// )(remainder)?; - -// let predicate_name = predicate.name(); -// log::trace!(target: "parser", "found atom {predicate_name}({terms:?})"); - -// Ok((remainder, Atom::new(predicate, terms))) -// }, -// || ParseError::ExpectedAtom, -// ), -// ) -// } + /// Parse a statement. + fn parse_statement(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { + traced( + "parse_statement", + map_error( + alt(( + map(self.parse_fact(), Statement::Fact), + map(self.parse_rule(), Statement::Rule), + )), + || ParseError::ExpectedStatement, + ), + ) + } -// /// Parse a [PrimitiveTerm]. -// fn parse_primitive_term(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { -// traced( -// "parse_primitive_term", -// map_error( -// alt((parse_ground_term(&self.prefixes), self.parse_variable())), -// || ParseError::ExpectedPrimitiveTerm, -// ), -// ) -// } + /// Parse a fact. + fn parse_fact(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { + traced( + "parse_fact", + map_error( + move |input| { + let (remainder, (predicate, terms)) = terminated( + pair( + self.parse_iri_like_identifier(), + self.parenthesised(separated_list1( + self.parse_comma(), + parse_ground_term(&self.prefixes), + )), + ), + self.parse_dot(), + )(input)?; + + let predicate_name = predicate.name(); + log::trace!(target: "parser", "found fact {predicate_name}({terms:?})"); + + // We do not allow complex term trees in facts for now + let terms = terms.into_iter().map(Term::Primitive).collect(); + + Ok((remainder, Fact(Atom::new(predicate, terms)))) + }, + || ParseError::ExpectedFact, + ), + ) + } -// /// Parse an aggregate term. 
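A fact is thus an IRI-like predicate applied to a parenthesised, comma-separated list of ground terms and closed with a dot; complex term trees are deliberately rejected here. Illustrative inputs:

    // parent(alice, bob) .
    // age("Alice", 42) .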
-// fn parse_aggregate(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { -// traced( -// "parse_aggregate", -// map_error( -// move |input| { -// let (remainder, _) = nom::character::complete::char('#')(input)?; -// let (remainder, aggregate_operation_identifier) = -// self.parse_bare_iri_like_identifier()(remainder)?; -// let (remainder, terms) = self -// .parenthesised(separated_list1(self.parse_comma(), self.parse_term()))( -// remainder, -// )?; - -// if let Some(logical_aggregate_operation) = -// (&aggregate_operation_identifier).into() -// { -// let aggregate = Aggregate { -// logical_aggregate_operation, -// terms, -// }; - -// Ok((remainder, Term::Aggregation(aggregate))) -// } else { -// Err(Err::Failure( -// ParseError::UnknownAggregateOperation( -// aggregate_operation_identifier.name(), -// ) -// .at(input), -// )) -// } -// }, -// || ParseError::ExpectedAggregate, -// ), -// ) -// } + /// Parse an IRI identifier, e.g. for predicate names. + fn parse_iri_identifier( + &'a self, + ) -> impl FnMut(Span<'a>) -> IntermediateResult { + map_error( + move |input| { + let (remainder, name) = traced( + "parse_iri_identifier", + alt(( + map(sparql::iriref, |name| sparql::Name::IriReference(&name)), + sparql::prefixed_name, + sparql::blank_node_label, + )), + )(input)?; -// /// Parse a variable. -// fn parse_variable(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { -// traced( -// "parse_variable", -// map_error( -// map( -// alt(( -// self.parse_universal_variable(), -// self.parse_existential_variable(), -// )), -// PrimitiveTerm::Variable, -// ), -// || ParseError::ExpectedVariable, -// ), -// ) -// } + Ok(( + remainder, + Identifier( + resolve_prefixed_name(&self.prefixes.borrow(), name) + .map_err(|e| Err::Failure(e.at(input)))?, + ), + )) + }, + || ParseError::ExpectedIriIdentifier, + ) + } -// /// Parse a universally quantified variable. -// fn parse_universal_variable(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { -// traced( -// "parse_universal_variable", -// map_error( -// map( -// preceded(token("?"), cut(self.parse_variable_name())), -// Variable::Universal, -// ), -// || ParseError::ExpectedUniversalVariable, -// ), -// ) -// } + /// Parse an IRI-like identifier. + /// + /// This is being used for: + /// * predicate names + /// * built-in functions in term trees + fn parse_iri_like_identifier( + &'a self, + ) -> impl FnMut(Span<'a>) -> IntermediateResult { + traced( + "parse_iri_like_identifier", + map_error( + alt(( + self.parse_iri_identifier(), + self.parse_bare_iri_like_identifier(), + )), + || ParseError::ExpectedIriLikeIdentifier, + ), + ) + } -// /// Parse an existentially quantified variable. -// fn parse_existential_variable( -// &'a self, -// ) -> impl FnMut(Span<'a>) -> IntermediateResult { -// traced( -// "parse_existential_variable", -// map_error( -// map( -// preceded(token("!"), cut(self.parse_variable_name())), -// Variable::Existential, -// ), -// || ParseError::ExpectedExistentialVariable, -// ), -// ) -// } + /// Parse a qualified predicate name – currently, this is a + /// predicate name together with its arity. + /// + /// FIXME: Obsolete. Can be removed in the future. 
+ fn parse_qualified_predicate_name( + &'a self, + ) -> impl FnMut(Span<'a>) -> IntermediateResult<(Identifier, usize)> { + traced( + "parse_qualified_predicate_name", + pair( + self.parse_iri_like_identifier(), + preceded( + multispace_or_comment0, + delimited( + token("["), + cut(map_res(digit1, |number: Span<'a>| number.parse::())), + cut(token("]")), + ), + ), + ), + ) + } -// /// Parse a variable name. -// fn parse_variable_name(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { -// traced( -// "parse_variable", -// map_error( -// move |input| { -// let (remainder, name) = parse_simple_name(input)?; + /// Parse an IRI-like identifier (e.g. a predicate name) that is not an IRI. + fn parse_bare_iri_like_identifier( + &'a self, + ) -> impl FnMut(Span<'a>) -> IntermediateResult { + traced("parse_bare_iri_like_identifier", move |input| { + let (remainder, name) = parse_bare_name(input)?; -// Ok((remainder, name.to_string())) -// }, -// || ParseError::ExpectedVariableName, -// ), -// ) -// } + Ok((remainder, Identifier(name.to_string()))) + }) + } -// /// Parse a literal (i.e., a possibly negated atom). -// fn parse_literal(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { -// traced( -// "parse_literal", -// map_error( -// alt((self.parse_negative_literal(), self.parse_positive_literal())), -// || ParseError::ExpectedLiteral, -// ), -// ) -// } + /// Parse a rule. + fn parse_rule(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { + traced( + "parse_rule", + map_error( + move |input| { + let (remainder, (head, body)) = pair( + terminated( + separated_list1(self.parse_comma(), self.parse_atom()), + self.parse_arrow(), + ), + cut(terminated( + separated_list1(self.parse_comma(), self.parse_body_expression()), + self.parse_dot(), + )), + )(input)?; + + log::trace!(target: "parser", r#"found rule "{head:?}" :- "{body:?}""#); + + let literals = body + .iter() + .filter_map(|expr| match expr { + BodyExpression::Literal(l) => Some(l.clone()), + _ => None, + }) + .collect(); + let constraints = body + .into_iter() + .filter_map(|expr| match expr { + BodyExpression::Constraint(c) => Some(c), + _ => None, + }) + .collect(); + Ok(( + remainder, + Rule::new_validated(head, literals, constraints) + .map_err(|e| Err::Failure(e.at(input)))?, + )) + }, + || ParseError::ExpectedRule, + ), + ) + } -// /// Parse a non-negated literal. -// fn parse_positive_literal(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { -// traced( -// "parse_positive_literal", -// map_error(map(self.parse_atom(), Literal::Positive), || { -// ParseError::ExpectedPositiveLiteral -// }), -// ) -// } + /// Parse an atom. + fn parse_atom(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { + traced( + "parse_atom", + map_error( + move |input| { + let (remainder, predicate) = self.parse_iri_like_identifier()(input)?; + let (remainder, terms) = delimited( + self.parse_open_parenthesis(), + cut(separated_list1(self.parse_comma(), self.parse_term())), + cut(self.parse_close_parenthesis()), + )(remainder)?; + + let predicate_name = predicate.name(); + log::trace!(target: "parser", "found atom {predicate_name}({terms:?})"); + + Ok((remainder, Atom::new(predicate, terms))) + }, + || ParseError::ExpectedAtom, + ), + ) + } -// /// Parse a negated literal. 
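Concretely, `parse_rule` above splits the parsed body into literals and constraints, so a rule like the following carries two body literals and one constraint (`!=` mapping to `ConstraintOperator::Unequals`):

    // ancestor(?x, ?z) :- ancestor(?x, ?y), parent(?y, ?z), ?x != ?z .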
-// fn parse_negative_literal(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { -// traced( -// "parse_negative_literal", -// map_error( -// map( -// preceded(self.parse_not(), cut(self.parse_atom())), -// Literal::Negative, -// ), -// || ParseError::ExpectedNegativeLiteral, -// ), -// ) -// } + /// Parse a [PrimitiveTerm]. + fn parse_primitive_term( + &'a self, + ) -> impl FnMut(Span<'a>) -> IntermediateResult { + traced( + "parse_primitive_term", + map_error( + alt((parse_ground_term(&self.prefixes), self.parse_variable())), + || ParseError::ExpectedPrimitiveTerm, + ), + ) + } -// /// Parse operation that is filters a variable -// fn parse_constraint_operator( -// &'a self, -// ) -> impl FnMut(Span<'a>) -> IntermediateResult { -// traced( -// "parse_constraint_operator", -// map_error( -// delimited( -// multispace_or_comment0, -// alt(( -// value(ConstraintOperator::LessThanEq, token("<=")), -// value(ConstraintOperator::LessThan, token("<")), -// value(ConstraintOperator::Equals, token("=")), -// value(ConstraintOperator::Unequals, token("!=")), -// value(ConstraintOperator::GreaterThanEq, token(">=")), -// value(ConstraintOperator::GreaterThan, token(">")), -// )), -// multispace_or_comment0, -// ), -// || ParseError::ExpectedFilterOperator, -// ), -// ) -// } + /// Parse an aggregate term. + fn parse_aggregate(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { + traced( + "parse_aggregate", + map_error( + move |input| { + let (remainder, _) = nom::character::complete::char('#')(input)?; + let (remainder, aggregate_operation_identifier) = + self.parse_bare_iri_like_identifier()(remainder)?; + let (remainder, terms) = self + .parenthesised(separated_list1(self.parse_comma(), self.parse_term()))( + remainder, + )?; + + if let Some(logical_aggregate_operation) = + (&aggregate_operation_identifier).into() + { + let aggregate = Aggregate { + logical_aggregate_operation, + terms, + }; + + Ok((remainder, Term::Aggregation(aggregate))) + } else { + Err(Err::Failure( + ParseError::UnknownAggregateOperation( + aggregate_operation_identifier.name(), + ) + .at(input), + )) + } + }, + || ParseError::ExpectedAggregate, + ), + ) + } -// /// Parse a term tree. -// /// -// /// This may consist of: -// /// * A function term -// /// * An arithmetic expression, which handles e.g. precedence of addition over multiplication -// fn parse_term(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { -// traced( -// "parse_term", -// map_error( -// move |input| { -// delimited( -// multispace_or_comment0, -// alt(( -// self.parse_arithmetic_expression(), -// // map(self.parse_constraint(), |c| c.as_binary_term()), -// self.parse_parenthesised_term(), -// self.parse_function_term(), -// self.parse_aggregate(), -// self.parse_wildcard(), -// )), -// multispace_or_comment0, -// )(input) -// }, -// || ParseError::ExpectedTerm, -// ), -// ) -// } + /// Parse a variable. + fn parse_variable(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { + traced( + "parse_variable", + map_error( + map( + alt(( + self.parse_universal_variable(), + self.parse_existential_variable(), + )), + PrimitiveTerm::Variable, + ), + || ParseError::ExpectedVariable, + ), + ) + } -// /// Parse a wildcard variable. 
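An aggregate term is `#`, a bare operation name, and a parenthesised term list; the name must map to a known `LogicalAggregateOperation`, otherwise parsing fails with `UnknownAggregateOperation`. An illustrative use in a rule head, assuming `sum` is among the known operations:

    // total(?g, #sum(?v)) :- data(?g, ?v) .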
-// fn parse_wildcard(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { -// traced( -// "parse_wildcard", -// map_res(space_delimited_token("_"), |_| { -// let wildcard = Variable::new_unamed(*self.wildcard_generator.borrow()); -// *self.wildcard_generator.borrow_mut() += 1; -// Ok::<_, ParseError>(Term::Primitive(PrimitiveTerm::Variable(wildcard))) -// }), -// ) -// } + /// Parse a universally quantified variable. + fn parse_universal_variable( + &'a self, + ) -> impl FnMut(Span<'a>) -> IntermediateResult { + traced( + "parse_universal_variable", + map_error( + map( + preceded(token("?"), cut(self.parse_variable_name())), + Variable::Universal, + ), + || ParseError::ExpectedUniversalVariable, + ), + ) + } -// /// Parse a parenthesised term tree. -// fn parse_parenthesised_term(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { -// traced( -// "parse_parenthesised_term", -// map_error(self.parenthesised(self.parse_term()), || { -// ParseError::ExpectedParenthesisedTerm -// }), -// ) -// } + /// Parse an existentially quantified variable. + fn parse_existential_variable( + &'a self, + ) -> impl FnMut(Span<'a>) -> IntermediateResult { + traced( + "parse_existential_variable", + map_error( + map( + preceded(token("!"), cut(self.parse_variable_name())), + Variable::Existential, + ), + || ParseError::ExpectedExistentialVariable, + ), + ) + } -// /// Parse a function term, possibly with nested term trees. -// fn parse_function_term(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { -// traced( -// "parse_function_term", -// map_error( -// move |input| { -// let (remainder, name) = self.parse_iri_like_identifier()(input)?; - -// if let Ok(op) = UnaryOperation::construct_from_name(&name.0) { -// let (remainder, subterm) = -// (self.parenthesised(self.parse_term()))(remainder)?; - -// Ok((remainder, Term::Unary(op, Box::new(subterm)))) -// } else if let Some(op) = BinaryOperation::construct_from_name(&name.0) { -// let (remainder, (left, _, right)) = (self.parenthesised(tuple(( -// self.parse_term(), -// self.parse_comma(), -// self.parse_term(), -// ))))(remainder)?; - -// Ok(( -// remainder, -// Term::Binary { -// operation: op, -// lhs: Box::new(left), -// rhs: Box::new(right), -// }, -// )) -// } else if let Some(op) = TernaryOperation::construct_from_name(&name.0) { -// let (remainder, (first, _, second, _, third)) = -// (self.parenthesised(tuple(( -// self.parse_term(), -// self.parse_comma(), -// self.parse_term(), -// self.parse_comma(), -// self.parse_term(), -// ))))(remainder)?; - -// Ok(( -// remainder, -// Term::Ternary { -// operation: op, -// first: Box::new(first), -// second: Box::new(second), -// third: Box::new(third), -// }, -// )) -// } else if let Some(op) = NaryOperation::construct_from_name(&name.0) { -// let (remainder, subterms) = (self.parenthesised(separated_list0( -// self.parse_comma(), -// self.parse_term(), -// )))(remainder)?; - -// Ok(( -// remainder, -// Term::Nary { -// operation: op, -// parameters: subterms, -// }, -// )) -// } else { -// let (remainder, subterms) = (self.parenthesised(separated_list0( -// self.parse_comma(), -// self.parse_term(), -// )))(remainder)?; - -// Ok((remainder, Term::Function(name, subterms))) -// } -// }, -// || ParseError::ExpectedFunctionTerm, -// ), -// ) -// } + /// Parse a variable name. 
+ fn parse_variable_name(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { + traced( + "parse_variable", + map_error( + move |input| { + let (remainder, name) = parse_simple_name(input)?; -// /// Parse an arithmetic expression -// fn parse_arithmetic_expression(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { -// traced( -// "parse_arithmetic_expression", -// map_error( -// move |input| { -// let (remainder, first) = self.parse_arithmetic_product()(input)?; -// let (remainder, expressions) = many0(alt(( -// preceded( -// delimited(multispace_or_comment0, token("+"), multispace_or_comment0), -// map(self.parse_arithmetic_product(), |term| { -// (ArithmeticOperator::Addition, term) -// }), -// ), -// preceded( -// delimited(multispace_or_comment0, token("-"), multispace_or_comment0), -// map(self.parse_arithmetic_product(), |term| { -// (ArithmeticOperator::Subtraction, term) -// }), -// ), -// )))(remainder)?; - -// Ok(( -// remainder, -// Self::fold_arithmetic_expressions(first, expressions), -// )) -// }, -// || ParseError::ExpectedArithmeticExpression, -// ), -// ) -// } + Ok((remainder, name.to_string())) + }, + || ParseError::ExpectedVariableName, + ), + ) + } -// /// Parse an arithmetic product, i.e., an expression involving -// /// only `*` and `/` over subexpressions. -// fn parse_arithmetic_product(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { -// traced( -// "parse_arithmetic_product", -// map_error( -// move |input| { -// let (remainder, first) = self.parse_arithmetic_factor()(input)?; -// let (remainder, factors) = many0(alt(( -// preceded( -// delimited(multispace_or_comment0, token("*"), multispace_or_comment0), -// map(self.parse_arithmetic_factor(), |term| { -// (ArithmeticOperator::Multiplication, term) -// }), -// ), -// preceded( -// delimited(multispace_or_comment0, token("/"), multispace_or_comment0), -// map(self.parse_arithmetic_factor(), |term| { -// (ArithmeticOperator::Division, term) -// }), -// ), -// )))(remainder)?; - -// Ok((remainder, Self::fold_arithmetic_expressions(first, factors))) -// }, -// || ParseError::ExpectedArithmeticProduct, -// ), -// ) -// } + /// Parse a literal (i.e., a possibly negated atom). + fn parse_literal(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { + traced( + "parse_literal", + map_error( + alt((self.parse_negative_literal(), self.parse_positive_literal())), + || ParseError::ExpectedLiteral, + ), + ) + } -// /// Parse an arithmetic factor. -// fn parse_arithmetic_factor(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { -// traced( -// "parse_arithmetic_factor", -// map_error( -// alt(( -// self.parse_function_term(), -// self.parse_aggregate(), -// map(self.parse_primitive_term(), Term::Primitive), -// self.parse_parenthesised_term(), -// )), -// || ParseError::ExpectedArithmeticFactor, -// ), -// ) -// } + /// Parse a non-negated literal. + fn parse_positive_literal(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { + traced( + "parse_positive_literal", + map_error(map(self.parse_atom(), Literal::Positive), || { + ParseError::ExpectedPositiveLiteral + }), + ) + } -// /// Fold a sequence of ([ArithmeticOperator], [PrimitiveTerm]) pairs into a single [Term]. 
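// Aside: a sketch, with assumed stand-in types, of what the fold below does.
// A flat `first (op, term)*` parse result becomes a left-associative tree,
// so "1 - 2 - 3" groups as ((1 - 2) - 3) rather than (1 - (2 - 3)).
#[derive(Debug, PartialEq)]
enum Expr {
    Num(i64),
    Binary(char, Box<Expr>, Box<Expr>),
}

fn fold_left(first: Expr, rest: Vec<(char, Expr)>) -> Expr {
    rest.into_iter().fold(first, |acc, (op, rhs)| {
        // the accumulator always becomes the left-hand side
        Expr::Binary(op, Box::new(acc), Box::new(rhs))
    })
}

// fold_left(Num(1), vec![('-', Num(2)), ('-', Num(3))]) builds
// Binary('-', Binary('-', Num(1), Num(2)), Num(3))  (paths abbreviated).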
-// fn fold_arithmetic_expressions( -// initial: Term, -// sequence: Vec<(ArithmeticOperator, Term)>, -// ) -> Term { -// sequence.into_iter().fold(initial, |acc, pair| { -// let (operation, expression) = pair; - -// use ArithmeticOperator::*; - -// let operation = match operation { -// Addition => BinaryOperation::NumericAddition, -// Subtraction => BinaryOperation::NumericSubtraction, -// Multiplication => BinaryOperation::NumericMultiplication, -// Division => BinaryOperation::NumericDivision, -// }; + /// Parse a negated literal. + fn parse_negative_literal(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { + traced( + "parse_negative_literal", + map_error( + map( + preceded(self.parse_not(), cut(self.parse_atom())), + Literal::Negative, + ), + || ParseError::ExpectedNegativeLiteral, + ), + ) + } -// Term::Binary { -// operation, -// lhs: Box::new(acc), -// rhs: Box::new(expression), -// } -// }) -// } + /// Parse operation that is filters a variable + fn parse_constraint_operator( + &'a self, + ) -> impl FnMut(Span<'a>) -> IntermediateResult { + traced( + "parse_constraint_operator", + map_error( + delimited( + multispace_or_comment0, + alt(( + value(ConstraintOperator::LessThanEq, token("<=")), + value(ConstraintOperator::LessThan, token("<")), + value(ConstraintOperator::Equals, token("=")), + value(ConstraintOperator::Unequals, token("!=")), + value(ConstraintOperator::GreaterThanEq, token(">=")), + value(ConstraintOperator::GreaterThan, token(">")), + )), + multispace_or_comment0, + ), + || ParseError::ExpectedFilterOperator, + ), + ) + } -// /// Parse expression of the form ` ` expressing a constraint. -// fn parse_constraint(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { -// traced( -// "parse_constraint", -// map_error( -// map( -// tuple(( -// self.parse_term(), -// self.parse_constraint_operator(), -// cut(self.parse_term()), -// )), -// |(lhs, operation, rhs)| operation.into_constraint(lhs, rhs), -// ), -// || ParseError::ExpectedConstraint, -// ), -// ) -// } + /// Parse a term tree. + /// + /// This may consist of: + /// * A function term + /// * An arithmetic expression, which handles e.g. precedence of addition over multiplication + fn parse_term(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { + traced( + "parse_term", + map_error( + move |input| { + delimited( + multispace_or_comment0, + alt(( + self.parse_arithmetic_expression(), + // map(self.parse_constraint(), |c| c.as_binary_term()), + self.parse_parenthesised_term(), + self.parse_function_term(), + self.parse_aggregate(), + self.parse_wildcard(), + )), + multispace_or_comment0, + )(input) + }, + || ParseError::ExpectedTerm, + ), + ) + } -// /// Parse body expression -// fn parse_body_expression( -// &'a self, -// ) -> impl FnMut(Span<'a>) -> IntermediateResult { -// traced( -// "parse_body_expression", -// map_error( -// alt(( -// map(self.parse_constraint(), BodyExpression::Constraint), -// map(self.parse_literal(), BodyExpression::Literal), -// )), -// || ParseError::ExpectedBodyExpression, -// ), -// ) -// } + /// Parse a wildcard variable. + fn parse_wildcard(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { + traced( + "parse_wildcard", + map_res(space_delimited_token("_"), |_| { + let wildcard = Variable::new_unamed(*self.wildcard_generator.borrow()); + *self.wildcard_generator.borrow_mut() += 1; + Ok::<_, ParseError>(Term::Primitive(PrimitiveTerm::Variable(wildcard))) + }), + ) + } -// /// Parse a program in the rules language. 
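// Aside: `parse_program` below rejects misplaced directives by probing
// whether a directive parser would still succeed after the region where it
// is legal, and turning that success into a hard failure. A simplified
// free-standing version of the idea (&str instead of Span, String instead
// of the real error type):
use nom::{bytes::complete::tag, IResult};

fn reject_if_parses<'a>(
    mut illegal: impl FnMut(&'a str) -> IResult<&'a str, &'a str>,
    input: &'a str,
) -> Result<(), String> {
    match illegal(input) {
        // the parser succeeding here is precisely the error case
        Ok(_) => Err("declaration appears too late in the program".to_string()),
        Err(_) => Ok(()),
    }
}

// reject_if_parses(tag("@base"), "@base ...") is an Err, while
// reject_if_parses(tag("@base"), "p(a) .") is Ok(()).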
-// pub fn parse_program(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { -// fn check_for_invalid_statement<'a, F>( -// parser: &mut F, -// input: Span<'a>, -// ) -> IntermediateResult<'a, ()> -// where -// F: FnMut(Span<'a>) -> IntermediateResult, -// { -// if let Ok((_, e)) = parser(input) { -// return Err(Err::Failure(e.at(input))); -// } + /// Parse a parenthesised term tree. + fn parse_parenthesised_term(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { + traced( + "parse_parenthesised_term", + map_error(self.parenthesised(self.parse_term()), || { + ParseError::ExpectedParenthesisedTerm + }), + ) + } -// Ok((input, ())) -// } + /// Parse a function term, possibly with nested term trees. + fn parse_function_term(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { + traced( + "parse_function_term", + map_error( + move |input| { + let (remainder, name) = self.parse_iri_like_identifier()(input)?; + + if let Ok(op) = UnaryOperation::construct_from_name(&name.0) { + let (remainder, subterm) = + (self.parenthesised(self.parse_term()))(remainder)?; + + Ok((remainder, Term::Unary(op, Box::new(subterm)))) + } else if let Some(op) = BinaryOperation::construct_from_name(&name.0) { + let (remainder, (left, _, right)) = + (self.parenthesised(tuple(( + self.parse_term(), + self.parse_comma(), + self.parse_term(), + ))))(remainder)?; + + Ok(( + remainder, + Term::Binary { + operation: op, + lhs: Box::new(left), + rhs: Box::new(right), + }, + )) + } else if let Some(op) = TernaryOperation::construct_from_name(&name.0) { + let (remainder, (first, _, second, _, third)) = + (self.parenthesised(tuple(( + self.parse_term(), + self.parse_comma(), + self.parse_term(), + self.parse_comma(), + self.parse_term(), + ))))(remainder)?; + + Ok(( + remainder, + Term::Ternary { + operation: op, + first: Box::new(first), + second: Box::new(second), + third: Box::new(third), + }, + )) + } else if let Some(op) = NaryOperation::construct_from_name(&name.0) { + let (remainder, subterms) = (self.parenthesised(separated_list0( + self.parse_comma(), + self.parse_term(), + )))(remainder)?; + + Ok(( + remainder, + Term::Nary { + operation: op, + parameters: subterms, + }, + )) + } else { + let (remainder, subterms) = (self.parenthesised(separated_list0( + self.parse_comma(), + self.parse_term(), + )))(remainder)?; + + Ok((remainder, Term::Function(name, subterms))) + } + }, + || ParseError::ExpectedFunctionTerm, + ), + ) + } -// traced("parse_program", move |input| { -// let (remainder, _) = multispace_or_comment0(input)?; -// let (remainder, _) = opt(self.parse_base())(remainder)?; - -// check_for_invalid_statement( -// &mut map(self.parse_base(), |_| ParseError::LateBaseDeclaration), -// remainder, -// )?; - -// let (remainder, _) = many0(self.parse_prefix())(remainder)?; - -// check_for_invalid_statement( -// &mut map(self.parse_base(), |_| ParseError::LateBaseDeclaration), -// remainder, -// )?; -// check_for_invalid_statement( -// &mut map(self.parse_prefix(), |_| ParseError::LatePrefixDeclaration), -// remainder, -// )?; - -// let mut statements = Vec::new(); -// let mut output_predicates = Vec::new(); -// let mut sources = Vec::new(); -// let mut imports = Vec::new(); -// let mut exports = Vec::new(); - -// let (remainder, _) = many0(alt(( -// map(self.parse_source(), |source| sources.push(source)), -// map(self.parse_import(), |import| imports.push(import)), -// map(self.parse_export(), |export| exports.push(export)), -// map(self.parse_statement(), |statement| { -// statements.push(statement) 
-// }), -// map(self.parse_output_directive(), |output_predicate| { -// output_predicates.push(output_predicate) -// }), -// )))(remainder)?; - -// check_for_invalid_statement( -// &mut map(self.parse_base(), |_| ParseError::LateBaseDeclaration), -// remainder, -// )?; -// check_for_invalid_statement( -// &mut map(self.parse_prefix(), |_| ParseError::LatePrefixDeclaration), -// remainder, -// )?; - -// let base = self.base().map(String::from); -// let prefixes = self -// .prefixes -// .borrow() -// .iter() -// .map(|(&prefix, &iri)| (prefix.to_string(), iri.to_string())) -// .collect::>(); -// let mut rules = Vec::new(); -// let mut facts = Vec::new(); - -// statements.iter().for_each(|statement| match statement { -// Statement::Fact(value) => facts.push(value.clone()), -// Statement::Rule(value) => rules.push(value.clone()), -// }); - -// let mut program_builder = Program::builder() -// .prefixes(prefixes) -// .imports(sources) -// .imports(imports) -// .exports(exports) -// .rules(rules) -// .facts(facts); - -// if let Some(base) = base { -// program_builder = program_builder.base(base); -// } + /// Parse an arithmetic expression + fn parse_arithmetic_expression( + &'a self, + ) -> impl FnMut(Span<'a>) -> IntermediateResult { + traced( + "parse_arithmetic_expression", + map_error( + move |input| { + let (remainder, first) = self.parse_arithmetic_product()(input)?; + let (remainder, expressions) = many0(alt(( + preceded( + delimited( + multispace_or_comment0, + token("+"), + multispace_or_comment0, + ), + map(self.parse_arithmetic_product(), |term| { + (ArithmeticOperator::Addition, term) + }), + ), + preceded( + delimited( + multispace_or_comment0, + token("-"), + multispace_or_comment0, + ), + map(self.parse_arithmetic_product(), |term| { + (ArithmeticOperator::Subtraction, term) + }), + ), + )))(remainder)?; -// if !output_predicates.is_empty() { -// program_builder = program_builder.output_predicates(output_predicates); -// } + Ok(( + remainder, + Self::fold_arithmetic_expressions(first, expressions), + )) + }, + || ParseError::ExpectedArithmeticExpression, + ), + ) + } -// Ok((remainder, program_builder.build())) -// }) -// } + /// Parse an arithmetic product, i.e., an expression involving + /// only `*` and `/` over subexpressions. + fn parse_arithmetic_product(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { + traced( + "parse_arithmetic_product", + map_error( + move |input| { + let (remainder, first) = self.parse_arithmetic_factor()(input)?; + let (remainder, factors) = many0(alt(( + preceded( + delimited( + multispace_or_comment0, + token("*"), + multispace_or_comment0, + ), + map(self.parse_arithmetic_factor(), |term| { + (ArithmeticOperator::Multiplication, term) + }), + ), + preceded( + delimited( + multispace_or_comment0, + token("/"), + multispace_or_comment0, + ), + map(self.parse_arithmetic_factor(), |term| { + (ArithmeticOperator::Division, term) + }), + ), + )))(remainder)?; -// /// Return the declared base, if set, or None. -// #[must_use] -// fn base(&self) -> Option<&'a str> { -// *self.base.borrow() -// } -// } + Ok((remainder, Self::fold_arithmetic_expressions(first, factors))) + }, + || ParseError::ExpectedArithmeticProduct, + ), + ) + } -// #[cfg(test)] -// mod test { -// use super::*; -// use std::assert_matches::assert_matches; -// use test_log::test; - -// macro_rules! assert_parse { -// ($parser:expr, $left:expr, $right:expr $(,) ?) 
=> { -// assert_eq!( -// all_input_consumed($parser)($left).expect( -// format!("failed to parse `{:?}`\nexpected `{:?}`", $left, $right).as_str() -// ), -// $right -// ); -// }; -// } + /// Parse an arithmetic factor. + fn parse_arithmetic_factor(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { + traced( + "parse_arithmetic_factor", + map_error( + alt(( + self.parse_function_term(), + self.parse_aggregate(), + map(self.parse_primitive_term(), Term::Primitive), + self.parse_parenthesised_term(), + )), + || ParseError::ExpectedArithmeticFactor, + ), + ) + } -// macro_rules! assert_fails { -// ($parser:expr, $left:expr, $right:pat $(,) ?) => {{ -// // Store in intermediate variable to prevent from being dropped too early -// let result = all_input_consumed($parser)($left); -// assert_matches!(result, Err($right)) -// }}; -// } + /// Fold a sequence of ([ArithmeticOperator], [PrimitiveTerm]) pairs into a single [Term]. + fn fold_arithmetic_expressions( + initial: Term, + sequence: Vec<(ArithmeticOperator, Term)>, + ) -> Term { + sequence.into_iter().fold(initial, |acc, pair| { + let (operation, expression) = pair; + + use ArithmeticOperator::*; + + let operation = match operation { + Addition => BinaryOperation::NumericAddition, + Subtraction => BinaryOperation::NumericSubtraction, + Multiplication => BinaryOperation::NumericMultiplication, + Division => BinaryOperation::NumericDivision, + }; -// macro_rules! assert_parse_error { -// ($parser:expr, $left:expr, $right:pat $(,) ?) => { -// assert_fails!($parser, $left, LocatedParseError { source: $right, .. }) -// }; -// } + Term::Binary { + operation, + lhs: Box::new(acc), + rhs: Box::new(expression), + } + }) + } -// macro_rules! assert_expected_token { -// ($parser:expr, $left:expr, $right:expr $(,) ?) => { -// let _token = String::from($right); -// assert_parse_error!($parser, $left, ParseError::ExpectedToken(_token),); -// }; -// } + /// Parse expression of the form ` ` expressing a constraint. 
+ fn parse_constraint(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { + traced( + "parse_constraint", + map_error( + map( + tuple(( + self.parse_term(), + self.parse_constraint_operator(), + cut(self.parse_term()), + )), + |(lhs, operation, rhs)| operation.into_constraint(lhs, rhs), + ), + || ParseError::ExpectedConstraint, + ), + ) + } -// #[test] -// fn base_directive() { -// let base = "http://example.org/foo"; -// let input = format!("@base <{base}> ."); -// let parser = RuleParser::new(); -// let b = Identifier(base.to_string()); -// assert!(parser.base().is_none()); -// assert_parse!(parser.parse_base(), input.as_str(), b); -// assert_eq!(parser.base(), Some(base)); -// } + /// Parse body expression + fn parse_body_expression( + &'a self, + ) -> impl FnMut(Span<'a>) -> IntermediateResult { + traced( + "parse_body_expression", + map_error( + alt(( + map(self.parse_constraint(), BodyExpression::Constraint), + map(self.parse_literal(), BodyExpression::Literal), + )), + || ParseError::ExpectedBodyExpression, + ), + ) + } -// #[test] -// fn prefix_directive() { -// let prefix = unsafe { Span::new_from_raw_offset(8, 1, "foo", ()) }; -// let iri = "http://example.org/foo"; -// let input = format!("@prefix {prefix}: <{iri}> ."); -// let parser = RuleParser::new(); -// assert!(resolve_prefix(&parser.prefixes.borrow(), &prefix).is_err()); -// assert_parse!(parser.parse_prefix(), input.as_str(), prefix); -// assert_eq!( -// resolve_prefix(&parser.prefixes.borrow(), &prefix).map_err(|_| ()), -// Ok(iri) -// ); -// } + /// Parse a program in the rules language. + pub fn parse_program(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { + fn check_for_invalid_statement<'a, F>( + parser: &mut F, + input: Span<'a>, + ) -> IntermediateResult<'a, ()> + where + F: FnMut(Span<'a>) -> IntermediateResult, + { + if let Ok((_, e)) = parser(input) { + return Err(Err::Failure(e.at(input))); + } -// #[test] -// #[cfg_attr(miri, ignore)] -// fn source() { -// /// Helper function to create source-like imports -// fn csv_import(predicate: Identifier, filename: &str, arity: i64) -> ImportDirective { -// let attributes = MapDataValue::from_iter([ -// ( -// AnyDataValue::new_iri(PARAMETER_NAME_RESOURCE.to_string()), -// AnyDataValue::new_plain_string(filename.to_string()), -// ), -// ( -// AnyDataValue::new_iri(PARAMETER_NAME_FORMAT.to_string()), -// TupleDataValue::from_iter( -// vec![ -// VALUE_FORMAT_ANY; -// usize::try_from(arity).expect("required for these tests") -// ] -// .iter() -// .map(|format| AnyDataValue::new_plain_string((*format).to_string())) -// .collect::>(), -// ) -// .into(), -// ), -// ]); -// ImportDirective::from(ImportExportDirective { -// predicate, -// format: FileFormat::CSV, -// attributes, -// }) -// } + Ok((input, ())) + } -// let parser = RuleParser::new(); -// let file = "drinks.csv"; -// let predicate_name = "drink"; -// let predicate = Identifier(predicate_name.to_string()); -// let default_import = csv_import(predicate.clone(), file, 1); - -// // rulewerk accepts all of these variants -// let input = format!(r#"@source {predicate_name}[1]: load-csv("{file}") ."#); -// assert_parse!(parser.parse_source(), &input, default_import); -// let input = format!(r#"@source {predicate_name}[1] : load-csv("{file}") ."#); -// assert_parse!(parser.parse_source(), &input, default_import); -// let input = format!(r#"@source {predicate_name}[1] : load-csv ( "{file}" ) ."#); -// assert_parse!(parser.parse_source(), &input, default_import); -// let input = format!(r#"@source 
{predicate_name} [1] : load-csv ( "{file}" ) ."#); -// assert_parse!(parser.parse_source(), &input, default_import); -// } + traced("parse_program", move |input| { + let (remainder, _) = multispace_or_comment0(input)?; + let (remainder, _) = opt(self.parse_base())(remainder)?; + + check_for_invalid_statement( + &mut map(self.parse_base(), |_| ParseError::LateBaseDeclaration), + remainder, + )?; + + let (remainder, _) = many0(self.parse_prefix())(remainder)?; + + check_for_invalid_statement( + &mut map(self.parse_base(), |_| ParseError::LateBaseDeclaration), + remainder, + )?; + check_for_invalid_statement( + &mut map(self.parse_prefix(), |_| ParseError::LatePrefixDeclaration), + remainder, + )?; + + let mut statements = Vec::new(); + let mut output_predicates = Vec::new(); + let mut sources = Vec::new(); + let mut imports = Vec::new(); + let mut exports = Vec::new(); + + let (remainder, _) = many0(alt(( + map(self.parse_source(), |source| sources.push(source)), + map(self.parse_import(), |import| imports.push(import)), + map(self.parse_export(), |export| exports.push(export)), + map(self.parse_statement(), |statement| { + statements.push(statement) + }), + map(self.parse_output_directive(), |output_predicate| { + output_predicates.push(output_predicate) + }), + )))(remainder)?; + + check_for_invalid_statement( + &mut map(self.parse_base(), |_| ParseError::LateBaseDeclaration), + remainder, + )?; + check_for_invalid_statement( + &mut map(self.parse_prefix(), |_| ParseError::LatePrefixDeclaration), + remainder, + )?; + + let base = self.base().map(String::from); + let prefixes = self + .prefixes + .borrow() + .iter() + .map(|(&prefix, &iri)| (prefix.to_string(), iri.to_string())) + .collect::>(); + let mut rules = Vec::new(); + let mut facts = Vec::new(); + + statements.iter().for_each(|statement| match statement { + Statement::Fact(value) => facts.push(value.clone()), + Statement::Rule(value) => rules.push(value.clone()), + }); + + let mut program_builder = Program::builder() + .prefixes(prefixes) + .imports(sources) + .imports(imports) + .exports(exports) + .rules(rules) + .facts(facts); + + if let Some(base) = base { + program_builder = program_builder.base(base); + } -// #[test] -// fn fact() { -// let parser = RuleParser::new(); -// let predicate = "p"; -// let value = "foo"; -// let datatype = "bar"; -// let p = Identifier(predicate.to_string()); -// let v = value.to_string(); -// let t = datatype.to_string(); -// let fact = format!(r#"{predicate}("{value}"^^<{datatype}>) ."#); - -// let expected_fact = Fact(Atom::new( -// p, -// vec![Term::Primitive(PrimitiveTerm::GroundTerm( -// AnyDataValue::new_from_typed_literal(v, t).expect("unknown types should work"), -// ))], -// )); - -// assert_parse!(parser.parse_fact(), &fact, expected_fact,); -// } + if !output_predicates.is_empty() { + program_builder = program_builder.output_predicates(output_predicates); + } -// #[test] -// fn fact_namespaced() { -// let parser = RuleParser::new(); -// let predicate = "p"; -// let name = "foo"; -// let prefix = unsafe { Span::new_from_raw_offset(8, 1, "eg", ()) }; -// let iri = "http://example.org/foo"; -// let prefix_declaration = format!("@prefix {prefix}: <{iri}> ."); -// let p = Identifier(predicate.to_string()); -// let pn = format!("{prefix}:{name}"); -// let v = format!("{iri}{name}"); -// let fact = format!(r#"{predicate}({pn}) ."#); - -// assert_parse!(parser.parse_prefix(), &prefix_declaration, prefix); - -// let expected_fact = Fact(Atom::new( -// p, -// 
vec![Term::Primitive(PrimitiveTerm::GroundTerm( -// AnyDataValue::new_iri(v), -// ))], -// )); - -// assert_parse!(parser.parse_fact(), &fact, expected_fact,); -// } + Ok((remainder, program_builder.build())) + }) + } -// #[test] -// fn fact_bnode() { -// let parser = RuleParser::new(); -// let predicate = "p"; -// let name = "foo"; -// let p = Identifier(predicate.to_string()); -// let pn = format!("_:{name}"); -// let fact = format!(r#"{predicate}({pn}) ."#); - -// let expected_fact = Fact(Atom::new( -// p, -// vec![Term::Primitive(PrimitiveTerm::GroundTerm( -// AnyDataValue::new_iri(pn), -// ))], -// )); - -// assert_parse!(parser.parse_fact(), &fact, expected_fact,); -// } + /// Return the declared base, if set, or None. + #[must_use] + fn base(&self) -> Option<&'a str> { + *self.base.borrow() + } + } -// #[test] -// fn fact_numbers() { -// let parser = RuleParser::new(); -// let predicate = "p"; -// let p = Identifier(predicate.to_string()); -// let int = 23_i64; -// let dbl = 42.0; -// let dec = 13.37; -// let fact = format!(r#"{predicate}({int}, {dbl:.1}E0, {dec:.2}) ."#); - -// let expected_fact = Fact(Atom::new( -// p, -// vec![ -// Term::Primitive(PrimitiveTerm::GroundTerm( -// AnyDataValue::new_integer_from_i64(int), -// )), -// Term::Primitive(PrimitiveTerm::GroundTerm( -// AnyDataValue::new_double_from_f64(dbl).expect("is not NaN"), -// )), -// Term::Primitive(PrimitiveTerm::GroundTerm( -// AnyDataValue::new_double_from_f64(dec).expect("is not NaN"), -// )), -// ], -// )); - -// assert_parse!(parser.parse_fact(), &fact, expected_fact,); -// } + #[cfg(test)] + mod test { + use super::*; + use std::assert_matches::assert_matches; + use test_log::test; + + macro_rules! assert_parse { + ($parser:expr, $left:expr, $right:expr $(,) ?) => { + assert_eq!( + all_input_consumed($parser)($left).expect( + format!("failed to parse `{:?}`\nexpected `{:?}`", $left, $right).as_str() + ), + $right + ); + }; + } -// #[test] -// fn fact_rdf_literal_xsd_string() { -// let parser = RuleParser::new(); + macro_rules! assert_fails { + ($parser:expr, $left:expr, $right:pat $(,) ?) => {{ + // Store in intermediate variable to prevent from being dropped too early + let result = all_input_consumed($parser)($left); + assert_matches!(result, Err($right)) + }}; + } -// let prefix = unsafe { Span::new_from_raw_offset(8, 1, "xsd", ()) }; -// let iri = "http://www.w3.org/2001/XMLSchema#"; -// let prefix_declaration = format!("@prefix {prefix}: <{iri}> ."); + macro_rules! assert_parse_error { + ($parser:expr, $left:expr, $right:pat $(,) ?) => { + assert_fails!($parser, $left, LocatedParseError { source: $right, .. }) + }; + } -// assert_parse!(parser.parse_prefix(), &prefix_declaration, prefix); + macro_rules! assert_expected_token { + ($parser:expr, $left:expr, $right:expr $(,) ?) 
=> { + let _token = String::from($right); + assert_parse_error!($parser, $left, ParseError::ExpectedToken(_token),); + }; + } -// let predicate = "p"; -// let value = "my nice string"; -// let datatype = "xsd:string"; + #[test] + fn base_directive() { + let base = "http://example.org/foo"; + let input = format!("@base <{base}> ."); + let parser = RuleParser::new(); + let b = Identifier(base.to_string()); + assert!(parser.base().is_none()); + assert_parse!(parser.parse_base(), input.as_str(), b); + assert_eq!(parser.base(), Some(base)); + } -// let p = Identifier(predicate.to_string()); -// let v = value.to_string(); -// let fact = format!(r#"{predicate}("{value}"^^{datatype}) ."#); + #[test] + fn prefix_directive() { + let prefix = unsafe { Span::new_from_raw_offset(8, 1, "foo", ()) }; + let iri = "http://example.org/foo"; + let input = format!("@prefix {prefix}: <{iri}> ."); + let parser = RuleParser::new(); + assert!(resolve_prefix(&parser.prefixes.borrow(), &prefix).is_err()); + assert_parse!(parser.parse_prefix(), input.as_str(), prefix); + assert_eq!( + resolve_prefix(&parser.prefixes.borrow(), &prefix).map_err(|_| ()), + Ok(iri) + ); + } -// let expected_fact = Fact(Atom::new( -// p, -// vec![Term::Primitive(PrimitiveTerm::GroundTerm( -// AnyDataValue::new_plain_string(v), -// ))], -// )); + #[test] + #[cfg_attr(miri, ignore)] + fn source() { + /// Helper function to create source-like imports + fn csv_import(predicate: Identifier, filename: &str, arity: i64) -> ImportDirective { + let attributes = MapDataValue::from_iter([ + ( + AnyDataValue::new_iri(PARAMETER_NAME_RESOURCE.to_string()), + AnyDataValue::new_plain_string(filename.to_string()), + ), + ( + AnyDataValue::new_iri(PARAMETER_NAME_FORMAT.to_string()), + TupleDataValue::from_iter( + vec![ + VALUE_FORMAT_ANY; + usize::try_from(arity).expect("required for these tests") + ] + .iter() + .map(|format| AnyDataValue::new_plain_string((*format).to_string())) + .collect::>(), + ) + .into(), + ), + ]); + ImportDirective::from(ImportExportDirective { + predicate, + format: FileFormat::CSV, + attributes, + }) + } -// assert_parse!(parser.parse_fact(), &fact, expected_fact,); -// } + let parser = RuleParser::new(); + let file = "drinks.csv"; + let predicate_name = "drink"; + let predicate = Identifier(predicate_name.to_string()); + let default_import = csv_import(predicate.clone(), file, 1); + + // rulewerk accepts all of these variants + let input = format!(r#"@source {predicate_name}[1]: load-csv("{file}") ."#); + assert_parse!(parser.parse_source(), &input, default_import); + let input = format!(r#"@source {predicate_name}[1] : load-csv("{file}") ."#); + assert_parse!(parser.parse_source(), &input, default_import); + let input = format!(r#"@source {predicate_name}[1] : load-csv ( "{file}" ) ."#); + assert_parse!(parser.parse_source(), &input, default_import); + let input = format!(r#"@source {predicate_name} [1] : load-csv ( "{file}" ) ."#); + assert_parse!(parser.parse_source(), &input, default_import); + } -// #[test] -// fn fact_string_literal() { -// let parser = RuleParser::new(); -// let predicate = "p"; -// let value = "my nice string"; -// let p = Identifier(predicate.to_string()); -// let v = value.to_string(); -// let fact = format!(r#"{predicate}("{value}") ."#); - -// let expected_fact = Fact(Atom::new( -// p, -// vec![Term::Primitive(PrimitiveTerm::GroundTerm( -// AnyDataValue::new_plain_string(v), -// ))], -// )); - -// assert_parse!(parser.parse_fact(), &fact, expected_fact,); -// } + #[test] + fn fact() { + let parser 
= RuleParser::new(); + let predicate = "p"; + let value = "foo"; + let datatype = "bar"; + let p = Identifier(predicate.to_string()); + let v = value.to_string(); + let t = datatype.to_string(); + let fact = format!(r#"{predicate}("{value}"^^<{datatype}>) ."#); + + let expected_fact = Fact(Atom::new( + p, + vec![Term::Primitive(PrimitiveTerm::GroundTerm( + AnyDataValue::new_from_typed_literal(v, t).expect("unknown types should work"), + ))], + )); + + assert_parse!(parser.parse_fact(), &fact, expected_fact,); + } -// #[test] -// fn fact_language_string() { -// let parser = RuleParser::new(); -// let predicate = "p"; -// let v = "Qapla"; -// let langtag = "tlh"; -// let p = Identifier(predicate.to_string()); -// let value = v.to_string(); -// let fact = format!(r#"{predicate}("{v}"@{langtag}) ."#); -// let tag = langtag.to_string(); - -// let expected_fact = Fact(Atom::new( -// p, -// vec![Term::Primitive(PrimitiveTerm::GroundTerm( -// AnyDataValue::new_language_tagged_string(value, tag), -// ))], -// )); - -// assert_parse!(parser.parse_fact(), &fact, expected_fact); -// } + #[test] + fn fact_namespaced() { + let parser = RuleParser::new(); + let predicate = "p"; + let name = "foo"; + let prefix = unsafe { Span::new_from_raw_offset(8, 1, "eg", ()) }; + let iri = "http://example.org/foo"; + let prefix_declaration = format!("@prefix {prefix}: <{iri}> ."); + let p = Identifier(predicate.to_string()); + let pn = format!("{prefix}:{name}"); + let v = format!("{iri}{name}"); + let fact = format!(r#"{predicate}({pn}) ."#); + + assert_parse!(parser.parse_prefix(), &prefix_declaration, prefix); + + let expected_fact = Fact(Atom::new( + p, + vec![Term::Primitive(PrimitiveTerm::GroundTerm( + AnyDataValue::new_iri(v), + ))], + )); + + assert_parse!(parser.parse_fact(), &fact, expected_fact,); + } -// #[test] -// fn fact_abstract() { -// let parser = RuleParser::new(); -// let predicate = "p"; -// let name = "a"; -// let p = Identifier(predicate.to_string()); -// let fact = format!(r#"{predicate}({name}) ."#); - -// let expected_fact = Fact(Atom::new( -// p, -// vec![Term::Primitive(PrimitiveTerm::GroundTerm( -// AnyDataValue::new_iri(name.to_string()), -// ))], -// )); - -// assert_parse!(parser.parse_fact(), &fact, expected_fact,); -// } + #[test] + fn fact_bnode() { + let parser = RuleParser::new(); + let predicate = "p"; + let name = "foo"; + let p = Identifier(predicate.to_string()); + let pn = format!("_:{name}"); + let fact = format!(r#"{predicate}({pn}) ."#); + + let expected_fact = Fact(Atom::new( + p, + vec![Term::Primitive(PrimitiveTerm::GroundTerm( + AnyDataValue::new_iri(pn), + ))], + )); + + assert_parse!(parser.parse_fact(), &fact, expected_fact,); + } -// #[test] -// fn fact_comment() { -// let parser = RuleParser::new(); -// let predicate = "p"; -// let value = "foo"; -// let datatype = "bar"; -// let p = Identifier(predicate.to_string()); -// let v = value.to_string(); -// let t = datatype.to_string(); -// let fact = format!( -// r#"{predicate}(% comment 1 -// "{value}"^^<{datatype}> % comment 2 -// ) % comment 3 -// . 
% comment 4 -// %"# -// ); - -// let expected_fact = Fact(Atom::new( -// p, -// vec![Term::Primitive(PrimitiveTerm::GroundTerm( -// AnyDataValue::new_from_typed_literal(v, t) -// .expect("unknown datatype should always work"), -// ))], -// )); - -// assert_parse!(parser.parse_fact(), &fact, expected_fact,); -// } + #[test] + fn fact_numbers() { + let parser = RuleParser::new(); + let predicate = "p"; + let p = Identifier(predicate.to_string()); + let int = 23_i64; + let dbl = 42.0; + let dec = 13.37; + let fact = format!(r#"{predicate}({int}, {dbl:.1}E0, {dec:.2}) ."#); + + let expected_fact = Fact(Atom::new( + p, + vec![ + Term::Primitive(PrimitiveTerm::GroundTerm( + AnyDataValue::new_integer_from_i64(int), + )), + Term::Primitive(PrimitiveTerm::GroundTerm( + AnyDataValue::new_double_from_f64(dbl).expect("is not NaN"), + )), + Term::Primitive(PrimitiveTerm::GroundTerm( + AnyDataValue::new_double_from_f64(dec).expect("is not NaN"), + )), + ], + )); -// #[test] -// #[cfg_attr(miri, ignore)] -// fn filter() { -// let parser = RuleParser::new(); -// let aa = "A"; -// let a = Identifier(aa.to_string()); -// let bb = "B"; -// let b = Identifier(bb.to_string()); -// let pp = "P"; -// let p = Identifier(pp.to_string()); -// let xx = "X"; -// let x = xx.to_string(); -// let yy = "Y"; -// let y = yy.to_string(); -// let zz = "Z"; -// let z = zz.to_string(); - -// let rule = format!( -// "{pp}(?{xx}) :- {aa}(?{xx}, ?{yy}), ?{yy} > ?{xx}, {bb}(?{zz}), ?{xx} = 3, ?{zz} < 7, ?{xx} <= ?{zz}, ?{zz} >= ?{yy} ." -// ); - -// let expected_rule = Rule::new( -// vec![Atom::new( -// p, -// vec![Term::Primitive(PrimitiveTerm::Variable( -// Variable::Universal(x.clone()), -// ))], -// )], -// vec![ -// Literal::Positive(Atom::new( -// a, -// vec![ -// Term::Primitive(PrimitiveTerm::Variable(Variable::Universal(x.clone()))), -// Term::Primitive(PrimitiveTerm::Variable(Variable::Universal(y.clone()))), -// ], -// )), -// Literal::Positive(Atom::new( -// b, -// vec![Term::Primitive(PrimitiveTerm::Variable( -// Variable::Universal(z.clone()), -// ))], -// )), -// ], -// vec![ -// Constraint::GreaterThan( -// Term::Primitive(PrimitiveTerm::Variable(Variable::Universal(y.clone()))), -// Term::Primitive(PrimitiveTerm::Variable(Variable::Universal(x.clone()))), -// ), -// Constraint::Equals( -// Term::Primitive(PrimitiveTerm::Variable(Variable::Universal(x.clone()))), -// Term::Primitive(PrimitiveTerm::GroundTerm( -// AnyDataValue::new_integer_from_i64(3), -// )), -// ), -// Constraint::LessThan( -// Term::Primitive(PrimitiveTerm::Variable(Variable::Universal(z.clone()))), -// Term::Primitive(PrimitiveTerm::GroundTerm( -// AnyDataValue::new_integer_from_i64(7), -// )), -// ), -// Constraint::LessThanEq( -// Term::Primitive(PrimitiveTerm::Variable(Variable::Universal(x))), -// Term::Primitive(PrimitiveTerm::Variable(Variable::Universal(z.clone()))), -// ), -// Constraint::GreaterThanEq( -// Term::Primitive(PrimitiveTerm::Variable(Variable::Universal(z))), -// Term::Primitive(PrimitiveTerm::Variable(Variable::Universal(y))), -// ), -// ], -// ); - -// assert_parse!(parser.parse_rule(), &rule, expected_rule,); -// } + assert_parse!(parser.parse_fact(), &fact, expected_fact,); + } -// #[test] -// #[allow(clippy::redundant_clone)] -// fn parse_output() { -// let parser = RuleParser::new(); + #[test] + fn fact_rdf_literal_xsd_string() { + let parser = RuleParser::new(); -// let j2 = Identifier("J2".to_string()); + let prefix = unsafe { Span::new_from_raw_offset(8, 1, "xsd", ()) }; + let iri = 
"http://www.w3.org/2001/XMLSchema#"; + let prefix_declaration = format!("@prefix {prefix}: <{iri}> ."); -// assert_parse!(parser.parse_output_directive(), "@output J2 .", j2.clone()); -// assert_parse_error!( -// parser.parse_output_directive(), -// "@output J2[3] .", -// ParseError::ExpectedOutputDeclaration -// ); -// } + assert_parse!(parser.parse_prefix(), &prefix_declaration, prefix); -// #[test] -// fn parse_errors() { -// let parser = RuleParser::new(); - -// assert_expected_token!(parser.parse_dot(), "", "."); -// assert_expected_token!(parser.parse_dot(), ":-", "."); -// assert_expected_token!(parser.parse_comma(), "", ","); -// assert_expected_token!(parser.parse_comma(), ":-", ","); -// assert_expected_token!(parser.parse_not(), "", "~"); -// assert_expected_token!(parser.parse_not(), ":-", "~"); -// assert_expected_token!(parser.parse_arrow(), "", ":-"); -// assert_expected_token!(parser.parse_arrow(), "-:", ":-"); -// assert_expected_token!(parser.parse_open_parenthesis(), "", "("); -// assert_expected_token!(parser.parse_open_parenthesis(), "-:", "("); -// assert_expected_token!(parser.parse_close_parenthesis(), "", ")"); -// assert_expected_token!(parser.parse_close_parenthesis(), "-:", ")"); - -// assert_parse_error!( -// parser.parse_base(), -// "@base . @base .", -// ParseError::LateBaseDeclaration -// ); - -// assert_parse_error!( -// parser.parse_program(), -// "@prefix f: . @base .", -// ParseError::LateBaseDeclaration -// ); - -// assert_parse_error!( -// parser.parse_program(), -// "@output p . @base .", -// ParseError::LateBaseDeclaration -// ); - -// assert_parse_error!( -// parser.parse_program(), -// "@output p . @prefix g: .", -// ParseError::LatePrefixDeclaration -// ); -// } -// #[test] -// #[cfg_attr(miri, ignore)] -// fn parse_function_terms() { -// let parser = RuleParser::new(); - -// let twenty_three = Term::Primitive(PrimitiveTerm::GroundTerm( -// AnyDataValue::new_integer_from_i64(23), -// )); -// let fourty_two = Term::Primitive(PrimitiveTerm::GroundTerm( -// AnyDataValue::new_integer_from_i64(42), -// )); -// let twenty_three_times_fourty_two = Term::Binary { -// operation: BinaryOperation::NumericMultiplication, -// lhs: Box::new(twenty_three.clone()), -// rhs: Box::new(fourty_two.clone()), -// }; - -// assert_parse_error!( -// parser.parse_function_term(), -// "", -// ParseError::ExpectedFunctionTerm -// ); - -// let nullary_function = Term::Function(Identifier(String::from("nullary_function")), vec![]); -// assert_parse!( -// parser.parse_function_term(), -// "nullary_function()", -// nullary_function -// ); -// assert_parse!( -// parser.parse_function_term(), -// "nullary_function( )", -// nullary_function -// ); -// assert_parse_error!( -// parser.parse_function_term(), -// "nullary_function( () )", -// ParseError::ExpectedFunctionTerm -// ); - -// let unary_function = Term::Function( -// Identifier(String::from("unary_function")), -// vec![fourty_two.clone()], -// ); -// assert_parse!( -// parser.parse_function_term(), -// "unary_function(42)", -// unary_function -// ); -// assert_parse!( -// parser.parse_function_term(), -// "unary_function((42))", -// unary_function -// ); -// assert_parse!( -// parser.parse_function_term(), -// "unary_function(( (42 )))", -// unary_function -// ); - -// let binary_function = Term::Function( -// Identifier(String::from("binary_function")), -// vec![fourty_two.clone(), twenty_three.clone()], -// ); -// assert_parse!( -// parser.parse_function_term(), -// "binary_function(42, 23)", -// binary_function -// ); - 
-// let function_with_nested_algebraic_expression = Term::Function( -// Identifier(String::from("function")), -// vec![twenty_three_times_fourty_two], -// ); -// assert_parse!( -// parser.parse_function_term(), -// "function( 23 *42)", -// function_with_nested_algebraic_expression -// ); - -// let nested_function = Term::Function( -// Identifier(String::from("nested_function")), -// vec![nullary_function.clone()], -// ); - -// assert_parse!( -// parser.parse_function_term(), -// "nested_function(nullary_function())", -// nested_function -// ); - -// let triple_nested_function = Term::Function( -// Identifier(String::from("nested_function")), -// vec![Term::Function( -// Identifier(String::from("nested_function")), -// vec![Term::Function( -// Identifier(String::from("nested_function")), -// vec![nullary_function.clone()], -// )], -// )], -// ); -// assert_parse!( -// parser.parse_function_term(), -// "nested_function( nested_function( (nested_function(nullary_function()) ) ))", -// triple_nested_function -// ); -// } + let expected_fact = Fact(Atom::new( + p, + vec![Term::Primitive(PrimitiveTerm::GroundTerm( + AnyDataValue::new_plain_string(v), + ))], + )); -// #[test] -// fn parse_terms() { -// let parser = RuleParser::new(); + assert_parse!(parser.parse_fact(), &fact, expected_fact,); + } -// assert_parse_error!(parser.parse_term(), "", ParseError::ExpectedTerm); + #[test] + fn fact_string_literal() { + let parser = RuleParser::new(); + let predicate = "p"; + let value = "my nice string"; + let p = Identifier(predicate.to_string()); + let v = value.to_string(); + let fact = format!(r#"{predicate}("{value}") ."#); + + let expected_fact = Fact(Atom::new( + p, + vec![Term::Primitive(PrimitiveTerm::GroundTerm( + AnyDataValue::new_plain_string(v), + ))], + )); + + assert_parse!(parser.parse_fact(), &fact, expected_fact,); + } -// assert_parse!( -// parser.parse_term(), -// "constant", -// Term::Primitive(PrimitiveTerm::GroundTerm(AnyDataValue::new_iri( -// String::from("constant") -// ))) -// ); -// } + #[test] + fn fact_language_string() { + let parser = RuleParser::new(); + let predicate = "p"; + let v = "Qapla"; + let langtag = "tlh"; + let p = Identifier(predicate.to_string()); + let value = v.to_string(); + let fact = format!(r#"{predicate}("{v}"@{langtag}) ."#); + let tag = langtag.to_string(); + + let expected_fact = Fact(Atom::new( + p, + vec![Term::Primitive(PrimitiveTerm::GroundTerm( + AnyDataValue::new_language_tagged_string(value, tag), + ))], + )); + + assert_parse!(parser.parse_fact(), &fact, expected_fact); + } -// #[test] -// fn parse_aggregates() { -// let parser = RuleParser::new(); - -// assert_parse_error!(parser.parse_aggregate(), "", ParseError::ExpectedAggregate); - -// assert_parse!( -// parser.parse_aggregate(), -// "#min(?VARIABLE)", -// Term::Aggregation(Aggregate { -// logical_aggregate_operation: LogicalAggregateOperation::MinNumber, -// terms: vec![Term::Primitive(PrimitiveTerm::Variable( -// Variable::Universal(String::from("VARIABLE")) -// ))] -// }) -// ); - -// assert_parse_error!( -// parser.parse_aggregate(), -// "#test(?VAR1, ?VAR2)", -// ParseError::ExpectedAggregate -// ) -// } + #[test] + fn fact_abstract() { + let parser = RuleParser::new(); + let predicate = "p"; + let name = "a"; + let p = Identifier(predicate.to_string()); + let fact = format!(r#"{predicate}({name}) ."#); + + let expected_fact = Fact(Atom::new( + p, + vec![Term::Primitive(PrimitiveTerm::GroundTerm( + AnyDataValue::new_iri(name.to_string()), + ))], + )); + + 
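        // (Aside: `assert_parse!` wraps the parser in `all_input_consumed`,
        // so the assertion below passes only if the entire fact string is
        // consumed; a prefix match with trailing garbage would fail.)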
assert_parse!(parser.parse_fact(), &fact, expected_fact,); + } -// #[test] -// fn parse_unary_function() { -// let parser = RuleParser::new(); + #[test] + fn fact_comment() { + let parser = RuleParser::new(); + let predicate = "p"; + let value = "foo"; + let datatype = "bar"; + let p = Identifier(predicate.to_string()); + let v = value.to_string(); + let t = datatype.to_string(); + let fact = format!( + r#"{predicate}(% comment 1 + "{value}"^^<{datatype}> % comment 2 + ) % comment 3 + . % comment 4 + %"# + ); + + let expected_fact = Fact(Atom::new( + p, + vec![Term::Primitive(PrimitiveTerm::GroundTerm( + AnyDataValue::new_from_typed_literal(v, t) + .expect("unknown datatype should always work"), + ))], + )); + + assert_parse!(parser.parse_fact(), &fact, expected_fact,); + } -// let expression = "ABS(4)"; -// let expected_term = Term::Unary( -// UnaryOperation::NumericAbsolute, -// Box::new(Term::Primitive(PrimitiveTerm::GroundTerm( -// AnyDataValue::new_integer_from_i64(4), -// ))), -// ); + #[test] + #[cfg_attr(miri, ignore)] + fn filter() { + let parser = RuleParser::new(); + let aa = "A"; + let a = Identifier(aa.to_string()); + let bb = "B"; + let b = Identifier(bb.to_string()); + let pp = "P"; + let p = Identifier(pp.to_string()); + let xx = "X"; + let x = xx.to_string(); + let yy = "Y"; + let y = yy.to_string(); + let zz = "Z"; + let z = zz.to_string(); + + let rule = format!( + "{pp}(?{xx}) :- {aa}(?{xx}, ?{yy}), ?{yy} > ?{xx}, {bb}(?{zz}), ?{xx} = 3, ?{zz} < 7, ?{xx} <= ?{zz}, ?{zz} >= ?{yy} ." + ); + + let expected_rule = Rule::new( + vec![Atom::new( + p, + vec![Term::Primitive(PrimitiveTerm::Variable( + Variable::Universal(x.clone()), + ))], + )], + vec![ + Literal::Positive(Atom::new( + a, + vec![ + Term::Primitive(PrimitiveTerm::Variable(Variable::Universal( + x.clone(), + ))), + Term::Primitive(PrimitiveTerm::Variable(Variable::Universal( + y.clone(), + ))), + ], + )), + Literal::Positive(Atom::new( + b, + vec![Term::Primitive(PrimitiveTerm::Variable( + Variable::Universal(z.clone()), + ))], + )), + ], + vec![ + Constraint::GreaterThan( + Term::Primitive(PrimitiveTerm::Variable(Variable::Universal(y.clone()))), + Term::Primitive(PrimitiveTerm::Variable(Variable::Universal(x.clone()))), + ), + Constraint::Equals( + Term::Primitive(PrimitiveTerm::Variable(Variable::Universal(x.clone()))), + Term::Primitive(PrimitiveTerm::GroundTerm( + AnyDataValue::new_integer_from_i64(3), + )), + ), + Constraint::LessThan( + Term::Primitive(PrimitiveTerm::Variable(Variable::Universal(z.clone()))), + Term::Primitive(PrimitiveTerm::GroundTerm( + AnyDataValue::new_integer_from_i64(7), + )), + ), + Constraint::LessThanEq( + Term::Primitive(PrimitiveTerm::Variable(Variable::Universal(x))), + Term::Primitive(PrimitiveTerm::Variable(Variable::Universal(z.clone()))), + ), + Constraint::GreaterThanEq( + Term::Primitive(PrimitiveTerm::Variable(Variable::Universal(z))), + Term::Primitive(PrimitiveTerm::Variable(Variable::Universal(y))), + ), + ], + ); -// assert_parse!(parser.parse_arithmetic_factor(), expression, expected_term); -// } + assert_parse!(parser.parse_rule(), &rule, expected_rule,); + } -// #[test] -// fn parse_arithmetic_and_functions() { -// let parser = RuleParser::new(); - -// let expression = "5 * ABS(SQRT(4) - 3)"; - -// let expected_term = Term::Binary { -// operation: BinaryOperation::NumericMultiplication, -// lhs: Box::new(Term::Primitive(PrimitiveTerm::GroundTerm( -// AnyDataValue::new_integer_from_i64(5), -// ))), -// rhs: Box::new(Term::Unary( -// UnaryOperation::NumericAbsolute, 
-// Box::new(Term::Binary { -// operation: BinaryOperation::NumericSubtraction, -// lhs: Box::new(Term::Unary( -// UnaryOperation::NumericSquareroot, -// Box::new(Term::Primitive(PrimitiveTerm::GroundTerm( -// AnyDataValue::new_integer_from_i64(4), -// ))), -// )), -// rhs: Box::new(Term::Primitive(PrimitiveTerm::GroundTerm( -// AnyDataValue::new_integer_from_i64(3), -// ))), -// }), -// )), -// }; - -// assert_parse!(parser.parse_term(), expression, expected_term); -// } + #[test] + #[allow(clippy::redundant_clone)] + fn parse_output() { + let parser = RuleParser::new(); -// #[test] -// fn parse_assignment() { -// let parser = RuleParser::new(); - -// let expression = "?X = ABS(?Y - 5) * (7 + ?Z)"; - -// let variable = Term::Primitive(PrimitiveTerm::Variable(Variable::Universal( -// "X".to_string(), -// ))); - -// let term = Term::Binary { -// operation: BinaryOperation::NumericMultiplication, -// lhs: Box::new(Term::Unary( -// UnaryOperation::NumericAbsolute, -// Box::new(Term::Binary { -// operation: BinaryOperation::NumericSubtraction, -// lhs: Box::new(Term::Primitive(PrimitiveTerm::Variable( -// Variable::Universal("Y".to_string()), -// ))), -// rhs: Box::new(Term::Primitive(PrimitiveTerm::GroundTerm( -// AnyDataValue::new_integer_from_i64(5), -// ))), -// }), -// )), -// rhs: Box::new(Term::Binary { -// operation: BinaryOperation::NumericAddition, -// lhs: Box::new(Term::Primitive(PrimitiveTerm::GroundTerm( -// AnyDataValue::new_integer_from_i64(7), -// ))), -// rhs: Box::new(Term::Primitive(PrimitiveTerm::Variable( -// Variable::Universal("Z".to_string()), -// ))), -// }), -// }; - -// let expected = Constraint::Equals(variable, term); - -// assert_parse!(parser.parse_constraint(), expression, expected); -// } + let j2 = Identifier("J2".to_string()); -// #[test] -// fn parse_complex_condition() { -// let parser = RuleParser::new(); - -// let expression = "ABS(?X - ?Y) <= ?Z + SQRT(?Y)"; - -// let left_term = Term::Unary( -// UnaryOperation::NumericAbsolute, -// Box::new(Term::Binary { -// operation: BinaryOperation::NumericSubtraction, -// lhs: Box::new(Term::Primitive(PrimitiveTerm::Variable( -// Variable::Universal(String::from("X")), -// ))), -// rhs: Box::new(Term::Primitive(PrimitiveTerm::Variable( -// Variable::Universal(String::from("Y")), -// ))), -// }), -// ); - -// let right_term = Term::Binary { -// operation: BinaryOperation::NumericAddition, -// lhs: Box::new(Term::Primitive(PrimitiveTerm::Variable( -// Variable::Universal(String::from("Z")), -// ))), -// rhs: Box::new(Term::Unary( -// UnaryOperation::NumericSquareroot, -// Box::new(Term::Primitive(PrimitiveTerm::Variable( -// Variable::Universal(String::from("Y")), -// ))), -// )), -// }; - -// let expected = Constraint::LessThanEq(left_term, right_term); - -// assert_parse!(parser.parse_constraint(), expression, expected); -// } + assert_parse!(parser.parse_output_directive(), "@output J2 .", j2.clone()); + assert_parse_error!( + parser.parse_output_directive(), + "@output J2[3] .", + ParseError::ExpectedOutputDeclaration + ); + } -// #[test] -// fn map_literal() { -// let parser = RuleParser::new(); -// assert_parse!( -// parser.parse_map_literal(), -// r#"{}"#, -// MapDataValue::from_iter([]), -// ); - -// let ident = "foo"; -// let key = AnyDataValue::new_iri(ident.to_string()); - -// let entry = format!("{ident}=23"); -// assert_parse!( -// parser.parse_map_entry(), -// &entry, -// (key.clone(), AnyDataValue::new_integer_from_i64(23)) -// ); - -// let pairs = vec![ -// ( -// 
AnyDataValue::new_plain_string("23".to_string()), -// AnyDataValue::new_integer_from_i64(42), -// ), -// ( -// AnyDataValue::new_iri("foo".to_string()), -// AnyDataValue::new_integer_from_i64(23), -// ), -// ]; - -// assert_parse!( -// parser.parse_map_literal(), -// r#"{foo = 23, "23" = 42}"#, -// pairs.clone().into_iter().collect::() -// ); -// } + #[test] + fn parse_errors() { + let parser = RuleParser::new(); + + assert_expected_token!(parser.parse_dot(), "", "."); + assert_expected_token!(parser.parse_dot(), ":-", "."); + assert_expected_token!(parser.parse_comma(), "", ","); + assert_expected_token!(parser.parse_comma(), ":-", ","); + assert_expected_token!(parser.parse_not(), "", "~"); + assert_expected_token!(parser.parse_not(), ":-", "~"); + assert_expected_token!(parser.parse_arrow(), "", ":-"); + assert_expected_token!(parser.parse_arrow(), "-:", ":-"); + assert_expected_token!(parser.parse_open_parenthesis(), "", "("); + assert_expected_token!(parser.parse_open_parenthesis(), "-:", "("); + assert_expected_token!(parser.parse_close_parenthesis(), "", ")"); + assert_expected_token!(parser.parse_close_parenthesis(), "-:", ")"); + + assert_parse_error!( + parser.parse_base(), + "@base () -// ); -// } + #[test] + #[cfg_attr(miri, ignore)] + fn program_statement_order() { + assert_matches!( + parse_program( + r#"@output s . + s(?s, ?p, ?o) :- t(?s, ?p, ?o) . + @source t[3]: load-rdf("triples.nt") . + "# + ), + Ok(_) + ); + + let parser = RuleParser::new(); + assert_parse_error!( + parser.parse_program(), + "@base . @base .", + ParseError::LateBaseDeclaration + ); + + assert_parse_error!( + parser.parse_program(), + "@prefix f: . @base .", + ParseError::LateBaseDeclaration + ); + + assert_parse_error!( + parser.parse_program(), + "@output p . @base .", + ParseError::LateBaseDeclaration + ); + + assert_parse_error!( + parser.parse_program(), + "@output p . 
@prefix g: .", + ParseError::LatePrefixDeclaration + ); + } + #[test] + #[cfg_attr(miri, ignore)] + fn parse_function_terms() { + let parser = RuleParser::new(); + + let twenty_three = Term::Primitive(PrimitiveTerm::GroundTerm( + AnyDataValue::new_integer_from_i64(23), + )); + let fourty_two = Term::Primitive(PrimitiveTerm::GroundTerm( + AnyDataValue::new_integer_from_i64(42), + )); + let twenty_three_times_fourty_two = Term::Binary { + operation: BinaryOperation::NumericMultiplication, + lhs: Box::new(twenty_three.clone()), + rhs: Box::new(fourty_two.clone()), + }; -// #[test] -// fn tuple_literal() { -// let parser = RuleParser::new(); + assert_parse_error!( + parser.parse_function_term(), + "", + ParseError::ExpectedFunctionTerm + ); + + let nullary_function = + Term::Function(Identifier(String::from("nullary_function")), vec![]); + assert_parse!( + parser.parse_function_term(), + "nullary_function()", + nullary_function + ); + assert_parse!( + parser.parse_function_term(), + "nullary_function( )", + nullary_function + ); + assert_parse_error!( + parser.parse_function_term(), + "nullary_function( () )", + ParseError::ExpectedFunctionTerm + ); + + let unary_function = Term::Function( + Identifier(String::from("unary_function")), + vec![fourty_two.clone()], + ); + assert_parse!( + parser.parse_function_term(), + "unary_function(42)", + unary_function + ); + assert_parse!( + parser.parse_function_term(), + "unary_function((42))", + unary_function + ); + assert_parse!( + parser.parse_function_term(), + "unary_function(( (42 )))", + unary_function + ); + + let binary_function = Term::Function( + Identifier(String::from("binary_function")), + vec![fourty_two.clone(), twenty_three.clone()], + ); + assert_parse!( + parser.parse_function_term(), + "binary_function(42, 23)", + binary_function + ); + + let function_with_nested_algebraic_expression = Term::Function( + Identifier(String::from("function")), + vec![twenty_three_times_fourty_two], + ); + assert_parse!( + parser.parse_function_term(), + "function( 23 *42)", + function_with_nested_algebraic_expression + ); + + let nested_function = Term::Function( + Identifier(String::from("nested_function")), + vec![nullary_function.clone()], + ); + + assert_parse!( + parser.parse_function_term(), + "nested_function(nullary_function())", + nested_function + ); + + let triple_nested_function = Term::Function( + Identifier(String::from("nested_function")), + vec![Term::Function( + Identifier(String::from("nested_function")), + vec![Term::Function( + Identifier(String::from("nested_function")), + vec![nullary_function.clone()], + )], + )], + ); + assert_parse!( + parser.parse_function_term(), + "nested_function( nested_function( (nested_function(nullary_function()) ) ))", + triple_nested_function + ); + } -// let expected: TupleDataValue = [ -// AnyDataValue::new_iri("something".to_string()), -// AnyDataValue::new_integer_from_i64(42), -// TupleDataValue::from_iter([]).into(), -// ] -// .into_iter() -// .collect(); + #[test] + fn parse_terms() { + let parser = RuleParser::new(); -// assert_parse!( -// parser.parse_tuple_literal(), -// r#"(something, 42, ())"#, -// expected -// ); -// } + assert_parse_error!(parser.parse_term(), "", ParseError::ExpectedTerm); -// #[test] -// fn import_export() { -// let parser = RuleParser::new(); - -// let name = "p".to_string(); -// let predicate = Identifier(name.clone()); -// let qualified = format!("{name} "); -// let arguments = r#"{delimiter = ";", resource = }"#; -// let spec = format!("{qualified} :- 
dsv{arguments}"); -// let directive = format!("@import {spec} ."); -// let directive_export = format!("@export {spec} ."); -// let attributes = parser.parse_map_literal()(arguments.into()).unwrap().1; - -// assert_parse!( -// parser.parse_import_export_spec(), -// &spec, -// ImportExportDirective { -// predicate: predicate.clone(), -// format: FileFormat::DSV, -// attributes: attributes.clone(), -// } -// ); - -// assert_parse!( -// parser.parse_import(), -// &directive, -// ImportDirective::from(ImportExportDirective { -// predicate: predicate.clone(), -// format: FileFormat::DSV, -// attributes: attributes.clone() -// }) -// ); - -// assert_parse!( -// parser.parse_export(), -// &directive_export, -// ExportDirective::from(ImportExportDirective { -// predicate: predicate.clone(), -// format: FileFormat::DSV, -// attributes: attributes.clone() -// }) -// ); -// } -// } + assert_parse!( + parser.parse_term(), + "constant", + Term::Primitive(PrimitiveTerm::GroundTerm(AnyDataValue::new_iri( + String::from("constant") + ))) + ); + } + + #[test] + fn parse_aggregates() { + let parser = RuleParser::new(); + + assert_parse_error!(parser.parse_aggregate(), "", ParseError::ExpectedAggregate); + + assert_parse!( + parser.parse_aggregate(), + "#min(?VARIABLE)", + Term::Aggregation(Aggregate { + logical_aggregate_operation: LogicalAggregateOperation::MinNumber, + terms: vec![Term::Primitive(PrimitiveTerm::Variable( + Variable::Universal(String::from("VARIABLE")) + ))] + }) + ); + + assert_parse_error!( + parser.parse_aggregate(), + "#test(?VAR1, ?VAR2)", + ParseError::ExpectedAggregate + ) + } + + #[test] + fn parse_unary_function() { + let parser = RuleParser::new(); + + let expression = "ABS(4)"; + let expected_term = Term::Unary( + UnaryOperation::NumericAbsolute, + Box::new(Term::Primitive(PrimitiveTerm::GroundTerm( + AnyDataValue::new_integer_from_i64(4), + ))), + ); + + assert_parse!(parser.parse_arithmetic_factor(), expression, expected_term); + } + + #[test] + fn parse_arithmetic_and_functions() { + let parser = RuleParser::new(); + + let expression = "5 * ABS(SQRT(4) - 3)"; + + let expected_term = Term::Binary { + operation: BinaryOperation::NumericMultiplication, + lhs: Box::new(Term::Primitive(PrimitiveTerm::GroundTerm( + AnyDataValue::new_integer_from_i64(5), + ))), + rhs: Box::new(Term::Unary( + UnaryOperation::NumericAbsolute, + Box::new(Term::Binary { + operation: BinaryOperation::NumericSubtraction, + lhs: Box::new(Term::Unary( + UnaryOperation::NumericSquareroot, + Box::new(Term::Primitive(PrimitiveTerm::GroundTerm( + AnyDataValue::new_integer_from_i64(4), + ))), + )), + rhs: Box::new(Term::Primitive(PrimitiveTerm::GroundTerm( + AnyDataValue::new_integer_from_i64(3), + ))), + }), + )), + }; + + assert_parse!(parser.parse_term(), expression, expected_term); + } + + #[test] + fn parse_assignment() { + let parser = RuleParser::new(); + + let expression = "?X = ABS(?Y - 5) * (7 + ?Z)"; + + let variable = Term::Primitive(PrimitiveTerm::Variable(Variable::Universal( + "X".to_string(), + ))); + + let term = Term::Binary { + operation: BinaryOperation::NumericMultiplication, + lhs: Box::new(Term::Unary( + UnaryOperation::NumericAbsolute, + Box::new(Term::Binary { + operation: BinaryOperation::NumericSubtraction, + lhs: Box::new(Term::Primitive(PrimitiveTerm::Variable( + Variable::Universal("Y".to_string()), + ))), + rhs: Box::new(Term::Primitive(PrimitiveTerm::GroundTerm( + AnyDataValue::new_integer_from_i64(5), + ))), + }), + )), + rhs: Box::new(Term::Binary { + operation: 
BinaryOperation::NumericAddition, + lhs: Box::new(Term::Primitive(PrimitiveTerm::GroundTerm( + AnyDataValue::new_integer_from_i64(7), + ))), + rhs: Box::new(Term::Primitive(PrimitiveTerm::Variable( + Variable::Universal("Z".to_string()), + ))), + }), + }; + + let expected = Constraint::Equals(variable, term); + + assert_parse!(parser.parse_constraint(), expression, expected); + } + + #[test] + fn parse_complex_condition() { + let parser = RuleParser::new(); + + let expression = "ABS(?X - ?Y) <= ?Z + SQRT(?Y)"; + + let left_term = Term::Unary( + UnaryOperation::NumericAbsolute, + Box::new(Term::Binary { + operation: BinaryOperation::NumericSubtraction, + lhs: Box::new(Term::Primitive(PrimitiveTerm::Variable( + Variable::Universal(String::from("X")), + ))), + rhs: Box::new(Term::Primitive(PrimitiveTerm::Variable( + Variable::Universal(String::from("Y")), + ))), + }), + ); + + let right_term = Term::Binary { + operation: BinaryOperation::NumericAddition, + lhs: Box::new(Term::Primitive(PrimitiveTerm::Variable( + Variable::Universal(String::from("Z")), + ))), + rhs: Box::new(Term::Unary( + UnaryOperation::NumericSquareroot, + Box::new(Term::Primitive(PrimitiveTerm::Variable( + Variable::Universal(String::from("Y")), + ))), + )), + }; + + let expected = Constraint::LessThanEq(left_term, right_term); + + assert_parse!(parser.parse_constraint(), expression, expected); + } + + #[test] + fn map_literal() { + let parser = RuleParser::new(); + assert_parse!( + parser.parse_map_literal(), + r#"{}"#, + MapDataValue::from_iter([]), + ); + + let ident = "foo"; + let key = AnyDataValue::new_iri(ident.to_string()); + + let entry = format!("{ident}=23"); + assert_parse!( + parser.parse_map_entry(), + &entry, + (key.clone(), AnyDataValue::new_integer_from_i64(23)) + ); + + let pairs = vec![ + ( + AnyDataValue::new_plain_string("23".to_string()), + AnyDataValue::new_integer_from_i64(42), + ), + ( + AnyDataValue::new_iri("foo".to_string()), + AnyDataValue::new_integer_from_i64(23), + ), + ]; + + assert_parse!( + parser.parse_map_literal(), + r#"{foo = 23, "23" = 42}"#, + pairs.clone().into_iter().collect::() + ); + } + + #[test] + fn nested_map_literal() { + let parser = RuleParser::new(); + + let pairs = vec![( + AnyDataValue::new_iri("inner".to_string()), + MapDataValue::from_iter([]).into(), + )]; + + assert_parse!( + parser.parse_map_literal(), + r#"{inner = {}}"#, + pairs.clone().into_iter().collect::() + ); + } + + #[test] + fn tuple_literal() { + let parser = RuleParser::new(); + + let expected: TupleDataValue = [ + AnyDataValue::new_iri("something".to_string()), + AnyDataValue::new_integer_from_i64(42), + TupleDataValue::from_iter([]).into(), + ] + .into_iter() + .collect(); + + assert_parse!( + parser.parse_tuple_literal(), + r#"(something, 42, ())"#, + expected + ); + } + + #[test] + fn import_export() { + let parser = RuleParser::new(); + + let name = "p".to_string(); + let predicate = Identifier(name.clone()); + let qualified = format!("{name} "); + let arguments = r#"{delimiter = ";", resource = }"#; + let spec = format!("{qualified} :- dsv{arguments}"); + let directive = format!("@import {spec} ."); + let directive_export = format!("@export {spec} ."); + let attributes = parser.parse_map_literal()(arguments.into()).unwrap().1; + + assert_parse!( + parser.parse_import_export_spec(), + &spec, + ImportExportDirective { + predicate: predicate.clone(), + format: FileFormat::DSV, + attributes: attributes.clone(), + } + ); + + assert_parse!( + parser.parse_import(), + &directive, + 
ImportDirective::from(ImportExportDirective { + predicate: predicate.clone(), + format: FileFormat::DSV, + attributes: attributes.clone() + }) + ); + + assert_parse!( + parser.parse_export(), + &directive_export, + ExportDirective::from(ImportExportDirective { + predicate: predicate.clone(), + format: FileFormat::DSV, + attributes: attributes.clone() + }) + ); + } + } +} /// NEW PARSER use std::cell::RefCell; diff --git a/nemo/src/io/parser/iri.rs b/nemo/src/io/parser/iri.rs index f125af776..a9da28c12 100644 --- a/nemo/src/io/parser/iri.rs +++ b/nemo/src/io/parser/iri.rs @@ -8,8 +8,8 @@ use nom::{ }; use super::{ + old::token, rfc5234::{alpha, digit, hexdig}, - token, types::{IntermediateResult, Span}, }; diff --git a/nemo/src/io/parser/sparql.rs b/nemo/src/io/parser/sparql.rs index 2940a3df4..5084d1dc8 100644 --- a/nemo/src/io/parser/sparql.rs +++ b/nemo/src/io/parser/sparql.rs @@ -10,12 +10,13 @@ use nom::{ }; use super::{ - iri, map_error, + iri, + old::map_error, + old::token, + old::ParseError, rfc5234::digit, - token, turtle::hex, types::{IntermediateResult, Span}, - ParseError, }; use macros::traced; diff --git a/nemo/src/io/parser/turtle.rs b/nemo/src/io/parser/turtle.rs index 08363a76b..ffcaab5ce 100644 --- a/nemo/src/io/parser/turtle.rs +++ b/nemo/src/io/parser/turtle.rs @@ -13,11 +13,11 @@ use macros::traced; use nemo_physical::datavalues::AnyDataValue; use super::{ - map_error, + old::map_error, + old::token, + old::ParseError, sparql::{iri, Name}, - token, types::{IntermediateResult, Span}, - ParseError, }; /// Characters requiring escape sequences in single-line string literals. From e84d635b696ef16eb2da7a5a07bfb2767314a510 Mon Sep 17 00:00:00 2001 From: monsterkrampe Date: Wed, 28 Aug 2024 15:41:08 +0200 Subject: [PATCH 148/214] Don't panic in parser on unimplemented methods --- nemo/src/io/lexer.rs | 9 ++++++--- nemo/src/io/parser/ast/named_tuple.rs | 9 ++++++--- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs index bb8d66398..648504c17 100644 --- a/nemo/src/io/lexer.rs +++ b/nemo/src/io/lexer.rs @@ -182,15 +182,18 @@ impl AstNode for Span<'_> { } fn lsp_identifier(&self) -> Option<(String, String)> { - todo!() + // This was todo!() before but we don't want to panic here. + None } fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> { - todo!() + // This was todo!() before but we don't want to panic here. + None } fn lsp_range_to_rename(&self) -> Option { - todo!() + // This was todo!() before but we don't want to panic here. + None } } diff --git a/nemo/src/io/parser/ast/named_tuple.rs b/nemo/src/io/parser/ast/named_tuple.rs index fa8c2895d..9ef66d0af 100644 --- a/nemo/src/io/parser/ast/named_tuple.rs +++ b/nemo/src/io/parser/ast/named_tuple.rs @@ -42,15 +42,18 @@ impl AstNode for NamedTuple<'_> { } fn lsp_identifier(&self) -> Option<(String, String)> { - todo!() + // This was todo!() before but we don't want to panic here. + None } fn lsp_symbol_info(&self) -> Option<(String, tower_lsp::lsp_types::SymbolKind)> { - todo!() + // This was todo!() before but we don't want to panic here. + None } fn lsp_range_to_rename(&self) -> Option { - todo!() + // This was todo!() before but we don't want to panic here. 
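+        // Returning None reports "nothing to rename here" to the LSP client,
+        // which is a graceful fallback compared to the previous todo!() panic.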
+ None } } From 21501d88a50f846208da315b9d7b80f2966b1a9e Mon Sep 17 00:00:00 2001 From: Alex Ivliev Date: Wed, 4 Sep 2024 05:05:35 +0200 Subject: [PATCH 149/214] Translation into new chase model --- nemo/src/{model => _model}/chase_model.rs | 0 .../chase_model/aggregate.rs | 0 .../src/{model => _model}/chase_model/atom.rs | 0 .../chase_model/constructor.rs | 0 .../{model => _model}/chase_model/program.rs | 172 +- .../src/{model => _model}/chase_model/rule.rs | 0 .../{model => _model}/chase_model/variable.rs | 0 nemo/src/{model => _model}/rule_model.rs | 3 - .../rule_model/_syntax.rs} | 0 .../{model => _model}/rule_model/aggregate.rs | 0 nemo/src/{model => _model}/rule_model/atom.rs | 0 .../rule_model/constraint.rs | 0 .../rule_model/import_export.rs | 0 .../{model => _model}/rule_model/literal.rs | 0 .../{model => _model}/rule_model/predicate.rs | 0 nemo/src/_model/rule_model/program.rs | 270 + nemo/src/_model/rule_model/rule.rs | 384 ++ nemo/src/{model => _model}/rule_model/term.rs | 0 .../{model => _model}/types/complex_types.rs | 0 nemo/src/{model => _model}/types/error.rs | 0 .../types/primitive_logical_value.rs | 0 .../types/primitive_types.rs | 0 nemo/src/chase_model.rs | 6 + nemo/src/chase_model/analysis.rs | 0 nemo/src/chase_model/components.rs | 27 + nemo/src/chase_model/components/aggregate.rs | 91 + nemo/src/chase_model/components/atom.rs | 29 + .../components/atom/ground_atom.rs | 85 + .../components/atom/primitive_atom.rs | 94 + .../components/atom/variable_atom.rs | 74 + nemo/src/chase_model/components/export.rs | 59 + nemo/src/chase_model/components/filter.rs | 29 + nemo/src/chase_model/components/import.rs | 59 + nemo/src/chase_model/components/operation.rs | 54 + nemo/src/chase_model/components/program.rs | 47 + nemo/src/chase_model/components/rule.rs | 144 + nemo/src/chase_model/components/term.rs | 3 + .../components/term/operation_term.rs | 39 + nemo/src/chase_model/translation.rs | 52 + nemo/src/chase_model/translation/aggregate.rs | 142 + nemo/src/chase_model/translation/fact.rs | 43 + nemo/src/chase_model/translation/filter.rs | 54 + nemo/src/chase_model/translation/import.rs | 216 + nemo/src/chase_model/translation/operation.rs | 61 + nemo/src/chase_model/translation/rule.rs | 304 ++ nemo/src/error.rs | 6 +- nemo/src/execution/execution_engine.rs | 2 +- nemo/src/execution/tracing/trace.rs | 2 +- nemo/src/io.rs | 3 +- nemo/src/io/compression_format.rs | 33 +- nemo/src/io/error.rs | 76 + nemo/src/io/export_manager.rs | 280 +- nemo/src/io/formats.rs | 134 +- nemo/src/io/formats/dsv.rs | 337 +- .../formats/{dsv_reader.rs => dsv/reader.rs} | 27 +- .../value_format.rs} | 125 +- .../formats/{dsv_writer.rs => dsv/writer.rs} | 20 +- nemo/src/io/formats/import_export.rs | 1164 ++-- nemo/src/io/formats/json.rs | 57 +- .../{json_reader.rs => json/reader.rs} | 0 nemo/src/io/formats/rdf.rs | 431 +- nemo/src/io/formats/rdf/error.rs | 30 + .../formats/{rdf_reader.rs => rdf/reader.rs} | 58 +- nemo/src/io/formats/rdf/value_format.rs | 94 + .../formats/{rdf_writer.rs => rdf/writer.rs} | 18 +- nemo/src/io/formats/types.rs | 26 - nemo/src/io/import_manager.rs | 111 +- nemo/src/io/lexer.rs | 701 --- nemo/src/io/parser.rs | 4864 ----------------- nemo/src/io/parser/ast.rs | 447 -- nemo/src/io/parser/ast/atom.rs | 122 - nemo/src/io/parser/ast/directive.rs | 193 - nemo/src/io/parser/ast/map.rs | 115 - nemo/src/io/parser/ast/named_tuple.rs | 66 - nemo/src/io/parser/ast/program.rs | 75 - nemo/src/io/parser/ast/statement.rs | 200 - nemo/src/io/parser/ast/term.rs | 367 -- 
nemo/src/io/parser/ast/tuple.rs | 62 - nemo/src/io/parser/iri.rs | 356 -- nemo/src/io/parser/rfc5234.rs | 25 - nemo/src/io/parser/sparql.rs | 187 - nemo/src/io/parser/turtle.rs | 186 - nemo/src/io/parser/types.rs | 669 --- nemo/src/io/resource_providers.rs | 2 +- nemo/src/io/resource_providers/file.rs | 2 +- nemo/src/io/resource_providers/http.rs | 2 +- nemo/src/lib.rs | 2 +- nemo/src/model.rs | 14 - nemo/src/model/rule_model/program.rs | 270 - nemo/src/model/rule_model/rule.rs | 385 -- nemo/src/rule_model/components/fact.rs | 6 +- .../rule_model/components/import_export.rs | 111 +- .../components/import_export/attributes.rs | 7 + .../components/import_export/compression.rs | 5 +- .../components/import_export/file_formats.rs | 17 + nemo/src/rule_model/components/literal.rs | 18 +- nemo/src/rule_model/components/output.rs | 7 +- nemo/src/rule_model/components/rule.rs | 54 +- .../rule_model/components/term/operation.rs | 8 +- .../components/term/primitive/ground.rs | 5 + .../components/term/primitive/variable.rs | 10 +- .../term/primitive/variable/existential.rs | 5 + .../term/primitive/variable/universal.rs | 5 + nemo/src/rule_model/error/info.rs | 3 + nemo/src/rule_model/error/validation_error.rs | 5 + nemo/src/rule_model/program.rs | 267 +- nemo/src/rule_model/translation.rs | 4 + nemo/src/syntax.rs | 15 + nemo/src/table_manager.rs | 52 +- testfile.rls | 44 - testfile1.rls | 172 - testfile2.rls | 14 - testfile3.rls | 10 - 113 files changed, 4594 insertions(+), 11137 deletions(-) rename nemo/src/{model => _model}/chase_model.rs (100%) rename nemo/src/{model => _model}/chase_model/aggregate.rs (100%) rename nemo/src/{model => _model}/chase_model/atom.rs (100%) rename nemo/src/{model => _model}/chase_model/constructor.rs (100%) rename nemo/src/{model => _model}/chase_model/program.rs (56%) rename nemo/src/{model => _model}/chase_model/rule.rs (100%) rename nemo/src/{model => _model}/chase_model/variable.rs (100%) rename nemo/src/{model => _model}/rule_model.rs (89%) rename nemo/src/{model/rule_model/syntax.rs => _model/rule_model/_syntax.rs} (100%) rename nemo/src/{model => _model}/rule_model/aggregate.rs (100%) rename nemo/src/{model => _model}/rule_model/atom.rs (100%) rename nemo/src/{model => _model}/rule_model/constraint.rs (100%) rename nemo/src/{model => _model}/rule_model/import_export.rs (100%) rename nemo/src/{model => _model}/rule_model/literal.rs (100%) rename nemo/src/{model => _model}/rule_model/predicate.rs (100%) create mode 100644 nemo/src/_model/rule_model/program.rs create mode 100644 nemo/src/_model/rule_model/rule.rs rename nemo/src/{model => _model}/rule_model/term.rs (100%) rename nemo/src/{model => _model}/types/complex_types.rs (100%) rename nemo/src/{model => _model}/types/error.rs (100%) rename nemo/src/{model => _model}/types/primitive_logical_value.rs (100%) rename nemo/src/{model => _model}/types/primitive_types.rs (100%) create mode 100644 nemo/src/chase_model.rs create mode 100644 nemo/src/chase_model/analysis.rs create mode 100644 nemo/src/chase_model/components.rs create mode 100644 nemo/src/chase_model/components/aggregate.rs create mode 100644 nemo/src/chase_model/components/atom.rs create mode 100644 nemo/src/chase_model/components/atom/ground_atom.rs create mode 100644 nemo/src/chase_model/components/atom/primitive_atom.rs create mode 100644 nemo/src/chase_model/components/atom/variable_atom.rs create mode 100644 nemo/src/chase_model/components/export.rs create mode 100644 nemo/src/chase_model/components/filter.rs create mode 100644 
nemo/src/chase_model/components/import.rs create mode 100644 nemo/src/chase_model/components/operation.rs create mode 100644 nemo/src/chase_model/components/program.rs create mode 100644 nemo/src/chase_model/components/rule.rs create mode 100644 nemo/src/chase_model/components/term.rs create mode 100644 nemo/src/chase_model/components/term/operation_term.rs create mode 100644 nemo/src/chase_model/translation.rs create mode 100644 nemo/src/chase_model/translation/aggregate.rs create mode 100644 nemo/src/chase_model/translation/fact.rs create mode 100644 nemo/src/chase_model/translation/filter.rs create mode 100644 nemo/src/chase_model/translation/import.rs create mode 100644 nemo/src/chase_model/translation/operation.rs create mode 100644 nemo/src/chase_model/translation/rule.rs create mode 100644 nemo/src/io/error.rs rename nemo/src/io/formats/{dsv_reader.rs => dsv/reader.rs} (96%) rename nemo/src/io/formats/{dsv_value_format.rs => dsv/value_format.rs} (77%) rename nemo/src/io/formats/{dsv_writer.rs => dsv/writer.rs} (86%) rename nemo/src/io/formats/{json_reader.rs => json/reader.rs} (100%) create mode 100644 nemo/src/io/formats/rdf/error.rs rename nemo/src/io/formats/{rdf_reader.rs => rdf/reader.rs} (94%) create mode 100644 nemo/src/io/formats/rdf/value_format.rs rename nemo/src/io/formats/{rdf_writer.rs => rdf/writer.rs} (97%) delete mode 100644 nemo/src/io/formats/types.rs delete mode 100644 nemo/src/io/lexer.rs delete mode 100644 nemo/src/io/parser.rs delete mode 100644 nemo/src/io/parser/ast.rs delete mode 100644 nemo/src/io/parser/ast/atom.rs delete mode 100644 nemo/src/io/parser/ast/directive.rs delete mode 100644 nemo/src/io/parser/ast/map.rs delete mode 100644 nemo/src/io/parser/ast/named_tuple.rs delete mode 100644 nemo/src/io/parser/ast/program.rs delete mode 100644 nemo/src/io/parser/ast/statement.rs delete mode 100644 nemo/src/io/parser/ast/term.rs delete mode 100644 nemo/src/io/parser/ast/tuple.rs delete mode 100644 nemo/src/io/parser/iri.rs delete mode 100644 nemo/src/io/parser/rfc5234.rs delete mode 100644 nemo/src/io/parser/sparql.rs delete mode 100644 nemo/src/io/parser/turtle.rs delete mode 100644 nemo/src/io/parser/types.rs delete mode 100644 nemo/src/model.rs delete mode 100644 nemo/src/model/rule_model/program.rs delete mode 100644 nemo/src/model/rule_model/rule.rs delete mode 100644 testfile.rls delete mode 100644 testfile1.rls delete mode 100644 testfile2.rls delete mode 100644 testfile3.rls diff --git a/nemo/src/model/chase_model.rs b/nemo/src/_model/chase_model.rs similarity index 100% rename from nemo/src/model/chase_model.rs rename to nemo/src/_model/chase_model.rs diff --git a/nemo/src/model/chase_model/aggregate.rs b/nemo/src/_model/chase_model/aggregate.rs similarity index 100% rename from nemo/src/model/chase_model/aggregate.rs rename to nemo/src/_model/chase_model/aggregate.rs diff --git a/nemo/src/model/chase_model/atom.rs b/nemo/src/_model/chase_model/atom.rs similarity index 100% rename from nemo/src/model/chase_model/atom.rs rename to nemo/src/_model/chase_model/atom.rs diff --git a/nemo/src/model/chase_model/constructor.rs b/nemo/src/_model/chase_model/constructor.rs similarity index 100% rename from nemo/src/model/chase_model/constructor.rs rename to nemo/src/_model/chase_model/constructor.rs diff --git a/nemo/src/model/chase_model/program.rs b/nemo/src/_model/chase_model/program.rs similarity index 56% rename from nemo/src/model/chase_model/program.rs rename to nemo/src/_model/chase_model/program.rs index c977aefb1..6e204f1f8 100644 --- 
a/nemo/src/model/chase_model/program.rs +++ b/nemo/src/_model/chase_model/program.rs @@ -4,11 +4,7 @@ use std::collections::{HashMap, HashSet}; use nemo_physical::datavalues::AnyDataValue; -use crate::{ - error::Error, - io::formats::import_export::{ImportExportHandler, ImportExportHandlers}, - model::{ExportDirective, Identifier, ImportDirective, Program}, -}; +use crate::{io::formats::ImportExportHandler, model::Identifier}; use super::{ChaseAtom, ChaseFact, ChaseRule}; @@ -63,47 +59,47 @@ impl ChaseProgramBuilder { self } - /// Add an imported table. - pub fn import(mut self, import: &ImportDirective) -> Result { - let handler = ImportExportHandlers::import_handler(import)?; - self.program - .import_handlers - .push((import.predicate().clone(), handler)); - Ok(self) - } - - /// Add imported tables. - pub fn imports(self, imports: T) -> Result - where - T: IntoIterator, - { - let mut cur_self: Self = self; - for import in imports { - cur_self = cur_self.import(&import)?; - } - Ok(cur_self) - } - - /// Add an exported table. - pub fn export(mut self, export: &ExportDirective) -> Result { - let handler = ImportExportHandlers::export_handler(export)?; - self.program - .export_handlers - .push((export.predicate().clone(), handler)); - Ok(self) - } - - /// Add exported tables. - pub fn exports(self, exports: T) -> Result - where - T: IntoIterator, - { - let mut cur_self: Self = self; - for export in exports { - cur_self = cur_self.export(&export)?; - } - Ok(cur_self) - } + // /// Add an imported table. + // pub fn import(mut self, import: &ImportDirective) -> Result { + // let handler = ImportExportHandlers::import_handler(import)?; + // self.program + // .import_handlers + // .push((import.predicate().clone(), handler)); + // Ok(self) + // } + + // /// Add imported tables. + // pub fn imports(self, imports: T) -> Result + // where + // T: IntoIterator, + // { + // let mut cur_self: Self = self; + // for import in imports { + // cur_self = cur_self.import(&import)?; + // } + // Ok(cur_self) + // } + + // /// Add an exported table. + // pub fn export(mut self, export: &ExportDirective) -> Result { + // let handler = ImportExportHandlers::export_handler(export)?; + // self.program + // .export_handlers + // .push((export.predicate().clone(), handler)); + // Ok(self) + // } + + // /// Add exported tables. + // pub fn exports(self, exports: T) -> Result + // where + // T: IntoIterator, + // { + // let mut cur_self: Self = self; + // for export in exports { + // cur_self = cur_self.export(&export)?; + // } + // Ok(cur_self) + // } /// Add a rule. pub fn rule(mut self, rule: ChaseRule) -> Self { @@ -178,17 +174,17 @@ impl ChaseProgram { .collect() } - /// Return all imports in the program. - pub(crate) fn imports( - &self, - ) -> impl Iterator)> { - self.import_handlers.iter() - } + // /// Return all imports in the program. + // pub(crate) fn imports( + // &self, + // ) -> impl Iterator)> { + // self.import_handlers.iter() + // } - /// Return all exports in the program. - pub fn exports(&self) -> impl Iterator)> { - self.export_handlers.iter() - } + // /// Return all exports in the program. + // pub fn exports(&self) -> impl Iterator)> { + // self.export_handlers.iter() + // } /// Returns the [AnyDataValue]s used as constants in the rules of the program. 
pub fn all_datavalues(&self) -> impl Iterator { @@ -196,35 +192,35 @@ impl ChaseProgram { } } -impl TryFrom for ChaseProgram { - type Error = Error; - - fn try_from(program: Program) -> Result { - let mut builder = Self::builder() - .prefixes(program.prefixes().clone()) - .imports(program.imports().cloned())? - .exports(program.exports().cloned())? - .rules( - program - .rules() - .iter() - .cloned() - .map(ChaseRule::try_from) - .collect::, Error>>()?, - ) - .facts( - program - .facts() - .iter() - .map(|fact| ChaseFact::from_flat_atom(&fact.0)), - ); - - if let Some(base) = program.base() { - builder = builder.base(base); - } - - builder = builder.output_predicates(program.output_predicates().cloned()); - - Ok(builder.build()) - } -} +// impl TryFrom for ChaseProgram { +// type Error = Error; + +// fn try_from(program: Program) -> Result { +// let mut builder = Self::builder() +// .prefixes(program.prefixes().clone()) +// .imports(program.imports().cloned())? +// .exports(program.exports().cloned())? +// .rules( +// program +// .rules() +// .iter() +// .cloned() +// .map(ChaseRule::try_from) +// .collect::, Error>>()?, +// ) +// .facts( +// program +// .facts() +// .iter() +// .map(|fact| ChaseFact::from_flat_atom(&fact.0)), +// ); + +// if let Some(base) = program.base() { +// builder = builder.base(base); +// } + +// builder = builder.output_predicates(program.output_predicates().cloned()); + +// Ok(builder.build()) +// } +// } diff --git a/nemo/src/model/chase_model/rule.rs b/nemo/src/_model/chase_model/rule.rs similarity index 100% rename from nemo/src/model/chase_model/rule.rs rename to nemo/src/_model/chase_model/rule.rs diff --git a/nemo/src/model/chase_model/variable.rs b/nemo/src/_model/chase_model/variable.rs similarity index 100% rename from nemo/src/model/chase_model/variable.rs rename to nemo/src/_model/chase_model/variable.rs diff --git a/nemo/src/model/rule_model.rs b/nemo/src/_model/rule_model.rs similarity index 89% rename from nemo/src/model/rule_model.rs rename to nemo/src/_model/rule_model.rs index 41126f748..38a234dda 100644 --- a/nemo/src/model/rule_model.rs +++ b/nemo/src/_model/rule_model.rs @@ -26,6 +26,3 @@ pub use constraint::*; pub mod import_export; pub use import_export::*; - -mod syntax; -pub(crate) use syntax::*; diff --git a/nemo/src/model/rule_model/syntax.rs b/nemo/src/_model/rule_model/_syntax.rs similarity index 100% rename from nemo/src/model/rule_model/syntax.rs rename to nemo/src/_model/rule_model/_syntax.rs diff --git a/nemo/src/model/rule_model/aggregate.rs b/nemo/src/_model/rule_model/aggregate.rs similarity index 100% rename from nemo/src/model/rule_model/aggregate.rs rename to nemo/src/_model/rule_model/aggregate.rs diff --git a/nemo/src/model/rule_model/atom.rs b/nemo/src/_model/rule_model/atom.rs similarity index 100% rename from nemo/src/model/rule_model/atom.rs rename to nemo/src/_model/rule_model/atom.rs diff --git a/nemo/src/model/rule_model/constraint.rs b/nemo/src/_model/rule_model/constraint.rs similarity index 100% rename from nemo/src/model/rule_model/constraint.rs rename to nemo/src/_model/rule_model/constraint.rs diff --git a/nemo/src/model/rule_model/import_export.rs b/nemo/src/_model/rule_model/import_export.rs similarity index 100% rename from nemo/src/model/rule_model/import_export.rs rename to nemo/src/_model/rule_model/import_export.rs diff --git a/nemo/src/model/rule_model/literal.rs b/nemo/src/_model/rule_model/literal.rs similarity index 100% rename from nemo/src/model/rule_model/literal.rs rename to 
nemo/src/_model/rule_model/literal.rs diff --git a/nemo/src/model/rule_model/predicate.rs b/nemo/src/_model/rule_model/predicate.rs similarity index 100% rename from nemo/src/model/rule_model/predicate.rs rename to nemo/src/_model/rule_model/predicate.rs diff --git a/nemo/src/_model/rule_model/program.rs b/nemo/src/_model/rule_model/program.rs new file mode 100644 index 000000000..b91e1672d --- /dev/null +++ b/nemo/src/_model/rule_model/program.rs @@ -0,0 +1,270 @@ +// use std::collections::{HashMap, HashSet}; + +// use crate::model::{ExportDirective, ImportDirective}; + +// use super::{Atom, Identifier, Rule}; + +// /// A (ground) fact. +// #[derive(Debug, Eq, PartialEq, Clone)] +// pub struct Fact(pub Atom); + +// impl std::fmt::Display for Fact { +// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { +// self.0.fmt(f) +// } +// } + +// /// A statement that can occur in the program. +// #[derive(Debug, Eq, PartialEq, Clone)] +// pub enum Statement { +// /// A fact. +// Fact(Fact), +// /// A rule. +// Rule(Rule), +// } + +// /// A complete program. +// #[derive(Debug, Default, Clone)] +// pub struct Program { +// base: Option, +// prefixes: HashMap, +// rules: Vec, +// facts: Vec, +// imports: Vec, +// exports: Vec, +// output_predicates: Vec, +// } + +// /// A Builder for a program. +// #[derive(Debug, Default)] +// pub struct ProgramBuilder { +// program: Program, +// } + +// impl ProgramBuilder { +// /// Construct a new builder. +// pub fn new() -> Self { +// Default::default() +// } + +// /// Construct a [Program] from this builder. +// pub fn build(self) -> Program { +// self.program +// } + +// /// Set the base IRI. +// pub fn base(mut self, base: String) -> Self { +// self.program.base = Some(base); +// self +// } + +// /// Add a prefix. +// pub fn prefix(mut self, prefix: String, iri: String) -> Self { +// self.program.prefixes.insert(prefix, iri); +// self +// } + +// /// Add prefixes. +// pub fn prefixes(mut self, prefixes: T) -> Self +// where +// T: IntoIterator, +// { +// self.program.prefixes.extend(prefixes); +// self +// } + +// /// Add an imported table. +// pub fn import(mut self, import: ImportDirective) -> Self { +// self.program.imports.push(import); +// self +// } + +// /// Add imported tables. +// pub fn imports(mut self, imports: T) -> Self +// where +// T: IntoIterator, +// { +// self.program.imports.extend(imports); +// self +// } + +// /// Add an exported table. +// pub fn export(mut self, export: ExportDirective) -> Self { +// self.program.exports.push(export); +// self +// } + +// /// Add exported tables. +// pub fn exports(mut self, exports: T) -> Self +// where +// T: IntoIterator, +// { +// self.program.exports.extend(exports); +// self +// } + +// /// Add a rule. +// pub fn rule(mut self, rule: Rule) -> Self { +// self.program.rules.push(rule); +// self +// } + +// /// Add rules. +// pub fn rules(mut self, rules: T) -> Self +// where +// T: IntoIterator, +// { +// self.program.rules.extend(rules); +// self +// } + +// /// Add a fact. +// pub fn fact(mut self, fact: Fact) -> Self { +// self.program.facts.push(fact); +// self +// } + +// /// Add facts. +// pub fn facts(mut self, facts: T) -> Self +// where +// T: IntoIterator, +// { +// self.program.facts.extend(facts); +// self +// } + +// /// Mark predicate as output predicate. +// pub fn output_predicate(self, predicate: Identifier) -> Self { +// self.output_predicates([predicate]) +// } + +// /// Mark predicates as output predicates. 
+// pub fn output_predicates(mut self, predicates: T) -> Self +// where +// T: IntoIterator, +// { +// self.program.output_predicates.extend(predicates); +// self +// } +// } + +// impl Program { +// /// Return a [builder][ProgramBuilder] for the [Program]. +// pub fn builder() -> ProgramBuilder { +// Default::default() +// } + +// /// Get the base IRI, if set. +// #[must_use] +// pub fn base(&self) -> Option { +// self.base.clone() +// } + +// /// Return all rules in the program - immutable. +// #[must_use] +// pub fn rules(&self) -> &Vec { +// &self.rules +// } + +// /// Return all facts in the program. +// #[must_use] +// pub fn facts(&self) -> &Vec { +// &self.facts +// } + +// /// Return a HashSet of all predicates in the program (in rules and facts). +// #[must_use] +// pub fn predicates(&self) -> HashSet { +// self.rules() +// .iter() +// .flat_map(|rule| { +// rule.head() +// .iter() +// .map(|atom| atom.predicate()) +// .chain(rule.body().iter().map(|literal| literal.predicate())) +// }) +// .chain(self.facts().iter().map(|atom| atom.0.predicate())) +// .collect() +// } + +// /// Return a HashSet of all idb predicates (predicates occuring rule heads) in the program. +// #[must_use] +// pub fn idb_predicates(&self) -> HashSet { +// self.rules() +// .iter() +// .flat_map(|rule| rule.head()) +// .map(|atom| atom.predicate()) +// .collect() +// } + +// /// Return a HashSet of all edb predicates (all predicates minus idb predicates) in the program. +// #[must_use] +// pub fn edb_predicates(&self) -> HashSet { +// self.predicates() +// .difference(&self.idb_predicates()) +// .cloned() +// .collect() +// } + +// /// Return an Iterator over all output predicates that +// /// were explicitly marked in output directives. +// pub fn output_predicates(&self) -> impl Iterator { +// self.output_predicates.iter() +// } + +// /// Add output predicates to the program. +// pub fn add_output_predicates(&mut self, predicates: T) +// where +// T: IntoIterator, +// { +// self.output_predicates.extend(predicates); +// } + +// /// Remove all output predicates of the program. +// pub fn clear_output_predicates(&mut self) { +// self.output_predicates.clear(); +// } + +// /// Return all prefixes in the program. +// #[must_use] +// pub fn prefixes(&self) -> &HashMap { +// &self.prefixes +// } + +// /// Return all [ImportDirective]s of the program. +// pub fn imports(&self) -> impl Iterator { +// self.imports.iter() +// } + +// /// Add [ImportDirective]s to the program. +// pub fn add_imports(&mut self, imports: T) +// where +// T: IntoIterator, +// { +// self.imports.extend(imports); +// } + +// /// Return all [ExportDirective]s of the program. +// pub fn exports(&self) -> impl Iterator { +// self.exports.iter() +// } + +// /// Add [ExportDirective]s to the program. +// pub fn add_exports(&mut self, exports: T) +// where +// T: IntoIterator, +// { +// self.exports.extend(exports); +// } + +// /// Remove all [ExportDirective]s of the program. +// pub fn clear_exports(&mut self) { +// self.exports.clear(); +// } + +// /// Look up a given prefix. 
+// #[must_use] +// pub fn resolve_prefix(&self, tag: &str) -> Option { +// self.prefixes.get(tag).cloned() +// } +// } diff --git a/nemo/src/_model/rule_model/rule.rs b/nemo/src/_model/rule_model/rule.rs new file mode 100644 index 000000000..35a6ca077 --- /dev/null +++ b/nemo/src/_model/rule_model/rule.rs @@ -0,0 +1,384 @@ +// use std::collections::{HashMap, HashSet}; + +// use crate::model::VariableAssignment; + +// use super::{Atom, Constraint, Literal, PrimitiveTerm, Term, Variable}; + +// /// A rule. +// #[derive(Debug, Eq, PartialEq, Clone)] +// pub struct Rule { +// /// Head atoms of the rule +// head: Vec, +// /// Body literals of the rule +// body: Vec, +// /// Constraints on the body of the rule +// constraints: Vec, +// } + +// impl Rule { +// /// Construct a new rule. +// pub fn new(head: Vec, body: Vec, constraints: Vec) -> Self { +// Self { +// head, +// body, +// constraints, +// } +// } + +// fn calculate_derived_variables( +// safe_variables: &HashSet, +// constraints: &[Constraint], +// ) -> HashSet { +// let mut derived_variables = safe_variables.clone(); + +// let mut satisfied_constraints = HashSet::::new(); +// while satisfied_constraints.len() < constraints.len() { +// let num_satisified_constraints = satisfied_constraints.len(); + +// for (constraint_index, constraint) in constraints.iter().enumerate() { +// if satisfied_constraints.contains(&constraint_index) { +// continue; +// } + +// if let Some((variable, term)) = constraint.has_form_assignment() { +// if !derived_variables.contains(variable) +// && term +// .variables() +// .all(|term_variable| derived_variables.contains(term_variable)) +// { +// derived_variables.insert(variable.clone()); +// satisfied_constraints.insert(constraint_index); +// continue; +// } +// } +// } + +// if satisfied_constraints.len() == num_satisified_constraints { +// return derived_variables; +// } +// } + +// derived_variables +// } + +// /// Return all variables that appear in negative literals +// /// but cannot be derived from positive literals. +// /// +// /// For each variable also returns the associated index of the literal. +// /// +// /// Returns an error if one negative variable is associated with multiple literals. +// fn calculate_negative_variables( +// negative: &[Literal], +// safe_variables: &HashSet, +// ) -> Result, ParseError> { +// let mut negative_variables = HashMap::::new(); + +// for (literal_index, negative_literal) in negative.iter().enumerate() { +// let mut current_unsafe = HashMap::::new(); + +// for negative_term in negative_literal.terms() { +// if let Term::Primitive(PrimitiveTerm::Variable(variable)) = negative_term { +// if safe_variables.contains(variable) { +// continue; +// } + +// current_unsafe.insert(variable.clone(), literal_index); + +// if negative_variables.contains_key(variable) { +// return Err(ParseError::UnsafeVariableInMultipleNegativeLiterals( +// variable.clone(), +// )); +// } +// } +// } + +// negative_variables.extend(current_unsafe) +// } + +// Ok(negative_variables) +// } + +// /// Construct a new rule, validating constraints on variable usage. 
+// pub(crate) fn new_validated( +// head: Vec, +// body: Vec, +// constraints: Vec, +// ) -> Result { +// // All the existential variables used in the rule +// let existential_variable_names = head +// .iter() +// .flat_map(|a| a.existential_variables().flat_map(|v| v.name())) +// .collect::>(); + +// for variable in body +// .iter() +// .flat_map(|l| l.variables()) +// .chain(constraints.iter().flat_map(|c| c.variables())) +// { +// // Existential variables may only occur in the head +// if variable.is_existential() { +// return Err(ParseError::BodyExistential(variable.clone())); +// } + +// // There may not be a universal variable whose name is the same that of an existential +// if let Some(name) = variable.name() { +// if existential_variable_names.contains(&name) { +// return Err(ParseError::BothQuantifiers(name)); +// } +// } +// } + +// // Divide the literals into a positive and a negative part +// let (positive, negative): (Vec<_>, Vec<_>) = body +// .iter() +// .cloned() +// .partition(|literal| literal.is_positive()); + +// // Safe variables are considered to be +// // all variables occuring as primitive terms in a positive body literal +// // or every value that is equal to such a variable +// let safe_variables = Self::safe_variables_literals(&positive); + +// // Derived variables are variables that result from functional expressions +// // expressed as ?Variable = Term constraints, +// // where the term only contains safe or derived variables. +// let derived_variables = Self::calculate_derived_variables(&safe_variables, &constraints); + +// // Negative variables are variables that occur as primitive terms in negative literals +// // bot cannot be derived +// let negative_variables = Self::calculate_negative_variables(&negative, &derived_variables)?; + +// // Each constraint must only use derived variables +// // or if it contains negative variables, then all variables in the constraint +// // must be from the same atom +// for constraint in &constraints { +// let unknown = constraint.variables().find(|variable| { +// !derived_variables.contains(variable) && !negative_variables.contains_key(variable) +// }); + +// if let Some(variable) = unknown { +// return Err(ParseError::UnsafeComplexTerm( +// constraint.to_string(), +// variable.clone(), +// )); +// } + +// if let Some(negative_variable) = constraint +// .variables() +// .find(|variable| negative_variables.contains_key(variable)) +// { +// let negative_literal = &negative[*negative_variables +// .get(negative_variable) +// .expect("Map must contain key")]; +// let allowed_variables = negative_literal +// .variables() +// .cloned() +// .collect::>(); + +// if let Some(not_allowed) = constraint +// .variables() +// .find(|variable| !allowed_variables.contains(variable)) +// { +// return Err(ParseError::ConstraintOutsideVariable( +// constraint.to_string(), +// negative_variable.clone(), +// negative_literal.to_string(), +// not_allowed.clone(), +// )); +// } +// } +// } + +// // Each complex term in the body and head must only use safe or derived variables +// for term in body +// .iter() +// .flat_map(|l| l.terms()) +// .chain(head.iter().flat_map(|a| a.terms())) +// { +// if term.is_primitive() { +// continue; +// } + +// for variable in term.variables() { +// if !derived_variables.contains(variable) { +// return Err(ParseError::UnsafeComplexTerm( +// term.to_string(), +// variable.clone(), +// )); +// } +// } +// } + +// let mut is_existential = false; + +// // Head atoms may only use variables that are safe or derived 
+// for variable in head.iter().flat_map(|a| a.variables()) { +// if variable.is_existential() { +// is_existential = true; +// } + +// if variable.is_unnamed() { +// return Err(ParseError::UnnamedInHead); +// } + +// if variable.is_universal() && !derived_variables.contains(variable) { +// return Err(ParseError::UnsafeHeadVariable(variable.clone())); +// } +// } + +// // Check for aggregates in the body of a rule +// for literal in &body { +// #[allow(clippy::never_loop)] +// for aggregate in literal.aggregates() { +// return Err(ParseError::AggregateInBody(aggregate.clone())); +// } +// } +// for constraint in &constraints { +// #[allow(clippy::never_loop)] +// for aggregate in constraint.aggregates() { +// return Err(ParseError::AggregateInBody(aggregate.clone())); +// } +// } + +// // We only allow one aggregate per rule, +// // and do not allow them to appear together with existential variables +// let mut aggregate_count = 0; +// for head_atom in &head { +// for term in head_atom.terms() { +// aggregate_count += term.aggregates().len(); + +// if aggregate_count > 1 { +// return Err(ParseError::MultipleAggregates); +// } +// } +// } + +// if aggregate_count > 0 && is_existential { +// return Err(ParseError::AggregatesPlusExistentials); +// } + +// Ok(Rule { +// head, +// body, +// constraints, +// }) +// } + +// /// Return all variables that are "safe". +// /// A variable is safe if it occurs in a positive body literal. +// fn safe_variables_literals(literals: &[Literal]) -> HashSet { +// let mut result = HashSet::new(); + +// for literal in literals { +// if let Literal::Positive(atom) = literal { +// for term in atom.terms() { +// if let Term::Primitive(PrimitiveTerm::Variable(variable)) = term { +// result.insert(variable.clone()); +// } +// } +// } +// } + +// result +// } + +// /// Return all variables that are "safe". +// /// A variable is safe if it occurs in a positive body literal, +// /// or is equal to such a value. +// pub fn safe_variables(&self) -> HashSet { +// Self::safe_variables_literals(&self.body) +// } + +// /// Return the head atoms of the rule - immutable. +// #[must_use] +// pub fn head(&self) -> &Vec { +// &self.head +// } + +// /// Return the head atoms of the rule - mutable. +// #[must_use] +// pub fn head_mut(&mut self) -> &mut Vec { +// &mut self.head +// } + +// /// Return the body literals of the rule - immutable. +// #[must_use] +// pub fn body(&self) -> &Vec { +// &self.body +// } + +// /// Return the body literals of the rule - mutable. +// #[must_use] +// pub fn body_mut(&mut self) -> &mut Vec { +// &mut self.body +// } + +// /// Return the constraints of the rule - immutable. +// #[must_use] +// pub fn constraints(&self) -> &Vec { +// &self.constraints +// } + +// /// Return the filters of the rule - mutable. +// #[must_use] +// pub fn constraints_mut(&mut self) -> &mut Vec { +// &mut self.constraints +// } + +// /// Replaces [Variable]s with [super::Term]s according to the provided assignment. +// pub fn apply_assignment(&mut self, assignment: &VariableAssignment) { +// self.body +// .iter_mut() +// .for_each(|l| l.apply_assignment(assignment)); +// self.head +// .iter_mut() +// .for_each(|a| a.apply_assignment(assignment)); +// self.constraints +// .iter_mut() +// .for_each(|f| f.apply_assignment(assignment)); +// } + +// /// Return the number of negative body atoms contained in the rule. 
+// pub fn num_negative_body(&self) -> usize { +// self.body +// .iter() +// .filter(|literal| literal.is_negative()) +// .count() +// } +// } + +// impl std::fmt::Display for Rule { +// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { +// for (index, atom) in self.head.iter().enumerate() { +// atom.fmt(f)?; + +// if index < self.head.len() - 1 { +// f.write_str(", ")?; +// } +// } + +// f.write_str(" :- ")?; + +// for (index, literal) in self.body.iter().enumerate() { +// literal.fmt(f)?; + +// if index < self.body.len() - 1 { +// f.write_str(", ")?; +// } +// } + +// if !self.constraints.is_empty() { +// f.write_str(", ")?; +// } + +// for (index, constraint) in self.constraints.iter().enumerate() { +// constraint.fmt(f)?; + +// if index < self.constraints.len() - 1 { +// f.write_str(", ")?; +// } +// } + +// f.write_str(" .") +// } +// } diff --git a/nemo/src/model/rule_model/term.rs b/nemo/src/_model/rule_model/term.rs similarity index 100% rename from nemo/src/model/rule_model/term.rs rename to nemo/src/_model/rule_model/term.rs diff --git a/nemo/src/model/types/complex_types.rs b/nemo/src/_model/types/complex_types.rs similarity index 100% rename from nemo/src/model/types/complex_types.rs rename to nemo/src/_model/types/complex_types.rs diff --git a/nemo/src/model/types/error.rs b/nemo/src/_model/types/error.rs similarity index 100% rename from nemo/src/model/types/error.rs rename to nemo/src/_model/types/error.rs diff --git a/nemo/src/model/types/primitive_logical_value.rs b/nemo/src/_model/types/primitive_logical_value.rs similarity index 100% rename from nemo/src/model/types/primitive_logical_value.rs rename to nemo/src/_model/types/primitive_logical_value.rs diff --git a/nemo/src/model/types/primitive_types.rs b/nemo/src/_model/types/primitive_types.rs similarity index 100% rename from nemo/src/model/types/primitive_types.rs rename to nemo/src/_model/types/primitive_types.rs diff --git a/nemo/src/chase_model.rs b/nemo/src/chase_model.rs new file mode 100644 index 000000000..d8ebf7aec --- /dev/null +++ b/nemo/src/chase_model.rs @@ -0,0 +1,6 @@ +//! Normalized version of Nemo's logical rule model +//! that is more suitable for the generation of execution plans. + +pub(crate) mod analysis; +pub(crate) mod components; +pub(crate) mod translation; diff --git a/nemo/src/chase_model/analysis.rs b/nemo/src/chase_model/analysis.rs new file mode 100644 index 000000000..e69de29bb diff --git a/nemo/src/chase_model/components.rs b/nemo/src/chase_model/components.rs new file mode 100644 index 000000000..18503b42b --- /dev/null +++ b/nemo/src/chase_model/components.rs @@ -0,0 +1,27 @@ +//! This module contains the components of the chase model. +//! +//! In general, such components are restricted versions +//! of the respective components of Nemo's logical rule model. + +use crate::rule_model::origin::Origin; + +pub(crate) mod aggregate; +pub(crate) mod atom; +pub(crate) mod export; +pub(crate) mod filter; +pub(crate) mod import; +pub(crate) mod operation; +pub(crate) mod program; +pub(crate) mod rule; +pub(crate) mod term; + +/// Trait implemented by components of the chase model +pub(crate) trait ChaseComponent { + /// Return the [Origin] of this component. + fn origin(&self) -> &Origin; + + /// Set the [Origin] of this component. 
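+    ///
+    /// Takes the component by value and returns it again,
+    /// so that the call can be chained in builder style.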
+    fn set_origin(self, origin: Origin) -> Self
+    where
+        Self: Sized;
+}
diff --git a/nemo/src/chase_model/components/aggregate.rs b/nemo/src/chase_model/components/aggregate.rs
new file mode 100644
index 000000000..55806bbcc
--- /dev/null
+++ b/nemo/src/chase_model/components/aggregate.rs
@@ -0,0 +1,91 @@
+//! This module defines [ChaseAggregate].
+
+use std::collections::HashSet;
+
+use crate::rule_model::{
+    components::term::{aggregate::AggregateKind, primitive::variable::Variable},
+    origin::Origin,
+};
+
+use super::ChaseComponent;
+
+/// Specifies how the values for a placeholder aggregate variable will get computed.
+///
+/// Terminology:
+/// * `input_variables` are the distinct variables and the aggregated input variable, not including the group-by variables
+/// * `output_variable` is the single aggregated output variable
+///
+/// See [nemo_physical::tabular::operations::TrieScanAggregate]
+#[derive(Debug, Clone)]
+pub(crate) struct ChaseAggregate {
+    /// Origin of this component
+    origin: Origin,
+
+    /// Type of aggregate operation
+    kind: AggregateKind,
+
+    /// Variable that contains the value over which the aggregate is computed
+    input_variable: Variable,
+    /// Variable that will contain the result of this operation
+    output_variable: Variable,
+
+    /// Distinct variables
+    distinct_variables: Vec<Variable>,
+    /// Group-by variables
+    group_by_variables: HashSet<Variable>,
+}
+
+impl ChaseAggregate {
+    /// Create a new [ChaseAggregate].
+    pub fn new(
+        origin: Origin,
+        kind: AggregateKind,
+        input_variable: Variable,
+        output_variable: Variable,
+        distinct_variables: Vec<Variable>,
+        group_by_variables: HashSet<Variable>,
+    ) -> Self {
+        Self {
+            origin,
+            kind,
+            input_variable,
+            output_variable,
+            distinct_variables,
+            group_by_variables,
+        }
+    }
+
+    /// Return the aggregated input variable, which is the first of the input variables.
+    pub fn input_variable(&self) -> &Variable {
+        &self.input_variable
+    }
+
+    /// Return the output variable.
+    pub fn output_variable(&self) -> &Variable {
+        &self.output_variable
+    }
+
+    /// Return the distinct variables.
+    pub fn distinct_variables(&self) -> &Vec<Variable> {
+        &self.distinct_variables
+    }
+
+    /// Return the group-by variables.
+    pub fn group_by_variables(&self) -> &HashSet<Variable> {
+        &self.group_by_variables
+    }
+}
+
+impl ChaseComponent for ChaseAggregate {
+    fn origin(&self) -> &Origin {
+        &self.origin
+    }
+
+    fn set_origin(mut self, origin: Origin) -> Self
+    where
+        Self: Sized,
+    {
+        self.origin = origin;
+        self
+    }
+}
diff --git a/nemo/src/chase_model/components/atom.rs b/nemo/src/chase_model/components/atom.rs
new file mode 100644
index 000000000..e30eba2ef
--- /dev/null
+++ b/nemo/src/chase_model/components/atom.rs
@@ -0,0 +1,29 @@
+//! Defines the trait [ChaseAtom]
+
+pub(crate) mod ground_atom;
+pub(crate) mod primitive_atom;
+pub(crate) mod variable_atom;
+
+use crate::rule_model::components::{tag::Tag, IterableVariables};
+
+use super::ChaseComponent;
+
+/// Tagged list of terms.
+pub(crate) trait ChaseAtom: ChaseComponent + IterableVariables {
+    /// Type of the terms within the atom.
+    type TypeTerm;
+
+    /// Return the predicate [Tag].
+    fn predicate(&self) -> Tag;
+
+    /// Return an immutable iterator over the list of terms.
+    fn terms(&self) -> impl Iterator<Item = &Self::TypeTerm>;
+
+    /// Return a mutable iterator over the list of terms.
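+    ///
+    /// This makes it possible, for example, to rewrite terms in place
+    /// without rebuilding the atom.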
+    fn terms_mut(&mut self) -> impl Iterator<Item = &mut Self::TypeTerm>;
+
+    /// Return the arity of the atom
+    fn arity(&self) -> usize {
+        self.terms().count()
+    }
+}
diff --git a/nemo/src/chase_model/components/atom/ground_atom.rs b/nemo/src/chase_model/components/atom/ground_atom.rs
new file mode 100644
index 000000000..3d83dd204
--- /dev/null
+++ b/nemo/src/chase_model/components/atom/ground_atom.rs
@@ -0,0 +1,85 @@
+//! This module defines [GroundAtom].
+
+use nemo_physical::datavalues::AnyDataValue;
+
+use crate::{
+    chase_model::components::ChaseComponent,
+    rule_model::{
+        components::{
+            tag::Tag,
+            term::primitive::{ground::GroundTerm, variable::Variable},
+            IterableVariables,
+        },
+        origin::Origin,
+    },
+};
+
+use super::ChaseAtom;
+
+/// An atom which may only use [GroundTerm]s
+#[derive(Debug, Clone)]
+pub(crate) struct GroundAtom {
+    /// Origin of this component
+    origin: Origin,
+
+    /// Predicate name of this atom
+    predicate: Tag,
+    /// Terms contained in this atom
+    terms: Vec<GroundTerm>,
+}
+
+impl GroundAtom {
+    /// Construct a new [GroundAtom].
+    pub(crate) fn new(origin: Origin, predicate: Tag, terms: Vec<GroundTerm>) -> Self {
+        Self {
+            origin,
+            predicate,
+            terms,
+        }
+    }
+
+    /// Returns all [AnyDataValue]s used as constants in this atom
+    pub(crate) fn datavalues(&self) -> impl Iterator<Item = AnyDataValue> + '_ {
+        self.terms().map(|term| term.value())
+    }
+}
+
+impl ChaseAtom for GroundAtom {
+    type TypeTerm = GroundTerm;
+
+    fn predicate(&self) -> Tag {
+        self.predicate.clone()
+    }
+
+    fn terms(&self) -> impl Iterator<Item = &Self::TypeTerm> {
+        self.terms.iter()
+    }
+
+    fn terms_mut(&mut self) -> impl Iterator<Item = &mut Self::TypeTerm> {
+        self.terms.iter_mut()
+    }
+}
+
+impl IterableVariables for GroundAtom {
+    fn variables<'a>(&'a self) -> Box<dyn Iterator<Item = &'a Variable> + 'a> {
+        Box::new(std::iter::empty())
+    }
+
+    fn variables_mut<'a>(&'a mut self) -> Box<dyn Iterator<Item = &'a mut Variable> + 'a> {
+        Box::new(std::iter::empty())
+    }
+}
+
+impl ChaseComponent for GroundAtom {
+    fn origin(&self) -> &Origin {
+        &self.origin
+    }
+
+    fn set_origin(mut self, origin: Origin) -> Self
+    where
+        Self: Sized,
+    {
+        self.origin = origin;
+        self
+    }
+}
diff --git a/nemo/src/chase_model/components/atom/primitive_atom.rs b/nemo/src/chase_model/components/atom/primitive_atom.rs
new file mode 100644
index 000000000..f568b175a
--- /dev/null
+++ b/nemo/src/chase_model/components/atom/primitive_atom.rs
@@ -0,0 +1,94 @@
+//! This module defines [PrimitiveAtom].
+
+use nemo_physical::datavalues::AnyDataValue;
+
+use crate::{
+    chase_model::components::ChaseComponent,
+    rule_model::{
+        components::{
+            tag::Tag,
+            term::primitive::{variable::Variable, Primitive},
+            IterableVariables,
+        },
+        origin::Origin,
+    },
+};
+
+use super::ChaseAtom;
+
+/// An atom which may only use [Primitive] terms
+#[derive(Debug, Clone)]
+pub(crate) struct PrimitiveAtom {
+    /// Origin of this component
+    origin: Origin,
+
+    /// Predicate name of this atom
+    predicate: Tag,
+    /// Terms contained in this atom
+    terms: Vec<Primitive>,
+}
+
+impl PrimitiveAtom {
+    /// Construct a new [PrimitiveAtom].
+    pub(crate) fn new(origin: Origin, predicate: Tag, terms: Vec<Primitive>) -> Self {
+        Self {
+            origin,
+            predicate,
+            terms,
+        }
+    }
+
+    /// Returns all [AnyDataValue]s used as constants in this atom.
+    pub(crate) fn datavalues(&self) -> impl Iterator<Item = AnyDataValue> + '_ {
+        self.terms.iter().filter_map(|term| match term {
+            Primitive::Ground(ground) => Some(ground.value().clone()),
+            Primitive::Variable(_) => None,
+        })
+    }
+}
+
+impl ChaseAtom for PrimitiveAtom {
+    type TypeTerm = Primitive;
+
+    fn predicate(&self) -> Tag {
+        self.predicate.clone()
+    }
+
+    fn terms(&self) -> impl Iterator<Item = &Self::TypeTerm> {
+        self.terms.iter()
+    }
+
+    fn terms_mut(&mut self) -> impl Iterator<Item = &mut Self::TypeTerm> {
+        self.terms.iter_mut()
+    }
+}
+
+impl IterableVariables for PrimitiveAtom {
+    fn variables<'a>(&'a self) -> Box<dyn Iterator<Item = &'a Variable> + 'a> {
+        Box::new(self.terms().filter_map(|term| match term {
+            Primitive::Variable(variable) => Some(variable),
+            Primitive::Ground(_) => None,
+        }))
+    }
+
+    fn variables_mut<'a>(&'a mut self) -> Box<dyn Iterator<Item = &'a mut Variable> + 'a> {
+        Box::new(self.terms_mut().filter_map(|term| match term {
+            Primitive::Variable(variable) => Some(variable),
+            Primitive::Ground(_) => None,
+        }))
+    }
+}
+
+impl ChaseComponent for PrimitiveAtom {
+    fn origin(&self) -> &Origin {
+        &self.origin
+    }
+
+    fn set_origin(mut self, origin: Origin) -> Self
+    where
+        Self: Sized,
+    {
+        self.origin = origin;
+        self
+    }
+}
diff --git a/nemo/src/chase_model/components/atom/variable_atom.rs b/nemo/src/chase_model/components/atom/variable_atom.rs
new file mode 100644
index 000000000..2ec8f8cf4
--- /dev/null
+++ b/nemo/src/chase_model/components/atom/variable_atom.rs
@@ -0,0 +1,74 @@
+//! This module defines [VariableAtom].
+
+use crate::{
+    chase_model::components::ChaseComponent,
+    rule_model::{
+        components::{tag::Tag, term::primitive::variable::Variable, IterableVariables},
+        origin::Origin,
+    },
+};
+
+use super::ChaseAtom;
+
+/// An atom which may only use [Variable]s.
+#[derive(Debug, Clone)]
+pub(crate) struct VariableAtom {
+    /// Origin of this component
+    origin: Origin,
+
+    /// Predicate name of this atom
+    predicate: Tag,
+    /// Variables contained in this atom
+    variables: Vec<Variable>,
+}
+
+impl VariableAtom {
+    /// Construct a new [VariableAtom].
+    pub(crate) fn new(origin: Origin, predicate: Tag, variables: Vec<Variable>) -> Self {
+        Self {
+            origin,
+            predicate,
+            variables,
+        }
+    }
+}
+
+impl ChaseAtom for VariableAtom {
+    type TypeTerm = Variable;
+
+    fn predicate(&self) -> Tag {
+        self.predicate.clone()
+    }
+
+    fn terms(&self) -> impl Iterator<Item = &Self::TypeTerm> {
+        self.variables.iter()
+    }
+
+    fn terms_mut(&mut self) -> impl Iterator<Item = &mut Self::TypeTerm> {
+        self.variables.iter_mut()
+    }
+}
+
+impl IterableVariables for VariableAtom {
+    fn variables<'a>(&'a self) -> Box<dyn Iterator<Item = &'a Variable> + 'a> {
+        Box::new(self.terms())
+    }
+
+    fn variables_mut<'a>(&'a mut self) -> Box<dyn Iterator<Item = &'a mut Variable> + 'a> {
+        Box::new(self.terms_mut())
+    }
+}
+
+impl ChaseComponent for VariableAtom {
+    fn origin(&self) -> &Origin {
+        &self.origin
+    }
+
+    fn set_origin(mut self, origin: Origin) -> Self
+    where
+        Self: Sized,
+    {
+        self.origin = origin;
+        self
+    }
+}
diff --git a/nemo/src/chase_model/components/export.rs b/nemo/src/chase_model/components/export.rs
new file mode 100644
index 000000000..78e922ce2
--- /dev/null
+++ b/nemo/src/chase_model/components/export.rs
@@ -0,0 +1,59 @@
+//! This module defines [ChaseExport].
+
+use crate::{
+    io::formats::ImportExportHandler,
+    rule_model::{components::tag::Tag, origin::Origin},
+};
+
+use super::ChaseComponent;
+
+/// Component for handling exports
+#[derive(Debug)]
+pub(crate) struct ChaseExport {
+    /// Origin of this component
+    origin: Origin,
+
+    /// Predicate that will contain the data
+    predicate: Tag,
+    /// Handler object responsible for exporting data
+    handler: Box<dyn ImportExportHandler>,
+}
+
+impl ChaseExport {
+    /// Create a new [ChaseExport].
+    pub(crate) fn new(
+        origin: Origin,
+        predicate: Tag,
+        handler: Box<dyn ImportExportHandler>,
+    ) -> Self {
+        Self {
+            origin,
+            predicate,
+            handler,
+        }
+    }
+
+    /// Return the predicate.
+    pub(crate) fn predicate(&self) -> &Tag {
+        &self.predicate
+    }
+
+    /// Return the handler.
+    pub(crate) fn handler(&self) -> &Box<dyn ImportExportHandler> {
+        &self.handler
+    }
+}
+
+impl ChaseComponent for ChaseExport {
+    fn origin(&self) -> &Origin {
+        &self.origin
+    }
+
+    fn set_origin(mut self, origin: Origin) -> Self
+    where
+        Self: Sized,
+    {
+        self.origin = origin;
+        self
+    }
+}
diff --git a/nemo/src/chase_model/components/filter.rs b/nemo/src/chase_model/components/filter.rs
new file mode 100644
index 000000000..8464fae90
--- /dev/null
+++ b/nemo/src/chase_model/components/filter.rs
@@ -0,0 +1,29 @@
+//! This module defines [ChaseFilter].
+
+use crate::rule_model::origin::Origin;
+
+use super::term::operation_term::OperationTerm;
+
+/// Indicates that an [OperationTerm] is evaluated as a condition.
+///
+/// Only results for which the condition holds are kept.
+#[derive(Debug)]
+pub(crate) struct ChaseFilter {
+    /// Origin of this component
+    origin: Origin,
+
+    /// Operation that will be evaluated
+    filter: OperationTerm,
+}
+
+impl ChaseFilter {
+    /// Create a new [ChaseFilter].
+    pub(crate) fn new(origin: Origin, filter: OperationTerm) -> Self {
+        Self { origin, filter }
+    }
+
+    /// Return the filter that is being applied.
+    pub(crate) fn filter(&self) -> &OperationTerm {
+        &self.filter
+    }
+}
diff --git a/nemo/src/chase_model/components/import.rs b/nemo/src/chase_model/components/import.rs
new file mode 100644
index 000000000..dd9fd0033
--- /dev/null
+++ b/nemo/src/chase_model/components/import.rs
@@ -0,0 +1,59 @@
+//! This module defines [ChaseImport].
+
+use crate::{
+    io::formats::ImportExportHandler,
+    rule_model::{components::tag::Tag, origin::Origin},
+};
+
+use super::ChaseComponent;
+
+/// Component for handling imports
+#[derive(Debug)]
+pub(crate) struct ChaseImport {
+    /// Origin of this component
+    origin: Origin,
+
+    /// Predicate that will contain the data
+    predicate: Tag,
+    /// Handler object responsible for importing data
+    handler: Box<dyn ImportExportHandler>,
+}
+
+impl ChaseImport {
+    /// Create a new [ChaseImport].
+    pub(crate) fn new(
+        origin: Origin,
+        predicate: Tag,
+        handler: Box<dyn ImportExportHandler>,
+    ) -> Self {
+        Self {
+            origin,
+            predicate,
+            handler,
+        }
+    }
+
+    /// Return the predicate.
+    pub(crate) fn predicate(&self) -> &Tag {
+        &self.predicate
+    }
+
+    /// Return the handler.
+    pub(crate) fn handler(&self) -> &Box<dyn ImportExportHandler> {
+        &self.handler
+    }
+}
+
+impl ChaseComponent for ChaseImport {
+    fn origin(&self) -> &Origin {
+        &self.origin
+    }
+
+    fn set_origin(mut self, origin: Origin) -> Self
+    where
+        Self: Sized,
+    {
+        self.origin = origin;
+        self
+    }
+}
diff --git a/nemo/src/chase_model/components/operation.rs b/nemo/src/chase_model/components/operation.rs
new file mode 100644
index 000000000..d9ab7bc29
--- /dev/null
+++ b/nemo/src/chase_model/components/operation.rs
@@ -0,0 +1,54 @@
+//! This module defines [ChaseOperation].
+
+use crate::rule_model::{components::term::primitive::variable::Variable, origin::Origin};
+
+use super::{term::operation_term::OperationTerm, ChaseComponent};
+
+/// Indicates that a new value must be created according to [OperationTerm].
+///
+/// The result will be "stored" in the given variable.
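+///
+/// For example, an assignment such as `?Z = ?X + ?Y` in a rule body
+/// can be represented by such an operation.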
+#[derive(Debug)]
+pub(crate) struct ChaseOperation {
+    /// Origin of this component
+    origin: Origin,
+
+    /// Variable that will hold the result of this operation
+    output_variable: Variable,
+    /// Operation that will be evaluated
+    operation: OperationTerm,
+}
+
+impl ChaseOperation {
+    /// Create a new [ChaseOperation].
+    pub(crate) fn new(origin: Origin, output_variable: Variable, operation: OperationTerm) -> Self {
+        Self {
+            origin,
+            output_variable,
+            operation,
+        }
+    }
+
+    /// Return the variable associated with the result of this operation.
+    pub(crate) fn variable(&self) -> &Variable {
+        &self.output_variable
+    }
+
+    /// Return the operation that is being evaluated.
+    pub(crate) fn operation(&self) -> &OperationTerm {
+        &self.operation
+    }
+}
+
+impl ChaseComponent for ChaseOperation {
+    fn origin(&self) -> &Origin {
+        &self.origin
+    }
+
+    fn set_origin(mut self, origin: Origin) -> Self
+    where
+        Self: Sized,
+    {
+        self.origin = origin;
+        self
+    }
+}
diff --git a/nemo/src/chase_model/components/program.rs b/nemo/src/chase_model/components/program.rs
new file mode 100644
index 000000000..a7c01c171
--- /dev/null
+++ b/nemo/src/chase_model/components/program.rs
@@ -0,0 +1,47 @@
+//! This module defines [ChaseProgram].
+
+use crate::rule_model::components::tag::Tag;
+
+use super::{atom::ground_atom::GroundAtom, rule::ChaseRule};
+
+#[derive(Debug, Default)]
+pub(crate) struct ChaseProgram {
+    // import_handlers: Vec<(Identifier, Box<dyn ImportExportHandler>)>,
+    // export_handlers: Vec<(Identifier, Box<dyn ImportExportHandler>)>,
+    /// Rules
+    rules: Vec<ChaseRule>,
+    /// Facts
+    facts: Vec<GroundAtom>,
+    /// Predicates marked as output
+    output_predicates: Vec<Tag>,
+}
+
+impl ChaseProgram {
+    /// Create a new [ChaseProgram].
+    pub(crate) fn new(
+        rules: Vec<ChaseRule>,
+        facts: Vec<GroundAtom>,
+        output_predicates: Vec<Tag>,
+    ) -> Self {
+        Self {
+            rules,
+            facts,
+            output_predicates,
+        }
+    }
+
+    /// Add a new rule to the program.
+    pub(crate) fn add_rule(&mut self, rule: ChaseRule) {
+        self.rules.push(rule)
+    }
+
+    /// Add a new fact to the program.
+    pub(crate) fn add_fact(&mut self, fact: GroundAtom) {
+        self.facts.push(fact)
+    }
+
+    /// Add a new output predicate to the program.
+    pub(crate) fn add_output_predicate(&mut self, predicate: Tag) {
+        self.output_predicates.push(predicate)
+    }
+}
diff --git a/nemo/src/chase_model/components/rule.rs b/nemo/src/chase_model/components/rule.rs
new file mode 100644
index 000000000..6baa5317e
--- /dev/null
+++ b/nemo/src/chase_model/components/rule.rs
@@ -0,0 +1,144 @@
+//! This module defines [ChaseRule].
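+//!
+//! Informally, a logical rule such as
+//! `result(?x) :- data(?x, ?y), ~excluded(?x), ?y > 5`
+//! is split up here into a positive body (the atom `data(?x, ?y)` together
+//! with the filter `?y > 5`), a negative body (the atom `excluded(?x)`),
+//! optional aggregation, and the head `result(?x)`; the rule syntax above
+//! is only illustrative.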
+
+use crate::rule_model::origin::Origin;
+
+use super::{
+    aggregate::ChaseAggregate,
+    atom::{primitive_atom::PrimitiveAtom, variable_atom::VariableAtom},
+    filter::ChaseFilter,
+    operation::ChaseOperation,
+    ChaseComponent,
+};
+
+/// The positive body of a [ChaseRule]
+#[derive(Debug, Default)]
+struct ChaseRuleBodyPositive {
+    /// Atoms that bind variables
+    atoms: Vec<VariableAtom>,
+    /// Computation of new bindings
+    operations: Vec<ChaseOperation>,
+    /// Filtering of results
+    filters: Vec<ChaseFilter>,
+}
+
+/// The negative body of a [ChaseRule]
+#[derive(Debug, Default)]
+struct ChaseRuleBodyNegative {
+    /// Negated atoms
+    atoms: Vec<VariableAtom>,
+    /// For each negated atom, the filters that are applied
+    filters: Vec<Vec<ChaseFilter>>,
+}
+
+/// Handling of aggregation within a [ChaseRule]
+#[derive(Debug, Default)]
+struct ChaseRuleAggregation {
+    /// Aggregate
+    aggregate: Option<ChaseAggregate>,
+
+    /// New values created from the aggregation result
+    operations: Vec<ChaseOperation>,
+    /// Filters based on the aggregation result
+    filters: Vec<ChaseFilter>,
+}
+
+/// Head of a [ChaseRule]
+#[derive(Debug, Default)]
+struct ChaseRuleHead {
+    /// Head atoms of the rule
+    atoms: Vec<PrimitiveAtom>,
+    /// Index of the head atom which contains the aggregate
+    aggregate_head_index: Option<usize>,
+}
+
+/// Representation of a rule in a [ChaseProgram][super::program::ChaseProgram]
+#[allow(dead_code)]
+#[derive(Debug, Default)]
+pub(crate) struct ChaseRule {
+    /// Origin of this component
+    origin: Origin,
+
+    /// Positive part of the body
+    positive: ChaseRuleBodyPositive,
+    /// Negative part of the body
+    negative: ChaseRuleBodyNegative,
+    /// Aggregation
+    aggregation: ChaseRuleAggregation,
+    /// Head of the rule
+    head: ChaseRuleHead,
+}
+
+impl ChaseRule {
+    /// Add an atom to the positive part of the body.
+    pub(crate) fn add_positive_atom(&mut self, atom: VariableAtom) {
+        self.positive.atoms.push(atom);
+    }
+
+    /// Add an operation for the positive part of the body.
+    pub(crate) fn add_positive_operation(&mut self, operation: ChaseOperation) {
+        self.positive.operations.push(operation)
+    }
+
+    /// Add a filter to the positive part of the body.
+    pub(crate) fn add_positive_filter(&mut self, filter: ChaseFilter) {
+        self.positive.filters.push(filter);
+    }
+
+    /// Add an atom to the negative part of the body.
+    pub(crate) fn add_negative_atom(&mut self, atom: VariableAtom) {
+        self.negative.atoms.push(atom);
+        self.negative.filters.push(Vec::default())
+    }
+
+    /// Add a filter to the negative part of the body.
+    pub(crate) fn add_negative_filter(&mut self, atom_index: usize, filter: ChaseFilter) {
+        self.negative.filters[atom_index].push(filter)
+    }
+
+    /// Add a filter to the last negated atom in the body.
+    ///
+    /// # Panics
+    /// Panics if the current filter vector is empty.
+    pub(crate) fn add_negative_filter_last(&mut self, filter: ChaseFilter) {
+        self.negative
+            .filters
+            .last_mut()
+            .expect("expected a filter slot")
+            .push(filter)
+    }
+
+    /// Add a new aggregation operation to the rule.
+    pub(crate) fn add_aggregation(&mut self, aggregate: ChaseAggregate, head_index: usize) {
+        self.aggregation.aggregate = Some(aggregate);
+        self.head.aggregate_head_index = Some(head_index);
+    }
+
+    /// Add a new operation that uses the result of aggregation.
+    pub(crate) fn add_aggregation_operation(&mut self, operation: ChaseOperation) {
+        self.aggregation.operations.push(operation);
+    }
+
+    /// Add a new filter that uses the result of aggregation.
+    pub(crate) fn add_aggregation_filter(&mut self, filter: ChaseFilter) {
+        self.aggregation.filters.push(filter);
+    }
+
+    /// Add a new atom to the head of the rule.
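+    ///
+    /// A minimal assembly sketch using the methods above (the atoms and the
+    /// filter are assumed to have been constructed elsewhere):
+    ///
+    /// ```ignore
+    /// let mut rule = ChaseRule::default();
+    /// rule.add_positive_atom(data_atom);      // data(?x, ?y)
+    /// rule.add_positive_filter(bound_filter); // ?y > 5
+    /// rule.add_head_atom(head_atom);          // result(?x)
+    /// ```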
+ pub(crate) fn add_head_atom(&mut self, atom: PrimitiveAtom) { + self.head.atoms.push(atom) + } +} + +impl ChaseComponent for ChaseRule { + fn origin(&self) -> &Origin { + &self.origin + } + + fn set_origin(mut self, origin: Origin) -> Self + where + Self: Sized, + { + self.origin = origin; + self + } +} diff --git a/nemo/src/chase_model/components/term.rs b/nemo/src/chase_model/components/term.rs new file mode 100644 index 000000000..03d826e85 --- /dev/null +++ b/nemo/src/chase_model/components/term.rs @@ -0,0 +1,3 @@ +//! This module contains all terms defined in the chase model. + +pub(crate) mod operation_term; diff --git a/nemo/src/chase_model/components/term/operation_term.rs b/nemo/src/chase_model/components/term/operation_term.rs new file mode 100644 index 000000000..155bebed3 --- /dev/null +++ b/nemo/src/chase_model/components/term/operation_term.rs @@ -0,0 +1,39 @@ +//! This module defines [Operation] and [OperationTerm]. + +use crate::rule_model::{ + components::term::{operation::operation_kind::OperationKind, primitive::Primitive}, + origin::Origin, +}; + +/// Operation +/// +/// An action or computation performed on [Term]s. +/// This can include for example arithmetic or string operations. +#[derive(Debug)] +pub(crate) struct Operation { + /// Origin of this component + origin: Origin, + + /// The kind of operation + kind: OperationKind, + /// The input arguments for the operation + subterms: Vec, +} + +impl Operation { + /// Create a new [Operation]. + pub(crate) fn new(origin: Origin, kind: OperationKind, subterms: Vec) -> Self { + Self { + origin, + kind, + subterms, + } + } +} + +/// Term that can be evaluated +#[derive(Debug)] +pub(crate) enum OperationTerm { + Primitive(Primitive), + Operation(Operation), +} diff --git a/nemo/src/chase_model/translation.rs b/nemo/src/chase_model/translation.rs new file mode 100644 index 000000000..0f91016ec --- /dev/null +++ b/nemo/src/chase_model/translation.rs @@ -0,0 +1,52 @@ +//! This module defines [ProgramChaseTranslation]. + +pub(crate) mod aggregate; +pub(crate) mod fact; +pub(crate) mod filter; +pub(crate) mod import; +pub(crate) mod operation; +pub(crate) mod rule; + +use crate::rule_model::program::Program; + +use super::components::program::ChaseProgram; + +/// Object for translating a [Program] into a [ChaseProgram] +#[derive(Debug)] +pub(crate) struct ProgramChaseTranslation { + fresh_variable_counter: usize, +} + +impl ProgramChaseTranslation { + /// Initialize a new [ProgramChaseTranslation]. + pub fn new() -> Self { + Self { + fresh_variable_counter: 0, + } + } + + /// Translate a [Program] into a [ChaseProgram]. + pub(crate) fn translate(&mut self, mut program: Program) -> ChaseProgram { + let mut result = ChaseProgram::default(); + + for fact in program.facts() { + result.add_fact(self.build_fact(fact)); + } + + for rule in program.rules_mut() { + result.add_rule(self.build_rule(rule)); + } + + for output in program.outputs() { + result.add_output_predicate(output.predicate().clone()); + } + + result + } + + /// Create a fresh variable name + fn create_fresh_variable(&mut self) -> String { + self.fresh_variable_counter += 1; + format!("__VARIABLE_{}", self.fresh_variable_counter) + } +} diff --git a/nemo/src/chase_model/translation/aggregate.rs b/nemo/src/chase_model/translation/aggregate.rs new file mode 100644 index 000000000..337fac79f --- /dev/null +++ b/nemo/src/chase_model/translation/aggregate.rs @@ -0,0 +1,142 @@ +//! This module contains functions for creating [ChaseAggregate]s. 
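+//!
+//! Informally, a head term like `#sum(?v)` in
+//! `total(?g, #sum(?v)) :- data(?g, ?v)` is turned into a
+//! [ChaseAggregate](crate::chase_model::components::aggregate::ChaseAggregate)
+//! that reads `?v` as its input variable, writes the aggregated value into a
+//! fresh output variable, and groups by `?g` (the rule syntax here is only
+//! illustrative).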
+ +use std::collections::HashSet; + +use crate::{ + chase_model::components::{ + aggregate::ChaseAggregate, + operation::ChaseOperation, + rule::ChaseRule, + term::operation_term::{Operation, OperationTerm}, + }, + rule_model::components::{ + term::{ + primitive::{variable::Variable, Primitive}, + Term, + }, + ProgramComponent, + }, +}; + +use super::ProgramChaseTranslation; + +impl ProgramChaseTranslation { + /// Create a [ChaseAggregate] from a given + /// [Aggregate][crate::rule_model::components::term::aggregate::Aggregate]. + /// + /// # Panics + /// Panics if aggregation term contains a structured term or another aggregation. + pub(crate) fn build_aggregate( + &mut self, + result: &mut ChaseRule, + aggregate: &crate::rule_model::components::term::aggregate::Aggregate, + group_by_variables: &HashSet, + ) -> ChaseAggregate { + let origin = aggregate.origin().clone(); + let kind = aggregate.kind(); + let input_variable = match aggregate.aggregate_term() { + Term::Primitive(Primitive::Variable(variable)) => variable.clone(), + Term::Primitive(primitive) => { + let new_variable = Variable::universal(&self.create_fresh_variable()); + result.add_positive_operation(ChaseOperation::new( + origin, + new_variable.clone(), + OperationTerm::Primitive(primitive.clone()), + )); + + new_variable + } + Term::Operation(operation) => { + let new_variable = Variable::universal(&self.create_fresh_variable()); + result.add_positive_operation(ChaseOperation::new( + origin, + new_variable.clone(), + Self::build_operation_term(operation), + )); + + new_variable + } + Term::Aggregate(_) => unreachable!("invalid program: Recursive aggregates not allowed"), + _ => unreachable!("invalid program: complex terms not allowed"), + }; + let output_variable = Variable::universal(&self.create_fresh_variable()); + let distinct_variables = aggregate.distinct().cloned().collect(); + + ChaseAggregate::new( + origin, + kind, + input_variable, + output_variable, + distinct_variables, + group_by_variables.clone(), + ) + } + + /// Create an [OperationTerm] from a given + /// [Operation][crate::rule_model::components::term::operation::Operation]. + /// + /// If this function encounters an aggregate it will use its `output_variable` instead. + /// In this case the given `chase_aggregate` parameter will be set. + /// + /// # Panics + /// Panics if the operation is not "pure", i.e. if it contains as subterms + /// terms that are not operations or primitive terms. 
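+    ///
+    /// For example (informally), a head operation `2 * #sum(?v)` is rewritten
+    /// here to `2 * ?fresh`, where `?fresh` is the output variable of the
+    /// [ChaseAggregate] created for `#sum(?v)`; the aggregate itself is
+    /// passed back through the `chase_aggregate` out-parameter.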
+    fn build_operation_term_with_aggregate(
+        &mut self,
+        result: &mut ChaseRule,
+        operation: &crate::rule_model::components::term::operation::Operation,
+        group_by_variables: &HashSet<Variable>,
+        chase_aggregate: &mut Option<ChaseAggregate>,
+    ) -> OperationTerm {
+        let origin = operation.origin().clone();
+        let kind = operation.kind();
+        let mut subterms = Vec::new();
+
+        for argument in operation.arguments() {
+            match argument {
+                Term::Primitive(primitive) => {
+                    subterms.push(OperationTerm::Primitive(primitive.clone()))
+                }
+                Term::Operation(operation) => subterms.push(Self::build_operation_term(operation)),
+                Term::Aggregate(aggregate) => {
+                    let new_aggregate = self.build_aggregate(result, aggregate, group_by_variables);
+
+                    subterms.push(OperationTerm::Primitive(Primitive::Variable(
+                        new_aggregate.output_variable().clone(),
+                    )));
+
+                    *chase_aggregate = Some(new_aggregate);
+                }
+                _ => unreachable!(
+                    "invalid program: operation term does not only consist of operation terms"
+                ),
+            }
+        }
+
+        OperationTerm::Operation(Operation::new(origin, kind, subterms))
+    }
+
+    /// Create a [ChaseOperation] from a given
+    /// [Operation][crate::rule_model::components::term::operation::Operation]
+    /// that may contain an aggregate.
+    /// If this is the case, then `chase_aggregate` will be set to `Some`.
+    ///
+    /// # Panics
+    /// Panics if the operation contains complex terms or multiple aggregates.
+    pub(crate) fn build_operation_with_aggregate(
+        &mut self,
+        result: &mut ChaseRule,
+        operation: &crate::rule_model::components::term::operation::Operation,
+        group_by_variables: &HashSet<Variable>,
+        output_variable: Variable,
+        chase_aggregate: &mut Option<ChaseAggregate>,
+    ) -> ChaseOperation {
+        let operation_term = self.build_operation_term_with_aggregate(
+            result,
+            operation,
+            group_by_variables,
+            chase_aggregate,
+        );
+        ChaseOperation::new(operation.origin().clone(), output_variable, operation_term)
+    }
+}
diff --git a/nemo/src/chase_model/translation/fact.rs b/nemo/src/chase_model/translation/fact.rs
new file mode 100644
index 000000000..f799a22a8
--- /dev/null
+++ b/nemo/src/chase_model/translation/fact.rs
@@ -0,0 +1,43 @@
+//! This module defines a function for translating
+//! logical facts into chase facts.
+
+use crate::{
+    chase_model::components::atom::ground_atom::GroundAtom,
+    rule_model::components::{
+        term::{primitive::Primitive, Term},
+        ProgramComponent,
+    },
+};
+
+use super::ProgramChaseTranslation;
+
+impl ProgramChaseTranslation {
+    /// Translate a [Fact][crate::rule_model::components::fact::Fact]
+    /// into a [GroundAtom].
+    ///
+    /// # Panics
+    /// Panics if the fact contains non-primitive terms or variables.
+    pub(crate) fn build_fact(
+        &mut self,
+        fact: &crate::rule_model::components::fact::Fact,
+    ) -> GroundAtom {
+        let origin = fact.origin().clone();
+        let predicate = fact.predicate().clone();
+        let mut terms = Vec::new();
+
+        for term in fact.subterms() {
+            if let Term::Primitive(primitive) = term {
+                if let Primitive::Ground(value) = primitive {
+                    terms.push(value.clone());
+                    continue;
+                } else {
+                    unreachable!("invalid program: fact contains non-ground values")
+                }
+            } else {
+                unreachable!("invalid program: fact contains non-primitive values")
+            }
+        }
+
+        GroundAtom::new(origin, predicate, terms)
+    }
+}
diff --git a/nemo/src/chase_model/translation/filter.rs b/nemo/src/chase_model/translation/filter.rs
new file mode 100644
index 000000000..cea73886b
--- /dev/null
+++ b/nemo/src/chase_model/translation/filter.rs
@@ -0,0 +1,54 @@
+//!
This module contains functions for creating [ChaseFilter]s + +use crate::{ + chase_model::components::{ + filter::ChaseFilter, + term::operation_term::{Operation, OperationTerm}, + }, + rule_model::components::{ + term::{ + operation::operation_kind::OperationKind, + primitive::{variable::Variable, Primitive}, + }, + ProgramComponent, + }, +}; + +use super::ProgramChaseTranslation; + +impl ProgramChaseTranslation { + /// Create a new filter that only allows the variable to take on the values + /// of the result of the given [Operation][crate::rule_model::components::term::operation::Operation]. + pub(crate) fn build_filter_operation( + variable: &Variable, + operation: &crate::rule_model::components::term::operation::Operation, + ) -> ChaseFilter { + let origin = operation.origin().clone(); + let operation = Self::build_operation_term(operation); + + let filter = OperationTerm::Operation(Operation::new( + origin.clone(), + OperationKind::Equal, + vec![ + OperationTerm::Primitive(Primitive::from(variable.clone())), + operation, + ], + )); + + ChaseFilter::new(origin, filter) + } + + /// Create a new filter that binds the values of the variable to the provided primitive term. + pub(crate) fn build_filter_primitive(variable: &Variable, term: &Primitive) -> ChaseFilter { + let filter = Operation::new( + term.origin().clone(), + OperationKind::Equal, + vec![ + OperationTerm::Primitive(Primitive::from(variable.clone())), + OperationTerm::Primitive(term.clone()), + ], + ); + + ChaseFilter::new(term.origin().clone(), OperationTerm::Operation(filter)) + } +} diff --git a/nemo/src/chase_model/translation/import.rs b/nemo/src/chase_model/translation/import.rs new file mode 100644 index 000000000..0e5a6c5aa --- /dev/null +++ b/nemo/src/chase_model/translation/import.rs @@ -0,0 +1,216 @@ +//! This module contains functions for building [ChaseImport]s. + +use std::collections::HashMap; + +use oxiri::Iri; + +use crate::{ + chase_model::components::import::ChaseImport, + io::formats::{ + dsv::{value_format::DsvValueFormats, DsvHandler}, + json::JsonHandler, + rdf::{value_format::RdfValueFormats, RdfHandler, RdfVariant}, + Direction, ImportExportHandler, ImportExportResource, + }, + rule_model::components::{ + import_export::{ + attributes::ImportExportAttribute, compression::CompressionFormat, + file_formats::FileFormat, ImportExportDirective, + }, + tag::Tag, + term::Term, + ProgramComponent, + }, +}; + +use super::ProgramChaseTranslation; + +impl ProgramChaseTranslation { + /// Build a [ChaseImport] from a given + /// [ImportDirective][crate::rule_model::components::import_export::ImportDirective]. 
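+    ///
+    /// Informally, a directive along the lines of
+    /// `@import table :- csv{resource = "data.csv.gz"}` yields a [DsvHandler]
+    /// with delimiter `,`, gzip compression (inferred from the resource
+    /// extension) and predicate `table`; the surface syntax here is only
+    /// illustrative.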
+    pub(crate) fn build_import(
+        predicate_arity: &HashMap<Tag, usize>,
+        import: &crate::rule_model::components::import_export::ImportDirective,
+    ) -> ChaseImport {
+        let origin = import.origin().clone();
+        let predicate = import.predicate().clone();
+        let arity = predicate_arity.get(&predicate).copied();
+        let attributes = import.attributes();
+
+        let handler = match import.file_format() {
+            FileFormat::CSV => {
+                Self::build_dsv_handler(Direction::Import, Some(b','), arity, &attributes)
+            }
+            FileFormat::DSV => Self::build_dsv_handler(Direction::Import, None, arity, &attributes),
+            FileFormat::TSV => {
+                Self::build_dsv_handler(Direction::Import, Some(b'\t'), arity, &attributes)
+            }
+            FileFormat::JSON => todo!(),
+            FileFormat::NTriples => todo!(),
+            FileFormat::NQuads => todo!(),
+            FileFormat::Turtle => todo!(),
+            FileFormat::RDFXML => todo!(),
+            FileFormat::TriG => todo!(),
+        };
+
+        ChaseImport::new(origin, predicate, handler)
+    }
+
+    /// Read resource attribute and check compression.
+    fn read_resource(
+        attributes: &HashMap<ImportExportAttribute, Term>,
+    ) -> (CompressionFormat, ImportExportResource) {
+        attributes
+            .get(&ImportExportAttribute::Resource)
+            .and_then(|term| ImportExportDirective::string_value(term))
+            .map(|resource| CompressionFormat::from_resource(&resource))
+            .map(|(format, resource)| (format, ImportExportResource::from_string(resource)))
+            .expect("invalid program: missing resource in import/export")
+    }
+
+    /// Read the [DsvValueFormats] from the attributes.
+    fn read_dsv_value_formats(
+        attributes: &HashMap<ImportExportAttribute, Term>,
+    ) -> Option<DsvValueFormats> {
+        let term = attributes.get(&ImportExportAttribute::Format)?;
+
+        if let Term::Tuple(tuple) = term {
+            Some(
+                DsvValueFormats::from_tuple(tuple)
+                    .expect("invalid program: format attribute malformed in dsv import/export"),
+            )
+        } else {
+            unreachable!("invalid program: format attribute malformed in dsv import/export")
+        }
+    }
+
+    /// Read the [RdfValueFormats] from the attributes.
+    fn read_rdf_value_formats(
+        attributes: &HashMap<ImportExportAttribute, Term>,
+    ) -> Option<RdfValueFormats> {
+        let term = attributes.get(&ImportExportAttribute::Format)?;
+
+        if let Term::Tuple(tuple) = term {
+            Some(
+                RdfValueFormats::from_tuple(tuple)
+                    .expect("invalid program: format attribute malformed in rdf import/export"),
+            )
+        } else {
+            unreachable!("invalid program: format attribute malformed in rdf import/export")
+        }
+    }
+
+    /// Read the limit from the attributes.
+    fn read_limit(attributes: &HashMap<ImportExportAttribute, Term>) -> Option<u64> {
+        attributes
+            .get(&ImportExportAttribute::Limit)
+            .and_then(|term| ImportExportDirective::integer_value(term))
+            .map(|limit| u64::try_from(limit).unwrap_or_default())
+    }
+
+    /// Read the compression format from the attributes.
+    fn read_compression(
+        attributes: &HashMap<ImportExportAttribute, Term>,
+    ) -> Option<CompressionFormat> {
+        if let Some(term) = attributes.get(&ImportExportAttribute::Compression) {
+            return Some(
+                CompressionFormat::from_name(
+                    &ImportExportDirective::string_value(term).expect(
+                        "invalid program: compression given in wrong type in import/export",
+                    ),
+                )
+                .expect("invalid program: unknown compression format in import/export"),
+            );
+        }
+
+        None
+    }
+
+    /// Read the iri base path from the attributes.
+    fn read_base(attributes: &HashMap<ImportExportAttribute, Term>) -> Option<Iri<String>> {
+        let term = attributes.get(&ImportExportAttribute::Base)?;
+        Some(Iri::from(
+            ImportExportDirective::plain_value(term)
+                .expect("invalid program: base given in the wrong type"),
+        ))
+    }
+
+    /// Build a [DsvHandler].
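+    ///
+    /// A hypothetical call for a CSV import with known arity (the attribute
+    /// map is assumed to have been read from the directive):
+    ///
+    /// ```ignore
+    /// let handler = Self::build_dsv_handler(
+    ///     Direction::Import,
+    ///     Some(b','), // fixed delimiter for CSV
+    ///     Some(3),    // arity known from the program
+    ///     &attributes,
+    /// );
+    /// ```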
+    fn build_dsv_handler(
+        direction: Direction,
+        delimiter: Option<u8>,
+        arity: Option<usize>,
+        attributes: &HashMap<ImportExportAttribute, Term>,
+    ) -> Box<dyn ImportExportHandler> {
+        let (mut compression_format, resource) = Self::read_resource(attributes);
+
+        let value_formats = Self::read_dsv_value_formats(attributes)
+            .unwrap_or_else(|| DsvValueFormats::default(arity.unwrap_or_default()));
+
+        let limit = Self::read_limit(attributes);
+
+        let delimiter = if let Some(delimiter) = delimiter {
+            delimiter
+        } else {
+            let term = attributes
+                .get(&ImportExportAttribute::Delimiter)
+                .expect("invalid program: unknown delimiter in dsv import/export");
+            let string = ImportExportDirective::string_value(term)
+                .expect("invalid program: delimiter given in wrong type in dsv import/export");
+            string.as_bytes()[0]
+        };
+
+        if let Some(format) = Self::read_compression(attributes) {
+            compression_format = format;
+        }
+
+        Box::new(DsvHandler::new(
+            delimiter,
+            resource,
+            value_formats,
+            limit,
+            compression_format,
+            direction,
+        ))
+    }
+
+    /// Build an [RdfHandler].
+    fn build_rdf_handler(
+        direction: Direction,
+        variant: RdfVariant,
+        arity: usize,
+        attributes: &HashMap<ImportExportAttribute, Term>,
+    ) -> Box<dyn ImportExportHandler> {
+        let (mut compression_format, resource) = Self::read_resource(attributes);
+
+        if let Some(format) = Self::read_compression(attributes) {
+            compression_format = format;
+        }
+
+        let value_formats = Self::read_rdf_value_formats(attributes)
+            .unwrap_or_else(|| RdfValueFormats::default(arity));
+
+        let limit = Self::read_limit(attributes);
+
+        let base = Self::read_base(attributes);
+
+        Box::new(RdfHandler::new(
+            resource,
+            base,
+            variant,
+            value_formats,
+            limit,
+            compression_format,
+            direction,
+        ))
+    }
+
+    /// Build a [JsonHandler].
+    fn build_json_handler(
+        attributes: &HashMap<ImportExportAttribute, Term>,
+    ) -> Box<dyn ImportExportHandler> {
+        let (_, resource) = Self::read_resource(attributes);
+
+        Box::new(JsonHandler::new(resource))
+    }
+}
diff --git a/nemo/src/chase_model/translation/operation.rs b/nemo/src/chase_model/translation/operation.rs
new file mode 100644
index 000000000..d95868d9d
--- /dev/null
+++ b/nemo/src/chase_model/translation/operation.rs
@@ -0,0 +1,61 @@
+//! This module defines a function for translating
+//! operations in the logical model to operations in the chase model.
+
+use crate::{
+    chase_model::components::{
+        operation::ChaseOperation,
+        term::operation_term::{Operation, OperationTerm},
+    },
+    rule_model::components::{
+        term::{primitive::variable::Variable, Term},
+        ProgramComponent,
+    },
+};
+
+use super::ProgramChaseTranslation;
+
+impl ProgramChaseTranslation {
+    /// Create an [OperationTerm] from a given
+    /// [Operation][crate::rule_model::components::term::operation::Operation].
+    ///
+    /// # Panics
+    /// Panics if the operation is not "pure", i.e. if it contains as subterms
+    /// terms that are not operations or primitive terms.
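+    ///
+    /// E.g. (informally) the term `?x + (2 * ?y)` becomes a nested
+    /// [OperationTerm]: an addition whose second argument is itself an
+    /// [OperationTerm] for the multiplication, with variables and constants
+    /// wrapped as [OperationTerm::Primitive] leaves.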
+    pub(crate) fn build_operation_term(
+        operation: &crate::rule_model::components::term::operation::Operation,
+    ) -> OperationTerm {
+        let origin = operation.origin().clone();
+        let kind = operation.kind();
+        let mut subterms = Vec::new();
+
+        for argument in operation.arguments() {
+            match argument {
+                Term::Primitive(primitive) => {
+                    subterms.push(OperationTerm::Primitive(primitive.clone()))
+                }
+                Term::Operation(operation) => subterms.push(Self::build_operation_term(operation)),
+                _ => unreachable!(
+                    "invalid program: operation term does not only consist of operation terms"
+                ),
+            }
+        }
+
+        OperationTerm::Operation(Operation::new(origin, kind, subterms))
+    }
+
+    /// Create a [ChaseOperation] from a given
+    /// [Operation][crate::rule_model::components::term::operation::Operation].
+    ///
+    /// # Panics
+    /// Panics if the operation is not "pure", i.e. if it contains as subterms
+    /// terms that are not operations or primitive terms.
+    pub(crate) fn build_operation(
+        output_variable: &Variable,
+        operation: &crate::rule_model::components::term::operation::Operation,
+    ) -> ChaseOperation {
+        let origin = operation.origin().clone();
+        let operation = Self::build_operation_term(operation);
+
+        ChaseOperation::new(origin, output_variable.clone(), operation)
+    }
+}
diff --git a/nemo/src/chase_model/translation/rule.rs b/nemo/src/chase_model/translation/rule.rs
new file mode 100644
index 000000000..ff7c0ba14
--- /dev/null
+++ b/nemo/src/chase_model/translation/rule.rs
@@ -0,0 +1,304 @@
+//! This module defines a function for translating logical rules into chase rules
+
+use std::collections::{HashMap, HashSet};
+
+use crate::{
+    chase_model::components::{
+        aggregate::ChaseAggregate,
+        atom::{primitive_atom::PrimitiveAtom, variable_atom::VariableAtom},
+        filter::ChaseFilter,
+        rule::ChaseRule,
+    },
+    rule_model::components::{
+        atom::Atom,
+        literal::Literal,
+        term::{
+            primitive::{
+                variable::{Variable, VariableName},
+                Primitive,
+            },
+            Term,
+        },
+        IterableVariables, ProgramComponent,
+    },
+};
+
+use super::ProgramChaseTranslation;
+
+impl ProgramChaseTranslation {
+    /// Translate a [rule][crate::rule_model::components::rule::Rule]
+    /// into a [ChaseRule].
+    ///
+    /// # Panics
+    /// Panics if
+    /// * the rule contains structured terms
+    /// * the body contains any aggregates
+    pub(crate) fn build_rule(
+        &mut self,
+        rule: &mut crate::rule_model::components::rule::Rule,
+    ) -> ChaseRule {
+        let mut result = ChaseRule::default();
+
+        let variable_assignments = Self::variables_assignments(rule);
+        Self::apply_variable_assignment(rule, &variable_assignments);
+
+        // Handle positive and negative atoms
+        for literal in rule.body() {
+            match literal {
+                Literal::Positive(atom) => {
+                    let (variable_atom, filters) = self.build_body_atom(atom);
+                    result.add_positive_atom(variable_atom);
+                    for filter in filters {
+                        result.add_positive_filter(filter);
+                    }
+                }
+                Literal::Negative(atom) => {
+                    let (variable_atom, filters) = self.build_body_atom(atom);
+                    result.add_negative_atom(variable_atom);
+                    for filter in filters {
+                        result.add_negative_filter_last(filter);
+                    }
+                }
+                Literal::Operation(_) => {
+                    // Will be handled below
+                }
+            }
+        }
+
+        // Handle operations
+        self.handle_operations(&mut result, rule);
+
+        // Handle head
+        self.handle_head(&mut result, rule.head());
+
+        result
+    }
+
+    /// Creates a map assigning variables that are equal to each other.
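+    ///
+    /// E.g. for a body containing the literal `?x = ?y`, the returned map
+    /// sends `?x` to `?y`, so that [Self::apply_variable_assignment] can
+    /// subsequently rename every occurrence of `?x` into `?y`.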
+ fn variables_assignments( + rule: &crate::rule_model::components::rule::Rule, + ) -> HashMap { + let mut assignment = HashMap::::new(); + + for literal in rule.body() { + if let Literal::Operation(operation) = literal { + if let Some((left, term)) = operation.variable_assignment() { + if let Term::Primitive(Primitive::Variable(right)) = term { + // Operation has the form ?left = ?right + if let Some(assigned) = assignment.get(left) { + assignment.insert(right.clone(), assigned.clone()); + } else if let Some(assigned) = assignment.get(right) { + assignment.insert(left.clone(), assigned.clone()); + } else { + assignment.insert(left.clone(), right.clone()); + } + } + } + } + } + + assignment + } + + /// Replace each variable occurring in the rule + /// according to the given variable assignment map. + fn apply_variable_assignment( + rule: &mut crate::rule_model::components::rule::Rule, + assignment: &HashMap, + ) { + for variable in rule.variables_mut() { + if let Some(new_variable) = assignment.get(variable) { + if let Some(name) = new_variable.name() { + let new_name = VariableName::new(name); + variable.rename(new_name); + } + } + } + } + + /// For a given positive body atom, return the corresponding [VariableAtom]. + /// + /// # Panics + /// Panics if atom contains a structured term or an aggregate. + fn build_body_atom(&mut self, atom: &Atom) -> (VariableAtom, Vec) { + let origin = atom.origin().clone(); + let predicate = atom.predicate().clone(); + let mut variables = Vec::new(); + + let mut filters = Vec::new(); + + let mut used_variables = HashSet::<&Variable>::new(); + + for argument in atom.arguments() { + match argument { + Term::Primitive(Primitive::Variable(variable)) => { + if !used_variables.insert(variable) { + // If the variable was already used in the same atom, + // we create a new variable + + let new_variable = Variable::universal(&self.create_fresh_variable()); + let new_filter = Self::build_filter_primitive( + &new_variable, + &Primitive::Variable(variable.clone()), + ); + + variables.push(new_variable); + filters.push(new_filter); + } else { + variables.push(variable.clone()); + } + } + Term::Primitive(primitive) => { + let new_variable = Variable::universal(&self.create_fresh_variable()); + let new_filter = Self::build_filter_primitive(&new_variable, primitive); + + variables.push(new_variable); + filters.push(new_filter); + } + Term::Operation(operation) => { + let new_variable = Variable::universal(&self.create_fresh_variable()); + let new_filter = Self::build_filter_operation(&new_variable, operation); + + variables.push(new_variable); + filters.push(new_filter); + } + _ => unreachable!( + "invalid program: body may not include structured terms or aggregates" + ), + } + } + + let variable_atom = VariableAtom::new(origin, predicate, variables); + (variable_atom, filters) + } + + /// Translates each [Operation][crate::rule_model::components::term::operation::Operation] + /// into a [ChaseFilter] or [ChaseOperation][crate::chase_model::components::operation::ChaseOperation] + /// depending on the occurring variables. 
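+    ///
+    /// E.g. with `?x` and `?y` bound by positive body atoms, the literal
+    /// `?z = ?x + ?y` becomes a
+    /// [ChaseOperation][crate::chase_model::components::operation::ChaseOperation]
+    /// deriving `?z`, while `?x > ?y` (not an assignment) becomes a positive
+    /// [ChaseFilter].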
+ fn handle_operations( + &mut self, + result: &mut ChaseRule, + rule: &crate::rule_model::components::rule::Rule, + ) { + let mut derived_variables = rule.positive_variables(); + let mut handled_literals = HashSet::new(); + + // We compute a new value if + // * the operation has the form of an assignment + // * the "right-hand side" of the assignment only contains derived variables + // We compute derived variables by starting with variables + // that are bound by positive body literals + // and variables that are given a value through an assignment as outlined above + loop { + let current_count = derived_variables.len(); + + for (literal_index, literal) in rule.body().iter().enumerate() { + if handled_literals.contains(&literal_index) { + continue; + } + + if let Literal::Operation(operation) = literal { + if let Some((variable, term)) = operation.variable_assignment() { + if variable.is_universal() + && variable.name().is_some() + && term + .variables() + .all(|variable| derived_variables.contains(variable)) + { + derived_variables.insert(variable); + + let new_operation = Self::build_operation(variable, operation); + result.add_positive_operation(new_operation); + + handled_literals.insert(literal_index); + } + } + } + } + + if derived_variables.len() == current_count { + break; + } + } + + // The remaining operation terms become filters + for (literal_index, literal) in rule.body().iter().enumerate() { + if handled_literals.contains(&literal_index) { + continue; + } + + if let Literal::Operation(operation) = literal { + let new_operation = Self::build_operation_term(operation); + let new_filter = ChaseFilter::new(operation.origin().clone(), new_operation); + + result.add_positive_filter(new_filter); + } + } + } + + /// Translates each head atom into the [PrimitiveAtom], + /// while taking care of operations and aggregates. + fn handle_head(&mut self, result: &mut ChaseRule, head: &Vec) { + let mut chase_aggregate: Option = None; + + for atom in head { + let origin = atom.origin().clone(); + let predicate = atom.predicate().clone(); + let mut terms = Vec::new(); + + for (argument_index, argument) in atom.arguments().enumerate() { + let group_by_variables = + Self::compute_group_by_variables(atom.arguments(), argument_index); + + match argument { + Term::Primitive(primitive) => terms.push(primitive.clone()), + Term::Aggregate(aggregate) => { + let new_aggregate = + self.build_aggregate(result, aggregate, &group_by_variables); + + terms.push(Primitive::Variable(new_aggregate.output_variable().clone())); + chase_aggregate = Some(new_aggregate); + } + Term::Operation(operation) => { + let new_variable = Variable::universal(&self.create_fresh_variable()); + + let new_operation = self.build_operation_with_aggregate( + result, + operation, + &group_by_variables, + new_variable.clone(), + &mut chase_aggregate, + ); + + result.add_aggregation_operation(new_operation); + terms.push(Primitive::Variable(new_variable)); + } + _ => unreachable!("invalid program: rule head contains complex terms"), + } + } + + result.add_head_atom(PrimitiveAtom::new(origin, predicate, terms)) + } + } + + /// Compute group-by-variables for a head atom. + /// + /// Essentially, these are all variables contained in some terms + /// that are not the term containing the aggregate. 
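+    ///
+    /// E.g. for the head atom `p(?x, #sum(?v), ?y)` and the aggregated term
+    /// at index 1, the group-by variables are `?x` and `?y`; `?v` is not
+    /// included, since it only occurs in the aggregate itself.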
+ fn compute_group_by_variables<'a>( + terms: impl Iterator, + current_index: usize, + ) -> HashSet { + let mut result = HashSet::new(); + + for (term_index, term) in terms.enumerate() { + if term_index == current_index { + continue; + } + + result.extend(term.variables().cloned()); + } + + result + } +} diff --git a/nemo/src/error.rs b/nemo/src/error.rs index babce2bd7..003fc5553 100644 --- a/nemo/src/error.rs +++ b/nemo/src/error.rs @@ -6,8 +6,7 @@ use nemo_physical::datavalues::DataValueCreationError; use thiserror::Error; use crate::{ - execution::selection_strategy::strategy::SelectionStrategyError, - io::{formats::import_export::ImportExportError, parser::LocatedParseError}, + execution::selection_strategy::strategy::SelectionStrategyError, io::error::ImportExportError, program_analysis::analysis::RuleAnalysisError, }; @@ -35,9 +34,6 @@ pub enum Error { /// Rule analysis errors #[error(transparent)] RuleAnalysisError(#[from] RuleAnalysisError), - /// Parse errors - #[error(transparent)] - ParseError(#[from] LocatedParseError), /// IO Error #[error(transparent)] IO(#[from] std::io::Error), diff --git a/nemo/src/execution/execution_engine.rs b/nemo/src/execution/execution_engine.rs index 6338bfae0..d84dae680 100644 --- a/nemo/src/execution/execution_engine.rs +++ b/nemo/src/execution/execution_engine.rs @@ -157,7 +157,7 @@ impl ExecutionEngine { .push(TableSource::from_simple_table(table)); } - // Add all the sources to the table mananager + // Add all the sources to the table manager for (predicate, sources) in predicate_to_sources { table_manager.add_edb(predicate, sources); } diff --git a/nemo/src/execution/tracing/trace.rs b/nemo/src/execution/tracing/trace.rs index d774ea1d6..f6da33b66 100644 --- a/nemo/src/execution/tracing/trace.rs +++ b/nemo/src/execution/tracing/trace.rs @@ -13,7 +13,7 @@ use serde::Serialize; use crate::model::{ chase_model::{ChaseAtom, ChaseFact}, - Atom, PrimitiveTerm, Program, Rule, Term, Variable, + Atom, PrimitiveTerm, Term, Variable, }; /// Index of a rule within a [Program] diff --git a/nemo/src/io.rs b/nemo/src/io.rs index 46defcab6..ec43e5561 100644 --- a/nemo/src/io.rs +++ b/nemo/src/io.rs @@ -3,11 +3,10 @@ //! This module acts as a mediation layer between the logical and physical layer and offers traits to allow both layers an abstract view on the io process. pub mod compression_format; +pub mod error; pub mod export_manager; pub mod formats; pub mod import_manager; -pub mod lexer; -pub mod parser; pub mod resource_providers; pub use export_manager::ExportManager; diff --git a/nemo/src/io/compression_format.rs b/nemo/src/io/compression_format.rs index 9cd76e28e..230d20c96 100644 --- a/nemo/src/io/compression_format.rs +++ b/nemo/src/io/compression_format.rs @@ -6,11 +6,11 @@ use std::{ path::PathBuf, }; -use flate2::bufread::MultiGzDecoder; -use flate2::{write::GzEncoder, Compression}; +use flate2::{bufread::MultiGzDecoder, write::GzEncoder, Compression}; + use nemo_physical::resource::Resource; -use crate::error::Error; +use crate::{error::Error, rule_model::components::import_export::compression::CompressionFormat}; /// Compression level for gzip output, cf. gzip(1): /// @@ -21,23 +21,13 @@ use crate::error::Error; /// > (that is, biased towards high compression at expense of speed). 
 const GZIP_COMPRESSION_LEVEL: Compression = Compression::new(6);
 
-/// Represent the compression of a file
-#[derive(Debug, Copy, Clone, Default, PartialEq, Eq)]
-pub enum CompressionFormat {
-    /// No file compression
-    #[default]
-    None,
-    /// Compress with Gzip
-    Gzip,
-}
-
 impl CompressionFormat {
     /// Derive a compression format from the file extension of the given resource,
     /// and return the compression format and the resource string without this extension.
     pub fn from_resource(resource: &Resource) -> (CompressionFormat, Resource) {
         match resource {
             resource if resource.ends_with(".gz") => (
-                CompressionFormat::Gzip,
+                CompressionFormat::GZip,
                 resource.as_str()[0..resource.len() - 3].to_string(),
             ),
             _ => (CompressionFormat::None, resource.to_owned()),
@@ -58,7 +48,7 @@ impl CompressionFormat {
                 let writer = options.open(path)?;
                 Ok(Box::new(writer))
             }
-            CompressionFormat::Gzip => {
+            CompressionFormat::GZip => {
                 let writer = GzEncoder::new(options.open(path)?, GZIP_COMPRESSION_LEVEL);
                 Ok(Box::new(writer))
             }
@@ -75,7 +65,7 @@ impl CompressionFormat {
     pub fn try_decompression<R: Read>(&self, read: R) -> Option<Box<dyn Read>> {
         match self {
             Self::None => Some(Box::new(read)),
-            Self::Gzip => {
+            Self::GZip => {
                 let gz_reader = MultiGzDecoder::new(read);
                 if gz_reader.header().is_some() {
                     Some(Box::new(BufReader::new(gz_reader)))
@@ -90,7 +80,7 @@ impl CompressionFormat {
     pub(crate) fn extension(&self) -> Option<&str> {
         match self {
             Self::None => None,
-            Self::Gzip => Some("gz"),
+            Self::GZip => Some("gz"),
         }
     }
 
@@ -114,12 +104,3 @@ impl CompressionFormat {
         }
     }
 }
-
-impl std::fmt::Display for CompressionFormat {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        match self {
-            Self::None => write!(f, "None (no compression)"),
-            Self::Gzip => write!(f, "GZip"),
-        }
-    }
-}
diff --git a/nemo/src/io/error.rs b/nemo/src/io/error.rs
new file mode 100644
index 000000000..cc55dbc72
--- /dev/null
+++ b/nemo/src/io/error.rs
@@ -0,0 +1,76 @@
+use std::path::PathBuf;
+
+use nemo_physical::datavalues::AnyDataValue;
+use thiserror::Error;
+
+use crate::rule_model::components::import_export::file_formats::FileFormat;
+
+/// Errors related to the creation and usage of [ImportExportHandler]s.
+#[derive(Debug, Error)]
+pub enum ImportExportError {
+    /// Format is not supported for reading.
+    #[error(r#"Format "{0}" cannot be read"#)]
+    UnsupportedRead(FileFormat),
+    /// Format is not supported for writing.
+    #[error(r#"Format "{0}" cannot be written"#)]
+    UnsupportedWrite(FileFormat),
+    /// A required attribute is missing.
+    #[error(r#"Missing required attribute "{0}""#)]
+    MissingAttribute(String),
+    /// A given attribute is not valid for the format.
+    #[error(r#"Unknown attribute "{0}""#)]
+    UnknownAttribute(String),
+    /// File format name is not known.
+    #[error(r#"Unknown file format "{0}""#)]
+    UnknownFileFormat(String),
+    /// Attribute value is invalid.
+    #[error(r#"Invalid attribute value "{value}" for attribute "{attribute}": {description}"#)]
+    InvalidAttributeValue {
+        /// The given value.
+        value: AnyDataValue,
+        /// The attribute the value was given for.
+        attribute: AnyDataValue,
+        /// A description of why the value was invalid.
+        description: String,
+    },
+    /// Value format is unsupported for this format.
+    #[error(r#"Unsupported value format "{value_format}" for format {format}"#)]
+    InvalidValueFormat {
+        /// The given value format.
+        value_format: String,
+        /// The file format.
+        format: FileFormat,
+    },
+    /// Arity is unsupported for this format.
+ #[error(r#"import produces tuples of arity {arity}, but it should be arity {expected}"#)] + InvalidArity { + /// The given arity. + arity: usize, + /// The expected arity. + expected: usize, + }, + /// Arity is unsupported for this format, exact value is required. + #[error(r#"unsupported arity "{arity}" for format {format}, must be {required}"#)] + InvalidArityExact { + /// The given arity. + arity: usize, + /// The required arity. + required: usize, + /// The file format. + format: FileFormat, + }, + /// Format does not support complex types + #[error(r"Format {format} does not support complex types")] + UnsupportedComplexTypes { + /// The file format. + format: FileFormat, + }, + /// File could not be read + #[error(r#"File "{path}" could not be read."#)] + IoError { + /// Contains the wrapped error + error: std::io::Error, + /// Path that could not be read + path: PathBuf, + }, +} diff --git a/nemo/src/io/export_manager.rs b/nemo/src/io/export_manager.rs index 9f87f7de7..0c656b123 100644 --- a/nemo/src/io/export_manager.rs +++ b/nemo/src/io/export_manager.rs @@ -2,25 +2,16 @@ //! [ExportDirective]s and to write tables to files. use std::{ - fs::{create_dir_all, OpenOptions}, io::{ErrorKind, Write}, path::PathBuf, }; -use nemo_physical::datavalues::AnyDataValue; - -use crate::{ - error::Error, - io::formats::import_export::ImportExportHandlers, - model::{ExportDirective, Identifier}, -}; +use crate::{error::Error, rule_model::components::import_export::compression::CompressionFormat}; +use nemo_physical::datavalues::AnyDataValue; use sanitise_file_name::{sanitise_with_options, Options}; -use super::{ - compression_format::CompressionFormat, - formats::import_export::{ImportExportError, ImportExportHandler}, -}; +use super::formats::ImportExportHandler; /// Main object for exporting data to files and for accessing aspects /// of [ExportDirective]s that might be of public interest. @@ -32,16 +23,11 @@ pub struct ExportManager { overwrite: bool, /// If `true`, then writing operations will not be performed. The object can still be used for validation etc. disable_write: bool, - /// Default compression format to be used. - pub(crate) compression_format: CompressionFormat, + /// Compression format to be used. + compression_format: CompressionFormat, } impl ExportManager { - /// Constructor. - pub fn new() -> Self { - Default::default() - } - /// Use the given path as a base path. pub fn set_base_path(mut self, path: PathBuf) -> Self { self.base_path = path; @@ -59,7 +45,7 @@ impl ExportManager { /// choice. pub fn compress(mut self, compress: bool) -> Self { self.compression_format = if compress { - CompressionFormat::Gzip + CompressionFormat::GZip } else { CompressionFormat::None }; @@ -80,14 +66,14 @@ impl ExportManager { /// Validates the given [ExportDirective]. /// This also checks whether the specified file could (likely) be written. 
- pub fn validate(&self, export_directive: &ExportDirective) -> Result<(), Error> { - let handler = ImportExportHandlers::export_handler(export_directive)?; + pub fn validate(&self, handler: &dyn ImportExportHandler) -> Result<(), Error> { + // let handler = ImportExportHandlers::export_handler(export_directive)?; if handler.resource_is_stdout() { return Ok(()); } - let path = self.output_file_path(&*handler, export_directive.predicate()); + let path = self.output_file_path(handler); let meta_info = path.metadata(); if let Err(err) = meta_info { @@ -106,156 +92,142 @@ impl ExportManager { } } - /// Export a (possibly empty) table according to the given [ExportDirective]. - /// If the table is empty (i.e., [Option<_>::None]), an empty output file will be created. + /// Get the output file name for the given [ExportDirective]. /// - /// The `predicate_arity` is the arity of the predicate that is to be exported. This information - /// is used for validation and as a hint to exporters that were not initialized with details - /// about the arity. - pub fn export_table<'a>( - &self, - export_directive: &ExportDirective, - table: Option> + 'a>, - predicate_arity: usize, - ) -> Result { - if self.disable_write { - return Ok(false); - } - - let handler = ImportExportHandlers::export_handler(export_directive)?; - - let writer = self.writer(&*handler, export_directive.predicate())?; + /// This is a complete path (based on our base path), + /// which includes all extensions. + fn output_file_path(&self, export_handler: &dyn ImportExportHandler) -> PathBuf { + let mut pred_path = self.base_path.to_path_buf(); - self.export_table_with_handler_writer(&*handler, writer, table, predicate_arity)?; + let sanitize_options = Options::> { + url_safe: true, + ..Default::default() + }; - Ok(handler.resource_is_stdout()) - } + let file_name_unsafe = export_handler + .resource() + .unwrap_or_else(|| export_handler.file_extension()); + let file_name = sanitise_with_options(&file_name_unsafe, &sanitize_options); + pred_path.push(file_name); - /// Export a (possibly empty) table according to the given [ExportDirective], - /// but direct output into the given writer instead of using whatever - /// resource the directive specifies. - /// - /// The `predicate_arity` is the arity of the predicate that is to be exported. This information - /// is used for validation and as a hint to exporters that were not initialized with details - /// about the arity. - /// - /// This function ignores [ExportManager::disable_write]. - pub fn export_table_with_writer<'a>( - &self, - export_directive: &ExportDirective, - writer: Box, - table: Option> + 'a>, - predicate_arity: usize, - ) -> Result<(), Error> { - let handler = ImportExportHandlers::export_handler(export_directive)?; - self.export_table_with_handler_writer(&*handler, writer, table, predicate_arity) + pred_path = export_handler + .compression_format() + .path_with_extension(pred_path); + pred_path } /// Export a (possibly empty) table according to the given [ImportExportHandler], - /// and direct output into the given writer instead of using whatever - /// resource the handler specifies. + /// and direct output into the given writer. /// - /// The `predicate_arity` is the arity of the predicate that is to be exported. This information - /// is used for validation and as a hint to exporters that were not initialized with details - /// about the arity. + /// Nothing is written if writing is disabled. /// - /// This function ignores [ExportManager::disable_write]. 
+ /// If this operation succeeds, then it returns `Ok(true)` if the resource is stdout + /// and `Ok(false)` otherwise. pub(crate) fn export_table_with_handler_writer<'a>( &self, export_handler: &dyn ImportExportHandler, writer: Box, table: Option> + 'a>, - predicate_arity: usize, - ) -> Result<(), Error> { - if let Some(export_arity) = export_handler.predicate_arity() { - if export_arity != predicate_arity { - return Err(ImportExportError::InvalidArity { - arity: export_arity, - expected: predicate_arity, - } - .into()); - } + ) -> Result { + if self.disable_write { + return Ok(false); } + if let Some(table) = table { - let table_writer = export_handler.writer(writer, predicate_arity)?; + let table_writer = export_handler.writer(writer)?; table_writer.export_table_data(Box::new(table))?; } - Ok(()) - } - - /// Create a writer based on an export handler. The predicate is used to - /// obtain a default file name if needed. - /// - /// This function may already create directories, and should not be used if - /// [ExportManager::disable_write] is `true`. - fn writer( - &self, - export_handler: &dyn ImportExportHandler, - predicate: &Identifier, - ) -> Result, Error> { - if export_handler.resource_is_stdout() { - Ok(Box::new(std::io::stdout().lock())) - } else { - let output_path = self.output_file_path(export_handler, predicate); - - log::info!( - "Exporting predicate \"{}\" to {output_path:?}", - predicate.name() - ); - - if let Some(parent) = output_path.parent() { - create_dir_all(parent)?; - } - export_handler - .compression_format() - .unwrap_or(self.compression_format) - .file_writer(output_path, Self::open_options(self.overwrite)) - } + Ok(export_handler.resource_is_stdout()) } - /// Get the output file name for the given [ExportDirective]. This is a complete path (based on our base path), - /// which includes all extensions. - fn output_file_path( - &self, - export_handler: &dyn ImportExportHandler, - predicate: &Identifier, - ) -> PathBuf { - let mut pred_path = self.base_path.to_path_buf(); - - let sanitise_options = Options::> { - url_safe: true, - ..Default::default() - }; - - let file_name_unsafe = export_handler.resource().unwrap_or_else(|| { - if let Some(ext) = export_handler.file_extension() { - predicate.name() + "." + ext.as_str() - } else { - predicate.name() - } - }); - let file_name = sanitise_with_options(&file_name_unsafe, &sanitise_options); - pred_path.push(file_name); - - pred_path = export_handler - .compression_format() - .unwrap_or(self.compression_format) - .path_with_extension(pred_path); - pred_path - } - - /// Provide suitable options writing to files under the given settings. - fn open_options(overwrite: bool) -> OpenOptions { - let mut options = OpenOptions::new(); - options.write(true); - - if overwrite { - options.create(true).truncate(true); - } else { - options.create_new(true); - }; - - options - } + // /// Export a (possibly empty) table according to the given [ExportDirective]. + // /// If the table is empty (i.e., [Option<_>::None]), an empty output file will be created. + // /// + // /// The `predicate_arity` is the arity of the predicate that is to be exported. This information + // /// is used for validation and as a hint to exporters that were not initialized with details + // /// about the arity. 
+ // pub fn export_table<'a>( + // &self, + // export_directive: &ExportDirective, + // table: Option> + 'a>, + // predicate_arity: usize, + // ) -> Result { + // if self.disable_write { + // return Ok(false); + // } + + // let handler = ImportExportHandlers::export_handler(export_directive)?; + + // let writer = self.writer(&*handler, export_directive.predicate())?; + + // self.export_table_with_handler_writer(&*handler, writer, table, predicate_arity)?; + + // Ok(handler.resource_is_stdout()) + // } + + // /// Export a (possibly empty) table according to the given [ExportDirective], + // /// but direct output into the given writer instead of using whatever + // /// resource the directive specifies. + // /// + // /// The `predicate_arity` is the arity of the predicate that is to be exported. This information + // /// is used for validation and as a hint to exporters that were not initialized with details + // /// about the arity. + // /// + // /// This function ignores [ExportManager::disable_write]. + // pub fn export_table_with_writer<'a>( + // &self, + // export_directive: &ExportDirective, + // writer: Box, + // table: Option> + 'a>, + // predicate_arity: usize, + // ) -> Result<(), Error> { + // let handler = ImportExportHandlers::export_handler(export_directive)?; + // self.export_table_with_handler_writer(&*handler, writer, table, predicate_arity) + // } + + // /// Create a writer based on an export handler. The predicate is used to + // /// obtain a default file name if needed. + // /// + // /// This function may already create directories, and should not be used if + // /// [ExportManager::disable_write] is `true`. + // fn writer( + // &self, + // export_handler: &dyn ImportExportHandler, + // predicate: &Identifier, + // ) -> Result, Error> { + // if export_handler.resource_is_stdout() { + // Ok(Box::new(std::io::stdout().lock())) + // } else { + // let output_path = self.output_file_path(export_handler, predicate); + + // log::info!( + // "Exporting predicate \"{}\" to {output_path:?}", + // predicate.name() + // ); + + // if let Some(parent) = output_path.parent() { + // create_dir_all(parent)?; + // } + + // export_handler + // .compression_format() + // .unwrap_or(self.compression_format) + // .file_writer(output_path, Self::open_options(self.overwrite)) + // } + // } + + // /// Provide suitable options writing to files under the given settings. + // fn open_options(overwrite: bool) -> OpenOptions { + // let mut options = OpenOptions::new(); + // options.write(true); + + // if overwrite { + // options.create(true).truncate(true); + // } else { + // options.create_new(true); + // }; + + // options + // } } diff --git a/nemo/src/io/formats.rs b/nemo/src/io/formats.rs index 9b330b823..984a8169d 100644 --- a/nemo/src/io/formats.rs +++ b/nemo/src/io/formats.rs @@ -1,21 +1,129 @@ //! The input and output formats supported by Nemo. 
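+//!
+//! Rough usage sketch (assuming some concrete handler, e.g. the DSV one
+//! defined below; `buf_read` and `out` are hypothetical I/O objects):
+//!
+//! ```ignore
+//! let handler: Box<dyn ImportExportHandler> = /* e.g. a DsvHandler */;
+//! let provider = handler.reader(Box::new(buf_read))?; // import side
+//! let writer = handler.writer(Box::new(out))?;        // export side
+//! ```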
-pub(crate) mod import_export; -pub(crate) mod types; - pub mod dsv; -pub mod dsv_reader; -pub mod dsv_value_format; -pub mod dsv_writer; - +pub mod json; pub mod rdf; -pub mod rdf_reader; -pub mod rdf_writer; -pub mod json; -pub mod json_reader; +use std::io::{BufRead, Write}; + +use dyn_clone::DynClone; + +use nemo_physical::{ + datasources::table_providers::TableProvider, datavalues::AnyDataValue, resource::Resource, +}; -pub(crate) use dsv::DsvHandler; -pub(crate) use rdf::RdfHandler; +use crate::{ + error::Error, + rule_model::components::import_export::{ + compression::CompressionFormat, file_formats::FileFormat, + }, +}; const PROGRESS_NOTIFY_INCREMENT: u64 = 10_000_000; + +/// Representation of a resource (file, URL, etc.) for import or export. +#[derive(Clone, Debug, PartialEq, Eq)] +pub(crate) enum ImportExportResource { + /// A concrete resource string. + Resource(Resource), + /// Use stdout (only for export) + Stdout, +} + +impl ImportExportResource { + /// Convert a [String] to a [ImportExportResource]. + pub(crate) fn from_string(string: String) -> Self { + if string.is_empty() { + Self::Stdout + } else { + Self::Resource(string) + } + } + + /// Retrieve the contained resource, if any. + pub(crate) fn resource(&self) -> Option { + if let ImportExportResource::Resource(resource) = &self { + Some(resource.clone()) + } else { + None + } + } +} + +/// An [ImportExportHandler] represents a data format for input and/or output, and provides +/// specific methods for handling data of that format. Each handler is configured by format-specific +/// attributes, which define the behavior in detail, including the kind of data that this format +/// is compatible with. The attributes are provided when creating the format, and should then +/// be validated. +/// +/// An implementation of [ImportExportHandler] provides methods to validate and refine parameters +/// that were used with this format, to create suitable [TableProvider] and [TableWriter] objects +/// to read and write data in the given format, and to report information about the type of +/// data that this format can handle (such as predicate arity and type). +pub(crate) trait ImportExportHandler: std::fmt::Debug + DynClone + Send { + /// Return the associated [FileFormat]. + fn file_format(&self) -> FileFormat; + + /// Obtain a [TableProvider] for this format and the given reader, if supported. + /// + /// If reading is not supported, an error will be returned. + fn reader(&self, read: Box) -> Result, Error>; + + /// Obtain a [TableWriter] for this format and the given writer, if supported. + /// + /// If writing is not supported, an error will be returned. + fn writer(&self, writer: Box) -> Result, Error>; + + /// Obtain the resource used for this data exchange. + /// + /// In typical cases, this is the name of a file to read from or write to. + /// If no resource was specified, or if the resource is not identified by a + /// name (such as stdout), then `None` is returned. + fn resource(&self) -> Option { + self.import_export_resource().resource() + } + + /// Returns true if the selected resource is stdout. + fn resource_is_stdout(&self) -> bool { + self.import_export_resource() == &ImportExportResource::Stdout + } + + /// Returns the expected arity of the predicate related to this directive. + /// + /// For import, this is the arity of the data that is created, for export it is the + /// arity of the data that is consumed. 
+ fn predicate_arity(&self) -> usize; + + /// Returns the default file extension for data of this format, if any. + /// This will be used when making default file names. + fn file_extension(&self) -> String; + + /// Returns the chosen compression format for imported/exported data. + fn compression_format(&self) -> CompressionFormat; + + /// Returns the [ImportExportResource] used for this data exchange. + fn import_export_resource(&self) -> &ImportExportResource; +} + +dyn_clone::clone_trait_object!(ImportExportHandler); + +/// Direction of import/export activities. +/// We often share code for the two directions, and a direction +/// is then used to enable smaller distinctions where needed. +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub(crate) enum Direction { + /// Processing input. + Import, + /// Processing output. + Export, +} + +/// A trait for exporting table data, e.g., to some file. +// TODO Maybe this should be directly in io, since it is the interface to the OutputManager? +pub trait TableWriter { + /// Export a table. + fn export_table_data<'a>( + self: Box, + table: Box> + 'a>, + ) -> Result<(), Error>; +} diff --git a/nemo/src/io/formats/dsv.rs b/nemo/src/io/formats/dsv.rs index 4632bc0c9..30a66e4f5 100644 --- a/nemo/src/io/formats/dsv.rs +++ b/nemo/src/io/formats/dsv.rs @@ -1,29 +1,26 @@ //! Handler for resources of type DSV (delimiter-separated values). +pub(crate) mod reader; +pub(crate) mod value_format; +pub(crate) mod writer; + use std::io::{BufRead, Write}; use nemo_physical::datasources::table_providers::TableProvider; -use nemo_physical::datavalues::{AnyDataValue, MapDataValue}; +use reader::DsvReader; +use value_format::DsvValueFormats; +use writer::DsvWriter; -use crate::io::compression_format::CompressionFormat; -use crate::model::{ - PARAMETER_NAME_COMPRESSION, PARAMETER_NAME_DSV_DELIMITER, PARAMETER_NAME_FORMAT, - PARAMETER_NAME_LIMIT, PARAMETER_NAME_RESOURCE, -}; use crate::{ error::Error, - io::formats::types::{Direction, TableWriter}, - model::FileFormat, + rule_model::components::import_export::{ + compression::CompressionFormat, file_formats::FileFormat, + }, }; -use super::dsv_reader::DsvReader; -use super::dsv_value_format::DsvValueFormat; -use super::dsv_writer::DsvWriter; -use super::import_export::{ - ImportExportError, ImportExportHandler, ImportExportHandlers, ImportExportResource, -}; +use super::{Direction, ImportExportHandler, ImportExportResource, TableWriter}; -/// Internal enum to distnguish variants of the DSV format. +/// Internal enum to distinguish variants of the DSV format. #[allow(clippy::upper_case_acronyms)] enum DsvVariant { /// Delimiter-separated values @@ -45,152 +42,165 @@ pub(crate) struct DsvHandler { /// success for imports, so a concrete value is required there. resource: ImportExportResource, /// The list of value formats to be used for importing/exporting data. - /// If only the arity is given, this will use the most general export format - /// for each value (and the list will still be set). The list can be `None` - /// if neither formats nor arity were given for writing: in this case, a default - /// arity-based formats can be used if the arity is clear from another source. - value_formats: Option>, + value_formats: DsvValueFormats, /// Maximum number of statements that should be imported/exported. limit: Option, - /// Compression format to be used, if specified. This can also be inferred - /// from the resource, if given. So the only case where `None` is possible - /// is when no resource is given (during output). 
- compression_format: Option, + /// Compression format to be used + compression_format: CompressionFormat, /// Direction of the operation. direction: Direction, } impl DsvHandler { - /// Construct a DSV file handler with an arbitrary delimiter. - pub(crate) fn try_new_dsv( - attributes: &MapDataValue, - direction: Direction, - ) -> Result, ImportExportError> { - Self::try_new(DsvVariant::DSV, attributes, direction) - } - - /// Construct a CSV file handler. - pub(crate) fn try_new_csv( - attributes: &MapDataValue, - direction: Direction, - ) -> Result, ImportExportError> { - Self::try_new(DsvVariant::CSV, attributes, direction) - } - - /// Construct a TSV file handler. - pub(crate) fn try_new_tsv( - attributes: &MapDataValue, + /// Create a new [DsvHandler]. + pub(crate) fn new( + delimiter: u8, + resource: ImportExportResource, + value_formats: DsvValueFormats, + limit: Option, + compression_format: CompressionFormat, direction: Direction, - ) -> Result, ImportExportError> { - Self::try_new(DsvVariant::TSV, attributes, direction) - } - - /// Construct a DSV handler of the given variant. - fn try_new( - variant: DsvVariant, - attributes: &MapDataValue, - direction: Direction, - ) -> Result, ImportExportError> { - // Basic checks for unsupported attributes: - ImportExportHandlers::check_attributes( - attributes, - &[ - PARAMETER_NAME_FORMAT, - PARAMETER_NAME_RESOURCE, - PARAMETER_NAME_DSV_DELIMITER, - PARAMETER_NAME_COMPRESSION, - PARAMETER_NAME_LIMIT, - ], - )?; - - let delimiter = Self::extract_delimiter(variant, attributes)?; - let resource = ImportExportHandlers::extract_resource(attributes, direction)?; - let value_formats = Self::extract_value_formats(attributes)?; - let (compression_format, _) = - ImportExportHandlers::extract_compression_format(attributes, &resource)?; - let limit = - ImportExportHandlers::extract_unsigned_integer(attributes, PARAMETER_NAME_LIMIT, true)?; - - Ok(Box::new(Self { + ) -> Self { + Self { delimiter, resource, value_formats, limit, compression_format, direction, - })) - } - - fn extract_value_formats( - attributes: &MapDataValue, - ) -> Result>, ImportExportError> { - let value_format_strings = ImportExportHandlers::extract_value_format_strings(attributes)?; - - if let Some(format_strings) = value_format_strings { - Ok(Some(Self::formats_from_strings(format_strings)?)) - } else { - Ok(None) - } - } - - fn formats_from_strings( - value_format_strings: Vec, - ) -> Result, ImportExportError> { - let mut value_formats = Vec::with_capacity(value_format_strings.len()); - for s in value_format_strings { - value_formats.push(DsvValueFormat::from_string(s.as_str())?); - } - Ok(value_formats) - } - - fn extract_delimiter( - variant: DsvVariant, - attributes: &MapDataValue, - ) -> Result { - let delim_opt: Option; - if let Some(string) = - ImportExportHandlers::extract_string(attributes, PARAMETER_NAME_DSV_DELIMITER, true)? 
- { - if string.len() == 1 { - delim_opt = Some(string.as_bytes()[0]); - } else { - return Err(ImportExportError::invalid_att_value_error( - PARAMETER_NAME_DSV_DELIMITER, - AnyDataValue::new_plain_string(string.to_owned()), - "delimiter should be exactly one byte", - )); - } - } else { - delim_opt = None; } - - let delimiter: u8 = match (variant, delim_opt) { - (DsvVariant::DSV, Some(delim)) => delim, - (DsvVariant::DSV, None) => { - return Err(ImportExportError::MissingAttribute( - PARAMETER_NAME_DSV_DELIMITER.to_string(), - )); - } - (DsvVariant::CSV, None) => b',', - (DsvVariant::TSV, None) => b'\t', - (DsvVariant::CSV, Some(_)) | (DsvVariant::TSV, Some(_)) => { - return Err(ImportExportError::UnknownAttribute( - PARAMETER_NAME_DSV_DELIMITER.to_string(), - )); - } - }; - - Ok(delimiter) } - /// Returns the set value formats, or finds a default value based on the - /// required arity. - fn value_formats_or_default(&self, arity: usize) -> Vec { - self.value_formats.clone().unwrap_or_else(|| { - Self::formats_from_strings(ImportExportHandlers::default_value_format_strings(arity)) - .unwrap() - }) - } + // /// Construct a DSV file handler with an arbitrary delimiter. + // pub(crate) fn try_new_dsv( + // attributes: &MapDataValue, + // direction: Direction, + // ) -> Result, ImportExportError> { + // Self::try_new(DsvVariant::DSV, attributes, direction) + // } + + // /// Construct a CSV file handler. + // pub(crate) fn try_new_csv( + // attributes: &MapDataValue, + // direction: Direction, + // ) -> Result, ImportExportError> { + // Self::try_new(DsvVariant::CSV, attributes, direction) + // } + + // /// Construct a TSV file handler. + // pub(crate) fn try_new_tsv( + // attributes: &MapDataValue, + // direction: Direction, + // ) -> Result, ImportExportError> { + // Self::try_new(DsvVariant::TSV, attributes, direction) + // } + + // /// Construct a DSV handler of the given variant. 
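+
+    // A minimal usage sketch for `DsvHandler::new` above; the concrete
+    // argument values here are illustrative assumptions, not defaults:
+    //
+    //     let handler = DsvHandler::new(
+    //         b',',                                             // CSV delimiter
+    //         ImportExportResource::from_string("facts.csv".to_string()),
+    //         DsvValueFormats::default(3),                      // three "any" columns
+    //         None,                                             // no row limit
+    //         CompressionFormat::None,
+    //         Direction::Import,
+    //     );
+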
+ // fn try_new( + // variant: DsvVariant, + // attributes: &MapDataValue, + // direction: Direction, + // ) -> Result, ImportExportError> { + // // Basic checks for unsupported attributes: + // ImportExportHandlers::check_attributes( + // attributes, + // &[ + // PARAMETER_NAME_FORMAT, + // PARAMETER_NAME_RESOURCE, + // PARAMETER_NAME_DSV_DELIMITER, + // PARAMETER_NAME_COMPRESSION, + // PARAMETER_NAME_LIMIT, + // ], + // )?; + + // let delimiter = Self::extract_delimiter(variant, attributes)?; + // let resource = ImportExportHandlers::extract_resource(attributes, direction)?; + // let value_formats = Self::extract_value_formats(attributes)?; + // let (compression_format, _) = + // ImportExportHandlers::extract_compression_format(attributes, &resource)?; + // let limit = + // ImportExportHandlers::extract_unsigned_integer(attributes, PARAMETER_NAME_LIMIT, true)?; + + // Ok(Box::new(Self { + // delimiter, + // resource, + // value_formats, + // limit, + // compression_format, + // direction, + // })) + // } + + // fn extract_value_formats( + // attributes: &MapDataValue, + // ) -> Result>, ImportExportError> { + // let value_format_strings = ImportExportHandlers::extract_value_format_strings(attributes)?; + + // if let Some(format_strings) = value_format_strings { + // Ok(Some(Self::formats_from_strings(format_strings)?)) + // } else { + // Ok(None) + // } + // } + + // fn formats_from_strings( + // value_format_strings: Vec, + // ) -> Result, ImportExportError> { + // let mut value_formats = Vec::with_capacity(value_format_strings.len()); + // for s in value_format_strings { + // value_formats.push(DsvValueFormat::from_string(s.as_str())?); + // } + // Ok(value_formats) + // } + + // fn extract_delimiter( + // variant: DsvVariant, + // attributes: &MapDataValue, + // ) -> Result { + // let delim_opt: Option; + // if let Some(string) = + // ImportExportHandlers::extract_string(attributes, PARAMETER_NAME_DSV_DELIMITER, true)? + // { + // if string.len() == 1 { + // delim_opt = Some(string.as_bytes()[0]); + // } else { + // return Err(ImportExportError::invalid_att_value_error( + // PARAMETER_NAME_DSV_DELIMITER, + // AnyDataValue::new_plain_string(string.to_owned()), + // "delimiter should be exactly one byte", + // )); + // } + // } else { + // delim_opt = None; + // } + + // let delimiter: u8 = match (variant, delim_opt) { + // (DsvVariant::DSV, Some(delim)) => delim, + // (DsvVariant::DSV, None) => { + // return Err(ImportExportError::MissingAttribute( + // PARAMETER_NAME_DSV_DELIMITER.to_string(), + // )); + // } + // (DsvVariant::CSV, None) => b',', + // (DsvVariant::TSV, None) => b'\t', + // (DsvVariant::CSV, Some(_)) | (DsvVariant::TSV, Some(_)) => { + // return Err(ImportExportError::UnknownAttribute( + // PARAMETER_NAME_DSV_DELIMITER.to_string(), + // )); + // } + // }; + + // Ok(delimiter) + // } + + // /// Returns the set value formats, or finds a default value based on the + // /// required arity. 
+    // fn value_formats_or_default(&self, arity: usize) -> Vec<DsvValueFormat> {
+    //     self.value_formats.clone().unwrap_or_else(|| {
+    //         Self::formats_from_strings(ImportExportHandlers::default_value_format_strings(arity))
+    //             .unwrap()
+    //     })
+    // }
 }
 
 impl ImportExportHandler for DsvHandler {
@@ -202,53 +212,34 @@ impl ImportExportHandler for DsvHandler {
         }
     }
 
-    fn reader(
-        &self,
-        read: Box<dyn BufRead>,
-        arity: usize,
-    ) -> Result<Box<dyn TableProvider>, Error> {
+    fn reader(&self, read: Box<dyn BufRead>) -> Result<Box<dyn TableProvider>, Error> {
         Ok(Box::new(DsvReader::new(
             read,
             self.delimiter,
-            self.value_formats_or_default(arity),
+            self.value_formats.clone(),
+            None,
             self.limit,
         )))
     }
 
-    fn writer(&self, writer: Box<dyn Write>, arity: usize) -> Result<Box<dyn TableWriter>, Error> {
+    fn writer(&self, writer: Box<dyn Write>) -> Result<Box<dyn TableWriter>, Error> {
         Ok(Box::new(DsvWriter::new(
             self.delimiter,
             writer,
-            self.value_formats_or_default(arity),
+            self.value_formats.clone(),
             self.limit,
         )))
     }
 
-    fn predicate_arity(&self) -> Option<usize> {
-        match self.direction {
-            Direction::Import => self.value_formats.as_ref().map(|vfs| {
-                vfs.iter().fold(0, |acc, fmt| {
-                    if *fmt == DsvValueFormat::Skip {
-                        acc
-                    } else {
-                        acc + 1
-                    }
-                })
-            }),
-            Direction::Export => self.value_formats.as_ref().map(|vfs| vfs.len()),
-        }
+    fn predicate_arity(&self) -> usize {
+        self.value_formats.arity()
     }
 
-    fn file_extension(&self) -> Option<String> {
-        match self.file_format() {
-            FileFormat::CSV => Some("csv".to_string()),
-            FileFormat::DSV => Some("dsv".to_string()),
-            FileFormat::TSV => Some("tsv".to_string()),
-            _ => unreachable!(),
-        }
+    fn file_extension(&self) -> String {
+        self.file_format().extension().to_string()
     }
 
-    fn compression_format(&self) -> Option<CompressionFormat> {
+    fn compression_format(&self) -> CompressionFormat {
         self.compression_format
     }
 
diff --git a/nemo/src/io/formats/dsv_reader.rs b/nemo/src/io/formats/dsv/reader.rs
similarity index 96%
rename from nemo/src/io/formats/dsv_reader.rs
rename to nemo/src/io/formats/dsv/reader.rs
index 90e7d5186..c1341ff96 100644
--- a/nemo/src/io/formats/dsv_reader.rs
+++ b/nemo/src/io/formats/dsv/reader.rs
@@ -11,7 +11,7 @@ use nemo_physical::datasources::{table_providers::TableProvider, tuple_writer::T
 
 use crate::io::formats::PROGRESS_NOTIFY_INCREMENT;
 
-use super::dsv_value_format::{DataValueParserFunction, DsvValueFormat};
+use super::value_format::{DataValueParserFunction, DsvValueFormat, DsvValueFormats};
 
 /// A reader object for reading [DSV](https://en.wikipedia.org/wiki/Delimiter-separated_values) (delimiter separated values) files.
 ///
@@ -22,10 +22,17 @@ use super::dsv_value_format::{DataValueParserFunction, DsvValueFormat};
 /// Parsing of individual values can be done in several ways (DSV does not specify a data model at this level),
 /// as defined by [DsvValueFormat].
 pub(super) struct DsvReader {
+    /// Buffer from which content is read
     read: Box<dyn BufRead>,
+
+    /// Delimiter used to separate values in the file
     delimiter: u8,
+    /// Escape character used
     escape: Option<u8>,
-    value_formats: Vec<DsvValueFormat>,
+    /// List of [DsvValueFormat] indicating for each column
+    /// the type of value parser that should be used
+    value_formats: DsvValueFormats,
+    /// Maximum number of entries that should be read.
     limit: Option<u64>,
 }
 
@@ -34,15 +41,15 @@ impl DsvReader {
     pub(super) fn new(
         read: Box<dyn BufRead>,
         delimiter: u8,
-        value_formats: Vec<DsvValueFormat>,
+        value_formats: DsvValueFormats,
+        escape: Option<u8>,
         limit: Option<u64>,
     ) -> Self {
         Self {
             read,
             delimiter,
-            //escape: b'\\',
-            escape: None,
             value_formats,
+            escape,
             limit,
         }
     }
@@ -151,7 +158,10 @@ mod test {
     // use quickcheck_macros::quickcheck;
     use test_log::test;
 
-    use crate::io::formats::{dsv_reader::DsvReader, dsv_value_format::DsvValueFormat};
+    use crate::io::formats::dsv::{
+        reader::DsvReader,
+        value_format::{DsvValueFormat, DsvValueFormats},
+    };
     use nemo_physical::{datasources::tuple_writer::TupleWriter, management::database::Dict};
 
     #[test]
@@ -169,12 +179,13 @@ mod test {
         let reader = DsvReader::new(
             Box::new(data.as_bytes()),
             b';',
-            vec![
+            DsvValueFormats::new(vec![
                 DsvValueFormat::Anything,
                 DsvValueFormat::String,
                 DsvValueFormat::Integer,
                 DsvValueFormat::Double,
-            ],
+            ]),
+            None,
             None,
         );
         let dict = RefCell::new(Dict::default());
diff --git a/nemo/src/io/formats/dsv_value_format.rs b/nemo/src/io/formats/dsv/value_format.rs
similarity index 77%
rename from nemo/src/io/formats/dsv_value_format.rs
rename to nemo/src/io/formats/dsv/value_format.rs
index 7720899cf..4fae5e541 100644
--- a/nemo/src/io/formats/dsv_value_format.rs
+++ b/nemo/src/io/formats/dsv/value_format.rs
@@ -1,19 +1,15 @@
 //! This module defines the syntactic formats supported with values in DSV files.
-//! This includes reading (parsing) and writing (serialization) for each supported
-//! format.
-
-use oxiri::Iri;
+//! This includes reading (parsing) and writing (serialization)
+//! for each supported format.
 
+use enum_assoc::Assoc;
 use nemo_physical::datavalues::{AnyDataValue, DataValue, DataValueCreationError};
 
-use crate::io::lexer::ParserState;
-use crate::io::parser::types::Input;
-use crate::model::{
-    VALUE_FORMAT_ANY, VALUE_FORMAT_DOUBLE, VALUE_FORMAT_INT, VALUE_FORMAT_SKIP, VALUE_FORMAT_STRING,
+use crate::{
+    parser::{ast::token::Token, input::ParserInput, ParserState},
+    rule_model::components::term::{primitive::Primitive, tuple::Tuple, Term},
+    syntax::directive::value_formats,
 };
-use crate::{io::lexer::lex_tag, model::FileFormat};
-
-use super::import_export::ImportExportError;
 
 pub(super) type DataValueParserFunction = fn(String) -> Result<AnyDataValue, DataValueCreationError>;
 
@@ -23,42 +19,102 @@ pub(super) type DataValueSerializerFunction = fn(&AnyDataValue) -> Option<String>;
 
+#[derive(Assoc, Debug, Clone, Copy, PartialEq, Eq)]
+#[func(pub fn name(&self) -> &'static str)]
+#[func(pub fn from_name(name: &str) -> Option<Self>)]
+pub(crate) enum DsvValueFormat {
     /// Format that tries various heuristics to interpret and represent values
     /// in the most natural way. The format can interpret any content (the final
     /// fallback is to use it as a string).
+    #[assoc(name = value_formats::ANY)]
+    #[assoc(from_name = value_formats::ANY)]
     Anything,
     /// Format that interprets the DSV values as literal string values.
     /// All data will be interpreted in this way.
+    #[assoc(name = value_formats::STRING)]
+    #[assoc(from_name = value_formats::STRING)]
     String,
     /// Format that interprets numeric DSV values as integers, and rejects
     /// all values that are not in this form.
+    #[assoc(name = value_formats::INT)]
+    #[assoc(from_name = value_formats::INT)]
     Integer,
     /// Format that interprets numeric DSV values as double-precision floating
     /// point numbers, and rejects all values that are not in this form.
+    #[assoc(name = value_formats::DOUBLE)]
+    #[assoc(from_name = value_formats::DOUBLE)]
     Double,
     /// Special format to indicate that the value should be skipped as if the whole
     /// column were not there.
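+    //
+    // Name round-trip sketch for the derived accessors (this assumes the
+    // constant value_formats::INT is the string "int"):
+    //
+    //     assert_eq!(DsvValueFormat::from_name("int"), Some(DsvValueFormat::Integer));
+    //     assert_eq!(DsvValueFormat::Integer.name(), "int");
+    //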
+    #[assoc(name = value_formats::SKIP)]
+    #[assoc(from_name = value_formats::SKIP)]
     Skip,
 }
 
-impl DsvValueFormat {
-    /// Try to convert a string name for a value format to one of the supported
-    /// DSV value formats, or return an error for unsupported formats.
-    pub(super) fn from_string(name: &str) -> Result<DsvValueFormat, ImportExportError> {
-        match name {
-            VALUE_FORMAT_ANY => Ok(DsvValueFormat::Anything),
-            VALUE_FORMAT_STRING => Ok(DsvValueFormat::String),
-            VALUE_FORMAT_INT => Ok(DsvValueFormat::Integer),
-            VALUE_FORMAT_DOUBLE => Ok(DsvValueFormat::Double),
-            VALUE_FORMAT_SKIP => Ok(DsvValueFormat::Skip),
-            _ => Err(ImportExportError::InvalidValueFormat {
-                value_format: name.to_string(),
-                format: FileFormat::DSV,
-            }),
+
+/// Indicate what value parser should be used for each column.
+#[derive(Debug, Clone)]
+pub(crate) struct DsvValueFormats(Vec<DsvValueFormat>);
+
+impl DsvValueFormats {
+    pub(crate) fn new(formats: Vec<DsvValueFormat>) -> Self {
+        Self(formats)
+    }
+
+    /// Return a list of [DsvValueFormat]s with default entries.
+    pub(crate) fn default(arity: usize) -> Self {
+        Self((0..arity).map(|_| DsvValueFormat::Anything).collect())
+    }
+
+    /// Create a [DsvValueFormats] from a [Tuple].
+    ///
+    /// Returns `None` if the tuple contains an unknown value.
+    pub(crate) fn from_tuple(tuple: &Tuple) -> Option<Self> {
+        let mut result = Vec::new();
+
+        for value in tuple.arguments() {
+            if let Term::Primitive(Primitive::Ground(ground)) = value {
+                if let Some(format) = DsvValueFormat::from_name(&ground.to_string()) {
+                    result.push(format);
+                    continue;
+                }
+            }
+
+            return None;
+        }
+
+        Some(Self::new(result))
+    }
+
+    /// Return the arity (ignoring the skipped columns).
+    pub(crate) fn arity(&self) -> usize {
+        let mut arity = 0;
+
+        for &format in &self.0 {
+            if format != DsvValueFormat::Skip {
+                arity += 1;
+            }
         }
+
+        arity
+    }
+
+    /// Return the length of the format tuple.
+    pub(crate) fn len(&self) -> usize {
+        self.0.len()
     }
 
+    /// Return whether the tuple is empty.
+    pub(crate) fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
+    /// Return an iterator over the [DsvValueFormat]s.
+    pub(crate) fn iter(&self) -> impl Iterator<Item = &DsvValueFormat> {
+        self.0.iter()
+    }
+}
+
+impl DsvValueFormat {
     /// Return a function for parsing value strings for this format.
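+    // A small sketch of `len` vs. `arity` on the `DsvValueFormats` type
+    // defined above (skipped columns count towards `len` only):
+    //
+    //     let formats = DsvValueFormats::new(vec![
+    //         DsvValueFormat::Anything,
+    //         DsvValueFormat::Skip,
+    //         DsvValueFormat::Integer,
+    //     ]);
+    //     assert_eq!(formats.len(), 3);   // columns in the file
+    //     assert_eq!(formats.arity(), 2); // columns in the predicate
+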
pub(super) fn data_value_parser_function(&self) -> DataValueParserFunction { match self { @@ -145,18 +201,19 @@ impl DsvValueFormat { } // Check if it's a valid tag name - let refcell = std::cell::RefCell::new(Vec::new()); - let parser_state = ParserState { errors: &refcell }; - if let Ok((remainder, _)) = lex_tag::>(Input::new(input, parser_state)) - { - if remainder.input.is_empty() { + let parser_input = ParserInput::new(input, ParserState::default()); + if let Ok((rest, _)) = Token::name(parser_input) { + if rest.span.0.is_empty() { return Ok(AnyDataValue::new_iri(input.to_string())); } } // Might still be a full IRI - if let Ok(iri) = Iri::parse(input) { - return Ok(AnyDataValue::new_iri(iri.to_string())); + let parser_input = ParserInput::new(input, ParserState::default()); + if let Ok((rest, iri)) = Token::iri(parser_input) { + if rest.span.0.is_empty() { + return Ok(AnyDataValue::new_iri(iri.to_string())); + } } // Otherwise treat the input as a string literal diff --git a/nemo/src/io/formats/dsv_writer.rs b/nemo/src/io/formats/dsv/writer.rs similarity index 86% rename from nemo/src/io/formats/dsv_writer.rs rename to nemo/src/io/formats/dsv/writer.rs index 231c8f78f..93a75c85a 100644 --- a/nemo/src/io/formats/dsv_writer.rs +++ b/nemo/src/io/formats/dsv/writer.rs @@ -5,11 +5,14 @@ use std::io::Write; use csv::{Writer, WriterBuilder}; use nemo_physical::datavalues::AnyDataValue; -use crate::{error::Error, io::formats::types::TableWriter}; +use crate::{ + error::Error, + io::formats::{ + dsv::value_format::DataValueSerializerFunction, TableWriter, PROGRESS_NOTIFY_INCREMENT, + }, +}; -use super::dsv_value_format::DataValueSerializerFunction; -use super::dsv_value_format::DsvValueFormat; -use super::PROGRESS_NOTIFY_INCREMENT; +use super::value_format::{DsvValueFormat, DsvValueFormats}; /// A writer object for writing [DSV](https://en.wikipedia.org/wiki/Delimiter-separated_values) (delimiter separated values) files. /// @@ -18,8 +21,13 @@ use super::PROGRESS_NOTIFY_INCREMENT; /// Writing of individual values can be done in several ways (DSV does not specify a data model at this level), /// as defined by [DsvValueFormat]. pub(super) struct DsvWriter { + /// Buffer to write into writer: Writer>, - value_formats: Vec, + + /// List of [DsvValueFormat] indicating for each column + /// the type of value parser that should be use + value_formats: DsvValueFormats, + /// Maximum number of entries that should be written. limit: Option, } @@ -27,7 +35,7 @@ impl DsvWriter { pub(super) fn new( delimiter: u8, writer: Box, - value_formats: Vec, + value_formats: DsvValueFormats, limit: Option, ) -> Self { DsvWriter { diff --git a/nemo/src/io/formats/import_export.rs b/nemo/src/io/formats/import_export.rs index abc212400..11234a7d3 100644 --- a/nemo/src/io/formats/import_export.rs +++ b/nemo/src/io/formats/import_export.rs @@ -1,626 +1,538 @@ -//! Definitions for the [ImportExportHandler] trait that provides the main -//! handle for supported file formats, and of [ImportExportHandlers] as a -//! main entry point for obtaining such handlers. 
- -use std::{ - collections::HashSet, - io::{BufRead, Write}, - path::PathBuf, -}; - -use dyn_clone::DynClone; -use nemo_physical::{ - datasources::table_providers::TableProvider, - datavalues::{AnyDataValue, DataValue, MapDataValue, TupleDataValue, ValueDomain}, - resource::Resource, -}; - -use crate::{ - error::Error, - io::compression_format::CompressionFormat, - model::{ - ExportDirective, FileFormat, ImportDirective, ImportExportDirective, - PARAMETER_NAME_COMPRESSION, PARAMETER_NAME_FORMAT, PARAMETER_NAME_RESOURCE, - VALUE_COMPRESSION_GZIP, VALUE_COMPRESSION_NONE, VALUE_FORMAT_ANY, VALUE_FORMAT_SKIP, - }, -}; - -use thiserror::Error; - -use super::{ - json::JsonHandler, - types::{Direction, TableWriter}, - DsvHandler, RdfHandler, -}; - -/// Representation of a resource (file, URL, etc.) for import or export. -#[derive(Clone, Debug, PartialEq, Eq)] -pub(crate) enum ImportExportResource { - /// Value to indicate that the resource was not given. - Unspecified, - /// A concrete resource string. - Resource(Resource), - /// Use stdout (only for export) - Stdout, -} - -impl ImportExportResource { - /// Retrieve the contained resource, if any. - pub(crate) fn resource(&self) -> Option { - if let ImportExportResource::Resource(resource) = &self { - Some(resource.clone()) - } else { - None - } - } -} - -/// An [ImportExportHandler] represents a data format for input and/or output, and provides -/// specific methods for handling data of that format. Each handler is configured by format-specific -/// attributes, which define the behavior in detail, including the kind of data that this format -/// is compatible with. The attributes are provided when creating the format, and should then -/// be validated. -/// -/// An implementation of [ImportExportHandler] provides methods to validate and refine parameters -/// that were used with this format, to create suitable [TableProvider] and [TableWriter] objects -/// to read and write data in the given format, and to report information about the type of -/// data that this format can handle (such as predicate arity and type). -pub(crate) trait ImportExportHandler: std::fmt::Debug + DynClone + Send { - /// Return the associated [FileFormat]. - fn file_format(&self) -> FileFormat; - - /// Obtain a [TableProvider] for this format and the given reader, if supported. - /// If reading is not supported, an error will be returned. - /// - /// The arity is the arity of predicates that the caller expects to receive from this - /// call. If the handler has a fixed arity (as returned by [ImportExportHandler::arity]), - /// then this will normally be the same value (clashing arity requirements are detected - /// during program analysis). However, even if not speciied, the arity might also be inferred - /// from the usage of the imported data in a program, or other requirements known to the caller. - fn reader(&self, read: Box, arity: usize) - -> Result, Error>; - - /// Obtain a [TableWriter] for this format and the given writer, if supported. - /// If writing is not supported, an error will be returned. - /// - /// The arity is the arity of predicates that the caller would like to write. If the handler - /// has a fixed arity (as returned by [ImportExportHandler::arity]), - /// then this will normally be the same value (clashing arity requirements are detected - /// during program analysis). However, even if not speciied, the arity might still be known - /// in normal circumstances. 
- fn writer(&self, writer: Box, arity: usize) -> Result, Error>; - - /// Obtain the resource used for this data exchange. - /// In typical cases, this is the name of a file to read from or write to. - /// If no resource was specified, or if the resource is not identified by a - /// name (such as stdout), then `None` is returned. - fn resource(&self) -> Option { - self.import_export_resource().resource() - } - - /// Returns true if the selected resource is stdout. - fn resource_is_stdout(&self) -> bool { - self.import_export_resource() == &ImportExportResource::Stdout - } - - /// Returns the expected arity of the predicate related to this directive, if specified. - /// For import, this is the arity of the data that is created, for export it is the - /// arity of the data that is consumed. - fn predicate_arity(&self) -> Option; - - /// Returns the default file extension for data of this format, if any. - /// This will be used when making default file names. - fn file_extension(&self) -> Option; - - /// Returns the chosen compression format for imported/exported data. - fn compression_format(&self) -> Option; - - /// Returns the [ImportExportResource] used for this data exchange. - fn import_export_resource(&self) -> &ImportExportResource; -} - -dyn_clone::clone_trait_object!(ImportExportHandler); - -/// Struct with static methods to manage the conversion of [ImportExportDirective]s to -/// [ImportExportHandler]s. -pub(crate) struct ImportExportHandlers; - -impl ImportExportHandlers { - /// Obtain an [ImportExportHandler] for the given [ImportDirective], and return - /// an error if the given attributes are not suitable for the chosen format. - pub(crate) fn import_handler( - directive: &ImportDirective, - ) -> Result, ImportExportError> { - Self::handler(&directive.0, Direction::Import) - } - - /// Obtain an [ImportExportHandler] for the given [ExportDirective], and return - /// an error if the given attributes are not suitable for the chosen format. - pub(crate) fn export_handler( - directive: &ExportDirective, - ) -> Result, ImportExportError> { - Self::handler(&directive.0, Direction::Export) - } - - /// Obtain an [ImportExportHandler] for the given [ImportExportDirective], and return - /// an error if the given attributes are not suitable for the chosen format. - fn handler( - directive: &ImportExportDirective, - direction: Direction, - ) -> Result, ImportExportError> { - match directive.format { - FileFormat::CSV => DsvHandler::try_new_csv(&directive.attributes, direction), - FileFormat::DSV => DsvHandler::try_new_dsv(&directive.attributes, direction), - FileFormat::TSV => DsvHandler::try_new_tsv(&directive.attributes, direction), - FileFormat::JSON => { - if direction == Direction::Export { - Err(ImportExportError::UnsupportedWrite(FileFormat::JSON)) - } else { - JsonHandler::try_new_import(&directive.attributes) - } - } - FileFormat::RDF(variant) => { - RdfHandler::try_new(variant, &directive.attributes, direction) - } - } - } - - /// Check if all given attributes are among the valid attributes, - /// and return an error otherwise. 
- pub(super) fn check_attributes( - attributes: &MapDataValue, - valid_attributes: &[&str], - ) -> Result<(), ImportExportError> { - let given: HashSet = attributes - .map_keys() - .expect("map values always have keys") - .cloned() - .collect(); - let valid: HashSet = valid_attributes - .iter() - .map(|att| AnyDataValue::new_iri(att.to_string())) - .collect(); - - if let Some(unknown) = given.difference(&valid).next() { - return Err(ImportExportError::UnknownAttribute(unknown.to_string())); - } - Ok(()) - } - - /// Extract the resource from the given attributes. This can be [ImportExportResource::Unspecified] - /// for export (where we can use default names). If the value is invalid or missing for import, an - /// error is returned. - pub(super) fn extract_resource( - attributes: &MapDataValue, - direction: Direction, - ) -> Result { - let resource: Option = - Self::extract_string_or_iri(attributes, PARAMETER_NAME_RESOURCE, true)?; - - if let Some(string) = resource { - if string.is_empty() { - Ok(ImportExportResource::Stdout) - } else { - Ok(ImportExportResource::Resource(string)) - } - } else { - if direction == Direction::Import { - return Err(ImportExportError::MissingAttribute( - PARAMETER_NAME_RESOURCE.to_string(), - )); - } - Ok(ImportExportResource::Unspecified) - } - } - - /// Extract the compression format from the given attributes, and possibly resource. - /// If a resource is given, then the resource name without the compression-specific - /// extension is also returned. - /// - /// An error is returned if an unknown compression format was explicitly specified, - /// or if the compression format of the resource is not in agreement with an explicitly - /// stated one. - pub(super) fn extract_compression_format( - attributes: &MapDataValue, - resource: &ImportExportResource, - ) -> Result<(Option, Option), ImportExportError> { - let cf_name = Self::extract_string_or_iri(attributes, PARAMETER_NAME_COMPRESSION, true) - .expect("no errors with allow missing"); - - let stated_compression_format: Option; - if let Some(cf_name) = &cf_name { - match cf_name.as_str() { - VALUE_COMPRESSION_NONE => stated_compression_format = Some(CompressionFormat::None), - VALUE_COMPRESSION_GZIP => stated_compression_format = Some(CompressionFormat::Gzip), - _ => { - return Err(ImportExportError::invalid_att_value_error( - PARAMETER_NAME_COMPRESSION, - AnyDataValue::new_plain_string(cf_name.to_owned()), - format!( - "unknown compression format, supported formats: {:?}", - [VALUE_COMPRESSION_GZIP, VALUE_COMPRESSION_NONE] - ) - .as_str(), - )); - } - } - } else { - stated_compression_format = None; - } - - let resource_compression_format: Option; - let inner_resource: Option; - if let ImportExportResource::Resource(res) = resource { - let (rcf, inner_res) = CompressionFormat::from_resource(res); - resource_compression_format = Some(rcf); - inner_resource = Some(inner_res); - } else { - resource_compression_format = None; - inner_resource = None; - } - - match (stated_compression_format, resource_compression_format) { - (Some(scf), None) => Ok((Some(scf), inner_resource)), - (None, Some(rcf)) => Ok((Some(rcf), inner_resource)), - (Some(scf), Some(rcf)) => { - if scf == rcf { - Ok((Some(scf), inner_resource)) - } else { - Err(ImportExportError::invalid_att_value_error( - PARAMETER_NAME_COMPRESSION, - AnyDataValue::new_plain_string( - cf_name.expect("given if stated compression is known"), - ), - "compression method should match resource extension" - .to_string() - .as_str(), - )) - } - } - (None, None) => 
Ok((None, inner_resource)), - } - } - - /// Extract a string value for the given attribute name. Returns an error if the - /// value is mistyped ([ImportExportError::InvalidAttributeValue]) or missing ([ImportExportError::MissingAttribute]). - /// It can be specified whether it should be allowed that the atttribute is not set at all (and - /// `None` would then be returned). If given, the value must always be a string, however. - pub(super) fn extract_string( - attributes: &MapDataValue, - attribute_name: &str, - allow_missing: bool, - ) -> Result, ImportExportError> { - if let Some(c) = Self::extract_att_value(attributes, attribute_name, allow_missing)? { - match c.value_domain() { - ValueDomain::PlainString => Ok(Some(c.to_plain_string_unchecked())), - _ => Err(ImportExportError::invalid_att_value_error( - attribute_name, - c.clone(), - "expecting string value", - )), - } - } else { - Ok(None) - } - } - - /// Extract a string or IRI value for the given attribute name. Returns an error if the - /// value is mistyped ([ImportExportError::InvalidAttributeValue]) or missing ([ImportExportError::MissingAttribute]). - /// It can be specified whether it should be allowed that the atttribute is not set at all (and - /// `None` would then be returned). If given, the value must always be a string or an IRI, however. - /// This method is used for parameters that are actually strings in nature, but where we conveniently want to - /// allow the user to omit the quotes. - pub(super) fn extract_string_or_iri( - attributes: &MapDataValue, - attribute_name: &str, - allow_missing: bool, - ) -> Result, ImportExportError> { - if let Some(c) = Self::extract_att_value(attributes, attribute_name, allow_missing)? { - if let Some(s) = Self::string_from_datavalue(&c) { - Ok(Some(s)) - } else { - Err(ImportExportError::invalid_att_value_error( - attribute_name, - c.clone(), - "expecting string or IRI value", - )) - } - } else { - Ok(None) - } - } - - /// Extract an unsigned integer value for the given attribute name. Returns an error if the - /// value is mistyped ([ImportExportError::InvalidAttributeValue]) or missing ([ImportExportError::MissingAttribute]). - /// It can be specified whether it should be allowed that the attribute is not set at all (and - /// `None` would then be returned). If given, the value must always be an integer, however. - pub(super) fn extract_unsigned_integer( - attributes: &MapDataValue, - attribute_name: &str, - allow_missing: bool, - ) -> Result, ImportExportError> { - if let Some(c) = Self::extract_att_value(attributes, attribute_name, allow_missing)? { - if c.fits_into_u64() { - Ok(Some(c.to_u64_unchecked())) - } else { - Err(ImportExportError::invalid_att_value_error( - attribute_name, - c.clone(), - "expecting unsigned integer value that fits into 64bits", - )) - } - } else { - Ok(None) - } - } - - /// Extract an IRI value string for the given attribute name. Returns an error if the - /// value is mistyped ([ImportExportError::InvalidAttributeValue]) or missing ([ImportExportError::MissingAttribute]). - /// It can be specified whether it should be allowed that the atttribute is not set at all (and - /// `None` would then be returned). If given, the value must always be an IRI, however. - pub(super) fn extract_iri( - attributes: &MapDataValue, - attribute_name: &str, - allow_missing: bool, - ) -> Result, ImportExportError> { - if let Some(c) = Self::extract_att_value(attributes, attribute_name, allow_missing)? 
{ - match c.value_domain() { - ValueDomain::Iri => Ok(Some(c.to_iri_unchecked())), - _ => Err(ImportExportError::invalid_att_value_error( - attribute_name, - c.clone(), - "expecting IRI value", - )), - } - } else { - Ok(None) - } - } - - /// Extract a value for the given attribute name. The boolean flag constrols - /// if an error should be generated if the attribute is missing, or if `Ok(None)` - /// should be returned in this case. - pub(super) fn extract_att_value( - attributes: &MapDataValue, - attribute_name: &str, - allow_missing: bool, - ) -> Result, ImportExportError> { - if let Some(c) = attributes.map_element(&AnyDataValue::new_iri(attribute_name.to_string())) - { - Ok(Some(c.clone())) - } else if allow_missing { - return Ok(None); - } else { - return Err(ImportExportError::MissingAttribute( - attribute_name.to_string(), - )); - } - } - - /// Extract the list of strings that specify value formats. If no list is given, `None` - /// is returned. Errors may occur if the attribute is given but the value is not a list of strings, - /// or if all values are skipped. - /// - /// See [ImportExportHandlers::extract_value_format_strings_and_arity] for a method that also - /// checks the arity information, and uses it to make default formats if needed. - pub(super) fn extract_value_format_strings( - attributes: &MapDataValue, - ) -> Result>, ImportExportError> { - let value_format_strings: Option>; - if let Some(c) = Self::extract_att_value(attributes, PARAMETER_NAME_FORMAT, true)? { - let mut value_formats: Vec = Vec::new(); - if c.value_domain() == ValueDomain::Tuple { - for i in 0..c.len_unchecked() { - let v = c.tuple_element_unchecked(i); - if let Some(s) = Self::string_from_datavalue(v) { - value_formats.push(s); - } else { - return Err(ImportExportError::invalid_att_value_error( - PARAMETER_NAME_FORMAT, - v.clone(), - "list must contain strings only", - )); - } - } - } else { - return Err(ImportExportError::invalid_att_value_error( - PARAMETER_NAME_FORMAT, - c.clone(), - "expecting list of value formats", - )); - } - value_format_strings = Some(value_formats); - } else { - value_format_strings = None; - } - - // Check if any non-skipped value is contained - if let Some(true) = value_format_strings - .as_ref() - .map(|v| v.iter().all(|fmt| *fmt == VALUE_FORMAT_SKIP)) - { - return Err(ImportExportError::invalid_att_value_error( - PARAMETER_NAME_FORMAT, - Self::datavalue_from_format_strings(&value_format_strings.expect("checked above")), - "cannot import/export zero-ary data", - )); - } - - Ok(value_format_strings) - } - - /// Returns a list of string names of value formats that can be used as a - /// default if only the arity of a predicate is known. - pub(super) fn default_value_format_strings(arity: usize) -> Vec { - vec![VALUE_FORMAT_ANY; arity] - .into_iter() - .map(|s| s.to_string()) - .collect() - } - - /// Get a list of value format strings while taking the expected arity of data in - /// the file into account. - /// - /// Formats will first be extracted from the attributes. For import, the total number - /// of formats must match the expected file arity. For export, the total number of - /// non-skip formats must match the expected file arity. - /// - /// If no formats are given, we assume that "skip" is not used, so file arity = - /// predicate arity = format number, and we can make a list of default value formats. 
- /// `None` is only returned if the file arity was not given (in which case this function - /// is the same as [ImportExportHandlers::extract_value_format_strings]). - /// - /// The given `file_arity` is not checked: callers are expected to have ensured that it - /// is a non-zero usize that fits into i64. - pub(super) fn extract_value_format_strings_with_file_arity( - attributes: &MapDataValue, - file_arity: Option, - direction: Direction, - ) -> Result>, ImportExportError> { - let value_format_strings: Option> = - Self::extract_value_format_strings(attributes)?; - - if let Some(file_arity) = file_arity { - if let Some(ref vfs) = value_format_strings { - let declared_file_arity = match direction { - Direction::Import => vfs.len(), - Direction::Export => vfs.iter().fold(0, |acc: usize, fmt| { - // Only count formats other than VALUE_FORMAT_SKIP: - if *fmt == VALUE_FORMAT_SKIP { - acc - } else { - acc + 1 - } - }), - }; - - // Check if arity is consistent with given value formats. - if file_arity != declared_file_arity { - return Err(ImportExportError::invalid_att_value_error( - PARAMETER_NAME_FORMAT, - Self::datavalue_from_format_strings(vfs), - format!( - "value format declaration must be compatible with expected arity {} of tuples in file", - file_arity - ) - .as_str(), - )); - } - - Ok(value_format_strings) - } else { - Ok(Some(Self::default_value_format_strings(file_arity))) - } - } else { - Ok(value_format_strings) - } - } - - /// Turn a list of formats into a data value for error reporting. - fn datavalue_from_format_strings(format_strings: &[String]) -> AnyDataValue { - TupleDataValue::from_iter( - format_strings - .iter() - .map(|format| AnyDataValue::new_plain_string(format.to_owned())) - .collect::>(), - ) - .into() - } - - /// Extract a string from an [AnyDataValue] that is a plain string - /// or IRI. This is in particularly used to allow users to omit the quotes - /// around simple attribute values. - fn string_from_datavalue(v: &AnyDataValue) -> Option { - match v.value_domain() { - ValueDomain::PlainString => Some(v.to_plain_string_unchecked()), - ValueDomain::Iri => Some(v.to_iri_unchecked()), - _ => None, - } - } -} - -/// Errors related to the creation and usage of [ImportExportHandler]s. -#[derive(Debug, Error)] -pub enum ImportExportError { - /// Format is not supported for reading. - #[error(r#"Format "{0}" cannot be read"#)] - UnsupportedRead(FileFormat), - /// Format is not supported for writing. - #[error(r#"Format "{0}" cannot be written"#)] - UnsupportedWrite(FileFormat), - /// A required attribute is missing. - #[error(r#"Missing required attribute "{0}""#)] - MissingAttribute(String), - /// A given attribute is not valid for the format. - #[error(r#"Unknown attribute "{0}""#)] - UnknownAttribute(String), - /// File format name is not known. - #[error(r#"Unknown file format "{0}""#)] - UnknownFileFormat(String), - /// Attribute value is invalid. - #[error(r#"Invalid attribute value "{value}" for attribute "{attribute}": {description}"#)] - InvalidAttributeValue { - /// The given value. - value: AnyDataValue, - /// The attribute the value was given for. - attribute: AnyDataValue, - /// A description of why the value was invalid. - description: String, - }, - /// Value format is unsupported for this format. - #[error(r#"Unsupported value format "{value_format}" for format {format}"#)] - InvalidValueFormat { - /// The given value format. - value_format: String, - /// The file format. - format: FileFormat, - }, - /// Arity is unsupported for this format. 
- #[error(r#"import produces tuples of arity {arity}, but it should be arity {expected}"#)] - InvalidArity { - /// The given arity. - arity: usize, - /// The expected arity. - expected: usize, - }, - /// Arity is unsupported for this format, exact value is required. - #[error(r#"unsupported arity "{arity}" for format {format}, must be {required}"#)] - InvalidArityExact { - /// The given arity. - arity: usize, - /// The required arity. - required: usize, - /// The file format. - format: FileFormat, - }, - /// Format does not support complex types - #[error(r"Format {format} does not support complex types")] - UnsupportedComplexTypes { - /// The file format. - format: FileFormat, - }, - /// File could not be read - #[error(r#"File "{path}" could not be read."#)] - IoError { - /// Contains the wrapped error - error: std::io::Error, - /// Path that could not be read - path: PathBuf, - }, -} - -impl ImportExportError { - /// Convenience method to create ImportExportError::InvalidAttributeValue from static strings, which is a common - /// task in handlers. - pub(crate) fn invalid_att_value_error( - attribute: &str, - value: AnyDataValue, - reason: &str, - ) -> ImportExportError { - ImportExportError::InvalidAttributeValue { - attribute: AnyDataValue::new_iri(attribute.to_string()), - value: value.clone(), - description: reason.to_string(), - } - } -} +// //! Definitions for the [ImportExportHandler] trait that provides the main +// //! handle for supported file formats, and of [ImportExportHandlers] as a +// //! main entry point for obtaining such handlers. + +// use std::{ +// collections::HashSet, +// io::{BufRead, Write}, +// path::PathBuf, +// }; + +// use dyn_clone::DynClone; +// use nemo_physical::{ +// datasources::table_providers::TableProvider, +// datavalues::{AnyDataValue, DataValue, MapDataValue, TupleDataValue, ValueDomain}, +// resource::Resource, +// }; + +// use crate::{ +// error::Error, +// io::compression_format::CompressionFormat, +// model::{ +// ExportDirective, FileFormat, ImportDirective, ImportExportDirective, +// PARAMETER_NAME_COMPRESSION, PARAMETER_NAME_FORMAT, PARAMETER_NAME_RESOURCE, +// VALUE_COMPRESSION_GZIP, VALUE_COMPRESSION_NONE, VALUE_FORMAT_ANY, VALUE_FORMAT_SKIP, +// }, +// }; + +// use thiserror::Error; + +// use super::{ +// json::JsonHandler, +// types::{Direction, TableWriter}, +// DsvHandler, RdfHandler, +// }; + +// /// Struct with static methods to manage the conversion of [ImportExportDirective]s to +// /// [ImportExportHandler]s. +// pub(crate) struct ImportExportHandlers; + +// impl ImportExportHandlers { +// /// Obtain an [ImportExportHandler] for the given [ImportDirective], and return +// /// an error if the given attributes are not suitable for the chosen format. +// pub(crate) fn import_handler( +// directive: &ImportDirective, +// ) -> Result, ImportExportError> { +// Self::handler(&directive.0, Direction::Import) +// } + +// /// Obtain an [ImportExportHandler] for the given [ExportDirective], and return +// /// an error if the given attributes are not suitable for the chosen format. +// pub(crate) fn export_handler( +// directive: &ExportDirective, +// ) -> Result, ImportExportError> { +// Self::handler(&directive.0, Direction::Export) +// } + +// /// Obtain an [ImportExportHandler] for the given [ImportExportDirective], and return +// /// an error if the given attributes are not suitable for the chosen format. 
+// fn handler( +// directive: &ImportExportDirective, +// direction: Direction, +// ) -> Result, ImportExportError> { +// match directive.format { +// FileFormat::CSV => DsvHandler::try_new_csv(&directive.attributes, direction), +// FileFormat::DSV => DsvHandler::try_new_dsv(&directive.attributes, direction), +// FileFormat::TSV => DsvHandler::try_new_tsv(&directive.attributes, direction), +// FileFormat::JSON => { +// if direction == Direction::Export { +// Err(ImportExportError::UnsupportedWrite(FileFormat::JSON)) +// } else { +// JsonHandler::try_new_import(&directive.attributes) +// } +// } +// FileFormat::RDF(variant) => { +// RdfHandler::try_new(variant, &directive.attributes, direction) +// } +// } +// } + +// /// Check if all given attributes are among the valid attributes, +// /// and return an error otherwise. +// pub(super) fn check_attributes( +// attributes: &MapDataValue, +// valid_attributes: &[&str], +// ) -> Result<(), ImportExportError> { +// let given: HashSet = attributes +// .map_keys() +// .expect("map values always have keys") +// .cloned() +// .collect(); +// let valid: HashSet = valid_attributes +// .iter() +// .map(|att| AnyDataValue::new_iri(att.to_string())) +// .collect(); + +// if let Some(unknown) = given.difference(&valid).next() { +// return Err(ImportExportError::UnknownAttribute(unknown.to_string())); +// } +// Ok(()) +// } + +// /// Extract the resource from the given attributes. This can be [ImportExportResource::Unspecified] +// /// for export (where we can use default names). If the value is invalid or missing for import, an +// /// error is returned. +// pub(super) fn extract_resource( +// attributes: &MapDataValue, +// direction: Direction, +// ) -> Result { +// let resource: Option = +// Self::extract_string_or_iri(attributes, PARAMETER_NAME_RESOURCE, true)?; + +// if let Some(string) = resource { +// if string.is_empty() { +// Ok(ImportExportResource::Stdout) +// } else { +// Ok(ImportExportResource::Resource(string)) +// } +// } else { +// if direction == Direction::Import { +// return Err(ImportExportError::MissingAttribute( +// PARAMETER_NAME_RESOURCE.to_string(), +// )); +// } +// Ok(ImportExportResource::Unspecified) +// } +// } + +// /// Extract the compression format from the given attributes, and possibly resource. +// /// If a resource is given, then the resource name without the compression-specific +// /// extension is also returned. +// /// +// /// An error is returned if an unknown compression format was explicitly specified, +// /// or if the compression format of the resource is not in agreement with an explicitly +// /// stated one. 
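+// ///
+// /// For example, a resource `data.csv.gz` yields the gzip format together
+// /// with the inner resource name `data.csv`; an explicitly stated
+// /// compression format must then agree with the one implied by the extension.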
+// pub(super) fn extract_compression_format(
+//     attributes: &MapDataValue,
+//     resource: &ImportExportResource,
+// ) -> Result<(Option<CompressionFormat>, Option<Resource>), ImportExportError> {
+//     let cf_name = Self::extract_string_or_iri(attributes, PARAMETER_NAME_COMPRESSION, true)
+//         .expect("no errors with allow missing");
+
+//     let stated_compression_format: Option<CompressionFormat>;
+//     if let Some(cf_name) = &cf_name {
+//         match cf_name.as_str() {
+//             VALUE_COMPRESSION_NONE => stated_compression_format = Some(CompressionFormat::None),
+//             VALUE_COMPRESSION_GZIP => stated_compression_format = Some(CompressionFormat::Gzip),
+//             _ => {
+//                 return Err(ImportExportError::invalid_att_value_error(
+//                     PARAMETER_NAME_COMPRESSION,
+//                     AnyDataValue::new_plain_string(cf_name.to_owned()),
+//                     format!(
+//                         "unknown compression format, supported formats: {:?}",
+//                         [VALUE_COMPRESSION_GZIP, VALUE_COMPRESSION_NONE]
+//                     )
+//                     .as_str(),
+//                 ));
+//             }
+//         }
+//     } else {
+//         stated_compression_format = None;
+//     }

+//     let resource_compression_format: Option<CompressionFormat>;
+//     let inner_resource: Option<Resource>;
+//     if let ImportExportResource::Resource(res) = resource {
+//         let (rcf, inner_res) = CompressionFormat::from_resource(res);
+//         resource_compression_format = Some(rcf);
+//         inner_resource = Some(inner_res);
+//     } else {
+//         resource_compression_format = None;
+//         inner_resource = None;
+//     }

+//     match (stated_compression_format, resource_compression_format) {
+//         (Some(scf), None) => Ok((Some(scf), inner_resource)),
+//         (None, Some(rcf)) => Ok((Some(rcf), inner_resource)),
+//         (Some(scf), Some(rcf)) => {
+//             if scf == rcf {
+//                 Ok((Some(scf), inner_resource))
+//             } else {
+//                 Err(ImportExportError::invalid_att_value_error(
+//                     PARAMETER_NAME_COMPRESSION,
+//                     AnyDataValue::new_plain_string(
+//                         cf_name.expect("given if stated compression is known"),
+//                     ),
+//                     "compression method should match resource extension"
+//                         .to_string()
+//                         .as_str(),
+//                 ))
+//             }
+//         }
+//         (None, None) => Ok((None, inner_resource)),
+//     }
+// }

+// /// Extract a string value for the given attribute name. Returns an error if the
+// /// value is mistyped ([ImportExportError::InvalidAttributeValue]) or missing ([ImportExportError::MissingAttribute]).
+// /// It can be specified whether it should be allowed that the attribute is not set at all (and
+// /// `None` would then be returned). If given, the value must always be a string, however.
+// pub(super) fn extract_string(
+//     attributes: &MapDataValue,
+//     attribute_name: &str,
+//     allow_missing: bool,
+// ) -> Result<Option<String>, ImportExportError> {
+//     if let Some(c) = Self::extract_att_value(attributes, attribute_name, allow_missing)? {
+//         match c.value_domain() {
+//             ValueDomain::PlainString => Ok(Some(c.to_plain_string_unchecked())),
+//             _ => Err(ImportExportError::invalid_att_value_error(
+//                 attribute_name,
+//                 c.clone(),
+//                 "expecting string value",
+//             )),
+//         }
+//     } else {
+//         Ok(None)
+//     }
+// }

+// /// Extract a string or IRI value for the given attribute name. Returns an error if the
+// /// value is mistyped ([ImportExportError::InvalidAttributeValue]) or missing ([ImportExportError::MissingAttribute]).
+// /// It can be specified whether it should be allowed that the attribute is not set at all (and
+// /// `None` would then be returned). If given, the value must always be a string or an IRI, however.
+// /// This method is used for parameters that are actually strings in nature, but where we conveniently want to
+// /// allow the user to omit the quotes.
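+// ///
+// /// For example, a value given as the plain string `"gzip"` and one given
+// /// as the bare name `gzip` (parsed as an IRI) are treated alike here.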
+// pub(super) fn extract_string_or_iri(
+//     attributes: &MapDataValue,
+//     attribute_name: &str,
+//     allow_missing: bool,
+// ) -> Result<Option<String>, ImportExportError> {
+//     if let Some(c) = Self::extract_att_value(attributes, attribute_name, allow_missing)? {
+//         if let Some(s) = Self::string_from_datavalue(&c) {
+//             Ok(Some(s))
+//         } else {
+//             Err(ImportExportError::invalid_att_value_error(
+//                 attribute_name,
+//                 c.clone(),
+//                 "expecting string or IRI value",
+//             ))
+//         }
+//     } else {
+//         Ok(None)
+//     }
+// }

+// /// Extract an unsigned integer value for the given attribute name. Returns an error if the
+// /// value is mistyped ([ImportExportError::InvalidAttributeValue]) or missing ([ImportExportError::MissingAttribute]).
+// /// It can be specified whether it should be allowed that the attribute is not set at all (and
+// /// `None` would then be returned). If given, the value must always be an integer, however.
+// pub(super) fn extract_unsigned_integer(
+//     attributes: &MapDataValue,
+//     attribute_name: &str,
+//     allow_missing: bool,
+// ) -> Result<Option<u64>, ImportExportError> {
+//     if let Some(c) = Self::extract_att_value(attributes, attribute_name, allow_missing)? {
+//         if c.fits_into_u64() {
+//             Ok(Some(c.to_u64_unchecked()))
+//         } else {
+//             Err(ImportExportError::invalid_att_value_error(
+//                 attribute_name,
+//                 c.clone(),
+//                 "expecting unsigned integer value that fits into 64bits",
+//             ))
+//         }
+//     } else {
+//         Ok(None)
+//     }
+// }

+// /// Extract an IRI value string for the given attribute name. Returns an error if the
+// /// value is mistyped ([ImportExportError::InvalidAttributeValue]) or missing ([ImportExportError::MissingAttribute]).
+// /// It can be specified whether it should be allowed that the attribute is not set at all (and
+// /// `None` would then be returned). If given, the value must always be an IRI, however.
+// pub(super) fn extract_iri(
+//     attributes: &MapDataValue,
+//     attribute_name: &str,
+//     allow_missing: bool,
+// ) -> Result<Option<String>, ImportExportError> {
+//     if let Some(c) = Self::extract_att_value(attributes, attribute_name, allow_missing)? {
+//         match c.value_domain() {
+//             ValueDomain::Iri => Ok(Some(c.to_iri_unchecked())),
+//             _ => Err(ImportExportError::invalid_att_value_error(
+//                 attribute_name,
+//                 c.clone(),
+//                 "expecting IRI value",
+//             )),
+//         }
+//     } else {
+//         Ok(None)
+//     }
+// }

+// /// Extract a value for the given attribute name. The boolean flag controls
+// /// if an error should be generated if the attribute is missing, or if `Ok(None)`
+// /// should be returned in this case.
+// pub(super) fn extract_att_value(
+//     attributes: &MapDataValue,
+//     attribute_name: &str,
+//     allow_missing: bool,
+// ) -> Result<Option<AnyDataValue>, ImportExportError> {
+//     if let Some(c) = attributes.map_element(&AnyDataValue::new_iri(attribute_name.to_string()))
+//     {
+//         Ok(Some(c.clone()))
+//     } else if allow_missing {
+//         return Ok(None);
+//     } else {
+//         return Err(ImportExportError::MissingAttribute(
+//             attribute_name.to_string(),
+//         ));
+//     }
+// }

+// /// Extract the list of strings that specify value formats. If no list is given, `None`
+// /// is returned. Errors may occur if the attribute is given but the value is not a list of strings,
+// /// or if all values are skipped.
+// ///
+// /// See [ImportExportHandlers::extract_value_format_strings_with_file_arity] for a method that also
+// /// checks the arity information, and uses it to make default formats if needed.
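+// ///
+// /// Sketch of the possible outcomes: `format = (any, int, skip)` yields
+// /// `Ok(Some(["any", "int", "skip"]))`; an all-skip list such as
+// /// `format = (skip, skip)` is rejected as zero-ary; a missing attribute
+// /// yields `Ok(None)`.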
+// pub(super) fn extract_value_format_strings( +// attributes: &MapDataValue, +// ) -> Result>, ImportExportError> { +// let value_format_strings: Option>; +// if let Some(c) = Self::extract_att_value(attributes, PARAMETER_NAME_FORMAT, true)? { +// let mut value_formats: Vec = Vec::new(); +// if c.value_domain() == ValueDomain::Tuple { +// for i in 0..c.len_unchecked() { +// let v = c.tuple_element_unchecked(i); +// if let Some(s) = Self::string_from_datavalue(v) { +// value_formats.push(s); +// } else { +// return Err(ImportExportError::invalid_att_value_error( +// PARAMETER_NAME_FORMAT, +// v.clone(), +// "list must contain strings only", +// )); +// } +// } +// } else { +// return Err(ImportExportError::invalid_att_value_error( +// PARAMETER_NAME_FORMAT, +// c.clone(), +// "expecting list of value formats", +// )); +// } +// value_format_strings = Some(value_formats); +// } else { +// value_format_strings = None; +// } + +// // Check if any non-skipped value is contained +// if let Some(true) = value_format_strings +// .as_ref() +// .map(|v| v.iter().all(|fmt| *fmt == VALUE_FORMAT_SKIP)) +// { +// return Err(ImportExportError::invalid_att_value_error( +// PARAMETER_NAME_FORMAT, +// Self::datavalue_from_format_strings(&value_format_strings.expect("checked above")), +// "cannot import/export zero-ary data", +// )); +// } + +// Ok(value_format_strings) +// } + +// /// Returns a list of string names of value formats that can be used as a +// /// default if only the arity of a predicate is known. +// pub(super) fn default_value_format_strings(arity: usize) -> Vec { +// vec![VALUE_FORMAT_ANY; arity] +// .into_iter() +// .map(|s| s.to_string()) +// .collect() +// } + +// /// Get a list of value format strings while taking the expected arity of data in +// /// the file into account. +// /// +// /// Formats will first be extracted from the attributes. For import, the total number +// /// of formats must match the expected file arity. For export, the total number of +// /// non-skip formats must match the expected file arity. +// /// +// /// If no formats are given, we assume that "skip" is not used, so file arity = +// /// predicate arity = format number, and we can make a list of default value formats. +// /// `None` is only returned if the file arity was not given (in which case this function +// /// is the same as [ImportExportHandlers::extract_value_format_strings]). +// /// +// /// The given `file_arity` is not checked: callers are expected to have ensured that it +// /// is a non-zero usize that fits into i64. +// pub(super) fn extract_value_format_strings_with_file_arity( +// attributes: &MapDataValue, +// file_arity: Option, +// direction: Direction, +// ) -> Result>, ImportExportError> { +// let value_format_strings: Option> = +// Self::extract_value_format_strings(attributes)?; + +// if let Some(file_arity) = file_arity { +// if let Some(ref vfs) = value_format_strings { +// let declared_file_arity = match direction { +// Direction::Import => vfs.len(), +// Direction::Export => vfs.iter().fold(0, |acc: usize, fmt| { +// // Only count formats other than VALUE_FORMAT_SKIP: +// if *fmt == VALUE_FORMAT_SKIP { +// acc +// } else { +// acc + 1 +// } +// }), +// }; + +// // Check if arity is consistent with given value formats. 
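+//             // e.g. for formats (any, skip, int): declared file arity is 3
+//             // on import (all columns are read) but 2 on export (skipped
+//             // columns are not written).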
+// if file_arity != declared_file_arity { +// return Err(ImportExportError::invalid_att_value_error( +// PARAMETER_NAME_FORMAT, +// Self::datavalue_from_format_strings(vfs), +// format!( +// "value format declaration must be compatible with expected arity {} of tuples in file", +// file_arity +// ) +// .as_str(), +// )); +// } + +// Ok(value_format_strings) +// } else { +// Ok(Some(Self::default_value_format_strings(file_arity))) +// } +// } else { +// Ok(value_format_strings) +// } +// } + +// /// Turn a list of formats into a data value for error reporting. +// fn datavalue_from_format_strings(format_strings: &[String]) -> AnyDataValue { +// TupleDataValue::from_iter( +// format_strings +// .iter() +// .map(|format| AnyDataValue::new_plain_string(format.to_owned())) +// .collect::>(), +// ) +// .into() +// } + +// /// Extract a string from an [AnyDataValue] that is a plain string +// /// or IRI. This is in particularly used to allow users to omit the quotes +// /// around simple attribute values. +// fn string_from_datavalue(v: &AnyDataValue) -> Option { +// match v.value_domain() { +// ValueDomain::PlainString => Some(v.to_plain_string_unchecked()), +// ValueDomain::Iri => Some(v.to_iri_unchecked()), +// _ => None, +// } +// } +// } + +// /// Errors related to the creation and usage of [ImportExportHandler]s. +// #[derive(Debug, Error)] +// pub enum ImportExportError { +// /// Format is not supported for reading. +// #[error(r#"Format "{0}" cannot be read"#)] +// UnsupportedRead(FileFormat), +// /// Format is not supported for writing. +// #[error(r#"Format "{0}" cannot be written"#)] +// UnsupportedWrite(FileFormat), +// /// A required attribute is missing. +// #[error(r#"Missing required attribute "{0}""#)] +// MissingAttribute(String), +// /// A given attribute is not valid for the format. +// #[error(r#"Unknown attribute "{0}""#)] +// UnknownAttribute(String), +// /// File format name is not known. +// #[error(r#"Unknown file format "{0}""#)] +// UnknownFileFormat(String), +// /// Attribute value is invalid. +// #[error(r#"Invalid attribute value "{value}" for attribute "{attribute}": {description}"#)] +// InvalidAttributeValue { +// /// The given value. +// value: AnyDataValue, +// /// The attribute the value was given for. +// attribute: AnyDataValue, +// /// A description of why the value was invalid. +// description: String, +// }, +// /// Value format is unsupported for this format. +// #[error(r#"Unsupported value format "{value_format}" for format {format}"#)] +// InvalidValueFormat { +// /// The given value format. +// value_format: String, +// /// The file format. +// format: FileFormat, +// }, +// /// Arity is unsupported for this format. +// #[error(r#"import produces tuples of arity {arity}, but it should be arity {expected}"#)] +// InvalidArity { +// /// The given arity. +// arity: usize, +// /// The expected arity. +// expected: usize, +// }, +// /// Arity is unsupported for this format, exact value is required. +// #[error(r#"unsupported arity "{arity}" for format {format}, must be {required}"#)] +// InvalidArityExact { +// /// The given arity. +// arity: usize, +// /// The required arity. +// required: usize, +// /// The file format. +// format: FileFormat, +// }, +// /// Format does not support complex types +// #[error(r"Format {format} does not support complex types")] +// UnsupportedComplexTypes { +// /// The file format. 
+// format: FileFormat, +// }, +// /// File could not be read +// #[error(r#"File "{path}" could not be read."#)] +// IoError { +// /// Contains the wrapped error +// error: std::io::Error, +// /// Path that could not be read +// path: PathBuf, +// }, +// } + +// impl ImportExportError { +// /// Convenience method to create ImportExportError::InvalidAttributeValue from static strings, which is a common +// /// task in handlers. +// pub(crate) fn invalid_att_value_error( +// attribute: &str, +// value: AnyDataValue, +// reason: &str, +// ) -> ImportExportError { +// ImportExportError::InvalidAttributeValue { +// attribute: AnyDataValue::new_iri(attribute.to_string()), +// value: value.clone(), +// description: reason.to_string(), +// } +// } +// } diff --git a/nemo/src/io/formats/json.rs b/nemo/src/io/formats/json.rs index dcea6a669..3693e5f1e 100644 --- a/nemo/src/io/formats/json.rs +++ b/nemo/src/io/formats/json.rs @@ -1,68 +1,67 @@ //! Handler for resources of type JSON (java script object notation). +pub(crate) mod reader; + use std::io::BufRead; -use nemo_physical::{datasources::table_providers::TableProvider, datavalues::MapDataValue}; +use nemo_physical::datasources::table_providers::TableProvider; +use reader::JsonReader; -use super::{ - import_export::{ - ImportExportError, ImportExportHandler, ImportExportHandlers, ImportExportResource, - }, - json_reader::JsonReader, - types::Direction, +use crate::rule_model::components::import_export::{ + compression::CompressionFormat, file_formats::FileFormat, }; +use super::{ImportExportHandler, ImportExportResource, TableWriter}; + #[derive(Debug, Clone)] pub(crate) struct JsonHandler { resource: ImportExportResource, } impl JsonHandler { - pub(crate) fn try_new_import( - attributes: &MapDataValue, - ) -> Result, ImportExportError> { - // todo: check attributes - let resource = ImportExportHandlers::extract_resource(attributes, Direction::Import)?; - - Ok(Box::new(JsonHandler { resource })) + pub fn new(resource: ImportExportResource) -> Self { + Self { resource } } + + // pub(crate) fn try_new_import( + // attributes: &MapDataValue, + // ) -> Result, ImportExportError> { + // // todo: check attributes + // let resource = ImportExportHandler::extract_resource(attributes, Direction::Import)?; + + // Ok(Box::new(JsonHandler { resource })) + // } } impl ImportExportHandler for JsonHandler { - fn file_format(&self) -> crate::model::FileFormat { - crate::model::FileFormat::JSON + fn file_format(&self) -> FileFormat { + FileFormat::JSON } fn reader( &self, read: Box, - arity: usize, ) -> Result, crate::error::Error> { - if arity != 3 { - return Err(ImportExportError::InvalidArity { arity, expected: 3 }.into()); - } - Ok(Box::new(JsonReader::new(read))) } fn writer( &self, _writer: Box, - _arity: usize, - ) -> Result, crate::error::Error> { + ) -> Result, crate::error::Error> { unimplemented!("writing json is currently not supported") } - fn predicate_arity(&self) -> Option { - Some(3) + fn predicate_arity(&self) -> usize { + 3 } - fn file_extension(&self) -> Option { - Some("json".into()) + fn file_extension(&self) -> String { + self.file_format().extension().to_string() } - fn compression_format(&self) -> Option { - None + fn compression_format(&self) -> CompressionFormat { + CompressionFormat::None } fn import_export_resource(&self) -> &ImportExportResource { diff --git a/nemo/src/io/formats/json_reader.rs b/nemo/src/io/formats/json/reader.rs similarity index 100% rename from nemo/src/io/formats/json_reader.rs rename to 
nemo/src/io/formats/json/reader.rs diff --git a/nemo/src/io/formats/rdf.rs b/nemo/src/io/formats/rdf.rs index de518d886..c8f5c063c 100644 --- a/nemo/src/io/formats/rdf.rs +++ b/nemo/src/io/formats/rdf.rs @@ -1,88 +1,53 @@ //! Handler for resources of type RDF (Rsource Description Format). + +pub mod error; +pub(crate) mod reader; +pub(crate) mod value_format; +pub(crate) mod writer; + use std::io::{BufRead, Write}; -use nemo_physical::{ - datasources::table_providers::TableProvider, - datavalues::{AnyDataValue, DataValueCreationError, MapDataValue}, - resource::Resource, -}; +use enum_assoc::Assoc; +use nemo_physical::datasources::table_providers::TableProvider; use oxiri::Iri; +use reader::RdfReader; +use value_format::RdfValueFormats; +use writer::RdfWriter; use crate::{ error::Error, - io::{ - compression_format::CompressionFormat, - formats::types::{Direction, TableWriter}, - }, - model::{ - FileFormat, RdfVariant, PARAMETER_NAME_BASE, PARAMETER_NAME_COMPRESSION, - PARAMETER_NAME_FORMAT, PARAMETER_NAME_LIMIT, PARAMETER_NAME_RESOURCE, VALUE_FORMAT_ANY, - VALUE_FORMAT_SKIP, - }, -}; - -use super::{ - import_export::{ - ImportExportError, ImportExportHandler, ImportExportHandlers, ImportExportResource, + rule_model::components::import_export::{ + compression::CompressionFormat, file_formats::FileFormat, }, - rdf_reader::RdfReader, - rdf_writer::RdfWriter, }; -use thiserror::Error; +use super::{Direction, ImportExportHandler, ImportExportResource, TableWriter}; -/// Errors that can occur when reading/writing RDF resources and converting them -/// to/from [AnyDataValue]s. -#[allow(variant_size_differences)] -#[derive(Error, Debug)] -pub enum RdfFormatError { - /// A problem occurred in converting an RDF term to a data value. - #[error(transparent)] - DataValueConversion(#[from] DataValueCreationError), - /// Error of encountering unsupported value in subject position - #[error("values used as subjects of RDF triples must be IRIs or nulls")] - RdfInvalidSubject, - /// Error of encountering RDF* features in data - #[error("RDF* terms are not supported")] - RdfStarUnsupported, - /// Error in Rio's Turtle parser - #[error(transparent)] - RioTurtle(#[from] rio_turtle::TurtleError), - /// Error in Rio's RDF/XML parser - #[error(transparent)] - RioXml(#[from] rio_xml::RdfXmlError), - /// Unable to determine RDF format. - #[error("could not determine which RDF parser to use for resource {0}")] - UnknownRdfFormat(Resource), +/// The different supported variants of the RDF format. +#[derive(Assoc, Debug, Clone, Copy, PartialEq, Eq)] +#[func(pub fn file_format(&self) -> FileFormat)] +pub enum RdfVariant { + /// RDF 1.1 N-Triples + #[assoc(file_format = FileFormat::NTriples)] + NTriples, + /// RDF 1.1 N-Quads + #[assoc(file_format = FileFormat::NQuads)] + NQuads, + /// RDF 1.1 Turtle + #[assoc(file_format = FileFormat::Turtle)] + Turtle, + /// RDF 1.1 RDF/XML + #[assoc(file_format = FileFormat::RDFXML)] + RDFXML, + /// RDF 1.1 TriG + #[assoc(file_format = FileFormat::TriG)] + TriG, } -/// Enum for the value formats that are supported for RDF. In many cases, -/// RDF defines how formatting should be done, so there is not much to select here. -/// -/// Note that, irrespective of the format, RDF restricts the terms that are -/// allowed in subject, predicate, and graph name positions, and only such terms -/// will be handled there (others are dropped silently). -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub(super) enum RdfValueFormat { - /// General format that accepts any RDF term. 
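// Editor's note, not part of the patch: with `enum_assoc`, every `RdfVariant`
// defined in the hunk above carries its `FileFormat` directly, which is why the
// old `Unspecified` fallback below can be deleted. A usage sketch, assuming the
// definitions from this hunk:
//
//     fn variant_sketch() {
//         assert!(matches!(RdfVariant::Turtle.file_format(), FileFormat::Turtle));
//         // `Display` for `RdfVariant` delegates to the associated file format:
//         assert_eq!(RdfVariant::NQuads.to_string(), FileFormat::NQuads.to_string());
//     }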
- Anything, - /// Special format to indicate that the value should be skipped as if the whole - /// column where not there. - Skip, -} -impl RdfValueFormat { - /// Try to convert a string name for a value format to one of the supported - /// RDF value formats, or return an error for unsupported formats. - pub(super) fn from_string(name: &str) -> Result { - match name { - VALUE_FORMAT_ANY => Ok(RdfValueFormat::Anything), - VALUE_FORMAT_SKIP => Ok(RdfValueFormat::Skip), - _ => Err(ImportExportError::InvalidValueFormat { - value_format: name.to_string(), - format: FileFormat::RDF(RdfVariant::Unspecified), - }), - } +impl std::fmt::Display for RdfVariant { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.file_format().fmt(f) } } @@ -99,217 +64,211 @@ pub struct RdfHandler { /// The specific RDF format to be used. variant: RdfVariant, /// The list of value formats to be used for importing/exporting data. - /// Since the arity is known for - value_formats: Option>, + value_formats: RdfValueFormats, /// Maximum number of statements that should be imported/exported. limit: Option, - /// Compression format to be used, if specified. This can also be inferred - /// from the resource, if given. So the only case where `None` is possible - /// is when no resource is given (during output). - compression_format: Option, + /// Compression format to be used + compression_format: CompressionFormat, /// Direction of the operation. direction: Direction, } impl RdfHandler { - /// Construct an RDF handler of the given variant. - pub(crate) fn try_new( + /// Create a new [RdfHandler]. + pub fn new( + resource: ImportExportResource, + base: Option>, variant: RdfVariant, - attributes: &MapDataValue, + value_formats: RdfValueFormats, + limit: Option, + compression_format: CompressionFormat, direction: Direction, - ) -> Result, ImportExportError> { - // Basic checks for unsupported attributes: - ImportExportHandlers::check_attributes( - attributes, - &[ - PARAMETER_NAME_RESOURCE, - PARAMETER_NAME_BASE, - PARAMETER_NAME_COMPRESSION, - PARAMETER_NAME_FORMAT, - PARAMETER_NAME_LIMIT, - ], - )?; - - let resource = ImportExportHandlers::extract_resource(attributes, direction)?; - - let base: Option>; - if let Some(base_string) = - ImportExportHandlers::extract_iri(attributes, PARAMETER_NAME_BASE, true)? 
- { - if let Ok(b) = Iri::parse(base_string.clone()) { - // TODO: Export should not accept base as parameter, since we cannot use it - base = Some(b); - } else { - return Err(ImportExportError::invalid_att_value_error( - PARAMETER_NAME_BASE, - AnyDataValue::new_iri(base_string.clone()), - "must be a valid IRI", - )); - } - } else { - base = None; - } - - let (compression_format, inner_resource) = - ImportExportHandlers::extract_compression_format(attributes, &resource)?; - - let refined_variant: RdfVariant; - if variant == RdfVariant::Unspecified { - if let Some(ref res) = inner_resource { - refined_variant = Self::rdf_variant_from_resource(res); - } else { - // We can still guess a default format based on the arity - // information provided on import/export: - refined_variant = RdfVariant::Unspecified; - } - } else { - refined_variant = variant; - } - - let value_formats = Self::extract_value_formats(attributes, refined_variant, direction)?; - let limit = - ImportExportHandlers::extract_unsigned_integer(attributes, PARAMETER_NAME_LIMIT, true)?; - - Ok(Box::new(Self { + ) -> Self { + Self { resource, base, - variant: refined_variant, + variant, value_formats, limit, compression_format, direction, - })) + } } - fn extract_value_formats( - attributes: &MapDataValue, - variant: RdfVariant, - direction: Direction, - ) -> Result>, ImportExportError> { - // Input arity for known formats: - let arity = match variant { - RdfVariant::Unspecified => None, - RdfVariant::NTriples | RdfVariant::Turtle | RdfVariant::RDFXML => Some(3), - RdfVariant::NQuads | RdfVariant::TriG => Some(4), - }; + // /// Construct an RDF handler of the given variant. + // pub(crate) fn try_new( + // variant: RdfVariant, + // attributes: &MapDataValue, + // direction: Direction, + // ) -> Result, ImportExportError> { + // // Basic checks for unsupported attributes: + // ImportExportHandlers::check_attributes( + // attributes, + // &[ + // PARAMETER_NAME_RESOURCE, + // PARAMETER_NAME_BASE, + // PARAMETER_NAME_COMPRESSION, + // PARAMETER_NAME_FORMAT, + // PARAMETER_NAME_LIMIT, + // ], + // )?; - let value_format_strings = - ImportExportHandlers::extract_value_format_strings_with_file_arity( - attributes, arity, direction, - )?; + // let resource = ImportExportHandlers::extract_resource(attributes, direction)?; - if let Some(format_strings) = value_format_strings { - Ok(Some(Self::formats_from_strings(format_strings)?)) - } else { - Ok(None) - } - } + // let base: Option>; + // if let Some(base_string) = + // ImportExportHandlers::extract_iri(attributes, PARAMETER_NAME_BASE, true)? + // { + // if let Ok(b) = Iri::parse(base_string.clone()) { + // // TODO: Export should not accept base as parameter, since we cannot use it + // base = Some(b); + // } else { + // return Err(ImportExportError::invalid_att_value_error( + // PARAMETER_NAME_BASE, + // AnyDataValue::new_iri(base_string.clone()), + // "must be a valid IRI", + // )); + // } + // } else { + // base = None; + // } - fn formats_from_strings( - value_format_strings: Vec, - ) -> Result, ImportExportError> { - let mut value_formats = Vec::with_capacity(value_format_strings.len()); - for s in value_format_strings { - value_formats.push(RdfValueFormat::from_string(s.as_str())?); - } - Ok(value_formats) - } + // let (compression_format, inner_resource) = + // ImportExportHandlers::extract_compression_format(attributes, &resource)?; - /// Extract [RdfVariant] from file extension. The resource should already - /// have been stripped of any compression-related extensions. 
- fn rdf_variant_from_resource(resource: &Resource) -> RdfVariant { - match resource { - resource if resource.ends_with(".ttl") => RdfVariant::Turtle, - resource if resource.ends_with(".rdf") => RdfVariant::RDFXML, - resource if resource.ends_with(".nt") => RdfVariant::NTriples, - resource if resource.ends_with(".nq") => RdfVariant::NQuads, - resource if resource.ends_with(".trig") => RdfVariant::TriG, - _ => RdfVariant::Unspecified, - } - } + // let refined_variant: RdfVariant; + // if variant == RdfVariant::Unspecified { + // if let Some(ref res) = inner_resource { + // refined_variant = Self::rdf_variant_from_resource(res); + // } else { + // // We can still guess a default format based on the arity + // // information provided on import/export: + // refined_variant = RdfVariant::Unspecified; + // } + // } else { + // refined_variant = variant; + // } - /// Returns the set RDF variant, or finds a default value based on the - /// required arity. An error occurs if the arity is not compatible with - /// any variant of RDF. - fn rdf_variant_or_default(&self, arity: usize) -> Result { - if self.variant == RdfVariant::Unspecified { - match arity { - 3 => Ok(RdfVariant::NTriples), - 4 => Ok(RdfVariant::NQuads), - _ => Err(ImportExportError::InvalidArity { arity, expected: 3 }), - } - } else { - Ok(self.variant) - } - } + // let value_formats = Self::extract_value_formats(attributes, refined_variant, direction)?; + // let limit = + // ImportExportHandlers::extract_unsigned_integer(attributes, PARAMETER_NAME_LIMIT, true)?; - /// Returns the set value formats, or finds a default value based on the - /// required arity. - fn value_formats_or_default(&self, arity: usize) -> Vec { - self.value_formats.clone().unwrap_or_else(|| { - Self::formats_from_strings(ImportExportHandlers::default_value_format_strings(arity)) - .unwrap() - }) - } + // Ok(Box::new(Self { + // resource, + // base, + // variant: refined_variant, + // value_formats, + // limit, + // compression_format, + // direction, + // })) + // } + + // fn extract_value_formats( + // attributes: &MapDataValue, + // variant: RdfVariant, + // direction: Direction, + // ) -> Result>, ImportExportError> { + // // Input arity for known formats: + // let arity = match variant { + // RdfVariant::Unspecified => None, + // RdfVariant::NTriples | RdfVariant::Turtle | RdfVariant::RDFXML => Some(3), + // RdfVariant::NQuads | RdfVariant::TriG => Some(4), + // }; + + // let value_format_strings = + // ImportExportHandlers::extract_value_format_strings_with_file_arity( + // attributes, arity, direction, + // )?; + + // if let Some(format_strings) = value_format_strings { + // Ok(Some(Self::formats_from_strings(format_strings)?)) + // } else { + // Ok(None) + // } + // } + + // fn formats_from_strings( + // value_format_strings: Vec, + // ) -> Result, ImportExportError> { + // let mut value_formats = Vec::with_capacity(value_format_strings.len()); + // for s in value_format_strings { + // value_formats.push(RdfValueFormat::from_string(s.as_str())?); + // } + // Ok(value_formats) + // } + + // /// Extract [RdfVariant] from file extension. The resource should already + // /// have been stripped of any compression-related extensions. 
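// Editor's note, not part of the patch: the commented-out function below mapped
// file extensions to RDF variants. Under the new design, which has no
// `RdfVariant::Unspecified`, the same dispatch would plausibly return an `Option`:
//
//     fn rdf_variant_from_extension_sketch(resource: &str) -> Option<RdfVariant> {
//         match resource {
//             r if r.ends_with(".ttl") => Some(RdfVariant::Turtle),
//             r if r.ends_with(".rdf") => Some(RdfVariant::RDFXML),
//             r if r.ends_with(".nt") => Some(RdfVariant::NTriples),
//             r if r.ends_with(".nq") => Some(RdfVariant::NQuads),
//             r if r.ends_with(".trig") => Some(RdfVariant::TriG),
//             _ => None,
//         }
//     }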
+ // fn rdf_variant_from_resource(resource: &Resource) -> RdfVariant { + // match resource { + // resource if resource.ends_with(".ttl") => RdfVariant::Turtle, + // resource if resource.ends_with(".rdf") => RdfVariant::RDFXML, + // resource if resource.ends_with(".nt") => RdfVariant::NTriples, + // resource if resource.ends_with(".nq") => RdfVariant::NQuads, + // resource if resource.ends_with(".trig") => RdfVariant::TriG, + // _ => RdfVariant::Unspecified, + // } + // } + + // /// Returns the set RDF variant, or finds a default value based on the + // /// required arity. An error occurs if the arity is not compatible with + // /// any variant of RDF. + // fn rdf_variant_or_default(&self, arity: usize) -> Result { + // if self.variant == RdfVariant::Unspecified { + // match arity { + // 3 => Ok(RdfVariant::NTriples), + // 4 => Ok(RdfVariant::NQuads), + // _ => Err(ImportExportError::InvalidArity { arity, expected: 3 }), + // } + // } else { + // Ok(self.variant) + // } + // } + + // /// Returns the set value formats, or finds a default value based on the + // /// required arity. + // fn value_formats_or_default(&self, arity: usize) -> Vec { + // self.value_formats.clone().unwrap_or_else(|| { + // Self::formats_from_strings(ImportExportHandlers::default_value_format_strings(arity)) + // .unwrap() + // }) + // } } impl ImportExportHandler for RdfHandler { fn file_format(&self) -> FileFormat { - FileFormat::RDF(self.variant) + self.variant.file_format() } - fn reader( - &self, - read: Box, - arity: usize, - ) -> Result, Error> { + fn reader(&self, read: Box) -> Result, Error> { Ok(Box::new(RdfReader::new( read, - self.rdf_variant_or_default(arity)?, + self.variant, self.base.clone(), - self.value_formats_or_default(arity), + self.value_formats.clone(), self.limit, ))) } - fn writer(&self, writer: Box, arity: usize) -> Result, Error> { + fn writer(&self, writer: Box) -> Result, Error> { Ok(Box::new(RdfWriter::new( writer, - self.rdf_variant_or_default(arity)?, - self.value_formats_or_default(arity), + self.variant, + self.value_formats.clone(), self.limit, ))) } - fn predicate_arity(&self) -> Option { - // Our extraction ensures that there is always a suitable default - // list of value formats if we know the RDF variant. - match self.direction { - Direction::Import => self.value_formats.as_ref().map(|vfs| { - vfs.iter().fold(0, |acc, fmt| { - if *fmt == RdfValueFormat::Skip { - acc - } else { - acc + 1 - } - }) - }), - Direction::Export => self.value_formats.as_ref().map(|vfs| vfs.len()), - } + fn predicate_arity(&self) -> usize { + self.value_formats.arity() } - fn file_extension(&self) -> Option { - match self.variant { - RdfVariant::Unspecified => None, - RdfVariant::NTriples => Some("nt".to_string()), - RdfVariant::NQuads => Some("nq".to_string()), - RdfVariant::Turtle => Some("ttl".to_string()), - RdfVariant::RDFXML => Some("rdf".to_string()), - RdfVariant::TriG => Some("trig".to_string()), - } + fn file_extension(&self) -> String { + self.file_format().extension().to_string() } - fn compression_format(&self) -> Option { + fn compression_format(&self) -> CompressionFormat { self.compression_format } diff --git a/nemo/src/io/formats/rdf/error.rs b/nemo/src/io/formats/rdf/error.rs new file mode 100644 index 000000000..775e4ddb2 --- /dev/null +++ b/nemo/src/io/formats/rdf/error.rs @@ -0,0 +1,30 @@ +//! 
Defines the errors that can occur while reading or writing RDF resources.
+
+use thiserror::Error;
+
+use nemo_physical::{datavalues::DataValueCreationError, resource::Resource};
+
+/// Errors that can occur when reading/writing RDF resources and converting them
+/// to/from [AnyDataValue]s.
+#[allow(variant_size_differences)]
+#[derive(Error, Debug)]
+pub enum RdfFormatError {
+    /// A problem occurred in converting an RDF term to a data value.
+    #[error(transparent)]
+    DataValueConversion(#[from] DataValueCreationError),
+    /// Error of encountering an unsupported value in subject position
+    #[error("values used as subjects of RDF triples must be IRIs or nulls")]
+    RdfInvalidSubject,
+    /// Error of encountering RDF* features in data
+    #[error("RDF* terms are not supported")]
+    RdfStarUnsupported,
+    /// Error in Rio's Turtle parser
+    #[error(transparent)]
+    RioTurtle(#[from] rio_turtle::TurtleError),
+    /// Error in Rio's RDF/XML parser
+    #[error(transparent)]
+    RioXml(#[from] rio_xml::RdfXmlError),
+    /// Unable to determine RDF format.
+    #[error("could not determine which RDF parser to use for resource {0}")]
+    UnknownRdfFormat(Resource),
+}
diff --git a/nemo/src/io/formats/rdf_reader.rs b/nemo/src/io/formats/rdf/reader.rs
similarity index 94%
rename from nemo/src/io/formats/rdf_reader.rs
rename to nemo/src/io/formats/rdf/reader.rs
index b9cfa9d8a..12e1bf208 100644
--- a/nemo/src/io/formats/rdf_reader.rs
+++ b/nemo/src/io/formats/rdf/reader.rs
@@ -1,5 +1,6 @@
 //! Reader for various RDF formats, which supports triples files (N-Triples, Turtle, RDF/XML) and
 //! quads files (N-Quads, TriG).
+
 use bytesize::ByteSize;
 use nemo_physical::{
     datasources::{table_providers::TableProvider, tuple_writer::TupleWriter},
@@ -17,9 +18,13 @@ use rio_api::{
 use rio_turtle::{NQuadsParser, NTriplesParser, TriGParser, TurtleParser};
 use rio_xml::RdfXmlParser;
 
-use crate::{io::formats::PROGRESS_NOTIFY_INCREMENT, model::RdfVariant};
+use crate::io::formats::PROGRESS_NOTIFY_INCREMENT;
 
-use super::rdf::{RdfFormatError, RdfValueFormat};
+use super::{
+    error::RdfFormatError,
+    value_format::{RdfValueFormat, RdfValueFormats},
+    RdfVariant,
+};
 
 /// IRI to be used for the default graph used by Nemo when loading RDF data with
 /// named graphs (quads).
@@ -30,10 +35,15 @@ const DEFAULT_GRAPH: &str = "tag:nemo:defaultgraph";
 
 /// A [TableProvider] for RDF 1.1 files containing triples.
 pub(super) struct RdfReader {
+    /// Buffer from which content is read
    read: Box<dyn BufRead>,
+    /// RDF format
    variant: RdfVariant,
+    /// Base URL, if given
    base: Option<Iri<String>>,
-    value_formats: Vec<RdfValueFormat>,
+    /// The [RdfValueFormat]s to consider when parsing
+    value_formats: RdfValueFormats,
+    /// If given, this reader will only consider the first `limit` entries
    limit: Option<u64>,
    /// Map to store how nulls relate to blank nodes.
    ///
@@ -45,12 +55,12 @@ pub(super) struct RdfReader {
 }
 
 impl RdfReader {
-    /// Create a new [RDFReader]
+    /// Create a new [RdfReader].
    pub(super) fn new(
        read: Box<dyn BufRead>,
        variant: RdfVariant,
        base: Option<Iri<String>>,
-        value_formats: Vec<RdfValueFormat>,
+        value_formats: RdfValueFormats,
        limit: Option<u64>,
    ) -> Self {
        Self {
@@ -63,10 +73,13 @@ impl RdfReader {
        }
    }
 
+    /// Convert a [NamedNode] to an [AnyDataValue].
    fn datavalue_from_named_node(value: NamedNode) -> AnyDataValue {
        AnyDataValue::new_iri(value.iri.to_string())
    }
 
+    /// Create an [AnyDataValue] from a [BlankNode],
+    /// adding it to the given [NullMap] if new.
    fn datavalue_from_blank_node(
        bnode_map: &mut NullMap,
        tuple_writer: &mut TupleWriter,
@@ -95,6 +108,10 @@ impl RdfReader {
        }
    }
 
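// Editor's note, not part of the patch: the `bnode_map` above implements a simple
// memoization so that one blank-node label maps to exactly one fresh null per
// file. A self-contained sketch of the pattern, with `u64` standing in for
// Nemo's null values and `fresh_null` for the tuple writer's null factory:
//
//     use std::collections::HashMap;
//
//     fn null_for_blank_node(
//         bnode_map: &mut HashMap<String, u64>,
//         fresh_null: &mut impl FnMut() -> u64,
//         label: &str,
//     ) -> u64 {
//         if let Some(&null) = bnode_map.get(label) {
//             null // the label was seen before: reuse its null
//         } else {
//             let null = fresh_null();
//             bnode_map.insert(label.to_string(), null);
//             null
//         }
//     }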
+    /// Create an [AnyDataValue] for a given [Subject].
+    ///
+    /// This function might create new nulls and enter
+    /// them into the given [NullMap].
    fn datavalue_from_subject(
        bnode_map: &mut NullMap,
        tuple_writer: &mut TupleWriter,
@@ -109,6 +126,10 @@ impl RdfReader {
        }
    }
 
+    /// Create an [AnyDataValue] for a given [Term].
+    ///
+    /// This function might create new nulls and enter
+    /// them into the given [NullMap].
    fn datavalue_from_term(
        bnode_map: &mut NullMap,
        tuple_writer: &mut TupleWriter,
@@ -122,6 +143,10 @@ impl RdfReader {
        }
    }
 
+    /// Create an [AnyDataValue] for a given [GraphName].
+    ///
+    /// This function might create new nulls and enter
+    /// them into the given [NullMap].
    fn datavalue_from_graph_name(
        bnode_map: &mut NullMap,
        tuple_writer: &mut TupleWriter,
@@ -325,9 +350,6 @@ impl TableProvider for RdfReader {
            RdfVariant::TriG => self.read_quads_with_parser(tuple_writer, |read| {
                TriGParser::new(read, base_iri.clone())
            }),
-            RdfVariant::Unspecified => unreachable!(
-                "the reader should not be instantiated with unknown format by the handler"
-            ),
        }
    }
 }
@@ -361,7 +383,7 @@ mod test {
    use rio_turtle::{NTriplesParser, TurtleParser};
    use test_log::test;
 
-    use crate::{io::formats::rdf::RdfValueFormat, model::RdfVariant};
+    use crate::io::formats::rdf::{value_format::RdfValueFormats, RdfVariant};
 
    #[test]
    fn parse_triples_nt() {
@@ -376,11 +398,7 @@
            Box::new(data),
            RdfVariant::NTriples,
            None,
-            vec![
-                RdfValueFormat::Anything,
-                RdfValueFormat::Anything,
-                RdfValueFormat::Anything,
-            ],
+            RdfValueFormats::default(3),
            None,
        );
        let dict = RefCell::new(Dict::default());
@@ -401,11 +419,7 @@
            Box::new(data),
            RdfVariant::Turtle,
            None,
-            vec![
-                RdfValueFormat::Anything,
-                RdfValueFormat::Anything,
-                RdfValueFormat::Anything,
-            ],
+            RdfValueFormats::default(3),
            None,
        );
        let dict = RefCell::new(Dict::default());
@@ -428,11 +442,7 @@
            Box::new(data),
            RdfVariant::NTriples,
            None,
-            vec![
-                RdfValueFormat::Anything,
-                RdfValueFormat::Anything,
-                RdfValueFormat::Anything,
-            ],
+            RdfValueFormats::default(3),
            None,
        );
        let dict = RefCell::new(Dict::default());
diff --git a/nemo/src/io/formats/rdf/value_format.rs b/nemo/src/io/formats/rdf/value_format.rs
new file mode 100644
index 000000000..ee4ccc769
--- /dev/null
+++ b/nemo/src/io/formats/rdf/value_format.rs
@@ -0,0 +1,94 @@
+//! This module defines the syntactic formats supported for values in RDF files.
+//! This includes reading (parsing) and writing (serialization)
+//! for each supported format.
+
+use enum_assoc::Assoc;
+
+use crate::{
+    rule_model::components::term::{primitive::Primitive, tuple::Tuple, Term},
+    syntax::directive::value_formats,
+};
+
+/// Enum for the value formats that are supported for RDF. In many cases,
+/// RDF defines how formatting should be done, so there is not much to select here.
+///
+/// Note that, irrespective of the format, RDF restricts the terms that are
+/// allowed in subject, predicate, and graph name positions, and only such terms
+/// will be handled there (others are dropped silently).
+#[derive(Assoc, Debug, Clone, Copy, PartialEq, Eq)]
+#[func(pub fn name(&self) -> &'static str)]
+#[func(pub fn from_name(name: &str) -> Option<Self>)]
+pub(super) enum RdfValueFormat {
+    /// General format that accepts any RDF term.
+    #[assoc(name = value_formats::ANY)]
+    #[assoc(from_name = value_formats::ANY)]
+    Anything,
+    /// Special format to indicate that the value should be skipped as if the whole
+    /// column were not there.
+    #[assoc(name = value_formats::SKIP)]
+    #[assoc(from_name = value_formats::SKIP)]
+    Skip,
+}
+
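// Editor's note, not part of the patch: a sketch of how "skip" interacts with
// arity under the `RdfValueFormats` wrapper introduced just below. A triples
// file read with formats (any, skip, any) has file arity 3 but fills a binary
// predicate:
//
//     #[test]
//     fn skip_reduces_arity_sketch() {
//         let formats = RdfValueFormats::new(vec![
//             RdfValueFormat::Anything,
//             RdfValueFormat::Skip,
//             RdfValueFormat::Anything,
//         ]);
//         assert_eq!(formats.len(), 3); // columns in the file
//         assert_eq!(formats.arity(), 2); // columns kept for the predicate
//     }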
+/// Indicate what value parser should be used for each column.
+#[derive(Debug, Clone)]
+pub(crate) struct RdfValueFormats(Vec<RdfValueFormat>);
+
+impl RdfValueFormats {
+    pub(crate) fn new(formats: Vec<RdfValueFormat>) -> Self {
+        Self(formats)
+    }
+
+    /// Return a list of [RdfValueFormat]s with default entries.
+    pub(crate) fn default(arity: usize) -> Self {
+        Self((0..arity).map(|_| RdfValueFormat::Anything).collect())
+    }
+
+    /// Create an [RdfValueFormats] from a [Tuple].
+    ///
+    /// Returns `None` if the tuple contains an unknown value.
+    pub(crate) fn from_tuple(tuple: &Tuple) -> Option<Self> {
+        let mut result = Vec::new();
+
+        for value in tuple.arguments() {
+            if let Term::Primitive(Primitive::Ground(ground)) = value {
+                if let Some(format) = RdfValueFormat::from_name(&ground.to_string()) {
+                    result.push(format);
+                    continue;
+                }
+            }
+
+            return None;
+        }
+
+        Some(Self::new(result))
+    }
+
+    /// Return the length of the format tuple.
+    pub(crate) fn len(&self) -> usize {
+        self.0.len()
+    }
+
+    /// Return whether the tuple is empty.
+    pub(crate) fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
+    /// Return the arity (ignoring the skipped columns).
+    pub(crate) fn arity(&self) -> usize {
+        let mut arity = 0;
+
+        for &format in &self.0 {
+            if format != RdfValueFormat::Skip {
+                arity += 1;
+            }
+        }
+
+        arity
+    }
+
+    /// Return an iterator over the [RdfValueFormat]s.
+    pub(crate) fn iter(&self) -> impl Iterator<Item = &RdfValueFormat> {
+        self.0.iter()
+    }
+}
diff --git a/nemo/src/io/formats/rdf_writer.rs b/nemo/src/io/formats/rdf/writer.rs
similarity index 97%
rename from nemo/src/io/formats/rdf_writer.rs
rename to nemo/src/io/formats/rdf/writer.rs
index 078c8671a..5935cbb52 100644
--- a/nemo/src/io/formats/rdf_writer.rs
+++ b/nemo/src/io/formats/rdf/writer.rs
@@ -9,8 +9,15 @@ use rio_turtle::{NQuadsFormatter, NTriplesFormatter, TriGFormatter, TurtleFormat
 use rio_xml::RdfXmlFormatter;
 use std::io::Write;
 
-use super::{rdf::RdfValueFormat, types::TableWriter};
-use crate::{error::Error, io::formats::PROGRESS_NOTIFY_INCREMENT, model::RdfVariant};
+use crate::{
+    error::Error,
+    io::formats::{TableWriter, PROGRESS_NOTIFY_INCREMENT},
+};
+
+use super::{
+    value_format::{RdfValueFormat, RdfValueFormats},
+    RdfVariant,
+};
 
 /// Private struct to record the type of an RDF term that
 /// is to be created on demand.
@@ -184,7 +191,7 @@ impl<'a> QuadBuffer {
 pub(super) struct RdfWriter {
    writer: Box<dyn Write>,
    variant: RdfVariant,
-    value_formats: Vec<RdfValueFormat>,
+    value_formats: RdfValueFormats,
    limit: Option<u64>,
 }
 
@@ -192,7 +199,7 @@ impl RdfWriter {
    pub(super) fn new(
        writer: Box<dyn Write>,
        variant: RdfVariant,
-        value_formats: Vec<RdfValueFormat>,
+        value_formats: RdfValueFormats,
        limit: Option<u64>,
    ) -> Self {
        RdfWriter {
@@ -377,9 +384,6 @@ impl TableWriter for RdfWriter {
                    let _ = f.finish();
                },
            ),
-            RdfVariant::Unspecified => unreachable!(
-                "the writer should not be instantiated with unknown format by the handler"
-            ),
        }
    }
 }
diff --git a/nemo/src/io/formats/types.rs b/nemo/src/io/formats/types.rs
deleted file mode 100644
index 9625a0e08..000000000
--- a/nemo/src/io/formats/types.rs
+++ /dev/null
@@ -1,26 +0,0 @@
-//! Types related to input and output formats.
-
-use nemo_physical::datavalues::AnyDataValue;
-
-use crate::error::Error;
-
-/// Direction of import/export activities.
-/// We often share code for the two directions, and a direction
-/// is then used to enable smaller distinctions where needed.
-#[derive(Debug, Copy, Clone, PartialEq, Eq)]
-pub(crate) enum Direction {
-    /// Processing input. 
- Import, - /// Processing output. - Export, -} - -/// A trait for exporting table data, e.g., to some file. -// TODO Maybe this should be directly in io, since it is the interface to the OutputManager? -pub trait TableWriter { - /// Export a table. - fn export_table_data<'a>( - self: Box, - table: Box> + 'a>, - ) -> Result<(), Error>; -} diff --git a/nemo/src/io/import_manager.rs b/nemo/src/io/import_manager.rs index 957153cd6..4fd32b1b6 100644 --- a/nemo/src/io/import_manager.rs +++ b/nemo/src/io/import_manager.rs @@ -2,16 +2,9 @@ use nemo_physical::datasources::table_providers::TableProvider; -use crate::{ - error::Error, - io::resource_providers::ResourceProviders, - model::{ImportDirective, PARAMETER_NAME_FORMAT}, -}; +use crate::{error::Error, io::resource_providers::ResourceProviders}; -use super::{ - compression_format::CompressionFormat, - formats::import_export::{ImportExportError, ImportExportHandler, ImportExportHandlers}, -}; +use super::formats::ImportExportHandler; /// Manages everything related to resolving the inputs of a Nemo program. /// Currently, this is only the resource providers. @@ -27,51 +20,51 @@ impl ImportManager { Self { resource_providers } } - /// Validates the given [ImportDirective]. - /// - /// TODO: Currently, this only checks the coherence of the given settings, - /// without actually trying to access the resource. Some set-ups, such as WASM, - /// may actually want to validate without such a check, but this can be done - /// via `resource()`. - pub fn validate(&self, import_directive: &ImportDirective) -> Result<(), Error> { - ImportExportHandlers::import_handler(import_directive)?; - Ok(()) - } + // /// Validates the given [ImportDirective]. + // /// + // /// TODO: Currently, this only checks the coherence of the given settings, + // /// without actually trying to access the resource. Some set-ups, such as WASM, + // /// may actually want to validate without such a check, but this can be done + // /// via `resource()`. + // pub fn validate(&self, import_directive: &ImportDirective) -> Result<(), Error> { + // ImportExportHandlers::import_handler(import_directive)?; + // Ok(()) + // } - /// Returns the resource that data is to be imported from according - /// to this [ImportDirective]. - pub fn resource(import_directive: &ImportDirective) -> Result { - let handler = ImportExportHandlers::import_handler(import_directive)?; - if let Some(resource) = handler.resource() { - Ok(resource) - } else { - unreachable!("handler validation should make sure that all imports have a resource"); - } - } + // /// Returns the resource that data is to be imported from according + // /// to this [ImportDirective]. + // pub fn resource(import_directive: &ImportDirective) -> Result { + // let handler = ImportExportHandlers::import_handler(import_directive)?; + // if let Some(resource) = handler.resource() { + // Ok(resource) + // } else { + // unreachable!("handler validation should make sure that all imports have a resource"); + // } + // } - /// Constructs a [TableProvider] from the given [ImportDirective]. - /// The arity, if given, defines the expected arity of the data: it is validated if - /// the import directive is compatible with this assumption. - pub fn table_provider( - &self, - import_directive: &ImportDirective, - expected_arity: Option, - ) -> Result, Error> { - let handler = ImportExportHandlers::import_handler(import_directive)?; + // /// Constructs a [TableProvider] from the given [ImportDirective]. 
+ // /// The arity, if given, defines the expected arity of the data: it is validated if + // /// the import directive is compatible with this assumption. + // pub fn table_provider( + // &self, + // import_directive: &ImportDirective, + // expected_arity: Option, + // ) -> Result, Error> { + // let handler = ImportExportHandlers::import_handler(import_directive)?; - let arity; - if let Some(expected_arity) = expected_arity { - arity = expected_arity; - } else if let Some(expected_arity) = handler.predicate_arity() { - arity = expected_arity; - } else { - // Note: this only occurs if imported data is not used in any arity-determining way, which should be rare. - return Err( - ImportExportError::MissingAttribute(PARAMETER_NAME_FORMAT.to_string()).into(), - ); - } - self.table_provider_from_handler(&*handler, arity) - } + // let arity; + // if let Some(expected_arity) = expected_arity { + // arity = expected_arity; + // } else if let Some(expected_arity) = handler.predicate_arity() { + // arity = expected_arity; + // } else { + // // Note: this only occurs if imported data is not used in any arity-determining way, which should be rare. + // return Err( + // ImportExportError::MissingAttribute(PARAMETER_NAME_FORMAT.to_string()).into(), + // ); + // } + // self.table_provider_from_handler(&*handler, arity) + // } /// Constructs a [TableProvider] from the given [ImportExportHandler]. /// The expeced arity can reflect additional knowledge of the caller (or might be taken @@ -80,24 +73,12 @@ impl ImportManager { pub(crate) fn table_provider_from_handler( &self, handler: &dyn ImportExportHandler, - expected_arity: usize, ) -> Result, Error> { - if let Some(import_arity) = handler.predicate_arity() { - if import_arity != expected_arity { - return Err(ImportExportError::InvalidArity { - arity: import_arity, - expected: expected_arity, - } - .into()); - } - } let reader = self.resource_providers.open_resource( &handler.resource().expect("checked when making handler"), - handler - .compression_format() - .unwrap_or(CompressionFormat::None), + handler.compression_format(), )?; - handler.reader(reader, expected_arity) + handler.reader(reader) } } diff --git a/nemo/src/io/lexer.rs b/nemo/src/io/lexer.rs deleted file mode 100644 index 648504c17..000000000 --- a/nemo/src/io/lexer.rs +++ /dev/null @@ -1,701 +0,0 @@ -//! Lexical tokenization of rulewerk-style rules. 
-// FIXME: temporary, because this module probably will get removed -#![allow(unused)] -#![allow(missing_docs)] - -use std::{cell::RefCell, ops::Range}; - -use super::parser::context; -use nom::{ - branch::alt, - bytes::complete::{is_not, tag, take, take_till}, - character::complete::{alpha1, alphanumeric1, digit1, line_ending, multispace0, multispace1}, - combinator::{cut, map, opt, recognize}, - error::ParseError, - multi::{many0, many1}, - sequence::{delimited, pair, tuple}, - IResult, -}; -use nom_locate::LocatedSpan; -use nom_supreme::{context::ContextError, error::GenericErrorTree}; -use tower_lsp::lsp_types::SymbolKind; - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub enum Context { - Tag(&'static str), - Exponent, - Punctuations, - Operators, - Identifier, - Iri, - Number, - String, - Comment, - DocComment, - TlDocComment, - Comments, - Whitespace, - Illegal, - Program, - Fact, - Rule, - RuleHead, - RuleBody, - Directive, - DirectiveBase, - DirectivePrefix, - DirectiveImport, - DirectiveExport, - DirectiveOutput, - List, - HeadAtoms, - BodyAtoms, - PositiveAtom, - NegativeAtom, - InfixAtom, - Tuple, - NamedTuple, - Map, - Pair, - Term, - TermPrivimitive, - TermBinary, - TermAggregation, - TermTuple, - TermMap, - RdfLiteral, - PrefixedConstant, - Decimal, - Integer, - ArithmeticProduct, - ArithmeticFactor, - Blank, - UniversalVariable, - ExistentialVariable, -} -impl std::fmt::Display for Context { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Context::Tag(c) => write!(f, "{}", c), - Context::Exponent => write!(f, "exponent"), - Context::Punctuations => write!(f, "punctuations"), - Context::Operators => write!(f, "operators"), - Context::Identifier => write!(f, "identifier"), - Context::Iri => write!(f, "lex iri"), - Context::Number => write!(f, "lex number"), - Context::String => write!(f, "lex string"), - Context::Comment => write!(f, "lex comment"), - Context::DocComment => write!(f, "lex documentation comment"), - Context::TlDocComment => write!(f, "lex top level documentation comment"), - Context::Comments => write!(f, "comments"), - Context::Whitespace => write!(f, "lex whitespace"), - Context::Illegal => write!(f, "lex illegal character"), - Context::Program => write!(f, "program"), - Context::Fact => write!(f, "fact"), - Context::Rule => write!(f, "rule"), - Context::RuleHead => write!(f, "rule head"), - Context::RuleBody => write!(f, "rule body"), - Context::Directive => write!(f, "directive"), - Context::DirectiveBase => write!(f, "base directive"), - Context::DirectivePrefix => write!(f, "prefix directive"), - Context::DirectiveImport => write!(f, "import directive"), - Context::DirectiveExport => write!(f, "export directive"), - Context::DirectiveOutput => write!(f, "output directive"), - Context::List => write!(f, "list"), - Context::HeadAtoms => write!(f, "head atoms"), - Context::BodyAtoms => write!(f, "body atoms"), - Context::PositiveAtom => write!(f, "positive atom"), - Context::NegativeAtom => write!(f, "negative atom"), - Context::InfixAtom => write!(f, "infix atom"), - Context::Tuple => write!(f, "tuple"), - Context::NamedTuple => write!(f, "named tuple"), - Context::Map => write!(f, "map"), - Context::Pair => write!(f, "pair"), - Context::Term => write!(f, "term"), - Context::TermPrivimitive => write!(f, "primitive term"), - Context::TermBinary => write!(f, "binary term"), - Context::TermAggregation => write!(f, "aggreation term"), - Context::TermTuple => write!(f, "tuple term"), - Context::TermMap => 
write!(f, "map term"), - Context::RdfLiteral => write!(f, "rdf literal"), - Context::PrefixedConstant => write!(f, "prefixed constant"), - Context::Decimal => write!(f, "decimal"), - Context::Integer => write!(f, "integer"), - Context::ArithmeticProduct => write!(f, "arithmetic product"), - Context::ArithmeticFactor => write!(f, "arithmetic factor"), - Context::Blank => write!(f, "blank"), - Context::UniversalVariable => write!(f, "universal variable"), - Context::ExistentialVariable => write!(f, "existential variable"), - } - } -} - -pub(crate) type ErrorTree = - GenericErrorTree>; - -use super::parser::{ - ast::{AstNode, Position}, - types::{Input, ToRange}, -}; - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct Error { - pub pos: Position, - pub msg: String, - pub context: Vec, -} - -#[derive(Debug, Clone, Copy, PartialEq)] -pub(crate) struct ParserState<'a> { - pub(crate) errors: &'a RefCell>, -} -impl ParserState<'_> { - pub fn report_error(&self, error: Error) { - self.errors.borrow_mut().push(error); - } -} - -pub(crate) type Span<'a> = LocatedSpan<&'a str>; - -impl ToRange for Span<'_> { - fn to_range(&self) -> Range { - let start = self.location_offset(); - let end = start + self.fragment().len(); - start..end - } -} -impl AstNode for Span<'_> { - fn children(&self) -> Option> { - None - } - - fn span(&self) -> Span { - *self - } - - fn is_leaf(&self) -> bool { - true - } - - fn name(&self) -> String { - self.fragment().to_string() - } - - fn lsp_identifier(&self) -> Option<(String, String)> { - // This was todo!() before but we don't want to panic here. - None - } - - fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> { - // This was todo!() before but we don't want to panic here. - None - } - - fn lsp_range_to_rename(&self) -> Option { - // This was todo!() before but we don't want to panic here. - None - } -} - -pub(crate) fn to_range(span: Span<'_>) -> Range { - let start = span.location_offset(); - let end = start + span.fragment().len(); - start..end -} - -/// All the tokens the input gets parsed into. -#[derive(Debug, PartialEq, Copy, Clone)] -pub(crate) enum TokenKind { - // Syntactic symbols: - /// '?' - QuestionMark, - /// '!' - ExclamationMark, - /// '(' - OpenParen, - /// ')' - CloseParen, - /// '[' - OpenBracket, - /// ']' - CloseBracket, - /// '{' - OpenBrace, - /// '}' - CloseBrace, - /// '.' 
- Dot, - /// ',' - Comma, - /// ':' - Colon, - /// `:-` - Arrow, - /// '>' - Greater, - /// `>=` - GreaterEqual, - /// '=' - Equal, - /// `<=` - LessEqual, - /// '<' - Less, - /// `!=` - Unequal, - /// '~' - Tilde, - /// '^' - Caret, - /// '#' - Hash, - /// '_' - Underscore, - /// '@' - At, - /// '+' - Plus, - /// '-' - Minus, - /// '*' - Star, - /// '/' - Slash, - /// 'e' or 'E' - Exponent, - // Multi-char tokens: - /// Identifier for keywords and names - Ident, - /// Identifier with a prefix, like `xsd:decimal` - PrefixedIdent, - /// Variable like `?var` - Variable, - /// Existential Variable like `!var` - Existential, - /// Aggregate identifier like `#sum` - Aggregate, - /// IRI, delimited with `<` and `>` - Iri, - /// Base 10 digits - Number, - /// A string literal, delimited with `"` - String, - /// A comment, starting with `%` - Comment, - /// A comment, starting with `%%` - DocComment, - /// A comment, starting with `%!` - TlDocComment, - /// ` `, `\t`, `\r` or `\n` - Whitespace, - /// base directive keyword - Base, - /// prefix directive keyword - Prefix, - /// import directive keyword - Import, - /// export directive keyword - Export, - /// output directive keyword - Output, - /// Ident for prefixes - PrefixIdent, - /// catch all token - Illegal, - /// signals end of file - Eof, - /// signals an error - Error, -} -impl std::fmt::Display for TokenKind { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - TokenKind::QuestionMark => write!(f, "QuestionMark"), - TokenKind::ExclamationMark => write!(f, "ExclamationMark"), - TokenKind::OpenParen => write!(f, "OpenParen"), - TokenKind::CloseParen => write!(f, "CloseParen"), - TokenKind::OpenBracket => write!(f, "OpenBracket"), - TokenKind::CloseBracket => write!(f, "CloseBracket"), - TokenKind::OpenBrace => write!(f, "OpenBrace"), - TokenKind::CloseBrace => write!(f, "CloseBrace"), - TokenKind::Dot => write!(f, "Dot"), - TokenKind::Comma => write!(f, "Comma"), - TokenKind::Colon => write!(f, "Colon"), - TokenKind::Arrow => write!(f, "Arrow"), - TokenKind::Greater => write!(f, "Greater"), - TokenKind::GreaterEqual => write!(f, "GreaterEqual"), - TokenKind::Equal => write!(f, "Equal"), - TokenKind::LessEqual => write!(f, "LessEqual"), - TokenKind::Less => write!(f, "Less"), - TokenKind::Unequal => write!(f, "Unequal"), - TokenKind::Tilde => write!(f, "Tilde"), - TokenKind::Caret => write!(f, "Caret"), - TokenKind::Hash => write!(f, "Hash"), - TokenKind::Underscore => write!(f, "Underscore"), - TokenKind::At => write!(f, "At"), - TokenKind::Plus => write!(f, "Plus"), - TokenKind::Minus => write!(f, "Minus"), - TokenKind::Star => write!(f, "Star"), - TokenKind::Slash => write!(f, "Slash"), - TokenKind::Exponent => write!(f, "Exponent"), - TokenKind::Ident => write!(f, "Ident"), - TokenKind::PrefixedIdent => write!(f, "Prefixed Ident"), - TokenKind::Variable => write!(f, "Variable"), - TokenKind::Existential => write!(f, "Existential"), - TokenKind::Aggregate => write!(f, "Aggregate"), - TokenKind::Iri => write!(f, "Iri"), - TokenKind::Number => write!(f, "Number"), - TokenKind::String => write!(f, "String"), - TokenKind::Comment => write!(f, "Comment"), - TokenKind::DocComment => write!(f, "DocComment"), - TokenKind::TlDocComment => write!(f, "TlDocComment"), - TokenKind::Whitespace => write!(f, "Whitespace"), - TokenKind::Base => write!(f, "Base"), - TokenKind::Prefix => write!(f, "Prefix"), - TokenKind::Import => write!(f, "Import"), - TokenKind::Export => write!(f, "Export"), - TokenKind::Output => 
write!(f, "Output"), - TokenKind::PrefixIdent => write!(f, "PrefixIdent"), - TokenKind::Illegal => write!(f, "Illegal"), - TokenKind::Eof => write!(f, "Eof"), - TokenKind::Error => write!(f, "\x1b[1;31mError\x1b[0m"), - } - } -} - -#[derive(Debug, Copy, Clone, PartialEq)] -pub struct Token<'a> { - pub(crate) kind: TokenKind, - pub(crate) span: Span<'a>, -} -impl<'a> Token<'a> { - pub(crate) fn new(kind: TokenKind, span: Span<'a>) -> Token<'a> { - Token { kind, span } - } -} -impl std::fmt::Display for Token<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let offset = self.span.location_offset(); - let line = self.span.location_line(); - let column = self.span.get_utf8_column(); - let fragment = self.span.fragment(); - if self.span.extra == () { - write!( - f, - // "T!{{{0}, S!({offset}, {line}, {fragment:?})}}", - "\x1b[93mTOKEN {0} \x1b[34m@{line}:{column} ({offset}) \x1b[93m{fragment:?}\x1b[0m", - self.kind - ) - } else { - write!( - f, - // "T!{{{0}, S!({offset}, {line}, {fragment:?}, {1:?})}}", - "\x1b[93mTOKEN {0} \x1b[34m@{line}:{column} ({offset}) \x1b[93m{fragment:?}\x1b[0m, {1:?}\x1b[0m", - self.kind, self.span.extra - ) - } - } -} -impl<'a> AstNode for Token<'a> { - fn children(&self) -> Option> { - None::> - } - - fn span(&self) -> Span { - self.span - } - - fn is_leaf(&self) -> bool { - true - } - - fn lsp_identifier(&self) -> Option<(String, String)> { - None - } - - fn lsp_range_to_rename(&self) -> Option { - None - } - - fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> { - None - } - - fn name(&self) -> String { - String::from("Token") - } -} - -// pub(crate) fn map_err<'a, 's, O, E: ParseError>>( -// mut f: impl nom::Parser, O, E>, -// mut op: impl FnMut(E) -> NewParseError, -// ) -> impl FnMut(Input<'a, 's>) -> IResult, O> { -// move |input| { -// f.parse(input).map_err(|e| match e { -// nom::Err::Incomplete(err) => nom::Err::Incomplete(err), -// nom::Err::Error(err) => nom::Err::Error(op(err)), -// nom::Err::Failure(err) => nom::Err::Error(op(err)), -// }) -// } -// } - -macro_rules! 
syntax { - ($func_name: ident, $tag_str: literal, $token: expr) => { - pub(crate) fn $func_name<'a, 's, E>( - input: Input<'a, 's>, - ) -> IResult, Span<'a>, E> - where - E: ParseError> + ContextError, Context>, - { - map( - context(Context::Tag($tag_str), tag($tag_str)), - |input: Input| input.input, - )(input) - } - }; -} - -syntax!(open_paren, "(", TokenKind::OpenParen); -syntax!(close_paren, ")", TokenKind::CloseParen); -syntax!(open_bracket, "[", TokenKind::OpenBracket); -syntax!(close_bracket, "]", TokenKind::CloseBracket); -syntax!(open_brace, "{", TokenKind::OpenBrace); -syntax!(close_brace, "}", TokenKind::CloseBrace); -syntax!(dot, ".", TokenKind::Dot); -syntax!(comma, ",", TokenKind::Comma); -syntax!(colon, ":", TokenKind::Colon); -syntax!(arrow, ":-", TokenKind::Arrow); -syntax!(question_mark, "?", TokenKind::QuestionMark); -syntax!(exclamation_mark, "!", TokenKind::ExclamationMark); -syntax!(tilde, "~", TokenKind::Tilde); -syntax!(caret, "^", TokenKind::Caret); -syntax!(hash, "#", TokenKind::Hash); -syntax!(underscore, "_", TokenKind::Underscore); -syntax!(at, "@", TokenKind::At); -syntax!(exp_lower, "e", TokenKind::Exponent); -syntax!(exp_upper, "E", TokenKind::Exponent); - -pub(crate) fn exp<'a, 's, E>(input: Input<'a, 's>) -> IResult, Span<'a>, E> -where - E: ParseError> + ContextError, Context>, -{ - context(Context::Exponent, alt((exp_lower, exp_upper)))(input) -} - -pub(crate) fn lex_punctuations<'a, 's, E>( - input: Input<'a, 's>, -) -> IResult, Span<'a>, E> -where - E: ParseError> + ContextError, Context>, -{ - context( - Context::Punctuations, - alt(( - arrow, - open_paren, - close_paren, - open_bracket, - close_bracket, - open_brace, - close_brace, - dot, - comma, - colon, - question_mark, - exclamation_mark, - tilde, - caret, - hash, - underscore, - at, - )), - )(input) -} - -syntax!(less, "<", TokenKind::Less); -syntax!(less_equal, "<=", TokenKind::LessEqual); -syntax!(equal, "=", TokenKind::Equal); -syntax!(greater_equal, ">=", TokenKind::GreaterEqual); -syntax!(greater, ">", TokenKind::Greater); -syntax!(unequal, "!=", TokenKind::Unequal); -syntax!(plus, "+", TokenKind::Plus); -syntax!(minus, "-", TokenKind::Minus); -syntax!(star, "*", TokenKind::Star); -syntax!(slash, "/", TokenKind::Slash); - -pub(crate) fn lex_operators<'a, 's, E>(input: Input<'a, 's>) -> IResult, Span<'a>, E> -where - E: ParseError> + ContextError, Context>, -{ - context( - Context::Operators, - alt(( - less_equal, - greater_equal, - unequal, - less, - equal, - greater, - plus, - minus, - star, - slash, - )), - )(input) -} - -/// This function lexes the name of a predicate or a map, called the tag of the predicate/map. 
-pub(crate) fn lex_tag<'a, 's, E>(input: Input<'a, 's>) -> IResult, Span<'a>, E> -where - E: ParseError> + ContextError, Context>, -{ - context( - Context::Identifier, - recognize(pair( - alpha1, - many0(alt((alphanumeric1, tag("_"), tag("-")))), - )), - )(input) - .map(|(rest_input, ident)| (rest_input, ident.input)) -} - -pub(crate) fn lex_prefixed_ident<'a, 's, E>( - input: Input<'a, 's>, -) -> IResult, Span<'a>, E> -where - E: ParseError> + ContextError, Context>, -{ - recognize(tuple((opt(lex_tag), colon, lex_tag)))(input) - .map(|(rest_input, prefixed_ident)| (rest_input, prefixed_ident.input)) -} - -pub(crate) fn lex_iri<'a, 's, E>(input: Input<'a, 's>) -> IResult, Span<'a>, E> -where - E: ParseError> + ContextError, Context>, -{ - context( - Context::Iri, - recognize(delimited(tag("<"), is_not("> \n"), cut(tag(">")))), - )(input) - .map(|(rest, result)| (rest, result.input)) -} - -pub(crate) fn lex_number<'a, 's, E>(input: Input<'a, 's>) -> IResult, Span<'a>, E> -where - E: ParseError> + ContextError, Context>, -{ - context(Context::Number, digit1)(input).map(|(rest_input, result)| (rest_input, result.input)) -} - -pub(crate) fn lex_string<'a, 's, E>(input: Input<'a, 's>) -> IResult, Span<'a>, E> -where - E: ParseError> + ContextError, Context>, -{ - context( - Context::String, - recognize(delimited(tag("\""), is_not("\""), cut(tag("\"")))), - )(input) - .map(|(rest, result)| (rest, result.input)) -} - -pub(crate) fn lex_comment<'a, 's, E>(input: Input<'a, 's>) -> IResult, Span<'a>, E> -where - E: ParseError> + ContextError, Context>, -{ - context( - Context::Comment, - recognize(tuple((tag("%"), many0(is_not("\n")), line_ending))), - )(input) - .map(|(rest, result)| (rest, result.input)) -} - -pub(crate) fn lex_doc_comment<'a, 's, E>( - input: Input<'a, 's>, -) -> IResult, Span<'a>, E> -where - E: ParseError> + ContextError, Context>, -{ - context( - Context::DocComment, - recognize(many1(tuple((tag("%%"), many0(is_not("\n")), line_ending)))), - )(input) - .map(|(rest, result)| (rest, result.input)) -} - -pub(crate) fn lex_toplevel_doc_comment<'a, 's, E>( - input: Input<'a, 's>, -) -> IResult, Span<'a>, E> -where - E: ParseError> + ContextError, Context>, -{ - context( - Context::TlDocComment, - recognize(many1(tuple((tag("%%%"), many0(is_not("\n")), line_ending)))), - )(input) - .map(|(rest, result)| (rest, result.input)) -} - -pub(crate) fn lex_comments<'a, 's, E>(input: Input<'a, 's>) -> IResult, Span<'a>, E> -where - E: ParseError> + ContextError, Context>, -{ - context( - Context::Comments, - alt((lex_toplevel_doc_comment, lex_doc_comment, lex_comment)), - )(input) -} - -pub(crate) fn lex_whitespace<'a, 's, E>(input: Input<'a, 's>) -> IResult, Span<'a>, E> -where - E: ParseError> + ContextError, Context>, -{ - context(Context::Whitespace, multispace1)(input).map(|(rest, result)| (rest, result.input)) -} - -pub(crate) fn lex_illegal<'a, 's, E>(input: Input<'a, 's>) -> IResult, Span<'a>, E> -where - E: ParseError> + ContextError, Context>, -{ - context(Context::Illegal, take(1usize))(input).map(|(rest, result)| (rest, result.input)) -} - -pub(crate) fn skip_to_statement_end<'a, 's, E>(input: Input<'a, 's>) -> (Input<'a, 's>, Span<'a>) -where - E: ParseError> + ContextError, Context>, -{ - let (rest_input, error_input) = recognize(tuple(( - take_till::<_, Input<'_, '_>, nom::error::Error<_>>(|c| c == '.'), - opt(tag(".")), - multispace0, - )))(input) - .expect("Skipping to the next dot should not fail!"); - (rest_input, error_input.input) -} - -#[cfg(test)] -mod tests { - use 
super::ErrorTree; - - use super::TokenKind::*; - use super::*; - - macro_rules! T { - ($tok_kind: expr, $offset: literal, $line: literal, $str: literal) => { - Token::new($tok_kind, unsafe { - Span::new_from_raw_offset($offset, $line, $str, ()) - }) - }; - } - - #[test] - fn skip_to_statement_end() { - let input = Span::new("some ?broken :- rule). A(Fact)."); - let refcell = RefCell::new(Vec::new()); - let errors = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state: errors, - }; - dbg!(super::skip_to_statement_end::>(input)); - } -} diff --git a/nemo/src/io/parser.rs b/nemo/src/io/parser.rs deleted file mode 100644 index 4ae877b50..000000000 --- a/nemo/src/io/parser.rs +++ /dev/null @@ -1,4864 +0,0 @@ -//! A parser for rulewerk-style rules. - -pub mod ast; -pub(crate) mod types; - -use ast::atom::Atom; -use ast::directive::Directive; -use ast::map::{Map, Pair}; -use ast::named_tuple::NamedTuple; -use ast::program::Program; -use ast::statement::{Fact, Statement}; -use ast::term::{Exponent, Primitive, Term}; -use ast::tuple::Tuple; -use ast::{List, Position, Wsoc}; -use types::Input; -pub use types::LocatedParseError; - -pub(crate) mod iri; -pub(crate) mod rfc5234; -pub(crate) mod sparql; -pub(crate) mod turtle; - -pub mod old { - //! FIXME: remove when new parser is in use and wasm binds can get updated - use super::{iri, rfc5234, sparql, turtle}; - - use std::{cell::RefCell, collections::HashMap, fmt::Debug}; - - use crate::{ - error::Error, - io::parser::types::{ArithmeticOperator, BodyExpression}, - model::*, - }; - use nemo_physical::datavalues::{ - AnyDataValue, DataValueCreationError, MapDataValue, TupleDataValue, - }; - use nom::{ - branch::alt, - bytes::complete::{is_not, tag}, - character::complete::{alpha1, digit1, multispace1, satisfy}, - combinator::{all_consuming, cut, map, map_res, opt, recognize, value}, - multi::{many0, many1, separated_list0, separated_list1}, - sequence::{delimited, pair, preceded, separated_pair, terminated, tuple}, - Err, - }; - - use macros::traced; - - pub use super::types::{span_from_str, LocatedParseError, ParseError, ParseResult}; - use super::types::{ConstraintOperator, IntermediateResult, Span}; - - /// Parse a program in the given `input`-String and return a [Program]. - /// - /// The program will be parsed and checked for unsupported features. - /// - /// # Error - /// Returns an appropriate [Error] variant on parsing and feature check issues. - pub fn parse_program(input: impl AsRef) -> Result { - let program = all_input_consumed(RuleParser::new().parse_program())(input.as_ref())?; - Ok(program) - } - - /// Parse a single fact in the given `input`-String and return a [Program]. - /// - /// The program will be parsed and checked for unsupported features. - /// - /// # Error - /// Returns an appropriate [Error] variant on parsing and feature check issues. - pub fn parse_fact(mut input: String) -> Result { - input += "."; - let fact = all_input_consumed(RuleParser::new().parse_fact())(input.as_str())?; - Ok(fact) - } - - /// A combinator to add tracing to the parser. - /// [fun] is an identifier for the parser and [parser] is the actual parser. 
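// The tracing combinator documented above (and defined just below) wraps any
// parser so that each call logs its input and its outcome. A stand-alone
// sketch of the same idea over &str, assuming nom 7 and using eprintln! in
// place of the `log` crate used by the real code:

use nom::IResult;

fn traced<'a, O: std::fmt::Debug>(
    name: &'static str,
    mut parser: impl FnMut(&'a str) -> IResult<&'a str, O>,
) -> impl FnMut(&'a str) -> IResult<&'a str, O> {
    move |input| {
        eprintln!("{name}({input:?})");
        let result = parser(input);
        eprintln!("{name}({input:?}) -> {result:?}");
        result
    }
}

fn main() {
    let mut digits = traced(
        "digits",
        nom::character::complete::digit1::<&str, nom::error::Error<&str>>,
    );
    assert_eq!(digits("123abc"), Ok(("abc", "123")));
}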
- #[inline(always)] - fn traced<'a, T, P>( - fun: &'static str, - mut parser: P, - ) -> impl FnMut(Span<'a>) -> IntermediateResult<'a, T> - where - T: Debug, - P: FnMut(Span<'a>) -> IntermediateResult<'a, T>, - { - move |input| { - log::trace!(target: "parser", "{fun}({input:?})"); - let result = parser(input); - log::trace!(target: "parser", "{fun}({input:?}) -> {result:?}"); - result - } - } - - /// A combinator that makes sure all input has been consumed. - pub fn all_input_consumed<'a, T: 'a>( - parser: impl FnMut(Span<'a>) -> IntermediateResult<'a, T> + 'a, - ) -> impl FnMut(&'a str) -> Result + 'a { - let mut p = all_consuming(parser); - move |input| { - let input = Span::new(input); - p(input).map(|(_, result)| result).map_err(|e| match e { - Err::Incomplete(e) => ParseError::MissingInput(match e { - nom::Needed::Unknown => { - "expected an unknown amount of further input".to_string() - } - nom::Needed::Size(size) => format!("expected at least {size} more bytes"), - }) - .at(input), - Err::Error(e) | Err::Failure(e) => e, - }) - } - } - - /// A combinator that recognises a comment, starting at a `%` - /// character and ending at the end of the line. - pub fn comment(input: Span) -> IntermediateResult<()> { - alt(( - value((), pair(tag("%"), is_not("\n\r"))), - // a comment that immediately precedes the end of the line – - // this must come after the normal line comment above - value((), tag("%")), - ))(input) - } - - /// A combinator that recognises an arbitrary amount of whitespace and - /// comments. - pub fn multispace_or_comment0(input: Span) -> IntermediateResult<()> { - value((), many0(alt((value((), multispace1), comment))))(input) - } - - /// A combinator that recognises any non-empty amount of whitespace - /// and comments. - pub fn multispace_or_comment1(input: Span) -> IntermediateResult<()> { - value((), many1(alt((value((), multispace1), comment))))(input) - } - - /// A combinator that modifies the associated error. - pub fn map_error<'a, T: 'a>( - mut parser: impl FnMut(Span<'a>) -> IntermediateResult<'a, T> + 'a, - mut error: impl FnMut() -> ParseError + 'a, - ) -> impl FnMut(Span<'a>) -> IntermediateResult<'a, T> + 'a { - move |input| { - parser(input).map_err(|e| match e { - Err::Incomplete(_) => e, - Err::Error(context) => { - let mut err = error().at(input); - err.append(context); - Err::Error(err) - } - Err::Failure(context) => { - let mut err = error().at(input); - err.append(context); - Err::Failure(err) - } - }) - } - } - - /// A combinator that creates a parser for a specific token. - pub fn token<'a>(token: &'a str) -> impl FnMut(Span<'a>) -> IntermediateResult> { - map_error(tag(token), || ParseError::ExpectedToken(token.to_string())) - } - - /// A combinator that creates a parser for a specific token, - /// surrounded by whitespace or comments. - pub fn space_delimited_token<'a>( - token: &'a str, - ) -> impl FnMut(Span<'a>) -> IntermediateResult> { - map_error( - delimited(multispace_or_comment0, tag(token), multispace_or_comment0), - || ParseError::ExpectedToken(token.to_string()), - ) - } - - /// Expand a prefix. - fn resolve_prefix<'a>( - prefixes: &'a HashMap<&'a str, &'a str>, - prefix: &'a str, - ) -> Result<&'a str, ParseError> { - prefixes - .get(prefix) - .copied() - .ok_or_else(|| ParseError::UndeclaredPrefix(prefix.to_string())) - } - - /// Expand a prefixed name. 
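// Prefix handling in a nutshell: `resolve_prefix` above looks a prefix up in
// the map filled by `@prefix` declarations, and `resolve_prefixed_name`
// below concatenates the expansion with the local part. A stand-alone sketch
// of that expansion, returning Option instead of the real ParseError:

use std::collections::HashMap;

fn expand(prefixes: &HashMap<&str, &str>, prefix: &str, local: &str) -> Option<String> {
    prefixes.get(prefix).map(|iri| format!("{iri}{local}"))
}

fn main() {
    let prefixes = HashMap::from([("eg", "http://example.org/")]);
    assert_eq!(
        expand(&prefixes, "eg", "foo").as_deref(),
        Some("http://example.org/foo")
    );
    assert_eq!(expand(&prefixes, "ex", "foo"), None); // undeclared prefix
}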
- fn resolve_prefixed_name( - prefixes: &HashMap<&str, &str>, - name: sparql::Name, - ) -> Result { - match name { - sparql::Name::IriReference(iri) => Ok(iri.to_string()), - sparql::Name::PrefixedName { prefix, local } => { - resolve_prefix(prefixes, prefix).map(|iri| format!("{iri}{local}")) - } - sparql::Name::BlankNode(label) => Ok(format!("_:{label}")), - } - } - - /// Resolve prefixes in a [turtle::RdfLiteral]. - fn resolve_prefixed_rdf_literal( - prefixes: &HashMap<&str, &str>, - literal: turtle::RdfLiteral, - ) -> Result { - match literal { - turtle::RdfLiteral::LanguageString { value, tag } => Ok( - AnyDataValue::new_language_tagged_string(value.to_string(), tag.to_string()), - ), - turtle::RdfLiteral::DatatypeValue { value, datatype } => { - AnyDataValue::new_from_typed_literal( - value.to_string(), - resolve_prefixed_name(prefixes, datatype) - .expect("prefix should have been registered during parsing"), - ) - } - } - } - - #[traced("parser")] - pub(crate) fn parse_bare_name(input: Span<'_>) -> IntermediateResult> { - map_error( - recognize(pair( - alpha1, - opt(many1(satisfy(|c| { - ['0'..='9', 'a'..='z', 'A'..='Z', '-'..='-', '_'..='_'] - .iter() - .any(|range| range.contains(&c)) - }))), - )), - || ParseError::ExpectedBareName, - )(input) - } - - #[traced("parser")] - fn parse_simple_name(input: Span<'_>) -> IntermediateResult> { - map_error( - recognize(pair( - alpha1, - opt(preceded( - many0(tag(" ")), - separated_list1( - many1(tag(" ")), - many1(satisfy(|c| { - ['0'..='9', 'a'..='z', 'A'..='Z', '_'..='_'] - .iter() - .any(|range| range.contains(&c)) - })), - ), - )), - )), - || ParseError::ExpectedBareName, - )(input) - } - - /// Parse an IRI representing a constant. - fn parse_iri_constant<'a>( - prefixes: &'a RefCell>, - ) -> impl FnMut(Span<'a>) -> IntermediateResult<'a, AnyDataValue> { - map_error( - move |input| { - let (remainder, name) = traced( - "parse_iri_constant", - alt(( - map(sparql::iriref, |name| sparql::Name::IriReference(&name)), - sparql::prefixed_name, - sparql::blank_node_label, - map(parse_bare_name, |name| sparql::Name::IriReference(&name)), - )), - )(input)?; - - let resolved = resolve_prefixed_name(&prefixes.borrow(), name) - .map_err(|e| Err::Failure(e.at(input)))?; - - Ok((remainder, AnyDataValue::new_iri(resolved))) - }, - || ParseError::ExpectedIriConstant, - ) - } - - fn parse_constant_term<'a>( - prefixes: &'a RefCell>, - ) -> impl FnMut(Span<'a>) -> IntermediateResult<'a, AnyDataValue> { - traced( - "parse_constant_term", - alt(( - parse_iri_constant(prefixes), - turtle::numeric_literal, - map_res(turtle::rdf_literal, move |literal| { - resolve_prefixed_rdf_literal(&prefixes.borrow(), literal) - }), - map(turtle::string, move |literal| { - AnyDataValue::new_plain_string(literal.to_string()) - }), - )), - ) - } - - /// Parse a ground term. - pub fn parse_ground_term<'a>( - prefixes: &'a RefCell>, - ) -> impl FnMut(Span<'a>) -> IntermediateResult<'a, PrimitiveTerm> { - traced( - "parse_ground_term", - map_error( - map(parse_constant_term(prefixes), PrimitiveTerm::GroundTerm), - || ParseError::ExpectedGroundTerm, - ), - ) - } - - /// The main parser. Holds a hash map for - /// prefixes, as well as the base IRI. - #[derive(Debug, Default)] - pub struct RuleParser<'a> { - /// The base IRI, if set. - base: RefCell>, - /// A map from Prefixes to IRIs. - prefixes: RefCell>, - /// Number counting up for generating distinct wildcards. - wildcard_generator: RefCell, - } - - impl<'a> RuleParser<'a> { - /// Construct a new [RuleParser]. 
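// `RuleParser` above keeps its mutable state (`base`, `prefixes`, the
// wildcard counter) in `RefCell`s because the combinator closures only ever
// capture `&self`, yet directives like `@prefix` must update that state in
// the middle of a parse. A minimal sketch of the interior-mutability pattern:

use std::{cell::RefCell, collections::HashMap};

#[derive(Default)]
struct Parser<'a> {
    prefixes: RefCell<HashMap<&'a str, &'a str>>,
}

impl<'a> Parser<'a> {
    /// Registers a prefix through a shared reference; false on redeclaration.
    fn declare(&self, prefix: &'a str, iri: &'a str) -> bool {
        self.prefixes.borrow_mut().insert(prefix, iri).is_none()
    }
}

fn main() {
    let parser = Parser::default();
    assert!(parser.declare("eg", "http://example.org/"));
    assert!(!parser.declare("eg", "http://elsewhere.org/")); // like ParseError::RedeclaredPrefix
}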
-        pub fn new() -> Self {
-            Default::default()
-        }
-
-        fn parse_complex_constant_term(
-            &'a self,
-        ) -> impl FnMut(Span<'a>) -> IntermediateResult<'a, AnyDataValue> {
-            traced(
-                "parse_complex_constant_term",
-                // Note: The explicit |s| in the cases below is important to enable proper type
-                // reasoning in rust. Without it, unresolved opaque types appear in the recursion.
-                alt((
-                    parse_constant_term(&self.prefixes),
-                    map(|s| self.parse_tuple_literal()(s), AnyDataValue::from),
-                    map(|s| self.parse_map_literal()(s), AnyDataValue::from),
-                )),
-            )
-        }
-
-        /// Parse the dot that ends declarations, optionally surrounded by spaces.
-        fn parse_dot(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult<Span<'a>> {
-            traced("parse_dot", space_delimited_token("."))
-        }
-
-        /// Parse a comma, optionally surrounded by spaces.
-        fn parse_comma(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult<Span<'a>> {
-            traced("parse_comma", space_delimited_token(","))
-        }
-
-        /// Parse an equality sign, optionally surrounded by spaces.
-        fn parse_equals(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult<Span<'a>> {
-            traced("parse_equals", space_delimited_token("="))
-        }
-
-        /// Parse a negation sign (`~`), optionally surrounded by spaces.
-        fn parse_not(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult<Span<'a>> {
-            traced("parse_not", space_delimited_token("~"))
-        }
-
-        /// Parse an arrow (`:-`), optionally surrounded by spaces.
-        fn parse_arrow(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult<Span<'a>> {
-            traced("parse_arrow", space_delimited_token(":-"))
-        }
-
-        /// Parse an opening parenthesis, optionally surrounded by spaces.
-        fn parse_open_parenthesis(
-            &'a self,
-        ) -> impl FnMut(Span<'a>) -> IntermediateResult<Span<'a>> {
-            traced("parse_open_parenthesis", space_delimited_token("("))
-        }
-
-        /// Parse a closing parenthesis, optionally surrounded by spaces.
-        fn parse_close_parenthesis(
-            &'a self,
-        ) -> impl FnMut(Span<'a>) -> IntermediateResult<Span<'a>> {
-            traced("parse_close_parenthesis", space_delimited_token(")"))
-        }
-
-        /// Matches an opening parenthesis,
-        /// then gets an object from the parser,
-        /// and finally matches a closing parenthesis.
-        pub fn parenthesised<'b, O, F>(
-            &'a self,
-            parser: F,
-        ) -> impl FnMut(Span<'a>) -> IntermediateResult<O>
-        where
-            O: Debug + 'a,
-            F: FnMut(Span<'a>) -> IntermediateResult<O> + 'a,
-        {
-            traced(
-                "parenthesised",
-                map_error(
-                    delimited(
-                        self.parse_open_parenthesis(),
-                        parser,
-                        self.parse_close_parenthesis(),
-                    ),
-                    || ParseError::ExpectedParenthesisedExpression,
-                ),
-            )
-        }
-
-        /// Parse an opening brace, optionally surrounded by spaces.
-        fn parse_open_brace(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult<Span<'a>> {
-            traced("parse_open_brace", space_delimited_token("{"))
-        }
-
-        /// Parse a closing brace, optionally surrounded by spaces.
-        fn parse_close_brace(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult<Span<'a>> {
-            traced("parse_close_brace", space_delimited_token("}"))
-        }
-
-        /// Parse a base declaration.
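// `parse_base` below (and the other directive parsers) wrap everything after
// the directive keyword in `cut`: once `@base` has matched, a malformed tail
// should surface as a hard failure rather than make `alt` silently try the
// next alternative. A minimal &str sketch of the difference, assuming nom 7:

use nom::{
    branch::alt,
    bytes::complete::{tag, take_until},
    combinator::cut,
    sequence::{delimited, preceded},
    IResult,
};

fn iriref(input: &str) -> IResult<&str, &str> {
    delimited(tag("<"), take_until(">"), tag(">"))(input)
}

fn directive(input: &str) -> IResult<&str, &str> {
    alt((
        preceded(tag("@base "), cut(iriref)),
        tag("@output"), // never reached once "@base " has matched
    ))(input)
}

fn main() {
    assert_eq!(
        directive("@base <http://example.org/>"),
        Ok(("", "http://example.org/"))
    );
    // Without `cut` this would backtrack and report a confusing "@output" error.
    assert!(matches!(directive("@base oops"), Err(nom::Err::Failure(_))));
}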
- fn parse_base(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_base", - map_error( - move |input| { - let (remainder, base) = delimited( - terminated(token("@base"), cut(multispace_or_comment1)), - cut(sparql::iriref), - cut(self.parse_dot()), - )(input)?; - - log::debug!(target: "parser", r#"parse_base: set new base: "{base}""#); - *self.base.borrow_mut() = Some(&base); - - Ok((remainder, Identifier(base.to_string()))) - }, - || ParseError::ExpectedBaseDeclaration, - ), - ) - } - - fn parse_prefix(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult> { - traced( - "parse_prefix", - map_error( - move |input| { - let (remainder, (prefix, iri)) = delimited( - terminated(token("@prefix"), cut(multispace_or_comment1)), - cut(tuple(( - cut(terminated(sparql::pname_ns, multispace_or_comment1)), - cut(sparql::iriref), - ))), - cut(self.parse_dot()), - )(input)?; - - log::debug!(target: "parser", r#"parse_prefix: got prefix "{prefix}" for iri "{iri}""#); - if self.prefixes.borrow_mut().insert(&prefix, &iri).is_some() { - Err(Err::Failure( - ParseError::RedeclaredPrefix(prefix.to_string()).at(input), - )) - } else { - Ok((remainder, prefix)) - } - }, - || ParseError::ExpectedPrefixDeclaration, - ), - ) - } - - /// Parse a data source declaration. - /// This is a backwards compatibility feature for Rulewerk syntax. Nemo normally uses - /// `@import` instead of `@source`. The difference in `@source` is that (1) a predicate - /// arity is given in brackets after the predicate name, (2) the import predicate names - /// are one of `load-csv`, `load-tsv`, `load-rdf`, and `sparql`, with the only parameter - /// being the file name or IRI to be loaded. - fn parse_source(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_source", - map_error( - move |input| { - let (remainder, (predicate, arity)) = preceded( - terminated(token("@source"), cut(multispace_or_comment1)), - cut(self.parse_qualified_predicate_name()), - )(input)?; - - let (remainder, datasource): (_, Result<_, ParseError>) = cut(delimited( - delimited(multispace_or_comment0, token(":"), multispace_or_comment1), - alt(( - map( - delimited( - preceded( - token("load-csv"), - cut(self.parse_open_parenthesis()), - ), - turtle::string, - self.parse_close_parenthesis(), - ), - |filename| { - let attributes = MapDataValue::from_iter([ - ( - AnyDataValue::new_iri( - PARAMETER_NAME_RESOURCE.to_string(), - ), - AnyDataValue::new_plain_string( - filename.to_string(), - ), - ), - ( - AnyDataValue::new_iri( - PARAMETER_NAME_FORMAT.to_string(), - ), - TupleDataValue::from_iter( - vec![VALUE_FORMAT_ANY; arity] - .iter() - .map(|format| { - AnyDataValue::new_plain_string( - (*format).to_string(), - ) - }) - .collect::>(), - ) - .into(), - ), - ]); - Ok(ImportDirective::from(ImportExportDirective { - predicate: predicate.clone(), - format: FileFormat::CSV, - attributes, - })) - }, - ), - map( - delimited( - preceded( - token("load-tsv"), - cut(self.parse_open_parenthesis()), - ), - turtle::string, - self.parse_close_parenthesis(), - ), - |filename| { - let attributes = MapDataValue::from_iter([ - ( - AnyDataValue::new_iri( - PARAMETER_NAME_RESOURCE.to_string(), - ), - AnyDataValue::new_plain_string( - filename.to_string(), - ), - ), - ( - AnyDataValue::new_iri( - PARAMETER_NAME_FORMAT.to_string(), - ), - TupleDataValue::from_iter( - vec![VALUE_FORMAT_ANY; arity] - .iter() - .map(|format| { - AnyDataValue::new_plain_string( - (*format).to_string(), - ) - }) - .collect::>(), - ) - .into(), - ), - ]); - 
Ok(ImportDirective::from(ImportExportDirective { - predicate: predicate.clone(), - format: FileFormat::TSV, - attributes, - })) - }, - ), - map( - delimited( - preceded( - token("load-rdf"), - cut(self.parse_open_parenthesis()), - ), - turtle::string, - self.parse_close_parenthesis(), - ), - |filename| { - let mut attribute_pairs = vec![ - ( - AnyDataValue::new_iri( - PARAMETER_NAME_RESOURCE.to_string(), - ), - AnyDataValue::new_plain_string( - filename.to_string(), - ), - ), - ( - AnyDataValue::new_iri( - PARAMETER_NAME_FORMAT.to_string(), - ), - TupleDataValue::from_iter( - vec![VALUE_FORMAT_ANY; arity] - .iter() - .map(|format| { - AnyDataValue::new_plain_string( - (*format).to_string(), - ) - }) - .collect::>(), - ) - .into(), - ), - ]; - if let Some(base) = self.base() { - attribute_pairs.push(( - AnyDataValue::new_iri( - PARAMETER_NAME_BASE.to_string(), - ), - AnyDataValue::new_iri(base.to_string()), - )); - } - - let attributes = MapDataValue::from_iter(attribute_pairs); - - Ok(ImportDirective::from(ImportExportDirective { - predicate: predicate.clone(), - format: FileFormat::RDF(RdfVariant::Unspecified), - attributes, - })) - }, - ), - map( - delimited( - preceded( - token("sparql"), - cut(self.parse_open_parenthesis()), - ), - tuple(( - self.parse_iri_identifier(), - delimited( - self.parse_comma(), - turtle::string, - self.parse_comma(), - ), - turtle::string, - )), - self.parse_close_parenthesis(), - ), - |(_endpoint, _projection, _query)| { - Err(ParseError::UnsupportedSparqlSource( - predicate.clone().0, - )) - }, - ), - )), - cut(self.parse_dot()), - ))( - remainder - )?; - - let spec = datasource.map_err(|e| Err::Failure(e.at(input)))?; - - Ok((remainder, spec)) - }, - || ParseError::ExpectedDataSourceDeclaration, - ), - ) - } - - /// Parse an output directive. - fn parse_output_directive( - &'a self, - ) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_output", - map_error( - delimited( - terminated(token("@output"), cut(multispace_or_comment1)), - cut(map_res::<_, _, _, _, Error, _, _>( - self.parse_iri_like_identifier(), - Ok, - )), - cut(self.parse_dot()), - ), - || ParseError::ExpectedOutputDeclaration, - ), - ) - } - - /// Parse an entry in a [MapDataValue], i.e., am [AnyDataValue]--[AnyDataValue] pair. - fn parse_map_entry( - &'a self, - ) -> impl FnMut(Span<'a>) -> IntermediateResult<(AnyDataValue, AnyDataValue)> { - traced( - "parse_map_entry", - separated_pair( - self.parse_complex_constant_term(), - self.parse_equals(), - map(self.parse_complex_constant_term(), |term| term), - ), - ) - } - - /// Parse a ground map literal. - fn parse_map_literal(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_map_literal", - delimited( - self.parse_open_brace(), - map( - separated_list0(self.parse_comma(), self.parse_map_entry()), - MapDataValue::from_iter, - ), - self.parse_close_brace(), - ), - ) - } - - /// Parse a ground tuple literal. - pub fn parse_tuple_literal( - &'a self, - ) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_tuple_literal", - delimited( - self.parse_open_parenthesis(), - map( - separated_list0(self.parse_comma(), self.parse_complex_constant_term()), - TupleDataValue::from_iter, - ), - self.parse_close_parenthesis(), - ), - ) - } - - /// Parse a file format name. 
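// `parse_file_format` below maps a bare keyword onto the format enum via
// `map_res`, so an unknown keyword becomes an ordinary parse error. The same
// shape, sketched on a hypothetical two-variant enum standing in for Nemo's
// `FileFormat`, assuming nom 7:

use nom::{character::complete::alpha1, combinator::map_res, IResult};

#[derive(Debug, PartialEq)]
enum Format {
    Csv,
    Tsv,
}

fn file_format(input: &str) -> IResult<&str, Format> {
    map_res(alpha1, |word: &str| match word {
        "csv" => Ok(Format::Csv),
        "tsv" => Ok(Format::Tsv),
        other => Err(format!("unknown format {other}")),
    })(input)
}

fn main() {
    assert_eq!(file_format("csv"), Ok(("", Format::Csv)));
    assert!(file_format("xlsx").is_err());
}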
- fn parse_file_format(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced("parse_file_format", move |input| { - let (remainder, format) = - map_res(alpha1, |format: Span<'a>| match *format.fragment() { - FILE_FORMAT_CSV => Ok(FileFormat::CSV), - FILE_FORMAT_DSV => Ok(FileFormat::DSV), - FILE_FORMAT_TSV => Ok(FileFormat::TSV), - FILE_FORMAT_RDF_UNSPECIFIED => Ok(FileFormat::RDF(RdfVariant::Unspecified)), - FILE_FORMAT_RDF_NTRIPLES => Ok(FileFormat::RDF(RdfVariant::NTriples)), - FILE_FORMAT_RDF_NQUADS => Ok(FileFormat::RDF(RdfVariant::NQuads)), - FILE_FORMAT_RDF_TURTLE => Ok(FileFormat::RDF(RdfVariant::Turtle)), - FILE_FORMAT_RDF_TRIG => Ok(FileFormat::RDF(RdfVariant::TriG)), - FILE_FORMAT_RDF_XML => Ok(FileFormat::RDF(RdfVariant::RDFXML)), - FILE_FORMAT_JSON => Ok(FileFormat::JSON), - _ => Err(ParseError::FileFormatError(format.fragment().to_string())), - })(input)?; - - Ok((remainder, format)) - }) - } - - /// Parse an import/export specification. - fn parse_import_export_spec( - &'a self, - ) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced("parse_import_export_spec", move |input| { - let (remainder, predicate) = self.parse_iri_like_identifier()(input)?; - let (remainder, format) = delimited( - space_delimited_token(":-"), - self.parse_file_format(), - multispace_or_comment0, - )(remainder)?; - let (remainder, attributes) = self.parse_map_literal()(remainder)?; - Ok(( - remainder, - ImportExportDirective { - predicate, - format, - attributes, - }, - )) - }) - } - - /// Parse an import directive. - fn parse_import(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_import", - delimited( - terminated(token("@import"), multispace_or_comment1), - cut(map(self.parse_import_export_spec(), ImportDirective::from)), - cut(self.parse_dot()), - ), - ) - } - - /// Parse an export directive. - fn parse_export(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_export", - delimited( - terminated(token("@export"), multispace_or_comment1), - cut(map(self.parse_import_export_spec(), ExportDirective::from)), - cut(self.parse_dot()), - ), - ) - } - - /// Parse a statement. - fn parse_statement(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_statement", - map_error( - alt(( - map(self.parse_fact(), Statement::Fact), - map(self.parse_rule(), Statement::Rule), - )), - || ParseError::ExpectedStatement, - ), - ) - } - - /// Parse a fact. - fn parse_fact(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_fact", - map_error( - move |input| { - let (remainder, (predicate, terms)) = terminated( - pair( - self.parse_iri_like_identifier(), - self.parenthesised(separated_list1( - self.parse_comma(), - parse_ground_term(&self.prefixes), - )), - ), - self.parse_dot(), - )(input)?; - - let predicate_name = predicate.name(); - log::trace!(target: "parser", "found fact {predicate_name}({terms:?})"); - - // We do not allow complex term trees in facts for now - let terms = terms.into_iter().map(Term::Primitive).collect(); - - Ok((remainder, Fact(Atom::new(predicate, terms)))) - }, - || ParseError::ExpectedFact, - ), - ) - } - - /// Parse an IRI identifier, e.g. for predicate names. 
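// The overall shape of `parse_fact` above is: predicate name, parenthesised
// comma-separated ground terms, closing dot. A toy &str version that keeps
// the terms as plain strings rather than building `AnyDataValue`s:

use nom::{
    bytes::complete::tag,
    character::complete::{alpha1, char, multispace0},
    multi::separated_list1,
    sequence::{delimited, pair, terminated},
    IResult,
};

fn fact(input: &str) -> IResult<&str, (&str, Vec<&str>)> {
    terminated(
        pair(
            alpha1, // predicate name
            delimited(
                char('('),
                separated_list1(delimited(multispace0, tag(","), multispace0), alpha1),
                char(')'),
            ),
        ),
        char('.'), // the statement-terminating dot
    )(input)
}

fn main() {
    assert_eq!(fact("p(a, b)."), Ok(("", ("p", vec!["a", "b"]))));
}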
- fn parse_iri_identifier( - &'a self, - ) -> impl FnMut(Span<'a>) -> IntermediateResult { - map_error( - move |input| { - let (remainder, name) = traced( - "parse_iri_identifier", - alt(( - map(sparql::iriref, |name| sparql::Name::IriReference(&name)), - sparql::prefixed_name, - sparql::blank_node_label, - )), - )(input)?; - - Ok(( - remainder, - Identifier( - resolve_prefixed_name(&self.prefixes.borrow(), name) - .map_err(|e| Err::Failure(e.at(input)))?, - ), - )) - }, - || ParseError::ExpectedIriIdentifier, - ) - } - - /// Parse an IRI-like identifier. - /// - /// This is being used for: - /// * predicate names - /// * built-in functions in term trees - fn parse_iri_like_identifier( - &'a self, - ) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_iri_like_identifier", - map_error( - alt(( - self.parse_iri_identifier(), - self.parse_bare_iri_like_identifier(), - )), - || ParseError::ExpectedIriLikeIdentifier, - ), - ) - } - - /// Parse a qualified predicate name – currently, this is a - /// predicate name together with its arity. - /// - /// FIXME: Obsolete. Can be removed in the future. - fn parse_qualified_predicate_name( - &'a self, - ) -> impl FnMut(Span<'a>) -> IntermediateResult<(Identifier, usize)> { - traced( - "parse_qualified_predicate_name", - pair( - self.parse_iri_like_identifier(), - preceded( - multispace_or_comment0, - delimited( - token("["), - cut(map_res(digit1, |number: Span<'a>| number.parse::())), - cut(token("]")), - ), - ), - ), - ) - } - - /// Parse an IRI-like identifier (e.g. a predicate name) that is not an IRI. - fn parse_bare_iri_like_identifier( - &'a self, - ) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced("parse_bare_iri_like_identifier", move |input| { - let (remainder, name) = parse_bare_name(input)?; - - Ok((remainder, Identifier(name.to_string()))) - }) - } - - /// Parse a rule. - fn parse_rule(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_rule", - map_error( - move |input| { - let (remainder, (head, body)) = pair( - terminated( - separated_list1(self.parse_comma(), self.parse_atom()), - self.parse_arrow(), - ), - cut(terminated( - separated_list1(self.parse_comma(), self.parse_body_expression()), - self.parse_dot(), - )), - )(input)?; - - log::trace!(target: "parser", r#"found rule "{head:?}" :- "{body:?}""#); - - let literals = body - .iter() - .filter_map(|expr| match expr { - BodyExpression::Literal(l) => Some(l.clone()), - _ => None, - }) - .collect(); - let constraints = body - .into_iter() - .filter_map(|expr| match expr { - BodyExpression::Constraint(c) => Some(c), - _ => None, - }) - .collect(); - Ok(( - remainder, - Rule::new_validated(head, literals, constraints) - .map_err(|e| Err::Failure(e.at(input)))?, - )) - }, - || ParseError::ExpectedRule, - ), - ) - } - - /// Parse an atom. - fn parse_atom(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_atom", - map_error( - move |input| { - let (remainder, predicate) = self.parse_iri_like_identifier()(input)?; - let (remainder, terms) = delimited( - self.parse_open_parenthesis(), - cut(separated_list1(self.parse_comma(), self.parse_term())), - cut(self.parse_close_parenthesis()), - )(remainder)?; - - let predicate_name = predicate.name(); - log::trace!(target: "parser", "found atom {predicate_name}({terms:?})"); - - Ok((remainder, Atom::new(predicate, terms))) - }, - || ParseError::ExpectedAtom, - ), - ) - } - - /// Parse a [PrimitiveTerm]. 
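// `parse_rule` above parses one homogeneous list of body expressions and
// then separates it into literals and constraints with two `filter_map`
// passes before calling `Rule::new_validated`. The same split on a toy enum:

#[derive(Debug)]
enum BodyExpr {
    Literal(&'static str),
    Constraint(&'static str),
}

fn split(body: Vec<BodyExpr>) -> (Vec<&'static str>, Vec<&'static str>) {
    let literals = body
        .iter()
        .filter_map(|e| match e {
            BodyExpr::Literal(l) => Some(*l),
            _ => None,
        })
        .collect();
    let constraints = body
        .into_iter()
        .filter_map(|e| match e {
            BodyExpr::Constraint(c) => Some(c),
            _ => None,
        })
        .collect();
    (literals, constraints)
}

fn main() {
    let body = vec![BodyExpr::Literal("A(?X, ?Y)"), BodyExpr::Constraint("?Y > ?X")];
    assert_eq!(split(body), (vec!["A(?X, ?Y)"], vec!["?Y > ?X"]));
}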
- fn parse_primitive_term( - &'a self, - ) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_primitive_term", - map_error( - alt((parse_ground_term(&self.prefixes), self.parse_variable())), - || ParseError::ExpectedPrimitiveTerm, - ), - ) - } - - /// Parse an aggregate term. - fn parse_aggregate(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_aggregate", - map_error( - move |input| { - let (remainder, _) = nom::character::complete::char('#')(input)?; - let (remainder, aggregate_operation_identifier) = - self.parse_bare_iri_like_identifier()(remainder)?; - let (remainder, terms) = self - .parenthesised(separated_list1(self.parse_comma(), self.parse_term()))( - remainder, - )?; - - if let Some(logical_aggregate_operation) = - (&aggregate_operation_identifier).into() - { - let aggregate = Aggregate { - logical_aggregate_operation, - terms, - }; - - Ok((remainder, Term::Aggregation(aggregate))) - } else { - Err(Err::Failure( - ParseError::UnknownAggregateOperation( - aggregate_operation_identifier.name(), - ) - .at(input), - )) - } - }, - || ParseError::ExpectedAggregate, - ), - ) - } - - /// Parse a variable. - fn parse_variable(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_variable", - map_error( - map( - alt(( - self.parse_universal_variable(), - self.parse_existential_variable(), - )), - PrimitiveTerm::Variable, - ), - || ParseError::ExpectedVariable, - ), - ) - } - - /// Parse a universally quantified variable. - fn parse_universal_variable( - &'a self, - ) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_universal_variable", - map_error( - map( - preceded(token("?"), cut(self.parse_variable_name())), - Variable::Universal, - ), - || ParseError::ExpectedUniversalVariable, - ), - ) - } - - /// Parse an existentially quantified variable. - fn parse_existential_variable( - &'a self, - ) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_existential_variable", - map_error( - map( - preceded(token("!"), cut(self.parse_variable_name())), - Variable::Existential, - ), - || ParseError::ExpectedExistentialVariable, - ), - ) - } - - /// Parse a variable name. - fn parse_variable_name(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_variable", - map_error( - move |input| { - let (remainder, name) = parse_simple_name(input)?; - - Ok((remainder, name.to_string())) - }, - || ParseError::ExpectedVariableName, - ), - ) - } - - /// Parse a literal (i.e., a possibly negated atom). - fn parse_literal(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_literal", - map_error( - alt((self.parse_negative_literal(), self.parse_positive_literal())), - || ParseError::ExpectedLiteral, - ), - ) - } - - /// Parse a non-negated literal. - fn parse_positive_literal(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_positive_literal", - map_error(map(self.parse_atom(), Literal::Positive), || { - ParseError::ExpectedPositiveLiteral - }), - ) - } - - /// Parse a negated literal. 
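// As `parse_universal_variable` and `parse_existential_variable` above show,
// the leading sigil alone decides the quantifier: `?X` is universal, `!X` is
// existential. A toy lexer for the two kinds, assuming nom 7:

use nom::{
    branch::alt,
    character::complete::{alpha1, char},
    combinator::map,
    sequence::preceded,
    IResult,
};

#[derive(Debug, PartialEq)]
enum Var<'a> {
    Universal(&'a str),
    Existential(&'a str),
}

fn variable(input: &str) -> IResult<&str, Var<'_>> {
    alt((
        map(preceded(char('?'), alpha1), Var::Universal),
        map(preceded(char('!'), alpha1), Var::Existential),
    ))(input)
}

fn main() {
    assert_eq!(variable("?X"), Ok(("", Var::Universal("X"))));
    assert_eq!(variable("!Y"), Ok(("", Var::Existential("Y"))));
}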
-        fn parse_negative_literal(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult<Literal> {
-            traced(
-                "parse_negative_literal",
-                map_error(
-                    map(
-                        preceded(self.parse_not(), cut(self.parse_atom())),
-                        Literal::Negative,
-                    ),
-                    || ParseError::ExpectedNegativeLiteral,
-                ),
-            )
-        }
-
-        /// Parse an operation that filters a variable.
-        fn parse_constraint_operator(
-            &'a self,
-        ) -> impl FnMut(Span<'a>) -> IntermediateResult<ConstraintOperator> {
-            traced(
-                "parse_constraint_operator",
-                map_error(
-                    delimited(
-                        multispace_or_comment0,
-                        alt((
-                            value(ConstraintOperator::LessThanEq, token("<=")),
-                            value(ConstraintOperator::LessThan, token("<")),
-                            value(ConstraintOperator::Equals, token("=")),
-                            value(ConstraintOperator::Unequals, token("!=")),
-                            value(ConstraintOperator::GreaterThanEq, token(">=")),
-                            value(ConstraintOperator::GreaterThan, token(">")),
-                        )),
-                        multispace_or_comment0,
-                    ),
-                    || ParseError::ExpectedFilterOperator,
-                ),
-            )
-        }
-
-        /// Parse a term tree.
-        ///
-        /// This may consist of:
-        /// * A function term
-        /// * An arithmetic expression, which handles e.g. precedence of addition over multiplication
-        fn parse_term(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult<Term> {
-            traced(
-                "parse_term",
-                map_error(
-                    move |input| {
-                        delimited(
-                            multispace_or_comment0,
-                            alt((
-                                self.parse_arithmetic_expression(),
-                                // map(self.parse_constraint(), |c| c.as_binary_term()),
-                                self.parse_parenthesised_term(),
-                                self.parse_function_term(),
-                                self.parse_aggregate(),
-                                self.parse_wildcard(),
-                            )),
-                            multispace_or_comment0,
-                        )(input)
-                    },
-                    || ParseError::ExpectedTerm,
-                ),
-            )
-        }
-
-        /// Parse a wildcard variable.
-        fn parse_wildcard(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult<Term> {
-            traced(
-                "parse_wildcard",
-                map_res(space_delimited_token("_"), |_| {
-                    let wildcard = Variable::new_unamed(*self.wildcard_generator.borrow());
-                    *self.wildcard_generator.borrow_mut() += 1;
-                    Ok::<_, ParseError>(Term::Primitive(PrimitiveTerm::Variable(wildcard)))
-                }),
-            )
-        }
-
-        /// Parse a parenthesised term tree.
-        fn parse_parenthesised_term(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult<Term> {
-            traced(
-                "parse_parenthesised_term",
-                map_error(self.parenthesised(self.parse_term()), || {
-                    ParseError::ExpectedParenthesisedTerm
-                }),
-            )
-        }
-
-        /// Parse a function term, possibly with nested term trees.
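// Each `_` wildcard parsed by `parse_wildcard` above must become a distinct
// variable; otherwise two wildcards in one rule would be unified. The
// counter-behind-a-RefCell pattern, sketched stand-alone (the `_{id}` naming
// is illustrative; Nemo's `Variable::new_unamed` does its own naming):

use std::cell::RefCell;

struct WildcardGenerator {
    next: RefCell<usize>,
}

impl WildcardGenerator {
    fn fresh(&self) -> String {
        let id = *self.next.borrow();
        *self.next.borrow_mut() += 1;
        format!("_{id}")
    }
}

fn main() {
    let wildcards = WildcardGenerator { next: RefCell::new(0) };
    assert_ne!(wildcards.fresh(), wildcards.fresh()); // "_0" vs "_1"
}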
- fn parse_function_term(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_function_term", - map_error( - move |input| { - let (remainder, name) = self.parse_iri_like_identifier()(input)?; - - if let Ok(op) = UnaryOperation::construct_from_name(&name.0) { - let (remainder, subterm) = - (self.parenthesised(self.parse_term()))(remainder)?; - - Ok((remainder, Term::Unary(op, Box::new(subterm)))) - } else if let Some(op) = BinaryOperation::construct_from_name(&name.0) { - let (remainder, (left, _, right)) = - (self.parenthesised(tuple(( - self.parse_term(), - self.parse_comma(), - self.parse_term(), - ))))(remainder)?; - - Ok(( - remainder, - Term::Binary { - operation: op, - lhs: Box::new(left), - rhs: Box::new(right), - }, - )) - } else if let Some(op) = TernaryOperation::construct_from_name(&name.0) { - let (remainder, (first, _, second, _, third)) = - (self.parenthesised(tuple(( - self.parse_term(), - self.parse_comma(), - self.parse_term(), - self.parse_comma(), - self.parse_term(), - ))))(remainder)?; - - Ok(( - remainder, - Term::Ternary { - operation: op, - first: Box::new(first), - second: Box::new(second), - third: Box::new(third), - }, - )) - } else if let Some(op) = NaryOperation::construct_from_name(&name.0) { - let (remainder, subterms) = (self.parenthesised(separated_list0( - self.parse_comma(), - self.parse_term(), - )))(remainder)?; - - Ok(( - remainder, - Term::Nary { - operation: op, - parameters: subterms, - }, - )) - } else { - let (remainder, subterms) = (self.parenthesised(separated_list0( - self.parse_comma(), - self.parse_term(), - )))(remainder)?; - - Ok((remainder, Term::Function(name, subterms))) - } - }, - || ParseError::ExpectedFunctionTerm, - ), - ) - } - - /// Parse an arithmetic expression - fn parse_arithmetic_expression( - &'a self, - ) -> impl FnMut(Span<'a>) -> IntermediateResult { - traced( - "parse_arithmetic_expression", - map_error( - move |input| { - let (remainder, first) = self.parse_arithmetic_product()(input)?; - let (remainder, expressions) = many0(alt(( - preceded( - delimited( - multispace_or_comment0, - token("+"), - multispace_or_comment0, - ), - map(self.parse_arithmetic_product(), |term| { - (ArithmeticOperator::Addition, term) - }), - ), - preceded( - delimited( - multispace_or_comment0, - token("-"), - multispace_or_comment0, - ), - map(self.parse_arithmetic_product(), |term| { - (ArithmeticOperator::Subtraction, term) - }), - ), - )))(remainder)?; - - Ok(( - remainder, - Self::fold_arithmetic_expressions(first, expressions), - )) - }, - || ParseError::ExpectedArithmeticExpression, - ), - ) - } - - /// Parse an arithmetic product, i.e., an expression involving - /// only `*` and `/` over subexpressions. 
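// The `parse_arithmetic_expression`/`parse_arithmetic_product` split above
// is the standard precedence encoding: `+`/`-` live one grammar level above
// `*`/`/`, and the collected (operator, operand) pairs are folded from the
// left so that `1 - 2 - 3` groups as `(1 - 2) - 3`, which is exactly what
// `fold_arithmetic_expressions` below does on `Term`s. A minimal i64
// evaluator of the same two-level grammar, assuming nom 7:

use nom::{
    branch::alt,
    character::complete::{char, i64 as integer},
    multi::many0,
    sequence::pair,
    IResult,
};

fn product(input: &str) -> IResult<&str, i64> {
    let (rest, first) = integer(input)?;
    let (rest, ops) = many0(pair(alt((char('*'), char('/'))), integer))(rest)?;
    let value = ops.into_iter().fold(first, |acc, (op, rhs)| {
        if op == '*' { acc * rhs } else { acc / rhs }
    });
    Ok((rest, value))
}

fn expression(input: &str) -> IResult<&str, i64> {
    let (rest, first) = product(input)?;
    let (rest, ops) = many0(pair(alt((char('+'), char('-'))), product))(rest)?;
    let value = ops.into_iter().fold(first, |acc, (op, rhs)| {
        if op == '+' { acc + rhs } else { acc - rhs }
    });
    Ok((rest, value))
}

fn main() {
    assert_eq!(expression("1+2*3"), Ok(("", 7)));
    assert_eq!(expression("1-2-3"), Ok(("", -4))); // left-associative
}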
-        fn parse_arithmetic_product(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult<Term> {
-            traced(
-                "parse_arithmetic_product",
-                map_error(
-                    move |input| {
-                        let (remainder, first) = self.parse_arithmetic_factor()(input)?;
-                        let (remainder, factors) = many0(alt((
-                            preceded(
-                                delimited(
-                                    multispace_or_comment0,
-                                    token("*"),
-                                    multispace_or_comment0,
-                                ),
-                                map(self.parse_arithmetic_factor(), |term| {
-                                    (ArithmeticOperator::Multiplication, term)
-                                }),
-                            ),
-                            preceded(
-                                delimited(
-                                    multispace_or_comment0,
-                                    token("/"),
-                                    multispace_or_comment0,
-                                ),
-                                map(self.parse_arithmetic_factor(), |term| {
-                                    (ArithmeticOperator::Division, term)
-                                }),
-                            ),
-                        )))(remainder)?;
-
-                        Ok((remainder, Self::fold_arithmetic_expressions(first, factors)))
-                    },
-                    || ParseError::ExpectedArithmeticProduct,
-                ),
-            )
-        }
-
-        /// Parse an arithmetic factor.
-        fn parse_arithmetic_factor(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult<Term> {
-            traced(
-                "parse_arithmetic_factor",
-                map_error(
-                    alt((
-                        self.parse_function_term(),
-                        self.parse_aggregate(),
-                        map(self.parse_primitive_term(), Term::Primitive),
-                        self.parse_parenthesised_term(),
-                    )),
-                    || ParseError::ExpectedArithmeticFactor,
-                ),
-            )
-        }
-
-        /// Fold a sequence of ([ArithmeticOperator], [PrimitiveTerm]) pairs into a single [Term].
-        fn fold_arithmetic_expressions(
-            initial: Term,
-            sequence: Vec<(ArithmeticOperator, Term)>,
-        ) -> Term {
-            sequence.into_iter().fold(initial, |acc, pair| {
-                let (operation, expression) = pair;
-
-                use ArithmeticOperator::*;
-
-                let operation = match operation {
-                    Addition => BinaryOperation::NumericAddition,
-                    Subtraction => BinaryOperation::NumericSubtraction,
-                    Multiplication => BinaryOperation::NumericMultiplication,
-                    Division => BinaryOperation::NumericDivision,
-                };
-
-                Term::Binary {
-                    operation,
-                    lhs: Box::new(acc),
-                    rhs: Box::new(expression),
-                }
-            })
-        }
-
-        /// Parse an expression of the form `<term> <operation> <term>` expressing a constraint.
-        fn parse_constraint(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult<Constraint> {
-            traced(
-                "parse_constraint",
-                map_error(
-                    map(
-                        tuple((
-                            self.parse_term(),
-                            self.parse_constraint_operator(),
-                            cut(self.parse_term()),
-                        )),
-                        |(lhs, operation, rhs)| operation.into_constraint(lhs, rhs),
-                    ),
-                    || ParseError::ExpectedConstraint,
-                ),
-            )
-        }
-
-        /// Parse a body expression.
-        fn parse_body_expression(
-            &'a self,
-        ) -> impl FnMut(Span<'a>) -> IntermediateResult<BodyExpression> {
-            traced(
-                "parse_body_expression",
-                map_error(
-                    alt((
-                        map(self.parse_constraint(), BodyExpression::Constraint),
-                        map(self.parse_literal(), BodyExpression::Literal),
-                    )),
-                    || ParseError::ExpectedBodyExpression,
-                ),
-            )
-        }
-
-        /// Parse a program in the rules language.
- pub fn parse_program(&'a self) -> impl FnMut(Span<'a>) -> IntermediateResult { - fn check_for_invalid_statement<'a, F>( - parser: &mut F, - input: Span<'a>, - ) -> IntermediateResult<'a, ()> - where - F: FnMut(Span<'a>) -> IntermediateResult, - { - if let Ok((_, e)) = parser(input) { - return Err(Err::Failure(e.at(input))); - } - - Ok((input, ())) - } - - traced("parse_program", move |input| { - let (remainder, _) = multispace_or_comment0(input)?; - let (remainder, _) = opt(self.parse_base())(remainder)?; - - check_for_invalid_statement( - &mut map(self.parse_base(), |_| ParseError::LateBaseDeclaration), - remainder, - )?; - - let (remainder, _) = many0(self.parse_prefix())(remainder)?; - - check_for_invalid_statement( - &mut map(self.parse_base(), |_| ParseError::LateBaseDeclaration), - remainder, - )?; - check_for_invalid_statement( - &mut map(self.parse_prefix(), |_| ParseError::LatePrefixDeclaration), - remainder, - )?; - - let mut statements = Vec::new(); - let mut output_predicates = Vec::new(); - let mut sources = Vec::new(); - let mut imports = Vec::new(); - let mut exports = Vec::new(); - - let (remainder, _) = many0(alt(( - map(self.parse_source(), |source| sources.push(source)), - map(self.parse_import(), |import| imports.push(import)), - map(self.parse_export(), |export| exports.push(export)), - map(self.parse_statement(), |statement| { - statements.push(statement) - }), - map(self.parse_output_directive(), |output_predicate| { - output_predicates.push(output_predicate) - }), - )))(remainder)?; - - check_for_invalid_statement( - &mut map(self.parse_base(), |_| ParseError::LateBaseDeclaration), - remainder, - )?; - check_for_invalid_statement( - &mut map(self.parse_prefix(), |_| ParseError::LatePrefixDeclaration), - remainder, - )?; - - let base = self.base().map(String::from); - let prefixes = self - .prefixes - .borrow() - .iter() - .map(|(&prefix, &iri)| (prefix.to_string(), iri.to_string())) - .collect::>(); - let mut rules = Vec::new(); - let mut facts = Vec::new(); - - statements.iter().for_each(|statement| match statement { - Statement::Fact(value) => facts.push(value.clone()), - Statement::Rule(value) => rules.push(value.clone()), - }); - - let mut program_builder = Program::builder() - .prefixes(prefixes) - .imports(sources) - .imports(imports) - .exports(exports) - .rules(rules) - .facts(facts); - - if let Some(base) = base { - program_builder = program_builder.base(base); - } - - if !output_predicates.is_empty() { - program_builder = program_builder.output_predicates(output_predicates); - } - - Ok((remainder, program_builder.build())) - }) - } - - /// Return the declared base, if set, or None. - #[must_use] - fn base(&self) -> Option<&'a str> { - *self.base.borrow() - } - } - - #[cfg(test)] - mod test { - use super::*; - use std::assert_matches::assert_matches; - use test_log::test; - - macro_rules! assert_parse { - ($parser:expr, $left:expr, $right:expr $(,) ?) => { - assert_eq!( - all_input_consumed($parser)($left).expect( - format!("failed to parse `{:?}`\nexpected `{:?}`", $left, $right).as_str() - ), - $right - ); - }; - } - - macro_rules! assert_fails { - ($parser:expr, $left:expr, $right:pat $(,) ?) => {{ - // Store in intermediate variable to prevent from being dropped too early - let result = all_input_consumed($parser)($left); - assert_matches!(result, Err($right)) - }}; - } - - macro_rules! assert_parse_error { - ($parser:expr, $left:expr, $right:pat $(,) ?) => { - assert_fails!($parser, $left, LocatedParseError { source: $right, .. 
}) - }; - } - - macro_rules! assert_expected_token { - ($parser:expr, $left:expr, $right:expr $(,) ?) => { - let _token = String::from($right); - assert_parse_error!($parser, $left, ParseError::ExpectedToken(_token),); - }; - } - - #[test] - fn base_directive() { - let base = "http://example.org/foo"; - let input = format!("@base <{base}> ."); - let parser = RuleParser::new(); - let b = Identifier(base.to_string()); - assert!(parser.base().is_none()); - assert_parse!(parser.parse_base(), input.as_str(), b); - assert_eq!(parser.base(), Some(base)); - } - - #[test] - fn prefix_directive() { - let prefix = unsafe { Span::new_from_raw_offset(8, 1, "foo", ()) }; - let iri = "http://example.org/foo"; - let input = format!("@prefix {prefix}: <{iri}> ."); - let parser = RuleParser::new(); - assert!(resolve_prefix(&parser.prefixes.borrow(), &prefix).is_err()); - assert_parse!(parser.parse_prefix(), input.as_str(), prefix); - assert_eq!( - resolve_prefix(&parser.prefixes.borrow(), &prefix).map_err(|_| ()), - Ok(iri) - ); - } - - #[test] - #[cfg_attr(miri, ignore)] - fn source() { - /// Helper function to create source-like imports - fn csv_import(predicate: Identifier, filename: &str, arity: i64) -> ImportDirective { - let attributes = MapDataValue::from_iter([ - ( - AnyDataValue::new_iri(PARAMETER_NAME_RESOURCE.to_string()), - AnyDataValue::new_plain_string(filename.to_string()), - ), - ( - AnyDataValue::new_iri(PARAMETER_NAME_FORMAT.to_string()), - TupleDataValue::from_iter( - vec![ - VALUE_FORMAT_ANY; - usize::try_from(arity).expect("required for these tests") - ] - .iter() - .map(|format| AnyDataValue::new_plain_string((*format).to_string())) - .collect::>(), - ) - .into(), - ), - ]); - ImportDirective::from(ImportExportDirective { - predicate, - format: FileFormat::CSV, - attributes, - }) - } - - let parser = RuleParser::new(); - let file = "drinks.csv"; - let predicate_name = "drink"; - let predicate = Identifier(predicate_name.to_string()); - let default_import = csv_import(predicate.clone(), file, 1); - - // rulewerk accepts all of these variants - let input = format!(r#"@source {predicate_name}[1]: load-csv("{file}") ."#); - assert_parse!(parser.parse_source(), &input, default_import); - let input = format!(r#"@source {predicate_name}[1] : load-csv("{file}") ."#); - assert_parse!(parser.parse_source(), &input, default_import); - let input = format!(r#"@source {predicate_name}[1] : load-csv ( "{file}" ) ."#); - assert_parse!(parser.parse_source(), &input, default_import); - let input = format!(r#"@source {predicate_name} [1] : load-csv ( "{file}" ) ."#); - assert_parse!(parser.parse_source(), &input, default_import); - } - - #[test] - fn fact() { - let parser = RuleParser::new(); - let predicate = "p"; - let value = "foo"; - let datatype = "bar"; - let p = Identifier(predicate.to_string()); - let v = value.to_string(); - let t = datatype.to_string(); - let fact = format!(r#"{predicate}("{value}"^^<{datatype}>) ."#); - - let expected_fact = Fact(Atom::new( - p, - vec![Term::Primitive(PrimitiveTerm::GroundTerm( - AnyDataValue::new_from_typed_literal(v, t).expect("unknown types should work"), - ))], - )); - - assert_parse!(parser.parse_fact(), &fact, expected_fact,); - } - - #[test] - fn fact_namespaced() { - let parser = RuleParser::new(); - let predicate = "p"; - let name = "foo"; - let prefix = unsafe { Span::new_from_raw_offset(8, 1, "eg", ()) }; - let iri = "http://example.org/foo"; - let prefix_declaration = format!("@prefix {prefix}: <{iri}> ."); - let p = 
Identifier(predicate.to_string()); - let pn = format!("{prefix}:{name}"); - let v = format!("{iri}{name}"); - let fact = format!(r#"{predicate}({pn}) ."#); - - assert_parse!(parser.parse_prefix(), &prefix_declaration, prefix); - - let expected_fact = Fact(Atom::new( - p, - vec![Term::Primitive(PrimitiveTerm::GroundTerm( - AnyDataValue::new_iri(v), - ))], - )); - - assert_parse!(parser.parse_fact(), &fact, expected_fact,); - } - - #[test] - fn fact_bnode() { - let parser = RuleParser::new(); - let predicate = "p"; - let name = "foo"; - let p = Identifier(predicate.to_string()); - let pn = format!("_:{name}"); - let fact = format!(r#"{predicate}({pn}) ."#); - - let expected_fact = Fact(Atom::new( - p, - vec![Term::Primitive(PrimitiveTerm::GroundTerm( - AnyDataValue::new_iri(pn), - ))], - )); - - assert_parse!(parser.parse_fact(), &fact, expected_fact,); - } - - #[test] - fn fact_numbers() { - let parser = RuleParser::new(); - let predicate = "p"; - let p = Identifier(predicate.to_string()); - let int = 23_i64; - let dbl = 42.0; - let dec = 13.37; - let fact = format!(r#"{predicate}({int}, {dbl:.1}E0, {dec:.2}) ."#); - - let expected_fact = Fact(Atom::new( - p, - vec![ - Term::Primitive(PrimitiveTerm::GroundTerm( - AnyDataValue::new_integer_from_i64(int), - )), - Term::Primitive(PrimitiveTerm::GroundTerm( - AnyDataValue::new_double_from_f64(dbl).expect("is not NaN"), - )), - Term::Primitive(PrimitiveTerm::GroundTerm( - AnyDataValue::new_double_from_f64(dec).expect("is not NaN"), - )), - ], - )); - - assert_parse!(parser.parse_fact(), &fact, expected_fact,); - } - - #[test] - fn fact_rdf_literal_xsd_string() { - let parser = RuleParser::new(); - - let prefix = unsafe { Span::new_from_raw_offset(8, 1, "xsd", ()) }; - let iri = "http://www.w3.org/2001/XMLSchema#"; - let prefix_declaration = format!("@prefix {prefix}: <{iri}> ."); - - assert_parse!(parser.parse_prefix(), &prefix_declaration, prefix); - - let predicate = "p"; - let value = "my nice string"; - let datatype = "xsd:string"; - - let p = Identifier(predicate.to_string()); - let v = value.to_string(); - let fact = format!(r#"{predicate}("{value}"^^{datatype}) ."#); - - let expected_fact = Fact(Atom::new( - p, - vec![Term::Primitive(PrimitiveTerm::GroundTerm( - AnyDataValue::new_plain_string(v), - ))], - )); - - assert_parse!(parser.parse_fact(), &fact, expected_fact,); - } - - #[test] - fn fact_string_literal() { - let parser = RuleParser::new(); - let predicate = "p"; - let value = "my nice string"; - let p = Identifier(predicate.to_string()); - let v = value.to_string(); - let fact = format!(r#"{predicate}("{value}") ."#); - - let expected_fact = Fact(Atom::new( - p, - vec![Term::Primitive(PrimitiveTerm::GroundTerm( - AnyDataValue::new_plain_string(v), - ))], - )); - - assert_parse!(parser.parse_fact(), &fact, expected_fact,); - } - - #[test] - fn fact_language_string() { - let parser = RuleParser::new(); - let predicate = "p"; - let v = "Qapla"; - let langtag = "tlh"; - let p = Identifier(predicate.to_string()); - let value = v.to_string(); - let fact = format!(r#"{predicate}("{v}"@{langtag}) ."#); - let tag = langtag.to_string(); - - let expected_fact = Fact(Atom::new( - p, - vec![Term::Primitive(PrimitiveTerm::GroundTerm( - AnyDataValue::new_language_tagged_string(value, tag), - ))], - )); - - assert_parse!(parser.parse_fact(), &fact, expected_fact); - } - - #[test] - fn fact_abstract() { - let parser = RuleParser::new(); - let predicate = "p"; - let name = "a"; - let p = Identifier(predicate.to_string()); - let fact = 
format!(r#"{predicate}({name}) ."#); - - let expected_fact = Fact(Atom::new( - p, - vec![Term::Primitive(PrimitiveTerm::GroundTerm( - AnyDataValue::new_iri(name.to_string()), - ))], - )); - - assert_parse!(parser.parse_fact(), &fact, expected_fact,); - } - - #[test] - fn fact_comment() { - let parser = RuleParser::new(); - let predicate = "p"; - let value = "foo"; - let datatype = "bar"; - let p = Identifier(predicate.to_string()); - let v = value.to_string(); - let t = datatype.to_string(); - let fact = format!( - r#"{predicate}(% comment 1 - "{value}"^^<{datatype}> % comment 2 - ) % comment 3 - . % comment 4 - %"# - ); - - let expected_fact = Fact(Atom::new( - p, - vec![Term::Primitive(PrimitiveTerm::GroundTerm( - AnyDataValue::new_from_typed_literal(v, t) - .expect("unknown datatype should always work"), - ))], - )); - - assert_parse!(parser.parse_fact(), &fact, expected_fact,); - } - - #[test] - #[cfg_attr(miri, ignore)] - fn filter() { - let parser = RuleParser::new(); - let aa = "A"; - let a = Identifier(aa.to_string()); - let bb = "B"; - let b = Identifier(bb.to_string()); - let pp = "P"; - let p = Identifier(pp.to_string()); - let xx = "X"; - let x = xx.to_string(); - let yy = "Y"; - let y = yy.to_string(); - let zz = "Z"; - let z = zz.to_string(); - - let rule = format!( - "{pp}(?{xx}) :- {aa}(?{xx}, ?{yy}), ?{yy} > ?{xx}, {bb}(?{zz}), ?{xx} = 3, ?{zz} < 7, ?{xx} <= ?{zz}, ?{zz} >= ?{yy} ." - ); - - let expected_rule = Rule::new( - vec![Atom::new( - p, - vec![Term::Primitive(PrimitiveTerm::Variable( - Variable::Universal(x.clone()), - ))], - )], - vec![ - Literal::Positive(Atom::new( - a, - vec![ - Term::Primitive(PrimitiveTerm::Variable(Variable::Universal( - x.clone(), - ))), - Term::Primitive(PrimitiveTerm::Variable(Variable::Universal( - y.clone(), - ))), - ], - )), - Literal::Positive(Atom::new( - b, - vec![Term::Primitive(PrimitiveTerm::Variable( - Variable::Universal(z.clone()), - ))], - )), - ], - vec![ - Constraint::GreaterThan( - Term::Primitive(PrimitiveTerm::Variable(Variable::Universal(y.clone()))), - Term::Primitive(PrimitiveTerm::Variable(Variable::Universal(x.clone()))), - ), - Constraint::Equals( - Term::Primitive(PrimitiveTerm::Variable(Variable::Universal(x.clone()))), - Term::Primitive(PrimitiveTerm::GroundTerm( - AnyDataValue::new_integer_from_i64(3), - )), - ), - Constraint::LessThan( - Term::Primitive(PrimitiveTerm::Variable(Variable::Universal(z.clone()))), - Term::Primitive(PrimitiveTerm::GroundTerm( - AnyDataValue::new_integer_from_i64(7), - )), - ), - Constraint::LessThanEq( - Term::Primitive(PrimitiveTerm::Variable(Variable::Universal(x))), - Term::Primitive(PrimitiveTerm::Variable(Variable::Universal(z.clone()))), - ), - Constraint::GreaterThanEq( - Term::Primitive(PrimitiveTerm::Variable(Variable::Universal(z))), - Term::Primitive(PrimitiveTerm::Variable(Variable::Universal(y))), - ), - ], - ); - - assert_parse!(parser.parse_rule(), &rule, expected_rule,); - } - - #[test] - #[allow(clippy::redundant_clone)] - fn parse_output() { - let parser = RuleParser::new(); - - let j2 = Identifier("J2".to_string()); - - assert_parse!(parser.parse_output_directive(), "@output J2 .", j2.clone()); - assert_parse_error!( - parser.parse_output_directive(), - "@output J2[3] .", - ParseError::ExpectedOutputDeclaration - ); - } - - #[test] - fn parse_errors() { - let parser = RuleParser::new(); - - assert_expected_token!(parser.parse_dot(), "", "."); - assert_expected_token!(parser.parse_dot(), ":-", "."); - assert_expected_token!(parser.parse_comma(), "", ","); - 
assert_expected_token!(parser.parse_comma(), ":-", ","); - assert_expected_token!(parser.parse_not(), "", "~"); - assert_expected_token!(parser.parse_not(), ":-", "~"); - assert_expected_token!(parser.parse_arrow(), "", ":-"); - assert_expected_token!(parser.parse_arrow(), "-:", ":-"); - assert_expected_token!(parser.parse_open_parenthesis(), "", "("); - assert_expected_token!(parser.parse_open_parenthesis(), "-:", "("); - assert_expected_token!(parser.parse_close_parenthesis(), "", ")"); - assert_expected_token!(parser.parse_close_parenthesis(), "-:", ")"); - - assert_parse_error!( - parser.parse_base(), - "@base . @base .", - ParseError::LateBaseDeclaration - ); - - assert_parse_error!( - parser.parse_program(), - "@prefix f: . @base .", - ParseError::LateBaseDeclaration - ); - - assert_parse_error!( - parser.parse_program(), - "@output p . @base .", - ParseError::LateBaseDeclaration - ); - - assert_parse_error!( - parser.parse_program(), - "@output p . @prefix g: .", - ParseError::LatePrefixDeclaration - ); - } - #[test] - #[cfg_attr(miri, ignore)] - fn parse_function_terms() { - let parser = RuleParser::new(); - - let twenty_three = Term::Primitive(PrimitiveTerm::GroundTerm( - AnyDataValue::new_integer_from_i64(23), - )); - let fourty_two = Term::Primitive(PrimitiveTerm::GroundTerm( - AnyDataValue::new_integer_from_i64(42), - )); - let twenty_three_times_fourty_two = Term::Binary { - operation: BinaryOperation::NumericMultiplication, - lhs: Box::new(twenty_three.clone()), - rhs: Box::new(fourty_two.clone()), - }; - - assert_parse_error!( - parser.parse_function_term(), - "", - ParseError::ExpectedFunctionTerm - ); - - let nullary_function = - Term::Function(Identifier(String::from("nullary_function")), vec![]); - assert_parse!( - parser.parse_function_term(), - "nullary_function()", - nullary_function - ); - assert_parse!( - parser.parse_function_term(), - "nullary_function( )", - nullary_function - ); - assert_parse_error!( - parser.parse_function_term(), - "nullary_function( () )", - ParseError::ExpectedFunctionTerm - ); - - let unary_function = Term::Function( - Identifier(String::from("unary_function")), - vec![fourty_two.clone()], - ); - assert_parse!( - parser.parse_function_term(), - "unary_function(42)", - unary_function - ); - assert_parse!( - parser.parse_function_term(), - "unary_function((42))", - unary_function - ); - assert_parse!( - parser.parse_function_term(), - "unary_function(( (42 )))", - unary_function - ); - - let binary_function = Term::Function( - Identifier(String::from("binary_function")), - vec![fourty_two.clone(), twenty_three.clone()], - ); - assert_parse!( - parser.parse_function_term(), - "binary_function(42, 23)", - binary_function - ); - - let function_with_nested_algebraic_expression = Term::Function( - Identifier(String::from("function")), - vec![twenty_three_times_fourty_two], - ); - assert_parse!( - parser.parse_function_term(), - "function( 23 *42)", - function_with_nested_algebraic_expression - ); - - let nested_function = Term::Function( - Identifier(String::from("nested_function")), - vec![nullary_function.clone()], - ); - - assert_parse!( - parser.parse_function_term(), - "nested_function(nullary_function())", - nested_function - ); - - let triple_nested_function = Term::Function( - Identifier(String::from("nested_function")), - vec![Term::Function( - Identifier(String::from("nested_function")), - vec![Term::Function( - Identifier(String::from("nested_function")), - vec![nullary_function.clone()], - )], - )], - ); - assert_parse!( - 
parser.parse_function_term(), - "nested_function( nested_function( (nested_function(nullary_function()) ) ))", - triple_nested_function - ); - } - - #[test] - fn parse_terms() { - let parser = RuleParser::new(); - - assert_parse_error!(parser.parse_term(), "", ParseError::ExpectedTerm); - - assert_parse!( - parser.parse_term(), - "constant", - Term::Primitive(PrimitiveTerm::GroundTerm(AnyDataValue::new_iri( - String::from("constant") - ))) - ); - } - - #[test] - fn parse_aggregates() { - let parser = RuleParser::new(); - - assert_parse_error!(parser.parse_aggregate(), "", ParseError::ExpectedAggregate); - - assert_parse!( - parser.parse_aggregate(), - "#min(?VARIABLE)", - Term::Aggregation(Aggregate { - logical_aggregate_operation: LogicalAggregateOperation::MinNumber, - terms: vec![Term::Primitive(PrimitiveTerm::Variable( - Variable::Universal(String::from("VARIABLE")) - ))] - }) - ); - - assert_parse_error!( - parser.parse_aggregate(), - "#test(?VAR1, ?VAR2)", - ParseError::ExpectedAggregate - ) - } - - #[test] - fn parse_unary_function() { - let parser = RuleParser::new(); - - let expression = "ABS(4)"; - let expected_term = Term::Unary( - UnaryOperation::NumericAbsolute, - Box::new(Term::Primitive(PrimitiveTerm::GroundTerm( - AnyDataValue::new_integer_from_i64(4), - ))), - ); - - assert_parse!(parser.parse_arithmetic_factor(), expression, expected_term); - } - - #[test] - fn parse_arithmetic_and_functions() { - let parser = RuleParser::new(); - - let expression = "5 * ABS(SQRT(4) - 3)"; - - let expected_term = Term::Binary { - operation: BinaryOperation::NumericMultiplication, - lhs: Box::new(Term::Primitive(PrimitiveTerm::GroundTerm( - AnyDataValue::new_integer_from_i64(5), - ))), - rhs: Box::new(Term::Unary( - UnaryOperation::NumericAbsolute, - Box::new(Term::Binary { - operation: BinaryOperation::NumericSubtraction, - lhs: Box::new(Term::Unary( - UnaryOperation::NumericSquareroot, - Box::new(Term::Primitive(PrimitiveTerm::GroundTerm( - AnyDataValue::new_integer_from_i64(4), - ))), - )), - rhs: Box::new(Term::Primitive(PrimitiveTerm::GroundTerm( - AnyDataValue::new_integer_from_i64(3), - ))), - }), - )), - }; - - assert_parse!(parser.parse_term(), expression, expected_term); - } - - #[test] - fn parse_assignment() { - let parser = RuleParser::new(); - - let expression = "?X = ABS(?Y - 5) * (7 + ?Z)"; - - let variable = Term::Primitive(PrimitiveTerm::Variable(Variable::Universal( - "X".to_string(), - ))); - - let term = Term::Binary { - operation: BinaryOperation::NumericMultiplication, - lhs: Box::new(Term::Unary( - UnaryOperation::NumericAbsolute, - Box::new(Term::Binary { - operation: BinaryOperation::NumericSubtraction, - lhs: Box::new(Term::Primitive(PrimitiveTerm::Variable( - Variable::Universal("Y".to_string()), - ))), - rhs: Box::new(Term::Primitive(PrimitiveTerm::GroundTerm( - AnyDataValue::new_integer_from_i64(5), - ))), - }), - )), - rhs: Box::new(Term::Binary { - operation: BinaryOperation::NumericAddition, - lhs: Box::new(Term::Primitive(PrimitiveTerm::GroundTerm( - AnyDataValue::new_integer_from_i64(7), - ))), - rhs: Box::new(Term::Primitive(PrimitiveTerm::Variable( - Variable::Universal("Z".to_string()), - ))), - }), - }; - - let expected = Constraint::Equals(variable, term); - - assert_parse!(parser.parse_constraint(), expression, expected); - } - - #[test] - fn parse_complex_condition() { - let parser = RuleParser::new(); - - let expression = "ABS(?X - ?Y) <= ?Z + SQRT(?Y)"; - - let left_term = Term::Unary( - UnaryOperation::NumericAbsolute, - Box::new(Term::Binary { 
- operation: BinaryOperation::NumericSubtraction, - lhs: Box::new(Term::Primitive(PrimitiveTerm::Variable( - Variable::Universal(String::from("X")), - ))), - rhs: Box::new(Term::Primitive(PrimitiveTerm::Variable( - Variable::Universal(String::from("Y")), - ))), - }), - ); - - let right_term = Term::Binary { - operation: BinaryOperation::NumericAddition, - lhs: Box::new(Term::Primitive(PrimitiveTerm::Variable( - Variable::Universal(String::from("Z")), - ))), - rhs: Box::new(Term::Unary( - UnaryOperation::NumericSquareroot, - Box::new(Term::Primitive(PrimitiveTerm::Variable( - Variable::Universal(String::from("Y")), - ))), - )), - }; - - let expected = Constraint::LessThanEq(left_term, right_term); - - assert_parse!(parser.parse_constraint(), expression, expected); - } - - #[test] - fn map_literal() { - let parser = RuleParser::new(); - assert_parse!( - parser.parse_map_literal(), - r#"{}"#, - MapDataValue::from_iter([]), - ); - - let ident = "foo"; - let key = AnyDataValue::new_iri(ident.to_string()); - - let entry = format!("{ident}=23"); - assert_parse!( - parser.parse_map_entry(), - &entry, - (key.clone(), AnyDataValue::new_integer_from_i64(23)) - ); - - let pairs = vec![ - ( - AnyDataValue::new_plain_string("23".to_string()), - AnyDataValue::new_integer_from_i64(42), - ), - ( - AnyDataValue::new_iri("foo".to_string()), - AnyDataValue::new_integer_from_i64(23), - ), - ]; - - assert_parse!( - parser.parse_map_literal(), - r#"{foo = 23, "23" = 42}"#, - pairs.clone().into_iter().collect::() - ); - } - - #[test] - fn nested_map_literal() { - let parser = RuleParser::new(); - - let pairs = vec![( - AnyDataValue::new_iri("inner".to_string()), - MapDataValue::from_iter([]).into(), - )]; - - assert_parse!( - parser.parse_map_literal(), - r#"{inner = {}}"#, - pairs.clone().into_iter().collect::() - ); - } - - #[test] - fn tuple_literal() { - let parser = RuleParser::new(); - - let expected: TupleDataValue = [ - AnyDataValue::new_iri("something".to_string()), - AnyDataValue::new_integer_from_i64(42), - TupleDataValue::from_iter([]).into(), - ] - .into_iter() - .collect(); - - assert_parse!( - parser.parse_tuple_literal(), - r#"(something, 42, ())"#, - expected - ); - } - - #[test] - fn import_export() { - let parser = RuleParser::new(); - - let name = "p".to_string(); - let predicate = Identifier(name.clone()); - let qualified = format!("{name} "); - let arguments = r#"{delimiter = ";", resource = }"#; - let spec = format!("{qualified} :- dsv{arguments}"); - let directive = format!("@import {spec} ."); - let directive_export = format!("@export {spec} ."); - let attributes = parser.parse_map_literal()(arguments.into()).unwrap().1; - - assert_parse!( - parser.parse_import_export_spec(), - &spec, - ImportExportDirective { - predicate: predicate.clone(), - format: FileFormat::DSV, - attributes: attributes.clone(), - } - ); - - assert_parse!( - parser.parse_import(), - &directive, - ImportDirective::from(ImportExportDirective { - predicate: predicate.clone(), - format: FileFormat::DSV, - attributes: attributes.clone() - }) - ); - - assert_parse!( - parser.parse_export(), - &directive_export, - ExportDirective::from(ImportExportDirective { - predicate: predicate.clone(), - format: FileFormat::DSV, - attributes: attributes.clone() - }) - ); - } - } -} - -/// NEW PARSER -use std::cell::RefCell; - -use nom::character::complete::multispace0; -use nom::combinator::{opt, recognize}; -use nom::error::ParseError; -use nom::sequence::{delimited, pair}; -use nom::Parser; -use nom::{ - branch::alt, - 
combinator::verify,
-    multi::{many0, many1},
-    sequence::tuple,
-    IResult,
-};
-use nom_supreme::{context::ContextError, error::StackContext};
-
-use super::lexer::{
-    arrow, at, caret, close_brace, close_paren, colon, comma, dot, equal, exclamation_mark, exp,
-    greater, greater_equal, hash, less, less_equal, lex_comment, lex_doc_comment, lex_iri,
-    lex_number, lex_prefixed_ident, lex_string, lex_tag, lex_toplevel_doc_comment, lex_whitespace,
-    minus, open_brace, open_paren, plus, question_mark, skip_to_statement_end, slash, star, tilde,
-    underscore, unequal, Context, Error, ErrorTree, ParserState, Span,
-};
-
-fn outer_span<'a>(input: Span<'a>, rest_input: Span<'a>) -> Span<'a> {
-    unsafe {
-        // dbg!(&input, &span, &rest_input);
-        Span::new_from_raw_offset(
-            input.location_offset(),
-            input.location_line(),
-            &input[..(rest_input.location_offset() - input.location_offset())],
-            (),
-        )
-    }
-}
-
-// fn expect_abc<'a, 's, O: Copy, E: ParseError<Input<'a, 's>>, F: Parser<Input<'a, 's>, O, E>>(
-//     mut parser: F,
-//     error_msg: impl ToString,
-//     error_output: O,
-//     errors: ParserState<'s>,
-// ) -> impl FnMut(Input<'a, 's>) -> IResult<Input<'a, 's>, O, E> {
-//     move |input| match parser.parse(input) {
-//         Ok(result) => Ok(result),
-//         Err(nom::Err::Error(_)) | Err(nom::Err::Failure(_)) => {
-//             let err = Error {
-//                 pos: Position {
-//                     offset: input.input.location_offset(),
-//                     line: input.input.location_line(),
-//                     column: input.input.get_utf8_column() as u32,
-//                 },
-//                 msg: error_msg.to_string(),
-//                 context: vec![],
-//             };
-//             errors.report_error(err);
-//             Ok((input, error_output))
-//         }
-//         Err(err) => Err(err),
-//     }
-// }
-
-fn recover<'a, 's, E>(
-    mut parser: impl Parser<Input<'a, 's>, Statement<'a>, E>,
-    error_msg: impl ToString,
-    context: Context,
-    _errors: ParserState<'s>,
-) -> impl FnMut(Input<'a, 's>) -> IResult<Input<'a, 's>, Statement<'a>, E> {
-    move |input: Input<'a, 's>| match parser.parse(input) {
-        Ok(result) => Ok(result),
-        Err(err) if input.input.is_empty() => Err(err),
-        Err(nom::Err::Error(_)) | Err(nom::Err::Failure(_)) => {
-            let _err = Error {
-                pos: Position {
-                    offset: input.input.location_offset(),
-                    line: input.input.location_line(),
-                    column: input.input.get_utf8_column() as u32,
-                },
-                msg: error_msg.to_string(),
-                context: vec![context],
-            };
-            // errors.report_error(err);
-            let (rest_input, span) = skip_to_statement_end::<ErrorTree<Input<'a, 's>>>(input);
-            Ok((rest_input, Statement::Error(span)))
-        }
-        Err(err) => Err(err),
-    }
-}
-
-fn report_error<'a, 's, O>(
-    mut parser: impl Parser<Input<'a, 's>, O, ErrorTree<Input<'a, 's>>>,
-) -> impl FnMut(Input<'a, 's>) -> IResult<Input<'a, 's>, O, ErrorTree<Input<'a, 's>>> {
-    move |input| match parser.parse(input) {
-        Ok(result) => Ok(result),
-        Err(e) => {
-            if input.input.is_empty() {
-                return Err(e);
-            };
-            match &e {
-                nom::Err::Incomplete(_) => (),
-                nom::Err::Error(err) | nom::Err::Failure(err) => {
-                    let (_deepest_pos, errors) = get_deepest_errors(err);
-                    for error in errors {
-                        input.parser_state.report_error(error);
-                    }
-                    // let error = Error(deepest_pos, format!(""));
-                    // // input.parser_state.report_error(error)
-                }
-            };
-            Err(e)
-        }
-    }
-}
-
-fn get_deepest_errors<'a, 's>(e: &'a ErrorTree<Input<'a, 's>>) -> (Position, Vec<Error>) {
-    match e {
-        ErrorTree::Base { location, ..
} => { - let span = location.input; - let err_pos = Position { - offset: span.location_offset(), - line: span.location_line(), - column: span.get_utf8_column() as u32, - }; - ( - err_pos, - vec![Error { - pos: err_pos, - msg: "".to_string(), - context: Vec::new(), - }], - ) - } - ErrorTree::Stack { base, contexts } => { - // let mut err_pos = Position::default(); - match &**base { - ErrorTree::Base { location, .. } => { - let span = location.input; - let err_pos = Position { - offset: span.location_offset(), - line: span.location_line(), - column: span.get_utf8_column() as u32, - }; - let mut msg = String::from(""); - for (_, context) in contexts { - match context { - StackContext::Kind(_) => todo!(), - StackContext::Context(c) => match c { - Context::Tag(t) => { - msg.push_str(t); - } - _ => (), - }, - } - } - ( - err_pos, - vec![Error { - pos: err_pos, - msg, - context: context_strs(contexts), - }], - ) - } - ErrorTree::Stack { base, contexts } => { - let (pos, mut deepest_errors) = get_deepest_errors(base); - let contexts = context_strs(contexts); - for error in &mut deepest_errors { - error.context.append(&mut contexts.clone()); - } - (pos, deepest_errors) - } - ErrorTree::Alt(_error_tree) => { - let (pos, mut deepest_errors) = get_deepest_errors(base); - let contexts = context_strs(contexts); - for error in &mut deepest_errors { - error.context.append(&mut contexts.clone()); - } - (pos, deepest_errors) - } - } - } - ErrorTree::Alt(vec) => { - let mut return_vec: Vec = Vec::new(); - let mut deepest_pos = Position::default(); - for error in vec { - let (pos, mut deepest_errors) = get_deepest_errors(error); - if pos > deepest_pos { - deepest_pos = pos; - return_vec.clear(); - return_vec.append(&mut deepest_errors); - } else if pos == deepest_pos { - return_vec.append(&mut deepest_errors); - } - } - (deepest_pos, return_vec) - } - } -} - -fn context_strs(contexts: &Vec<(Input<'_, '_>, StackContext)>) -> Vec { - contexts - .iter() - .map(|(_, c)| match c { - StackContext::Kind(_) => todo!(), - StackContext::Context(c) => *c, - }) - .collect() -} - -pub(crate) fn context<'a, 's, P, E, F, O>( - context: P, - mut f: F, -) -> impl FnMut(Input<'a, 's>) -> IResult, O, E> -where - P: Clone, - F: Parser, O, E>, - E: ContextError, P>, -{ - move |i| match f.parse(i.clone()) { - Ok(o) => Ok(o), - Err(nom::Err::Incomplete(i)) => Err(nom::Err::Incomplete(i)), - Err(nom::Err::Error(e)) => Err(nom::Err::Error(E::add_context(i, context.clone(), e))), - Err(nom::Err::Failure(e)) => Err(nom::Err::Failure(E::add_context(i, context.clone(), e))), - } -} - -fn wsoc0<'a, 's, E>(input: Input<'a, 's>) -> IResult, Option>, E> -where - E: ParseError> + ContextError, Context>, -{ - many0(alt((lex_whitespace, lex_comment)))(input).map(|(rest_input, vec)| { - if vec.is_empty() { - (rest_input, None) - } else { - ( - rest_input, - Some(Wsoc { - span: outer_span(input.input, rest_input.input), - token: vec, - }), - ) - } - }) -} - -fn wsoc1<'a, 's, E: ParseError> + ContextError, Context>>( - input: Input<'a, 's>, -) -> IResult, Wsoc<'a>, E> { - many1(alt((lex_whitespace, lex_comment)))(input).map(|(rest_input, vec)| { - ( - rest_input, - Wsoc { - span: outer_span(input.input, rest_input.input), - token: vec, - }, - ) - }) -} - -/// Parse a full program consisting of directives, facts, rules and comments. 
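-/// A minimal usage sketch via [`parse_program_str`] below; the input fact is a
-/// hypothetical example (the same fact appears in the tests at the end of this file):
-///
-/// ```ignore
-/// let (program, errors) = parse_program_str("a(B,C).");
-/// assert!(errors.is_empty());
-/// println!("{program}");
-/// ```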
-pub fn parse_program<
-    'a,
-    's,
-    E: ParseError<Input<'a, 's>> + ContextError<Input<'a, 's>, Context>,
->(
-    input: Input<'a, 's>,
-) -> (Program<'a>, Vec<Error>) {
-    let result = context(
-        Context::Program,
-        pair(
-            opt(lex_toplevel_doc_comment::<ErrorTree<Input<'a, 's>>>),
-            delimited(
-                multispace0,
-                many0(recover(
-                    report_error(delimited(
-                        multispace0,
-                        alt((
-                            // TODO: Discuss whether directives should only get parsed at the beginning of the source file
-                            parse_rule,
-                            parse_fact,
-                            parse_directive,
-                            parse_comment,
-                        )),
-                        multispace0,
-                    )),
-                    "failed to parse statement",
-                    Context::Program,
-                    input.parser_state,
-                )),
-                multispace0,
-            ),
-        ),
-    )(input);
-    match result {
-        Ok((rest_input, (tl_doc_comment, statements))) => {
-            if !rest_input.input.is_empty() {
-                panic!("Parser did not consume all input. This is considered a bug. Please report it. Unparsed input is: {:?}", rest_input);
-            };
-            (
-                Program {
-                    span: input.input,
-                    tl_doc_comment,
-                    statements,
-                },
-                rest_input.parser_state.errors.take(),
-            )
-        }
-        Err(e) => panic!(
-            "Parser can't fail. If it fails it's a bug! Please report it. Got: {:?}",
-            e
-        ),
-    }
-}
-
-/// This function takes a `&str` of source code (for example loaded from a file) and
-/// produces an AST and, potentially, a vector of errors.
-pub fn parse_program_str(input: &str) -> (Program<'_>, Vec<Error>) {
-    let refcell = RefCell::new(Vec::new());
-    let parser_state = ParserState { errors: &refcell };
-    let input = Input {
-        input: Span::new(input),
-        parser_state,
-    };
-    parse_program::<ErrorTree<Input<'_, '_>>>(input)
-}
-
-/// Parse a fact directly
-pub fn parse_fact_str(_input: &str) -> (Fact<'_>, Vec<Error>) {
-    todo!("parse fact directly from string input")
-}
-
-/// Parse normal comments that start with a `%` and end at the line ending.
-fn parse_comment<'a, 's, E: ParseError<Input<'a, 's>> + ContextError<Input<'a, 's>, Context>>(
-    input: Input<'a, 's>,
-) -> IResult<Input<'a, 's>, Statement<'a>, E> {
-    lex_comment(input).map(|(rest_input, comment)| (rest_input, Statement::Comment(comment)))
-}
-
-/// Parse a fact of the form `predicateName(term1, term2, …).`
-fn parse_fact<'a, 's, E: ParseError<Input<'a, 's>> + ContextError<Input<'a, 's>, Context>>(
-    input: Input<'a, 's>,
-) -> IResult<Input<'a, 's>, Statement<'a>, E> {
-    // dbg!(&input.parser_state.labels);
-    context(
-        Context::Fact,
-        tuple((opt(lex_doc_comment), parse_fact_atom, wsoc0, dot)),
-    )(input)
-    .map(|(rest_input, (doc_comment, atom, _ws, dot))| {
-        (
-            rest_input,
-            Statement::Fact {
-                span: outer_span(input.input, rest_input.input),
-                doc_comment,
-                fact: atom,
-                dot,
-            },
-        )
-    })
-}
-
-fn parse_fact_atom<'a, 's, E: ParseError<Input<'a, 's>> + ContextError<Input<'a, 's>, Context>>(
-    input: Input<'a, 's>,
-) -> IResult<Input<'a, 's>, Fact<'a>, E> {
-    // TODO: Add Context
-    match parse_named_tuple::<E>(input) {
-        Ok((rest_input, named_tuple)) => Ok((rest_input, Fact::NamedTuple(named_tuple))),
-        Err(_) => match parse_map::<E>(input) {
-            Ok((rest_input, map)) => Ok((rest_input, Fact::Map(map))),
-            Err(err) => Err(err),
-        },
-    }
-}
-
-/// Parse a rule of the form `headPredicate1(term1, term2, …), headPredicate2(term1, term2, …) :- bodyPredicate(term1, …), term1 >= (term2 + term3) * function(term1, …) .`
-fn parse_rule<'a, 's, E: ParseError<Input<'a, 's>> + ContextError<Input<'a, 's>, Context>>(
-    input: Input<'a, 's>,
-) -> IResult<Input<'a, 's>, Statement<'a>, E> {
-    context(
-        Context::Rule,
-        tuple((
-            opt(lex_doc_comment),
-            parse_head,
-            wsoc0,
-            arrow,
-            wsoc0,
-            parse_body,
-            wsoc0,
-            dot,
-        )),
-    )(input)
-    .map(
-        |(rest_input, (doc_comment, head, _ws1, arrow, _ws2, body, _ws3, dot))| {
-            (
-                rest_input,
-                Statement::Rule {
-                    span: outer_span(input.input, rest_input.input),
-                    doc_comment,
-                    head,
-                    arrow,
-                    body,
-                    dot,
-                },
-            )
-        },
- ) -} - -/// Parse the head atoms of a rule. -fn parse_head<'a, 's, E: ParseError> + ContextError, Context>>( - input: Input<'a, 's>, -) -> IResult, List<'a, Atom<'a>>, E> { - context(Context::RuleHead, parse_list(parse_atoms))(input) -} - -/// Parse the body atoms of a rule. -fn parse_body<'a, 's, E: ParseError> + ContextError, Context>>( - input: Input<'a, 's>, -) -> IResult, List<'a, Atom<'a>>, E> { - context(Context::RuleBody, parse_list(parse_atoms))(input) -} - -/// Parse the directives (@base, @prefix, @import, @export, @output). -fn parse_directive<'a, 's, E: ParseError> + ContextError, Context>>( - input: Input<'a, 's>, -) -> IResult, Statement<'a>, E> { - context( - Context::Directive, - alt(( - parse_base_directive, - parse_prefix_directive, - parse_import_directive, - parse_export_directive, - parse_output_directive, - )), - )(input) - .map(|(rest, directive)| (rest, Statement::Directive(directive))) -} - -/// Parse the base directive. -fn parse_base_directive< - 'a, - 's, - E: ParseError> + ContextError, Context>, ->( - input: Input<'a, 's>, -) -> IResult, Directive<'a>, E> { - context( - Context::DirectiveBase, - tuple(( - opt(lex_doc_comment), - recognize(pair( - at, - verify(lex_tag, |token| *token.fragment() == "base"), - )), - wsoc0, - lex_iri, - wsoc0, - dot, - )), - )(input) - .map( - |(rest_input, (doc_comment, _kw, _ws1, base_iri, _ws2, dot))| { - ( - rest_input, - Directive::Base { - span: outer_span(input.input, rest_input.input), - doc_comment, - base_iri, - dot, - }, - ) - }, - ) -} - -/// Parse the prefix directive. -fn parse_prefix_directive< - 'a, - 's, - E: ParseError> + ContextError, Context>, ->( - input: Input<'a, 's>, -) -> IResult, Directive<'a>, E> { - context( - Context::DirectivePrefix, - tuple(( - opt(lex_doc_comment), - recognize(pair( - at, - verify(lex_tag, |token| *token.fragment() == "prefix"), - )), - wsoc0, - recognize(pair(opt(lex_tag), colon)), - wsoc0, - lex_iri, - wsoc0, - dot, - )), - )(input) - .map( - |(rest_input, (doc_comment, _kw, _ws1, prefix, _ws2, prefix_iri, _ws3, dot))| { - ( - rest_input, - Directive::Prefix { - span: outer_span(input.input, rest_input.input), - doc_comment, - prefix: prefix.input, - prefix_iri, - dot, - }, - ) - }, - ) -} - -/// Parse the import directive. -fn parse_import_directive< - 'a, - 's, - E: ParseError> + ContextError, Context>, ->( - input: Input<'a, 's>, -) -> IResult, Directive<'a>, E> { - context( - Context::DirectiveImport, - tuple(( - opt(lex_doc_comment), - recognize(pair( - at, - verify(lex_tag, |token| *token.fragment() == "import"), - )), - wsoc1, - lex_tag, - wsoc0, - arrow, - wsoc0, - parse_map, - wsoc0, - dot, - )), - )(input) - .map( - |(rest_input, (doc_comment, _kw, _ws1, predicate, _ws2, arrow, _ws3, map, _ws4, dot))| { - ( - rest_input, - Directive::Import { - span: outer_span(input.input, rest_input.input), - doc_comment, - predicate, - arrow, - map, - dot, - }, - ) - }, - ) -} - -/// Parse the export directive. 
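-/// For example: `@export a:-csv{}.` (the same directive appears in the `syntax` test below).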
-fn parse_export_directive< - 'a, - 's, - E: ParseError> + ContextError, Context>, ->( - input: Input<'a, 's>, -) -> IResult, Directive<'a>, E> { - context( - Context::DirectiveExport, - tuple(( - opt(lex_doc_comment), - recognize(pair( - at, - verify(lex_tag, |token| *token.fragment() == "export"), - )), - wsoc1, - lex_tag, - wsoc0, - arrow, - wsoc0, - parse_map, - wsoc0, - dot, - )), - )(input) - .map( - |(rest_input, (doc_comment, _kw, _ws1, predicate, _ws2, arrow, _ws3, map, _ws4, dot))| { - ( - rest_input, - Directive::Export { - span: outer_span(input.input, rest_input.input), - doc_comment, - predicate, - arrow, - map, - dot, - }, - ) - }, - ) -} - -/// Parse the output directive. -fn parse_output_directive< - 'a, - 's, - E: ParseError> + ContextError, Context>, ->( - input: Input<'a, 's>, -) -> IResult, Directive<'a>, E> { - context( - Context::DirectiveOutput, - tuple(( - opt(lex_doc_comment), - recognize(pair( - at, - verify(lex_tag, |token| *token.fragment() == "output"), - )), - wsoc1, - opt(parse_list(lex_tag)), - wsoc0, - dot, - )), - )(input) - .map( - |(rest_input, (doc_comment, _kw, _ws1, predicates, _ws2, dot))| { - ( - rest_input, - Directive::Output { - span: outer_span(input.input, rest_input.input), - doc_comment, - predicates, - dot, - }, - ) - }, - ) -} - -// /// Parse a list of `ident1, ident2, …` -// fn parse_identifier_list<'a, 's, E: ParseError> + ContextError, Context>>( -// input: Input<'a, 's>, -// ) -> IResult, List<'a, Token<'a>>, E> { -// pair( -// lex_ident, -// many0(tuple(( -// opt(lex_whitespace), -// comma, -// opt(lex_whitespace), -// lex_ident, -// ))), -// )(input) -// .map(|(rest_input, (first, rest))| { -// ( -// rest_input, -// List { -// span: outer_span(input.input, rest_input.input), -// first, -// rest: if rest.is_empty() { None } else { Some(rest) }, -// }, -// ) -// }) -// } - -fn parse_list<'a, 's, T, E: ParseError> + ContextError, Context>>( - parse_t: fn(Input<'a, 's>) -> IResult, T, E>, -) -> impl Fn(Input<'a, 's>) -> IResult, List<'a, T>, E> { - move |input: Input<'a, 's>| { - context( - Context::List, - tuple(( - parse_t, - many0(tuple((wsoc0, comma, wsoc0, parse_t))), - pair(wsoc0, opt(comma)), - )), - )(input) - .map(|(rest_input, (first, rest, (_, trailing_comma)))| { - ( - rest_input, - List { - span: outer_span(input.input, rest_input.input), - first, - rest: if rest.is_empty() { - None - } else { - Some( - rest.into_iter() - .map(|(_ws1, comma, _ws2, t)| (comma, t)) - .collect(), - ) - }, - trailing_comma, - }, - ) - }) - } -} - -/// Parse the different atom variants. -fn parse_atoms<'a, 's, E: ParseError> + ContextError, Context>>( - input: Input<'a, 's>, -) -> IResult, Atom<'a>, E> { - context( - Context::BodyAtoms, - alt(( - parse_normal_atom, - parse_negative_atom, - parse_infix_atom, - parse_map_atom, - )), - )(input) -} - -/// Parse an atom of the form `predicateName(term1, term2, …)`. -fn parse_normal_atom< - 'a, - 's, - E: ParseError> + ContextError, Context>, ->( - input: Input<'a, 's>, -) -> IResult, Atom<'a>, E> { - context(Context::PositiveAtom, parse_named_tuple)(input) - .map(|(rest_input, named_tuple)| (rest_input, Atom::Positive(named_tuple))) -} - -/// Parse an atom of the form `~predicateName(term1, term2, …)`. 
-fn parse_negative_atom<
-    'a,
-    's,
-    E: ParseError<Input<'a, 's>> + ContextError<Input<'a, 's>, Context>,
->(
-    input: Input<'a, 's>,
-) -> IResult<Input<'a, 's>, Atom<'a>, E> {
-    context(Context::NegativeAtom, pair(tilde, parse_named_tuple))(input).map(
-        |(rest_input, (tilde, named_tuple))| {
-            (
-                rest_input,
-                Atom::Negative {
-                    span: outer_span(input.input, rest_input.input),
-                    neg: tilde,
-                    atom: named_tuple,
-                },
-            )
-        },
-    )
-}
-
-/// Parse an "infix atom" of the form `term1 <operation> term2`.
-/// The supported infix operations are `<`, `<=`, `=`, `>=`, `>` and `!=`.
-fn parse_infix_atom<'a, 's, E: ParseError<Input<'a, 's>> + ContextError<Input<'a, 's>, Context>>(
-    input: Input<'a, 's>,
-) -> IResult<Input<'a, 's>, Atom<'a>, E> {
-    context(
-        Context::InfixAtom,
-        tuple((parse_term, wsoc0, parse_operation_token, wsoc0, parse_term)),
-    )(input)
-    .map(|(rest_input, (lhs, _ws1, operation, _ws2, rhs))| {
-        (
-            rest_input,
-            Atom::InfixAtom {
-                span: outer_span(input.input, rest_input.input),
-                lhs,
-                operation,
-                rhs,
-            },
-        )
-    })
-}
-
-/// Parse a tuple like `(int, int, skip)`. A 1-tuple is denoted `(,)` (with a trailing comma) to distinguish it from parenthesised expressions.
-fn parse_tuple<'a, 's, E: ParseError<Input<'a, 's>> + ContextError<Input<'a, 's>, Context>>(
-    input: Input<'a, 's>,
-) -> IResult<Input<'a, 's>, Tuple<'a>, E> {
-    context(
-        Context::Tuple,
-        tuple((
-            open_paren,
-            wsoc0,
-            opt(parse_list(parse_term)),
-            wsoc0,
-            close_paren,
-        )),
-    )(input)
-    .map(
-        |(rest_input, (open_paren, _ws1, terms, _ws2, close_paren))| {
-            (
-                rest_input,
-                Tuple {
-                    span: outer_span(input.input, rest_input.input),
-                    open_paren,
-                    terms,
-                    close_paren,
-                },
-            )
-        },
-    )
-}
-
-/// Parse a named tuple. This function is like `parse_tuple` with the difference
-/// that it enforces the existence of an identifier for the tuple.
-fn parse_named_tuple<
-    'a,
-    's,
-    E: ParseError<Input<'a, 's>> + ContextError<Input<'a, 's>, Context>,
->(
-    input: Input<'a, 's>,
-) -> IResult<Input<'a, 's>, NamedTuple<'a>, E> {
-    context(
-        Context::NamedTuple,
-        tuple((alt((lex_prefixed_ident, lex_tag)), wsoc0, parse_tuple)),
-    )(input)
-    .map(|(rest_input, (identifier, _ws, tuple))| {
-        (
-            rest_input,
-            NamedTuple {
-                span: outer_span(input.input, rest_input.input),
-                identifier,
-                tuple,
-            },
-        )
-    })
-}
-
-/// Parse a map. Maps are denoted with `{…}` and can have an optional name, e.g. `csv {…}`.
-/// Inside the curly braces is a list of pairs.
-fn parse_map<'a, 's, E: ParseError<Input<'a, 's>> + ContextError<Input<'a, 's>, Context>>(
-    input: Input<'a, 's>,
-) -> IResult<Input<'a, 's>, Map<'a>, E> {
-    context(
-        Context::Map,
-        tuple((
-            opt(lex_tag),
-            wsoc0,
-            open_brace,
-            wsoc0,
-            opt(parse_list(parse_pair)),
-            wsoc0,
-            close_brace,
-        )),
-    )(input)
-    .map(
-        |(rest_input, (identifier, _ws1, open_brace, _ws2, pairs, _ws3, close_brace))| {
-            (
-                rest_input,
-                Map {
-                    span: outer_span(input.input, rest_input.input),
-                    identifier,
-                    open_brace,
-                    pairs,
-                    close_brace,
-                },
-            )
-        },
-    )
-}
-
-/// Parse a map in an atom position.
-fn parse_map_atom<'a, 's, E: ParseError<Input<'a, 's>> + ContextError<Input<'a, 's>, Context>>(
-    input: Input<'a, 's>,
-) -> IResult<Input<'a, 's>, Atom<'a>, E> {
-    parse_map(input).map(|(rest_input, map)| (rest_input, Atom::Map(map)))
-}
-
-/// Parse a pair of the form `key = value`.
-fn parse_pair<'a, 's, E: ParseError<Input<'a, 's>> + ContextError<Input<'a, 's>, Context>>(
-    input: Input<'a, 's>,
-) -> IResult<Input<'a, 's>, Pair<'a>, E> {
-    context(
-        Context::Pair,
-        tuple((parse_term, wsoc0, equal, wsoc0, parse_term)),
-    )(input)
-    .map(|(rest_input, (key, _ws1, equal, _ws2, value))| {
-        (
-            rest_input,
-            Pair {
-                span: outer_span(input.input, rest_input.input),
-                key,
-                equal,
-                value,
-            },
-        )
-    })
-}
-
-/// Parse a term.
-/// A term can be a primitive value (constant, number, string, …),
-/// a variable (universal or existential), a map, a function (-symbol), an arithmetic
-/// operation, an aggregation or a tuple of terms, e.g. `(term1, term2, …)`.
-fn parse_term<'a, 's, E: ParseError<Input<'a, 's>> + ContextError<Input<'a, 's>, Context>>(
-    input: Input<'a, 's>,
-) -> IResult<Input<'a, 's>, Term<'a>, E> {
-    context(
-        Context::Term,
-        alt((
-            parse_binary_term,
-            parse_tuple_term,
-            // parse_unary_prefix_term,
-            parse_map_term,
-            parse_primitive_term,
-            parse_variable,
-            parse_existential,
-            parse_aggregation_term,
-            parse_blank,
-        )),
-    )(input)
-}
-
-/// Parse a primitive term (simple constant, IRI constant, number, string).
-fn parse_primitive_term<
-    'a,
-    's,
-    E: ParseError<Input<'a, 's>> + ContextError<Input<'a, 's>, Context>,
->(
-    input: Input<'a, 's>,
-) -> IResult<Input<'a, 's>, Term<'a>, E> {
-    context(
-        Context::TermPrivimitive,
-        alt((
-            parse_rdf_literal,
-            parse_prefixed_ident,
-            parse_ident,
-            parse_iri,
-            parse_number,
-            parse_string,
-        )),
-    )(input)
-    .map(|(rest_input, term)| (rest_input, Term::Primitive(term)))
-}
-
-/// Parse an RDF literal, e.g. `"2023-06-19"^^<http://www.w3.org/2001/XMLSchema#date>`.
-fn parse_rdf_literal<
-    'a,
-    's,
-    E: ParseError<Input<'a, 's>> + ContextError<Input<'a, 's>, Context>,
->(
-    input: Input<'a, 's>,
-) -> IResult<Input<'a, 's>, Primitive<'a>, E> {
-    context(
-        Context::RdfLiteral,
-        tuple((lex_string, recognize(pair(caret, caret)), lex_iri)),
-    )(input)
-    .map(|(rest_input, (string, carets, iri))| {
-        (
-            rest_input,
-            Primitive::RdfLiteral {
-                span: outer_span(input.input, rest_input.input),
-                string,
-                carets: carets.input,
-                iri,
-            },
-        )
-    })
-}
-
-fn parse_prefixed_ident<'a, 's, E>(input: Input<'a, 's>) -> IResult<Input<'a, 's>, Primitive<'a>, E>
-where
-    E: ParseError<Input<'a, 's>> + ContextError<Input<'a, 's>, Context>,
-{
-    context(
-        Context::PrefixedConstant,
-        tuple((opt(lex_tag), colon, lex_tag)),
-    )(input)
-    .map(|(rest_input, (prefix, colon, constant))| {
-        (
-            rest_input,
-            Primitive::PrefixedConstant {
-                span: outer_span(input.input, rest_input.input),
-                prefix,
-                colon,
-                constant,
-            },
-        )
-    })
-}
-
-fn parse_ident<'a, 's, E: ParseError<Input<'a, 's>> + ContextError<Input<'a, 's>, Context>>(
-    input: Input<'a, 's>,
-) -> IResult<Input<'a, 's>, Primitive<'a>, E> {
-    lex_tag(input).map(|(rest_input, ident)| (rest_input, Primitive::Constant(ident)))
-}
-
-fn parse_iri<'a, 's, E: ParseError<Input<'a, 's>> + ContextError<Input<'a, 's>, Context>>(
-    input: Input<'a, 's>,
-) -> IResult<Input<'a, 's>, Primitive<'a>, E> {
-    lex_iri(input).map(|(rest_input, iri)| (rest_input, Primitive::Iri(iri)))
-}
-
-fn parse_number<'a, 's, E: ParseError<Input<'a, 's>> + ContextError<Input<'a, 's>, Context>>(
-    input: Input<'a, 's>,
-) -> IResult<Input<'a, 's>, Primitive<'a>, E> {
-    context(Context::Number, alt((parse_decimal, parse_integer)))(input)
-}
-
-fn parse_decimal<'a, 's, E: ParseError<Input<'a, 's>> + ContextError<Input<'a, 's>, Context>>(
-    input: Input<'a, 's>,
-) -> IResult<Input<'a, 's>, Primitive<'a>, E> {
-    context(
-        Context::Decimal,
-        tuple((
-            opt(alt((plus, minus))),
-            opt(lex_number),
-            dot,
-            lex_number,
-            opt(parse_exponent),
-        )),
-    )(input)
-    .map(|(rest_input, (sign, before, dot, after, exponent))| {
-        (
-            rest_input,
-            Primitive::Number {
-                span: outer_span(input.input, rest_input.input),
-                sign,
-                before,
-                dot: Some(dot),
-                after,
-                exponent,
-            },
-        )
-    })
-}
-
-fn parse_integer<'a, 's, E: ParseError<Input<'a, 's>> + ContextError<Input<'a, 's>, Context>>(
-    input: Input<'a, 's>,
-) -> IResult<Input<'a, 's>, Primitive<'a>, E> {
-    context(Context::Integer, pair(opt(alt((plus, minus))), lex_number))(input).map(
-        |(rest_input, (sign, number))| {
-            (
-                rest_input,
-                Primitive::Number {
-                    span: outer_span(input.input, rest_input.input),
-                    sign,
-                    before: None,
-                    dot: None,
-                    after: number,
-                    exponent: None,
-                },
-            )
-        },
-    )
-}
-
-fn
parse_exponent<'a, 's, E: ParseError> + ContextError, Context>>( - input: Input<'a, 's>, -) -> IResult, Exponent<'a>, E> { - context( - Context::Exponent, - tuple((exp, opt(alt((plus, minus))), lex_number)), - )(input) - .map(|(rest_input, (e, sign, number))| (rest_input, Exponent { e, sign, number })) -} - -fn parse_string<'a, 's, E: ParseError> + ContextError, Context>>( - input: Input<'a, 's>, -) -> IResult, Primitive<'a>, E> { - lex_string(input).map(|(rest_input, string)| (rest_input, Primitive::String(string))) -} - -// /// Parse an unary term. -// fn parse_unary_prefix_term<'a, 's, E: ParseError> + ContextError, Context>>(input: Input<'a, 's>) -> IResult, Term<'a>, E> { -// pair(lex_unary_prefix_operators, parse_term)(input).map( -// |(rest_input, (operation, term))| { -// ( -// rest_input, -// Term::UnaryPrefix { -// span: outer_span(input.input, rest_input.input), -// operation, -// term: Box::new(term), -// }, -// ) -// }, -// ) -// } - -/// Parse a binary infix operation of the form `term1 term2`. -fn parse_binary_term< - 'a, - 's, - E: ParseError> + ContextError, Context>, ->( - input: Input<'a, 's>, -) -> IResult, Term<'a>, E> { - context( - Context::TermBinary, - pair( - parse_arithmetic_product, - opt(tuple((wsoc0, alt((plus, minus)), wsoc0, parse_binary_term))), - ), - )(input) - .map(|(rest_input, (lhs, opt))| { - ( - rest_input, - if let Some((_ws1, operation, _ws2, rhs)) = opt { - Term::Binary { - span: outer_span(input.input, rest_input.input), - lhs: Box::new(lhs), - operation, - rhs: Box::new(rhs), - } - } else { - lhs - }, - ) - }) -} - -/// Parse an arithmetic product, i.e. an expression involving -/// only `*` and `/` over subexpressions. -fn parse_arithmetic_product< - 'a, - 's, - E: ParseError> + ContextError, Context>, ->( - input: Input<'a, 's>, -) -> IResult, Term<'a>, E> { - context( - Context::ArithmeticProduct, - pair( - parse_arithmetic_factor, - opt(tuple(( - wsoc0, - alt((star, slash)), - wsoc0, - parse_arithmetic_product, - ))), - ), - )(input) - .map(|(rest_input, (lhs, opt))| { - ( - rest_input, - if let Some((_ws1, operation, _ws2, rhs)) = opt { - Term::Binary { - span: outer_span(input.input, rest_input.input), - lhs: Box::new(lhs), - operation, - rhs: Box::new(rhs), - } - } else { - lhs - }, - ) - }) -} - -fn parse_arithmetic_factor< - 'a, - 's, - E: ParseError> + ContextError, Context>, ->( - input: Input<'a, 's>, -) -> IResult, Term<'a>, E> { - context( - Context::ArithmeticFactor, - alt(( - parse_tuple_term, - parse_aggregation_term, - parse_primitive_term, - parse_variable, - parse_existential, - )), - )(input) -} - -// fn fold_arithmetic_expression<'a>( -// initial: Term<'a>, -// sequence: Vec<(Option>, Token<'a>, Option>, Term<'a>)>, -// span_vec: Vec>, -// ) -> Term<'a> { -// sequence -// .into_iter() -// .enumerate() -// .fold(initial, |acc, (i, pair)| { -// let (ws1, operation, ws2, expression) = pair; -// Term::Binary { -// span: span_vec[i], -// lhs: Box::new(acc), -// ws1, -// operation, -// ws2, -// rhs: Box::new(expression), -// } -// }) -// } - -/// Parse an aggregation term of the form `#sum(…)`. 
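-/// The aggregate name is recognized as `#` followed by a tag, e.g. `#sum(?X, ?Y)` or
-/// `#min(?VARIABLE)` (examples borrowed from the tests).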
-fn parse_aggregation_term<
-    'a,
-    's,
-    E: ParseError<Input<'a, 's>> + ContextError<Input<'a, 's>, Context>,
->(
-    input: Input<'a, 's>,
-) -> IResult<Input<'a, 's>, Term<'a>, E> {
-    context(
-        Context::TermAggregation,
-        tuple((
-            recognize(pair(hash, lex_tag)),
-            open_paren,
-            wsoc0,
-            parse_list(parse_term),
-            wsoc0,
-            close_paren,
-        )),
-    )(input)
-    .map(
-        |(rest_input, (operation, open_paren, _ws1, terms, _ws2, close_paren))| {
-            (
-                rest_input,
-                Term::Aggregation {
-                    span: outer_span(input.input, rest_input.input),
-                    operation: operation.input,
-                    open_paren,
-                    terms: Box::new(terms),
-                    close_paren,
-                },
-            )
-        },
-    )
-}
-
-/// Parse a `_`
-fn parse_blank<'a, 's, E: ParseError<Input<'a, 's>> + ContextError<Input<'a, 's>, Context>>(
-    input: Input<'a, 's>,
-) -> IResult<Input<'a, 's>, Term<'a>, E> {
-    context(Context::Blank, underscore)(input)
-        .map(|(rest_input, underscore)| (rest_input, Term::Blank(underscore)))
-}
-
-/// Parse a tuple term, either with a name (function symbol) or as a term (-list) in
-/// parentheses.
-fn parse_tuple_term<'a, 's, E: ParseError<Input<'a, 's>> + ContextError<Input<'a, 's>, Context>>(
-    input: Input<'a, 's>,
-) -> IResult<Input<'a, 's>, Term<'a>, E> {
-    context(Context::TermTuple, parse_tuple)(input)
-        .map(|(rest_input, named_tuple)| (rest_input, Term::Tuple(Box::new(named_tuple))))
-}
-
-/// Parse a map as a term.
-fn parse_map_term<'a, 's, E: ParseError<Input<'a, 's>> + ContextError<Input<'a, 's>, Context>>(
-    input: Input<'a, 's>,
-) -> IResult<Input<'a, 's>, Term<'a>, E> {
-    context(Context::TermMap, parse_map)(input)
-        .map(|(rest_input, map)| (rest_input, Term::Map(Box::new(map))))
-}
-
-/// Parse a variable.
-fn parse_variable<'a, 's, E: ParseError<Input<'a, 's>> + ContextError<Input<'a, 's>, Context>>(
-    input: Input<'a, 's>,
-) -> IResult<Input<'a, 's>, Term<'a>, E> {
-    context(
-        Context::UniversalVariable,
-        recognize(pair(question_mark, lex_tag)),
-    )(input)
-    .map(|(rest_input, var)| (rest_input, Term::UniversalVariable(var.input)))
-}
-
-/// Parse an existential variable.
-fn parse_existential<
-    'a,
-    's,
-    E: ParseError<Input<'a, 's>> + ContextError<Input<'a, 's>, Context>,
->(
-    input: Input<'a, 's>,
-) -> IResult<Input<'a, 's>, Term<'a>, E> {
-    context(
-        Context::ExistentialVariable,
-        recognize(pair(exclamation_mark, lex_tag)),
-    )(input)
-    .map(|(rest_input, existential)| (rest_input, Term::ExistentialVariable(existential.input)))
-}
-
-/// Parse the operator for an infix atom.
-fn parse_operation_token<
-    'a,
-    's,
-    E: ParseError<Input<'a, 's>> + ContextError<Input<'a, 's>, Context>,
->(
-    input: Input<'a, 's>,
-) -> IResult<Input<'a, 's>, Span<'a>, E> {
-    context(
-        Context::Operators,
-        // The order of the parser combinators is important because of ordered choice and no backtracking
-        alt((less_equal, greater_equal, equal, unequal, less, greater)),
-    )(input)
-}
-
-#[cfg(test)]
-mod tests {
-    use std::{
-        cell::RefCell,
-        collections::{BTreeMap, HashSet},
-    };
-
-    use super::*;
-    use crate::io::{
-        lexer::*,
-        parser::ast::*,
-        // parser::ast::{
-        //     atom::*, directive::*, map::*, named_tuple::*, program::*, statement::*, term::*,
-        // },
-    };
-
-    macro_rules! T {
-        ($tok_kind: expr, $offset: literal, $line: literal, $str: literal) => {
-            unsafe { Span::new_from_raw_offset($offset, $line, $str, ()) }
-        };
-    }
-    macro_rules!
s { - ($offset:literal,$line:literal,$str:literal) => { - unsafe { Span::new_from_raw_offset($offset, $line, $str, ()) } - }; - } - - // use nom::error::{convert_error, VerboseError}; - // fn convert_located_span_error<'a, 's>( - // input: Span<'a>, - // err: VerboseError>, - // ) -> String { - // convert_error( - // *(input.fragment()), - // VerboseError { - // errors: err - // .errors - // .into_iter() - // .map(|(span, tag)| (*(span.input.fragment()), tag)) - // .collect(), - // }, - // ) - // } - - #[test] - fn fact() { - // let input = Tokens { - // tok: &lex_tokens(Span::new("a(B,C).")).unwrap().1, - // }; - let input = Span::new("a(B,C)."); - let refcell = RefCell::new(Vec::new()); - let errors = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state: errors, - }; - assert_eq!( - // parse_program::>(input).unwrap().1, - parse_program::>(input).0, - Program { - span: input.input, - tl_doc_comment: None, - statements: vec![Statement::Fact { - span: s!(0, 1, "a(B,C)."), - doc_comment: None, - fact: Fact::NamedTuple(NamedTuple { - span: s!(0, 1, "a(B,C)"), - identifier: s!(0, 1, "a"), - tuple: Tuple { - span: s!(1, 1, "(B,C)"), - open_paren: s!(1, 1, "("), - terms: Some(List { - span: s!(2, 1, "B,C"), - first: Term::Primitive(Primitive::Constant(s!(2, 1, "B"),)), - rest: Some(vec![( - s!(3, 1, ","), - Term::Primitive(Primitive::Constant(s!(4, 1, "C"),)), - )]), - trailing_comma: None, - }), - close_paren: s!(5, 1, ")"), - } - }), - dot: s!(6, 1, ".") - }], - } - ); - } - - #[test] - fn syntax() { - let input = Span::new( - r#"@base .@prefix rdfs:.@import sourceA:-csv{resource="sources/dataA.csv"}.@export a:-csv{}.@output a, b, c."#, - ); - let refcell = RefCell::new(Vec::new()); - let errors = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state: errors, - }; - assert_eq!( - // parse_program::>(input).unwrap().1, - parse_program::>(input).0, - Program { - tl_doc_comment: None, - span: input.input, - statements: vec![ - Statement::Directive(Directive::Base { - span: s!(0, 1, "@base ."), - doc_comment: None, - base_iri: s!(6, 1, ""), - dot: s!(31, 1, "."), - }), - Statement::Directive(Directive::Prefix { - span: s!( - 32, - 1, - "@prefix rdfs:." 
- ), - doc_comment: None, - prefix: s!(40, 1, "rdfs:"), - prefix_iri: s!(45, 1, ""), - dot: s!(84, 1, ".") - }), - Statement::Directive(Directive::Import { - span: s!( - 85, - 1, - r#"@import sourceA:-csv{resource="sources/dataA.csv"}."# - ), - doc_comment: None, - predicate: s!(93, 1, "sourceA"), - arrow: s!(100, 1, ":-"), - map: Map { - span: s!(102, 1, r#"csv{resource="sources/dataA.csv"}"#), - identifier: Some(s!(102, 1, "csv")), - open_brace: s!(105, 1, "{"), - pairs: Some(List { - span: s!(106, 1, "resource=\"sources/dataA.csv\""), - first: Pair { - span: s!(106, 1, "resource=\"sources/dataA.csv\""), - key: Term::Primitive(Primitive::Constant(s!( - 106, 1, "resource" - ),)), - equal: s!(114, 1, "="), - value: Term::Primitive(Primitive::String(s!( - 115, - 1, - "\"sources/dataA.csv\"" - ),)), - }, - rest: None, - trailing_comma: None, - }), - close_brace: s!(134, 1, "}"), - }, - dot: s!(135, 1, ".") - }), - Statement::Directive(Directive::Export { - span: s!(136, 1, "@export a:-csv{}."), - doc_comment: None, - predicate: s!(144, 1, "a"), - arrow: s!(145, 1, ":-"), - map: Map { - span: s!(147, 1, "csv{}"), - identifier: Some(s!(147, 1, "csv"),), - open_brace: s!(150, 1, "{"), - - pairs: None, - close_brace: s!(151, 1, "}"), - }, - dot: s!(152, 1, "."), - }), - Statement::Directive(Directive::Output { - span: s!(153, 1, "@output a, b, c."), - doc_comment: None, - predicates: Some(List { - span: s!(161, 1, "a, b, c"), - first: s!(161, 1, "a"), - rest: Some(vec![ - (s!(162, 1, ","), s!(164, 1, "b"),), - (s!(165, 1, ","), s!(167, 1, "c"),), - ]), - trailing_comma: None, - }), - dot: s!(168, 1, "."), - }), - ], - } - ); - } - - // #[test] - // fn ignore_ws_and_comments() { - // let input = Span::new(" Hi %cool comment\n"); - // assert_eq!( - // super::ignore_ws_and_comments(lex_ident::>)(input), - // Ok(( - // s!(22, 2, ""), - // Token { - // kind: TokenKind::Ident, - // span: s!(3, 1, "Hi") - // } - // )) - // ) - // } - - #[test] - fn fact_with_ws() { - let input = Span::new("some(Fact, with, whitespace) . % and a super useful comment\n"); - let refcell = RefCell::new(Vec::new()); - let errors = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state: errors, - }; - assert_eq!( - // parse_program::>(input).unwrap().1, - parse_program::>(input).0, - Program { - span: input.input, - tl_doc_comment: None, - statements: vec![ - Statement::Fact { - span: s!(0, 1, "some(Fact, with, whitespace) ."), - doc_comment: None, - fact: Fact::NamedTuple(NamedTuple { - span: s!(0, 1, "some(Fact, with, whitespace)"), - identifier: s!(0, 1, "some"), - tuple: Tuple { - span: s!(4, 1, "(Fact, with, whitespace)"), - open_paren: s!(4, 1, "("), - terms: Some(List { - span: s!(5, 1, "Fact, with, whitespace"), - first: Term::Primitive(Primitive::Constant(s!(5, 1, "Fact"),)), - rest: Some(vec![ - ( - s!(9, 1, ","), - Term::Primitive(Primitive::Constant(s!(11, 1, "with"))), - ), - ( - s!(15, 1, ","), - Term::Primitive(Primitive::Constant(s!( - 17, - 1, - "whitespace" - ))), - ), - ]), - trailing_comma: None, - }), - close_paren: s!(27, 1, ")"), - } - }), - dot: s!(29, 1, "."), - }, - Statement::Comment(s!(31, 1, "% and a super useful comment\n")) - ], - } - ); - } - - #[test] - fn display_program() { - let input = Span::new( - r#"% This example finds trees of (some species of lime/linden tree) in Dresden, -% which are more than 200 years old. 
-% -% It shows how to load (typed) data from (compressed) CSV files, how to -% perform a recursive reachability query, and how to use datatype built-in to -% find old trees. It can be modified to use a different species or genus of -% plant, and by changing the required age. - -@import tree :- csv{format=(string, string, string, int, int), resource="https://raw.githubusercontent.com/knowsys/nemo-examples/main/examples/lime-trees/dresden-trees-ages-heights.csv"} . % location URL, species, age, height in m -@import taxon :- csv{format=(string, string, string), resource="https://raw.githubusercontent.com/knowsys/nemo-examples/main/examples/lime-trees/wikidata-taxon-name-parent.csv.gz"} . % location URL, species, age, height in m - -limeSpecies(?X, "Tilia") :- taxon(?X, "Tilia", ?P). -limeSpecies(?X, ?Name) :- taxon(?X, ?Name, ?Y), limeSpecies(?Y, ?N). - -oldLime(?location,?species,?age) :- tree(?location,?species,?age,?heightInMeters), ?age > 200, limeSpecies(?id,?species) ."#, - ); - let refcell = RefCell::new(Vec::new()); - let errors = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state: errors, - }; - // let ast = parse_program::>(input); - let (ast, _) = parse_program::>(input); - println!("{}", ast); - // With the removal of whitespace in the AST this does not work anymore. - // assert_eq!( - // { - // let mut string_from_tokens = String::new(); - // for token in get_all_tokens(&ast) { - // string_from_tokens.push_str(token.span().fragment()); - // } - // println!("String from Tokens:\n"); - // println!("{}\n", string_from_tokens); - // string_from_tokens - // }, - // *input.input.fragment(), - // ); - } - - #[test] - fn parser_test() { - let file = "../testfile2.rls"; - let str = std::fs::read_to_string(file).expect("testfile not found"); - let input = Span::new(str.as_str()); - let refcell = RefCell::new(Vec::new()); - let parser_state = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state, - }; - // let result = parse_program::>(input); - let (ast, errors) = parse_program::>>(input); - // println!("{}\n\n{:#?}", ast, errors); - println!("{}\n\n", ast); - let mut error_map: BTreeMap> = BTreeMap::new(); - for error in errors { - if let Some(set) = error_map.get_mut(&error.pos) { - set.insert(error.msg); - } else { - let mut set = HashSet::new(); - set.insert(error.msg); - error_map.insert(error.pos, set); - }; - } - // dbg!(&error_map); - println!("\n\n"); - // assert!(false); - let lines: Vec<_> = str.lines().collect(); - for (pos, str) in error_map { - // println!("{pos:?}, {str:?}"); - println!("error: {str:?}"); - println!("--> {}:{}:{}", file, pos.line, pos.column); - println!("{}", lines.get((pos.line - 1) as usize).unwrap()); - println!("{0:>1$}\n", "^", pos.column as usize) - } - } - - #[test] - fn arithmetic_expressions() { - assert_eq!( - { - let input = Span::new("42"); - let refcell = RefCell::new(Vec::new()); - let parser_state = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state, - }; - // let result = parse_term::>(input); - let result = parse_term::>(input); - result.unwrap().1 - }, - Term::Primitive(Primitive::Number { - span: s!(0, 1, "42"), - sign: None, - before: None, - dot: None, - after: T! 
{Number, 0, 1, "42"}, - exponent: None, - }), - ); - - assert_eq!( - { - let input = Span::new("35+7"); - let refcell = RefCell::new(Vec::new()); - let parser_state = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state, - }; - // let result = parse_term::>(input); - let result = parse_term::>(input); - result.unwrap().1 - }, - Term::Binary { - span: s!(0, 1, "35+7"), - lhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(0, 1, "35"), - sign: None, - before: None, - dot: None, - after: T! {Number, 0, 1, "35"}, - exponent: None, - })), - operation: T! {Plus, 2, 1, "+"}, - rhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(3, 1, "7"), - sign: None, - before: None, - dot: None, - after: T! {Number, 3, 1, "7"}, - exponent: None, - })), - } - ); - - assert_eq!( - { - let input = Span::new("6*7"); - let refcell = RefCell::new(Vec::new()); - let parser_state = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state, - }; - // let result = parse_term::>(input); - let result = parse_term::>(input); - result.unwrap().1 - }, - Term::Binary { - span: s!(0, 1, "6*7"), - lhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(0, 1, "6"), - sign: None, - before: None, - dot: None, - after: T! {Number, 0,1,"6"}, - exponent: None, - })), - operation: T! {Star, 1,1,"*"}, - rhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(2, 1, "7"), - sign: None, - before: None, - dot: None, - after: T! {Number, 2,1,"7"}, - exponent: None, - })), - } - ); - - assert_eq!( - { - let input = Span::new("49-7"); - let refcell = RefCell::new(Vec::new()); - let parser_state = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state, - }; - // let result = parse_term::>(input); - let result = parse_term::>(input); - result.unwrap().1 - }, - Term::Binary { - span: s!(0, 1, "49-7"), - lhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(0, 1, "49"), - sign: None, - before: None, - dot: None, - after: T! {Number, 0, 1, "49"}, - exponent: None, - })), - operation: T! {Minus, 2, 1, "-"}, - rhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(3, 1, "7"), - sign: None, - before: None, - dot: None, - after: T! {Number, 3, 1, "7"}, - exponent: None, - })), - } - ); - - assert_eq!( - { - let input = Span::new("84/2"); - let refcell = RefCell::new(Vec::new()); - let parser_state = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state, - }; - // let result = parse_term::>(input); - let result = parse_term::>(input); - result.unwrap().1 - }, - Term::Binary { - span: s!(0, 1, "84/2"), - lhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(0, 1, "84"), - sign: None, - before: None, - dot: None, - after: T! {Number, 0, 1, "84"}, - exponent: None, - })), - operation: T! {Slash, 2, 1, "/"}, - rhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(3, 1, "2"), - sign: None, - before: None, - dot: None, - after: T! 
{Number, 3, 1, "2"}, - exponent: None, - })), - } - ); - - assert_eq!( - { - let input = Span::new("5*7+7"); - let refcell = RefCell::new(Vec::new()); - let parser_state = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state, - }; - // let result = parse_term::>(input); - let result = parse_term::>(input); - result.unwrap().1 - }, - Term::Binary { - span: s!(0, 1, "5*7+7"), - lhs: Box::new(Term::Binary { - span: s!(0, 1, "5*7"), - lhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(0, 1, "5"), - sign: None, - before: None, - dot: None, - after: T! {Number, 0,1,"5"}, - exponent: None, - })), - operation: T! {Star, 1,1,"*"}, - rhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(2, 1, "7"), - sign: None, - before: None, - dot: None, - after: T! {Number, 2,1,"7"}, - exponent: None, - })), - }), - operation: T! {Plus, 3,1,"+"}, - rhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(4, 1, "7"), - sign: None, - before: None, - dot: None, - after: T! {Number, 4,1,"7"}, - exponent: None, - })), - } - ); - - assert_eq!( - { - let input = Span::new("7+5*7"); - let refcell = RefCell::new(Vec::new()); - let parser_state = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state, - }; - // let result = parse_term::>(input); - let result = parse_term::>(input); - result.unwrap().1 - }, - Term::Binary { - span: s!(0, 1, "7+5*7"), - lhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(0, 1, "7"), - sign: None, - before: None, - dot: None, - after: T! {Number, 0,1,"7"}, - exponent: None - })), - operation: T! {Plus, 1,1,"+"}, - rhs: Box::new(Term::Binary { - span: s!(2, 1, "5*7"), - lhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(2, 1, "5"), - sign: None, - before: None, - dot: None, - after: T! {Number, 2,1,"5"}, - exponent: None - })), - operation: T! {Star, 3,1,"*"}, - rhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(4, 1, "7"), - sign: None, - before: None, - dot: None, - after: T! {Number, 4,1,"7"}, - exponent: None - })), - }), - } - ); - - assert_eq!( - { - let input = Span::new("(15+3*2-(7+35)*8)/3"); - let refcell = RefCell::new(Vec::new()); - let parser_state = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state, - }; - // let result = parse_term::>(input); - let result = parse_term::>(input); - // let result = parse_term::>(Span::new("(15+3*2-(7+35)*8)/3")); - // match result { - // Ok(ast) => { - // println!("{}", ast.1); - // ast.1 - // } - // Err(nom::Err::Error(err)) | Err(nom::Err::Failure(err)) => { - // panic!( - // "{}", - // convert_error( - // *(input.input.fragment()), - // VerboseError { - // errors: err - // .errors - // .into_iter() - // .map(|(span, tag)| { (*(span.fragment()), tag) }) - // .collect() - // } - // ) - // ) - // } - // Err(nom::Err::Incomplete(err)) => panic!("{:#?}", err), - // } - result.unwrap().1 - }, - Term::Binary { - span: s!(0, 1, "(15+3*2-(7+35)*8)/3"), - lhs: Box::new(Term::Tuple(Box::new(Tuple { - span: s!(0, 1, "(15+3*2-(7+35)*8)"), - open_paren: T!(OpenParen, 0, 1, "("), - terms: Some(List { - span: s!(1, 1, "15+3*2-(7+35)*8"), - first: Term::Binary { - span: s!(1, 1, "15+3*2-(7+35)*8"), - lhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(1, 1, "15"), - sign: None, - before: None, - dot: None, - after: T! {Number, 1,1,"15"}, - exponent: None, - })), - operation: T! 
{Plus, 3,1,"+"}, - rhs: Box::new(Term::Binary { - span: s!(4, 1, "3*2-(7+35)*8"), - lhs: Box::new(Term::Binary { - span: s!(4, 1, "3*2"), - lhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(4, 1, "3"), - sign: None, - before: None, - dot: None, - after: T! {Number, 4,1,"3"}, - exponent: None, - })), - operation: T! {Star, 5,1,"*"}, - rhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(6, 1, "2"), - sign: None, - before: None, - dot: None, - after: T! {Number, 6,1,"2"}, - exponent: None, - })), - }), - operation: T! {Minus, 7,1,"-"}, - rhs: Box::new(Term::Binary { - span: s!(8, 1, "(7+35)*8"), - lhs: Box::new(Term::Tuple(Box::new(Tuple { - span: s!(8, 1, "(7+35)"), - open_paren: T! {OpenParen, 8, 1, "("}, - terms: Some(List { - span: s!(9, 1, "7+35"), - first: Term::Binary { - span: s!(9, 1, "7+35"), - lhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(9, 1, "7"), - sign: None, - before: None, - dot: None, - after: T! {Number, 9,1,"7"}, - exponent: None, - })), - operation: T! {Plus, 10,1,"+"}, - rhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(11, 1, "35"), - sign: None, - before: None, - dot: None, - after: T! {Number, 11,1,"35"}, - exponent: None, - })), - }, - rest: None, - trailing_comma: None, - }), - close_paren: T! {CloseParen, 13,1,")"}, - }))), - operation: T! {Star, 14,1,"*"}, - rhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(15, 1, "8"), - sign: None, - before: None, - dot: None, - after: T! {Number, 15,1,"8"}, - exponent: None, - })), - }), - }), - }, - rest: None, - trailing_comma: None, - }), - close_paren: T!(CloseParen, 16, 1, ")") - }))), - operation: T! {Slash, 17,1,"/"}, - rhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(18, 1, "3"), - sign: None, - before: None, - dot: None, - after: T! {Number, 18,1,"3"}, - exponent: None, - })), - } - ); - // Term::Binary { - // span: s!(), - // lhs: Box::new(), - // ws1: None, - // operation: , - // ws2: None, - // rhs: Box::new(), - // } - - assert_eq!( - { - let input = Span::new("15+3*2-(7+35)*8/3"); - let refcell = RefCell::new(Vec::new()); - let parser_state = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state, - }; - // let result = parse_term::>(input); - let result = parse_term::>(input); - result.unwrap().1 - }, - Term::Binary { - span: s!(0, 1, "15+3*2-(7+35)*8/3"), - lhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(0, 1, "15"), - sign: None, - before: None, - dot: None, - after: T! {Number, 0,1,"15"}, - exponent: None, - })), - operation: T! {Plus, 2,1,"+"}, - rhs: Box::new(Term::Binary { - span: s!(3, 1, "3*2-(7+35)*8/3"), - lhs: Box::new(Term::Binary { - span: s!(3, 1, "3*2"), - lhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(3, 1, "3"), - sign: None, - before: None, - dot: None, - after: T! {Number, 3,1,"3"}, - exponent: None, - })), - operation: T! {Star, 4,1,"*"}, - rhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(5, 1, "2"), - sign: None, - before: None, - dot: None, - after: T! {Number, 5,1,"2"}, - exponent: None, - })), - }), - operation: T! {Minus, 6,1,"-"}, - rhs: Box::new(Term::Binary { - span: s!(7, 1, "(7+35)*8/3"), - lhs: Box::new(Term::Tuple(Box::new(Tuple { - span: s!(7, 1, "(7+35)"), - open_paren: T! {OpenParen, 7,1,"("}, - terms: Some(List { - span: s!(8, 1, "7+35"), - first: Term::Binary { - span: s!(8, 1, "7+35"), - lhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(8, 1, "7"), - sign: None, - before: None, - dot: None, - after: T! 
{Number, 8,1,"7"}, - exponent: None, - })), - operation: T! {Plus, 9,1,"+"}, - rhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(10, 1, "35"), - sign: None, - before: None, - dot: None, - after: T! {Number, 10,1,"35"}, - exponent: None, - })), - }, - rest: None, - trailing_comma: None, - }), - close_paren: T! {CloseParen, 12,1,")"}, - }))), - operation: T! {Star, 13,1,"*"}, - rhs: Box::new(Term::Binary { - span: s!(14, 1, "8/3"), - lhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(14, 1, "8"), - sign: None, - before: None, - dot: None, - after: T! {Number, 14,1,"8"}, - exponent: None, - })), - operation: T! {Slash, 15, 1, "/"}, - rhs: Box::new(Term::Primitive(Primitive::Number { - span: s!(16, 1, "3"), - sign: None, - before: None, - dot: None, - after: T! {Number, 16,1,"3"}, - exponent: None, - })), - }), - }), - }), - } - ); - - // assert_eq!({ - // let result = parse_term::>(Span::new("1*2*3*4*5")); - // result.unwrap().1 - // },); - - // assert_eq!({ - // let result = parse_term::>(Span::new("(5+3)")); - // result.unwrap().1 - // },); - - // assert_eq!({ - // let result = parse_term::>(Span::new("( int , int , string , skip )")); - // result.unwrap().1 - // },); - - // assert_eq!({ - // let result = parse_term::>(Span::new("(14+4)+3")); - // result.unwrap().1 - // },); - - // assert_eq!({ - // let result = parse_term::>(Span::new( - // "(3 + #sum(?X, ?Y)) * (LENGTH(\"Hello, World!\") + 3)", - // )); - // result.unwrap().1 - // },); - } - - #[test] - fn number_exp() { - assert_eq!( - { - let input = Span::new("e42"); - let refcell = RefCell::new(Vec::new()); - let parser_state = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state, - }; - // parse_exponent::>(input) - parse_exponent::>(input).unwrap().1 - }, - Exponent { - e: T! {TokenKind::Exponent, 0,1,"e"}, - sign: None, - number: T! 
{TokenKind::Number, 1,1,"42"} - } - ) - } - - #[test] - fn missing_dot() { - let input = Span::new("some(Fact\nSome other, Fact.\nthird(fact)."); - let refcell = RefCell::new(Vec::new()); - let parser_state = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state, - }; - let result = parse_program::>(input); - println!("{}\n\n{:#?}", result.0, result.1); - // assert!(false); - } - - #[test] - fn wsoc() { - let input = Span::new(" \t\n % first comment\n % second comment\n"); - let refcell = RefCell::new(Vec::new()); - let parser_state = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state, - }; - dbg!(wsoc0::>(input)); - dbg!(wsoc1::>(input)); - } - - #[test] - fn debug_test() { - let str = "asd"; - let input = Span::new(str); - let refcell = RefCell::new(Vec::new()); - let parser_state = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state, - }; - let result = parse_program::>>(input); - dbg!(&result); - println!("{}", result.0); - } - - // TODO: Instead of just checking for errors, this should compare the created AST - #[test] - fn parse_language_tag() { - let test_string = "fact(\"テスト\"@ja)."; - let input = Span::new(&test_string); - let refcell = RefCell::new(Vec::new()); - let parser_state = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state, - }; - let result = parse_program::>>(input); - assert!(result.1.is_empty()); - } - - // TODO: Instead of just checking for errors, this should compare the created AST - #[test] - fn parse_rdf_literal() { - let test_string = "fact(\"2023\"^^xsd:gYear)."; - let input = Span::new(&test_string); - let refcell = RefCell::new(Vec::new()); - let parser_state = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state, - }; - let result = parse_program::>>(input); - assert!(result.1.is_empty()); - } - - // TODO: Instead of just checking for errors, this should compare the created AST - #[test] - fn parse_floating_point_numbers() { - // https://regex101.com/r/ObowxD/5 - - let valid_numbers = vec![ - "0.2", - "4534.34534345", - ".456456", - "1.", - "1e545", - "1.1e435", - ".1e232", - "1.e343", - "112E+12", - "12312.1231", - ".1231", - "1231", - "-1e+0", - "1e-1", - ]; - - let invalid_numbers = vec!["3", "E9", ".e3", "7E"]; - - for valid in valid_numbers { - let input = Span::new(valid); - let refcell = RefCell::new(Vec::new()); - let parser_state = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state, - }; - - let result = parse_decimal::>>(input); - // dbg!(&input); - // dbg!(&result); - assert!(result.is_ok()) - } - - for invalid in invalid_numbers { - let input = Span::new(invalid); - let refcell = RefCell::new(Vec::new()); - let parser_state = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state, - }; - - let result = parse_decimal::>>(input); - assert!(result.is_err()) - } - } - - // TODO: Instead of just checking for errors, this should compare the created AST - #[test] - fn parse_complex_comparison() { - let test_string = "complex(?X, ?Y) :- data(?X, ?Y), ABS(?X - ?Y) >= ?X * ?X."; - let input = Span::new(&test_string); - let refcell = RefCell::new(Vec::new()); - let parser_state = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state, - }; - let result = parse_program::>>(input); - // dbg!(&result); - assert!(result.1.is_empty()); - } - - // TODO: Instead of just checking for errors, this should compare the created AST - #[test] - fn 
parse_negation() { - let test_string = "R(?x, ?y, ?z) :- S(?x, ?y, ?z), ~T(?x, ?y), ~ T(a, ?z)."; // should allow for spaces - let input = Span::new(&test_string); - let refcell = RefCell::new(Vec::new()); - let parser_state = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state, - }; - let result = parse_program::>>(input); - assert!(result.1.is_empty()); - } - - // TODO: Instead of just checking for errors, this should compare the created AST - #[test] - fn parse_trailing_comma() { - let test_string = "head(?X) :- body( (2 ,), (3, 4 , ), ?X) ."; // should allow for spaces - let input = Span::new(&test_string); - let refcell = RefCell::new(Vec::new()); - let parser_state = ParserState { errors: &refcell }; - let input = Input { - input, - parser_state, - }; - let result = parse_program::>>(input); - assert!(result.1.is_empty()); - } -} diff --git a/nemo/src/io/parser/ast.rs b/nemo/src/io/parser/ast.rs deleted file mode 100644 index be1133f24..000000000 --- a/nemo/src/io/parser/ast.rs +++ /dev/null @@ -1,447 +0,0 @@ -//! This module contains the (abstract) syntax tree, generated from the parser. - - -use tower_lsp::lsp_types::SymbolKind; - -use crate::io::lexer::Span; -use ascii_tree::{write_tree, Tree}; -use std::fmt::Display; - -pub(crate) mod atom; -pub(crate) mod directive; -pub(crate) mod map; -pub mod program; -pub(crate) mod statement; -pub(crate) mod term; -pub(crate) mod tuple; -pub(crate) mod named_tuple; - -/// All AST nodes have to implement this trait so you can get all children recursively. -pub trait AstNode: std::fmt::Debug + Display + Sync { - /// Return all children of an AST node. - fn children(&self) -> Option>; - /// Return the `LocatedSpan` of the AST node. - fn span(&self) -> Span; - /// Convert the `LocatedSpan` into a range of positions. - fn range(&self) -> Range { - let span = self.span(); - - let start_position = Position { - offset: self.span().location_offset(), - line: self.span().location_line(), - column: self.span().get_utf8_column() as u32, - }; - - let end_position = Position { - offset: start_position.offset + span.len(), - line: start_position.line + span.fragment().lines().count() as u32 - 1, - column: if span.fragment().lines().count() > 1 { - 1 + span.fragment().lines().last().unwrap().len() as u32 // Column is on new line - } else { - start_position.column + span.fragment().len() as u32 // Column is on same line - }, - }; - - Range { - start: start_position, - end: end_position, - } - } - - // FIXME: With the removal of tokens is this method still usefull? - /// Indicates whether the current AST node is a leaf and has no children. - fn is_leaf(&self) -> bool; - - /// Return a formatted String for use in printing the AST. - fn name(&self) -> String; - - /// Returns an optional pair of the identfier and identifier scope. - /// - /// The identifier scope will scope this identifier up to any [`AstNode`] - /// that has an identifier that has this node's identifier scope as a prefix. - /// - /// This can be used to restict rename operations to be local, e.g. for variable idenfiers inside of rules. - fn lsp_identifier(&self) -> Option<(String, String)>; - fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)>; - /// Range of the part of the node that should be renamed or [`None`] if the node can not be renamed - fn lsp_range_to_rename(&self) -> Option; -} - -/// `Position` contains the offset in the source and the line and column information. 
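// Editor's aside (not part of the patch): a worked example of the `range()`
// computation above. For a span over "abc\ndef" starting at offset 10,
// line 3, column 5, the end offset is 10 + 7 = 17; the fragment spans two
// lines, so the end line is 3 + 2 - 1 = 4, and because the span is
// multi-line the end column restarts as 1 + len("def") = 4 on that final line.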
-#[derive(Debug, Clone, Copy, Hash)] -pub struct Position { - /// Offset in the source. - pub offset: usize, - /// Line number - pub line: u32, - /// Column number - pub column: u32, -} -impl PartialEq for Position { - fn eq(&self, other: &Self) -> bool { - self.offset.eq(&other.offset) - } -} -impl Eq for Position {} -impl PartialOrd for Position { - fn partial_cmp(&self, other: &Self) -> Option { - self.offset.partial_cmp(&other.offset) - } -} -impl Ord for Position { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - self.offset.cmp(&other.offset) - } -} -impl Default for Position { - fn default() -> Self { - Position { - offset: 0, - line: 1, - column: 1, - } - } -} - -#[derive(Debug, Clone, Copy, Hash)] -/// A Range with start and end `Position`s. -pub struct Range { - /// Start position - pub start: Position, - /// End position - pub end: Position, -} - -/// Whitespace or Comment token -#[derive(Debug, Clone, PartialEq)] -pub struct Wsoc<'a> { - pub(crate) span: Span<'a>, - pub(crate) token: Vec>, -} -impl AstNode for Wsoc<'_> { - fn children(&self) -> Option> { - if self.token.is_empty() { - None - } else { - #[allow(trivial_casts)] - Some(self.token.iter().map(|t| t as &dyn AstNode).collect()) - } - } - - fn span(&self) -> Span { - self.span - } - - fn is_leaf(&self) -> bool { - false - } - - fn name(&self) -> String { - format!( - "Wsoc \x1b[34m@{}:{} \x1b[92m{:?}\x1b[0m", - self.span.location_line(), - self.span.get_utf8_column(), - self.span.fragment() - ) - } - - fn lsp_identifier(&self) -> Option<(String, String)> { - None - } - - fn lsp_range_to_rename(&self) -> Option { - None - } - - fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> { - None - } -} - -impl Display for Wsoc<'_> { - fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - todo!() - } -} - -#[derive(Debug, Clone, PartialEq)] -pub struct List<'a, T> { - pub span: Span<'a>, - pub first: T, - // (,T)* - pub rest: Option, T)>>, - pub trailing_comma: Option>, -} -impl<'a, T> List<'a, T> { - pub fn to_item_vec(&'a self) -> Vec<&'a T> { - let mut vec = Vec::new(); - vec.push(&self.first); - if let Some(rest) = &self.rest { - for (_, item) in rest { - vec.push(&item); - } - } - vec - } -} - -impl IntoIterator for List<'_, T> { - type Item = T; - - type IntoIter = std::vec::IntoIter; - - fn into_iter(self) -> Self::IntoIter { - let mut vec = Vec::new(); - vec.push(self.first); - if let Some(rest) = self.rest { - for (_, item) in rest { - vec.push(item); - } - } - vec.into_iter() - } -} -impl AstNode for List<'_, T> { - fn children(&self) -> Option> { - let mut vec: Vec<&dyn AstNode> = Vec::new(); - vec.push(&self.first); - if let Some(rest) = &self.rest { - for (delim, item) in rest { - vec.push(delim); - vec.push(item); - } - }; - Some(vec) - } - - fn span(&self) -> Span { - self.span - } - - fn is_leaf(&self) -> bool { - false - } - - fn name(&self) -> String { - format!( - "List \x1b[34m@{}:{} \x1b[92m{:?}\x1b[0m", - self.span.location_line(), - self.span.get_utf8_column(), - self.span.fragment() - ) - } - - fn lsp_identifier(&self) -> Option<(String, String)> { - None - } - - fn lsp_range_to_rename(&self) -> Option { - None - } - - fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> { - Some((String::from("List"), SymbolKind::ARRAY)) - } -} - -impl Display for List<'_, T> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let mut output = String::new(); - write_tree(&mut output, &ast_to_ascii_tree(self))?; - write!(f, "{output}") - } -} - -pub(crate) fn 
get_all_tokens(node: &dyn AstNode) -> Vec<&dyn AstNode> { - let mut vec = Vec::new(); - if let Some(children) = node.children() { - for child in children { - vec.append(&mut get_all_tokens(child)); - } - } else { - vec.push(node); - }; - vec -} - -pub(crate) fn ast_to_ascii_tree(node: &dyn AstNode) -> Tree { - let mut vec = Vec::new(); - if let Some(children) = node.children() { - for child in children { - if child.is_leaf() { - vec.push(Tree::Leaf(vec![format!("\x1b[93m{:?}\x1b[0m", child.name())])); - } else { - vec.push(ast_to_ascii_tree(child)); - } - } - } - Tree::Node(node.name(), vec) -} - -mod test { - use named_tuple::NamedTuple; - use statement::Fact; - - use super::*; - use super::{ - atom::Atom, - directive::Directive, - program::Program, - statement::Statement, - term::{Primitive, Term}, - tuple::Tuple, - }; - use crate::io::lexer::{Span, TokenKind}; - - macro_rules! s { - ($offset:literal,$line:literal,$str:literal) => { - unsafe { Span::new_from_raw_offset($offset, $line, $str, ()) } - }; - } - - #[test] - fn ast_traversal() { - let input = "\ - %! This is just a test file.\n\ - %! So the documentation of the rules is not important.\n\ - %% This is the prefix used for datatypes\n\ - @prefix xsd: .\n\ - \n\ - % Facts\n\ - %% This is just an example predicate.\n\ - somePredicate(ConstA, ConstB).\n\ - \n\ - % Rules\n\ - %% This is just an example rule.\n\ - someHead(?VarA) :- somePredicate(?VarA, ConstB). % all constants that are in relation with ConstB\n"; - let span = Span::new(input); - let ast = Program { - span, - tl_doc_comment: Some( - s!(0, 1, "%! This is just a test file.\n%! So the documentation of the rules is not important.\n") - ), - statements: vec![ - Statement::Directive(Directive::Prefix { - span:s!(125,4,"@prefix xsd: ."), - doc_comment:Some( - s!(84,3,"%% This is the prefix used for datatypes\n") - ), - prefix: - s!(133, 4, "xsd:"), - prefix_iri: - s!(138, 4, ""), - dot: - s!(173,4,".") - }), - Statement::Comment( - s!(176, 6, "% Facts\n"), - ), - Statement::Fact { - span:s!(222,8,"somePredicate(ConstA, ConstB)."), - doc_comment: Some( - s!(184,7,"%% This is just an example predicate.\n") - ), - fact: Fact::NamedTuple(NamedTuple { - span: s!(222,8,"somePredicate(ConstA, ConstB)"), - identifier: s!(222, 8, "somePredicate"), - tuple: Tuple { - span: s!(235,8,"(ConstA, ConstB)"), - open_paren: - s!(235,8,"(") - , - terms: Some(List { - span: s!(236, 8, "ConstA, ConstB"), - first: Term::Primitive(Primitive::Constant( s!(236, 8, "ConstA"), - )), - rest: Some(vec![( - s!(242, 8, ","), - Term::Primitive(Primitive::Constant( s!(244, 8, "ConstB"), - )), - )]), - trailing_comma: None, - }), - close_paren: s!(250,8,")") - } - }), - dot: - s!(251,8,".") - - }, - Statement::Comment( - s!(254, 10, "% Rules\n"), - ), - Statement::Rule { - span: s!(295,12,"someHead(?VarA) :- somePredicate(?VarA, ConstB)."), - doc_comment: Some(s!(262,11,"%% This is just an example rule.\n")), - head: List { - span: s!(295, 12, "someHead(?VarA)"), - first: Atom::Positive(NamedTuple { - span: s!(295,12,"someHead(?VarA)"), - identifier: s!(295, 12, "someHead"), - tuple: Tuple { - span: s!(303,12,"(?VarA)"), - open_paren: s!(303,12,"(") , - terms: Some(List { - span: s!(304, 12, "?VarA"), - first: Term::UniversalVariable( s!(304, 12, "?VarA"), - ), - rest: None, - trailing_comma: None, - }), - close_paren: s!(309,12,")") , - } - }), - rest: None, - trailing_comma: None, - }, - arrow: s!(311,12,":-"), - body: List { - span: s!(314, 12, "somePredicate(?VarA, ConstB)"), - first: 
Atom::Positive(NamedTuple { - span: s!(314, 12,"somePredicate(?VarA, ConstB)"), - identifier: s!(314, 12, "somePredicate"), - tuple: Tuple { - span: s!(327,12,"(?VarA, ConstB)"), - open_paren: s!(327,12,"("), - terms: Some(List { - span: s!(328, 12, "?Var, ConstB"), - first: Term::UniversalVariable( s!(328, 12, "?VarA"), - ), - rest: Some(vec![( - s!(333, 12, ","), - - Term::Primitive(Primitive::Constant(s!(335, 12, "ConstB"), - )), - )]), - trailing_comma: None, - }), - close_paren: s!(341, 12,")") , - } - }), - rest: None, - trailing_comma: None, - }, - dot: s!(342, 12,"."), - }, - Statement::Comment( - s!(346, 12, "% all constants that are in relation with ConstB\n"), - ), - ], - }; - println!("{}", ast); - let tokens1 = get_all_tokens(&ast); - for token in &tokens1 { - println!("{}", token); - } - - // This doesn't work anymore, because the whitespace and keywords got removed from - // from the AST, so you can't directly recreate the input exactly. - // assert_eq!(input, { - // let mut result = String::new(); - // for token in &tokens1 { - // result.push_str(token.span().fragment()); - // } - // result - // }); - } -} diff --git a/nemo/src/io/parser/ast/atom.rs b/nemo/src/io/parser/ast/atom.rs deleted file mode 100644 index 13a2c5922..000000000 --- a/nemo/src/io/parser/ast/atom.rs +++ /dev/null @@ -1,122 +0,0 @@ -use tower_lsp::lsp_types::SymbolKind; - -use super::map::Map; -use super::named_tuple::NamedTuple; -use super::term::Term; -use super::{ast_to_ascii_tree, AstNode, Range}; -use crate::io::lexer::Span; -use ascii_tree::write_tree; - -#[derive(Debug, Clone, PartialEq)] -pub enum Atom<'a> { - Positive(NamedTuple<'a>), - Negative { - span: Span<'a>, - neg: Span<'a>, - atom: NamedTuple<'a>, - }, - InfixAtom { - span: Span<'a>, - lhs: Term<'a>, - operation: Span<'a>, - rhs: Term<'a>, - }, - Map(Map<'a>), -} - -impl Atom<'_> { - fn named_tuple(&self) -> Option<&NamedTuple<'_>> { - match &self { - Atom::Positive(tuple) => Some(tuple), - Atom::Negative { atom, .. } => Some(atom), - _ => None, - } - } -} - -impl AstNode for Atom<'_> { - fn children(&self) -> Option> { - match self { - Atom::Positive(named_tuple) => Some(vec![named_tuple]), - Atom::Negative { neg, atom, .. } => Some(vec![neg, atom]), - Atom::InfixAtom { - lhs, - operation, - rhs, - .. - } => { - let mut vec: Vec<&dyn AstNode> = Vec::new(); - vec.push(lhs); - vec.push(operation); - vec.push(rhs); - Some(vec) - } - Atom::Map(map) => Some(vec![map]), - } - } - - fn span(&self) -> Span { - match self { - Atom::Positive(named_tuple) => named_tuple.span(), - Atom::Negative { span, .. } => *span, - Atom::InfixAtom { span, .. } => *span, - Atom::Map(map) => map.span(), - } - } - - fn is_leaf(&self) -> bool { - false - } - - fn name(&self) -> String { - macro_rules! name { - ($name:literal) => { - format!( - "{} \x1b[34m@{}:{} \x1b[92m{:?}\x1b[0m", - $name, - self.span().location_line(), - self.span().get_utf8_column(), - self.span().fragment() - ) - }; - } - match self { - Atom::Positive(_) => name!("Positive Atom"), - Atom::Negative { .. } => name!("Negative Atom"), - Atom::InfixAtom { .. 
} => name!("Infix Atom"), - Atom::Map(_) => name!("Map Atom"), - } - } - - fn lsp_identifier(&self) -> Option<(String, String)> { - self.named_tuple().map(|named_tuple| { - ( - format!("atom/{}", named_tuple.identifier.fragment()), - "file".to_string(), - ) - }) - } - - fn lsp_range_to_rename(&self) -> Option { - self.named_tuple() - .and_then(|named_tuple| Some(named_tuple.identifier)) - .map(|identifier| identifier.range()) - } - - fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> { - match self.named_tuple() { - Some(tuple) => Some(( - format!("Atom: {}", tuple.identifier.fragment()), - SymbolKind::FUNCTION, - )), - None => Some((String::from("Atom"), SymbolKind::FUNCTION)), - } - } -} -impl std::fmt::Display for Atom<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let mut output = String::new(); - write_tree(&mut output, &ast_to_ascii_tree(self))?; - write!(f, "{output}") - } -} diff --git a/nemo/src/io/parser/ast/directive.rs b/nemo/src/io/parser/ast/directive.rs deleted file mode 100644 index 25512581e..000000000 --- a/nemo/src/io/parser/ast/directive.rs +++ /dev/null @@ -1,193 +0,0 @@ -use tower_lsp::lsp_types::SymbolKind; - -use super::map::Map; -use super::{ast_to_ascii_tree, AstNode, List, Range, Wsoc}; -use crate::io::lexer::{Span, Token}; -use ascii_tree::write_tree; - -#[derive(Debug, Clone, PartialEq)] -pub enum Directive<'a> { - // "@base ." - Base { - span: Span<'a>, - doc_comment: Option>, - base_iri: Span<'a>, - dot: Span<'a>, - }, - // "@prefix wikidata: ." - Prefix { - span: Span<'a>, - doc_comment: Option>, - prefix: Span<'a>, - prefix_iri: Span<'a>, - dot: Span<'a>, - }, - // "@import table :- csv{resource="path/to/file.csv"} ." - Import { - span: Span<'a>, - doc_comment: Option>, - predicate: Span<'a>, - arrow: Span<'a>, - map: Map<'a>, - dot: Span<'a>, - }, - // "@export result :- turtle{resource="out.ttl"} ." - Export { - span: Span<'a>, - doc_comment: Option>, - predicate: Span<'a>, - arrow: Span<'a>, - map: Map<'a>, - dot: Span<'a>, - }, - // "@output A, B, C." - Output { - span: Span<'a>, - doc_comment: Option>, - predicates: Option>>, - dot: Span<'a>, - }, -} -impl AstNode for Directive<'_> { - fn children(&self) -> Option> { - match self { - Directive::Base { - doc_comment, - base_iri, - dot, - .. - } => { - let mut vec: Vec<&dyn AstNode> = Vec::new(); - if let Some(dc) = doc_comment { - vec.push(dc); - }; - vec.push(base_iri); - vec.push(dot); - Some(vec) - } - Directive::Prefix { - doc_comment, - prefix, - prefix_iri, - dot, - .. - } => { - let mut vec: Vec<&dyn AstNode> = Vec::new(); - if let Some(dc) = doc_comment { - vec.push(dc); - }; - vec.push(prefix); - vec.push(prefix_iri); - vec.push(dot); - Some(vec) - } - Directive::Import { - doc_comment, - predicate, - arrow, - map, - dot, - .. - } => { - let mut vec: Vec<&dyn AstNode> = Vec::new(); - if let Some(dc) = doc_comment { - vec.push(dc); - }; - vec.push(predicate); - vec.push(arrow); - vec.push(map); - vec.push(dot); - Some(vec) - } - Directive::Export { - doc_comment, - predicate, - arrow, - map, - dot, - .. 
- } => { - let mut vec: Vec<&dyn AstNode> = Vec::new(); - if let Some(dc) = doc_comment { - vec.push(dc); - }; - vec.push(predicate); - vec.push(arrow); - vec.push(map); - vec.push(dot); - Some(vec) - } - Directive::Output { - span, - doc_comment, - predicates, - dot, - } => { - let mut vec: Vec<&dyn AstNode> = Vec::new(); - if let Some(dc) = doc_comment { - vec.push(dc); - }; - if let Some(p) = predicates { - vec.push(p); - }; - vec.push(dot); - Some(vec) - } - } - } - - fn span(&self) -> Span { - match self { - Directive::Base { span, .. } => *span, - Directive::Prefix { span, .. } => *span, - Directive::Import { span, .. } => *span, - Directive::Export { span, .. } => *span, - Directive::Output { span, .. } => *span, - } - } - - fn is_leaf(&self) -> bool { - false - } - - fn name(&self) -> String { - macro_rules! name { - ($name:literal) => { - format!( - "{} \x1b[34m@{}:{} \x1b[92m{:?}\x1b[0m", - $name, - self.span().location_line(), - self.span().get_utf8_column(), - self.span().fragment() - ) - }; - } - match self { - Directive::Base { .. } => name!("Base Directive"), - Directive::Prefix { .. } => name!("Prefix Directive"), - Directive::Import { .. } => name!("Import Directive"), - Directive::Export { .. } => name!("Export Directive"), - Directive::Output { .. } => name!("Output Directive"), - } - } - - fn lsp_identifier(&self) -> Option<(String, String)> { - None - } - - fn lsp_range_to_rename(&self) -> Option { - None - } - - fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> { - Some((String::from("Directive"), SymbolKind::FUNCTION)) - } -} - -impl std::fmt::Display for Directive<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let mut output = String::new(); - write_tree(&mut output, &ast_to_ascii_tree(self))?; - write!(f, "{output}") - } -} diff --git a/nemo/src/io/parser/ast/map.rs b/nemo/src/io/parser/ast/map.rs deleted file mode 100644 index 6eec78e52..000000000 --- a/nemo/src/io/parser/ast/map.rs +++ /dev/null @@ -1,115 +0,0 @@ -use tower_lsp::lsp_types::SymbolKind; - -use super::term::Term; -use super::{ast_to_ascii_tree, AstNode, List, Position, Range, Wsoc}; -use crate::io::lexer::{Span, Token}; -use ascii_tree::write_tree; -use std::fmt::Debug; - -#[derive(Debug, Clone, PartialEq)] -pub struct Map<'a> { - pub span: Span<'a>, - pub identifier: Option>, - pub open_brace: Span<'a>, - pub pairs: Option>>, - pub close_brace: Span<'a>, -} -impl AstNode for Map<'_> { - fn children(&self) -> Option> { - let mut vec: Vec<&dyn AstNode> = Vec::new(); - if let Some(identifier) = &self.identifier { - vec.push(identifier); - }; - vec.push(&self.open_brace); - if let Some(pairs) = &self.pairs { - vec.push(pairs); - }; - vec.push(&self.close_brace); - Some(vec) - } - - fn span(&self) -> Span { - self.span - } - - fn is_leaf(&self) -> bool { - false - } - - fn name(&self) -> String { - String::from("Map") - } - - fn lsp_identifier(&self) -> Option<(String, String)> { - None - } - - fn lsp_range_to_rename(&self) -> Option { - None - } - - fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> { - Some((String::from("Map"), SymbolKind::STRUCT)) - } -} - -impl std::fmt::Display for Map<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let mut output = String::new(); - write_tree(&mut output, &ast_to_ascii_tree(self))?; - write!(f, "{output}") - } -} - -#[derive(Debug, Clone, PartialEq)] -pub struct Pair<'a> { - pub span: Span<'a>, - pub key: Term<'a>, - pub equal: Span<'a>, - pub value: Term<'a>, -} -impl AstNode for 
Pair<'_> { - fn children(&self) -> Option> { - let mut vec: Vec<&dyn AstNode> = Vec::new(); - vec.push(&self.key); - vec.push(&self.equal); - vec.push(&self.value); - Some(vec) - } - - fn span(&self) -> Span { - self.span - } - - fn is_leaf(&self) -> bool { - false - } - - fn name(&self) -> String { - format!( - "Pair \x1b[34m@{}:{} \x1b[92m{:?}\x1b[0m", - self.span.location_line(), - self.span.get_utf8_column(), - self.span.fragment() - ) - } - - fn lsp_identifier(&self) -> Option<(String, String)> { - None - } - - fn lsp_range_to_rename(&self) -> Option { - None - } - - fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> { - Some((String::from("Pair"), SymbolKind::ARRAY)) - } -} -impl std::fmt::Display for Pair<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let mut output = String::new(); - write_tree(&mut output, &ast_to_ascii_tree(self))?; - write!(f, "{output}") - } -} diff --git a/nemo/src/io/parser/ast/named_tuple.rs b/nemo/src/io/parser/ast/named_tuple.rs deleted file mode 100644 index 9ef66d0af..000000000 --- a/nemo/src/io/parser/ast/named_tuple.rs +++ /dev/null @@ -1,66 +0,0 @@ -use std::fmt::Display; - -use crate::io::{lexer::Span, parser::ast::ast_to_ascii_tree}; - -use super::{tuple::Tuple, AstNode}; -use ascii_tree::write_tree; - -#[derive(Debug, Clone, PartialEq)] -pub struct NamedTuple<'a> { - pub span: Span<'a>, - pub identifier: Span<'a>, - pub tuple: Tuple<'a>, -} - -impl AstNode for NamedTuple<'_> { - // NOTE: This flattens the tuple children into the vec. An alternative could be - // vec![&self.identifier, &self.tuple] but then you always have an `Tuple` as an - // child - fn children(&self) -> Option> { - let mut vec: Vec<&dyn AstNode> = vec![&self.identifier]; - if let Some(mut children) = self.tuple.children() { - vec.append(&mut children); - }; - Some(vec) - } - - fn span(&self) -> Span { - self.span - } - - fn is_leaf(&self) -> bool { - false - } - - fn name(&self) -> String { - format!( - "NamedTuple \x1b[34m@{}:{} \x1b[92m{:?}\x1b[0m", - self.span.location_line(), - self.span.get_utf8_column(), - self.span.fragment() - ) - } - - fn lsp_identifier(&self) -> Option<(String, String)> { - // This was todo!() before but we don't want to panic here. - None - } - - fn lsp_symbol_info(&self) -> Option<(String, tower_lsp::lsp_types::SymbolKind)> { - // This was todo!() before but we don't want to panic here. - None - } - - fn lsp_range_to_rename(&self) -> Option { - // This was todo!() before but we don't want to panic here. - None - } -} - -impl Display for NamedTuple<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let mut output = String::new(); - write_tree(&mut output, &ast_to_ascii_tree(self))?; - write!(f, "{output}") - } -} diff --git a/nemo/src/io/parser/ast/program.rs b/nemo/src/io/parser/ast/program.rs deleted file mode 100644 index 55dcf97f7..000000000 --- a/nemo/src/io/parser/ast/program.rs +++ /dev/null @@ -1,75 +0,0 @@ -//! The root node of an AST, representing a nemo program. 
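// Editor's sketch (hypothetical, not part of the original patch): the
// `Program` node defined below was the AST root, so a depth-first walk over
// `children()` reaches every node. Assuming the `AstNode` trait and the
// `get_all_tokens` helper from `ast.rs` above, usage looked roughly like this:
fn print_program_leaves(program: &Program<'_>) {
    for leaf in super::get_all_tokens(program) {
        // `name()` renders the node kind together with its source location.
        println!("{}", leaf.name());
    }
}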
- -use tower_lsp::lsp_types::SymbolKind; - -use super::{ast_to_ascii_tree, statement::Statement, AstNode, Position, Range}; -use crate::io::lexer::{Span, Token}; -use ascii_tree::write_tree; - -#[derive(Debug, Clone, PartialEq)] -pub struct Program<'a> { - pub span: Span<'a>, - pub tl_doc_comment: Option>, - pub statements: Vec>, -} -impl AstNode for Program<'_> { - fn children(&self) -> Option> { - let mut vec: Vec<&dyn AstNode> = Vec::new(); - if let Some(dc) = &self.tl_doc_comment { - vec.push(dc); - }; - // NOTE: The current implementation puts the doc comment and all the - // statements in the same vec, so there is no need to implement AstNode - // for Vec, which would be hard for the fn span() implementation - for statement in &self.statements { - vec.push(statement); - } - Some(vec) - } - - fn span(&self) -> Span { - self.span - } - - fn is_leaf(&self) -> bool { - false - } - - fn name(&self) -> String { - if self.span.fragment().len() < 60 { - format!( - "Program \x1b[34m@{}:{} \x1b[92m{:?}\x1b[0m", - self.span.location_line(), - self.span.get_utf8_column(), - &self.span.fragment(), - ) - } else { - format!( - "Program \x1b[34m@{}:{} \x1b[92m{:?}[…]\x1b[0m", - self.span.location_line(), - self.span.get_utf8_column(), - &self.span.fragment()[..60], - ) - } - } - - fn lsp_identifier(&self) -> Option<(String, String)> { - None - } - - fn lsp_range_to_rename(&self) -> Option { - None - } - - fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> { - Some(("File".to_string(), SymbolKind::FILE)) - } -} - -impl std::fmt::Display for Program<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let mut output = String::new(); - write_tree(&mut output, &ast_to_ascii_tree(self))?; - write!(f, "{output}") - } -} diff --git a/nemo/src/io/parser/ast/statement.rs b/nemo/src/io/parser/ast/statement.rs deleted file mode 100644 index 4547c2b91..000000000 --- a/nemo/src/io/parser/ast/statement.rs +++ /dev/null @@ -1,200 +0,0 @@ -use tower_lsp::lsp_types::SymbolKind; - -use super::atom::Atom; -use super::directive::Directive; -use super::map::Map; -use super::named_tuple::NamedTuple; -use super::{ast_to_ascii_tree, AstNode, List, Position, Range, Wsoc}; -use crate::io::lexer::{Span, Token}; -use ascii_tree::write_tree; - -#[derive(Debug, Clone, PartialEq)] -pub enum Statement<'a> { - Directive(Directive<'a>), - Fact { - span: Span<'a>, - doc_comment: Option>, - fact: Fact<'a>, - dot: Span<'a>, - }, - Rule { - span: Span<'a>, - doc_comment: Option>, - head: List<'a, Atom<'a>>, - arrow: Span<'a>, - body: List<'a, Atom<'a>>, - dot: Span<'a>, - }, - Comment(Span<'a>), - Error(Span<'a>), -} -impl AstNode for Statement<'_> { - fn children(&self) -> Option> { - match self { - Statement::Directive(directive) => Some(vec![directive]), - Statement::Fact { - doc_comment, - fact: atom, - dot, - .. - } => { - let mut vec: Vec<&dyn AstNode> = Vec::new(); - if let Some(dc) = doc_comment { - vec.push(dc); - }; - vec.push(atom); - vec.push(dot); - Some(vec) - } - Statement::Rule { - doc_comment, - head, - arrow, - body, - dot, - .. - } => { - let mut vec: Vec<&dyn AstNode> = Vec::new(); - if let Some(dc) = doc_comment { - vec.push(dc); - }; - vec.push(head); - vec.push(arrow); - vec.push(body); - vec.push(dot); - Some(vec) - } - Statement::Comment(c) => Some(vec![c]), - Statement::Error(t) => Some(vec![t]), - } - } - - fn span(&self) -> Span { - match self { - Statement::Directive(directive) => directive.span(), - Statement::Fact { span, .. } => *span, - Statement::Rule { span, .. 
} => *span, - Statement::Comment(c) => c.span(), - Statement::Error(t) => *t, - } - } - - fn is_leaf(&self) -> bool { - false - } - - fn name(&self) -> String { - macro_rules! name { - ($name:literal) => { - format!( - "{} \x1b[34m@{}:{} \x1b[92m{:?}\x1b[0m", - $name, - self.span().location_line(), - self.span().get_utf8_column(), - self.span().fragment() - ) - }; - } - - match self { - Statement::Directive(_) => name!("Directive"), - Statement::Fact { .. } => name!("Fact"), - Statement::Rule { .. } => name!("Rule"), - Statement::Comment(_) => name!("Comment"), - Statement::Error(_) => name!("\x1b[1;31mERROR\x1b[0m"), - } - } - - fn lsp_identifier(&self) -> Option<(String, String)> { - Some(("statement".to_string(), "statement".to_string())) - } - - fn lsp_range_to_rename(&self) -> Option { - None - } - - fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> { - let name = match self { - Statement::Directive(_) => "Directive", - Statement::Fact { .. } => "Fact", - Statement::Rule { .. } => "Rule", - Statement::Comment(_) => return None, - Statement::Error(_) => "Invalid", - }; - - Some((String::from(name), SymbolKind::CLASS)) - } -} - -impl std::fmt::Display for Statement<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let mut output = String::new(); - write_tree(&mut output, &ast_to_ascii_tree(self))?; - write!(f, "{output}") - } -} - -#[derive(Debug, Clone, PartialEq)] -pub enum Fact<'a> { - NamedTuple(NamedTuple<'a>), - Map(Map<'a>), -} -impl AstNode for Fact<'_> { - fn children(&self) -> Option> { - match self { - Fact::NamedTuple(named_tuple) => named_tuple.children(), - Fact::Map(map) => map.children(), - } - } - - fn span(&self) -> Span { - match self { - Fact::NamedTuple(named_tuple) => named_tuple.span(), - Fact::Map(map) => map.span(), - } - } - - fn is_leaf(&self) -> bool { - match self { - Fact::NamedTuple(named_tuple) => named_tuple.is_leaf(), - Fact::Map(map) => map.is_leaf(), - } - } - - fn name(&self) -> String { - match self { - Fact::NamedTuple(named_tuple) => named_tuple.name(), - Fact::Map(map) => map.name(), - } - } - - fn lsp_identifier(&self) -> Option<(String, String)> { - match self { - Fact::NamedTuple(named_tuple) => named_tuple.lsp_identifier(), - Fact::Map(map) => map.lsp_identifier(), - } - } - - fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> { - match self { - Fact::NamedTuple(named_tuple) => named_tuple.lsp_symbol_info(), - Fact::Map(map) => map.lsp_symbol_info(), - } - } - - fn lsp_range_to_rename(&self) -> Option { - match self { - Fact::NamedTuple(named_tuple) => named_tuple.lsp_range_to_rename(), - Fact::Map(map) => map.lsp_range_to_rename(), - } - } -} - -impl std::fmt::Display for Fact<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Fact::NamedTuple(named_tuple) => named_tuple.fmt(f), - Fact::Map(map) => map.fmt(f), - } - } -} diff --git a/nemo/src/io/parser/ast/term.rs b/nemo/src/io/parser/ast/term.rs deleted file mode 100644 index ddbe323cf..000000000 --- a/nemo/src/io/parser/ast/term.rs +++ /dev/null @@ -1,367 +0,0 @@ -use tower_lsp::lsp_types::SymbolKind; - -use super::map::Map; -use super::named_tuple::NamedTuple; -use super::tuple::Tuple; -use super::{ast_to_ascii_tree, AstNode, List, Range}; -use crate::io::lexer::Span; -use ascii_tree::write_tree; - -#[derive(Debug, Clone, PartialEq)] -pub enum Term<'a> { - Primitive(Primitive<'a>), - UniversalVariable(Span<'a>), - ExistentialVariable(Span<'a>), - Binary { - span: Span<'a>, - lhs: Box>, - operation: 
Span<'a>, - rhs: Box>, - }, - Aggregation { - span: Span<'a>, - operation: Span<'a>, - open_paren: Span<'a>, - terms: Box>>, - close_paren: Span<'a>, - }, - Tuple(Box>), - NamedTuple(Box>), - Map(Box>), - Blank(Span<'a>), -} - -impl AstNode for Term<'_> { - fn children(&self) -> Option> { - match self { - Term::Primitive(token) => Some(vec![token]), - Term::UniversalVariable(token) => Some(vec![token]), - Term::ExistentialVariable(token) => Some(vec![token]), - Term::Binary { - lhs, - operation, - rhs, - .. - } => { - let mut vec: Vec<&dyn AstNode> = Vec::new(); - vec.push(&**lhs); - vec.push(operation); - vec.push(&**rhs); - Some(vec) - } - Term::Aggregation { - operation, - open_paren, - terms, - close_paren, - .. - } => { - let mut vec: Vec<&dyn AstNode> = Vec::new(); - vec.push(operation); - vec.push(open_paren); - vec.push(&**terms); - vec.push(close_paren); - Some(vec) - } - // TODO: check whether directly the children or Some(vec![named_tuple]) should get returned (for fidelity in ast) - Term::Tuple(tuple) => tuple.children(), - Term::NamedTuple(named_tuple) => named_tuple.children(), - Term::Map(map) => map.children(), - Term::Blank(token) => Some(vec![token]), - } - } - - fn span(&self) -> Span { - match self { - Term::Primitive(p) => p.span(), - Term::UniversalVariable(span) => *span, - Term::ExistentialVariable(span) => *span, - Term::Binary { span, .. } => *span, - Term::Aggregation { span, .. } => *span, - Term::Tuple(tuple) => tuple.span(), - Term::NamedTuple(named_tuple) => named_tuple.span(), - Term::Map(map) => map.span(), - Term::Blank(span) => *span, - } - } - - fn is_leaf(&self) -> bool { - false - } - - fn name(&self) -> String { - macro_rules! name { - ($name:literal) => { - format!( - "{} \x1b[34m@{}:{} \x1b[92m{:?}\x1b[0m", - $name, - self.span().location_line(), - self.span().get_utf8_column(), - self.span().fragment() - ) - }; - } - match self { - Term::Primitive(_) => name!("Primitive"), - Term::UniversalVariable(_) => name!("Variable"), - Term::ExistentialVariable(_) => name!("Existential Variable"), - Term::Binary { .. } => name!("Binary Term"), - Term::Aggregation { .. } => name!("Aggregation"), - Term::Tuple(_) => name!("Tuple"), - Term::NamedTuple(_) => name!("Function"), - Term::Map(_) => name!("Map"), - Term::Blank(_) => name!("Blank"), - } - } - - fn lsp_identifier(&self) -> Option<(String, String)> { - match self { - Term::UniversalVariable(t) => Some(( - format!("variable/{}", t.span().fragment()), - "statement".to_string(), - )), - Term::Aggregation { operation, .. } => Some(( - format!("aggregation/{}", operation.span().fragment()), - "file".to_string(), - )), - Term::NamedTuple(named_tuple) => Some(( - format!("function/{}", named_tuple.identifier.span().fragment()), - "file".to_string(), - )), - _ => None, - } - } - - fn lsp_range_to_rename(&self) -> Option { - match self { - Term::Primitive(_) => None, - Term::UniversalVariable(t) => Some(t.range()), - Term::Blank { .. } => None, - Term::ExistentialVariable(t) => Some(t.range()), - Term::Binary { .. } => None, - Term::Aggregation { operation, .. } => Some(operation.range()), - Term::Tuple(_) => None, - Term::NamedTuple(named_tuple) => Some(named_tuple.identifier.range()), - Term::Map(_map) => None, - } - } - - fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> { - match self { - Term::Primitive(_) => Some((String::from("Primitive term"), SymbolKind::CONSTANT)), - Term::UniversalVariable(t) => { - Some((format!("Variable: {}", t.span()), SymbolKind::VARIABLE)) - } - Term::Blank { .. 
} => Some((String::from("Unary prefix"), SymbolKind::VARIABLE)), - Term::ExistentialVariable { .. } => { - Some((String::from("Existential"), SymbolKind::VARIABLE)) - } - Term::Binary { .. } => Some((String::from("Binary term"), SymbolKind::OPERATOR)), - Term::Aggregation { operation, .. } => Some(( - format!("Aggregation: {}", operation.fragment()), - SymbolKind::OPERATOR, - )), - Term::Tuple(_) => Some((String::from("Tuple"), SymbolKind::ARRAY)), - Term::NamedTuple(named_tuple) => Some(( - format!("Function: {}", named_tuple.identifier.fragment()), - SymbolKind::OPERATOR, - )), - Term::Map(_map) => Some((String::from("Map"), SymbolKind::ARRAY)), - } - } -} -impl std::fmt::Display for Term<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let mut output = String::new(); - write_tree(&mut output, &ast_to_ascii_tree(self))?; - write!(f, "{output}") - } -} - -#[derive(Debug, Clone, PartialEq)] -pub enum Primitive<'a> { - Constant(Span<'a>), - PrefixedConstant { - span: Span<'a>, - prefix: Option>, - colon: Span<'a>, - constant: Span<'a>, - }, - Number { - span: Span<'a>, - sign: Option>, - before: Option>, - dot: Option>, - after: Span<'a>, - exponent: Option>, - }, - String(Span<'a>), - Iri(Span<'a>), - RdfLiteral { - span: Span<'a>, - string: Span<'a>, - carets: Span<'a>, - iri: Span<'a>, - }, -} - -impl AstNode for Primitive<'_> { - fn children(&self) -> Option> { - match self { - Primitive::Constant(token) => Some(vec![token]), - Primitive::PrefixedConstant { - prefix, - colon, - constant, - .. - } => { - let mut vec: Vec<&dyn AstNode> = Vec::new(); - if let Some(prefix) = prefix { - vec.push(prefix); - } - vec.push(colon); - vec.push(constant); - Some(vec) - } - Primitive::Number { - sign, - before, - dot, - after, - exponent, - .. - } => { - let mut vec: Vec<&dyn AstNode> = Vec::new(); - if let Some(s) = sign { - vec.push(s); - } - if let Some(b) = before { - vec.push(b); - } - if let Some(d) = dot { - vec.push(d); - } - vec.push(after); - if let Some(exp) = exponent { - if let Some(mut children) = exp.children() { - vec.append(&mut children); - } - } - Some(vec) - } - Primitive::String(token) => Some(vec![token]), - Primitive::Iri(token) => Some(vec![token]), - Primitive::RdfLiteral { - string, - carets, - iri, - .. - } => Some(vec![string, carets, iri]), - } - } - - fn span(&self) -> Span { - match self { - Primitive::Constant(span) => *span, - Primitive::PrefixedConstant { span, .. } => *span, - Primitive::Number { span, .. } => *span, - Primitive::String(span) => *span, - Primitive::Iri(span) => *span, - Primitive::RdfLiteral { span, .. } => *span, - } - } - - fn is_leaf(&self) -> bool { - false - } - - fn name(&self) -> String { - macro_rules! name { - ($name:literal) => { - format!( - "{} \x1b[34m@{}:{} \x1b[92m{:?}\x1b[0m", - $name, - self.span().location_line(), - self.span().get_utf8_column(), - self.span().fragment() - ) - }; - } - match self { - Primitive::Constant(_) => name!("Constant"), - Primitive::PrefixedConstant { .. } => name!("Prefixed Constant"), - Primitive::Number { .. } => name!("Number"), - Primitive::String(_) => name!("String"), - Primitive::Iri(_) => name!("Iri"), - Primitive::RdfLiteral { .. 
} => name!("RDF Literal"), - } - } - - fn lsp_identifier(&self) -> Option<(String, String)> { - None - } - - fn lsp_range_to_rename(&self) -> Option { - None - } - - fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> { - None - } -} -impl std::fmt::Display for Primitive<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let mut output = String::new(); - write_tree(&mut output, &ast_to_ascii_tree(self))?; - write!(f, "{output}") - } -} - -#[derive(Debug, Clone, PartialEq)] -pub(crate) struct Exponent<'a> { - pub(crate) e: Span<'a>, - pub(crate) sign: Option>, - pub(crate) number: Span<'a>, -} - -impl AstNode for Exponent<'_> { - fn children(&self) -> Option> { - let mut vec: Vec<&dyn AstNode> = Vec::new(); - vec.push(&self.e); - if let Some(s) = &self.sign { - vec.push(s); - }; - vec.push(&self.number); - Some(vec) - } - - fn span(&self) -> Span { - todo!() - } - - fn is_leaf(&self) -> bool { - todo!() - } - - fn name(&self) -> String { - todo!() - } - - fn lsp_identifier(&self) -> Option<(String, String)> { - None - } - - fn lsp_range_to_rename(&self) -> Option { - None - } - - fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> { - None - } -} - -impl std::fmt::Display for Exponent<'_> { - fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - todo!() - } -} diff --git a/nemo/src/io/parser/ast/tuple.rs b/nemo/src/io/parser/ast/tuple.rs deleted file mode 100644 index 1fcab841c..000000000 --- a/nemo/src/io/parser/ast/tuple.rs +++ /dev/null @@ -1,62 +0,0 @@ -use tower_lsp::lsp_types::SymbolKind; - -use super::term::Term; -use super::{ast_to_ascii_tree, AstNode, List, Position, Range, Wsoc}; -use crate::io::lexer::{Span, Token}; -use ascii_tree::write_tree; - -#[derive(Debug, Clone, PartialEq)] -pub struct Tuple<'a> { - pub span: Span<'a>, - pub open_paren: Span<'a>, - pub terms: Option>>, - pub close_paren: Span<'a>, -} - -impl AstNode for Tuple<'_> { - fn children(&self) -> Option> { - let mut vec: Vec<&dyn AstNode> = Vec::new(); - vec.push(&self.open_paren); - if let Some(terms) = &self.terms { - vec.push(terms); - } - vec.push(&self.close_paren); - Some(vec) - } - - fn span(&self) -> Span { - self.span - } - - fn is_leaf(&self) -> bool { - false - } - - fn name(&self) -> String { - format!( - "Tuple \x1b[34m@{}:{} \x1b[92m{:?}\x1b[0m", - self.span.location_line(), - self.span.get_utf8_column(), - self.span.fragment() - ) - } - - fn lsp_identifier(&self) -> Option<(String, String)> { - None - } - - fn lsp_range_to_rename(&self) -> Option { - None - } - - fn lsp_symbol_info(&self) -> Option<(String, SymbolKind)> { - None - } -} -impl std::fmt::Display for Tuple<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let mut output = String::new(); - write_tree(&mut output, &ast_to_ascii_tree(self))?; - write!(f, "{output}") - } -} diff --git a/nemo/src/io/parser/iri.rs b/nemo/src/io/parser/iri.rs deleted file mode 100644 index a9da28c12..000000000 --- a/nemo/src/io/parser/iri.rs +++ /dev/null @@ -1,356 +0,0 @@ -/// A Parser for RFC 3987 IRIs -use nom::{ - branch::alt, - character::complete::{digit0, one_of, satisfy}, - combinator::{opt, recognize}, - multi::{count, many0, many1, many_m_n}, - sequence::{delimited, pair, tuple}, -}; - -use super::{ - old::token, - rfc5234::{alpha, digit, hexdig}, - types::{IntermediateResult, Span}, -}; - -use macros::traced; - -#[traced("parser::iri")] -pub fn iri(input: Span) -> IntermediateResult { - recognize(tuple(( - scheme, - token(":"), - ihier_part, - 
opt(pair(token("?"), iquery)), - opt(pair(token("#"), ifragment)), - )))(input) -} - -#[traced("parser::iri")] -fn ihier_part(input: Span) -> IntermediateResult { - alt(( - recognize(tuple((token("//"), iauthority, ipath_abempty))), - ipath_absolute, - ipath_rootless, - ipath_empty, - ))(input) -} - -#[traced("parser::iri")] -pub fn iri_reference(input: Span) -> IntermediateResult { - alt((iri, irelative_ref))(input) -} - -#[allow(dead_code)] -#[traced("parser::iri")] -pub fn absolute_iri(input: Span) -> IntermediateResult { - recognize(tuple(( - scheme, - token(":"), - ihier_part, - opt(pair(token("?"), iquery)), - )))(input) -} - -#[traced("parser::iri")] -pub fn irelative_ref(input: Span) -> IntermediateResult { - recognize(tuple(( - irelative_part, - opt(pair(token("?"), iquery)), - opt(pair(token("#"), ifragment)), - )))(input) -} - -#[traced("parser::iri")] -fn irelative_part(input: Span) -> IntermediateResult { - recognize(alt(( - recognize(tuple((token("//"), iauthority, ipath_abempty))), - ipath_absolute, - ipath_noscheme, - ipath_empty, - )))(input) -} - -#[traced("parser::iri")] -fn iauthority(input: Span) -> IntermediateResult { - recognize(tuple(( - opt(pair(iuserinfo, token("@"))), - ihost, - opt(pair(token(":"), port)), - )))(input) -} - -#[traced("parser::iri")] -fn iuserinfo(input: Span) -> IntermediateResult { - recognize(many0(alt(( - iunreserved, - pct_encoded, - sub_delims, - token(":"), - ))))(input) -} - -#[traced("parser::iri")] -fn ihost(input: Span) -> IntermediateResult { - alt((ip_literal, ipv4_address, ireg_name))(input) -} - -#[traced("parser::iri")] -fn ireg_name(input: Span) -> IntermediateResult { - recognize(many0(alt((iunreserved, pct_encoded, sub_delims))))(input) -} - -#[allow(dead_code)] -#[traced("parser::iri")] -pub fn ipath(input: Span) -> IntermediateResult { - alt(( - ipath_abempty, - ipath_absolute, - ipath_noscheme, - ipath_rootless, - ipath_empty, - ))(input) -} - -#[traced("parser::iri")] -fn ipath_abempty(input: Span) -> IntermediateResult { - recognize(many0(pair(token("/"), isegment)))(input) -} - -#[traced("parser::iri")] -fn ipath_absolute(input: Span) -> IntermediateResult { - recognize(pair( - token("/"), - opt(pair(isegment_nz, many0(pair(token("/"), isegment)))), - ))(input) -} - -#[traced("parser::iri")] -fn ipath_noscheme(input: Span) -> IntermediateResult { - recognize(pair(isegment_nz_nc, many0(pair(token("/"), isegment))))(input) -} - -#[traced("parser::iri")] -fn ipath_rootless(input: Span) -> IntermediateResult { - recognize(pair(isegment_nz, many0(pair(token("/"), isegment))))(input) -} - -#[traced("parser::iri")] -fn ipath_empty(input: Span) -> IntermediateResult { - token("")(input) -} - -#[traced("parser::iri")] -fn isegment(input: Span) -> IntermediateResult { - recognize(many0(ipchar))(input) -} - -#[traced("parser::iri")] -fn isegment_nz(input: Span) -> IntermediateResult { - recognize(many1(ipchar))(input) -} - -#[traced("parser::iri")] -fn isegment_nz_nc(input: Span) -> IntermediateResult { - recognize(many1(alt(( - iunreserved, - pct_encoded, - sub_delims, - token("@"), - ))))(input) -} - -#[traced("parser::iri")] -fn ipchar(input: Span) -> IntermediateResult { - alt((iunreserved, pct_encoded, sub_delims, token(":"), token("@")))(input) -} - -#[traced("parser::iri")] -fn iquery(input: Span) -> IntermediateResult { - recognize(many0(alt((ipchar, iprivate, token("/"), token("?")))))(input) -} - -#[traced("parser::iri")] -fn ifragment(input: Span) -> IntermediateResult { - recognize(many0(alt((ipchar, token("/"), 
token("?")))))(input) -} - -#[traced("parser::iri")] -fn iunreserved(input: Span) -> IntermediateResult { - alt(( - alpha, - digit, - token("-"), - token("."), - token("_"), - token("~"), - ucschar, - ))(input) -} - -#[traced("parser::iri")] -fn ucschar(input: Span) -> IntermediateResult { - recognize(satisfy(|c| { - [ - 0xa0u32..=0xd7ff, - 0xf900..=0xfdcf, - 0xfdf0..=0xffef, - 0x10000..=0x1fffd, - 0x20000..=0x2fffd, - 0x30000..=0x3fffd, - 0x40000..=0x4fffd, - 0x50000..=0x5fffd, - 0x60000..=0x6fffd, - 0x70000..=0x7fffd, - 0x80000..=0x8fffd, - 0x90000..=0x9fffd, - 0xa0000..=0xafffd, - 0xb0000..=0xbfffd, - 0xc0000..=0xcfffd, - 0xd0000..=0xdfffd, - 0xe0000..=0xefffd, - ] - .iter() - .any(|range| range.contains(&c.into())) - }))(input) -} - -#[traced("parser::iri")] -fn iprivate(input: Span) -> IntermediateResult { - recognize(satisfy(|c| { - [0xe000u32..=0xf8ff, 0xf000..=0xffffd, 0x100000..=0x10fffd] - .iter() - .any(|range| range.contains(&c.into())) - }))(input) -} - -#[traced("parser::iri")] -fn scheme(input: Span) -> IntermediateResult { - recognize(tuple(( - alpha, - many0(alt((alpha, digit, token("+"), token("-"), token(".")))), - )))(input) -} - -#[traced("parser::iri")] -fn port(input: Span) -> IntermediateResult { - digit0(input) -} - -#[traced("parser::iri")] -fn ip_literal(input: Span) -> IntermediateResult { - delimited(token("["), alt((ipv6_address, ipv_future)), token("]"))(input) -} - -#[traced("parser::iri")] -fn ipv_future(input: Span) -> IntermediateResult { - recognize(tuple(( - token("v"), - hexdig, - token("."), - many1(alt((unreserved, sub_delims, token(":")))), - )))(input) -} - -#[traced("parser::iri")] -fn ipv6_address(input: Span) -> IntermediateResult { - let h16_colon = || pair(h16, token(":")); - alt(( - recognize(tuple((count(h16_colon(), 6), ls32))), - recognize(tuple((token("::"), count(h16_colon(), 5), ls32))), - recognize(tuple((h16, token("::"), count(h16_colon(), 4), ls32))), - recognize(tuple(( - h16_colon(), - h16, - token("::"), - count(h16_colon(), 3), - ls32, - ))), - recognize(tuple(( - count(h16_colon(), 2), - h16, - token("::"), - count(h16_colon(), 2), - ls32, - ))), - recognize(tuple(( - count(h16_colon(), 3), - h16, - token("::"), - h16_colon(), - ls32, - ))), - recognize(tuple((count(h16_colon(), 4), h16, token("::"), ls32))), - recognize(tuple((count(h16_colon(), 5), h16, token("::"), h16))), - recognize(tuple((count(h16_colon(), 6), h16, token("::")))), - ))(input) -} - -#[traced("parser::iri")] -fn h16(input: Span) -> IntermediateResult { - recognize(many_m_n(1, 4, hexdig))(input) -} - -#[traced("parser::iri")] -fn ls32(input: Span) -> IntermediateResult { - alt((recognize(tuple((h16, token(":"), h16))), ipv4_address))(input) -} - -#[traced("parser::iri")] -fn ipv4_address(input: Span) -> IntermediateResult { - recognize(tuple(( - dec_octet, - token("."), - dec_octet, - token("."), - dec_octet, - token("."), - dec_octet, - )))(input) -} - -#[traced("parser::iri")] -fn dec_octet(input: Span) -> IntermediateResult { - alt(( - digit, // 0-9 - recognize(tuple(( - // 10-99 - satisfy(|c| ('1'..='9').contains(&c)), - digit, - ))), - recognize(tuple((token("1"), digit, digit))), // 100-199 - recognize(tuple(( - // 200-249 - token("2"), - satisfy(|c| ('0'..='4').contains(&c)), - digit, - ))), - recognize(tuple((token("25"), satisfy(|c| ('0'..='5').contains(&c))))), // 250-255 - ))(input) -} - -#[traced("parser::iri")] -fn pct_encoded(input: Span) -> IntermediateResult { - recognize(tuple((token("%"), hexdig, hexdig)))(input) -} - 
-#[traced("parser::iri")] -fn unreserved(input: Span) -> IntermediateResult { - alt((alpha, digit, recognize(one_of(r#"-._~"#))))(input) -} - -#[allow(dead_code)] -#[traced("parser::iri")] -pub fn reserved(input: Span) -> IntermediateResult { - alt((gen_delims, sub_delims))(input) -} - -#[traced("parser::iri")] -fn gen_delims(input: Span) -> IntermediateResult { - recognize(one_of(r#":/?#[]@"#))(input) -} - -#[traced("parser::iri")] -fn sub_delims(input: Span) -> IntermediateResult { - recognize(one_of(r#"!$&'()*+,;="#))(input) -} diff --git a/nemo/src/io/parser/rfc5234.rs b/nemo/src/io/parser/rfc5234.rs deleted file mode 100644 index 87def52f0..000000000 --- a/nemo/src/io/parser/rfc5234.rs +++ /dev/null @@ -1,25 +0,0 @@ -use nom::{ - branch::alt, - character::complete::{one_of, satisfy}, - combinator::recognize, -}; - -/// Parsers defined in RFC 5234 -use super::types::{IntermediateResult, Span}; - -use macros::traced; - -#[traced("parser::rfc5234")] -pub(super) fn alpha<'a>(input: Span<'a>) -> IntermediateResult> { - recognize(satisfy(|c| c.is_ascii_alphabetic()))(input) -} - -#[traced("parser::rfc5234")] -pub(super) fn digit<'a>(input: Span<'a>) -> IntermediateResult> { - recognize(satisfy(|c| c.is_ascii_digit()))(input) -} - -#[traced("parser::rfc5234")] -pub(super) fn hexdig<'a>(input: Span<'a>) -> IntermediateResult> { - alt((digit, recognize(one_of("ABCDEF"))))(input) -} diff --git a/nemo/src/io/parser/sparql.rs b/nemo/src/io/parser/sparql.rs deleted file mode 100644 index 5084d1dc8..000000000 --- a/nemo/src/io/parser/sparql.rs +++ /dev/null @@ -1,187 +0,0 @@ -//! Parsers for productions from the SPARQL 1.1 grammar. -use std::fmt::Display; - -use nom::{ - branch::alt, - character::complete::{one_of, satisfy}, - combinator::{map, opt, recognize}, - multi::{many0, many1, separated_list0}, - sequence::{delimited, pair, preceded, terminated, tuple}, -}; - -use super::{ - iri, - old::map_error, - old::token, - old::ParseError, - rfc5234::digit, - turtle::hex, - types::{IntermediateResult, Span}, -}; - -use macros::traced; - -#[derive(Debug)] -#[allow(clippy::enum_variant_names)] // `PrefixedName` comes from the SPARQL grammar -pub enum Name<'a> { - IriReference(&'a str), - PrefixedName { prefix: &'a str, local: &'a str }, - BlankNode(&'a str), -} - -impl Display for Name<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match *self { - Name::IriReference(iri) => write!(f, "{iri}"), - Name::PrefixedName { prefix, local } => write!(f, "{prefix}:{local}"), - Name::BlankNode(label) => write!(f, "_:{label}"), - } - } -} - -/// Parse an IRI reference, i.e., an IRI (relative or absolute) -/// wrapped in angle brackets. Roughly equivalent to the -/// [IRIREF](https://www.w3.org/TR/sparql11-query/#rIRIREF) -/// production of the SPARQL 1.1 grammar, but uses the full [RFC -/// 3987](https://www.ietf.org/rfc/rfc3987.txt) grammar to verify -/// the actual IRI. 
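// Editor's aside (not part of the patch): for example, `iriref` accepted
// `<https://example.org/a>` and returned the bracket-free IRI as the matched
// span, while a bare `https://example.org/a` failed with
// `ParseError::ExpectedIriref`.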
-#[traced("parser::sparql")] -pub fn iriref(input: Span) -> IntermediateResult { - map_error( - delimited(token("<"), iri::iri_reference, token(">")), - || ParseError::ExpectedIriref, - )(input) -} - -#[traced("parser::sparql")] -pub fn iri(input: Span) -> IntermediateResult { - alt((map(iriref, |name| Name::IriReference(&name)), prefixed_name))(input) -} - -#[traced("parser::sparql")] -pub fn pname_ns(input: Span) -> IntermediateResult { - let (rest, prefix) = map_error(terminated(opt(pn_prefix), token(":")), || { - ParseError::ExpectedPnameNs - })(input)?; - - Ok((rest, prefix.unwrap_or("".into()))) -} - -#[traced("parser::sparql")] -pub fn pn_chars_base(input: Span) -> IntermediateResult { - recognize(satisfy(|c| { - [ - 0x41_u32..=0x5A, - 0x61..=0x7A, - 0x00C0..=0x0D6, - 0x0D8..=0x0F6, - 0x00F8..=0x2FF, - 0x0370..=0x037D, - 0x037F..=0x1FFF, - 0x200C..=0x200D, - 0x2070..=0x218F, - 0x2C00..=0x2FEF, - 0x3001..=0xD7FF, - 0xF900..=0xFDCF, - 0xFDF0..=0xFFFD, - 0x10000..=0xEFFFF, - ] - .iter() - .any(|range| range.contains(&c.into())) - }))(input) -} - -#[traced("parser::sparql")] -pub fn pn_chars_u(input: Span) -> IntermediateResult { - alt((pn_chars_base, token("_")))(input) -} - -#[traced("parser::sparql")] -pub fn pn_chars(input: Span) -> IntermediateResult { - alt(( - pn_chars_u, - token("-"), - digit, - token("\u{00B7}"), - recognize(satisfy(|c| { - [0x0300u32..=0x036F, 0x203F..=0x2040] - .iter() - .any(|range| range.contains(&c.into())) - })), - ))(input) -} - -#[traced("parser::sparql")] -pub fn pn_prefix(input: Span) -> IntermediateResult { - recognize(tuple(( - pn_chars_base, - separated_list0(many1(token(".")), many0(pn_chars)), - )))(input) -} - -#[traced("parser::sparql")] -pub fn percent(input: Span) -> IntermediateResult { - recognize(tuple((token("%"), hex, hex)))(input) -} - -#[traced("parser::sparql")] -pub fn pn_local_esc(input: Span) -> IntermediateResult { - recognize(preceded(token(r"\"), one_of(r#"_~.-!$&'()*+,;=/?#@%"#)))(input) -} - -#[traced("parser::sparql")] -pub fn plx(input: Span) -> IntermediateResult { - alt((percent, pn_local_esc))(input) -} - -#[traced("parser::sparql")] -pub fn pn_local(input: Span) -> IntermediateResult { - recognize(pair( - alt((pn_chars_u, token(":"), digit, plx)), - opt(separated_list0( - many1(token(".")), - many0(alt((pn_chars, token(":"), plx))), - )), - ))(input) -} - -#[traced("parser::sparql")] -pub fn pname_ln(input: Span) -> IntermediateResult { - map(pair(pname_ns, pn_local), |(prefix, local)| { - Name::PrefixedName { - prefix: &prefix, - local: &local, - } - })(input) -} - -#[traced("parser::sparql")] -pub fn prefixed_name(input: Span) -> IntermediateResult { - map_error( - alt(( - pname_ln, - map(pname_ns, |prefix| Name::PrefixedName { - prefix: &prefix, - local: "", - }), - )), - || ParseError::ExpectedPrefixedName, - )(input) -} - -#[traced("parser::sparql")] -pub fn blank_node_label(input: Span) -> IntermediateResult { - map_error( - preceded( - token("_:"), - map( - recognize(pair( - alt((pn_chars_u, digit)), - opt(separated_list0(many1(token(".")), many0(pn_chars))), - )), - |name| Name::BlankNode(&name), - ), - ), - || ParseError::ExpectedBlankNodeLabel, - )(input) -} diff --git a/nemo/src/io/parser/turtle.rs b/nemo/src/io/parser/turtle.rs deleted file mode 100644 index ffcaab5ce..000000000 --- a/nemo/src/io/parser/turtle.rs +++ /dev/null @@ -1,186 +0,0 @@ -//! Parsers for productions from the RDF 1.1 Turtle grammar. 
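// Editor's sketch (hypothetical, not part of the original patch): how the
// numeric-literal parsers below divide the work. `double` requires a decimal
// dot and/or an exponent, so plain digit runs fall through to `integer`
// inside `numeric_literal`.
#[cfg(test)]
mod numeric_literal_example {
    use super::*;

    #[test]
    fn distinguishes_integers_from_doubles() {
        // "42" has neither dot nor exponent and only parses as an integer.
        assert!(integer(Span::new("42")).is_ok());
        assert!(double(Span::new("42")).is_err());
        // "4.2e1" parses as a double and becomes an f64 data value.
        assert!(double(Span::new("4.2e1")).is_ok());
    }
}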
- -use nom::{ - branch::alt, - character::complete::{alpha1, alphanumeric1, digit0, digit1, none_of, one_of}, - combinator::{cut, map, map_res, opt, recognize}, - multi::{count, many0}, - sequence::{delimited, pair, preceded, tuple}, -}; - -use macros::traced; - -use nemo_physical::datavalues::AnyDataValue; - -use super::{ - old::map_error, - old::token, - old::ParseError, - sparql::{iri, Name}, - types::{IntermediateResult, Span}, -}; - -/// Characters requiring escape sequences in single-line string literals. -const REQUIRES_ESCAPE: &str = "\u{22}\u{5C}\u{0A}\u{0D}"; - -/// Valid hexadecimal digits. -const HEXDIGIT: &str = "0123456789ABCDEFabcdef"; - -#[traced("parser::turtle")] -pub fn string(input: Span) -> IntermediateResult { - map_error( - alt(( - string_literal_long_quote, - string_literal_long_single_quote, - string_literal_quote, - string_literal_single_quote, - )), - || ParseError::ExpectedStringLiteral, - )(input) -} - -#[traced("parser::turtle")] -pub fn string_literal_quote(input: Span) -> IntermediateResult { - delimited( - token(r#"""#), - cut(recognize(many0(alt(( - recognize(none_of(REQUIRES_ESCAPE)), - echar, - uchar, - ))))), - token(r#"""#), - )(input) -} - -#[traced("parser::turtle")] -pub fn string_literal_single_quote(input: Span) -> IntermediateResult { - delimited( - token("'"), - cut(recognize(many0(alt(( - recognize(none_of(REQUIRES_ESCAPE)), - echar, - uchar, - ))))), - token("'"), - )(input) -} - -#[traced("parser::turtle")] -pub fn string_literal_long_single_quote(input: Span) -> IntermediateResult { - delimited( - token("'''"), - cut(recognize(many0(alt(( - recognize(none_of(r"'\")), - echar, - uchar, - ))))), - token("'''"), - )(input) -} - -#[traced("parser::turtle")] -pub fn string_literal_long_quote(input: Span) -> IntermediateResult { - delimited( - token(r#"""""#), - cut(recognize(many0(alt(( - recognize(none_of(r#""\"#)), - echar, - uchar, - ))))), - token(r#"""""#), - )(input) -} - -#[traced("parser::turtle")] -pub fn hex(input: Span) -> IntermediateResult { - recognize(one_of(HEXDIGIT))(input) -} - -#[traced("parser::turtle")] -pub fn uchar(input: Span) -> IntermediateResult { - recognize(alt(( - preceded(token(r"\u"), count(hex, 4)), - preceded(token(r"\U"), count(hex, 8)), - )))(input) -} - -#[traced("parser::turtle")] -pub fn echar(input: Span) -> IntermediateResult { - recognize(preceded(token(r"\"), one_of(r#"tbnrf"'\"#)))(input) -} - -#[traced("parser::turtle")] -pub fn sign(input: Span) -> IntermediateResult { - recognize(one_of("+-"))(input) -} - -#[traced("parser::turtle")] -pub fn integer(input: Span) -> IntermediateResult { - map_res(recognize(preceded(opt(sign), digit1)), |value| { - value.parse().map(AnyDataValue::new_integer_from_i64) - })(input) -} - -#[traced("parser::turtle")] -pub fn exponent(input: Span) -> IntermediateResult { - recognize(tuple((one_of("eE"), opt(sign), digit1)))(input) -} - -#[traced("parser::turtle")] -pub fn double(input: Span) -> IntermediateResult { - map_res( - recognize(preceded( - opt(sign), - alt(( - recognize(tuple((digit0, token("."), digit1, exponent))), - recognize(tuple((digit0, token("."), digit1))), - recognize(pair(digit1, exponent)), - )), - )), - |value| value.parse().map(AnyDataValue::new_double_from_f64)?, - )(input) -} - -#[traced("parser::turtle")] -pub fn numeric_literal(input: Span) -> IntermediateResult { - alt((double, integer))(input) -} - -#[derive(Debug)] -pub(super) enum RdfLiteral<'a> { - LanguageString { value: &'a str, tag: &'a str }, - DatatypeValue { value: &'a str, datatype: 
Name<'a> }, -} - -#[traced("parser::turtle")] -pub(super) fn rdf_literal<'a>(input: Span<'a>) -> IntermediateResult> { - let (remainder, value) = string(input)?; - let (remainder, literal) = alt(( - map(langtoken, |tag| RdfLiteral::LanguageString { - value: &value, - tag: &tag, - }), - map(preceded(token("^^"), iri), |datatype| { - RdfLiteral::DatatypeValue { - value: &value, - datatype, - } - }), - ))(remainder)?; - - Ok((remainder, literal)) -} - -#[traced("parser::turtle")] -pub fn langtoken(input: Span) -> IntermediateResult { - preceded( - token("@"), - recognize(tuple((alpha1, many0(preceded(token("-"), alphanumeric1))))), - )(input) -} - -#[allow(dead_code)] -#[traced("parser::turtle")] -pub fn boolean_literal(input: Span) -> IntermediateResult { - alt((map(token("true"), |_| true), map(token("false"), |_| false)))(input) -} diff --git a/nemo/src/io/parser/types.rs b/nemo/src/io/parser/types.rs deleted file mode 100644 index 54f3f2392..000000000 --- a/nemo/src/io/parser/types.rs +++ /dev/null @@ -1,669 +0,0 @@ -use std::{ - num::{ParseFloatError, ParseIntError}, - ops::Range, - str::{CharIndices, Chars}, -}; - -use nemo_physical::datavalues::DataValueCreationError; -use nom::{ - error::{ErrorKind, FromExternalError}, - AsBytes, IResult, InputIter, InputLength, InputTake, InputTakeAtPosition, -}; -use nom_locate::LocatedSpan; -use thiserror::Error; - -use crate::{ - io::formats::import_export::ImportExportError, - io::lexer::ParserState, - model::rule_model::{Aggregate, Constraint, Literal, Term, Variable}, -}; - -/// A [LocatedSpan] over the input. -pub(super) type Span<'a> = LocatedSpan<&'a str>; - -/// Create a [Span][nom_locate::LocatedSpan] over the input. -pub fn span_from_str(input: &str) -> Span<'_> { - Span::new(input) -} - -/// An intermediate parsing result -pub(super) type IntermediateResult<'a, T> = IResult, T, LocatedParseError>; - -/// The result of a parse -pub type ParseResult<'a, T> = Result; - -/// A [ParseError] at a certain location -#[derive(Debug, Error)] -#[error("Parse error on line {}, column {}: {}\nat {}{}", .line, .column, .source, .fragment, format_parse_error_context(.context))] -pub struct LocatedParseError { - #[source] - pub source: ParseError, - pub line: u32, - pub column: usize, - pub fragment: String, - pub context: Vec, -} - -impl LocatedParseError { - /// Append another [LocatedParseError] as context to this error. - pub fn append(&mut self, other: LocatedParseError) { - self.context.push(other) - } -} - -fn format_parse_error_context(context: &[LocatedParseError]) -> String { - let mut fragments = Vec::new(); - - for error in context { - let error_string = format!("{error}"); - for line in error_string.split('\n') { - fragments.push(format!("{}{line}", " ".repeat(2))); - } - } - - if fragments.is_empty() { - String::new() - } else { - format!("\nContext:\n{}", fragments.join("\n")) - } -} - -/// Body may contain literals or filter expressions -#[derive(Debug, Clone)] -pub enum BodyExpression { - /// Literal - Literal(Literal), - /// Constraint - Constraint(Constraint), -} - -/// Different operators allows in a constraint. -/// Has one entry for every variant in [Constraint]. -#[derive(Debug, Clone, Copy)] -pub enum ConstraintOperator { - /// Two terms are equal. - Equals, - /// Two terms are unequal. - Unequals, - /// Value of the left term is less than the value of the right term. - LessThan, - /// Value of the left term is greater than the value of the right term. 
- GreaterThan, - /// Value of the left term is less than or equal to the value of the right term. - LessThanEq, - /// Value of the left term is greater than or equal to the value of the right term. - GreaterThanEq, -} - -impl ConstraintOperator { - /// Turn operator into [Constraint]. - pub(crate) fn into_constraint(self, left: Term, right: Term) -> Constraint { - match self { - ConstraintOperator::Equals => Constraint::Equals(left, right), - ConstraintOperator::Unequals => Constraint::Unequals(left, right), - ConstraintOperator::LessThan => Constraint::LessThan(left, right), - ConstraintOperator::GreaterThan => Constraint::GreaterThan(left, right), - ConstraintOperator::LessThanEq => Constraint::LessThanEq(left, right), - ConstraintOperator::GreaterThanEq => Constraint::GreaterThanEq(left, right), - } - } -} - -/// Defines arithmetic operators -#[derive(Debug, Clone, Copy)] -pub(super) enum ArithmeticOperator { - Addition, - Subtraction, - Multiplication, - Division, -} - -/// Errors that can occur during parsing. -#[derive(Debug, Error)] -pub enum ParseError { - /// An external error during parsing. - #[error(transparent)] - ExternalError(#[from] Box), - /// An error related to a file format. - #[error(r#"unknown file format "{0}""#)] - FileFormatError(String), - /// A syntax error. Note that we cannot take [&'a str] here, as - /// bounds on [std::error::Error] require ['static] lifetime. - #[error("syntax error: {0}")] - SyntaxError(String), - /// More input needed. - #[error("expected further input: {0}")] - MissingInput(String), - /// Use of an undeclared prefix. - #[error(r#"undeclared prefix "{0}""#)] - UndeclaredPrefix(String), - /// Re-declared prefix - #[error(r#"prefix "{0}" re-declared"#)] - RedeclaredPrefix(String), - /// An existentially quantified variable occurs in the body of a rule. - #[error(r#"variable "{0}" occurs existentially quantified in the rule body"#)] - BodyExistential(Variable), - /// A wildcard pattern was used inside of the rule head. - #[error(r#"rule head must not contain unnamed variables "_""#)] - UnnamedInHead, - /// The universal variable is not safe or derived. - #[error(r#"variable "{0}" appears in the head but cannot be derived from the body"#)] - UnsafeHeadVariable(Variable), - /// Complex term uses an unsafe variable. - #[error(r#"the value of variable "{1}" contained in term "{0}" cannot be derived"#)] - UnsafeComplexTerm(String, Variable), - /// The unsafe variable appears in multiple negative body literals. - #[error(r#"the unsafe variable "{0}" appears in multiple negative body literals"#)] - UnsafeVariableInMultipleNegativeLiterals(Variable), - /// Constraint on unsafe unsafe variable may only use variables from that negated literal - #[error(r#"Term "{0}" uses variable {1} from negated literal {2} but also the variable {3}, which does not appear in it."#)] - ConstraintOutsideVariable(String, Variable, String, Variable), - /// An aggregate term occurs in the body of a rule. 
- #[error(r#"An aggregate term ("{0}") occurs in the body of a rule"#)] - AggregateInBody(Aggregate), - /// Multiple aggregates in one rule - #[error("Currently, only one aggregate per rule is supported.")] - MultipleAggregates, - /// Aggregates cannot be used within existential rules - #[error("Aggregates may not appear in existential rules.")] - AggregatesPlusExistentials, - /// A variable is both existentially and universally quantified - #[error(r#"variables named "{0}" occur with existential and universal quantification"#)] - BothQuantifiers(String), - /// An RDF data source declaration has arity != 3. - #[error( - r#"RDF data source for predicate "{0}" (from "{1}") has invalid arity {2}, should be 3"# - )] - RdfSourceInvalidArity(String, String, usize), - /// A SPARQL query data source has an arity that doesn't match the number of variables given. - #[error( - r#"SPARQL data source for predicate "{0}" has arity {1}, but {2} variables are given"# - )] - SparqlSourceInvalidArity(String, usize, usize), - /// SPARQL query data sources are currently not supported. - #[error(r#"SPARQL data source for predicate "{0}" is not yet implemented"#)] - UnsupportedSparqlSource(String), - /// Expected a dot. - #[error(r#"Expected "{0}""#)] - ExpectedToken(String), - /// Expected an Iriref. - #[error("Expected an IRI")] - ExpectedIriref, - /// Expected a base declaration. - #[error(r#"Expected a "@base" declaration"#)] - ExpectedBaseDeclaration, - /// Expected a prefix declaration. - #[error(r#"Expected a "@prefix" declaration"#)] - ExpectedPrefixDeclaration, - /// Expected a predicate declaration. - #[error(r#"Expected a "@declare" type declaration"#)] - ExpectedPredicateDeclaration, - /// Expected a prefix. - #[error(r#"Expected a prefix"#)] - ExpectedPnameNs, - /// Expected a logical type name. - #[error("Expected a type name")] - ExpectedLogicalTypeName, - /// Expected a data source declaration. - #[error(r#"Expected a "@source" declaration"#)] - ExpectedDataSourceDeclaration, - /// Expected an output declaration. - #[error(r#"Expected an "@output" declaration"#)] - ExpectedOutputDeclaration, - /// Expected a string literal. - #[error("Expected a string literal")] - ExpectedStringLiteral, - /// Expected a statement. - #[error("Expected a statement (i.e., either a fact or a rule)")] - ExpectedStatement, - /// Expected a fact. - #[error("Expected a fact")] - ExpectedFact, - /// Expected a rule. - #[error("Expected a rule")] - ExpectedRule, - /// Expected a prefixed name. - #[error("Expected a prefixed name")] - ExpectedPrefixedName, - /// Expected a blank node label. - #[error("Expected a blank node label")] - ExpectedBlankNodeLabel, - /// Expected an IRI identifier. - #[error("Expected an IRI identifier (for e.g. predicate names or functions in term trees)")] - ExpectedIriIdentifier, - /// Expected an IRI as a constant. - #[error("Expected an IRI as a constant name")] - ExpectedIriConstant, - /// Expected an IRI-like identifier. - #[error( - "Expected an IRI-like identifier (for e.g. predicate names or functions in term trees)." - )] - ExpectedIriLikeIdentifier, - /// Expected a bare predicate name. - #[error("Expected a bare name")] - ExpectedBareName, - /// Expected a ground term. - #[error("Expected a ground term")] - ExpectedGroundTerm, - /// Expected an atom. - #[error("Expected an atom")] - ExpectedAtom, - /// Expected a term. - #[error("Expected a term")] - ExpectedTerm, - /// Expected a variable. 
- #[error("Expected a variable")] - ExpectedVariable, - /// Expected a universally quantified variable. - #[error("Expected a universally quantified variable")] - ExpectedUniversalVariable, - /// Expected an existentially quantified variable. - #[error("Expected an existentially quantified variable")] - ExpectedExistentialVariable, - /// Expected a variable name. - #[error("Expected a variable name")] - ExpectedVariableName, - /// Expected a literal. - #[error("Expected a literal")] - ExpectedLiteral, - /// Expected a positive literal. - #[error("Expected a positive literal")] - ExpectedPositiveLiteral, - /// Expected a negative literal. - #[error("Expected a negative literal")] - ExpectedNegativeLiteral, - /// Expected a filter operator. - #[error("Expected a filter operator")] - ExpectedFilterOperator, - /// Expected a constraint. - #[error("Expected a constraint")] - ExpectedConstraint, - /// Expected a body expression. - #[error("Expected a literal or a filter expression")] - ExpectedBodyExpression, - /// Expected an arithmetic expression. - #[error("Expected an arithmetic expression")] - ExpectedArithmeticExpression, - /// Expected an arithmetic product expression. - #[error("Expected an arithmetic product expression")] - ExpectedArithmeticProduct, - /// Expected an arithmetic factor expression. - #[error("Expected an arithmetic factor expression")] - ExpectedArithmeticFactor, - /// Encountered a base declaration after any other directive. - #[error("A @base declaration can only be the first statement in the program")] - LateBaseDeclaration, - /// Encountered a prefix declaration after any non-base non-prefix directive. - #[error("A @prefix declaration must occur before any non-@base non-@prefix declarations.")] - LatePrefixDeclaration, - /// Expected a function term - #[error("Expected a function term")] - ExpectedFunctionTerm, - /// Expected a known unary function - #[error("Expected a known unary function")] - ExpectedUnaryFunction, - /// Expected a term tree (i.e. a term that can additionally involve e.g. arithmetic operations) - #[error("Expected a term tree (i.e. a term that can additionally involve e.g. arithmetic operations)")] - ExpectedPrimitiveTerm, - /// Expected an aggregate - #[error("Expected an aggregate term")] - ExpectedAggregate, - /// Expected an parenthesised expression. - #[error("Expected an parenthesised expression")] - ExpectedParenthesisedExpression, - /// Expected an parenthesised term tree. - #[error("Expected an parenthesised term tree")] - ExpectedParenthesisedTerm, - /// Unknown aggregate operation - #[error(r#"Aggregate operation "{0}" is not known"#)] - UnknownAggregateOperation(String), -} - -impl ParseError { - /// Locate this error by adding a position. - pub fn at(self, position: Span) -> LocatedParseError { - // miri doesn't like nom_locate, cf. 
https://github.com/fflorent/nom_locate/issues/88 - let column = if cfg!(not(miri)) { - position.naive_get_utf8_column() - } else { - 0 - }; - let fragment = if position.is_empty() { - String::new() - } else { - let line = if cfg!(not(miri)) { - String::from_utf8(position.get_line_beginning().to_vec()) - .expect("input is valid UTF-8") - } else { - String::new() - }; - format!("\"{line}\"\n{}^", "-".repeat(3 + column)) - }; - - LocatedParseError { - source: self, - line: position.location_line(), - column, - fragment, - context: Vec::new(), - } - } -} - -impl From> for LocatedParseError { - fn from(err: nom::Err) -> Self { - match err { - nom::Err::Incomplete(_) => todo!(), - nom::Err::Error(_) => todo!(), - nom::Err::Failure(_) => todo!(), - } - } -} - -impl From> for crate::error::Error { - fn from(err: nom::Err) -> Self { - crate::error::Error::ParseError(LocatedParseError::from(err)) - } -} - -impl nom::error::ParseError> for LocatedParseError { - fn from_error_kind(input: Span, kind: ErrorKind) -> Self { - ParseError::SyntaxError(kind.description().to_string()).at(input) - } - - fn append(input: Span, kind: ErrorKind, other: Self) -> Self { - let mut error = ParseError::SyntaxError(kind.description().to_string()).at(input); - error.append(other); - error - } -} - -impl FromExternalError, crate::error::Error> for LocatedParseError { - fn from_external_error(input: Span, _kind: ErrorKind, e: crate::error::Error) -> Self { - ParseError::ExternalError(Box::new(e)).at(input) - } -} - -impl FromExternalError, ParseError> for LocatedParseError { - fn from_external_error(input: Span<'_>, kind: ErrorKind, e: ParseError) -> Self { - let mut err = >>::from_error_kind(input, kind); - err.append(e.at(input)); - err - } -} - -impl FromExternalError, ParseIntError> for LocatedParseError { - fn from_external_error(input: Span, _kind: ErrorKind, e: ParseIntError) -> Self { - ParseError::ExternalError(Box::new(crate::error::ReadingError::into(e.into()))).at(input) - } -} - -impl FromExternalError, ParseFloatError> for LocatedParseError { - fn from_external_error(input: Span, _kind: ErrorKind, e: ParseFloatError) -> Self { - ParseError::ExternalError(Box::new(crate::error::ReadingError::into(e.into()))).at(input) - } -} - -impl FromExternalError, crate::error::ReadingError> for LocatedParseError { - fn from_external_error( - input: Span<'_>, - _kind: ErrorKind, - e: crate::error::ReadingError, - ) -> Self { - ParseError::ExternalError(Box::new(e.into())).at(input) - } -} - -impl FromExternalError, ImportExportError> for LocatedParseError { - fn from_external_error(input: Span<'_>, _kind: ErrorKind, e: ImportExportError) -> Self { - ParseError::ExternalError(Box::new(e.into())).at(input) - } -} - -impl FromExternalError, DataValueCreationError> for LocatedParseError { - fn from_external_error(input: Span<'_>, _kind: ErrorKind, e: DataValueCreationError) -> Self { - ParseError::ExternalError(Box::new(e.into())).at(input) - } -} - -#[derive(Debug, Clone, Copy)] -pub(crate) struct Input<'a, 's> { - pub(crate) input: crate::io::lexer::Span<'a>, - pub(crate) parser_state: ParserState<'s>, -} -impl<'a, 's> Input<'a, 's> { - pub(crate) fn new(input: &'a str, errors: ParserState<'s>) -> Input<'a, 's> { - Input { - input: Span::new(input), - parser_state: errors, - } - } -} -impl ToRange for Input<'_, '_> { - fn to_range(&self) -> Range { - self.input.to_range() - } -} - -impl AsBytes for Input<'_, '_> { - fn as_bytes(&self) -> &[u8] { - self.input.fragment().as_bytes() - } -} - -impl<'a, 's> nom::Compare> for 
Input<'a, 's> { - fn compare(&self, t: Input) -> nom::CompareResult { - self.input.compare(t.as_bytes()) - } - - fn compare_no_case(&self, t: Input) -> nom::CompareResult { - self.input.compare_no_case(t.as_bytes()) - } -} -impl nom::Compare<&str> for Input<'_, '_> { - fn compare(&self, t: &str) -> nom::CompareResult { - self.input.compare(t) - } - - fn compare_no_case(&self, t: &str) -> nom::CompareResult { - self.input.compare_no_case(t) - } -} - -impl nom::ExtendInto for Input<'_, '_> { - type Item = char; - - type Extender = String; - - fn new_builder(&self) -> Self::Extender { - self.input.new_builder() - } - - fn extend_into(&self, acc: &mut Self::Extender) { - self.input.extend_into(acc) - } -} - -impl nom::FindSubstring<&str> for Input<'_, '_> { - fn find_substring(&self, substr: &str) -> Option { - self.input.find_substring(substr) - } -} - -impl<'a, 'e, T> nom::FindToken for Input<'a, 'e> -where - &'a str: nom::FindToken, -{ - fn find_token(&self, token: T) -> bool { - self.input.find_token(token) - } -} - -impl<'a, 's> InputIter for Input<'a, 's> { - type Item = char; - - type Iter = CharIndices<'a>; - - type IterElem = Chars<'a>; - - fn iter_indices(&self) -> Self::Iter { - todo!() - } - - fn iter_elements(&self) -> Self::IterElem { - todo!() - } - - fn position
(&self, _predicate: P) -> Option - where - P: Fn(Self::Item) -> bool, - { - todo!() - } - - fn slice_index(&self, count: usize) -> Result { - self.input.slice_index(count) - } -} - -impl InputLength for Input<'_, '_> { - fn input_len(&self) -> usize { - self.input.input_len() - } -} - -impl InputTake for Input<'_, '_> { - fn take(&self, count: usize) -> Self { - Input { - input: self.input.take(count), - parser_state: self.parser_state, - } - } - - fn take_split(&self, count: usize) -> (Self, Self) { - let (first, second) = self.input.take_split(count); - ( - Input { - input: first, - parser_state: self.parser_state, - }, - Input { - input: second, - parser_state: self.parser_state, - }, - ) - } -} - -impl InputTakeAtPosition for Input<'_, '_> { - type Item = char; - - fn split_at_position>( - &self, - predicate: P, - ) -> IResult - where - P: Fn(Self::Item) -> bool, - { - match self.input.position(predicate) { - Some(n) => Ok(self.take_split(n)), - None => Err(nom::Err::Incomplete(nom::Needed::new(1))), - } - } - - fn split_at_position1>( - &self, - _predicate: P, - _e: ErrorKind, - ) -> IResult - where - P: Fn(Self::Item) -> bool, - { - todo!() - } - - fn split_at_position_complete>( - &self, - predicate: P, - ) -> IResult - where - P: Fn(Self::Item) -> bool, - { - match self.split_at_position(predicate) { - Err(nom::Err::Incomplete(_)) => Ok(self.take_split(self.input_len())), - res => res, - } - } - - fn split_at_position1_complete>( - &self, - predicate: P, - e: ErrorKind, - ) -> IResult - where - P: Fn(Self::Item) -> bool, - { - match self.input.fragment().position(predicate) { - Some(0) => Err(nom::Err::Error(E::from_error_kind(*self, e))), - Some(n) => Ok(self.take_split(n)), - None => { - if self.input.fragment().input_len() == 0 { - Err(nom::Err::Error(E::from_error_kind(*self, e))) - } else { - Ok(self.take_split(self.input_len())) - } - } - } - } -} - -impl nom::Offset for Input<'_, '_> { - fn offset(&self, second: &Self) -> usize { - self.input.offset(&second.input) - } -} - -impl nom::ParseTo for Input<'_, '_> { - fn parse_to(&self) -> Option { - todo!() - } -} - -impl<'a, 'e, R> nom::Slice for Input<'a, 'e> -where - &'a str: nom::Slice, -{ - fn slice(&self, range: R) -> Self { - Input { - input: self.input.slice(range), - parser_state: self.parser_state, - } - } -} - -impl nom_greedyerror::Position for Input<'_, '_> { - fn position(&self) -> usize { - nom_greedyerror::Position::position(&self.input) - } -} - -impl std::fmt::Display for Input<'_, '_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "line {}, column {}", - self.input.location_line(), - self.input.get_utf8_column() - ) - } -} - -impl nom_supreme::context::ContextError for Input<'_, '_> { - fn add_context(_location: I, _ctx: C, _other: Self) -> Self { - todo!() - } -} - -pub(crate) trait ToRange { - fn to_range(&self) -> Range; -} diff --git a/nemo/src/io/resource_providers.rs b/nemo/src/io/resource_providers.rs index c6f944028..3a5180401 100644 --- a/nemo/src/io/resource_providers.rs +++ b/nemo/src/io/resource_providers.rs @@ -5,7 +5,7 @@ use std::{io::BufRead, path::PathBuf, rc::Rc}; // use crate::io::parser::{all_input_consumed, iri::iri}; use nemo_physical::{error::ReadingError, resource::Resource}; -use super::compression_format::CompressionFormat; +use crate::rule_model::components::import_export::compression::CompressionFormat; /// A resource provider for files. 
pub mod file; diff --git a/nemo/src/io/resource_providers/file.rs b/nemo/src/io/resource_providers/file.rs index 356de0176..52962c1bd 100644 --- a/nemo/src/io/resource_providers/file.rs +++ b/nemo/src/io/resource_providers/file.rs @@ -7,7 +7,7 @@ use std::{ use nemo_physical::{error::ReadingError, resource::Resource}; use path_slash::PathBufExt; -use crate::io::compression_format::CompressionFormat; +use crate::rule_model::components::import_export::compression::CompressionFormat; use super::{is_iri, ResourceProvider}; diff --git a/nemo/src/io/resource_providers/http.rs b/nemo/src/io/resource_providers/http.rs index b78345a65..0d2a2c356 100644 --- a/nemo/src/io/resource_providers/http.rs +++ b/nemo/src/io/resource_providers/http.rs @@ -2,7 +2,7 @@ use std::io::{BufRead, BufReader, Read}; use nemo_physical::{error::ReadingError, resource::Resource}; -use crate::io::compression_format::CompressionFormat; +use crate::rule_model::components::import_export::compression::CompressionFormat; use super::{is_iri, ResourceProvider}; diff --git a/nemo/src/lib.rs b/nemo/src/lib.rs index ecf384c66..f5d12aab3 100644 --- a/nemo/src/lib.rs +++ b/nemo/src/lib.rs @@ -30,10 +30,10 @@ pub mod parser; pub mod syntax; pub mod execution; -pub mod model; pub mod rule_model; pub mod util; +mod chase_model; mod program_analysis; mod table_manager; diff --git a/nemo/src/model.rs b/nemo/src/model.rs deleted file mode 100644 index fd3719a18..000000000 --- a/nemo/src/model.rs +++ /dev/null @@ -1,14 +0,0 @@ -//! The rule models. - -/// Defines the rule model variant used during the chase computation. -pub mod chase_model; - -/// Defines the "official" rule model that we expose to the outside -/// world. Allows faithful serialization of parsed rule programs. -pub mod rule_model; - -/// Forward everything to the rule model. -pub use rule_model::*; - -/// Map from variables to terms -pub type VariableAssignment = std::collections::HashMap; diff --git a/nemo/src/model/rule_model/program.rs b/nemo/src/model/rule_model/program.rs deleted file mode 100644 index 0474c0e63..000000000 --- a/nemo/src/model/rule_model/program.rs +++ /dev/null @@ -1,270 +0,0 @@ -use std::collections::{HashMap, HashSet}; - -use crate::model::{ExportDirective, ImportDirective}; - -use super::{Atom, Identifier, Rule}; - -/// A (ground) fact. -#[derive(Debug, Eq, PartialEq, Clone)] -pub struct Fact(pub Atom); - -impl std::fmt::Display for Fact { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - self.0.fmt(f) - } -} - -/// A statement that can occur in the program. -#[derive(Debug, Eq, PartialEq, Clone)] -pub enum Statement { - /// A fact. - Fact(Fact), - /// A rule. - Rule(Rule), -} - -/// A complete program. -#[derive(Debug, Default, Clone)] -pub struct Program { - base: Option, - prefixes: HashMap, - rules: Vec, - facts: Vec, - imports: Vec, - exports: Vec, - output_predicates: Vec, -} - -/// A Builder for a program. -#[derive(Debug, Default)] -pub struct ProgramBuilder { - program: Program, -} - -impl ProgramBuilder { - /// Construct a new builder. - pub fn new() -> Self { - Default::default() - } - - /// Construct a [Program] from this builder. - pub fn build(self) -> Program { - self.program - } - - /// Set the base IRI. - pub fn base(mut self, base: String) -> Self { - self.program.base = Some(base); - self - } - - /// Add a prefix. - pub fn prefix(mut self, prefix: String, iri: String) -> Self { - self.program.prefixes.insert(prefix, iri); - self - } - - /// Add prefixes. 
- pub fn prefixes(mut self, prefixes: T) -> Self - where - T: IntoIterator, - { - self.program.prefixes.extend(prefixes); - self - } - - /// Add an imported table. - pub fn import(mut self, import: ImportDirective) -> Self { - self.program.imports.push(import); - self - } - - /// Add imported tables. - pub fn imports(mut self, imports: T) -> Self - where - T: IntoIterator, - { - self.program.imports.extend(imports); - self - } - - /// Add an exported table. - pub fn export(mut self, export: ExportDirective) -> Self { - self.program.exports.push(export); - self - } - - /// Add exported tables. - pub fn exports(mut self, exports: T) -> Self - where - T: IntoIterator, - { - self.program.exports.extend(exports); - self - } - - /// Add a rule. - pub fn rule(mut self, rule: Rule) -> Self { - self.program.rules.push(rule); - self - } - - /// Add rules. - pub fn rules(mut self, rules: T) -> Self - where - T: IntoIterator, - { - self.program.rules.extend(rules); - self - } - - /// Add a fact. - pub fn fact(mut self, fact: Fact) -> Self { - self.program.facts.push(fact); - self - } - - /// Add facts. - pub fn facts(mut self, facts: T) -> Self - where - T: IntoIterator, - { - self.program.facts.extend(facts); - self - } - - /// Mark predicate as output predicate. - pub fn output_predicate(self, predicate: Identifier) -> Self { - self.output_predicates([predicate]) - } - - /// Mark predicates as output predicates. - pub fn output_predicates(mut self, predicates: T) -> Self - where - T: IntoIterator, - { - self.program.output_predicates.extend(predicates); - self - } -} - -impl Program { - /// Return a [builder][ProgramBuilder] for the [Program]. - pub fn builder() -> ProgramBuilder { - Default::default() - } - - /// Get the base IRI, if set. - #[must_use] - pub fn base(&self) -> Option { - self.base.clone() - } - - /// Return all rules in the program - immutable. - #[must_use] - pub fn rules(&self) -> &Vec { - &self.rules - } - - /// Return all facts in the program. - #[must_use] - pub fn facts(&self) -> &Vec { - &self.facts - } - - /// Return a HashSet of all predicates in the program (in rules and facts). - #[must_use] - pub fn predicates(&self) -> HashSet { - self.rules() - .iter() - .flat_map(|rule| { - rule.head() - .iter() - .map(|atom| atom.predicate()) - .chain(rule.body().iter().map(|literal| literal.predicate())) - }) - .chain(self.facts().iter().map(|atom| atom.0.predicate())) - .collect() - } - - /// Return a HashSet of all idb predicates (predicates occuring rule heads) in the program. - #[must_use] - pub fn idb_predicates(&self) -> HashSet { - self.rules() - .iter() - .flat_map(|rule| rule.head()) - .map(|atom| atom.predicate()) - .collect() - } - - /// Return a HashSet of all edb predicates (all predicates minus idb predicates) in the program. - #[must_use] - pub fn edb_predicates(&self) -> HashSet { - self.predicates() - .difference(&self.idb_predicates()) - .cloned() - .collect() - } - - /// Return an Iterator over all output predicates that - /// were explicitly marked in output directives. - pub fn output_predicates(&self) -> impl Iterator { - self.output_predicates.iter() - } - - /// Add output predicates to the program. - pub fn add_output_predicates(&mut self, predicates: T) - where - T: IntoIterator, - { - self.output_predicates.extend(predicates); - } - - /// Remove all output predicates of the program. - pub fn clear_output_predicates(&mut self) { - self.output_predicates.clear(); - } - - /// Return all prefixes in the program. 
- #[must_use] - pub fn prefixes(&self) -> &HashMap { - &self.prefixes - } - - /// Return all [ImportDirective]s of the program. - pub fn imports(&self) -> impl Iterator { - self.imports.iter() - } - - /// Add [ImportDirective]s to the program. - pub fn add_imports(&mut self, imports: T) - where - T: IntoIterator, - { - self.imports.extend(imports); - } - - /// Return all [ExportDirective]s of the program. - pub fn exports(&self) -> impl Iterator { - self.exports.iter() - } - - /// Add [ExportDirective]s to the program. - pub fn add_exports(&mut self, exports: T) - where - T: IntoIterator, - { - self.exports.extend(exports); - } - - /// Remove all [ExportDirective]s of the program. - pub fn clear_exports(&mut self) { - self.exports.clear(); - } - - /// Look up a given prefix. - #[must_use] - pub fn resolve_prefix(&self, tag: &str) -> Option { - self.prefixes.get(tag).cloned() - } -} diff --git a/nemo/src/model/rule_model/rule.rs b/nemo/src/model/rule_model/rule.rs deleted file mode 100644 index 1ab98eab0..000000000 --- a/nemo/src/model/rule_model/rule.rs +++ /dev/null @@ -1,385 +0,0 @@ -use std::collections::{HashMap, HashSet}; - -use crate::io::parser::types::ParseError; -use crate::model::VariableAssignment; - -use super::{Atom, Constraint, Literal, PrimitiveTerm, Term, Variable}; - -/// A rule. -#[derive(Debug, Eq, PartialEq, Clone)] -pub struct Rule { - /// Head atoms of the rule - head: Vec, - /// Body literals of the rule - body: Vec, - /// Constraints on the body of the rule - constraints: Vec, -} - -impl Rule { - /// Construct a new rule. - pub fn new(head: Vec, body: Vec, constraints: Vec) -> Self { - Self { - head, - body, - constraints, - } - } - - fn calculate_derived_variables( - safe_variables: &HashSet, - constraints: &[Constraint], - ) -> HashSet { - let mut derived_variables = safe_variables.clone(); - - let mut satisfied_constraints = HashSet::::new(); - while satisfied_constraints.len() < constraints.len() { - let num_satisified_constraints = satisfied_constraints.len(); - - for (constraint_index, constraint) in constraints.iter().enumerate() { - if satisfied_constraints.contains(&constraint_index) { - continue; - } - - if let Some((variable, term)) = constraint.has_form_assignment() { - if !derived_variables.contains(variable) - && term - .variables() - .all(|term_variable| derived_variables.contains(term_variable)) - { - derived_variables.insert(variable.clone()); - satisfied_constraints.insert(constraint_index); - continue; - } - } - } - - if satisfied_constraints.len() == num_satisified_constraints { - return derived_variables; - } - } - - derived_variables - } - - /// Return all variables that appear in negative literals - /// but cannot be derived from positive literals. - /// - /// For each variable also returns the associated index of the literal. - /// - /// Returns an error if one negative variable is associated with multiple literals. 
- fn calculate_negative_variables( - negative: &[Literal], - safe_variables: &HashSet, - ) -> Result, ParseError> { - let mut negative_variables = HashMap::::new(); - - for (literal_index, negative_literal) in negative.iter().enumerate() { - let mut current_unsafe = HashMap::::new(); - - for negative_term in negative_literal.terms() { - if let Term::Primitive(PrimitiveTerm::Variable(variable)) = negative_term { - if safe_variables.contains(variable) { - continue; - } - - current_unsafe.insert(variable.clone(), literal_index); - - if negative_variables.contains_key(variable) { - return Err(ParseError::UnsafeVariableInMultipleNegativeLiterals( - variable.clone(), - )); - } - } - } - - negative_variables.extend(current_unsafe) - } - - Ok(negative_variables) - } - - /// Construct a new rule, validating constraints on variable usage. - pub(crate) fn new_validated( - head: Vec, - body: Vec, - constraints: Vec, - ) -> Result { - // All the existential variables used in the rule - let existential_variable_names = head - .iter() - .flat_map(|a| a.existential_variables().flat_map(|v| v.name())) - .collect::>(); - - for variable in body - .iter() - .flat_map(|l| l.variables()) - .chain(constraints.iter().flat_map(|c| c.variables())) - { - // Existential variables may only occur in the head - if variable.is_existential() { - return Err(ParseError::BodyExistential(variable.clone())); - } - - // There may not be a universal variable whose name is the same that of an existential - if let Some(name) = variable.name() { - if existential_variable_names.contains(&name) { - return Err(ParseError::BothQuantifiers(name)); - } - } - } - - // Divide the literals into a positive and a negative part - let (positive, negative): (Vec<_>, Vec<_>) = body - .iter() - .cloned() - .partition(|literal| literal.is_positive()); - - // Safe variables are considered to be - // all variables occuring as primitive terms in a positive body literal - // or every value that is equal to such a variable - let safe_variables = Self::safe_variables_literals(&positive); - - // Derived variables are variables that result from functional expressions - // expressed as ?Variable = Term constraints, - // where the term only contains safe or derived variables. 
- let derived_variables = Self::calculate_derived_variables(&safe_variables, &constraints); - - // Negative variables are variables that occur as primitive terms in negative literals - // bot cannot be derived - let negative_variables = Self::calculate_negative_variables(&negative, &derived_variables)?; - - // Each constraint must only use derived variables - // or if it contains negative variables, then all variables in the constraint - // must be from the same atom - for constraint in &constraints { - let unknown = constraint.variables().find(|variable| { - !derived_variables.contains(variable) && !negative_variables.contains_key(variable) - }); - - if let Some(variable) = unknown { - return Err(ParseError::UnsafeComplexTerm( - constraint.to_string(), - variable.clone(), - )); - } - - if let Some(negative_variable) = constraint - .variables() - .find(|variable| negative_variables.contains_key(variable)) - { - let negative_literal = &negative[*negative_variables - .get(negative_variable) - .expect("Map must contain key")]; - let allowed_variables = negative_literal - .variables() - .cloned() - .collect::>(); - - if let Some(not_allowed) = constraint - .variables() - .find(|variable| !allowed_variables.contains(variable)) - { - return Err(ParseError::ConstraintOutsideVariable( - constraint.to_string(), - negative_variable.clone(), - negative_literal.to_string(), - not_allowed.clone(), - )); - } - } - } - - // Each complex term in the body and head must only use safe or derived variables - for term in body - .iter() - .flat_map(|l| l.terms()) - .chain(head.iter().flat_map(|a| a.terms())) - { - if term.is_primitive() { - continue; - } - - for variable in term.variables() { - if !derived_variables.contains(variable) { - return Err(ParseError::UnsafeComplexTerm( - term.to_string(), - variable.clone(), - )); - } - } - } - - let mut is_existential = false; - - // Head atoms may only use variables that are safe or derived - for variable in head.iter().flat_map(|a| a.variables()) { - if variable.is_existential() { - is_existential = true; - } - - if variable.is_unnamed() { - return Err(ParseError::UnnamedInHead); - } - - if variable.is_universal() && !derived_variables.contains(variable) { - return Err(ParseError::UnsafeHeadVariable(variable.clone())); - } - } - - // Check for aggregates in the body of a rule - for literal in &body { - #[allow(clippy::never_loop)] - for aggregate in literal.aggregates() { - return Err(ParseError::AggregateInBody(aggregate.clone())); - } - } - for constraint in &constraints { - #[allow(clippy::never_loop)] - for aggregate in constraint.aggregates() { - return Err(ParseError::AggregateInBody(aggregate.clone())); - } - } - - // We only allow one aggregate per rule, - // and do not allow them to appear together with existential variables - let mut aggregate_count = 0; - for head_atom in &head { - for term in head_atom.terms() { - aggregate_count += term.aggregates().len(); - - if aggregate_count > 1 { - return Err(ParseError::MultipleAggregates); - } - } - } - - if aggregate_count > 0 && is_existential { - return Err(ParseError::AggregatesPlusExistentials); - } - - Ok(Rule { - head, - body, - constraints, - }) - } - - /// Return all variables that are "safe". - /// A variable is safe if it occurs in a positive body literal. 
- fn safe_variables_literals(literals: &[Literal]) -> HashSet { - let mut result = HashSet::new(); - - for literal in literals { - if let Literal::Positive(atom) = literal { - for term in atom.terms() { - if let Term::Primitive(PrimitiveTerm::Variable(variable)) = term { - result.insert(variable.clone()); - } - } - } - } - - result - } - - /// Return all variables that are "safe". - /// A variable is safe if it occurs in a positive body literal, - /// or is equal to such a value. - pub fn safe_variables(&self) -> HashSet { - Self::safe_variables_literals(&self.body) - } - - /// Return the head atoms of the rule - immutable. - #[must_use] - pub fn head(&self) -> &Vec { - &self.head - } - - /// Return the head atoms of the rule - mutable. - #[must_use] - pub fn head_mut(&mut self) -> &mut Vec { - &mut self.head - } - - /// Return the body literals of the rule - immutable. - #[must_use] - pub fn body(&self) -> &Vec { - &self.body - } - - /// Return the body literals of the rule - mutable. - #[must_use] - pub fn body_mut(&mut self) -> &mut Vec { - &mut self.body - } - - /// Return the constraints of the rule - immutable. - #[must_use] - pub fn constraints(&self) -> &Vec { - &self.constraints - } - - /// Return the filters of the rule - mutable. - #[must_use] - pub fn constraints_mut(&mut self) -> &mut Vec { - &mut self.constraints - } - - /// Replaces [Variable]s with [super::Term]s according to the provided assignment. - pub fn apply_assignment(&mut self, assignment: &VariableAssignment) { - self.body - .iter_mut() - .for_each(|l| l.apply_assignment(assignment)); - self.head - .iter_mut() - .for_each(|a| a.apply_assignment(assignment)); - self.constraints - .iter_mut() - .for_each(|f| f.apply_assignment(assignment)); - } - - /// Return the number of negative body atoms contained in the rule. - pub fn num_negative_body(&self) -> usize { - self.body - .iter() - .filter(|literal| literal.is_negative()) - .count() - } -} - -impl std::fmt::Display for Rule { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - for (index, atom) in self.head.iter().enumerate() { - atom.fmt(f)?; - - if index < self.head.len() - 1 { - f.write_str(", ")?; - } - } - - f.write_str(" :- ")?; - - for (index, literal) in self.body.iter().enumerate() { - literal.fmt(f)?; - - if index < self.body.len() - 1 { - f.write_str(", ")?; - } - } - - if !self.constraints.is_empty() { - f.write_str(", ")?; - } - - for (index, constraint) in self.constraints.iter().enumerate() { - constraint.fmt(f)?; - - if index < self.constraints.len() - 1 { - f.write_str(", ")?; - } - } - - f.write_str(" .") - } -} diff --git a/nemo/src/rule_model/components/fact.rs b/nemo/src/rule_model/components/fact.rs index bc405d9ee..e3b4f11db 100644 --- a/nemo/src/rule_model/components/fact.rs +++ b/nemo/src/rule_model/components/fact.rs @@ -31,9 +31,9 @@ impl Fact { } } - /// Create a new [Fact] from an AST - pub fn from_ast(_ast: crate::io::parser::ast::statement::Fact) { - todo!("create a fact from an ast") + /// Return the predicate associated with this fact. + pub fn predicate(&self) -> &Tag { + &self.predicate } /// Return an iterator over the subterms of this fact. 
diff --git a/nemo/src/rule_model/components/import_export.rs b/nemo/src/rule_model/components/import_export.rs
index c500ee3d7..24c653b92 100644
--- a/nemo/src/rule_model/components/import_export.rs
+++ b/nemo/src/rule_model/components/import_export.rs
@@ -5,13 +5,22 @@ pub mod attributes;
 pub mod compression;
 pub mod file_formats;
 
-use std::{fmt::Display, hash::Hash};
+use std::{collections::HashMap, fmt::Display, hash::Hash};
 
+use attributes::ImportExportAttribute;
 use file_formats::FileFormat;
+use nemo_physical::datavalues::DataValue;
 
-use crate::rule_model::{error::ValidationErrorBuilder, origin::Origin};
+use crate::{
+    io::formats::dsv::value_format::DsvValueFormats,
+    rule_model::{error::ValidationErrorBuilder, origin::Origin},
+};
 
-use super::{tag::Tag, term::map::Map, ProgramComponent};
+use super::{
+    tag::Tag,
+    term::{map::Map, primitive::Primitive, Term},
+    ProgramComponent,
+};
 
 /// An import/export specification. This object captures all information that is typically
 /// present in an import or export directive in a Nemo program, including the main format,
@@ -31,6 +40,80 @@ pub(crate) struct ImportExportDirective {
 }
 
 impl ImportExportDirective {
+    /// For a given [Term] return its contents as an IRI.
+    ///
+    /// This returns a value if the term is an IRI and
+    /// returns `None` otherwise.
+    pub fn plain_value(term: &Term) -> Option {
+        if let Term::Primitive(Primitive::Ground(any_value)) = term {
+            return any_value.value().to_iri();
+        }
+
+        None
+    }
+
+    /// For a given [Term] return its contents as a plain string.
+    ///
+    /// This returns a value if the term is a plain string and
+    /// returns `None` otherwise.
+    pub fn string_value(term: &Term) -> Option {
+        if let Term::Primitive(Primitive::Ground(any_value)) = term {
+            return any_value.value().to_plain_string();
+        }
+
+        None
+    }
+
+    /// For a given [Term] return its contents as an integer.
+    ///
+    /// This returns a value if the term is an integer and
+    /// returns `None` otherwise.
+    pub fn integer_value(term: &Term) -> Option {
+        if let Term::Primitive(Primitive::Ground(any_value)) = term {
+            return any_value.value().to_i64();
+        }
+
+        None
+    }
+
+    /// Return a [HashMap] containing the attributes of this directive.
+    fn attribute_map(&self) -> HashMap {
+        let mut result = HashMap::new();
+
+        for (key, value) in self.attributes.key_value() {
+            if let Some(name) =
+                Self::plain_value(&key).and_then(|plain| ImportExportAttribute::from_name(&plain))
+            {
+                result.insert(name, value);
+            }
+        }
+
+        result
+    }
+
+    /// Return the expected arity based on the format or given type information.
+    ///
+    /// Returns `None` if it is not possible to deduce this information.
+    pub fn expected_arity(&self) -> Option {
+        if let Some(arity) = self.format.arity() {
+            return Some(arity);
+        }
+
+        match self.format {
+            FileFormat::CSV | FileFormat::DSV | FileFormat::TSV => {
+                if let Some(value_format) = self.attribute_map().get(&ImportExportAttribute::Format)
+                {
+                    if let Term::Tuple(tuple) = value_format {
+                        return DsvValueFormats::from_tuple(tuple).map(|format| format.arity());
+                    }
+                }
+            }
+            _ => {}
+        }
+
+        None
+    }
+
     /// Helper function for the display implementations of
     /// [ImportDirective] and [ExportDirective]
     /// to format the content of this object.
@@ -85,8 +168,15 @@ impl ImportDirective {
     }
 
     /// Return the attributes.
- pub fn attributes(&self) -> &Map { - &self.0.attributes + pub fn attributes(&self) -> HashMap { + self.0.attribute_map() + } + + /// Return the expected arity based on the format or given type information, + /// + /// Returns `None` if it is not possible to deduce this information. + pub fn expected_arity(&self) -> Option { + self.0.expected_arity() } } @@ -157,8 +247,15 @@ impl ExportDirective { } /// Return the attributes. - pub fn attributes(&self) -> &Map { - &self.0.attributes + pub fn attributes(&self) -> HashMap { + self.0.attribute_map() + } + + /// Return the expected arity based on the format or given type information, + /// + /// Returns `None` if it is not possible to deduce this information. + pub fn expected_arity(&self) -> Option { + self.0.expected_arity() } } diff --git a/nemo/src/rule_model/components/import_export/attributes.rs b/nemo/src/rule_model/components/import_export/attributes.rs index 65dbba5c8..9bc16e828 100644 --- a/nemo/src/rule_model/components/import_export/attributes.rs +++ b/nemo/src/rule_model/components/import_export/attributes.rs @@ -10,24 +10,31 @@ use crate::syntax::import_export::attribute; /// Supported attributes in import/export directives #[derive(Assoc, Debug, Copy, Clone, PartialEq, Eq, Hash)] #[func(pub fn name(&self) -> &'static str)] +#[func(pub fn from_name(name: &str) -> Option)] pub enum ImportExportAttribute { /// Location of the file #[assoc(name = attribute::RESOURCE)] + #[assoc(from_name = attribute::RESOURCE)] Resource, /// Data types of the input relations #[assoc(name = attribute::FORMAT)] + #[assoc(from_name = attribute::FORMAT)] Format, /// Base IRI #[assoc(name = attribute::BASE)] + #[assoc(from_name = attribute::BASE)] Base, /// Delimiter used to separate values #[assoc(name = attribute::DSV_DELIMITER)] + #[assoc(from_name = attribute::DSV_DELIMITER)] Delimiter, /// Compression format #[assoc(name = attribute::COMPRESSION)] + #[assoc(from_name = attribute::COMPRESSION)] Compression, /// Limit import/export to first n number of facts #[assoc(name = attribute::LIMIT)] + #[assoc(from_name = attribute::LIMIT)] Limit, } diff --git a/nemo/src/rule_model/components/import_export/compression.rs b/nemo/src/rule_model/components/import_export/compression.rs index bd1b3b9f5..96132a065 100644 --- a/nemo/src/rule_model/components/import_export/compression.rs +++ b/nemo/src/rule_model/components/import_export/compression.rs @@ -8,14 +8,17 @@ use enum_assoc::Assoc; use crate::syntax::import_export::attribute; /// Compression formats -#[derive(Assoc, Debug, Copy, Clone, PartialEq, Eq)] +#[derive(Assoc, Debug, Copy, Clone, PartialEq, Eq, Default)] #[func(pub fn name(&self) -> &'static str)] +#[func(pub fn from_name(name: &str) -> Option)] pub enum CompressionFormat { /// No compression + #[default] #[assoc(name = attribute::VALUE_COMPRESSION_NONE)] None, /// GZip compression #[assoc(name = attribute::VALUE_COMPRESSION_GZIP)] + #[assoc(from_name = attribute::VALUE_COMPRESSION_GZIP)] GZip, } diff --git a/nemo/src/rule_model/components/import_export/file_formats.rs b/nemo/src/rule_model/components/import_export/file_formats.rs index 1ea45a597..5e9113cf3 100644 --- a/nemo/src/rule_model/components/import_export/file_formats.rs +++ b/nemo/src/rule_model/components/import_export/file_formats.rs @@ -23,11 +23,14 @@ pub(crate) enum AttributeRequirement { /// Supported file formats #[derive(Assoc, EnumIter, Debug, Copy, Clone, Eq, PartialEq, Hash)] #[func(pub fn name(&self) -> &'static str)] +#[func(pub fn from_name(name: &str) -> Option)] #[func(pub 
fn extension(&self) -> &'static str)] #[func(pub fn attributes(&self) -> HashMap)] +#[func(pub fn arity(&self) -> Option)] pub enum FileFormat { /// Comma-separated values #[assoc(name = file_format::CSV)] + #[assoc(from_name = file_format::CSV)] #[assoc(extension = file_format::EXTENSION_CSV)] #[assoc(attributes = HashMap::from([ (ImportExportAttribute::Resource, AttributeRequirement::Required) @@ -35,6 +38,7 @@ pub enum FileFormat { CSV, /// Delimiter-separated values #[assoc(name = file_format::DSV)] + #[assoc(from_name = file_format::DSV)] #[assoc(extension = file_format::EXTENSION_DSV)] #[assoc(attributes = HashMap::from([ (ImportExportAttribute::Resource, AttributeRequirement::Required) @@ -42,6 +46,7 @@ pub enum FileFormat { DSV, /// Tab-separated values #[assoc(name = file_format::TSV)] + #[assoc(from_name = file_format::TSV)] #[assoc(extension = file_format::EXTENSION_TSV)] #[assoc(attributes = HashMap::from([ (ImportExportAttribute::Resource, AttributeRequirement::Required) @@ -49,45 +54,57 @@ pub enum FileFormat { TSV, /// JSON objects #[assoc(name = file_format::JSON)] + #[assoc(from_name = file_format::JSON)] #[assoc(extension = file_format::EXTENSION_JSON)] #[assoc(attributes = HashMap::from([ (ImportExportAttribute::Resource, AttributeRequirement::Required) ]))] + #[assoc(arity = 3)] // TODO: In the future we probably want arbitrary arity here JSON, /// RDF 1.1 N-Triples #[assoc(name = file_format::RDF_NTRIPLES)] + #[assoc(from_name = file_format::RDF_NTRIPLES)] #[assoc(extension = file_format::EXTENSION_RDF_NTRIPLES)] #[assoc(attributes = HashMap::from([ (ImportExportAttribute::Resource, AttributeRequirement::Required) ]))] + #[assoc(arity = 3)] NTriples, /// RDF 1.1 N-Quads #[assoc(name = file_format::RDF_NQUADS)] + #[assoc(from_name = file_format::RDF_NQUADS)] #[assoc(extension = file_format::EXTENSION_RDF_NQUADS)] #[assoc(attributes = HashMap::from([ (ImportExportAttribute::Resource, AttributeRequirement::Required) ]))] + #[assoc(arity = 4)] NQuads, /// RDF 1.1 Turtle #[assoc(name = file_format::RDF_TURTLE)] + #[assoc(from_name = file_format::RDF_TURTLE)] #[assoc(extension = file_format::EXTENSION_RDF_TURTLE)] #[assoc(attributes = HashMap::from([ (ImportExportAttribute::Resource, AttributeRequirement::Required) ]))] + #[assoc(arity = 3)] Turtle, /// RDF 1.1 RDF/XML #[assoc(name = file_format::RDF_XML)] + #[assoc(from_name = file_format::RDF_XML)] #[assoc(extension = file_format::EXTENSION_RDF_XML)] #[assoc(attributes = HashMap::from([ (ImportExportAttribute::Resource, AttributeRequirement::Required) ]))] + #[assoc(arity = 3)] RDFXML, /// RDF 1.1 TriG #[assoc(name = file_format::RDF_TRIG)] + #[assoc(from_name = file_format::RDF_TRIG)] #[assoc(extension = file_format::EXTENSION_RDF_TRIG)] #[assoc(attributes = HashMap::from([ (ImportExportAttribute::Resource, AttributeRequirement::Required) ]))] + #[assoc(arity = 4)] TriG, } diff --git a/nemo/src/rule_model/components/literal.rs b/nemo/src/rule_model/components/literal.rs index b36e82696..3d3d38131 100644 --- a/nemo/src/rule_model/components/literal.rs +++ b/nemo/src/rule_model/components/literal.rs @@ -6,8 +6,8 @@ use crate::rule_model::error::{ValidationError, ValidationErrorBuilder}; use super::{ atom::Atom, - term::{operation::Operation, Term}, - ProgramComponent, + term::{operation::Operation, primitive::variable::Variable, Term}, + IterableVariables, ProgramComponent, }; /// Literal @@ -84,3 +84,17 @@ impl ProgramComponent for Literal { } } } + +impl IterableVariables for Literal { + fn variables<'a>(&'a self) -> Box 
+ 'a> {
+        match self {
+            Literal::Positive(literal) => literal.variables(),
+            Literal::Negative(literal) => literal.variables(),
+            Literal::Operation(literal) => literal.variables(),
+        }
+    }
+
+    fn variables_mut<'a>(&'a mut self) -> Box + 'a> {
+        todo!()
+    }
+}
diff --git a/nemo/src/rule_model/components/output.rs b/nemo/src/rule_model/components/output.rs
index f35229a83..474209c49 100644
--- a/nemo/src/rule_model/components/output.rs
+++ b/nemo/src/rule_model/components/output.rs
@@ -19,13 +19,18 @@ pub struct Output {
 }
 
 impl Output {
-    /// Create a mew [Output]
+    /// Create a new [Output].
     pub fn new(predicate: Tag) -> Self {
         Self {
             origin: Origin::default(),
             predicate,
         }
     }
+
+    /// Return the output predicate.
+    pub fn predicate(&self) -> &Tag {
+        &self.predicate
+    }
 }
 
 impl Display for Output {
diff --git a/nemo/src/rule_model/components/rule.rs b/nemo/src/rule_model/components/rule.rs
index 10e064d45..083d1cce4 100644
--- a/nemo/src/rule_model/components/rule.rs
+++ b/nemo/src/rule_model/components/rule.rs
@@ -82,13 +82,8 @@ impl Rule {
         &mut self.head
     }
 
-    /// Return a list of "safe" variables.
-    ///
-    /// A variable is considered safe,
-    /// if it occurs in a positive body atom,
-    /// or is derived via the equality operation
-    /// from other safe variables.
-    pub fn safe_variables(&self) -> HashSet<&Variable> {
+    /// Return the set of variables that are bound in positive body atoms.
+    pub fn positive_variables(&self) -> HashSet<&Variable> {
         let mut result = HashSet::new();
 
         for literal in &self.body {
@@ -103,6 +98,18 @@ impl Rule {
             }
         }
 
+        result
+    }
+
+    /// Return a set of "safe" variables.
+    ///
+    /// A variable is considered safe,
+    /// if it occurs in a positive body atom,
+    /// or is derived via the equality operation
+    /// from other safe variables.
+ pub fn safe_variables(&self) -> HashSet<&Variable> { + let mut result = self.positive_variables(); + loop { let current_count = result.len(); @@ -127,7 +134,9 @@ impl Rule { result } - /// Check for + /// Check if + /// * are no complex terms occurring in the head + /// * an aggregate occurs at most once fn validate_term_head(builder: &mut ValidationErrorBuilder, term: &Term) -> Result { if term.is_map() || term.is_tuple() || term.is_function() { builder.report_error( @@ -157,7 +166,12 @@ impl Rule { Ok(first_aggregate) } - /// Check for + /// Check if + /// * body does not contain any existential variables + /// * body does not contain aggregation + /// * body does not contain any complex term + /// * used operations do not use anonymous variables + /// * operations only use safe variables fn validate_term_body( builder: &mut ValidationErrorBuilder, term: &Term, @@ -383,6 +397,28 @@ impl ProgramComponent for Rule { } } +impl IterableVariables for Rule { + fn variables<'a>(&'a self) -> Box + 'a> { + Box::new( + self.head() + .iter() + .flat_map(|atom| atom.variables()) + .chain(self.body().iter().flat_map(|literal| literal.variables())), + ) + } + + fn variables_mut<'a>(&'a mut self) -> Box + 'a> { + let head_variables = self.head.iter_mut().flat_map(|atom| atom.variables_mut()); + + let body_variables = self + .body + .iter_mut() + .flat_map(|literal| literal.variables_mut()); + + Box::new(head_variables.chain(body_variables)) + } +} + /// Builder for a rule #[derive(Debug, Default)] pub struct RuleBuilder { diff --git a/nemo/src/rule_model/components/term/operation.rs b/nemo/src/rule_model/components/term/operation.rs index f07a9f935..1df810e01 100644 --- a/nemo/src/rule_model/components/term/operation.rs +++ b/nemo/src/rule_model/components/term/operation.rs @@ -72,10 +72,14 @@ impl Operation { let right = self.subterms.get(1).expect("invalid program component"); if let Term::Primitive(Primitive::Variable(variable)) = left { - Some((variable, right)) + return Some((variable, right)); } else { - None + if let Term::Primitive(Primitive::Variable(variable)) = right { + return Some((variable, left)); + } } + + None } } diff --git a/nemo/src/rule_model/components/term/primitive/ground.rs b/nemo/src/rule_model/components/term/primitive/ground.rs index e5e75ba06..97da5dc57 100644 --- a/nemo/src/rule_model/components/term/primitive/ground.rs +++ b/nemo/src/rule_model/components/term/primitive/ground.rs @@ -52,6 +52,11 @@ impl GroundTerm { ValueDomain::Other => ValueType::Other, } } + + /// Return the [AnyDataValue] of this term + pub fn value(&self) -> AnyDataValue { + self.value.clone() + } } impl From for GroundTerm { diff --git a/nemo/src/rule_model/components/term/primitive/variable.rs b/nemo/src/rule_model/components/term/primitive/variable.rs index 4b29523ce..610ab9e15 100644 --- a/nemo/src/rule_model/components/term/primitive/variable.rs +++ b/nemo/src/rule_model/components/term/primitive/variable.rs @@ -21,7 +21,7 @@ pub struct VariableName(String); impl VariableName { /// Create a new [VariableName]. - fn new(name: String) -> Self { + pub fn new(name: String) -> Self { Self(name) } @@ -82,6 +82,14 @@ impl Variable { pub fn is_existential(&self) -> bool { matches!(self, Variable::Existential(_)) } + + /// Change the name of this variable. 
+ pub fn rename(&mut self, name: VariableName) { + match self { + Variable::Universal(variable) => variable.rename(name), + Variable::Existential(variable) => variable.rename(name), + } + } } impl From for Variable { diff --git a/nemo/src/rule_model/components/term/primitive/variable/existential.rs b/nemo/src/rule_model/components/term/primitive/variable/existential.rs index 45f2420ce..619ce1f80 100644 --- a/nemo/src/rule_model/components/term/primitive/variable/existential.rs +++ b/nemo/src/rule_model/components/term/primitive/variable/existential.rs @@ -35,6 +35,11 @@ impl ExistentialVariable { pub fn name(&self) -> String { self.name.to_string() } + + /// Change the name of this variable. + pub fn rename(&mut self, name: VariableName) { + self.name = name; + } } impl Display for ExistentialVariable { diff --git a/nemo/src/rule_model/components/term/primitive/variable/universal.rs b/nemo/src/rule_model/components/term/primitive/variable/universal.rs index 215156718..1868fc605 100644 --- a/nemo/src/rule_model/components/term/primitive/variable/universal.rs +++ b/nemo/src/rule_model/components/term/primitive/variable/universal.rs @@ -55,6 +55,11 @@ impl UniversalVariable { pub fn is_anonymous(&self) -> bool { self.name.is_none() } + + /// Change the name of this variable. + pub fn rename(&mut self, name: VariableName) { + self.name = Some(name); + } } impl Display for UniversalVariable { diff --git a/nemo/src/rule_model/error/info.rs b/nemo/src/rule_model/error/info.rs index b75fb1b2e..9620e1ab2 100644 --- a/nemo/src/rule_model/error/info.rs +++ b/nemo/src/rule_model/error/info.rs @@ -13,6 +13,9 @@ pub enum Info { /// First use occurred somewhere #[assoc(message = format!("first use occurred here"))] FirstUse, + /// Predicate different arity + #[assoc(message = format!("predicate was used here with arity {}", _arity))] + PredicateArity { arity: usize }, } impl std::fmt::Display for Info { diff --git a/nemo/src/rule_model/error/validation_error.rs b/nemo/src/rule_model/error/validation_error.rs index 9c59b15e1..e238b1247 100644 --- a/nemo/src/rule_model/error/validation_error.rs +++ b/nemo/src/rule_model/error/validation_error.rs @@ -80,6 +80,11 @@ pub enum ValidationErrorKind { #[error(r#"anonymous variable used in operation"#)] #[assoc(code = 215)] OperationAnonymous, + /// Inconsistent arities for predicates + #[error(r#"predicate {predicate} used with multiple arities."#)] + #[assoc(code = 216)] + #[assoc(note = "each predicate is only allowed to have one arity")] + InconsistentArities { predicate: String }, /// Unsupported feature: Multiple aggregates in one rule #[error(r#"multiple aggregates in one rule is currently unsupported"#)] diff --git a/nemo/src/rule_model/program.rs b/nemo/src/rule_model/program.rs index 41da6ddda..bef22456a 100644 --- a/nemo/src/rule_model/program.rs +++ b/nemo/src/rule_model/program.rs @@ -1,15 +1,34 @@ //! This module defines [Program]. 
-use super::components::{ - fact::Fact, - import_export::{ExportDirective, ImportDirective}, - output::Output, - rule::Rule, +use std::{ + collections::{hash_map::Entry, HashMap}, + fmt::Write, +}; + +use crate::rule_model::components::tag::Tag; + +use super::{ + components::{ + fact::Fact, + import_export::{ExportDirective, ImportDirective}, + literal::Literal, + output::Output, + rule::Rule, + ProgramComponent, + }, + error::{ + info::Info, validation_error::ValidationErrorKind, ComplexErrorLabelKind, + ValidationErrorBuilder, + }, + origin::Origin, }; /// Representation of a nemo program #[derive(Debug, Default)] pub struct Program { + /// Origin of this component + origin: Origin, + /// Imported resources imports: Vec, /// Exported resources @@ -22,6 +41,239 @@ pub struct Program { outputs: Vec, } +impl Program { + /// Return an iterator over all imports. + pub fn imports(&self) -> impl Iterator { + self.imports.iter() + } + + /// Return an iterator over all exports. + pub fn exports(&self) -> impl Iterator { + self.exports.iter() + } + + /// Return an iterator over all rules. + pub fn rules(&self) -> impl Iterator { + self.rules.iter() + } + + /// Return an iterator over all facts. + pub fn facts(&self) -> impl Iterator { + self.facts.iter() + } + + /// Return an iterator over all outputs. + pub fn outputs(&self) -> impl Iterator { + self.outputs.iter() + } + + /// Return an iterator over all imports. + pub fn imports_mut(&mut self) -> impl Iterator { + self.imports.iter_mut() + } + + /// Return an iterator over all exports. + pub fn exports_mut(&mut self) -> impl Iterator { + self.exports.iter_mut() + } + + /// Return an iterator over all rules. + pub fn rules_mut(&mut self) -> impl Iterator { + self.rules.iter_mut() + } + + /// Return an iterator over all facts. + pub fn facts_mut(&mut self) -> impl Iterator { + self.facts.iter_mut() + } + + /// Return an iterator over all outputs. + pub fn outputs_mut(&mut self) -> impl Iterator { + self.outputs.iter_mut() + } + + /// Check if a different arity was already used for the given predicate + /// and report an error if this was the case. + fn validate_arity( + predicate_arity: &mut HashMap, + tag: Tag, + arity: usize, + origin: Origin, + builder: &mut ValidationErrorBuilder, + ) { + let predicate_string = tag.to_string(); + + match predicate_arity.entry(tag) { + Entry::Occupied(entry) => { + let (previous_arity, previous_origin) = entry.get(); + + if arity != *previous_arity { + builder + .report_error( + origin, + ValidationErrorKind::InconsistentArities { + predicate: predicate_string, + }, + ) + .add_label( + ComplexErrorLabelKind::Information, + previous_origin.clone(), + Info::PredicateArity { + arity: *previous_arity, + }, + ); + } + } + Entry::Vacant(entry) => { + entry.insert((arity, origin)); + } + } + } + + /// Validate the global program properties without validating + /// each program element. 
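The `validate_arity` helper above uses the `HashMap` entry API to record the first `(arity, origin)` seen for each predicate and to compare every later use against it. The same pattern in self-contained form, with line numbers standing in for nemo's `Origin`:

```rust
use std::collections::{hash_map::Entry, HashMap};

/// Record the first arity seen per predicate; report a conflict otherwise.
/// `seen` maps predicate name -> (arity, line of first use).
fn check_arity(
    seen: &mut HashMap<String, (usize, usize)>,
    predicate: &str,
    arity: usize,
    line: usize,
) -> Result<(), String> {
    match seen.entry(predicate.to_string()) {
        Entry::Occupied(entry) => {
            let (previous_arity, previous_line) = *entry.get();
            if arity != previous_arity {
                return Err(format!(
                    "predicate {predicate} used with arity {arity} on line {line}, \
                     but with arity {previous_arity} on line {previous_line}"
                ));
            }
            Ok(())
        }
        Entry::Vacant(entry) => {
            entry.insert((arity, line));
            Ok(())
        }
    }
}

fn main() {
    let mut seen = HashMap::new();
    assert!(check_arity(&mut seen, "parent", 2, 1).is_ok());
    assert!(check_arity(&mut seen, "parent", 2, 2).is_ok());
    // Conflicting arity is reported, pointing back to the first use,
    // just as the real error attaches an `Info::PredicateArity` label.
    assert!(check_arity(&mut seen, "parent", 3, 3).is_err());
}
```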
+ pub(crate) fn validate_global_properties( + &self, + builder: &mut ValidationErrorBuilder, + ) -> Result<(), ()> { + let mut predicate_arity = HashMap::::new(); + + for import in self.imports() { + let predicate = import.predicate().clone(); + let origin = import.origin().clone(); + + if let Some(arity) = import.expected_arity() { + Self::validate_arity(&mut predicate_arity, predicate, arity, origin, builder); + } + } + + for fact in self.facts() { + let predicate = fact.predicate().clone(); + let arity = fact.subterms().count(); + let origin = fact.origin().clone(); + + Self::validate_arity(&mut predicate_arity, predicate, arity, origin, builder); + } + + for rule in self.rules() { + for atom in rule.head() { + let predicate = atom.predicate().clone(); + let arity = atom.arguments().count(); + let origin = atom.origin().clone(); + + Self::validate_arity(&mut predicate_arity, predicate, arity, origin, builder); + } + + for literal in rule.body() { + match literal { + Literal::Positive(atom) | Literal::Negative(atom) => { + let predicate = atom.predicate().clone(); + let arity = atom.arguments().count(); + let origin = atom.origin().clone(); + + Self::validate_arity( + &mut predicate_arity, + predicate, + arity, + origin, + builder, + ); + } + Literal::Operation(_) => { + continue; + } + } + } + } + + for export in self.exports() { + let predicate = export.predicate().clone(); + let origin = export.origin().clone(); + + if let Some(arity) = export.expected_arity() { + Self::validate_arity(&mut predicate_arity, predicate, arity, origin, builder); + } + } + + Ok(()) + } +} + +impl ProgramComponent for Program { + fn parse(_string: &str) -> Result + where + Self: Sized, + { + todo!() + } + + fn origin(&self) -> &Origin { + &self.origin + } + + fn set_origin(mut self, origin: Origin) -> Self + where + Self: Sized, + { + self.origin = origin; + self + } + + fn validate(&self, builder: &mut ValidationErrorBuilder) -> Result<(), ()> + where + Self: Sized, + { + for import in self.imports() { + let _ = import.validate(builder); + } + + for fact in self.facts() { + let _ = fact.validate(builder); + } + + for rule in self.rules() { + let _ = rule.validate(builder); + } + + for output in self.outputs() { + let _ = output.validate(builder); + } + + for export in self.exports() { + let _ = export.validate(builder); + } + + self.validate_global_properties(builder) + } +} + +impl std::fmt::Display for Program { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + for import in self.imports() { + import.fmt(f)?; + f.write_char('\n'); + } + for fact in self.facts() { + fact.fmt(f)?; + f.write_char('\n'); + } + for rule in self.rules() { + rule.fmt(f)?; + f.write_char('\n'); + } + for output in self.outputs() { + output.fmt(f)?; + f.write_char('\n'); + } + for export in self.exports() { + export.fmt(f)?; + f.write_char('\n'); + } + + Ok(()) + } +} + /// Builder for [Program]s #[derive(Debug, Default)] pub struct ProgramBuilder { @@ -59,4 +311,9 @@ impl ProgramBuilder { pub fn add_output(&mut self, output: Output) { self.program.outputs.push(output); } + + /// Validate the current program. 
+ pub fn validate(&self, builder: &mut ValidationErrorBuilder) -> Result<(), ()> { + self.program.validate(builder) + } } diff --git a/nemo/src/rule_model/translation.rs b/nemo/src/rule_model/translation.rs index a68992841..5210a9937 100644 --- a/nemo/src/rule_model/translation.rs +++ b/nemo/src/rule_model/translation.rs @@ -178,6 +178,10 @@ impl<'a> ASTProgramTranslation<'a> { } } + let _ = self + .program_builder + .validate(&mut self.validation_error_builder); + self.errors.extend( self.validation_error_builder .finalize() diff --git a/nemo/src/syntax.rs b/nemo/src/syntax.rs index e8cf14f8a..b09e6732e 100644 --- a/nemo/src/syntax.rs +++ b/nemo/src/syntax.rs @@ -52,6 +52,21 @@ pub mod directive { /// The string used in the keyword for the output directive. pub const OUTPUT: &str = "output"; + + pub mod value_formats { + /// The string used to represent the import/export format any + pub const ANY: &str = "any"; + /// The string used to represent the import/export format string + pub const STRING: &str = "string"; + /// The string used to represent the import/export format integer + pub const INT: &str = "int"; + /// The string used to represent the import/export format double + pub const DOUBLE: &str = "double"; + /// The string used to represent the import/export format float + pub const FLOAT: &str = "float"; + /// The string used to indicate that a column is skipped + pub const SKIP: &str = "skip"; + } } pub mod rule { diff --git a/nemo/src/table_manager.rs b/nemo/src/table_manager.rs index fa9b0409e..ddc05894d 100644 --- a/nemo/src/table_manager.rs +++ b/nemo/src/table_manager.rs @@ -1,8 +1,6 @@ //! Managing of tables -use crate::error::Error; - -use super::model::Identifier; +use crate::{error::Error, rule_model::components::tag::Tag}; use bytesize::ByteSize; use nemo_physical::{ @@ -209,16 +207,16 @@ struct PredicateInfo { arity: usize, } -/// Identifier of a subtable in a chase sequence. +/// Tag of a subtable in a chase sequence. #[derive(Debug, Clone)] pub struct SubtableIdentifier { - predicate: Identifier, + predicate: Tag, step: usize, } impl SubtableIdentifier { /// Create a new [SubtableIdentifier]. - pub fn new(predicate: Identifier, step: usize) -> Self { + pub fn new(predicate: Tag, step: usize) -> Self { Self { predicate, step } } } @@ -332,10 +330,10 @@ pub(crate) struct TableManager { database: DatabaseInstance, /// Map containg all the ids of all the sub tables associated with a predicate. - predicate_subtables: HashMap, + predicate_subtables: HashMap, /// Mapping predicate identifiers to a [PredicateInfo] which contains relevant information. - predicate_to_info: HashMap, + predicate_to_info: HashMap, } impl Default for TableManager { @@ -364,14 +362,14 @@ impl TableManager { /// Return the step number of the last subtable that was added under a predicate. /// Returns None if the predicate has no subtables. - pub(crate) fn last_step(&self, predicate: &Identifier) -> Option { + pub(crate) fn last_step(&self, predicate: &Tag) -> Option { self.predicate_subtables.get(predicate)?.last_step() } /// Count all the rows in the table manager that belong to a predicate. /// /// TODO: Currently only counting of in-memory facts is supported, see - pub(crate) fn predicate_count_rows(&self, predicate: &Identifier) -> Option { + pub(crate) fn predicate_count_rows(&self, predicate: &Tag) -> Option { self.predicate_subtables .get(predicate) .map(|s| s.count_rows(&self.database)) @@ -389,7 +387,7 @@ impl TableManager { /// and return the [PermanentTableId] of that new table. 
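The `value_formats` constants added to `syntax.rs` above are the user-facing names for column formats in import/export directives. A sketch of the typical consumption pattern, using a hypothetical simplified enum instead of the real `DsvValueFormat`:

```rust
// Mirrors the constants from syntax::directive::value_formats.
const ANY: &str = "any";
const STRING: &str = "string";
const INT: &str = "int";
const DOUBLE: &str = "double";
const FLOAT: &str = "float";
const SKIP: &str = "skip";

// Hypothetical simplified format enum; illustration only.
#[derive(Debug, Clone, Copy, PartialEq)]
enum ValueFormat {
    Any,
    String,
    Int,
    Double,
    Float,
    Skip, // marks a column that is dropped on import/export
}

fn from_name(name: &str) -> Option<ValueFormat> {
    match name {
        ANY => Some(ValueFormat::Any),
        STRING => Some(ValueFormat::String),
        INT => Some(ValueFormat::Int),
        DOUBLE => Some(ValueFormat::Double),
        FLOAT => Some(ValueFormat::Float),
        SKIP => Some(ValueFormat::Skip),
        _ => None,
    }
}

fn main() {
    // A declaration like `format = (string, int, skip)` resolves to:
    let formats: Option<Vec<_>> = ["string", "int", "skip"]
        .iter()
        .map(|name| from_name(name))
        .collect();
    assert_eq!(
        formats,
        Some(vec![ValueFormat::String, ValueFormat::Int, ValueFormat::Skip])
    );
}
```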
pub(crate) fn combine_predicate( &mut self, - predicate: &Identifier, + predicate: &Tag, ) -> Result, Error> { match self.last_step(predicate) { Some(last_step) => self.combine_tables(predicate, 0..(last_step + 1)), @@ -400,7 +398,7 @@ impl TableManager { /// Generates an appropriate table name for subtable. pub(crate) fn generate_table_name( &self, - predicate: &Identifier, + predicate: &Tag, order: &ColumnOrder, step: usize, ) -> String { @@ -413,7 +411,7 @@ impl TableManager { /// Generates an appropriate table name for a table that represents multiple subtables. fn generate_table_name_combined( &self, - predicate: &Identifier, + predicate: &Tag, order: &ColumnOrder, steps: &Range, ) -> String { @@ -429,7 +427,7 @@ impl TableManager { /// Generates an appropriate table name for a table that is a reordered version of another. fn generate_table_name_reference( &self, - predicate: &Identifier, + predicate: &Tag, step: usize, referenced_table_id: PermanentTableId, permutation: &Permutation, @@ -442,7 +440,7 @@ impl TableManager { /// Intitializes helper structures that are needed for handling the table associated with the predicate. /// Must be done before calling functions that add tables to that predicate. - pub(crate) fn register_predicate(&mut self, predicate: Identifier, arity: usize) { + pub(crate) fn register_predicate(&mut self, predicate: Tag, arity: usize) { let predicate_info = PredicateInfo { arity }; if self @@ -458,7 +456,7 @@ impl TableManager { /// Check whether a predicate has been registered. #[allow(dead_code)] - fn predicate_exists(&self, predicate: &Identifier) -> bool { + fn predicate_exists(&self, predicate: &Tag) -> bool { self.predicate_subtables.contains_key(predicate) } @@ -473,7 +471,7 @@ impl TableManager { /// Add a table that represents the input facts for some predicate for the chase procedure. /// Predicate must be registered before calling this function. - pub(crate) fn add_edb(&mut self, predicate: Identifier, sources: Vec) { + pub(crate) fn add_edb(&mut self, predicate: Tag, sources: Vec) { let arity = if let Some(source) = sources.first() { source.arity() } else { @@ -494,7 +492,7 @@ impl TableManager { /// Add a [Trie] as a subtable of a predicate. /// Predicate must be registered before calling this function. #[allow(dead_code)] - fn add_table(&mut self, predicate: Identifier, step: usize, order: ColumnOrder, trie: Trie) { + fn add_table(&mut self, predicate: Tag, step: usize, order: ColumnOrder, trie: Trie) { let name = self.generate_table_name(&predicate, &order, step); let table_id = self.database.register_add_trie(&name, order, trie); @@ -534,7 +532,7 @@ impl TableManager { /// /// # Panics /// Panics if the predicate has not been registered yet. - pub(crate) fn arity(&self, predicate: &Identifier) -> usize { + pub(crate) fn arity(&self, predicate: &Tag) -> usize { self.predicate_to_info .get(predicate) .expect("Predicate should be registered before calling this function") @@ -542,11 +540,7 @@ impl TableManager { } /// Return the ids of all subtables of a predicate within a certain range of steps. - pub fn tables_in_range( - &self, - predicate: &Identifier, - range: &Range, - ) -> Vec { + pub fn tables_in_range(&self, predicate: &Tag, range: &Range) -> Vec { self.predicate_subtables .get(predicate) .map(|handler| handler.cover_range(range)) @@ -556,7 +550,7 @@ impl TableManager { /// Combine subtables in a certain range into one larger table. 
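A simplified model of the registration contract described above: predicates, now keyed by `Tag` instead of `Identifier`, must be registered with their arity before subtables can be added under them. Here `Tag` is modeled as a plain string and subtables as bare step numbers, which is an assumption for illustration only:

```rust
use std::collections::HashMap;

// Stand-in for nemo's Tag; illustration only.
type Tag = String;

#[derive(Default)]
struct TableManager {
    predicate_arity: HashMap<Tag, usize>,
    // Steps at which subtables were added, per predicate.
    subtables: HashMap<Tag, Vec<usize>>,
}

impl TableManager {
    /// Register a predicate with its arity; repeated registration is a no-op,
    /// roughly matching the guard in the code above.
    fn register_predicate(&mut self, predicate: Tag, arity: usize) {
        self.predicate_arity.entry(predicate).or_insert(arity);
    }

    /// Add a subtable for a given chase step; the predicate must be registered.
    fn add_table(&mut self, predicate: &Tag, step: usize) {
        assert!(
            self.predicate_arity.contains_key(predicate),
            "predicate must be registered first"
        );
        self.subtables.entry(predicate.clone()).or_default().push(step);
    }

    /// Step number of the last subtable added under a predicate, if any.
    fn last_step(&self, predicate: &Tag) -> Option<usize> {
        self.subtables.get(predicate)?.last().copied()
    }
}

fn main() {
    let mut manager = TableManager::default();
    let parent = "parent".to_string();

    manager.register_predicate(parent.clone(), 2);
    manager.add_table(&parent, 0);
    manager.add_table(&parent, 3);

    assert_eq!(manager.last_step(&parent), Some(3));
}
```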
pub fn combine_tables( &mut self, - predicate: &Identifier, + predicate: &Tag, range: Range, ) -> Result, Error> { let combined_order: ColumnOrder = ColumnOrder::default(); @@ -594,7 +588,7 @@ impl TableManager { pub fn execute_plan( &mut self, subtable_plan: SubtableExecutionPlan, - ) -> Result, Error> { + ) -> Result, Error> { let result = self.database.execute_plan(subtable_plan.execution_plan)?; let mut updated_predicates = Vec::new(); @@ -646,11 +640,7 @@ impl TableManager { /// Return the chase step of the sub table that contains the given row within the given predicate. /// Returns None if the row does not exist. - pub fn find_table_row( - &mut self, - predicate: &Identifier, - row: &[AnyDataValue], - ) -> Option { + pub fn find_table_row(&mut self, predicate: &Tag, row: &[AnyDataValue]) -> Option { let handler = self.predicate_subtables.get(predicate)?; for (step, id) in &handler.single { diff --git a/testfile.rls b/testfile.rls deleted file mode 100644 index 1df8ea366..000000000 --- a/testfile.rls +++ /dev/null @@ -1,44 +0,0 @@ -/// base directive -@base . -/// prefix directive -@prefix rdfs: . -///import directive -@import sourceA :- csv { resource : "sources/dataA.csv" } . -/// export directive -@export a :- csv {} . - -/// output directive -@output a, b, c . -/// declare directive -@declare father(_:any, _:any). -@declare mother ( child : any , mother : any ) . - -// Facts: -father(alice, bob). -mother(bob, carla). -father(bob, darius). -mother(alice, carla). -father(, ). -mother(, ). -father(, ). -mother(, ). - -// Rules: -parent(?X, ?Y) :- mother(?X, ?Y). -parent(?X, ?Y) :- father(?X, ?Y). -parent( ?X , ?Y ) :- ~sibling( ?X , ?Y ) . -a(?x) :- b(?x, B) . -s(4) :- s(3). - -ancestor(?X,?Y) :- parent(?X, ?Y) . -ancestor(?X,?Z) :- ancestor(?X, ?Y), parent(?Y, ?Z) . -ancestorOfAlice(?X) :- ancestor(alice,?X). - -mydata(a,b) . -mydata("hello", 42) . -mydata(3.14, "2023-06-19"^^) . - -resulta(?N + 10) :- mydata(_, ?N) . -resultB(?R) :- mydata(?X, ?Y), ?R = SQRT(?X) . -result(?D) :- mydata(?X, _), ?D = DATATYPE(?X) . - diff --git a/testfile1.rls b/testfile1.rls deleted file mode 100644 index a2c0ef447..000000000 --- a/testfile1.rls +++ /dev/null @@ -1,172 +0,0 @@ -//!This ruleset normalises an EL ontology in OWL/RDF encoding. -//!Unsupported OWL EL features include: oneOf, allDisjoint. -//!The encoding used for property chains is also slightly antique. -//! -//!The ruleset computes facts for the following predicates: -//! nf:isMainClass(?C): ?C is an "interesting" class (not just an auxiliary class expression) -//! nf:isSubClass(?C): ?C occurs in a subclass position (i.e., negatively) -//! nf:conj(?C,?D1,?D2): ?C is the conjunction of ?D1 and ?D2 -//! nf:exists(?C,?P,?D): ?C is the existential restriction of property ?P with values from ?D -//! nf:subClassOf(?C,?D): ?C is syntactically specified to be a subclass of ?D -//! nf:subPropChain(?S1,?S2,?R): there was a role chain axiom ?S1 o ?S2 -> ?R -//! nf:subProp(?R,?S): ?R is a subproperty of ?S (directly or indirectly) -//! -//!Also, this program implements EL reasoning based on the (optimized) encoding described here: -//!https://link.springer.com/article/10.1007/s10817-013-9296-3 -//!The description of rules at the bottom reference the names from Figure 3. -//! -//!The reasoning is applied to the normalised ontologies. - -@prefix nf: . -@prefix inf: . -@prefix owl: . -@prefix rdf: . -@prefix sct: . -@prefix xsd: . -@prefix rdfs: . -@prefix genid: . - -@import TRIPLE :- rdf { resource : "galen-el.nt.gz" } . 
- -// ↓================ Errors ================↓ -// The constant names with prefixes don't get parsed properly -///Mark classes: -ClassObject(owl::someValuesFrom) . -ClassObject(rdf::first) . -ClassObject(rdfs::subClassOf) . -ClassObject(owl::equivalentClass) . -ClassSubject(rdfs::subClassOf) . -ClassSubject(owl::equivalentClass) . -// ↑================ Errors ================↑ -class(?O) :- TRIPLE(?X, ?P, ?O), ClassObject(?P) . -class(?X) :- TRIPLE(?X, ?P, ?O), ClassSubject(?P) . - -//Distinguish auxiliary class expressions from primary classes: - -// ↓================ Errors ================↓ -/// Mark auxiliary existential role restrictions: -synEx(?Y,?P,?X), auxClass(?X) :- TRIPLE(?X, owl:someValuesFrom, ?Y), TRIPLE(?X, owl:onProperty, ?P) . - -//Mark auxiliary conjunctions: - -next(?L1,?L2) :- TRIPLE(?L1,rdf:rest,?L2) . - first(?L1) :- TRIPLE(?X, owl:intersectionOf, ?L1) . -// ↑================ Errors ================↑ -nonfirst(?L2) :- first(?L1), next(?L1,?L2) . -nonfirst(?L2) :- nonfirst(?L1), next(?L1,?L2) . - last(?Ln) :- next(?Ln,rdf:nil) . - nonlast(?L) :- next(?L,?Ln), last(?Ln) . - nonlast(?L1) :- next(?L1,?L2), nonlast(?L2) . - in(?L,?C) :- TRIPLE(?L,rdf:first,?C) . - -//Mark conjunctions: -synConj(?X,?C1,?C2), auxClass(?X) :- - TRIPLE(?X, owl:intersectionOf, ?L1), next(?L1,?L2), last(?L2), in(?L1,?C1), in(?L2,?C2) . -synConj(?X,?C1,?L2), auxClass(?X) :- - TRIPLE(?X, owl:intersectionOf, ?L1), next(?L1,?L2), nonlast(?L2), in(?L1,?C1) . -synConj(?L1,?C1,?L2), auxClass(?L1) :- - nonfirst(?L1), next(?L1,?L2), nonlast(?L2), in(?L1,?C1) . -synConj(?L1,?C1,?C2), auxClass(?L1) :- - nonfirst(?L1), next(?L1,?L2), last(?L2), in(?L1,?C1), in(?L2,?C2) . - -//The other classes are "main classes" that are not normalised: -nf:isMainClass(?X) :- class(?X), ~auxClass(?X) . - -//Normalise auxiliary nested class expressions: - repOf(?X,?X) :- nf:isMainClass(?X) . //keep main classes unchanged - synExRep(?X,?P,?Rep) :- synEx(?Y,?P,?X), repOf(?Y,?Rep) . -nf:exists(!New,?P,?Rep) :- synExRep(?X,?P,?Rep) . - repOf(?X,?N) :- synExRep(?X,?P,?Rep), nf:exists(?N,?P,?Rep) . - // nf:exists(!New,?P,?Rep) :- synEx(?Y,?P,?X), repOf(?Y,?Rep) . - // repOf(?X,?N) :- synEx(?Y,?P), repOf(?Y,?Rep), nf:exists(?N,?P,?Rep) . -nf:conj(!New,?R1,?R2) :- synConj(?X,?C1,?C2), repOf(?C1,?R1), repOf(?C2,?R2) . - repOf(?X,?N) :- synConj(?X,?C1,?C2), repOf(?C1,?R1), repOf(?C2,?R2), nf:conj(?N,?R1,?R2) . - - -///Extract old-style property chains: -nf:subPropChain(?S,?T,?R), nf:subProp(?R,?R) :- - TRIPLE(?L,rdfs:subPropertyOf,?R), TRIPLE(?L,owl:propertyChain,?L1), - in(?L1,?S), next(?L1,?L2), in(?L2,?T) . - -///Initialise subsumption axioms: - prepareSco(?X,?Y) :- TRIPLE(?X, rdfs:subClassOf, ?Y) . - prepareSco(?X,?Y), prepareSco(?Y,?X) :- TRIPLE(?X, owl:equivalentClass, ?Y) . -nf:subClassOf(?RX,?RY), nf:isSubClass(?RX) :- prepareSco(?X,?Y), repOf(?X,?RX), repOf(?Y,?RY) . - -///Initialise disjointness: -nf:subClassOf(!C,owl:Nothing), nf:conj(!C,?X,?Y), nf:isSubClass(!C), nf:isSubClass(?X), nf:isSubClass(?Y) - :- TRIPLE(?X,owl:disjointWith,?Y) . - -///Mark classes in subclass position recursively: - nf:isSubClass(?D) :- nf:exists(?C, ?P, ?D), nf:isSubClass(?C) . -nf:isSubClass(?C1), nf:isSubClass(?C2) :- nf:conj(?X, ?C1, ?C2), nf:isSubClass(?X) . - -///Precompute role hierarchy: -directSubProp(?R,?S) :- TRIPLE(?R,rdfs:subPropertyOf,?S) . -//Initialise role hierarchy only for roles in subclass positions: -nf:subProp(?P,?P) :- nf:exists(?C,?P,?D), nf:isSubClass(?C) . -nf:subProp(?R,?T) :- nf:subProp(?R,?S), directSubProp(?S,?T) . 
- -//Inference rules - -//Start classification for all named classes -inf:init(?C) :- nf:isMainClass(?C) . -//R_init -inf:init(?C) :- inf:ex(?E, ?R, ?C) . - -//R_0: Every class is a sub class of itself -inf:subClassOf(?C, ?C) :- inf:init(?C) . -//R_\top: Every class is a sub class of owl:Thing -inf:subClassOf(?C, "") :- nf:isMainClass(?C) . - -//R_\sqcap^-: -// If ?C is contained in the intersection of ?D1 and ?D1, -// then ?C is contained in ?D1 and ?C is contained in ?D2. -inf:subClassOf(?C,?D1), inf:subClassOf(?C,?D2) :- inf:subClassOf(?C,?Y), nf:conj(?Y,?D1,?D2) . - -//R_\sqcap^+: -// If ?C is contained in ?D1 and ?D2, -// then ?C is contained in the intersection ?I of ?D1 ?D2 -inf:subClassOf(?C, ?I) :- - inf:subClassOf(?C, ?D1), inf:subClassOf(?C, ?D2), - nf:conj(?I, ?D1, ?D2), nf:isSubClass(?I) . - -//R_\exists^-: -// If every ?Y has an R-relation to a ?C -// and every ?E is a ?Y, -// then every ?E has an ?R-relation to a ?C -inf:ex(?E, ?R, ?C) :- inf:subClassOf(?E, ?Y), nf:exists(?Y, ?R, ?C) . - -//R_\exists^+: -// If every ?E has an ?R-relation to a ?C, -// and every ?C is a ?D, -// and ?R is a subproperty of ?S, -// then every ?E has an ?S-relation to a ?D -// (i.e. every ?E is an ?Y where ?Y is the class of -// of individuals that have an ?S-relation to a ?D) -inf:subClassOf(?E, ?Y) :- - inf:ex(?E, ?R, ?C), inf:subClassOf(?C, ?D), nf:subProp(?R, ?S), - nf:exists(?Y, ?S, ?D), nf:isSubClass(?Y) . - -//R_\circ: -// If ?E has an ?R1-relation to a ?C and ?C has an ?R2-relation to a ?D, -// and ?R1 is a subproperty of ?S1 and ?R2 is a subproperty of ?S2 -// then ?E has an ?S-relation to a ?D -inf:ex(?E, ?S, ?D) :- - inf:ex(?E, ?R1, ?C), inf:ex(?C, ?R2, ?D), - nf:subProp(?R1, ?S1), nf:subProp(?R2, ?S2), - nf:subPropChain(?S1, ?S2, ?S) . - -//R_\sqsubseteq: Transitive closure of the subclass-of relation -inf:subClassOf(?C,?E) :- inf:subClassOf(?C,?D), nf:subClassOf(?D,?E) . - -//R_\bot: If every ?E has an R-relation to a ?C but ?C is empty, then ?E is also empty -inf:subClassOf(?E, "") :- - inf:ex(?E,?R,?C), inf:subClassOf(?C,"") . - -///Extract final results for main classes - -mainSubClassOf(?A,?B) :- - inf:subClassOf(?A,?B), nf:isMainClass(?A), nf:isMainClass(?B) . - -@export mainSubClassOf :- csv{compression:"gzip"} . diff --git a/testfile2.rls b/testfile2.rls deleted file mode 100644 index e576fb8d5..000000000 --- a/testfile2.rls +++ /dev/null @@ -1,14 +0,0 @@ -// Facts: -father( // father predicate means, that 'alice has father bob' - alice, - bob). -mother bob, carla). -father(bob darius). -mother(alice, carla . - -// Rules: -parent(?X, ?Y) :- mother(?X, ?Y) -parent(?X, ?Y) :- father(?X, ?Y). -parent( ?X , ?Y :- ~sibling( ?X , ?Y ) . - - diff --git a/testfile3.rls b/testfile3.rls deleted file mode 100644 index e72804564..000000000 --- a/testfile3.rls +++ /dev/null @@ -1,10 +0,0 @@ -//! This is just a testfile. -//! This testfile is used to check the behaviour of the parser. - -//// normal comment. - -/// @base . -/// @basetest . -#[whohoo(my, first, attribute)] -#[but(can, it, parse, 2)] // this attribute is only ther for testing purposes -head(?x) :- body(?x, constant). 
From 72114a5cf662ca2896972604c401ab4bea82813a Mon Sep 17 00:00:00 2001 From: Alex Ivliev Date: Wed, 4 Sep 2024 17:55:08 +0200 Subject: [PATCH 150/214] Validation of import/export --- nemo/src/io/formats.rs | 9 + nemo/src/io/formats/dsv/value_format.rs | 12 +- nemo/src/io/formats/rdf/value_format.rs | 2 +- nemo/src/rule_model/components.rs | 66 ++++- nemo/src/rule_model/components/atom.rs | 6 +- nemo/src/rule_model/components/fact.rs | 8 +- .../rule_model/components/import_export.rs | 237 +++++++++++++++++- .../components/import_export/attributes.rs | 9 +- .../components/import_export/file_formats.rs | 46 +++- nemo/src/rule_model/components/literal.rs | 6 +- nemo/src/rule_model/components/output.rs | 6 +- nemo/src/rule_model/components/rule.rs | 11 +- nemo/src/rule_model/components/term.rs | 11 + .../rule_model/components/term/aggregate.rs | 6 +- .../rule_model/components/term/function.rs | 6 +- nemo/src/rule_model/components/term/map.rs | 6 +- .../rule_model/components/term/operation.rs | 6 +- .../rule_model/components/term/primitive.rs | 9 +- .../components/term/primitive/ground.rs | 23 +- .../components/term/primitive/variable.rs | 5 + .../term/primitive/variable/existential.rs | 6 +- .../term/primitive/variable/universal.rs | 6 +- nemo/src/rule_model/components/term/tuple.rs | 6 +- nemo/src/rule_model/error/hint/similar.rs | 4 +- nemo/src/rule_model/error/validation_error.rs | 39 +++ nemo/src/rule_model/program.rs | 6 +- 26 files changed, 511 insertions(+), 46 deletions(-) diff --git a/nemo/src/io/formats.rs b/nemo/src/io/formats.rs index 984a8169d..ca72a0464 100644 --- a/nemo/src/io/formats.rs +++ b/nemo/src/io/formats.rs @@ -118,6 +118,15 @@ pub(crate) enum Direction { Export, } +impl std::fmt::Display for Direction { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Direction::Import => f.write_str("import"), + Direction::Export => f.write_str("export"), + } + } +} + /// A trait for exporting table data, e.g., to some file. // TODO Maybe this should be directly in io, since it is the interface to the OutputManager? 
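The `Display` impl for `Direction` at the top of this patch exists so that validation messages can interpolate the direction into user-facing errors. A tiny self-contained sketch of that use; the message text mirrors the `ImportExportMissingRequiredAttribute` error added later in this patch:

```rust
use std::fmt;

#[derive(Clone, Copy)]
enum Direction {
    Import,
    Export,
}

impl fmt::Display for Direction {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Direction::Import => f.write_str("import"),
            Direction::Export => f.write_str("export"),
        }
    }
}

fn main() {
    let direction = Direction::Import;
    // e.g. "missing required parameter `resource` in import statement"
    let message = format!("missing required parameter `resource` in {direction} statement");
    assert!(message.contains("in import statement"));
    println!("{message}");
}
```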
pub trait TableWriter { diff --git a/nemo/src/io/formats/dsv/value_format.rs b/nemo/src/io/formats/dsv/value_format.rs index 4fae5e541..fad503c2c 100644 --- a/nemo/src/io/formats/dsv/value_format.rs +++ b/nemo/src/io/formats/dsv/value_format.rs @@ -7,7 +7,7 @@ use nemo_physical::datavalues::{AnyDataValue, DataValue, DataValueCreationError} use crate::{ parser::{ast::token::Token, input::ParserInput, ParserState}, - rule_model::components::term::{primitive::Primitive, tuple::Tuple, Term}, + rule_model::components::{import_export::ImportExportDirective, term::tuple::Tuple}, syntax::directive::value_formats, }; @@ -72,11 +72,11 @@ impl DsvValueFormats { let mut result = Vec::new(); for value in tuple.arguments() { - if let Term::Primitive(Primitive::Ground(ground)) = value { - if let Some(format) = DsvValueFormat::from_name(&ground.to_string()) { - result.push(format); - continue; - } + if let Some(format) = ImportExportDirective::plain_value(value) + .and_then(|name| DsvValueFormat::from_name(&name)) + { + result.push(format); + continue; } return None; diff --git a/nemo/src/io/formats/rdf/value_format.rs b/nemo/src/io/formats/rdf/value_format.rs index ee4ccc769..6c1ed53d6 100644 --- a/nemo/src/io/formats/rdf/value_format.rs +++ b/nemo/src/io/formats/rdf/value_format.rs @@ -18,7 +18,7 @@ use crate::{ #[derive(Assoc, Debug, Clone, Copy, PartialEq, Eq)] #[func(pub fn name(&self) -> &'static str)] #[func(pub fn from_name(name: &str) -> Option)] -pub(super) enum RdfValueFormat { +pub(crate) enum RdfValueFormat { /// General format that accepts any RDF term. #[assoc(name = value_formats::ANY)] #[assoc(from_name = value_formats::ANY)] diff --git a/nemo/src/rule_model/components.rs b/nemo/src/rule_model/components.rs index ac1552c0e..9a7e428d2 100644 --- a/nemo/src/rule_model/components.rs +++ b/nemo/src/rule_model/components.rs @@ -14,6 +14,7 @@ pub mod term; use std::fmt::{Debug, Display}; +use enum_assoc::Assoc; use term::primitive::variable::Variable; use super::{ @@ -23,26 +24,85 @@ use super::{ /// TODO: Think whether this is needed /// Types of [ProgramComponent]s -#[derive(Debug, Copy, Clone, Eq, PartialEq)] +#[derive(Assoc, Debug, Copy, Clone, Eq, PartialEq)] +#[func(pub fn name(&self) -> &'static str)] pub enum ProgramComponentKind { /// Variable + #[assoc(name = "variable")] Variable, - /// Primitive groun term - PrimitiveGround, + /// Plain String + #[assoc(name = "string")] + PlainString, + /// Language tagged string + #[assoc(name = "language tagged string")] + LanguageTaggedString, + /// Iri + #[assoc(name = "iri")] + Iri, + /// Single precision point number + #[assoc(name = "float")] + Float, + /// Double precision floating point number + #[assoc(name = "double")] + Double, + /// Integer value + #[assoc(name = "integer")] + Integer, + /// Boolean + #[assoc(name = "boolean")] + Boolean, + /// Null + #[assoc(name = "null")] + Null, + /// Other + #[assoc(name = "data value")] + Other, /// Map + #[assoc(name = "map")] Map, /// Tuple + #[assoc(name = "tuple")] Tuple, /// Operation + #[assoc(name = "operation")] Operation, /// Function term + #[assoc(name = "function")] FunctionTerm, + /// Aggregation term + #[assoc(name = "aggregation")] + Aggregation, /// Atom + #[assoc(name = "atom")] Atom, + /// Literal + #[assoc(name = "literal")] + Literal, + /// Rule + #[assoc(name = "rule")] + Rule, + /// Fact + #[assoc(name = "fact")] + Fact, + /// Import + #[assoc(name = "import")] + Import, + /// Export + #[assoc(name = "export")] + Export, + /// Output + #[assoc(name = "output")] + Output, + 
/// Program + #[assoc(name = "program")] + Program, } /// Trait implemented by objects that are part of the logical rule model of the nemo language. pub trait ProgramComponent: Debug + Display { + /// Return the [ProgramComponentKind] of this component. + fn kind(&self) -> ProgramComponentKind; + /// Construct this object from a string. fn parse(_string: &str) -> Result where diff --git a/nemo/src/rule_model/components/atom.rs b/nemo/src/rule_model/components/atom.rs index f34cb7f01..cc2f0ddd2 100644 --- a/nemo/src/rule_model/components/atom.rs +++ b/nemo/src/rule_model/components/atom.rs @@ -10,7 +10,7 @@ use crate::rule_model::{ use super::{ tag::Tag, term::{primitive::variable::Variable, Term}, - IterableVariables, ProgramComponent, + IterableVariables, ProgramComponent, ProgramComponentKind, }; /// Atom @@ -156,6 +156,10 @@ impl ProgramComponent for Atom { Ok(()) } + + fn kind(&self) -> ProgramComponentKind { + ProgramComponentKind::Atom + } } impl IterableVariables for Atom { diff --git a/nemo/src/rule_model/components/fact.rs b/nemo/src/rule_model/components/fact.rs index e3b4f11db..a6fd2130d 100644 --- a/nemo/src/rule_model/components/fact.rs +++ b/nemo/src/rule_model/components/fact.rs @@ -7,7 +7,9 @@ use crate::rule_model::{ origin::Origin, }; -use super::{atom::Atom, tag::Tag, term::Term, IterableVariables, ProgramComponent}; +use super::{ + atom::Atom, tag::Tag, term::Term, IterableVariables, ProgramComponent, ProgramComponentKind, +}; /// A (ground) fact #[derive(Debug, Clone, Eq)] @@ -131,4 +133,8 @@ impl ProgramComponent for Fact { Ok(()) } + + fn kind(&self) -> ProgramComponentKind { + ProgramComponentKind::Fact + } } diff --git a/nemo/src/rule_model/components/import_export.rs b/nemo/src/rule_model/components/import_export.rs index 24c653b92..0f1d31662 100644 --- a/nemo/src/rule_model/components/import_export.rs +++ b/nemo/src/rule_model/components/import_export.rs @@ -5,21 +5,33 @@ pub mod attributes; pub mod compression; pub mod file_formats; -use std::{collections::HashMap, fmt::Display, hash::Hash}; +use std::{ + collections::{HashMap, HashSet}, + fmt::Display, + hash::Hash, +}; use attributes::ImportExportAttribute; -use file_formats::FileFormat; +use compression::CompressionFormat; +use file_formats::{AttributeRequirement, FileFormat}; use nemo_physical::datavalues::DataValue; use crate::{ - io::formats::dsv::value_format::DsvValueFormats, - rule_model::{error::ValidationErrorBuilder, origin::Origin}, + io::formats::{ + dsv::value_format::{DsvValueFormat, DsvValueFormats}, + rdf::value_format::RdfValueFormat, + Direction, + }, + rule_model::{ + error::{hint::Hint, validation_error::ValidationErrorKind, ValidationErrorBuilder}, + origin::Origin, + }, }; use super::{ tag::Tag, term::{map::Map, primitive::Primitive, Term}, - ProgramComponent, + ProgramComponent, ProgramComponentKind, }; /// An import/export specification. This object captures all information that is typically @@ -77,7 +89,7 @@ impl ImportExportDirective { } /// Return a [HashMap] containing the attributes of this directive. - fn attribute_map(&self) -> HashMap { + pub fn attribute_map(&self) -> HashMap { let mut result = HashMap::new(); for (key, value) in self.attributes.key_value() { @@ -91,6 +103,22 @@ impl ImportExportDirective { result } + /// Return a [HahsMap] containing the attributes of this directive, + /// including the origin of each key. 
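The point of giving every component a `kind()` with a human-readable `name()` is that attribute values can be checked against an expected kind and mismatches reported in plain language. A self-contained sketch with a reduced set of kinds; the message format mirrors the `ImportExportAttributeValueType` error introduced in this patch:

```rust
// Reduced stand-in for ProgramComponentKind; illustration only.
#[derive(Debug, Clone, Copy, PartialEq)]
enum Kind {
    PlainString,
    Integer,
    Tuple,
}

impl Kind {
    fn name(self) -> &'static str {
        match self {
            Kind::PlainString => "string",
            Kind::Integer => "integer",
            Kind::Tuple => "tuple",
        }
    }
}

/// Compare the kind of a given value against the kind an attribute expects.
fn check_kind(parameter: &str, given: Kind, expected: Kind) -> Result<(), String> {
    if given != expected {
        return Err(format!(
            "parameter `{parameter}` was given as a `{}`, expected `{}`",
            given.name(),
            expected.name()
        ));
    }
    Ok(())
}

fn main() {
    assert!(check_kind("limit", Kind::Integer, Kind::Integer).is_ok());
    let error = check_kind("limit", Kind::PlainString, Kind::Integer).unwrap_err();
    assert!(error.contains("expected `integer`"));
}
```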
+    fn attribute_map_key(&self) -> HashMap<ImportExportAttribute, (&Origin, &Term)> {
+        let mut result = HashMap::new();
+
+        for (key, value) in self.attributes.key_value() {
+            if let Some(name) =
+                Self::plain_value(&key).and_then(|plain| ImportExportAttribute::from_name(&plain))
+            {
+                result.insert(name, (key.origin(), value));
+            }
+        }
+
+        result
+    }
+
     /// Return the expected arity based on the format or given type information,
     ///
     /// Returns `None` if it is not possible to deduce this information.
@@ -126,6 +154,195 @@ impl ImportExportDirective {
     }
 }
 
+impl ImportExportDirective {
+    /// Validate directive
+    pub fn validate(
+        &self,
+        direction: Direction,
+        builder: &mut ValidationErrorBuilder,
+    ) -> Result<(), ()> {
+        if direction == Direction::Export && self.format == FileFormat::JSON {
+            builder.report_error(
+                self.origin.clone(),
+                ValidationErrorKind::UnsupportedJsonExport,
+            );
+            return Err(());
+        }
+
+        let attributes = self.attribute_map_key();
+        for (attribute, requirement) in self.format.attributes() {
+            if requirement == AttributeRequirement::Required && attributes.get(&attribute).is_none()
+            {
+                builder.report_error(
+                    self.origin.clone(),
+                    ValidationErrorKind::ImportExportMissingRequiredAttribute {
+                        attribute: attribute.name().to_string(),
+                        direction: direction.to_string(),
+                    },
+                );
+            }
+        }
+
+        let expected_attributes = self.format.attributes().keys().collect::<HashSet<_>>();
+        for (attribute, (attribute_origin, value)) in attributes.iter() {
+            if !expected_attributes.contains(attribute) {
+                builder
+                    .report_error(
+                        attribute_origin,
+                        ValidationErrorKind::ImportExportUnrecognizedAttribute {
+                            format: self.format.name().to_string(),
+                            attribute: attribute.name().to_string(),
+                        },
+                    )
+                    .add_hint(Hint::similar(
+                        "parameter",
+                        attribute.name(),
+                        expected_attributes.iter().map(|attribute| attribute.name()),
+                    ));
+            }
+
+            if attribute.value_type() != value.kind() {
+                builder.report_error(
+                    value.origin().clone(),
+                    ValidationErrorKind::ImportExportAttributeValueType {
+                        parameter: attribute.name().to_string(),
+                        given: value.kind().name().to_string(),
+                        expected: attribute.value_type().name().to_string(),
+                    },
+                );
+
+                continue;
+            }
+
+            let _ = match attribute {
+                ImportExportAttribute::Format => match self.format {
+                    FileFormat::CSV | FileFormat::DSV | FileFormat::TSV => {
+                        Self::validate_attribute_format_dsv(value, builder)
+                    }
+                    FileFormat::NTriples
+                    | FileFormat::NQuads
+                    | FileFormat::Turtle
+                    | FileFormat::RDFXML
+                    | FileFormat::TriG => Self::validate_attribute_format_rdf(value, builder),
+                    FileFormat::JSON => Ok(()),
+                },
+                ImportExportAttribute::Delimiter => Self::validate_delimiter(value, builder),
+                ImportExportAttribute::Compression => Self::validate_compression(value, builder),
+                ImportExportAttribute::Limit => Self::validate_limit(value, builder),
+                ImportExportAttribute::Base => Ok(()),
+                ImportExportAttribute::Resource => Ok(()),
+            };
+        }
+
+        Ok(())
+    }
+
+    /// Validate the format attribute for dsv
+    fn validate_attribute_format_dsv(
+        value: &Term,
+        builder: &mut ValidationErrorBuilder,
+    ) -> Result<(), ()> {
+        if let Term::Tuple(tuple) = value {
+            for argument in tuple.arguments() {
+                if ImportExportDirective::plain_value(argument)
+                    .and_then(|name| DsvValueFormat::from_name(&name))
+                    .is_none()
+                {
+                    builder.report_error(
+                        argument.origin().clone(),
+                        ValidationErrorKind::ImportExportValueFormat {
+                            file_format: String::from("dsv"),
+                        },
+                    );
+
+                    return Err(());
+                }
+            }
+
+            Ok(())
+        } else {
+            unreachable!("value should be of correct type")
+        }
+    }
+
+    /// Validate the format attribute for rdf
+    fn
validate_attribute_format_rdf( + value: &Term, + builder: &mut ValidationErrorBuilder, + ) -> Result<(), ()> { + if let Term::Tuple(tuple) = value { + for argument in tuple.arguments() { + if ImportExportDirective::plain_value(value) + .and_then(|name| RdfValueFormat::from_name(&name)) + .is_none() + { + builder.report_error( + argument.origin().clone(), + ValidationErrorKind::ImportExportValueFormat { + file_format: String::from("rdf"), + }, + ); + + return Err(()); + } + } + + Ok(()) + } else { + unreachable!("value should be of correct type") + } + } + + /// Check if the delimiter is a single character string. + fn validate_delimiter(value: &Term, builder: &mut ValidationErrorBuilder) -> Result<(), ()> { + if let Some(delimiter) = ImportExportDirective::string_value(value) { + if delimiter.len() != 1 { + builder.report_error( + value.origin().clone(), + ValidationErrorKind::ImportExportDelimiter, + ); + + return Err(()); + } + } + + Ok(()) + } + + /// Check if the limit is a non-negative number. + fn validate_limit(value: &Term, builder: &mut ValidationErrorBuilder) -> Result<(), ()> { + if let Term::Primitive(Primitive::Ground(ground)) = value { + if !ground.value().fits_into_u64() { + builder.report_error( + value.origin().clone(), + ValidationErrorKind::ImportExportLimitNegative, + ); + return Err(()); + } + } + + Ok(()) + } + + /// Check if the compression format is supported. + fn validate_compression(value: &Term, builder: &mut ValidationErrorBuilder) -> Result<(), ()> { + if let Some(compression) = ImportExportDirective::string_value(value) { + if CompressionFormat::from_name(&compression).is_none() { + builder.report_error( + value.origin().clone(), + ValidationErrorKind::ImportExportUnknownCompression { + format: compression, + }, + ); + + return Err(()); + } + } + + Ok(()) + } +} + impl PartialEq for ImportExportDirective { fn eq(&self, other: &Self) -> bool { self.predicate == other.predicate @@ -219,6 +436,10 @@ impl ProgramComponent for ImportDirective { { todo!() } + + fn kind(&self) -> ProgramComponentKind { + ProgramComponentKind::Import + } } /// An export specification. 
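The three scalar checks above reduce to simple predicates, sketched standalone below. Two caveats: the patch compares the delimiter's byte length via `len()`, whereas counting `chars()` as here would also accept a single multi-byte character; and the accepted compression names (`gzip`, `none`) are assumptions for illustration, the real set lives in `CompressionFormat`:

```rust
/// A DSV delimiter must be a single character.
fn validate_delimiter(delimiter: &str) -> Result<(), String> {
    if delimiter.chars().count() != 1 {
        return Err("delimiter must be a single character".to_string());
    }
    Ok(())
}

/// A limit must be non-negative (the real check tests `fits_into_u64`).
fn validate_limit(limit: i64) -> Result<(), String> {
    if limit < 0 {
        return Err("limit was negative".to_string());
    }
    Ok(())
}

/// A compression format must be one of the known names.
fn validate_compression(format: &str) -> Result<(), String> {
    match format {
        "gzip" | "none" => Ok(()),
        _ => Err(format!("unknown compression format `{format}`")),
    }
}

fn main() {
    assert!(validate_delimiter(";").is_ok());
    assert!(validate_delimiter(";;").is_err());
    assert!(validate_limit(100).is_ok());
    assert!(validate_limit(-1).is_err());
    assert!(validate_compression("gzip").is_ok());
    assert!(validate_compression("zip").is_err());
}
```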
@@ -298,4 +519,8 @@ impl ProgramComponent for ExportDirective { { todo!() } + + fn kind(&self) -> ProgramComponentKind { + ProgramComponentKind::Export + } } diff --git a/nemo/src/rule_model/components/import_export/attributes.rs b/nemo/src/rule_model/components/import_export/attributes.rs index 9bc16e828..b347e0d73 100644 --- a/nemo/src/rule_model/components/import_export/attributes.rs +++ b/nemo/src/rule_model/components/import_export/attributes.rs @@ -5,36 +5,43 @@ use std::{fmt::Display, hash::Hash}; use enum_assoc::Assoc; -use crate::syntax::import_export::attribute; +use crate::{rule_model::components::ProgramComponentKind, syntax::import_export::attribute}; /// Supported attributes in import/export directives #[derive(Assoc, Debug, Copy, Clone, PartialEq, Eq, Hash)] #[func(pub fn name(&self) -> &'static str)] #[func(pub fn from_name(name: &str) -> Option)] +#[func(pub fn value_type(&self) -> ProgramComponentKind)] pub enum ImportExportAttribute { /// Location of the file #[assoc(name = attribute::RESOURCE)] #[assoc(from_name = attribute::RESOURCE)] + #[assoc(value_type = ProgramComponentKind::PlainString)] Resource, /// Data types of the input relations #[assoc(name = attribute::FORMAT)] #[assoc(from_name = attribute::FORMAT)] + #[assoc(value_type = ProgramComponentKind::Tuple)] Format, /// Base IRI #[assoc(name = attribute::BASE)] #[assoc(from_name = attribute::BASE)] + #[assoc(value_type = ProgramComponentKind::Iri)] Base, /// Delimiter used to separate values #[assoc(name = attribute::DSV_DELIMITER)] #[assoc(from_name = attribute::DSV_DELIMITER)] + #[assoc(value_type = ProgramComponentKind::String)] Delimiter, /// Compression format #[assoc(name = attribute::COMPRESSION)] #[assoc(from_name = attribute::COMPRESSION)] + #[assoc(value_type = ProgramComponentKind::PlainString)] Compression, /// Limit import/export to first n number of facts #[assoc(name = attribute::LIMIT)] #[assoc(from_name = attribute::LIMIT)] + #[assoc(value_type = ProgramComponentKind::Integer)] Limit, } diff --git a/nemo/src/rule_model/components/import_export/file_formats.rs b/nemo/src/rule_model/components/import_export/file_formats.rs index 5e9113cf3..cd9e4c354 100644 --- a/nemo/src/rule_model/components/import_export/file_formats.rs +++ b/nemo/src/rule_model/components/import_export/file_formats.rs @@ -33,7 +33,10 @@ pub enum FileFormat { #[assoc(from_name = file_format::CSV)] #[assoc(extension = file_format::EXTENSION_CSV)] #[assoc(attributes = HashMap::from([ - (ImportExportAttribute::Resource, AttributeRequirement::Required) + (ImportExportAttribute::Resource, AttributeRequirement::Required), + (ImportExportAttribute::Formats, AttributeRequirement::Optional), + (ImportExportAttribute::Limit, AttributeRequirement::Optional), + (ImportExportAttribute::Compression, AttributeRequirement::Optional), ]))] CSV, /// Delimiter-separated values @@ -41,7 +44,11 @@ pub enum FileFormat { #[assoc(from_name = file_format::DSV)] #[assoc(extension = file_format::EXTENSION_DSV)] #[assoc(attributes = HashMap::from([ - (ImportExportAttribute::Resource, AttributeRequirement::Required) + (ImportExportAttribute::Resource, AttributeRequirement::Required), + (ImportExportAttribute::Delimiter, AttributeRequirement::Required), + (ImportExportAttribute::Formats, AttributeRequirement::Optional), + (ImportExportAttribute::Limit, AttributeRequirement::Optional), + (ImportExportAttribute::Compression, AttributeRequirement::Optional), ]))] DSV, /// Tab-separated values @@ -49,7 +56,10 @@ pub enum FileFormat { #[assoc(from_name = 
file_format::TSV)] #[assoc(extension = file_format::EXTENSION_TSV)] #[assoc(attributes = HashMap::from([ - (ImportExportAttribute::Resource, AttributeRequirement::Required) + (ImportExportAttribute::Resource, AttributeRequirement::Required), + (ImportExportAttribute::Formats, AttributeRequirement::Optional), + (ImportExportAttribute::Limit, AttributeRequirement::Optional), + (ImportExportAttribute::Compression, AttributeRequirement::Optional), ]))] TSV, /// JSON objects @@ -66,7 +76,11 @@ pub enum FileFormat { #[assoc(from_name = file_format::RDF_NTRIPLES)] #[assoc(extension = file_format::EXTENSION_RDF_NTRIPLES)] #[assoc(attributes = HashMap::from([ - (ImportExportAttribute::Resource, AttributeRequirement::Required) + (ImportExportAttribute::Resource, AttributeRequirement::Required), + (ImportExportAttribute::Base, AttributeRequirement::Optional), + (ImportExportAttribute::Formats, AttributeRequirement::Optional), + (ImportExportAttribute::Limit, AttributeRequirement::Optional), + (ImportExportAttribute::Compression, AttributeRequirement::Optional), ]))] #[assoc(arity = 3)] NTriples, @@ -75,7 +89,11 @@ pub enum FileFormat { #[assoc(from_name = file_format::RDF_NQUADS)] #[assoc(extension = file_format::EXTENSION_RDF_NQUADS)] #[assoc(attributes = HashMap::from([ - (ImportExportAttribute::Resource, AttributeRequirement::Required) + (ImportExportAttribute::Resource, AttributeRequirement::Required), + (ImportExportAttribute::Base, AttributeRequirement::Optional), + (ImportExportAttribute::Formats, AttributeRequirement::Optional), + (ImportExportAttribute::Limit, AttributeRequirement::Optional), + (ImportExportAttribute::Compression, AttributeRequirement::Optional), ]))] #[assoc(arity = 4)] NQuads, @@ -84,7 +102,11 @@ pub enum FileFormat { #[assoc(from_name = file_format::RDF_TURTLE)] #[assoc(extension = file_format::EXTENSION_RDF_TURTLE)] #[assoc(attributes = HashMap::from([ - (ImportExportAttribute::Resource, AttributeRequirement::Required) + (ImportExportAttribute::Resource, AttributeRequirement::Required), + (ImportExportAttribute::Base, AttributeRequirement::Optional), + (ImportExportAttribute::Formats, AttributeRequirement::Optional), + (ImportExportAttribute::Limit, AttributeRequirement::Optional), + (ImportExportAttribute::Compression, AttributeRequirement::Optional), ]))] #[assoc(arity = 3)] Turtle, @@ -93,7 +115,11 @@ pub enum FileFormat { #[assoc(from_name = file_format::RDF_XML)] #[assoc(extension = file_format::EXTENSION_RDF_XML)] #[assoc(attributes = HashMap::from([ - (ImportExportAttribute::Resource, AttributeRequirement::Required) + (ImportExportAttribute::Resource, AttributeRequirement::Required), + (ImportExportAttribute::Base, AttributeRequirement::Optional), + (ImportExportAttribute::Formats, AttributeRequirement::Optional), + (ImportExportAttribute::Limit, AttributeRequirement::Optional), + (ImportExportAttribute::Compression, AttributeRequirement::Optional), ]))] #[assoc(arity = 3)] RDFXML, @@ -102,7 +128,11 @@ pub enum FileFormat { #[assoc(from_name = file_format::RDF_TRIG)] #[assoc(extension = file_format::EXTENSION_RDF_TRIG)] #[assoc(attributes = HashMap::from([ - (ImportExportAttribute::Resource, AttributeRequirement::Required) + (ImportExportAttribute::Resource, AttributeRequirement::Required), + (ImportExportAttribute::Base, AttributeRequirement::Optional), + (ImportExportAttribute::Formats, AttributeRequirement::Optional), + (ImportExportAttribute::Limit, AttributeRequirement::Optional), + (ImportExportAttribute::Compression, AttributeRequirement::Optional), ]))] 
#[assoc(arity = 4)] TriG, diff --git a/nemo/src/rule_model/components/literal.rs b/nemo/src/rule_model/components/literal.rs index 3d3d38131..d99b6ad73 100644 --- a/nemo/src/rule_model/components/literal.rs +++ b/nemo/src/rule_model/components/literal.rs @@ -7,7 +7,7 @@ use crate::rule_model::error::{ValidationError, ValidationErrorBuilder}; use super::{ atom::Atom, term::{operation::Operation, primitive::variable::Variable, Term}, - IterableVariables, ProgramComponent, + IterableVariables, ProgramComponent, ProgramComponentKind, }; /// Literal @@ -83,6 +83,10 @@ impl ProgramComponent for Literal { Literal::Operation(literal) => literal.validate(builder), } } + + fn kind(&self) -> ProgramComponentKind { + ProgramComponentKind::Literal + } } impl IterableVariables for Literal { diff --git a/nemo/src/rule_model/components/output.rs b/nemo/src/rule_model/components/output.rs index 474209c49..153245aa8 100644 --- a/nemo/src/rule_model/components/output.rs +++ b/nemo/src/rule_model/components/output.rs @@ -4,7 +4,7 @@ use std::{fmt::Display, hash::Hash}; use crate::rule_model::{error::ValidationErrorBuilder, origin::Origin}; -use super::{tag::Tag, ProgramComponent}; +use super::{tag::Tag, ProgramComponent, ProgramComponentKind}; /// Output directive /// @@ -77,4 +77,8 @@ impl ProgramComponent for Output { { Ok(()) } + + fn kind(&self) -> ProgramComponentKind { + ProgramComponentKind::Output + } } diff --git a/nemo/src/rule_model/components/rule.rs b/nemo/src/rule_model/components/rule.rs index 083d1cce4..414a6c876 100644 --- a/nemo/src/rule_model/components/rule.rs +++ b/nemo/src/rule_model/components/rule.rs @@ -18,7 +18,7 @@ use super::{ primitive::{variable::Variable, Primitive}, Term, }, - IterableVariables, ProgramComponent, + IterableVariables, ProgramComponent, ProgramComponentKind, }; /// Rule @@ -328,10 +328,7 @@ impl ProgramComponent for Rule { if let Some(hint) = Hint::similar( "variable", variable_name, - &safe_variables - .iter() - .flat_map(|variable| variable.name()) - .collect::>(), + safe_variables.iter().flat_map(|variable| variable.name()), ) { info.add_hint(hint); } @@ -395,6 +392,10 @@ impl ProgramComponent for Rule { Ok(()) } + + fn kind(&self) -> ProgramComponentKind { + ProgramComponentKind::Rule + } } impl IterableVariables for Rule { diff --git a/nemo/src/rule_model/components/term.rs b/nemo/src/rule_model/components/term.rs index 98bab72c2..255331246 100644 --- a/nemo/src/rule_model/components/term.rs +++ b/nemo/src/rule_model/components/term.rs @@ -281,6 +281,17 @@ impl ProgramComponent for Term { Term::Tuple(term) => term.validate(builder), } } + + fn kind(&self) -> super::ProgramComponentKind { + match self { + Term::Primitive(term) => term.kind(), + Term::Aggregate(term) => term.kind(), + Term::FunctionTerm(term) => term.kind(), + Term::Map(term) => term.kind(), + Term::Operation(term) => term.kind(), + Term::Tuple(term) => term.kind(), + } + } } impl IterableVariables for Term { diff --git a/nemo/src/rule_model/components/term/aggregate.rs b/nemo/src/rule_model/components/term/aggregate.rs index fa9ceb3a6..b7cde3273 100644 --- a/nemo/src/rule_model/components/term/aggregate.rs +++ b/nemo/src/rule_model/components/term/aggregate.rs @@ -8,7 +8,7 @@ use strum_macros::EnumIter; use crate::{ rule_model::{ - components::{IterableVariables, ProgramComponent}, + components::{IterableVariables, ProgramComponent, ProgramComponentKind}, error::{validation_error::ValidationErrorKind, ValidationErrorBuilder}, origin::Origin, }, @@ -222,6 +222,10 @@ impl ProgramComponent for 
Aggregate { Ok(()) } + + fn kind(&self) -> ProgramComponentKind { + ProgramComponentKind::Aggregation + } } impl IterableVariables for Aggregate { diff --git a/nemo/src/rule_model/components/term/function.rs b/nemo/src/rule_model/components/term/function.rs index aca2bc756..44e756020 100644 --- a/nemo/src/rule_model/components/term/function.rs +++ b/nemo/src/rule_model/components/term/function.rs @@ -3,7 +3,7 @@ use std::{fmt::Display, hash::Hash}; use crate::rule_model::{ - components::{tag::Tag, IterableVariables, ProgramComponent}, + components::{tag::Tag, IterableVariables, ProgramComponent, ProgramComponentKind}, error::{validation_error::ValidationErrorKind, ValidationError, ValidationErrorBuilder}, origin::Origin, }; @@ -154,6 +154,10 @@ impl ProgramComponent for FunctionTerm { Ok(()) } + + fn kind(&self) -> ProgramComponentKind { + ProgramComponentKind::FunctionTerm + } } impl IterableVariables for FunctionTerm { diff --git a/nemo/src/rule_model/components/term/map.rs b/nemo/src/rule_model/components/term/map.rs index ecf35547b..5e5cd4779 100644 --- a/nemo/src/rule_model/components/term/map.rs +++ b/nemo/src/rule_model/components/term/map.rs @@ -3,7 +3,7 @@ use std::{fmt::Display, hash::Hash}; use crate::rule_model::{ - components::{tag::Tag, IterableVariables, ProgramComponent}, + components::{tag::Tag, IterableVariables, ProgramComponent, ProgramComponentKind}, error::ValidationErrorBuilder, origin::Origin, }; @@ -142,6 +142,10 @@ impl ProgramComponent for Map { Ok(()) } + + fn kind(&self) -> ProgramComponentKind { + ProgramComponentKind::Map + } } impl IterableVariables for Map { diff --git a/nemo/src/rule_model/components/term/operation.rs b/nemo/src/rule_model/components/term/operation.rs index 1df810e01..6aad88d90 100644 --- a/nemo/src/rule_model/components/term/operation.rs +++ b/nemo/src/rule_model/components/term/operation.rs @@ -7,7 +7,7 @@ use std::{fmt::Display, hash::Hash}; use operation_kind::OperationKind; use crate::rule_model::{ - components::{IterableVariables, ProgramComponent}, + components::{IterableVariables, ProgramComponent, ProgramComponentKind}, error::{validation_error::ValidationErrorKind, ValidationErrorBuilder}, origin::Origin, }; @@ -237,6 +237,10 @@ impl ProgramComponent for Operation { Ok(()) } + + fn kind(&self) -> ProgramComponentKind { + ProgramComponentKind::Operation + } } impl IterableVariables for Operation { diff --git a/nemo/src/rule_model/components/term/primitive.rs b/nemo/src/rule_model/components/term/primitive.rs index 83c02cfa8..df26357a4 100644 --- a/nemo/src/rule_model/components/term/primitive.rs +++ b/nemo/src/rule_model/components/term/primitive.rs @@ -10,7 +10,7 @@ use nemo_physical::datavalues::AnyDataValue; use variable::{existential::ExistentialVariable, universal::UniversalVariable, Variable}; use crate::rule_model::{ - components::{IterableVariables, ProgramComponent}, + components::{IterableVariables, ProgramComponent, ProgramComponentKind}, error::ValidationErrorBuilder, origin::Origin, }; @@ -147,6 +147,13 @@ impl ProgramComponent for Primitive { Primitive::Ground(ground) => ground.validate(builder), } } + + fn kind(&self) -> ProgramComponentKind { + match self { + Primitive::Variable(primitive) => primitive.kind(), + Primitive::Ground(primitive) => primitive.kind(), + } + } } impl IterableVariables for Primitive { diff --git a/nemo/src/rule_model/components/term/primitive/ground.rs b/nemo/src/rule_model/components/term/primitive/ground.rs index 97da5dc57..fee6d32e5 100644 --- 
a/nemo/src/rule_model/components/term/primitive/ground.rs +++ b/nemo/src/rule_model/components/term/primitive/ground.rs @@ -5,7 +5,7 @@ use std::{fmt::Display, hash::Hash}; use nemo_physical::datavalues::{AnyDataValue, DataValue, IriDataValue, ValueDomain}; use crate::rule_model::{ - components::{term::value_type::ValueType, ProgramComponent}, + components::{term::value_type::ValueType, ProgramComponent, ProgramComponentKind}, error::{ValidationError, ValidationErrorBuilder}, origin::Origin, }; @@ -151,4 +151,25 @@ impl ProgramComponent for GroundTerm { { Ok(()) } + + fn kind(&self) -> ProgramComponentKind { + match self.value.value_domain() { + ValueDomain::PlainString => ProgramComponentKind::PlainString, + ValueDomain::LanguageTaggedString => ProgramComponentKind::LanguageTaggedString, + ValueDomain::Iri => ProgramComponentKind::Iri, + ValueDomain::Float => ProgramComponentKind::Float, + ValueDomain::Double => ProgramComponentKind::Double, + ValueDomain::UnsignedLong + | ValueDomain::NonNegativeLong + | ValueDomain::UnsignedInt + | ValueDomain::NonNegativeInt + | ValueDomain::Int + | ValueDomain::Long => ProgramComponentKind::Integer, + ValueDomain::Tuple => ProgramComponentKind::Tuple, + ValueDomain::Map => ProgramComponentKind::Map, + ValueDomain::Boolean => ProgramComponentKind::Boolean, + ValueDomain::Null => ProgramComponentKind::Null, + ValueDomain::Other => ProgramComponentKind::Other, + } + } } diff --git a/nemo/src/rule_model/components/term/primitive/variable.rs b/nemo/src/rule_model/components/term/primitive/variable.rs index 610ab9e15..ff4ed6850 100644 --- a/nemo/src/rule_model/components/term/primitive/variable.rs +++ b/nemo/src/rule_model/components/term/primitive/variable.rs @@ -6,6 +6,7 @@ use existential::ExistentialVariable; use universal::UniversalVariable; use crate::rule_model::{ + components::ProgramComponentKind, error::{ValidationError, ValidationErrorBuilder}, origin::Origin, }; @@ -147,4 +148,8 @@ impl ProgramComponent for Variable { Variable::Existential(existential) => existential.validate(builder), } } + + fn kind(&self) -> ProgramComponentKind { + ProgramComponentKind::Variable + } } diff --git a/nemo/src/rule_model/components/term/primitive/variable/existential.rs b/nemo/src/rule_model/components/term/primitive/variable/existential.rs index 619ce1f80..50706878f 100644 --- a/nemo/src/rule_model/components/term/primitive/variable/existential.rs +++ b/nemo/src/rule_model/components/term/primitive/variable/existential.rs @@ -3,7 +3,7 @@ use std::{fmt::Display, hash::Hash}; use crate::rule_model::{ - components::ProgramComponent, + components::{ProgramComponent, ProgramComponentKind}, error::{validation_error::ValidationErrorKind, ValidationError, ValidationErrorBuilder}, origin::Origin, }; @@ -99,4 +99,8 @@ impl ProgramComponent for ExistentialVariable { Ok(()) } + + fn kind(&self) -> ProgramComponentKind { + ProgramComponentKind::Variable + } } diff --git a/nemo/src/rule_model/components/term/primitive/variable/universal.rs b/nemo/src/rule_model/components/term/primitive/variable/universal.rs index 1868fc605..003420c3e 100644 --- a/nemo/src/rule_model/components/term/primitive/variable/universal.rs +++ b/nemo/src/rule_model/components/term/primitive/variable/universal.rs @@ -3,7 +3,7 @@ use std::{fmt::Display, hash::Hash}; use crate::rule_model::{ - components::ProgramComponent, + components::{ProgramComponent, ProgramComponentKind}, error::{validation_error::ValidationErrorKind, ValidationError, ValidationErrorBuilder}, origin::Origin, }; @@ -121,4 +121,8 @@ 
impl ProgramComponent for UniversalVariable { Ok(()) } + + fn kind(&self) -> ProgramComponentKind { + ProgramComponentKind::Variable + } } diff --git a/nemo/src/rule_model/components/term/tuple.rs b/nemo/src/rule_model/components/term/tuple.rs index 941060970..57544520a 100644 --- a/nemo/src/rule_model/components/term/tuple.rs +++ b/nemo/src/rule_model/components/term/tuple.rs @@ -3,7 +3,7 @@ use std::{fmt::Display, hash::Hash}; use crate::rule_model::{ - components::{IterableVariables, ProgramComponent}, + components::{IterableVariables, ProgramComponent, ProgramComponentKind}, error::ValidationErrorBuilder, origin::Origin, }; @@ -121,6 +121,10 @@ impl ProgramComponent for Tuple { Ok(()) } + + fn kind(&self) -> ProgramComponentKind { + ProgramComponentKind::Tuple + } } impl IterableVariables for Tuple { diff --git a/nemo/src/rule_model/error/hint/similar.rs b/nemo/src/rule_model/error/hint/similar.rs index 9818a64d1..88b094aa8 100644 --- a/nemo/src/rule_model/error/hint/similar.rs +++ b/nemo/src/rule_model/error/hint/similar.rs @@ -11,10 +11,10 @@ const SIMILARITY_THRESHOLD: f64 = 0.8; impl Hint { /// Checks whether a similar string exist in a collection of source strings. /// Returns the most similar string, if there is one - pub fn similar( + pub fn similar>>( kind: &str, target: impl AsRef, - options: &[impl AsRef], + options: Options, ) -> Option { if target.as_ref().len() < SIMILARITY_MIN_LENGTH { return None; diff --git a/nemo/src/rule_model/error/validation_error.rs b/nemo/src/rule_model/error/validation_error.rs index e238b1247..72185a28c 100644 --- a/nemo/src/rule_model/error/validation_error.rs +++ b/nemo/src/rule_model/error/validation_error.rs @@ -85,6 +85,41 @@ pub enum ValidationErrorKind { #[assoc(code = 216)] #[assoc(note = "each predicate is only allowed to have one arity")] InconsistentArities { predicate: String }, + /// Import/Export: Missing required attribute + #[error(r#"missing required parameter `{attribute}` in {direction} statement"#)] + #[assoc(code = 217)] + ImportExportMissingRequiredAttribute { + attribute: String, + direction: String, + }, + /// Import/Export: Unrecognized parameter + #[error(r#"file format {format} does not recognize parameter `{attribute}`"#)] + #[assoc(code = 218)] + ImportExportUnrecognizedAttribute { format: String, attribute: String }, + /// Import/Export: wrong input type for resource attribute + #[error(r#"parameter `{parameter}` was given as a `{given}`, expected `{expected}`"#)] + #[assoc(code = 219)] + ImportExportAttributeValueType { + parameter: String, + given: String, + expected: String, + }, + /// Import/Export: dsv wrong value format + #[error(r#"unknown {file_format} value format"#)] + #[assoc(code = 220)] + ImportExportValueFormat { file_format: String }, + /// Import/Export: negative limit + #[error(r#"limit was negative"#)] + #[assoc(code = 221)] + ImportExportLimitNegative, + /// Import/Export: delimiter + #[error(r#"delimiter must be a single character"#)] + #[assoc(code = 222)] + ImportExportDelimiter, + /// Import/Export: unknown compression format + #[error(r#"unknown compression format `{format}`"#)] + #[assoc(code = 223)] + ImportExportUnknownCompression { format: String }, /// Unsupported feature: Multiple aggregates in one rule #[error(r#"multiple aggregates in one rule is currently unsupported"#)] @@ -102,4 +137,8 @@ pub enum ValidationErrorKind { #[assoc(code = 996)] #[error(r#"complex terms are currently unsupported"#)] UnsupportedComplexTerm, + /// Unsupported feature: Exporting Json files + 
#[error(r#"exporting in json is currently unsupported"#)] + #[assoc(code = 995)] + UnsupportedJsonExport, } diff --git a/nemo/src/rule_model/program.rs b/nemo/src/rule_model/program.rs index bef22456a..b9d7fd0a8 100644 --- a/nemo/src/rule_model/program.rs +++ b/nemo/src/rule_model/program.rs @@ -14,7 +14,7 @@ use super::{ literal::Literal, output::Output, rule::Rule, - ProgramComponent, + ProgramComponent, ProgramComponentKind, }, error::{ info::Info, validation_error::ValidationErrorKind, ComplexErrorLabelKind, @@ -245,6 +245,10 @@ impl ProgramComponent for Program { self.validate_global_properties(builder) } + + fn kind(&self) -> ProgramComponentKind { + ProgramComponentKind::Program + } } impl std::fmt::Display for Program { From c3ef79464f7462e2e65ef7e6ef0eaa6e13c2254a Mon Sep 17 00:00:00 2001 From: Alex Ivliev Date: Thu, 5 Sep 2024 11:47:38 +0200 Subject: [PATCH 151/214] Move program analysis and variable order to chase program module --- nemo/src/chase_model/analysis.rs | 5 +++++ .../analysis.rs => chase_model/analysis/program_analysis.rs} | 0 .../analysis}/variable_order.rs | 0 3 files changed, 5 insertions(+) rename nemo/src/{program_analysis/analysis.rs => chase_model/analysis/program_analysis.rs} (100%) rename nemo/src/{program_analysis => chase_model/analysis}/variable_order.rs (100%) diff --git a/nemo/src/chase_model/analysis.rs b/nemo/src/chase_model/analysis.rs index e69de29bb..7f4a55724 100644 --- a/nemo/src/chase_model/analysis.rs +++ b/nemo/src/chase_model/analysis.rs @@ -0,0 +1,5 @@ +//! This module defines functionality for analyzing [ChaseProgram]s, +//! deriving information that is useful for planning execution plans for the physical layer. + +pub(crate) mod program_analysis; +pub(crate) mod variable_order; diff --git a/nemo/src/program_analysis/analysis.rs b/nemo/src/chase_model/analysis/program_analysis.rs similarity index 100% rename from nemo/src/program_analysis/analysis.rs rename to nemo/src/chase_model/analysis/program_analysis.rs diff --git a/nemo/src/program_analysis/variable_order.rs b/nemo/src/chase_model/analysis/variable_order.rs similarity index 100% rename from nemo/src/program_analysis/variable_order.rs rename to nemo/src/chase_model/analysis/variable_order.rs From 31c240ec7b64a7ade2c1eb340e0367a2bd98bc56 Mon Sep 17 00:00:00 2001 From: Alex Ivliev Date: Thu, 5 Sep 2024 11:48:14 +0200 Subject: [PATCH 152/214] Delete unused type inference code --- nemo/src/program_analysis/type_inference.rs | 909 ------------------ .../type_inference/position_graph.rs | 335 ------- .../type_inference/type_requirement.rs | 366 ------- 3 files changed, 1610 deletions(-) delete mode 100644 nemo/src/program_analysis/type_inference.rs delete mode 100644 nemo/src/program_analysis/type_inference/position_graph.rs delete mode 100644 nemo/src/program_analysis/type_inference/type_requirement.rs diff --git a/nemo/src/program_analysis/type_inference.rs b/nemo/src/program_analysis/type_inference.rs deleted file mode 100644 index 6f4c53e4d..000000000 --- a/nemo/src/program_analysis/type_inference.rs +++ /dev/null @@ -1,909 +0,0 @@ -use std::collections::HashMap; - -use crate::model::{ - chase_model::{ChaseAtom, ChaseProgram}, - types::error::TypeError, - Identifier, PrimitiveTerm, PrimitiveType, Variable, -}; - -use self::{ - position_graph::PositionGraph, - type_requirement::{ - merge_type_requirements, override_type_requirements, requirements_from_aggregates_in_rules, - requirements_from_existentials_in_rules, requirements_from_facts, - requirements_from_imports, 
requirements_from_literals_in_rules, - requirements_from_pred_decls, TypeRequirement, - }, -}; - -mod position_graph; -mod type_requirement; - -type PredicateTypes = HashMap>; -type VariableTypesForRules = Vec>; - -/// The Type Inference Result consists of a pair of predicate types and a list of variable types -/// (for each rule) if successful; otherwise an error is returned -type TypeInferenceResult = Result<(PredicateTypes, VariableTypesForRules), TypeError>; - -pub(super) fn infer_types(program: &ChaseProgram) -> TypeInferenceResult { - let pred_reqs = requirements_from_pred_decls(program.parsed_predicate_declarations())?; - let import_reqs = requirements_from_imports(program.imports())?; - let fact_reqs = requirements_from_facts(program.facts())?; - let literal_reqs = requirements_from_literals_in_rules(program.rules())?; - let aggregate_reqs = requirements_from_aggregates_in_rules(program.rules())?; - let existential_reqs = requirements_from_existentials_in_rules(program.rules())?; - - let mut type_requirements = import_reqs; - merge_type_requirements(&mut type_requirements, fact_reqs)?; - merge_type_requirements(&mut type_requirements, literal_reqs)?; - override_type_requirements(&mut type_requirements, pred_reqs); - merge_type_requirements(&mut type_requirements, aggregate_reqs)?; - merge_type_requirements(&mut type_requirements, existential_reqs)?; - - for (predicate, arity) in program.get_all_predicates() { - type_requirements - .entry(predicate) - .or_insert(vec![TypeRequirement::None; arity]); - } - - let position_graph = PositionGraph::from_rules(program.rules()); - - let type_requirements = position_graph.propagate_type_requirements(type_requirements)?; - - // All the types that are not set will be mapped to a default type - let pred_types = type_requirements - .into_iter() - .map(|(predicate, types)| { - ( - predicate, - types - .into_iter() - .map(|t| Option::::from(t).unwrap_or_default()) - .collect(), - ) - }) - .collect(); - - // we check that everything is consistent - position_graph.check_type_requirement_compatibility(&pred_types)?; - - let rule_var_types = get_rule_variable_types(program, &pred_types); - - check_for_incompatible_constant_types(program, &rule_var_types, &pred_types)?; - check_for_nonnumeric_arithmetic(program, &rule_var_types)?; - check_aggregate_types(program, &rule_var_types)?; - - Ok((pred_types, rule_var_types)) -} - -fn get_rule_variable_types( - program: &ChaseProgram, - pred_types: &PredicateTypes, -) -> VariableTypesForRules { - program - .rules() - .iter() - .map(|rule| { - let mut variable_types: HashMap = HashMap::new(); - - for atom in rule.head() { - for (term_position, term) in atom.terms().iter().enumerate() { - if let PrimitiveTerm::Variable(variable) = term { - variable_types.entry(variable.clone()).or_insert( - pred_types - .get(&atom.predicate()) - .expect("Every predicate should have received type information.") - [term_position], - ); - } - } - } - for atom in rule.all_body() { - for (term_position, variable) in atom.terms().iter().enumerate() { - variable_types.entry(variable.clone()).or_insert( - pred_types - .get(&atom.predicate()) - .expect("Every predicate should have received type information.") - [term_position], - ); - } - } - variable_types - }) - .collect() -} - -fn check_for_incompatible_constant_types( - program: &ChaseProgram, - rule_var_types: &VariableTypesForRules, - predicate_types: &PredicateTypes, -) -> Result<(), TypeError> { - for fact in program.facts() { - let predicate_types = predicate_types - 
.get(&fact.predicate()) - .expect("Previous analysis should have assigned a type vector to each predicate."); - - for (term_index, constant) in fact.terms().iter().enumerate() { - let logical_type = predicate_types[term_index]; - logical_type.ground_term_to_data_value_t(constant.clone())?; - } - } - - for (rule, var_types) in program.rules().iter().zip(rule_var_types) { - for constraint in rule.all_constraints() { - if let Some(example_variable) = constraint.variables().next() { - let variable_type = var_types - .get(example_variable) - .expect("Previous analysis should have assigned a type to each variable."); - - for term in [constraint.left(), constraint.right()] { - for primitive_term in term.primitive_terms() { - if let PrimitiveTerm::Constant(constant) = primitive_term { - variable_type.ground_term_to_data_value_t(constant.clone())?; - } - } - } - } - } - - for constructor in rule.constructors() { - let variable_type = var_types - .get(constructor.variable()) - .expect("Previous analysis should have assigned a type to each variable."); - - for term in constructor.term().primitive_terms() { - if let PrimitiveTerm::Constant(constant) = term { - variable_type.ground_term_to_data_value_t(constant.clone())?; - } - } - } - } - - Ok(()) -} - -fn check_for_nonnumeric_arithmetic( - program: &ChaseProgram, - rule_var_types: &VariableTypesForRules, -) -> Result<(), TypeError> { - for (rule, var_types) in program.rules().iter().zip(rule_var_types) { - for constraint in rule.all_constraints() { - if let Some(example_variable) = constraint.variables().next() { - let variable_type = var_types - .get(example_variable) - .expect("Previous analysis should have assigned a type to each variable."); - - // Note: For now, separating this two checks doesn't make much sense - // because every type that allows for instance an addition, also allows comparison such as < - - if constraint.is_numeric() && !variable_type.allows_numeric_operations() { - return Err(TypeError::InvalidRuleNonNumericComparison); - } - - if (!constraint.left().is_primitive() || !constraint.right().is_primitive()) - && !variable_type.allows_numeric_operations() - { - return Err(TypeError::InvalidRuleNonNumericArithmetic); - } - } - } - - for constructor in rule.constructors() { - let variable_type = var_types - .get(constructor.variable()) - .expect("Previous analysis should have assigned a type to each variable."); - - if !constructor.term().is_primitive() && !variable_type.allows_numeric_operations() { - return Err(TypeError::InvalidRuleNonNumericArithmetic); - } - } - } - - Ok(()) -} - -fn check_aggregate_types( - program: &ChaseProgram, - rule_var_types: &VariableTypesForRules, -) -> Result<(), TypeError> { - for (rule, var_types) in program.rules().iter().zip(rule_var_types) { - for aggregate in rule.aggregates() { - let variable_type = var_types.get(&aggregate.input_variables[0]).expect( - "Previous analysis should have assigned a type to each aggregate output variable.", - ); - - aggregate - .logical_aggregate_operation - .check_input_type(&aggregate.input_variables[0].name(), *variable_type)?; - } - } - - Ok(()) -} - -#[cfg(test)] -mod test { - use std::collections::HashMap; - - use crate::{ - io::formats::dsv::DsvHandler, - model::{ - chase_model::{ - ChaseFact, ChaseProgram, ChaseRule, Constructor, PrimitiveAtom, VariableAtom, - }, - Constant, Constraint, Identifier, NumericLiteral, PrimitiveTerm, PrimitiveType, Term, - TupleConstraint, Variable, - }, - program_analysis::{analysis::get_fresh_rule_predicate, 
type_inference::infer_types}, - }; - - type TestData = ( - (ChaseRule, ChaseRule, ChaseRule), - (ChaseFact, ChaseFact, ChaseFact), - ( - Identifier, - Identifier, - Identifier, - Identifier, - Identifier, - Identifier, - Identifier, - ), - ); - - fn get_test_rules_and_facts_and_predicates() -> TestData { - let a = Identifier("a".to_string()); - let b = Identifier("b".to_string()); - let c = Identifier("c".to_string()); - let r = Identifier("r".to_string()); - let s = Identifier("s".to_string()); - let t = Identifier("t".to_string()); - let q = Identifier("q".to_string()); - - let x = Variable::Universal(Identifier("x".to_string())); - let headop = Variable::Universal(Identifier("headop".to_string())); - let z = Variable::Existential(Identifier("z".to_string())); - - let tx = PrimitiveTerm::Variable(x.clone()); - let theadop = PrimitiveTerm::Variable(headop.clone()); - let tz = PrimitiveTerm::Variable(z); - - let v42 = Variable::Universal(Identifier("PLACEHOLDER_42".to_string())); - let c42 = Constant::NumericLiteral(NumericLiteral::Integer(42)); - let t42 = PrimitiveTerm::Constant(c42.clone()); - let tt42 = Term::Primitive(t42.clone()); - - let c3 = Constant::NumericLiteral(NumericLiteral::Integer(3)); - - let v7 = Variable::Universal(Identifier("PLACEHOLDER_7".to_string())); - let c7 = Constant::NumericLiteral(NumericLiteral::Integer(7)); - let t7 = PrimitiveTerm::Constant(c7.clone()); - let tt7 = Term::Primitive(t7.clone()); - - let tt55 = Term::Primitive(PrimitiveTerm::Constant(Constant::NumericLiteral( - NumericLiteral::Integer(55), - ))); - - // A(x) :- B(x), C(x). - let basic_rule = ChaseRule::new( - vec![PrimitiveAtom::new(a.clone(), vec![tx.clone()])], - vec![], - vec![], - vec![ - VariableAtom::new(b.clone(), vec![x.clone()]), - VariableAtom::new(c.clone(), vec![x.clone()]), - ], - vec![], - vec![], - vec![], - ); - - // R(x, !z) :- A(x). - let exis_rule = ChaseRule::new( - vec![PrimitiveAtom::new(r.clone(), vec![tx.clone(), tz])], - vec![], - vec![], - vec![VariableAtom::new(a.clone(), vec![x.clone()])], - vec![], - vec![], - vec![], - ); - - // S(x, 55) :- T(42, x), Q(7). 
- let rule_with_constant = ChaseRule::new( - vec![PrimitiveAtom::new(s.clone(), vec![tx.clone(), theadop])], - vec![Constructor::new(headop, tt55)], - vec![], - vec![ - VariableAtom::new(t.clone(), vec![v42.clone(), x]), - VariableAtom::new(q.clone(), vec![v7.clone()]), - ], - vec![ - Constraint::Equals(Term::Primitive(PrimitiveTerm::Variable(v42)), tt42), - Constraint::Equals(Term::Primitive(PrimitiveTerm::Variable(v7)), tt7), - ], - vec![], - vec![], - ); - - let fact1 = ChaseFact::new(t.clone(), vec![c42.clone(), c3.clone()]); - let fact2 = ChaseFact::new(t.clone(), vec![c3, c7.clone()]); - let fact3 = ChaseFact::new(t.clone(), vec![c7, c42]); - - ( - (basic_rule, exis_rule, rule_with_constant), - (fact1, fact2, fact3), - (a, b, c, r, s, t, q), - ) - } - - #[test] - fn infer_types_no_decl() { - let ( - (basic_rule, exis_rule, rule_with_constant), - (fact1, fact2, fact3), - (a, b, c, r, s, t, q), - ) = get_test_rules_and_facts_and_predicates(); - - let no_decl = ChaseProgram::builder() - .rule(basic_rule) - .rule(exis_rule) - .rule(rule_with_constant) - .fact(fact1) - .fact(fact2) - .fact(fact3) - .build(); - - let expected_types: HashMap> = [ - (a, vec![PrimitiveType::Any]), - (b, vec![PrimitiveType::Any]), - (c, vec![PrimitiveType::Any]), - (r, vec![PrimitiveType::Any, PrimitiveType::Any]), - (s, vec![PrimitiveType::Integer, PrimitiveType::Integer]), - (t, vec![PrimitiveType::Integer, PrimitiveType::Integer]), - (q, vec![PrimitiveType::Integer]), - (get_fresh_rule_predicate(1), vec![PrimitiveType::Any]), - ] - .into_iter() - .collect(); - - let inferred_types = infer_types(&no_decl).unwrap().0; - assert_eq!(inferred_types, expected_types); - } - - #[test] - fn infer_types_a_string_decl() { - let ( - (basic_rule, exis_rule, rule_with_constant), - (fact1, fact2, fact3), - (a, b, c, r, s, t, q), - ) = get_test_rules_and_facts_and_predicates(); - - let a_string_decl = ChaseProgram::builder() - .rule(basic_rule) - .rule(exis_rule) - .rule(rule_with_constant) - .fact(fact1) - .fact(fact2) - .fact(fact3) - .predicate_declaration(a.clone(), vec![PrimitiveType::String]) - .build(); - - let expected_types: HashMap> = [ - (a, vec![PrimitiveType::String]), - (b, vec![PrimitiveType::Any]), - (c, vec![PrimitiveType::Any]), - (r, vec![PrimitiveType::String, PrimitiveType::Any]), - (s, vec![PrimitiveType::Integer, PrimitiveType::Integer]), - (t, vec![PrimitiveType::Integer, PrimitiveType::Integer]), - (q, vec![PrimitiveType::Integer]), - (get_fresh_rule_predicate(1), vec![PrimitiveType::Any]), - ] - .into_iter() - .collect(); - - let inferred_types = infer_types(&a_string_decl).unwrap().0; - assert_eq!(inferred_types, expected_types); - } - - #[test] - fn infer_types_a_int_decl() { - let ( - (basic_rule, exis_rule, rule_with_constant), - (fact1, fact2, fact3), - (a, b, c, r, s, t, q), - ) = get_test_rules_and_facts_and_predicates(); - - let a_int_decl = ChaseProgram::builder() - .rule(basic_rule) - .rule(exis_rule) - .rule(rule_with_constant) - .fact(fact1) - .fact(fact2) - .fact(fact3) - .predicate_declaration(a.clone(), vec![PrimitiveType::Integer]) - .build(); - - let expected_types: HashMap> = [ - (a, vec![PrimitiveType::Integer]), - (b, vec![PrimitiveType::Any]), - (c, vec![PrimitiveType::Any]), - (r, vec![PrimitiveType::Integer, PrimitiveType::Any]), - (s, vec![PrimitiveType::Integer, PrimitiveType::Integer]), - (t, vec![PrimitiveType::Integer, PrimitiveType::Integer]), - (q, vec![PrimitiveType::Integer]), - (get_fresh_rule_predicate(1), vec![PrimitiveType::Any]), - ] - .into_iter() - 
.collect(); - - let inferred_types = infer_types(&a_int_decl).unwrap().0; - assert_eq!(inferred_types, expected_types); - } - - #[test] - fn infer_types_b_string_decl() { - let ( - (basic_rule, exis_rule, rule_with_constant), - (fact1, fact2, fact3), - (a, b, c, r, s, t, q), - ) = get_test_rules_and_facts_and_predicates(); - - let b_string_decl = ChaseProgram::builder() - .rule(basic_rule) - .rule(exis_rule) - .rule(rule_with_constant) - .fact(fact1) - .fact(fact2) - .fact(fact3) - .predicate_declaration(b.clone(), vec![PrimitiveType::String]) - .build(); - let expected_types: HashMap> = [ - (a, vec![PrimitiveType::String]), - (b, vec![PrimitiveType::String]), - (c, vec![PrimitiveType::Any]), - (r, vec![PrimitiveType::String, PrimitiveType::Any]), - (s, vec![PrimitiveType::Integer, PrimitiveType::Integer]), - (t, vec![PrimitiveType::Integer, PrimitiveType::Integer]), - (q, vec![PrimitiveType::Integer]), - (get_fresh_rule_predicate(1), vec![PrimitiveType::Any]), - ] - .into_iter() - .collect(); - - let inferred_types = infer_types(&b_string_decl).unwrap().0; - assert_eq!(inferred_types, expected_types); - } - - #[test] - fn infer_types_b_and_c_int_decl() { - let ( - (basic_rule, exis_rule, rule_with_constant), - (fact1, fact2, fact3), - (a, b, c, r, s, t, q), - ) = get_test_rules_and_facts_and_predicates(); - - let b_integer_decl = ChaseProgram::builder() - .rule(basic_rule) - .rule(exis_rule) - .rule(rule_with_constant) - .fact(fact1) - .fact(fact2) - .fact(fact3) - .predicate_declaration(b.clone(), vec![PrimitiveType::Integer]) - .predicate_declaration(c.clone(), vec![PrimitiveType::Integer]) - .build(); - - let expected_types: HashMap> = [ - (a, vec![PrimitiveType::Integer]), - (b, vec![PrimitiveType::Integer]), - (c, vec![PrimitiveType::Integer]), - (r, vec![PrimitiveType::Integer, PrimitiveType::Any]), - (s, vec![PrimitiveType::Integer, PrimitiveType::Integer]), - (t, vec![PrimitiveType::Integer, PrimitiveType::Integer]), - (q, vec![PrimitiveType::Integer]), - (get_fresh_rule_predicate(1), vec![PrimitiveType::Any]), - ] - .into_iter() - .collect(); - - let inferred_types = infer_types(&b_integer_decl).unwrap().0; - assert_eq!(inferred_types, expected_types); - } - - #[test] - fn infer_types_b_source_decl() { - let ( - (basic_rule, exis_rule, rule_with_constant), - (fact1, fact2, fact3), - (a, b, c, r, s, t, q), - ) = get_test_rules_and_facts_and_predicates(); - - let b_source_decl = ChaseProgram::builder() - .import( - DsvHandler::csv() - .try_into_import(String::new(), b.clone(), TupleConstraint::from_arity(1)) - .unwrap(), - ) - .rule(basic_rule) - .rule(exis_rule) - .rule(rule_with_constant) - .fact(fact1) - .fact(fact2) - .fact(fact3) - .build(); - - let expected_types: HashMap> = [ - (a, vec![PrimitiveType::String]), - (b, vec![PrimitiveType::String]), - (c, vec![PrimitiveType::Any]), - (r, vec![PrimitiveType::String, PrimitiveType::Any]), - (s, vec![PrimitiveType::Integer, PrimitiveType::Integer]), - (t, vec![PrimitiveType::Integer, PrimitiveType::Integer]), - (q, vec![PrimitiveType::Integer]), - (get_fresh_rule_predicate(1), vec![PrimitiveType::Any]), - ] - .into_iter() - .collect(); - - let inferred_types = infer_types(&b_source_decl).unwrap().0; - assert_eq!(inferred_types, expected_types); - } - - #[test] - fn infer_types_c_explicit_decl_overrides_source_type() { - let ( - (basic_rule, exis_rule, rule_with_constant), - (fact1, fact2, fact3), - (a, b, c, r, s, t, q), - ) = get_test_rules_and_facts_and_predicates(); - - let c_explicit_decl_overrides_source_type = 
ChaseProgram::builder() - .import( - DsvHandler::csv() - .try_into_import(String::new(), c.clone(), TupleConstraint::from_arity(1)) - .unwrap(), - ) - .rule(basic_rule) - .rule(exis_rule) - .rule(rule_with_constant) - .fact(fact1) - .fact(fact2) - .fact(fact3) - .predicate_declaration(b.clone(), vec![PrimitiveType::Integer]) - .predicate_declaration(c.clone(), vec![PrimitiveType::Integer]) - .build(); - - let expected_types: HashMap> = [ - (a, vec![PrimitiveType::Integer]), - (b, vec![PrimitiveType::Integer]), - (c, vec![PrimitiveType::Integer]), - (r, vec![PrimitiveType::Integer, PrimitiveType::Any]), - (s, vec![PrimitiveType::Integer, PrimitiveType::Integer]), - (t, vec![PrimitiveType::Integer, PrimitiveType::Integer]), - (q, vec![PrimitiveType::Integer]), - (get_fresh_rule_predicate(1), vec![PrimitiveType::Any]), - ] - .into_iter() - .collect(); - - let inferred_types = infer_types(&c_explicit_decl_overrides_source_type) - .unwrap() - .0; - assert_eq!(inferred_types, expected_types); - } - - #[test] - fn infer_types_a_and_c_conflict_with_implicit_source_decl() { - let ( - (basic_rule, exis_rule, rule_with_constant), - (fact1, fact2, fact3), - (a, _b, c, _r, _s, _t, _q), - ) = get_test_rules_and_facts_and_predicates(); - - let a_and_c_conflict_with_implicit_source_decl = ChaseProgram::builder() - .import( - DsvHandler::csv() - .try_into_import(String::new(), c, TupleConstraint::from_arity(1)) - .unwrap(), - ) - .rule(basic_rule) - .rule(exis_rule) - .rule(rule_with_constant) - .fact(fact1) - .fact(fact2) - .fact(fact3) - .predicate_declaration(a, vec![PrimitiveType::Integer]) - .build(); - - let inferred_types_res = infer_types(&a_and_c_conflict_with_implicit_source_decl); - assert!(inferred_types_res.is_err()); - } - - #[test] - fn infer_types_a_and_c_conflict_with_explicit_source_decl_that_would_be_compatible_the_other_way_around( - ) { - let ( - (basic_rule, exis_rule, rule_with_constant), - (fact1, fact2, fact3), - (a, _b, c, _r, _s, _t, _q), - ) = get_test_rules_and_facts_and_predicates(); - - let a_and_c_conflict_with_explicit_source_decl_that_would_be_compatible_the_other_way_around = - ChaseProgram::builder() - .import( - DsvHandler::csv() - .try_into_import( - String::new(), - c, - TupleConstraint::at_least([PrimitiveType::Any]), - ) - .unwrap(), - ) - .rule(basic_rule) - .rule(exis_rule) - .rule(rule_with_constant) - .fact(fact1) - .fact(fact2) - .fact(fact3) - .predicate_declaration(a, vec![PrimitiveType::String]) - .build(); - - let inferred_types_res = - infer_types(&a_and_c_conflict_with_explicit_source_decl_that_would_be_compatible_the_other_way_around); - assert!(inferred_types_res.is_err()); - } - - #[test] - fn infer_types_a_and_b_source_decl_resolvable_conflict() { - let ( - (basic_rule, exis_rule, rule_with_constant), - (fact1, fact2, fact3), - (a, b, c, r, s, t, q), - ) = get_test_rules_and_facts_and_predicates(); - - let a_and_b_source_decl_resolvable_conflict = ChaseProgram::builder() - .import( - DsvHandler::csv() - .try_into_import(String::new(), b.clone(), TupleConstraint::from_arity(1)) - .unwrap(), - ) - .rule(basic_rule) - .rule(exis_rule) - .rule(rule_with_constant) - .fact(fact1) - .fact(fact2) - .fact(fact3) - .predicate_declaration(a.clone(), vec![PrimitiveType::Any]) - .build(); - - let expected_types: HashMap> = [ - (a, vec![PrimitiveType::Any]), - (b, vec![PrimitiveType::String]), - (c, vec![PrimitiveType::Any]), - (r, vec![PrimitiveType::Any, PrimitiveType::Any]), - (s, vec![PrimitiveType::Integer, PrimitiveType::Integer]), - (t, 
vec![PrimitiveType::Integer, PrimitiveType::Integer]), - (q, vec![PrimitiveType::Integer]), - (get_fresh_rule_predicate(1), vec![PrimitiveType::Any]), - ] - .into_iter() - .collect(); - - let inferred_types = infer_types(&a_and_b_source_decl_resolvable_conflict) - .unwrap() - .0; - assert_eq!(inferred_types, expected_types); - } - - #[test] - fn infer_types_r_source_decl_resolvable_conflict_with_exis() { - let ( - (basic_rule, exis_rule, rule_with_constant), - (fact1, fact2, fact3), - (a, b, c, r, s, t, q), - ) = get_test_rules_and_facts_and_predicates(); - - let r_source_decl_resolvable_conflict_with_exis = ChaseProgram::builder() - .import( - DsvHandler::csv() - .try_into_import(String::new(), r.clone(), TupleConstraint::from_arity(2)) - .unwrap(), - ) - .rule(basic_rule) - .rule(exis_rule) - .rule(rule_with_constant) - .fact(fact1) - .fact(fact2) - .fact(fact3) - .build(); - - let expected_types: HashMap> = [ - (a, vec![PrimitiveType::Any]), - (b, vec![PrimitiveType::Any]), - (c, vec![PrimitiveType::Any]), - (r, vec![PrimitiveType::String, PrimitiveType::Any]), - (s, vec![PrimitiveType::Integer, PrimitiveType::Integer]), - (t, vec![PrimitiveType::Integer, PrimitiveType::Integer]), - (q, vec![PrimitiveType::Integer]), - (get_fresh_rule_predicate(1), vec![PrimitiveType::Any]), - ] - .into_iter() - .collect(); - - let inferred_types = infer_types(&r_source_decl_resolvable_conflict_with_exis) - .unwrap() - .0; - assert_eq!(inferred_types, expected_types); - } - - #[test] - fn infer_types_b_and_c_conflict_decl() { - let ( - (basic_rule, exis_rule, rule_with_constant), - (fact1, fact2, fact3), - (_a, b, c, _r, _s, _t, _q), - ) = get_test_rules_and_facts_and_predicates(); - - let b_and_c_conflict_decl = ChaseProgram::builder() - .rule(basic_rule) - .rule(exis_rule) - .rule(rule_with_constant) - .fact(fact1) - .fact(fact2) - .fact(fact3) - .predicate_declaration(b, vec![PrimitiveType::Integer]) - .predicate_declaration(c, vec![PrimitiveType::String]) - .build(); - - let inferred_types_res = infer_types(&b_and_c_conflict_decl); - assert!(inferred_types_res.is_err()); - } - - #[test] - fn infer_types_b_anc_c_conflict_decl_resolvable() { - let ( - (basic_rule, exis_rule, rule_with_constant), - (fact1, fact2, fact3), - (a, b, c, r, s, t, q), - ) = get_test_rules_and_facts_and_predicates(); - - let b_and_c_conflict_decl_resolvable = ChaseProgram::builder() - .rule(basic_rule) - .rule(exis_rule) - .rule(rule_with_constant) - .fact(fact1) - .fact(fact2) - .fact(fact3) - .predicate_declaration(b.clone(), vec![PrimitiveType::Any]) - .predicate_declaration(c.clone(), vec![PrimitiveType::String]) - .build(); - - let expected_types: HashMap> = [ - (a, vec![PrimitiveType::Any]), - (b, vec![PrimitiveType::Any]), - (c, vec![PrimitiveType::String]), - (r, vec![PrimitiveType::Any, PrimitiveType::Any]), - (s, vec![PrimitiveType::Integer, PrimitiveType::Integer]), - (t, vec![PrimitiveType::Integer, PrimitiveType::Integer]), - (q, vec![PrimitiveType::Integer]), - (get_fresh_rule_predicate(1), vec![PrimitiveType::Any]), - ] - .into_iter() - .collect(); - - let inferred_types = infer_types(&b_and_c_conflict_decl_resolvable).unwrap().0; - assert_eq!(inferred_types, expected_types); - } - - #[test] - fn infer_types_a_unresolvable_conflict_with_source_decls_of_b_and_c() { - let ( - (basic_rule, exis_rule, rule_with_constant), - (fact1, fact2, fact3), - (a, b, c, _r, _s, _t, _q), - ) = get_test_rules_and_facts_and_predicates(); - - let a_unresolvable_conflict_with_source_decls_of_b_and_c = ChaseProgram::builder() - 
.import( - DsvHandler::csv() - .try_into_import( - String::new(), - b, - TupleConstraint::at_least([PrimitiveType::Integer]), - ) - .unwrap(), - ) - .import( - DsvHandler::csv() - .try_into_import( - String::new(), - c, - TupleConstraint::at_least([PrimitiveType::Integer]), - ) - .unwrap(), - ) - .rule(basic_rule) - .rule(exis_rule) - .rule(rule_with_constant) - .fact(fact1) - .fact(fact2) - .fact(fact3) - .predicate_declaration(a, vec![PrimitiveType::Any]) - .build(); - - let inferred_types_res = infer_types(&a_unresolvable_conflict_with_source_decls_of_b_and_c); - assert!(inferred_types_res.is_err()); - } - - #[test] - fn infer_types_s_decl_unresolvable_conflict_with_fact_values() { - let ( - (basic_rule, exis_rule, rule_with_constant), - (fact1, fact2, fact3), - (_a, _b, _c, _r, s, _t, _q), - ) = get_test_rules_and_facts_and_predicates(); - - let s_decl_unresolvable_conflict_with_fact_values = ChaseProgram::builder() - .rule(basic_rule) - .rule(exis_rule) - .rule(rule_with_constant) - .fact(fact1) - .fact(fact2) - .fact(fact3) - .predicate_declaration(s, vec![PrimitiveType::Any, PrimitiveType::Integer]) - .build(); - - let inferred_types_res = infer_types(&s_decl_unresolvable_conflict_with_fact_values); - assert!(inferred_types_res.is_err()); - } - - #[test] - fn infer_types_two_times_same_head_predicate() { - let ( - (_basic_rule, exis_rule, _rule_with_constant), - (_fact1, _fact2, _fact3), - (a, _b, _c, r, _s, _t, _q), - ) = get_test_rules_and_facts_and_predicates(); - - let x = Variable::Universal(Identifier("x".to_string())); - let y = Variable::Existential(Identifier("y".to_string())); - let z = Variable::Existential(Identifier("z".to_string())); - - let ty = PrimitiveTerm::Variable(y); - let tz = PrimitiveTerm::Variable(z); - - // R(!y, !z) :- A(x). - let exis_rule_2 = ChaseRule::new( - vec![PrimitiveAtom::new(r.clone(), vec![ty, tz])], - vec![], - vec![], - vec![VariableAtom::new(a.clone(), vec![x])], - vec![], - vec![], - vec![], - ); - - let two_times_same_head_predicate = ChaseProgram::builder() - .rule(exis_rule_2) - .rule(exis_rule) - .predicate_declaration(a.clone(), vec![PrimitiveType::String]) - .build(); - - let expected_types: HashMap> = [ - (a, vec![PrimitiveType::String]), - (r, vec![PrimitiveType::Any, PrimitiveType::Any]), - ( - get_fresh_rule_predicate(0), - vec![PrimitiveType::Any, PrimitiveType::Any], - ), - (get_fresh_rule_predicate(1), vec![PrimitiveType::Any]), - ] - .into_iter() - .collect(); - - let inferred_types = infer_types(&two_times_same_head_predicate).unwrap().0; - assert_eq!(inferred_types, expected_types); - } -} diff --git a/nemo/src/program_analysis/type_inference/position_graph.rs b/nemo/src/program_analysis/type_inference/position_graph.rs deleted file mode 100644 index bd1df69a1..000000000 --- a/nemo/src/program_analysis/type_inference/position_graph.rs +++ /dev/null @@ -1,335 +0,0 @@ -use std::collections::{HashMap, HashSet}; - -use petgraph::{ - visit::{Dfs, EdgeFiltered}, - Directed, -}; - -use crate::{ - model::{ - chase_model::{ChaseAtom, ChaseRule}, - types::error::TypeError, - Identifier, PrimitiveTerm, PrimitiveType, Variable, - }, - util::labeled_graph::LabeledGraph, -}; - -use super::type_requirement::PredicateTypeRequirements; - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub(super) struct PredicatePosition { - pub(super) predicate: Identifier, - pub(super) position: usize, -} - -impl PredicatePosition { - /// Create new [PredicatePosition]. 
- pub fn new(predicate: Identifier, position: usize) -> Self { - Self { - predicate, - position, - } - } -} - -#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] -pub(super) enum PositionGraphEdge { - WithinBody, - BodyToHeadSameVariable, - /// Describes data flow from an aggregate input variable to and aggregate output variable, where the output variable has a static type (see [AggregateOperation::static_output_type]). - /// This has no impact on type propagation from input to output, but is there for completeness sake, as it still describes some data flow. - BodyToHeadAggregateStaticOutputType, - /// Describes data flow from an aggregate input variable to and aggregate output variable, where the aggregate output variable has the same type as the input variable (see [AggregateOperation::static_output_type]). - BodyToHeadAggregateNonStaticOutputType, -} - -pub(super) type PositionGraph = LabeledGraph; - -impl PositionGraph { - pub(super) fn from_rules(rules: &Vec) -> PositionGraph { - let mut graph = PositionGraph::default(); - - for rule in rules { - let mut variables_to_head_positions = - HashMap::>::new(); - - for atom in rule.head() { - for (term_position, term) in atom.terms().iter().enumerate() { - if let PrimitiveTerm::Variable(variable) = term { - let predicate_position = - PredicatePosition::new(atom.predicate(), term_position); - - variables_to_head_positions - .entry(variable.clone()) - .and_modify(|e| e.push(predicate_position.clone())) - .or_insert(vec![predicate_position]); - } - } - } - - let mut aggregate_input_to_output_variables = - HashMap::>::new(); - for aggregate in rule.aggregates() { - for input_variable_identifier in &aggregate.input_variables { - let edge_label = if aggregate.aggregate_operation.static_output_type().is_some() - { - PositionGraphEdge::BodyToHeadAggregateStaticOutputType - } else { - PositionGraphEdge::BodyToHeadAggregateNonStaticOutputType - }; - - aggregate_input_to_output_variables - .entry(input_variable_identifier.clone()) - .or_default() - .push((aggregate.output_variable.clone(), edge_label)); - } - } - - let mut variables_to_last_node = HashMap::::new(); - - for atom in rule.all_body() { - for (term_position, variable) in atom.terms().iter().enumerate() { - let predicate_position = - PredicatePosition::new(atom.predicate(), term_position); - - // Add head position edges - { - // NOTE: we connect each body position to each head position of the same variable - for pos in variables_to_head_positions - .get(variable) - .into_iter() - .flatten() - { - graph.add_edge( - predicate_position.clone(), - pos.clone(), - PositionGraphEdge::BodyToHeadSameVariable, - ); - } - - // NOTE: we connect every aggregate input variable to it's corresponding output variable in the head. 
- // If there the output variable is used inside a constructor, we connect the aggregate input variable to the constructor instead - // Aggregate output variables may be used multiple times in the future, possibly also inside of constructors - for (output_variable, edge_label) in aggregate_input_to_output_variables - .get(variable) - .into_iter() - .flatten() - { - // The aggregate output is used in all head atoms that use the aggregate output variable - let mut variables_using_aggregate_variable = vec![output_variable]; - - // Furthermore, all constructors that use the aggregate output variable should get the same type restrictions - for constructor in rule.constructors() { - if constructor - .term() - .primitive_terms() - .contains(&&PrimitiveTerm::Variable(output_variable.clone())) - { - variables_using_aggregate_variable.push(constructor.variable()); - } - } - - // Head positions that use the body variable through aggregates/constructors - let relevant_head_positions = variables_using_aggregate_variable - .into_iter() - .filter_map(|variable| variables_to_head_positions.get(variable)) - .flatten(); - - for pos in relevant_head_positions { - graph.add_edge( - predicate_position.clone(), - pos.clone(), - *edge_label, - ); - } - } - } - - // NOTE: we do not fully interconnect body positions as we start DFS from - // each possible position later covering all possible combinations - // nonetheless - variables_to_last_node - .entry(variable.clone()) - .and_modify(|entry| { - let last_position = - std::mem::replace(entry, predicate_position.clone()); - graph.add_edge( - last_position.clone(), - predicate_position.clone(), - PositionGraphEdge::WithinBody, - ); - graph.add_edge( - predicate_position.clone(), - last_position, - PositionGraphEdge::WithinBody, - ); - }) - .or_insert(predicate_position); - } - } - - for constructor in rule.constructors() { - for head_position in variables_to_head_positions - .get(constructor.variable()) - .unwrap_or(&[]) - { - for term in constructor.term().primitive_terms() { - if let PrimitiveTerm::Variable(body_variable) = term { - // There might be no entry in `variables_to_last_node`, e.g. 
when the variable is an aggregate output variable, because in this case the variable is not in the body - if let Some(body_position) = variables_to_last_node.get(body_variable) { - graph.add_edge( - body_position.clone(), - head_position.clone(), - PositionGraphEdge::BodyToHeadSameVariable, - ); - } - } - } - } - } - - for constraint in rule.all_constraints() { - let variables = constraint - .left() - .variables() - .chain(constraint.right().variables()); - let next_variables = constraint - .left() - .variables() - .chain(constraint.right().variables()) - .skip(1); - - for (current_variable, next_variable) in variables.zip(next_variables) { - let position_current = variables_to_last_node - .get(current_variable) - .expect("Variables in filters should also appear in the rule body") - .clone(); - let position_next = variables_to_last_node - .get(next_variable) - .expect("Variables in filters should also appear in the rule body") - .clone(); - - graph.add_edge( - position_current.clone(), - position_next.clone(), - PositionGraphEdge::WithinBody, - ); - graph.add_edge( - position_next, - position_current, - PositionGraphEdge::WithinBody, - ); - } - } - } - - graph - } - - fn dfs_for_type_requirements_check( - &self, - edge_types: HashSet, - start_position: PredicatePosition, - mut payload: impl FnMut(&PredicatePosition) -> Result<(), TypeError>, - ) -> Result<(), TypeError> { - if let Some(start_node) = self.get_node(&start_position) { - let edge_filtered_graph = - EdgeFiltered::from_fn(self.graph(), |e| edge_types.contains(e.weight())); - - let mut dfs = Dfs::new(&edge_filtered_graph, start_node); - - while let Some(next_node) = dfs.next(&edge_filtered_graph) { - let next_position = self - .graph() - .node_weight(next_node) - .expect("The DFS iterator guarantees that every node exists."); - - payload(next_position)?; - } - } - - Ok(()) - } - - pub(super) fn propagate_type_requirements( - &self, - reqs: PredicateTypeRequirements, - ) -> Result { - let mut propagated_reqs = reqs.clone(); - - // Propagate each type from its declaration - for (predicate, types) in reqs.into_iter() { - for (position, logical_type_requirement) in types.iter().enumerate() { - let predicate_position = PredicatePosition::new(predicate.clone(), position); - - self.dfs_for_type_requirements_check( - HashSet::from([ - PositionGraphEdge::BodyToHeadSameVariable, - PositionGraphEdge::BodyToHeadAggregateNonStaticOutputType, - ]), - predicate_position, - |next_position| { - let current_type_requirement = &mut propagated_reqs - .get_mut(&next_position.predicate) - .expect("The initialization step inserted every known predicate") - [next_position.position]; - - if let Some(replacement) = current_type_requirement - .replace_with_max_type_if_compatible(*logical_type_requirement) - { - *current_type_requirement = replacement; - Ok(()) - } else { - Err(TypeError::InvalidRuleConflictingTypes( - next_position.predicate.0.clone(), - next_position.position + 1, - Option::::from(*current_type_requirement) - .expect("if the type requirement is none, there is a maximum"), - Option::::from(*logical_type_requirement) - .expect("if the type requirement is none, there is a maximum"), - )) - } - }, - )?; - } - } - - Ok(propagated_reqs) - } - - pub(super) fn check_type_requirement_compatibility( - &self, - reqs: &HashMap>, - ) -> Result<(), TypeError> { - // Check compatibility of body types without overwriting - for (predicate, types) in reqs { - for (position, logical_type) in types.iter().enumerate() { - let predicate_position = 
PredicatePosition::new(predicate.clone(), position); - - self.dfs_for_type_requirements_check( - HashSet::from([PositionGraphEdge::WithinBody]), - predicate_position, - |next_position| { - let current_type = &reqs - .get(&next_position.predicate) - .expect("The initialization step inserted every known predicate") - [next_position.position]; - - current_type.partial_cmp(logical_type).map(|_| ()).ok_or( - // TODO: maybe just throw a warning here? (comparison of incompatible - // types can be done but will trivially result in inequality) - TypeError::InvalidRuleConflictingTypes( - next_position.predicate.0.clone(), - next_position.position + 1, - *current_type, - *logical_type, - ), - ) - }, - )?; - } - } - - Ok(()) - } -} diff --git a/nemo/src/program_analysis/type_inference/type_requirement.rs b/nemo/src/program_analysis/type_inference/type_requirement.rs deleted file mode 100644 index 02b55d8c3..000000000 --- a/nemo/src/program_analysis/type_inference/type_requirement.rs +++ /dev/null @@ -1,366 +0,0 @@ -use std::collections::HashMap; - -use nemo_physical::aggregates::operation::AggregateOperation; - -use crate::model::{ - chase_model::{ - variable::{is_aggregate_variable, is_construct_variable}, - ChaseAtom, ChaseFact, ChaseRule, Constructor, - }, - types::error::TypeError, - Identifier, ImportDirective, PrimitiveTerm, PrimitiveType, Term, TypeConstraint, Variable, -}; - -#[derive(Clone, Copy, Debug, PartialEq)] -pub(super) enum TypeRequirement { - Hard(PrimitiveType), - Soft(PrimitiveType), - None, -} - -impl TypeRequirement { - pub(super) fn stricter_requirement(self, other: Self) -> Option { - match self { - Self::Hard(t1) => match other { - Self::Hard(t2) => (t1 == t2).then_some(self), - Self::Soft(t2) => (t1 >= t1.max_type(&t2)).then_some(self), - Self::None => Some(self), - }, - Self::Soft(t1) => match other { - Self::Hard(t2) => (t1.max_type(&t2) <= t2).then_some(other), - Self::Soft(t2) => Some(Self::Soft(t1.max_type(&t2))), - Self::None => Some(self), - }, - Self::None => Some(other), - } - } - - pub(super) fn replace_with_max_type_if_compatible(self, other: Self) -> Option { - match self { - Self::Hard(t1) => match other { - Self::Hard(t2) => (t1 >= t2).then_some(self), - Self::Soft(t2) => { - let max = t1.max_type(&t2); - // check if the max type is compatible with both types via partial ord - (t1 >= max && max >= t2).then_some(self) - } - Self::None => Some(self), - }, - Self::Soft(t1) => match other { - Self::Hard(t2) | Self::Soft(t2) => { - let max = t1.max_type(&t2); - // check if the max type is compatible with both types via partial ord - (max >= t1 && max >= t2).then_some(Self::Soft(max)) - } - Self::None => Some(self), - }, - Self::None => match other { - Self::Hard(t2) | Self::Soft(t2) => Some(Self::Soft(t2)), - Self::None => Some(Self::None), - }, - } - } -} - -impl From for TypeRequirement { - fn from(value: TypeConstraint) -> Self { - match value { - TypeConstraint::None => TypeRequirement::None, - TypeConstraint::Exact(p) => TypeRequirement::Hard(p), - TypeConstraint::AtLeast(p) => TypeRequirement::Soft(p), - TypeConstraint::Tuple(_) => { - unimplemented!("currently nested type checking is not supported") - } - } - } -} - -impl From for Option { - fn from(source: TypeRequirement) -> Self { - match source { - TypeRequirement::Hard(t) => Some(t), - TypeRequirement::Soft(t) => Some(t), - TypeRequirement::None => None, - } - } -} - -pub(super) type PredicateTypeRequirements = HashMap>; - -fn add_type_requirements( - current_reqs: &mut PredicateTypeRequirements, - pred: 
Identifier, - reqs_to_add: Vec, - // force_use_of_stricter_requirement: bool, -) -> Result<(), TypeError> { - let mut types_not_in_conflict: Result<_, _> = Ok(()); - - current_reqs - .entry(pred.clone()) - .and_modify(|types| { - types - .iter_mut() - .zip(reqs_to_add.iter()) - .enumerate() - .for_each(|(index, (a, b))| { - let replacement = a.stricter_requirement(*b); - - match replacement { - Some(replacement) => *a = replacement, - None => { - types_not_in_conflict = Err(TypeError::InvalidRuleConflictingTypes( - pred.0.clone(), - index, - Option::::from(*a) - .expect("if the type requirement is none, there is a maximum"), - Option::::from(*b) - .expect("if the type requirement is none, there is a maximum"), - )) - } - } - }); - }) - .or_insert(reqs_to_add); - - types_not_in_conflict -} - -pub(super) fn merge_type_requirements( - a: &mut PredicateTypeRequirements, - b: PredicateTypeRequirements, - // force_use_of_stricter_requirement: bool, -) -> Result<(), TypeError> { - for (pred, reqs) in b { - add_type_requirements(a, pred, reqs)? - } - - Ok(()) -} - -pub(super) fn override_type_requirements( - a: &mut PredicateTypeRequirements, - b: PredicateTypeRequirements, -) { - a.extend(b) -} - -pub(super) fn requirements_from_pred_decls( - decls: &HashMap>, -) -> Result { - let mut type_requirements = HashMap::new(); - - for (pred, types) in decls { - add_type_requirements( - &mut type_requirements, - pred.clone(), - types - .iter() - .copied() - .map(TypeRequirement::Hard) - .collect::>(), - )?; - } - - Ok(type_requirements) -} - -pub(super) fn requirements_from_imports<'a, T: Iterator>( - imports: T, -) -> Result { - let mut type_requirements = HashMap::new(); - - for import_spec in imports { - add_type_requirements( - &mut type_requirements, - import_spec.predicate().clone(), - import_spec - .type_constraint() - .iter() - .cloned() - .map(TypeRequirement::from) - .collect(), - )?; - } - - Ok(type_requirements) -} - -pub(super) fn requirements_from_facts( - facts: &Vec, -) -> Result { - let mut fact_decls: PredicateTypeRequirements = HashMap::new(); - for fact in facts { - let reqs_for_fact: Vec = fact - .terms() - .iter() - .map(|c| c.primitive_type()) - .map(|maybe_type| match maybe_type { - Some(primitive_type) => TypeRequirement::Soft(primitive_type), - None => TypeRequirement::None, - }) - .collect(); - - add_type_requirements(&mut fact_decls, fact.predicate(), reqs_for_fact)?; - } - - Ok(fact_decls) -} - -pub(super) fn requirements_from_literals_in_rules( - rules: &Vec, -) -> Result { - let mut literal_decls: PredicateTypeRequirements = HashMap::new(); - - for chase_rule in rules { - let constructors = chase_rule.constructors(); - let constraints: Vec<_> = chase_rule.all_constraints().collect(); - - for chase_atom in chase_rule - .all_body() - .cloned() - .map(Into::into) - .chain(chase_rule.head().iter().cloned()) - { - let reqs_for_atom: Vec = chase_atom - .terms() - .iter() - .map(|term| match term { - // TODO: should we respect other things here? - // variables nested in terms in constraints in particular? 
- PrimitiveTerm::Variable(v) => constructors - .iter() - .filter_map(|c| (c.variable() == v).then_some(c.term())) - .chain(constraints.iter().filter_map(|c| { - match c.left() { - Term::Primitive(PrimitiveTerm::Variable(var)) => (*term - == PrimitiveTerm::Variable(var.clone())) - .then_some(c.right()), - _ => match c.right() { - Term::Primitive(PrimitiveTerm::Variable(var)) => (*term - == PrimitiveTerm::Variable(var.clone())) - .then_some(c.left()), - _ => None, - }, - } - })) - .fold(None, |acc, t| { - let b = t.primitive_type(); - acc.map(|a: PrimitiveType| b.map(|b| a.max_type(&b)).unwrap_or(a)) - .or(b) - }), - _ => term.primitive_type(), - }) - .map(|opt_t| { - opt_t - .map(TypeRequirement::Soft) - .unwrap_or(TypeRequirement::None) - }) - .collect(); - - add_type_requirements(&mut literal_decls, chase_atom.predicate(), reqs_for_atom)?; - } - } - - Ok(literal_decls) -} - -pub(super) fn aggregate_output_variables_in_constructor( - constructor: &Constructor, -) -> impl Iterator { - constructor - .term() - .primitive_terms() - .into_iter() - .filter_map(|term| match term { - PrimitiveTerm::Variable(variable) => { - if is_aggregate_variable(variable) { - Some(variable) - } else { - None - } - } - _ => None, - }) -} - -pub(super) fn requirements_from_aggregates_in_rules( - rules: &[ChaseRule], -) -> Result { - let mut type_requirements = HashMap::new(); - - for (rule, atom) in rules - .iter() - .flat_map(|rule| rule.head().iter().map(move |atom| (rule, atom))) - { - add_type_requirements( - &mut type_requirements, - atom.predicate(), - atom.terms() - .iter() - .map(|term| { - if let PrimitiveTerm::Variable(variable) = term { - // Add static output type to aggregate output variables and constructors using them - - let aggregate_output_variables_used = if is_aggregate_variable(variable) { - // If the term is a aggregate variable it self, add it - vec![variable] - } else if is_construct_variable(variable) { - let constructor = rule - .constructors() - .iter() - .find(|constructor| constructor.variable() == variable).expect("variable with constructor prefix is missing an associated constructor"); - - // Add aggregate variables contained in this constructor - aggregate_output_variables_in_constructor(constructor).collect() - } else { - vec![] - }; - - for aggregate_output_variable in aggregate_output_variables_used { - let aggregate = rule - .aggregates() - .iter() - .find(|aggregate| aggregate.output_variable == *aggregate_output_variable).expect("variable with aggregate prefix is missing an associated aggregate"); - if - aggregate.aggregate_operation == - AggregateOperation::Count - { - return TypeRequirement::Hard(PrimitiveType::Integer) - } else { - debug_assert_eq!(aggregate.aggregate_operation.static_output_type(), None) - } - } - } - TypeRequirement::None - }) - .collect(), - )?; - } - - Ok(type_requirements) -} - -pub(super) fn requirements_from_existentials_in_rules( - rules: &[ChaseRule], -) -> Result { - let mut type_requirements = HashMap::new(); - - for atom in rules.iter().flat_map(|r| r.head()) { - add_type_requirements( - &mut type_requirements, - atom.predicate(), - atom.terms() - .iter() - .map(|t| { - if matches!(t, PrimitiveTerm::Variable(Variable::Existential(_))) { - TypeRequirement::Hard(PrimitiveType::Any) - } else { - TypeRequirement::None - } - }) - .collect::>(), - )?; - } - - Ok(type_requirements) -} From c3fc21d8766d12124e9e664e7c8b3cfd980fb57a Mon Sep 17 00:00:00 2001 From: Alex Ivliev Date: Thu, 5 Sep 2024 12:19:27 +0200 Subject: [PATCH 153/214] Add missing life 
times

---
 nemo/src/parser/ast.rs                    | 3 +--
 nemo/src/parser/ast/comment/wsoc.rs       | 4 ++--
 nemo/src/parser/ast/directive/base.rs     | 2 +-
 nemo/src/parser/ast/directive/declare.rs  | 2 +-
 nemo/src/parser/ast/directive/export.rs   | 2 +-
 nemo/src/parser/ast/directive/import.rs   | 2 +-
 nemo/src/parser/ast/directive/output.rs   | 2 +-
 nemo/src/parser/ast/directive/prefix.rs   | 2 +-
 nemo/src/parser/ast/sequence.rs           | 2 +-
 nemo/src/parser/ast/sequence/declare.rs   | 2 +-
 nemo/src/parser/ast/sequence/key_value.rs | 2 +-
 nemo/src/parser/ast/statement.rs          | 1 -
 nemo/src/parser/ast/token.rs              | 4 ++--
 nemo/src/parser/error.rs                  | 4 ++--
 14 files changed, 16 insertions(+), 18 deletions(-)

diff --git a/nemo/src/parser/ast.rs b/nemo/src/parser/ast.rs
index 874411847..02b2354ac 100644
--- a/nemo/src/parser/ast.rs
+++ b/nemo/src/parser/ast.rs
@@ -15,12 +15,11 @@ use std::fmt::Debug;
 
 use super::{context::ParserContext, span::Span, ParserInput, ParserResult};
 use ascii_tree::Tree;
-use token::Token;
 
 /// Trait implemented by nodes in the abstract syntax tree
 pub trait ProgramAST<'a>: Debug + Sync {
     /// Return all children of this node.
-    fn children(&'a self) -> Vec<&'a dyn ProgramAST>;
+    fn children(&'a self) -> Vec<&'a dyn ProgramAST<'a>>;
 
     /// Return the region of text this node originates from.
     fn span(&self) -> Span<'a>;
diff --git a/nemo/src/parser/ast/comment/wsoc.rs b/nemo/src/parser/ast/comment/wsoc.rs
index 17a8a965e..1fc1df7da 100644
--- a/nemo/src/parser/ast/comment/wsoc.rs
+++ b/nemo/src/parser/ast/comment/wsoc.rs
@@ -37,11 +37,11 @@ impl<'a> WSoC<'a> {
         &self.comments
     }
 
-    fn parse_whitespace(input: ParserInput<'a>) -> ParserResult<'a, Option<CommentType>> {
+    fn parse_whitespace(input: ParserInput<'a>) -> ParserResult<'a, Option<CommentType<'a>>> {
         Token::whitespace(input).map(|(rest, _)| (rest, None))
     }
 
-    fn parse_comment(input: ParserInput<'a>) -> ParserResult<'a, Option<CommentType>> {
+    fn parse_comment(input: ParserInput<'a>) -> ParserResult<'a, Option<CommentType<'a>>> {
         alt((
             map(LineComment::parse, CommentType::Line),
             map(ClosedComment::parse, CommentType::Closed),
diff --git a/nemo/src/parser/ast/directive/base.rs b/nemo/src/parser/ast/directive/base.rs
index fbe391574..a7aea2f9f 100644
--- a/nemo/src/parser/ast/directive/base.rs
+++ b/nemo/src/parser/ast/directive/base.rs
@@ -26,7 +26,7 @@ impl<'a> Base<'a> {
         &self.iri
     }
 
-    pub fn parse_body(input: ParserInput<'a>) -> ParserResult<'a, Iri> {
+    pub fn parse_body(input: ParserInput<'a>) -> ParserResult<'a, Iri<'a>> {
         Iri::parse(input)
     }
 }
diff --git a/nemo/src/parser/ast/directive/declare.rs b/nemo/src/parser/ast/directive/declare.rs
index e457afe2f..8f756f2c5 100644
--- a/nemo/src/parser/ast/directive/declare.rs
+++ b/nemo/src/parser/ast/directive/declare.rs
@@ -41,7 +41,7 @@ impl<'a> Declare<'a> {
 
     pub fn parse_body(
         input: ParserInput<'a>,
-    ) -> ParserResult<'a, (StructureTag, Sequence<NameTypePair>)> {
+    ) -> ParserResult<'a, (StructureTag<'a>, Sequence<'a, NameTypePair<'a>>)> {
         separated_pair(
             StructureTag::parse,
             WSoC::parse,
diff --git a/nemo/src/parser/ast/directive/export.rs b/nemo/src/parser/ast/directive/export.rs
index 210ecee30..ac3dece84 100644
--- a/nemo/src/parser/ast/directive/export.rs
+++ b/nemo/src/parser/ast/directive/export.rs
@@ -36,7 +36,7 @@ impl<'a> Export<'a> {
         &self.instructions
     }
 
-    pub fn parse_body(input: ParserInput<'a>) -> ParserResult<'a, (StructureTag, Map)> {
+    pub fn parse_body(input: ParserInput<'a>) -> ParserResult<'a, (StructureTag<'a>, Map<'a>)> {
         tuple((
             StructureTag::parse,
             WSoC::parse,
diff --git a/nemo/src/parser/ast/directive/import.rs b/nemo/src/parser/ast/directive/import.rs
index 2d44feb97..7f74472d1 100644
--- a/nemo/src/parser/ast/directive/import.rs
+++ b/nemo/src/parser/ast/directive/import.rs
@@ -36,7 +36,7 @@ impl<'a> Import<'a> {
         &self.instructions
     }
 
-    pub fn parse_body(input: ParserInput<'a>) -> ParserResult<'a, (StructureTag, Map)> {
+    pub fn parse_body(input: ParserInput<'a>) -> ParserResult<'a, (StructureTag<'a>, Map<'a>)> {
         tuple((
             StructureTag::parse,
             WSoC::parse,
diff --git a/nemo/src/parser/ast/directive/output.rs b/nemo/src/parser/ast/directive/output.rs
index 2f2af97aa..93331b118 100644
--- a/nemo/src/parser/ast/directive/output.rs
+++ b/nemo/src/parser/ast/directive/output.rs
@@ -32,7 +32,7 @@ impl<'a> Output<'a> {
     /// Parse the sequence of predicates that are marked as output.
     fn parse_predicate_sequence(
         input: ParserInput<'a>,
-    ) -> ParserResult<'a, Sequence<'a, StructureTag>> {
+    ) -> ParserResult<'a, Sequence<'a, StructureTag<'a>>> {
         Sequence::<StructureTag>::parse(input)
     }
 }
diff --git a/nemo/src/parser/ast/directive/prefix.rs b/nemo/src/parser/ast/directive/prefix.rs
index a93badecb..bedeb89a6 100644
--- a/nemo/src/parser/ast/directive/prefix.rs
+++ b/nemo/src/parser/ast/directive/prefix.rs
@@ -38,7 +38,7 @@ impl<'a> Prefix<'a> {
         &self.iri
     }
 
-    pub fn parse_body(input: ParserInput<'a>) -> ParserResult<'a, (Token, Iri)> {
+    pub fn parse_body(input: ParserInput<'a>) -> ParserResult<'a, (Token<'a>, Iri<'a>)> {
         separated_pair(
             Token::name,
             tuple((WSoC::parse, Token::prefix_assignment, WSoC::parse)),
diff --git a/nemo/src/parser/ast/sequence.rs b/nemo/src/parser/ast/sequence.rs
index 24b233055..cd746eb0b 100644
--- a/nemo/src/parser/ast/sequence.rs
+++ b/nemo/src/parser/ast/sequence.rs
@@ -98,7 +98,7 @@ impl<'a, T: ProgramAST<'a> + 'a> Sequence<'a, T> {
 }
 
 impl<'a, T: std::fmt::Debug + Sync + ProgramAST<'a>> ProgramAST<'a> for Sequence<'a, T> {
-    fn children(&'a self) -> Vec<&'a dyn ProgramAST> {
+    fn children(&'a self) -> Vec<&'a dyn ProgramAST<'a>> {
         let mut vec: Vec<&dyn ProgramAST> = Vec::new();
         for elem in &self.elements {
             vec.push(elem);
diff --git a/nemo/src/parser/ast/sequence/declare.rs b/nemo/src/parser/ast/sequence/declare.rs
index 0fe376ffd..04961ec1c 100644
--- a/nemo/src/parser/ast/sequence/declare.rs
+++ b/nemo/src/parser/ast/sequence/declare.rs
@@ -26,7 +26,7 @@ pub struct NameTypePair<'a> {
 }
 
 impl<'a> ProgramAST<'a> for NameTypePair<'a> {
-    fn children(&'a self) -> Vec<&'a dyn ProgramAST> {
+    fn children(&'a self) -> Vec<&'a dyn ProgramAST<'a>> {
         vec![&self.name, &self.datatype]
     }
diff --git a/nemo/src/parser/ast/sequence/key_value.rs b/nemo/src/parser/ast/sequence/key_value.rs
index 64d0c28a3..827839e2c 100644
--- a/nemo/src/parser/ast/sequence/key_value.rs
+++ b/nemo/src/parser/ast/sequence/key_value.rs
@@ -29,7 +29,7 @@ impl<'a> KeyValuePair<'a> {
 }
 
 impl<'a> ProgramAST<'a> for KeyValuePair<'a> {
-    fn children(&'a self) -> Vec<&'a dyn ProgramAST> {
+    fn children(&'a self) -> Vec<&'a dyn ProgramAST<'a>> {
         vec![&self.key, &self.value]
     }
diff --git a/nemo/src/parser/ast/statement.rs b/nemo/src/parser/ast/statement.rs
index ffe1820a8..5e604709e 100644
--- a/nemo/src/parser/ast/statement.rs
+++ b/nemo/src/parser/ast/statement.rs
@@ -5,7 +5,6 @@ use nom::{
     combinator::{map, opt},
     sequence::{pair, terminated},
 };
-use nom_locate::LocatedSpan;
 
 use crate::parser::{
     context::{context, ParserContext},
diff --git a/nemo/src/parser/ast/token.rs b/nemo/src/parser/ast/token.rs
index 2475dc571..d3a311a0c 100644
--- a/nemo/src/parser/ast/token.rs
+++ b/nemo/src/parser/ast/token.rs
@@ -270,7 +270,7 @@ impl<'a> Token<'a> {
 macro_rules! string_token {
     ($func_name: ident, $token: expr) => {
         /// Parse this token.
-        pub fn $func_name(input: ParserInput<'a>) -> ParserResult<'a, Token> {
+        pub fn $func_name(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>> {
             map(
                 context(ParserContext::Token { kind: $token }, tag($token.name())),
                 |input: ParserInput| Token {
@@ -523,7 +523,7 @@ impl<'a> Token<'a> {
         })
     }
 
-    pub fn comment(input: ParserInput<'a>) -> ParserResult<'a, Token> {
+    pub fn comment(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>> {
         context(
             ParserContext::token(TokenKind::Comment),
             verify(
diff --git a/nemo/src/parser/error.rs b/nemo/src/parser/error.rs
index c9ed8460d..00367262a 100644
--- a/nemo/src/parser/error.rs
+++ b/nemo/src/parser/error.rs
@@ -65,7 +65,7 @@ pub(crate) fn skip_statement<'a>(input: ParserInput<'a>) -> ParserResult<'a, Tok
 
 pub(crate) fn recover<'a>(
     mut parser: impl Parser<ParserInput<'a>, Statement<'a>, ParserErrorTree<'a>>,
-) -> impl FnMut(ParserInput<'a>) -> ParserResult<Statement<'a>> {
+) -> impl FnMut(ParserInput<'a>) -> ParserResult<'a, Statement<'a>> {
     move |input: ParserInput<'a>| match parser.parse(input.clone()) {
         Ok((rest, statement)) => Ok((rest, statement)),
         Err(err) if input.span.0.is_empty() => Err(err),
@@ -86,7 +86,7 @@ pub(crate) fn recover<'a>(
 pub(crate) fn report_error<'a>(
     mut parser: impl Parser<ParserInput<'a>, Statement<'a>, ParserErrorTree<'a>>,
-) -> impl FnMut(ParserInput<'a>) -> ParserResult<Statement<'a>> {
+) -> impl FnMut(ParserInput<'a>) -> ParserResult<'a, Statement<'a>> {
     move |input| match parser.parse(input.clone()) {
         Ok(result) => Ok(result),
         Err(e) => {
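(Aside, a minimal stand-in rather than nemo's real trait: the shape of the lifetime fix applied throughout the patch above. `Node` and `Leaf` are hypothetical; the point is that the returned trait objects are explicitly tied to the same lifetime `'a` as the borrowed input, instead of leaving the trait object's own parameter to elision.)

trait Node<'a> {
    // Both the reference and the trait object carry the input lifetime `'a`.
    fn children(&'a self) -> Vec<&'a dyn Node<'a>>;
}

struct Leaf;

impl<'a> Node<'a> for Leaf {
    fn children(&'a self) -> Vec<&'a dyn Node<'a>> {
        Vec::new() // a leaf borrows nothing and has no children
    }
}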
From 0d21ea0cdbe094b92b77614dc7c9bcdc7298fb2f Mon Sep 17 00:00:00 2001
From: Alex Ivliev
Date: Thu, 5 Sep 2024 12:20:06 +0200
Subject: [PATCH 154/214] Fix compilation errors

---
 .../chase_model/analysis/program_analysis.rs  | 65 +++++++++----------
 nemo/src/chase_model/translation/aggregate.rs |  4 +-
 nemo/src/chase_model/translation/import.rs    | 14 ++--
 nemo/src/chase_model/translation/operation.rs |  2 +-
 .../rule_model/components/import_export.rs    | 23 ++++---
 .../components/import_export/attributes.rs    |  2 +-
 .../components/import_export/file_formats.rs  | 16 ++---
 nemo/src/rule_model/components/rule.rs        | 21 +++---
 .../rule_model/components/term/aggregate.rs   |  2 +-
 .../rule_model/components/term/operation.rs   |  2 +-
 nemo/src/rule_model/error.rs                  | 20 ++++++
 nemo/src/rule_model/error/hint/similar.rs     | 10 ++-
 12 files changed, 101 insertions(+), 80 deletions(-)

diff --git a/nemo/src/chase_model/analysis/program_analysis.rs b/nemo/src/chase_model/analysis/program_analysis.rs
index 54c530f28..afd7c9a1b 100644
--- a/nemo/src/chase_model/analysis/program_analysis.rs
+++ b/nemo/src/chase_model/analysis/program_analysis.rs
@@ -3,12 +3,13 @@ use std::collections::{HashMap, HashSet};
 
 use nemo_physical::management::execution_plan::ColumnOrder;
 
 use crate::{
-    error::Error,
-    model::chase_model::{ChaseProgram, ChaseRule},
-    model::{
-        chase_model::{ChaseAtom, PrimitiveAtom, VariableAtom},
-        Constraint, Identifier, PrimitiveTerm, Term, Variable,
+    chase_model::components::{
+        atom::{primitive_atom::PrimitiveAtom, ChaseAtom},
+        program::ChaseProgram,
+        rule::ChaseRule,
     },
+    error::Error,
+    rule_model::components::{tag::Tag, term::primitive::variable::Variable},
 };
 
 use super::variable_order::{
@@ -30,11 +31,11 @@ pub struct RuleAnalysis {
     pub has_aggregates: bool,
 
     /// Predicates appearing in the positive part of the body.
-    pub positive_body_predicates: HashSet<Identifier>,
+    pub positive_body_predicates: HashSet<Tag>,
     /// Predicates appearing in the negative part of the body.
-    pub negative_body_predicates: HashSet<Identifier>,
+    pub negative_body_predicates: HashSet<Tag>,

     /// Predicates appearing in the head.
-    pub head_predicates: HashSet<Identifier>,
+    pub head_predicates: HashSet<Tag>,

     /// Variables occurring in the positive part of the body.
     pub positive_body_variables: HashSet<Variable>,
@@ -63,13 +64,13 @@ pub enum RuleAnalysisError {
         "predicate \"{predicate}\" required to have conflicting arities {arity1} and {arity2}"
     )]
     UnsupportedFeaturePredicateOverloading {
-        predicate: Identifier,
+        predicate: Tag,
         arity1: usize,
         arity2: usize,
     },
     /// There is a predicate whose arity could not be determined
     #[error("arity of predicate \"{predicate}\" could not be derived")]
-    UnspecifiedPredicateArity { predicate: Identifier },
+    UnspecifiedPredicateArity { predicate: Tag },
 }

 /// Return true if there is a predicate in the positive part of the rule that also appears in the head of the rule.
@@ -105,20 +106,18 @@ fn get_variables<Atom: ChaseAtom>(atoms: &[Atom]) -> HashSet<Variable> {
     result
 }

-fn get_predicates<Atom: ChaseAtom>(atoms: &[Atom]) -> HashSet<Identifier> {
+fn get_predicates<Atom: ChaseAtom>(atoms: &[Atom]) -> HashSet<Tag> {
     atoms.iter().map(|a| a.predicate()).collect()
 }

-pub(super) fn get_fresh_rule_predicate(rule_index: usize) -> Identifier {
-    Identifier(format!(
-        "FRESH_HEAD_MATCHES_IDENTIFIER_FOR_RULE_{rule_index}"
-    ))
+pub(super) fn get_fresh_rule_predicate(rule_index: usize) -> Tag {
+    Tag::new(format!("FRESH_HEAD_MATCHES_Tag_FOR_RULE_{rule_index}"))
 }

 fn construct_existential_aux_rule(
     rule_index: usize,
     head_atoms: Vec<PrimitiveAtom>,
-    column_orders: &HashMap<Identifier, HashSet<ColumnOrder>>,
+    column_orders: &HashMap<Tag, HashSet<ColumnOrder>>,
 ) -> (ChaseRule, VariableOrder) {
     let mut new_body = Vec::new();
     let mut constraints = Vec::new();
@@ -176,9 +175,9 @@ fn construct_existential_aux_rule(
     }

     let temp_rule = {
-        let temp_head_identifier = get_fresh_rule_predicate(rule_index);
+        let temp_head_Tag = get_fresh_rule_predicate(rule_index);

-        let temp_head_atom = PrimitiveAtom::new(temp_head_identifier, aux_predicate_terms);
+        let temp_head_atom = PrimitiveAtom::new(temp_head_Tag, aux_predicate_terms);
         ChaseRule::positive_rule(vec![temp_head_atom], new_body, constraints)
     };

@@ -197,7 +196,7 @@ fn analyze_rule(
     rule: &ChaseRule,
     promising_variable_orders: Vec<VariableOrder>,
-    promising_column_orders: &[HashMap<Identifier, HashSet<ColumnOrder>>],
+    promising_column_orders: &[HashMap<Tag, HashSet<ColumnOrder>>],
     rule_index: usize,
 ) -> RuleAnalysis {
     let num_existential = count_distinct_existential_variables(rule);
@@ -233,15 +232,15 @@ pub struct ProgramAnalysis {
     /// Analysis result for each rule.
     pub rule_analysis: Vec<RuleAnalysis>,
     /// Set of all the predicates that are derived in the chase.
-    pub derived_predicates: HashSet<Identifier>,
+    pub derived_predicates: HashSet<Tag>,
     /// Set of all predicates and their arity.
-    pub all_predicates: HashMap<Identifier, usize>,
+    pub all_predicates: HashMap<Tag, usize>,
 }

 impl ChaseProgram {
     /// Collect all predicates that appear in a head atom into a [HashSet]
-    fn get_head_predicates(&self) -> HashSet<Identifier> {
-        let mut result = HashSet::<Identifier>::new();
+    fn get_head_predicates(&self) -> HashSet<Tag> {
+        let mut result = HashSet::<Tag>::new();

         for rule in self.rules() {
             for head_atom in rule.head() {
@@ -254,17 +253,15 @@ impl ChaseProgram {

     /// Collect all predicates in the program, and determine their arity.
     /// An error is returned if arities required for a predicate are not unique.
- pub(super) fn get_all_predicates( - &self, - ) -> Result, RuleAnalysisError> { - let mut result = HashMap::::new(); - let mut missing = HashSet::::new(); + pub(super) fn get_all_predicates(&self) -> Result, RuleAnalysisError> { + let mut result = HashMap::::new(); + let mut missing = HashSet::::new(); fn add_arity( - predicate: Identifier, + predicate: Tag, arity: usize, - arities: &mut HashMap, - missing: &mut HashSet, + arities: &mut HashMap, + missing: &mut HashSet, ) -> Result<(), RuleAnalysisError> { if let Some(current) = arities.get(&predicate) { if *current != arity { @@ -280,11 +277,7 @@ impl ChaseProgram { } Ok(()) } - fn add_missing( - predicate: Identifier, - arities: &HashMap, - missing: &mut HashSet, - ) { + fn add_missing(predicate: Tag, arities: &HashMap, missing: &mut HashSet) { if arities.get(&predicate).is_none() { missing.insert(predicate); } diff --git a/nemo/src/chase_model/translation/aggregate.rs b/nemo/src/chase_model/translation/aggregate.rs index 337fac79f..f4a1d10a3 100644 --- a/nemo/src/chase_model/translation/aggregate.rs +++ b/nemo/src/chase_model/translation/aggregate.rs @@ -33,7 +33,7 @@ impl ProgramChaseTranslation { group_by_variables: &HashSet, ) -> ChaseAggregate { let origin = aggregate.origin().clone(); - let kind = aggregate.kind(); + let kind = aggregate.aggregate_kind(); let input_variable = match aggregate.aggregate_term() { Term::Primitive(Primitive::Variable(variable)) => variable.clone(), Term::Primitive(primitive) => { @@ -89,7 +89,7 @@ impl ProgramChaseTranslation { chase_aggregate: &mut Option, ) -> OperationTerm { let origin = operation.origin().clone(); - let kind = operation.kind(); + let kind = operation.operation_kind(); let mut subterms = Vec::new(); for argument in operation.arguments() { diff --git a/nemo/src/chase_model/translation/import.rs b/nemo/src/chase_model/translation/import.rs index 0e5a6c5aa..a2790c82d 100644 --- a/nemo/src/chase_model/translation/import.rs +++ b/nemo/src/chase_model/translation/import.rs @@ -34,16 +34,16 @@ impl ProgramChaseTranslation { ) -> ChaseImport { let origin = import.origin().clone(); let predicate = import.predicate().clone(); - let arity = predicate_arity.get(&predicate); + let arity = predicate_arity.get(&predicate).cloned(); let attributes = import.attributes(); let handler = match import.file_format() { FileFormat::CSV => { - Self::build_dsv_handler(Direction::Import, Some(','), arity, &attributes) + Self::build_dsv_handler(Direction::Import, Some(b','), arity, &attributes) } FileFormat::DSV => Self::build_dsv_handler(Direction::Import, None, arity, &attributes), FileFormat::TSV => { - Self::build_dsv_handler(Direction::Import, Some('\t'), arity, &attributes) + Self::build_dsv_handler(Direction::Import, Some(b'\t'), arity, &attributes) } FileFormat::JSON => todo!(), FileFormat::NTriples => todo!(), @@ -87,7 +87,7 @@ impl ProgramChaseTranslation { /// Read the [RdfValueFormats] from the attributes. fn read_rdf_value_formats( attributes: &HashMap, - ) -> Option { + ) -> Option { let term = attributes.get(&ImportExportAttribute::Format)?; if let Term::Tuple(tuple) = term { @@ -129,7 +129,7 @@ impl ProgramChaseTranslation { /// Read the iri base path from the attributes. 
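// A minimal, self-contained sketch (illustrative stand-in types, not the nemo
// API) of the Option-combinator fix applied in the import-handler hunk above:
// `Option::or_else` expects a closure that returns an `Option`, so handing it
// an already constructed default value cannot type-check; `unwrap_or` (eager)
// or `unwrap_or_else` (lazy) is the combinator that supplies a plain default.
#[derive(Debug, PartialEq)]
struct ValueFormats(Vec<&'static str>);

impl ValueFormats {
    fn default_for(arity: usize) -> Self {
        ValueFormats(vec!["any"; arity])
    }
}

fn choose_formats(parsed: Option<ValueFormats>, arity: usize) -> ValueFormats {
    // `parsed.unwrap_or_else(|| ValueFormats::default_for(arity))` would defer
    // building the default until it is actually needed.
    parsed.unwrap_or(ValueFormats::default_for(arity))
}

fn main() {
    assert_eq!(choose_formats(None, 2), ValueFormats(vec!["any", "any"]));
}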
fn read_base(attributes: &HashMap) -> Option> { let term = attributes.get(&ImportExportAttribute::Base)?; - Some(Iri::from( + Some(Iri::parse_unchecked( ImportExportDirective::plain_value(term) .expect("invalid program: base given in the wrong type"), )) @@ -145,7 +145,7 @@ impl ProgramChaseTranslation { let (mut compression_format, resource) = Self::read_resource(attributes); let value_formats = Self::read_dsv_value_formats(attributes) - .or_else(DsvValueFormats::default(arity.unwrap_or_default())); + .unwrap_or(DsvValueFormats::default(arity.unwrap_or_default())); let limit = Self::read_limit(attributes); @@ -188,7 +188,7 @@ impl ProgramChaseTranslation { } let value_formats = - Self::read_rdf_value_formats(attributes).or_else(DsvValueFormats::default(arity)); + Self::read_rdf_value_formats(attributes).unwrap_or(RdfValueFormats::default(arity)); let limit = Self::read_limit(attributes); diff --git a/nemo/src/chase_model/translation/operation.rs b/nemo/src/chase_model/translation/operation.rs index d95868d9d..912f8c1be 100644 --- a/nemo/src/chase_model/translation/operation.rs +++ b/nemo/src/chase_model/translation/operation.rs @@ -25,7 +25,7 @@ impl ProgramChaseTranslation { operation: &crate::rule_model::components::term::operation::Operation, ) -> OperationTerm { let origin = operation.origin().clone(); - let kind = operation.kind(); + let kind = operation.operation_kind(); let mut subterms = Vec::new(); for argument in operation.arguments() { diff --git a/nemo/src/rule_model/components/import_export.rs b/nemo/src/rule_model/components/import_export.rs index 0f1d31662..011a4f7e5 100644 --- a/nemo/src/rule_model/components/import_export.rs +++ b/nemo/src/rule_model/components/import_export.rs @@ -179,26 +179,31 @@ impl ImportExportDirective { attribute: attribute.name().to_string(), direction: direction.to_string(), }, - ) + ); } } - let expected_attributes = self.format.attributes().keys().collect::>(); - for (attribute, (attribute_origin, value)) in attributes.iter() { + let expected_attributes = self + .format + .attributes() + .keys() + .cloned() + .collect::>(); + for (attribute, (&attribute_origin, value)) in attributes.iter() { if !expected_attributes.contains(attribute) { builder .report_error( - attribute_origin, + attribute_origin.clone(), ValidationErrorKind::ImportExportUnrecognizedAttribute { format: self.format.name().to_string(), attribute: attribute.name().to_string(), }, ) - .add_hint(Hint::similar( + .add_hint_option(Hint::similar( "parameter", attribute.name(), expected_attributes.iter().map(|attribute| attribute.name()), - )) + )); } if attribute.value_type() != value.kind() { @@ -206,7 +211,7 @@ impl ImportExportDirective { value.origin().clone(), ValidationErrorKind::ImportExportAttributeValueType { parameter: attribute.name().to_string(), - given: value.kind().name.to_string(), + given: value.kind().name().to_string(), expected: attribute.value_type().name().to_string(), }, ); @@ -434,7 +439,7 @@ impl ProgramComponent for ImportDirective { where Self: Sized, { - todo!() + self.0.validate(Direction::Import, builder) } fn kind(&self) -> ProgramComponentKind { @@ -517,7 +522,7 @@ impl ProgramComponent for ExportDirective { where Self: Sized, { - todo!() + self.0.validate(Direction::Export, builder) } fn kind(&self) -> ProgramComponentKind { diff --git a/nemo/src/rule_model/components/import_export/attributes.rs b/nemo/src/rule_model/components/import_export/attributes.rs index b347e0d73..0c0fee9d2 100644 --- 
a/nemo/src/rule_model/components/import_export/attributes.rs +++ b/nemo/src/rule_model/components/import_export/attributes.rs @@ -31,7 +31,7 @@ pub enum ImportExportAttribute { /// Delimiter used to separate values #[assoc(name = attribute::DSV_DELIMITER)] #[assoc(from_name = attribute::DSV_DELIMITER)] - #[assoc(value_type = ProgramComponentKind::String)] + #[assoc(value_type = ProgramComponentKind::PlainString)] Delimiter, /// Compression format #[assoc(name = attribute::COMPRESSION)] diff --git a/nemo/src/rule_model/components/import_export/file_formats.rs b/nemo/src/rule_model/components/import_export/file_formats.rs index cd9e4c354..84c6d7e0e 100644 --- a/nemo/src/rule_model/components/import_export/file_formats.rs +++ b/nemo/src/rule_model/components/import_export/file_formats.rs @@ -34,7 +34,7 @@ pub enum FileFormat { #[assoc(extension = file_format::EXTENSION_CSV)] #[assoc(attributes = HashMap::from([ (ImportExportAttribute::Resource, AttributeRequirement::Required), - (ImportExportAttribute::Formats, AttributeRequirement::Optional), + (ImportExportAttribute::Format, AttributeRequirement::Optional), (ImportExportAttribute::Limit, AttributeRequirement::Optional), (ImportExportAttribute::Compression, AttributeRequirement::Optional), ]))] @@ -46,7 +46,7 @@ pub enum FileFormat { #[assoc(attributes = HashMap::from([ (ImportExportAttribute::Resource, AttributeRequirement::Required), (ImportExportAttribute::Delimiter, AttributeRequirement::Required), - (ImportExportAttribute::Formats, AttributeRequirement::Optional), + (ImportExportAttribute::Format, AttributeRequirement::Optional), (ImportExportAttribute::Limit, AttributeRequirement::Optional), (ImportExportAttribute::Compression, AttributeRequirement::Optional), ]))] @@ -57,7 +57,7 @@ pub enum FileFormat { #[assoc(extension = file_format::EXTENSION_TSV)] #[assoc(attributes = HashMap::from([ (ImportExportAttribute::Resource, AttributeRequirement::Required), - (ImportExportAttribute::Formats, AttributeRequirement::Optional), + (ImportExportAttribute::Format, AttributeRequirement::Optional), (ImportExportAttribute::Limit, AttributeRequirement::Optional), (ImportExportAttribute::Compression, AttributeRequirement::Optional), ]))] @@ -78,7 +78,7 @@ pub enum FileFormat { #[assoc(attributes = HashMap::from([ (ImportExportAttribute::Resource, AttributeRequirement::Required), (ImportExportAttribute::Base, AttributeRequirement::Optional), - (ImportExportAttribute::Formats, AttributeRequirement::Optional), + (ImportExportAttribute::Format, AttributeRequirement::Optional), (ImportExportAttribute::Limit, AttributeRequirement::Optional), (ImportExportAttribute::Compression, AttributeRequirement::Optional), ]))] @@ -91,7 +91,7 @@ pub enum FileFormat { #[assoc(attributes = HashMap::from([ (ImportExportAttribute::Resource, AttributeRequirement::Required), (ImportExportAttribute::Base, AttributeRequirement::Optional), - (ImportExportAttribute::Formats, AttributeRequirement::Optional), + (ImportExportAttribute::Format, AttributeRequirement::Optional), (ImportExportAttribute::Limit, AttributeRequirement::Optional), (ImportExportAttribute::Compression, AttributeRequirement::Optional), ]))] @@ -104,7 +104,7 @@ pub enum FileFormat { #[assoc(attributes = HashMap::from([ (ImportExportAttribute::Resource, AttributeRequirement::Required), (ImportExportAttribute::Base, AttributeRequirement::Optional), - (ImportExportAttribute::Formats, AttributeRequirement::Optional), + (ImportExportAttribute::Format, AttributeRequirement::Optional), (ImportExportAttribute::Limit, 
AttributeRequirement::Optional), (ImportExportAttribute::Compression, AttributeRequirement::Optional), ]))] @@ -117,7 +117,7 @@ pub enum FileFormat { #[assoc(attributes = HashMap::from([ (ImportExportAttribute::Resource, AttributeRequirement::Required), (ImportExportAttribute::Base, AttributeRequirement::Optional), - (ImportExportAttribute::Formats, AttributeRequirement::Optional), + (ImportExportAttribute::Format, AttributeRequirement::Optional), (ImportExportAttribute::Limit, AttributeRequirement::Optional), (ImportExportAttribute::Compression, AttributeRequirement::Optional), ]))] @@ -130,7 +130,7 @@ pub enum FileFormat { #[assoc(attributes = HashMap::from([ (ImportExportAttribute::Resource, AttributeRequirement::Required), (ImportExportAttribute::Base, AttributeRequirement::Optional), - (ImportExportAttribute::Formats, AttributeRequirement::Optional), + (ImportExportAttribute::Format, AttributeRequirement::Optional), (ImportExportAttribute::Limit, AttributeRequirement::Optional), (ImportExportAttribute::Compression, AttributeRequirement::Optional), ]))] diff --git a/nemo/src/rule_model/components/rule.rs b/nemo/src/rule_model/components/rule.rs index 414a6c876..0c75b8208 100644 --- a/nemo/src/rule_model/components/rule.rs +++ b/nemo/src/rule_model/components/rule.rs @@ -321,17 +321,16 @@ impl ProgramComponent for Rule { for variable in atom.variables() { if let Some(variable_name) = variable.name() { if !safe_variables.contains(variable) { - let info = builder.report_error( - variable.origin().clone(), - ValidationErrorKind::HeadUnsafe(variable.clone()), - ); - if let Some(hint) = Hint::similar( - "variable", - variable_name, - safe_variables.iter().flat_map(|variable| variable.name()), - ) { - info.add_hint(hint); - } + builder + .report_error( + variable.origin().clone(), + ValidationErrorKind::HeadUnsafe(variable.clone()), + ) + .add_hint_option(Hint::similar( + "variable", + variable_name, + safe_variables.iter().flat_map(|variable| variable.name()), + )); return Err(()); } diff --git a/nemo/src/rule_model/components/term/aggregate.rs b/nemo/src/rule_model/components/term/aggregate.rs index b7cde3273..7de0c7628 100644 --- a/nemo/src/rule_model/components/term/aggregate.rs +++ b/nemo/src/rule_model/components/term/aggregate.rs @@ -125,7 +125,7 @@ impl Aggregate { } /// Return the kind of aggregate. - pub fn kind(&self) -> AggregateKind { + pub fn aggregate_kind(&self) -> AggregateKind { self.kind } diff --git a/nemo/src/rule_model/components/term/operation.rs b/nemo/src/rule_model/components/term/operation.rs index 6aad88d90..dd31affb1 100644 --- a/nemo/src/rule_model/components/term/operation.rs +++ b/nemo/src/rule_model/components/term/operation.rs @@ -49,7 +49,7 @@ impl Operation { } /// Return the [OperationKind] of this operation. - pub fn kind(&self) -> OperationKind { + pub fn operation_kind(&self) -> OperationKind { self.kind } diff --git a/nemo/src/rule_model/error.rs b/nemo/src/rule_model/error.rs index 07f411f2b..cf928409a 100644 --- a/nemo/src/rule_model/error.rs +++ b/nemo/src/rule_model/error.rs @@ -122,6 +122,16 @@ where self } + /// Add a new hint to the error if `hint` is Some. + /// Does nothing otherwise. + pub fn add_hint_option(&mut self, hint: Option) -> &mut Self { + if let Some(hint) = hint { + self.hints.push(hint); + } + + self + } + /// Add this information to a [ReportBuilder]. pub fn report<'a, Translation>( &self, @@ -246,6 +256,16 @@ impl TranslationError { self } + + /// Add a new hint to the error if `hint` is Some. + /// Does nothing otherwise. 
+ pub fn add_hint_option(&mut self, hint: Option) -> &mut Self { + if let Some(hint) = hint { + self.info.add_hint(hint); + } + + self + } } /// Error that may occur while translating or validating a nemo program diff --git a/nemo/src/rule_model/error/hint/similar.rs b/nemo/src/rule_model/error/hint/similar.rs index 88b094aa8..43ef34911 100644 --- a/nemo/src/rule_model/error/hint/similar.rs +++ b/nemo/src/rule_model/error/hint/similar.rs @@ -11,16 +11,20 @@ const SIMILARITY_THRESHOLD: f64 = 0.8; impl Hint { /// Checks whether a similar string exist in a collection of source strings. /// Returns the most similar string, if there is one - pub fn similar>>( + pub fn similar>( kind: &str, target: impl AsRef, options: Options, - ) -> Option { + ) -> Option + where + S: AsRef, + { if target.as_ref().len() < SIMILARITY_MIN_LENGTH { return None; } - let (best, confidence) = find_best_similarity(target, options)?; + let options = options.into_iter().collect::>(); + let (best, confidence) = find_best_similarity(target, &options)?; if best.len() >= SIMILARITY_MIN_LENGTH && confidence >= SIMILARITY_THRESHOLD { return Some(Hint::SimilarExists { From 8e127a90a55388a046df59fffce9f15f75c2c5e7 Mon Sep 17 00:00:00 2001 From: Alex Ivliev Date: Thu, 5 Sep 2024 12:20:38 +0200 Subject: [PATCH 155/214] Remove old program_analysis module --- nemo/src/lib.rs | 1 - nemo/src/program_analysis.rs | 7 ------- 2 files changed, 8 deletions(-) delete mode 100644 nemo/src/program_analysis.rs diff --git a/nemo/src/lib.rs b/nemo/src/lib.rs index f5d12aab3..906e5f319 100644 --- a/nemo/src/lib.rs +++ b/nemo/src/lib.rs @@ -34,7 +34,6 @@ pub mod rule_model; pub mod util; mod chase_model; -mod program_analysis; mod table_manager; // we use datavalues and meta from nemo_physical in our API, so re-export it here. diff --git a/nemo/src/program_analysis.rs b/nemo/src/program_analysis.rs deleted file mode 100644 index 0bf7f55b7..000000000 --- a/nemo/src/program_analysis.rs +++ /dev/null @@ -1,7 +0,0 @@ -//! Functionality for collecting useful information about a existential rule program befire its execution. 
- -/// Computes useful information of a program before its execution -pub mod analysis; - -/// Functionality for computing promising variable orders from a program -pub mod variable_order; From b3aa45e7b281d940b42a7c6bb5bcb267cdd1fef3 Mon Sep 17 00:00:00 2001 From: Alex Ivliev Date: Thu, 5 Sep 2024 17:23:37 +0200 Subject: [PATCH 156/214] Move program analysis and variable order code to new chase model --- .../chase_model/analysis/program_analysis.rs | 187 +++-- .../chase_model/analysis/variable_order.rs | 666 +++++++++++------- nemo/src/chase_model/components/export.rs | 7 +- nemo/src/chase_model/components/filter.rs | 2 +- nemo/src/chase_model/components/import.rs | 7 +- nemo/src/chase_model/components/operation.rs | 2 +- nemo/src/chase_model/components/program.rs | 67 +- nemo/src/chase_model/components/rule.rs | 78 +- .../components/term/operation_term.rs | 4 +- nemo/src/error.rs | 2 +- 10 files changed, 667 insertions(+), 355 deletions(-) diff --git a/nemo/src/chase_model/analysis/program_analysis.rs b/nemo/src/chase_model/analysis/program_analysis.rs index afd7c9a1b..2e7a92f03 100644 --- a/nemo/src/chase_model/analysis/program_analysis.rs +++ b/nemo/src/chase_model/analysis/program_analysis.rs @@ -4,12 +4,22 @@ use nemo_physical::management::execution_plan::ColumnOrder; use crate::{ chase_model::components::{ - atom::{primitive_atom::PrimitiveAtom, ChaseAtom}, + atom::{primitive_atom::PrimitiveAtom, variable_atom::VariableAtom, ChaseAtom}, + filter::ChaseFilter, program::ChaseProgram, rule::ChaseRule, + term::operation_term::{Operation, OperationTerm}, + }, + rule_model::{ + components::{ + tag::Tag, + term::{ + operation::operation_kind::OperationKind, + primitive::{variable::Variable, Primitive}, + }, + }, + origin::Origin, }, - error::Error, - rule_model::components::{tag::Tag, term::primitive::variable::Variable}, }; use super::variable_order::{ @@ -87,7 +97,7 @@ fn count_distinct_existential_variables(rule: &ChaseRule) -> usize { for head_atom in rule.head() { for term in head_atom.terms() { - if let PrimitiveTerm::Variable(Variable::Existential(id)) = term { + if let Primitive::Variable(Variable::Existential(id)) = term { existentials.insert(Variable::Existential(id.clone())); } } @@ -99,7 +109,7 @@ fn count_distinct_existential_variables(rule: &ChaseRule) -> usize { fn get_variables(atoms: &[Atom]) -> HashSet { let mut result = HashSet::new(); for atom in atoms { - for variable in atom.get_variables() { + for variable in atom.variables().cloned() { result.insert(variable); } } @@ -119,14 +129,13 @@ fn construct_existential_aux_rule( head_atoms: Vec, column_orders: &HashMap>, ) -> (ChaseRule, VariableOrder) { - let mut new_body = Vec::new(); - let mut constraints = Vec::new(); + let mut result = ChaseRule::default(); let mut variable_index = 0; let mut generate_variable = move || { variable_index += 1; let name = format!("__GENERATED_HEAD_AUX_VARIABLE_{}", variable_index); - Variable::Universal(name) + Variable::universal(&name) }; let mut aux_predicate_terms = Vec::new(); @@ -136,20 +145,27 @@ fn construct_existential_aux_rule( for term in atom.terms() { match term { - PrimitiveTerm::Variable(variable) => { + Primitive::Variable(variable) => { if !used_variables.insert(variable.clone()) { let generated_variable = generate_variable(); new_terms.push(generated_variable.clone()); - let new_constraint = Constraint::Equals( - Term::Primitive(PrimitiveTerm::Variable(generated_variable)), - Term::Primitive(term.clone()), + let new_constraint = Operation::new( + Origin::default(), + 
OperationKind::Equal, + vec![ + OperationTerm::Primitive(Primitive::Variable(generated_variable)), + OperationTerm::Primitive(term.clone()), + ], ); - constraints.push(new_constraint); + result.add_positive_filter(ChaseFilter::new( + Origin::default(), + OperationTerm::Operation(new_constraint), + )); } else { if variable.is_universal() { - aux_predicate_terms.push(PrimitiveTerm::Variable(variable.clone())); + aux_predicate_terms.push(Primitive::Variable(variable.clone())); } new_terms.push(variable.clone()); @@ -157,40 +173,49 @@ fn construct_existential_aux_rule( if variable.is_universal() && used_variables.insert(variable.clone()) {} } - PrimitiveTerm::GroundTerm(_) => { + Primitive::Ground(_) => { let generated_variable = generate_variable(); new_terms.push(generated_variable.clone()); - let new_constraint = Constraint::Equals( - Term::Primitive(PrimitiveTerm::Variable(generated_variable)), - Term::Primitive(term.clone()), + let new_constraint = Operation::new( + Origin::default(), + OperationKind::Equal, + vec![ + OperationTerm::Primitive(Primitive::Variable(generated_variable)), + OperationTerm::Primitive(term.clone()), + ], ); - constraints.push(new_constraint); + result.add_positive_filter(ChaseFilter::new( + Origin::default(), + OperationTerm::Operation(new_constraint), + )); } } } - new_body.push(VariableAtom::new(atom.predicate(), new_terms)); + result.add_positive_atom(VariableAtom::new( + Origin::default(), + atom.predicate(), + new_terms, + )); } - let temp_rule = { - let temp_head_Tag = get_fresh_rule_predicate(rule_index); + let temp_head_tag = get_fresh_rule_predicate(rule_index); + let temp_head_atom = PrimitiveAtom::new(Origin::default(), temp_head_tag, aux_predicate_terms); + result.add_head_atom(temp_head_atom); - let temp_head_atom = PrimitiveAtom::new(temp_head_Tag, aux_predicate_terms); - ChaseRule::positive_rule(vec![temp_head_atom], new_body, constraints) - }; + let mut rule_program = ChaseProgram::default(); + rule_program.add_rule(result.clone()); - let variable_order = build_preferable_variable_orders( - &ChaseProgram::builder().rule(temp_rule.clone()).build(), - Some(column_orders.clone()), - ) - .all_variable_orders - .pop() - .and_then(|mut v| v.pop()) - .expect("This functions provides at least one variable order"); + let variable_order = + build_preferable_variable_orders(&rule_program, Some(column_orders.clone())) + .all_variable_orders + .pop() + .and_then(|mut v| v.pop()) + .expect("This functions provides at least one variable order"); - (temp_rule, variable_order) + (result, variable_order) } fn analyze_rule( @@ -211,7 +236,7 @@ fn analyze_rule( RuleAnalysis { is_existential: num_existential > 0, is_recursive: is_recursive(rule), - has_positive_constraints: !rule.positive_constraints().is_empty(), + has_positive_constraints: !rule.positive_filters().is_empty(), has_aggregates: rule.aggregate().is_some(), positive_body_predicates: get_predicates(rule.positive_body()), negative_body_predicates: get_predicates(rule.negative_body()), @@ -253,30 +278,19 @@ impl ChaseProgram { /// Collect all predicates in the program, and determine their arity. /// An error is returned if arities required for a predicate are not unique. 
- pub(super) fn get_all_predicates(&self) -> Result, RuleAnalysisError> { + pub(super) fn get_all_predicates(&self) -> HashMap { let mut result = HashMap::::new(); - let mut missing = HashSet::::new(); - - fn add_arity( - predicate: Tag, - arity: usize, - arities: &mut HashMap, - missing: &mut HashSet, - ) -> Result<(), RuleAnalysisError> { + + fn add_arity(predicate: Tag, arity: usize, arities: &mut HashMap) { if let Some(current) = arities.get(&predicate) { if *current != arity { - return Err(RuleAnalysisError::UnsupportedFeaturePredicateOverloading { - predicate, - arity1: *current, - arity2: arity, - }); + unreachable!("invalid program: same predicate used with different arities"); } } else { - missing.remove(&predicate); arities.insert(predicate, arity); } - Ok(()) } + fn add_missing(predicate: Tag, arities: &HashMap, missing: &mut HashSet) { if arities.get(&predicate).is_none() { missing.insert(predicate); @@ -284,51 +298,32 @@ impl ChaseProgram { } // Predicates in import statements - for (pred, handler) in self.imports() { - if let Some(arity) = handler.predicate_arity() { - add_arity(pred.clone(), arity, &mut result, &mut missing)?; - } else { - add_missing(pred.clone(), &result, &mut missing); - } + for import in self.imports() { + add_arity(import.predicate().clone(), import.arity(), &mut result); } // Predicates in export statements - for (pred, handler) in self.exports() { - if let Some(arity) = handler.predicate_arity() { - add_arity(pred.clone(), arity, &mut result, &mut missing)?; - } else { - add_missing(pred.clone(), &result, &mut missing); - } + for export in self.exports() { + add_arity(export.predicate().clone(), export.arity(), &mut result); } // Predicates in rules for rule in self.rules() { for atom in rule.head() { - add_arity( - atom.predicate(), - atom.terms().len(), - &mut result, - &mut missing, - )?; + add_arity(atom.predicate(), atom.arity(), &mut result); } - for atom in rule.all_body() { - add_arity( - atom.predicate(), - atom.terms().len(), - &mut result, - &mut missing, - )?; + for atom in rule + .positive_body() + .iter() + .chain(rule.negative_body().iter()) + { + add_arity(atom.predicate(), atom.arity(), &mut result); } } // Predicates in facts for fact in self.facts() { - add_arity( - fact.predicate(), - fact.terms().len(), - &mut result, - &mut missing, - )?; + add_arity(fact.predicate(), fact.arity(), &mut result); } // Additional predicates for existential rules @@ -344,40 +339,24 @@ impl ChaseProgram { let predicate = get_fresh_rule_predicate(rule_index); let arity = head_variables.difference(&body_variables).count(); - add_arity(predicate, arity, &mut result, &mut missing)?; + add_arity(predicate, arity, &mut result); } - if !missing.is_empty() { - return Err(RuleAnalysisError::UnspecifiedPredicateArity { - predicate: missing.iter().next().expect("not empty").clone(), - }); - } - - Ok(result) - } - - /// Check if the program contains rules with unsupported features. - /// This is always performed as part of [ChaseProgram::analyze]. - fn check_for_unsupported_features(&self) -> Result<(), RuleAnalysisError> { - // Currently no interesting checks here. Uniqueness of arities is already checked in the analysis phase. - // In general, should we maybe just do all checks in the analysis? - Ok(()) + result } /// Analyze the program and return a struct containing the results. /// This method also checks for structural problems that are not detected /// in parsing. 
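// A distilled, standalone version (with `String` predicates standing in for
// `Tag`) of the arity bookkeeping in `get_all_predicates` above: every use of
// a predicate must agree on a single arity, so an insertion either records the
// arity or checks it against the stored one. In the chase model a mismatch is
// unreachable, because the validated rule model rejects such programs earlier.
use std::collections::HashMap;

fn add_arity(predicate: String, arity: usize, arities: &mut HashMap<String, usize>) {
    if let Some(&current) = arities.get(&predicate) {
        if current != arity {
            unreachable!("invalid program: same predicate used with different arities");
        }
    } else {
        arities.insert(predicate, arity);
    }
}

fn main() {
    let mut arities = HashMap::new();
    add_arity("edge".to_string(), 2, &mut arities);
    // Re-using the predicate with the same arity is fine.
    add_arity("edge".to_string(), 2, &mut arities);
    assert_eq!(arities["edge"], 2);
}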
- pub fn analyze(&self) -> Result { + pub fn analyze(&self) -> ProgramAnalysis { let BuilderResultVariants { all_variable_orders, all_column_orders, } = build_preferable_variable_orders(self, None); - let all_predicates = self.get_all_predicates()?; + let all_predicates = self.get_all_predicates(); let derived_predicates = self.get_head_predicates(); - self.check_for_unsupported_features()?; - let rule_analysis: Vec = self .rules() .iter() @@ -392,10 +371,10 @@ impl ChaseProgram { }) .collect(); - Ok(ProgramAnalysis { + ProgramAnalysis { rule_analysis, derived_predicates, all_predicates, - }) + } } } diff --git a/nemo/src/chase_model/analysis/variable_order.rs b/nemo/src/chase_model/analysis/variable_order.rs index 82d33c00b..7bf4cf781 100644 --- a/nemo/src/chase_model/analysis/variable_order.rs +++ b/nemo/src/chase_model/analysis/variable_order.rs @@ -4,15 +4,20 @@ use std::collections::{BTreeMap, HashMap, HashSet}; -use crate::model::{ - chase_model::{ChaseAtom, ChaseProgram, ChaseRule, VariableAtom}, - Identifier, Variable, -}; use nemo_physical::{ management::execution_plan::ColumnOrder, permutator::Permutator, util::mapping::permutation::Permutation, }; +use crate::{ + chase_model::components::{ + atom::{variable_atom::VariableAtom, ChaseAtom}, + program::ChaseProgram, + rule::ChaseRule, + }, + rule_model::components::{tag::Tag, term::primitive::variable::Variable}, +}; + /// Represents an ordering of variables as [HashMap]. #[repr(transparent)] #[derive(Clone, Default, PartialEq, Eq)] @@ -109,9 +114,7 @@ impl VariableOrder { /// Return [String] with the contents of this object for debugging. pub(crate) fn debug(&self) -> String { let mut variable_vector = Vec::::new(); - variable_vector.resize_with(self.0.len(), || { - Variable::Universal("PLACEHOLDER".to_string()) - }); + variable_vector.resize_with(self.0.len(), || Variable::universal("PLACEHOLDER")); for (variable, index) in &self.0 { if *index >= variable_vector.len() { @@ -125,11 +128,7 @@ impl VariableOrder { result += "["; for (index, variable) in variable_vector.iter().enumerate() { - let identifier = match variable { - Variable::Universal(id) => id.to_owned(), - Variable::Existential(id) => id.to_owned(), - Variable::UnnamedUniversal(id) => format!("_{}", id), - }; + let identifier = variable.name().unwrap_or(String::from("_")); result += &identifier; @@ -150,11 +149,7 @@ impl std::fmt::Debug for VariableOrder { &self .as_ordered_list() .iter() - .map(|var| match var { - Variable::Universal(v) => "?".to_string() + v.as_str(), - Variable::Existential(v) => "!".to_string() + v.as_str(), - Variable::UnnamedUniversal(v) => format!("_{}", v).to_string(), - }) + .map(|var| var.to_string()) .collect::>(), ) .finish() @@ -189,7 +184,6 @@ fn column_order_for(atom: &VariableAtom, var_order: &VariableOrder) -> ColumnOrd .iter() .flat_map(|var| { atom.terms() - .iter() .enumerate() .filter(move |(_, lit_var)| *lit_var == var) .map(|(i, _)| i) @@ -198,7 +192,6 @@ fn column_order_for(atom: &VariableAtom, var_order: &VariableOrder) -> ColumnOrd let mut remaining_vars: Vec = atom .terms() - .iter() .enumerate() .map(|(i, _)| i) .filter(|i| !partial_col_order.contains(i)) @@ -216,11 +209,11 @@ trait RuleVariableList { rule: &ChaseRule, ) -> Vec; - fn filter_tries bool>( + fn filter_tries bool>( self, partial_var_order: &VariableOrder, rule: &ChaseRule, - required_trie_column_orders: &HashMap>, + required_trie_column_orders: &HashMap>, predicate_filter: P, ) -> Vec; } @@ -235,7 +228,7 @@ impl RuleVariableList for Vec { .iter() .filter(|var| { 
rule.positive_body().iter().any(|atom| { - let predicate_vars: Vec = atom.terms().clone(); + let predicate_vars: Vec = atom.terms().cloned().collect(); predicate_vars.iter().any(|pred_var| pred_var == *var) && predicate_vars @@ -253,11 +246,11 @@ impl RuleVariableList for Vec { } } - fn filter_tries bool>( + fn filter_tries bool>( self, partial_var_order: &VariableOrder, rule: &ChaseRule, - required_trie_column_orders: &HashMap>, + required_trie_column_orders: &HashMap>, mut predicate_filter: P, ) -> Vec { let ratios: Vec<(usize, usize)> = self @@ -270,7 +263,7 @@ impl RuleVariableList for Vec { .positive_body() .iter() .filter(|atom| predicate_filter(&atom.predicate())) - .filter(|atom| atom.terms().iter().any(|atom_var| atom_var == var)); + .filter(|atom| atom.terms().any(|atom_var| atom_var == var)); let (atoms_requiring_new_orders, total_atoms) = atoms.fold((0, 0), |acc, atom| { let fitting_column_order_exists: bool = required_trie_column_orders @@ -314,22 +307,22 @@ impl RuleVariableList for Vec { struct VariableOrderBuilder<'a> { program: &'a ChaseProgram, iteration_order_within_rule: IterationOrder, - required_trie_column_orders: HashMap>, // maps predicates to sets of column orders - idb_preds: HashSet, + required_trie_column_orders: HashMap>, // maps predicates to sets of column orders + idb_preds: HashSet, } struct BuilderResult { /// A [Vec] where the ith entry contains a good variable order for rule i. variable_orders: Vec, /// A [HashMap] mapping each predicate to the set of [ColumnOrder]s that are supposed to be available. - column_orders: HashMap>, + column_orders: HashMap>, } impl VariableOrderBuilder<'_> { fn build_for( program: &ChaseProgram, iteration_order_within_rule: IterationOrder, - initial_column_orders: HashMap>, + initial_column_orders: HashMap>, ) -> BuilderResult { let mut builder = VariableOrderBuilder { program, @@ -466,7 +459,7 @@ impl VariableOrderBuilder<'_> { rule: &ChaseRule, ) { let atoms = rule.positive_body().iter().filter(|atom| { - let vars: Vec = atom.terms().clone(); + let vars: Vec = atom.terms().cloned().collect(); must_contain.iter().all(|var| vars.contains(var)) && vars.iter().all(|var| variable_order.contains(var)) }); @@ -483,7 +476,7 @@ impl VariableOrderBuilder<'_> { } } - fn get_column_orders(self) -> HashMap> { + fn get_column_orders(self) -> HashMap> { self.required_trie_column_orders } } @@ -494,12 +487,12 @@ pub(super) struct BuilderResultVariants { pub(super) all_variable_orders: Vec>, /// For each variant of the variable order computation /// contains one [HashSet] mapping each predicate to its available [ColumnOrder]s. 
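// A freestanding sketch of the idea behind `column_order_for` above, with
// plain `&str` variables and `usize` positions standing in for the real
// `Variable` and `ColumnOrder` types: atom positions are emitted in the order
// in which the variable order visits their variables, and any positions the
// order does not mention are appended afterwards.
fn column_order_for(atom_vars: &[&str], var_order: &[&str]) -> Vec<usize> {
    let mut order: Vec<usize> = var_order
        .iter()
        .flat_map(|var| {
            atom_vars
                .iter()
                .enumerate()
                .filter(move |(_, atom_var)| atom_var == &var)
                .map(|(position, _)| position)
        })
        .collect();

    // Positions of variables not covered by the order keep their relative
    // order at the end.
    let remaining: Vec<usize> = (0..atom_vars.len())
        .filter(|position| !order.contains(position))
        .collect();
    order.extend(remaining);

    order
}

fn main() {
    // For the atom a(x, y, x) and the variable order [y, x]:
    // y occurs at position 1; x occurs at positions 0 and 2.
    assert_eq!(column_order_for(&["x", "y", "x"], &["y", "x"]), vec![1, 0, 2]);
}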
- pub(super) all_column_orders: Vec>>, + pub(super) all_column_orders: Vec>>, } pub(super) fn build_preferable_variable_orders( program: &ChaseProgram, - initial_column_orders: Option>>, + initial_column_orders: Option>>, ) -> BuilderResultVariants { let iteration_orders = [ IterationOrder::Forward, @@ -508,12 +501,15 @@ pub(super) fn build_preferable_variable_orders( ]; let initial_column_orders = initial_column_orders.unwrap_or_else(|| { - let mut result: HashMap> = Default::default(); + let mut result: HashMap> = Default::default(); for fact in program.facts().iter() { result.insert(fact.predicate(), HashSet::from([ColumnOrder::default()])); } - for (pred, _handler) in program.imports() { - result.insert(pred.clone(), HashSet::from([ColumnOrder::default()])); + for import in program.imports() { + result.insert( + import.predicate().clone(), + HashSet::from([ColumnOrder::default()]), + ); } result }); @@ -548,20 +544,34 @@ pub(super) fn build_preferable_variable_orders( #[cfg(test)] mod test { + use crate::{ + chase_model::components::{ + atom::{primitive_atom::PrimitiveAtom, variable_atom::VariableAtom}, + import::ChaseImport, + program::ChaseProgram, + rule::ChaseRule, + }, + io::formats::{ + dsv::{value_format::DsvValueFormats, DsvHandler}, + Direction, ImportExportResource, + }, + rule_model::{ + components::{ + import_export::compression::CompressionFormat, + tag::Tag, + term::primitive::{variable::Variable, Primitive}, + }, + origin::Origin, + }, + }; + use super::{IterationOrder, RuleVariableList, VariableOrder}; - use crate::model::chase_model::{ChaseProgram, ChaseRule, PrimitiveAtom, VariableAtom}; - use crate::model::{ - FileFormat, Identifier, ImportDirective, ImportExportDirective, PrimitiveTerm, Variable, - PARAMETER_NAME_FORMAT, PARAMETER_NAME_RESOURCE, VALUE_FORMAT_ANY, - }; - use nemo_physical::datavalues::{AnyDataValue, MapDataValue, TupleDataValue}; use nemo_physical::management::execution_plan::ColumnOrder; use std::collections::{HashMap, HashSet}; - type TestRuleSetWithAdditionalInfo = - (Vec, Vec>, Vec<(Identifier, usize)>); + type TestRuleSetWithAdditionalInfo = (Vec, Vec>, Vec<(Tag, usize)>); impl VariableOrder { fn from_vec(vec: Vec) -> Self { @@ -615,53 +625,67 @@ mod test { } fn get_test_rule_with_vars_where_predicates_are_different() -> (ChaseRule, Vec) { - let a = Identifier("a".to_string()); - let b = Identifier("b".to_string()); - let c = Identifier("c".to_string()); - - let x = Variable::Universal("x".to_string()); - let y = Variable::Universal("y".to_string()); - let z = Variable::Universal("z".to_string()); - - let tx = PrimitiveTerm::Variable(x.clone()); - let _ty = PrimitiveTerm::Variable(y.clone()); - let tz = PrimitiveTerm::Variable(z.clone()); - - ( - ChaseRule::positive_rule( - vec![PrimitiveAtom::new(c, vec![tx.clone(), tz.clone()])], - vec![ - VariableAtom::new(a, vec![x.clone(), y.clone()]), - VariableAtom::new(b, vec![y.clone(), z.clone()]), - ], - vec![], - ), - vec![x, y, z], - ) + let a = Tag::new("a".to_string()); + let b = Tag::new("b".to_string()); + let c = Tag::new("c".to_string()); + + let x = Variable::universal("x"); + let y = Variable::universal("y"); + let z = Variable::universal("z"); + + let tx = Primitive::Variable(x.clone()); + let _ty = Primitive::Variable(y.clone()); + let tz = Primitive::Variable(z.clone()); + + let mut rule = ChaseRule::default(); + rule.add_head_atom(PrimitiveAtom::new( + Origin::default(), + c, + vec![tx.clone(), tz.clone()], + )); + rule.add_positive_atom(VariableAtom::new( + Origin::default(), + a, + 
vec![x.clone(), y.clone()], + )); + rule.add_positive_atom(VariableAtom::new( + Origin::default(), + b, + vec![y.clone(), z.clone()], + )); + + (rule, vec![x, y, z]) } fn get_test_rule_with_vars_where_predicates_are_the_same() -> (ChaseRule, Vec) { - let a = Identifier("a".to_string()); - - let x = Variable::Universal("x".to_string()); - let y = Variable::Universal("y".to_string()); - let z = Variable::Universal("z".to_string()); - - let tx = PrimitiveTerm::Variable(x.clone()); - let _ty = PrimitiveTerm::Variable(y.clone()); - let tz = PrimitiveTerm::Variable(z.clone()); - - ( - ChaseRule::positive_rule( - vec![PrimitiveAtom::new(a.clone(), vec![tx.clone(), tz.clone()])], - vec![ - VariableAtom::new(a.clone(), vec![x.clone(), y.clone()]), - VariableAtom::new(a, vec![y.clone(), z.clone()]), - ], - vec![], - ), - vec![x, y, z], - ) + let a = Tag::new("a".to_string()); + + let x = Variable::universal("x"); + let y = Variable::universal("y"); + let z = Variable::universal("z"); + + let tx = Primitive::Variable(x.clone()); + let _ty = Primitive::Variable(y.clone()); + let tz = Primitive::Variable(z.clone()); + + let mut rule = ChaseRule::default(); + rule.add_head_atom(PrimitiveAtom::new( + Origin::default(), + a.clone(), + vec![tx.clone(), tz.clone()], + )); + rule.add_positive_atom(VariableAtom::new( + Origin::default(), + a.clone(), + vec![x.clone(), y.clone()], + )); + rule.add_positive_atom(VariableAtom::new( + Origin::default(), + a, + vec![y.clone(), z.clone()], + )); + + (rule, vec![x, y, z]) } enum RulePredicateVariant { @@ -733,7 +757,7 @@ mod test { let (rule, vars) = get_test_rule_with_vars_where_predicates_are_different(); let y = vars[1].clone(); let empty_ord = VariableOrder::new(); - let empty_trie_cache: HashMap> = HashMap::new(); + let empty_trie_cache: HashMap> = HashMap::new(); let expected = vec![y]; @@ -747,7 +771,7 @@ mod test { let (rule, vars) = get_test_rule_with_vars_where_predicates_are_the_same(); let y = vars[1].clone(); let empty_ord = VariableOrder::new(); - let empty_trie_cache: HashMap> = HashMap::new(); + let empty_trie_cache: HashMap> = HashMap::new(); let expected = vec![y]; @@ -763,7 +787,10 @@ mod test { .into_iter() .unzip(); - let program = ChaseProgram::builder().rules(rules).build(); + let mut program = ChaseProgram::default(); + for rule in rules { + program.add_rule(rule); + } let rule_vars = &var_lists[0]; let rule_var_orders: Vec = vec![ @@ -792,7 +819,10 @@ mod test { .into_iter() .unzip(); - let program = ChaseProgram::builder().rules(rules).build(); + let mut program = ChaseProgram::default(); + for rule in rules { + program.add_rule(rule); + } let rule_vars = &var_lists[0]; let rule_var_orders: Vec = vec![ @@ -823,7 +853,10 @@ mod test { .into_iter() .unzip(); - let program = ChaseProgram::builder().rules(rules).build(); + let mut program = ChaseProgram::default(); + for rule in rules { + program.add_rule(rule); + } let rule_1_vars = &var_lists[0]; let rule_1_var_orders: Vec = vec![VariableOrder::from_vec(vec![ @@ -853,13 +886,13 @@ mod test { fn get_part_of_galen_test_ruleset_ie_first_5_rules_without_constant( ) -> TestRuleSetWithAdditionalInfo { - let init = Identifier("init".to_string()); - let sub_class_of = Identifier("sub_class_of".to_string()); - let is_main_class = Identifier("is_main_class".to_string()); - let conj = Identifier("conj".to_string()); - let is_sub_class = Identifier("is_sub_class".to_string()); - let xe = Identifier("xe".to_string()); - let exists = Identifier("exists".to_string()); + let init = 
Tag::new("init".to_string()); + let sub_class_of = Tag::new("sub_class_of".to_string()); + let is_main_class = Tag::new("is_main_class".to_string()); + let conj = Tag::new("conj".to_string()); + let is_sub_class = Tag::new("is_sub_class".to_string()); + let xe = Tag::new("xe".to_string()); + let exists = Tag::new("exists".to_string()); let predicates = vec![ (init.clone(), 1), @@ -871,25 +904,33 @@ mod test { (exists.clone(), 3), ]; - let c = Variable::Universal("c".to_string()); - let d1 = Variable::Universal("d1".to_string()); - let d2 = Variable::Universal("d2".to_string()); - let y = Variable::Universal("y".to_string()); - let r = Variable::Universal("r".to_string()); - let e = Variable::Universal("e".to_string()); + let c = Variable::universal("c"); + let d1 = Variable::universal("d1"); + let d2 = Variable::universal("d2"); + let y = Variable::universal("y"); + let r = Variable::universal("r"); + let e = Variable::universal("e"); - let tc = PrimitiveTerm::Variable(c.clone()); - let td1 = PrimitiveTerm::Variable(d1.clone()); - let td2 = PrimitiveTerm::Variable(d2.clone()); - let ty = PrimitiveTerm::Variable(y.clone()); - let tr = PrimitiveTerm::Variable(r.clone()); - let te = PrimitiveTerm::Variable(e.clone()); + let tc = Primitive::Variable(c.clone()); + let td1 = Primitive::Variable(d1.clone()); + let td2 = Primitive::Variable(d2.clone()); + let ty = Primitive::Variable(y.clone()); + let tr = Primitive::Variable(r.clone()); + let te = Primitive::Variable(e.clone()); let (rules, variables) = [ ( ChaseRule::positive_rule( - vec![PrimitiveAtom::new(init.clone(), vec![tc.clone()])], - vec![VariableAtom::new(is_main_class, vec![c.clone()])], + vec![PrimitiveAtom::new( + Origin::default(), + init.clone(), + vec![tc.clone()], + )], + vec![VariableAtom::new( + Origin::default(), + is_main_class, + vec![c.clone()], + )], vec![], ), vec![c.clone()], @@ -897,10 +938,11 @@ mod test { ( ChaseRule::positive_rule( vec![PrimitiveAtom::new( + Origin::default(), sub_class_of.clone(), vec![tc.clone(), tc.clone()], )], - vec![VariableAtom::new(init, vec![c.clone()])], + vec![VariableAtom::new(Origin::default(), init, vec![c.clone()])], vec![], ), vec![c.clone()], @@ -908,12 +950,28 @@ mod test { ( ChaseRule::positive_rule( vec![ - PrimitiveAtom::new(sub_class_of.clone(), vec![tc.clone(), td1.clone()]), - PrimitiveAtom::new(sub_class_of.clone(), vec![tc.clone(), td2.clone()]), + PrimitiveAtom::new( + Origin::default(), + sub_class_of.clone(), + vec![tc.clone(), td1.clone()], + ), + PrimitiveAtom::new( + Origin::default(), + sub_class_of.clone(), + vec![tc.clone(), td2.clone()], + ), ], vec![ - VariableAtom::new(sub_class_of.clone(), vec![c.clone(), y.clone()]), - VariableAtom::new(conj.clone(), vec![y.clone(), d1.clone(), d2.clone()]), + VariableAtom::new( + Origin::default(), + sub_class_of.clone(), + vec![c.clone(), y.clone()], + ), + VariableAtom::new( + Origin::default(), + conj.clone(), + vec![y.clone(), d1.clone(), d2.clone()], + ), ], vec![], ), @@ -922,14 +980,27 @@ mod test { ( ChaseRule::positive_rule( vec![PrimitiveAtom::new( + Origin::default(), sub_class_of.clone(), vec![tc.clone(), ty.clone()], )], vec![ - VariableAtom::new(sub_class_of.clone(), vec![c.clone(), d1.clone()]), - VariableAtom::new(sub_class_of.clone(), vec![c.clone(), d2.clone()]), - VariableAtom::new(conj, vec![y.clone(), d1.clone(), d2.clone()]), - VariableAtom::new(is_sub_class, vec![y.clone()]), + VariableAtom::new( + Origin::default(), + sub_class_of.clone(), + vec![c.clone(), d1.clone()], + ), + VariableAtom::new( + 
Origin::default(), + sub_class_of.clone(), + vec![c.clone(), d2.clone()], + ), + VariableAtom::new( + Origin::default(), + conj, + vec![y.clone(), d1.clone(), d2.clone()], + ), + VariableAtom::new(Origin::default(), is_sub_class, vec![y.clone()]), ], vec![], ), @@ -938,12 +1009,21 @@ mod test { ( ChaseRule::positive_rule( vec![PrimitiveAtom::new( + Origin::default(), xe, vec![tc.clone(), tr.clone(), te.clone()], )], vec![ - VariableAtom::new(sub_class_of, vec![e.clone(), y.clone()]), - VariableAtom::new(exists, vec![y.clone(), r.clone(), c.clone()]), + VariableAtom::new( + Origin::default(), + sub_class_of, + vec![e.clone(), y.clone()], + ), + VariableAtom::new( + Origin::default(), + exists, + vec![y.clone(), r.clone(), c.clone()], + ), ], vec![], ), @@ -957,28 +1037,17 @@ mod test { } /// Helper function to create source-like imports - fn csv_import(predicate: Identifier, arity: usize) -> ImportDirective { - let attributes = MapDataValue::from_iter([ - ( - AnyDataValue::new_iri(PARAMETER_NAME_RESOURCE.to_string()), - AnyDataValue::new_plain_string("".to_string()), - ), - ( - AnyDataValue::new_iri(PARAMETER_NAME_FORMAT.to_string()), - TupleDataValue::from_iter( - vec![VALUE_FORMAT_ANY; arity] - .iter() - .map(|format| AnyDataValue::new_plain_string((*format).to_string())) - .collect::>(), - ) - .into(), - ), - ]); - ImportDirective::from(ImportExportDirective { - predicate, - format: FileFormat::CSV, - attributes, - }) + fn csv_import(predicate: Tag, arity: usize) -> ChaseImport { + let handler = DsvHandler::new( + b',', + ImportExportResource::Stdout, + DsvValueFormats::default(arity), + None, + CompressionFormat::None, + Direction::Import, + ); + + ChaseImport::new(Origin::default(), predicate, Box::new(handler)) } #[test] @@ -987,14 +1056,14 @@ mod test { let (rules, var_lists, predicates) = get_part_of_galen_test_ruleset_ie_first_5_rules_without_constant(); - let program = ChaseProgram::builder() - .imports( - [1usize, 2, 3, 4, 6] - .map(|idx| csv_import(predicates[idx].0.clone(), predicates[idx].1)), - ) - .expect("these imports should not lead to errors") - .rules(rules) - .build(); + let mut program = ChaseProgram::default(); + for predicate_index in [1usize, 2, 3, 4, 6] { + let (predicate, arity) = predicates[predicate_index].clone(); + program.add_import(csv_import(predicate, arity)); + } + for rule in rules { + program.add_rule(rule); + } let rule_1_vars = &var_lists[0]; let rule_1_var_orders: Vec = vec![ @@ -1046,18 +1115,18 @@ mod test { } fn get_el_test_ruleset_without_constants() -> TestRuleSetWithAdditionalInfo { - let init = Identifier("init".to_string()); - let sub_class_of = Identifier("sub_class_of".to_string()); - let is_main_class = Identifier("is_main_class".to_string()); - let conj = Identifier("conj".to_string()); - let is_sub_class = Identifier("is_sub_class".to_string()); - let xe = Identifier("xe".to_string()); - let exists = Identifier("exists".to_string()); - let aux_subsub_ext = Identifier("aux_subsub_ext".to_string()); - let sub_prop = Identifier("sub_prop".to_string()); - let aux = Identifier("aux".to_string()); - let sub_prop_chain = Identifier("sub_prop_chain".to_string()); - let main_sub_class_of = Identifier("main_sub_class_of".to_string()); + let init = Tag::new("init".to_string()); + let sub_class_of = Tag::new("sub_class_of".to_string()); + let is_main_class = Tag::new("is_main_class".to_string()); + let conj = Tag::new("conj".to_string()); + let is_sub_class = Tag::new("is_sub_class".to_string()); + let xe = Tag::new("xe".to_string()); + let exists 
= Tag::new("exists".to_string()); + let aux_subsub_ext = Tag::new("aux_subsub_ext".to_string()); + let sub_prop = Tag::new("sub_prop".to_string()); + let aux = Tag::new("aux".to_string()); + let sub_prop_chain = Tag::new("sub_prop_chain".to_string()); + let main_sub_class_of = Tag::new("main_sub_class_of".to_string()); let predicates = vec![ (init.clone(), 1), @@ -1074,41 +1143,49 @@ mod test { (main_sub_class_of.clone(), 2), ]; - let c = Variable::Universal("c".to_string()); - let d1 = Variable::Universal("d1".to_string()); - let d2 = Variable::Universal("d2".to_string()); - let y = Variable::Universal("y".to_string()); - let r = Variable::Universal("r".to_string()); - let e = Variable::Universal("e".to_string()); - let s = Variable::Universal("s".to_string()); - let r1 = Variable::Universal("r1".to_string()); - let r2 = Variable::Universal("r2".to_string()); - let s1 = Variable::Universal("s1".to_string()); - let s2 = Variable::Universal("s2".to_string()); - let d = Variable::Universal("d".to_string()); - let a = Variable::Universal("a".to_string()); - let b = Variable::Universal("b".to_string()); - - let tc = PrimitiveTerm::Variable(c.clone()); - let td1 = PrimitiveTerm::Variable(d1.clone()); - let td2 = PrimitiveTerm::Variable(d2.clone()); - let ty = PrimitiveTerm::Variable(y.clone()); - let tr = PrimitiveTerm::Variable(r.clone()); - let te = PrimitiveTerm::Variable(e.clone()); - let ts = PrimitiveTerm::Variable(s.clone()); - let _tr1 = PrimitiveTerm::Variable(r1.clone()); - let _tr2 = PrimitiveTerm::Variable(r2.clone()); - let _ts1 = PrimitiveTerm::Variable(s1.clone()); - let _ts2 = PrimitiveTerm::Variable(s2.clone()); - let td = PrimitiveTerm::Variable(d.clone()); - let ta = PrimitiveTerm::Variable(a.clone()); - let tb = PrimitiveTerm::Variable(b.clone()); + let c = Variable::universal("c"); + let d1 = Variable::universal("d1"); + let d2 = Variable::universal("d2"); + let y = Variable::universal("y"); + let r = Variable::universal("r"); + let e = Variable::universal("e"); + let s = Variable::universal("s"); + let r1 = Variable::universal("r1"); + let r2 = Variable::universal("r2"); + let s1 = Variable::universal("s1"); + let s2 = Variable::universal("s2"); + let d = Variable::universal("d"); + let a = Variable::universal("a"); + let b = Variable::universal("b"); + + let tc = Primitive::Variable(c.clone()); + let td1 = Primitive::Variable(d1.clone()); + let td2 = Primitive::Variable(d2.clone()); + let ty = Primitive::Variable(y.clone()); + let tr = Primitive::Variable(r.clone()); + let te = Primitive::Variable(e.clone()); + let ts = Primitive::Variable(s.clone()); + let _tr1 = Primitive::Variable(r1.clone()); + let _tr2 = Primitive::Variable(r2.clone()); + let _ts1 = Primitive::Variable(s1.clone()); + let _ts2 = Primitive::Variable(s2.clone()); + let td = Primitive::Variable(d.clone()); + let ta = Primitive::Variable(a.clone()); + let tb = Primitive::Variable(b.clone()); let (rules, variables) = [ ( ChaseRule::positive_rule( - vec![PrimitiveAtom::new(init.clone(), vec![tc.clone()])], - vec![VariableAtom::new(is_main_class.clone(), vec![c.clone()])], + vec![PrimitiveAtom::new( + Origin::default(), + init.clone(), + vec![tc.clone()], + )], + vec![VariableAtom::new( + Origin::default(), + is_main_class.clone(), + vec![c.clone()], + )], vec![], ), vec![c.clone()], @@ -1116,10 +1193,15 @@ mod test { ( ChaseRule::positive_rule( vec![PrimitiveAtom::new( + Origin::default(), sub_class_of.clone(), vec![tc.clone(), tc.clone()], )], - vec![VariableAtom::new(init.clone(), vec![c.clone()])], + 
vec![VariableAtom::new(
+                    Origin::default(),
+                    init.clone(),
+                    vec![c.clone()],
+                )],
                 vec![],
             ),
             vec![c.clone()],
@@ -1127,12 +1209,28 @@ mod test {
             (
                 ChaseRule::positive_rule(
                     vec![
-                        PrimitiveAtom::new(sub_class_of.clone(), vec![tc.clone(), td1.clone()]),
-                        PrimitiveAtom::new(sub_class_of.clone(), vec![tc.clone(), td2.clone()]),
+                        PrimitiveAtom::new(
+                            Origin::default(),
+                            sub_class_of.clone(),
+                            vec![tc.clone(), td1.clone()],
+                        ),
+                        PrimitiveAtom::new(
+                            Origin::default(),
+                            sub_class_of.clone(),
+                            vec![tc.clone(), td2.clone()],
+                        ),
                     ],
                     vec![
-                        VariableAtom::new(sub_class_of.clone(), vec![c.clone(), y.clone()]),
-                        VariableAtom::new(conj.clone(), vec![y.clone(), d1.clone(), d2.clone()]),
+                        VariableAtom::new(
+                            Origin::default(),
+                            sub_class_of.clone(),
+                            vec![c.clone(), y.clone()],
+                        ),
+                        VariableAtom::new(
+                            Origin::default(),
+                            conj.clone(),
+                            vec![y.clone(), d1.clone(), d2.clone()],
+                        ),
                     ],
                     vec![],
                 ),
@@ -1141,14 +1239,27 @@ mod test {
             (
                 ChaseRule::positive_rule(
                     vec![PrimitiveAtom::new(
+                        Origin::default(),
                         sub_class_of.clone(),
                         vec![tc.clone(), ty.clone()],
                     )],
                     vec![
-                        VariableAtom::new(sub_class_of.clone(), vec![c.clone(), d1.clone()]),
-                        VariableAtom::new(sub_class_of.clone(), vec![c.clone(), d2.clone()]),
-                        VariableAtom::new(conj, vec![y.clone(), d1.clone(), d2.clone()]),
-                        VariableAtom::new(is_sub_class.clone(), vec![y.clone()]),
+                        VariableAtom::new(
+                            Origin::default(),
+                            sub_class_of.clone(),
+                            vec![c.clone(), d1.clone()],
+                        ),
+                        VariableAtom::new(
+                            Origin::default(),
+                            sub_class_of.clone(),
+                            vec![c.clone(), d2.clone()],
+                        ),
+                        VariableAtom::new(
+                            Origin::default(),
+                            conj,
+                            vec![y.clone(), d1.clone(), d2.clone()],
+                        ),
+                        VariableAtom::new(Origin::default(), is_sub_class.clone(), vec![y.clone()]),
                     ],
                     vec![],
                 ),
@@ -1157,12 +1268,21 @@ mod test {
             (
                 ChaseRule::positive_rule(
                     vec![PrimitiveAtom::new(
+                        Origin::default(),
                         xe.clone(),
                         vec![tc.clone(), tr.clone(), te.clone()],
                     )],
                     vec![
-                        VariableAtom::new(sub_class_of.clone(), vec![e.clone(), y.clone()]),
-                        VariableAtom::new(exists.clone(), vec![y.clone(), r.clone(), c.clone()]),
+                        VariableAtom::new(
+                            Origin::default(),
+                            sub_class_of.clone(),
+                            vec![e.clone(), y.clone()],
+                        ),
+                        VariableAtom::new(
+                            Origin::default(),
+                            exists.clone(),
+                            vec![y.clone(), r.clone(), c.clone()],
+                        ),
                     ],
                     vec![],
                 ),
@@ -1171,13 +1291,22 @@ mod test {
             (
                 ChaseRule::positive_rule(
                     vec![PrimitiveAtom::new(
+                        Origin::default(),
                        aux_subsub_ext.clone(),
                         vec![td.clone(), tr.clone(), ty.clone()],
                     )],
                     vec![
-                        VariableAtom::new(sub_prop.clone(), vec![r.clone(), s.clone()]),
-                        VariableAtom::new(exists, vec![y.clone(), s.clone(), d.clone()]),
-                        VariableAtom::new(is_sub_class, vec![y.clone()]),
+                        VariableAtom::new(
+                            Origin::default(),
+                            sub_prop.clone(),
+                            vec![r.clone(), s.clone()],
+                        ),
+                        VariableAtom::new(
+                            Origin::default(),
+                            exists,
+                            vec![y.clone(), s.clone(), d.clone()],
+                        ),
+                        VariableAtom::new(Origin::default(), is_sub_class, vec![y.clone()]),
                     ],
                     vec![],
                 ),
@@ -1186,12 +1315,21 @@ mod test {
             (
                 ChaseRule::positive_rule(
                     vec![PrimitiveAtom::new(
+                        Origin::default(),
                         aux.clone(),
                         vec![tc.clone(), tr.clone(), ty.clone()],
                     )],
                     vec![
-                        VariableAtom::new(sub_class_of.clone(), vec![c.clone(), d.clone()]),
-                        VariableAtom::new(aux_subsub_ext, vec![d.clone(), r.clone(), y.clone()]),
+                        VariableAtom::new(
+                            Origin::default(),
+                            sub_class_of.clone(),
+                            vec![c.clone(), d.clone()],
+                        ),
+                        VariableAtom::new(
+                            Origin::default(),
+                            aux_subsub_ext,
+                            vec![d.clone(), r.clone(), y.clone()],
+                        ),
                     ],
                     vec![],
                 ),
@@ -1200,12 +1338,21 @@ mod test {
             (
                 ChaseRule::positive_rule(
                     vec![PrimitiveAtom::new(
+                        Origin::default(),
                         sub_class_of.clone(),
                         vec![te.clone(), ty.clone()],
                     )],
                     vec![
-                        VariableAtom::new(xe.clone(), vec![c.clone(), r.clone(), e.clone()]),
-                        VariableAtom::new(aux, vec![c.clone(), r.clone(), y.clone()]),
+                        VariableAtom::new(
+                            Origin::default(),
+                            xe.clone(),
+                            vec![c.clone(), r.clone(), e.clone()],
+                        ),
+                        VariableAtom::new(
+                            Origin::default(),
+                            aux,
+                            vec![c.clone(), r.clone(), y.clone()],
+                        ),
                     ],
                     vec![],
                 ),
@@ -1214,12 +1361,21 @@ mod test {
             (
                 ChaseRule::positive_rule(
                     vec![PrimitiveAtom::new(
+                        Origin::default(),
                         sub_class_of.clone(),
                         vec![tc.clone(), te.clone()],
                     )],
                     vec![
-                        VariableAtom::new(sub_class_of.clone(), vec![c.clone(), d.clone()]),
-                        VariableAtom::new(sub_class_of.clone(), vec![d.clone(), e.clone()]),
+                        VariableAtom::new(
+                            Origin::default(),
+                            sub_class_of.clone(),
+                            vec![c.clone(), d.clone()],
+                        ),
+                        VariableAtom::new(
+                            Origin::default(),
+                            sub_class_of.clone(),
+                            vec![d.clone(), e.clone()],
+                        ),
                     ],
                     vec![],
                 ),
@@ -1228,15 +1384,36 @@ mod test {
             (
                 ChaseRule::positive_rule(
                     vec![PrimitiveAtom::new(
+                        Origin::default(),
                         xe.clone(),
                         vec![td.clone(), ts.clone(), te.clone()],
                     )],
                     vec![
-                        VariableAtom::new(xe.clone(), vec![c.clone(), r1.clone(), e.clone()]),
-                        VariableAtom::new(xe.clone(), vec![d.clone(), r2.clone(), c.clone()]),
-                        VariableAtom::new(sub_prop.clone(), vec![r1.clone(), s1.clone()]),
-                        VariableAtom::new(sub_prop, vec![r2.clone(), s2.clone()]),
-                        VariableAtom::new(sub_prop_chain, vec![s1.clone(), s2.clone(), s.clone()]),
+                        VariableAtom::new(
+                            Origin::default(),
+                            xe.clone(),
+                            vec![c.clone(), r1.clone(), e.clone()],
+                        ),
+                        VariableAtom::new(
+                            Origin::default(),
+                            xe.clone(),
+                            vec![d.clone(), r2.clone(), c.clone()],
+                        ),
+                        VariableAtom::new(
+                            Origin::default(),
+                            sub_prop.clone(),
+                            vec![r1.clone(), s1.clone()],
+                        ),
+                        VariableAtom::new(
+                            Origin::default(),
+                            sub_prop,
+                            vec![r2.clone(), s2.clone()],
+                        ),
+                        VariableAtom::new(
+                            Origin::default(),
+                            sub_prop_chain,
+                            vec![s1.clone(), s2.clone(), s.clone()],
+                        ),
                     ],
                     vec![],
                 ),
@@ -1244,8 +1421,16 @@ mod test {
             ),
             (
                 ChaseRule::positive_rule(
-                    vec![PrimitiveAtom::new(init, vec![tc.clone()])],
-                    vec![VariableAtom::new(xe, vec![c.clone(), r.clone(), e.clone()])],
+                    vec![PrimitiveAtom::new(
+                        Origin::default(),
+                        init,
+                        vec![tc.clone()],
+                    )],
+                    vec![VariableAtom::new(
+                        Origin::default(),
+                        xe,
+                        vec![c.clone(), r.clone(), e.clone()],
+                    )],
                     vec![],
                 ),
                 vec![c, r, e],
             ),
             (
                 ChaseRule::positive_rule(
                     vec![PrimitiveAtom::new(
+                        Origin::default(),
                         main_sub_class_of,
                         vec![ta.clone(), tb.clone()],
                     )],
                     vec![
-                        VariableAtom::new(sub_class_of, vec![a.clone(), b.clone()]),
-                        VariableAtom::new(is_main_class.clone(), vec![a.clone()]),
-                        VariableAtom::new(is_main_class, vec![b.clone()]),
+                        VariableAtom::new(
+                            Origin::default(),
+                            sub_class_of,
+                            vec![a.clone(), b.clone()],
+                        ),
+                        VariableAtom::new(
+                            Origin::default(),
+                            is_main_class.clone(),
+                            vec![a.clone()],
+                        ),
+                        VariableAtom::new(Origin::default(), is_main_class, vec![b.clone()]),
                     ],
                     vec![],
                 ),
@@ -1277,14 +1471,14 @@ mod test {
     fn build_preferable_variable_orders_with_el_without_constant() {
         let (rules, var_lists, predicates) = get_el_test_ruleset_without_constants();
 
-        let program = ChaseProgram::builder()
-            .imports(
-                [1usize, 2, 3, 4, 6, 8, 10]
-                    .map(|idx| csv_import(predicates[idx].0.clone(), predicates[idx].1)),
-            )
-            .expect("these imports should not lead to errors")
-            .rules(rules)
-            .build();
+        let mut program = ChaseProgram::default();
+        for predicate_index in [1usize, 2, 3, 4, 6, 8, 10] {
+            let (predicate, arity) = predicates[predicate_index].clone();
+            program.add_import(csv_import(predicate, arity));
+        }
+        for rule in rules {
+            program.add_rule(rule);
+        }
 
         let rule_1_vars = &var_lists[0];
         let rule_1_var_orders: Vec<VariableOrder> = vec![
diff --git a/nemo/src/chase_model/components/export.rs b/nemo/src/chase_model/components/export.rs
index 78e922ce2..60e25ce75 100644
--- a/nemo/src/chase_model/components/export.rs
+++ b/nemo/src/chase_model/components/export.rs
@@ -8,7 +8,7 @@ use crate::{
 use super::ChaseComponent;
 
 /// Component for handling exports
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 pub(crate) struct ChaseExport {
     /// Origin of this component
     origin: Origin,
@@ -42,6 +42,11 @@ impl ChaseExport {
     pub(crate) fn handler(&self) -> &Box<dyn ImportExportHandler> {
         &self.handler
     }
+
+    /// Return the arity of this export.
+    pub(crate) fn arity(&self) -> usize {
+        self.handler.predicate_arity()
+    }
 }
 
 impl ChaseComponent for ChaseExport {
diff --git a/nemo/src/chase_model/components/filter.rs b/nemo/src/chase_model/components/filter.rs
index 8464fae90..44ef80df2 100644
--- a/nemo/src/chase_model/components/filter.rs
+++ b/nemo/src/chase_model/components/filter.rs
@@ -7,7 +7,7 @@ use super::term::operation_term::OperationTerm;
 /// Indicates that a new value must be created according to [OperationTerm].
 ///
 /// The result will be "stored" in the given variable.
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 pub(crate) struct ChaseFilter {
     /// Origin of this component
     origin: Origin,
diff --git a/nemo/src/chase_model/components/import.rs b/nemo/src/chase_model/components/import.rs
index dd9fd0033..7235c4404 100644
--- a/nemo/src/chase_model/components/import.rs
+++ b/nemo/src/chase_model/components/import.rs
@@ -8,7 +8,7 @@ use crate::{
 use super::ChaseComponent;
 
 /// Component for handling imports
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 pub(crate) struct ChaseImport {
     /// Origin of this component
     origin: Origin,
@@ -42,6 +42,11 @@ impl ChaseImport {
     pub(crate) fn handler(&self) -> &Box<dyn ImportExportHandler> {
         &self.handler
     }
+
+    /// Return the arity of this import.
+    pub(crate) fn arity(&self) -> usize {
+        self.handler.predicate_arity()
+    }
 }
 
 impl ChaseComponent for ChaseImport {
diff --git a/nemo/src/chase_model/components/operation.rs b/nemo/src/chase_model/components/operation.rs
index d9ab7bc29..b7d886899 100644
--- a/nemo/src/chase_model/components/operation.rs
+++ b/nemo/src/chase_model/components/operation.rs
@@ -7,7 +7,7 @@ use super::{term::operation_term::OperationTerm, ChaseComponent};
 /// Indicates that a new value must be created according to [OperationTerm].
 ///
 /// The result will be "stored" in the given variable.
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 pub(crate) struct ChaseOperation {
     /// Origin of this component
     origin: Origin,
diff --git a/nemo/src/chase_model/components/program.rs b/nemo/src/chase_model/components/program.rs
index a7c01c171..89acf2540 100644
--- a/nemo/src/chase_model/components/program.rs
+++ b/nemo/src/chase_model/components/program.rs
@@ -1,13 +1,22 @@
 //! This module defines [ChaseProgram].
 
+use std::collections::HashSet;
+
 use crate::rule_model::components::tag::Tag;
 
-use super::{atom::ground_atom::GroundAtom, rule::ChaseRule};
+use super::{
+    atom::{ground_atom::GroundAtom, ChaseAtom},
+    export::ChaseExport,
+    import::ChaseImport,
+    rule::ChaseRule,
+};
 
 #[derive(Debug, Default)]
 pub(crate) struct ChaseProgram {
-    // import_handlers: Vec<(Identifier, Box<dyn ImportExportHandler>)>,
-    // export_handlers: Vec<(Identifier, Box<dyn ImportExportHandler>)>,
+    /// Imports
+    imports: Vec<ChaseImport>,
+    /// Exports
+    exports: Vec<ChaseExport>,
     /// Rules
     rules: Vec<ChaseRule>,
     /// Facts
@@ -19,11 +28,15 @@ pub(crate) struct ChaseProgram {
 impl ChaseProgram {
     /// Create a new [ChaseProgram].
     pub(crate) fn new(
+        imports: Vec<ChaseImport>,
+        exports: Vec<ChaseExport>,
         rules: Vec<ChaseRule>,
         facts: Vec<GroundAtom>,
         output_predicates: Vec<Tag>,
     ) -> Self {
         Self {
+            imports,
+            exports,
             rules,
             facts,
             output_predicates,
@@ -44,4 +57,52 @@ impl ChaseProgram {
     pub(crate) fn add_output_predicate(&mut self, predicate: Tag) {
         self.output_predicates.push(predicate)
     }
+
+    /// Add a new import to the program.
+    pub(crate) fn add_import(&mut self, import: ChaseImport) {
+        self.imports.push(import);
+    }
+
+    /// Add a new export to the program.
+    pub(crate) fn add_export(&mut self, export: ChaseExport) {
+        self.exports.push(export);
+    }
+}
+
+impl ChaseProgram {
+    /// Return a list of rules contained in this program.
+    pub(crate) fn rules(&self) -> &Vec<ChaseRule> {
+        &self.rules
+    }
+
+    /// Return a list of imports contained in this program.
+    pub(crate) fn imports(&self) -> &Vec<ChaseImport> {
+        &self.imports
+    }
+
+    /// Return a list of exports contained in this program.
+    pub(crate) fn exports(&self) -> &Vec<ChaseExport> {
+        &self.exports
+    }
+
+    /// Return a list of facts contained in this program.
+    pub(crate) fn facts(&self) -> &Vec<GroundAtom> {
+        &self.facts
+    }
+
+    /// Return a list of output predicates contained in this program.
+    pub(crate) fn output_predicates(&self) -> &Vec<Tag> {
+        &self.output_predicates
+    }
+}
+
+impl ChaseProgram {
+    /// Return a HashSet of all idb predicates (predicates occurring in rule heads) in the program.
+    pub fn idb_predicates(&self) -> HashSet<Tag> {
+        self.rules()
+            .iter()
+            .flat_map(|rule| rule.head())
+            .map(|atom| atom.predicate())
+            .collect()
+    }
 }
diff --git a/nemo/src/chase_model/components/rule.rs b/nemo/src/chase_model/components/rule.rs
index 6baa5317e..60a42c5ef 100644
--- a/nemo/src/chase_model/components/rule.rs
+++ b/nemo/src/chase_model/components/rule.rs
@@ -11,7 +11,7 @@ use super::{
 };
 
 /// The positive body of a [ChaseRule]
-#[derive(Debug, Default)]
+#[derive(Debug, Default, Clone)]
 struct ChaseRuleBodyPositive {
     /// Atoms that bind variables
     atoms: Vec<VariableAtom>,
@@ -22,7 +22,7 @@ struct ChaseRuleBodyPositive {
 }
 
 /// The negative body of a [ChaseRule]
-#[derive(Debug, Default)]
+#[derive(Debug, Default, Clone)]
 struct ChaseRuleBodyNegative {
     /// Negated atoms
     atoms: Vec<VariableAtom>,
@@ -31,7 +31,7 @@ struct ChaseRuleBodyNegative {
 }
 
 /// Handling of aggregation within a [ChaseRule]
-#[derive(Debug, Default)]
+#[derive(Debug, Default, Clone)]
 struct ChaseRuleAggregation {
     /// Aggregate
     aggregate: Option<ChaseAggregate>,
@@ -43,7 +43,7 @@ struct ChaseRuleAggregation {
 }
 
 /// Head of a [ChaseRule]
-#[derive(Debug, Default)]
+#[derive(Debug, Default, Clone)]
 struct ChaseRuleHead {
     /// Head atoms of the rule
     atoms: Vec<PrimitiveAtom>,
@@ -53,7 +53,7 @@ struct ChaseRuleHead {
 
 /// Representation of a rule in a [ChaseProgram][super::program::ChaseProgram]
 #[allow(dead_code)]
-#[derive(Debug, Default)]
+#[derive(Debug, Default, Clone)]
 pub(crate) struct ChaseRule {
     /// Origin of this component
     origin: Origin,
@@ -68,6 +68,74 @@ pub(crate) struct ChaseRule {
     head: ChaseRuleHead,
 }
 
+impl ChaseRule {
+    /// Create a simple positive rule.
+    pub(crate) fn positive_rule(
+        head: Vec<PrimitiveAtom>,
+        body: Vec<VariableAtom>,
+        filters: Vec<ChaseFilter>,
+    ) -> Self {
+        let mut result = Self::default();
+        result.head.atoms = head;
+        result.positive.atoms = body;
+        result.positive.filters = filters;
+
+        result
+    }
+}
+
+impl ChaseRule {
+    /// Return the list of head atoms contained in this rule.
+    pub(crate) fn head(&self) -> &Vec<PrimitiveAtom> {
+        &self.head.atoms
+    }
+
+    /// Return the list of positive body atoms contained in this rule.
+    pub(crate) fn positive_body(&self) -> &Vec<VariableAtom> {
+        &self.positive.atoms
+    }
+
+    /// Return the list of filters that will be applied to the positive part of the body.
+    pub(crate) fn positive_filters(&self) -> &Vec<ChaseFilter> {
+        &self.positive.filters
+    }
+
+    /// Return the list of operations that will be applied to the positive part of the body.
+    pub(crate) fn positive_operations(&self) -> &Vec<ChaseOperation> {
+        &self.positive.operations
+    }
+
+    /// Return the list of negative body atoms contained in this rule.
+    pub(crate) fn negative_body(&self) -> &Vec<VariableAtom> {
+        &self.negative.atoms
+    }
+
+    /// Return the list of filters that will be applied to the negative part of the body.
+    pub(crate) fn negative_filters(&self) -> &Vec<Vec<ChaseFilter>> {
+        &self.negative.filters
+    }
+
+    /// Return the aggregation that will be evaluated during this rule's application.
+    pub(crate) fn aggregate(&self) -> Option<&ChaseAggregate> {
+        self.aggregation.aggregate.as_ref()
+    }
+
+    /// Return the list of filters that will be applied to the result of the aggregation.
+    pub(crate) fn aggregate_filters(&self) -> &Vec<ChaseFilter> {
+        &self.aggregation.filters
+    }
+
+    /// Return the list of operations that will be applied to the result of the aggregation.
+    pub(crate) fn aggregate_operations(&self) -> &Vec<ChaseOperation> {
+        &self.aggregation.operations
+    }
+
+    /// Return the index of the head atom that contains the aggregation
+    pub(crate) fn aggregate_head_index(&self) -> Option<usize> {
+        self.head.aggregate_head_index
+    }
+}
+
 impl ChaseRule {
     /// Add an atom to the positive part of the body.
     pub(crate) fn add_positive_atom(&mut self, atom: VariableAtom) {
diff --git a/nemo/src/chase_model/components/term/operation_term.rs b/nemo/src/chase_model/components/term/operation_term.rs
index 155bebed3..aba741c7d 100644
--- a/nemo/src/chase_model/components/term/operation_term.rs
+++ b/nemo/src/chase_model/components/term/operation_term.rs
@@ -9,7 +9,7 @@ use crate::rule_model::{
 ///
 /// An action or computation performed on [Term]s.
 /// This can include for example arithmetic or string operations.
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 pub(crate) struct Operation {
     /// Origin of this component
     origin: Origin,
@@ -32,7 +32,7 @@ impl Operation {
 }
 
 /// Term that can be evaluated
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 pub(crate) enum OperationTerm {
     Primitive(Primitive),
     Operation(Operation),
diff --git a/nemo/src/error.rs b/nemo/src/error.rs
index 003fc5553..f80a3cdd3 100644
--- a/nemo/src/error.rs
+++ b/nemo/src/error.rs
@@ -6,8 +6,8 @@ use nemo_physical::datavalues::DataValueCreationError;
 use thiserror::Error;
 
 use crate::{
+    chase_model::analysis::program_analysis::RuleAnalysisError,
     execution::selection_strategy::strategy::SelectionStrategyError,
     io::error::ImportExportError,
-    program_analysis::analysis::RuleAnalysisError,
 };
 
 pub use nemo_physical::error::ReadingError;

From cdb5d6f88c335d2372723953095f0bbaf73e2b0b Mon Sep 17 00:00:00 2001
From: Alex Ivliev
Date: Thu, 5 Sep 2024 17:42:20 +0200
Subject: [PATCH 157/214] Remove origin argument from constructors in chase
 model
---
 .../chase_model/analysis/program_analysis.rs  |  37 +--
 .../chase_model/analysis/variable_order.rs    | 311 ++++--------------
 nemo/src/chase_model/components/aggregate.rs  |   5 +
 .../components/atom/ground_atom.rs            |   4 +-
 .../components/atom/primitive_atom.rs         |   4 +-
 .../components/atom/variable_atom.rs          |   4 +-
 nemo/src/chase_model/components/export.rs     |   8 +-
 nemo/src/chase_model/components/filter.rs     |  23 +-
 nemo/src/chase_model/components/import.rs     |   8 +-
 nemo/src/chase_model/components/operation.rs  |   4 +-
 .../components/term/operation_term.rs         |  27 +-
 nemo/src/chase_model/translation/aggregate.rs |  30 +-
 nemo/src/chase_model/translation/fact.rs      |   4 +-
 nemo/src/chase_model/translation/filter.rs    |  27 +-
 nemo/src/chase_model/translation/import.rs    |   4 +-
 nemo/src/chase_model/translation/operation.rs |   5 +-
 nemo/src/chase_model/translation/rule.rs      |   8 +-
 17 files changed, 173 insertions(+), 340 deletions(-)

diff --git a/nemo/src/chase_model/analysis/program_analysis.rs b/nemo/src/chase_model/analysis/program_analysis.rs
index 2e7a92f03..1175c7cb4 100644
--- a/nemo/src/chase_model/analysis/program_analysis.rs
+++ b/nemo/src/chase_model/analysis/program_analysis.rs
@@ -10,15 +10,12 @@ use crate::{
         rule::ChaseRule,
         term::operation_term::{Operation, OperationTerm},
     },
-    rule_model::{
-        components::{
-            tag::Tag,
-            term::{
-                operation::operation_kind::OperationKind,
-                primitive::{variable::Variable, Primitive},
-            },
+    rule_model::components::{
+        tag::Tag,
+        term::{
+            operation::operation_kind::OperationKind,
+            primitive::{variable::Variable, Primitive},
         },
-        origin::Origin,
     },
 };
 
@@ -151,7 +148,6 @@ fn construct_existential_aux_rule(
                 new_terms.push(generated_variable.clone());
 
                 let new_constraint = Operation::new(
-                    Origin::default(),
                     OperationKind::Equal,
                     vec![
                         OperationTerm::Primitive(Primitive::Variable(generated_variable)),
@@ -159,10 +155,9 @@ fn construct_existential_aux_rule(
                     ],
                 );
 
-                result.add_positive_filter(ChaseFilter::new(
-                    Origin::default(),
-                    OperationTerm::Operation(new_constraint),
-                ));
+                result.add_positive_filter(ChaseFilter::new(OperationTerm::Operation(
+                    new_constraint,
+                )));
             } else {
                 if variable.is_universal() {
                     aux_predicate_terms.push(Primitive::Variable(variable.clone()));
@@ -178,7 +173,6 @@ fn construct_existential_aux_rule(
                 new_terms.push(generated_variable.clone());
 
                 let new_constraint = Operation::new(
-                    Origin::default(),
                     OperationKind::Equal,
                     vec![
                         OperationTerm::Primitive(Primitive::Variable(generated_variable)),
@@ -186,23 +180,18 @@ fn construct_existential_aux_rule(
                     ],
                 );
 
-                result.add_positive_filter(ChaseFilter::new(
-                    Origin::default(),
-                    OperationTerm::Operation(new_constraint),
-                ));
+                result.add_positive_filter(ChaseFilter::new(OperationTerm::Operation(
+                    new_constraint,
+                )));
             }
         }
     }
 
-    result.add_positive_atom(VariableAtom::new(
-        Origin::default(),
-        atom.predicate(),
-        new_terms,
-    ));
+    result.add_positive_atom(VariableAtom::new(atom.predicate(), new_terms));
     }
 
     let temp_head_tag = get_fresh_rule_predicate(rule_index);
-    let temp_head_atom = PrimitiveAtom::new(Origin::default(), temp_head_tag, aux_predicate_terms);
+    let temp_head_atom = PrimitiveAtom::new(temp_head_tag, aux_predicate_terms);
     result.add_head_atom(temp_head_atom);
 
     let mut rule_program = ChaseProgram::default();
diff --git a/nemo/src/chase_model/analysis/variable_order.rs b/nemo/src/chase_model/analysis/variable_order.rs
index 7bf4cf781..a1ef5f3e5 100644
--- a/nemo/src/chase_model/analysis/variable_order.rs
+++ b/nemo/src/chase_model/analysis/variable_order.rs
@@ -555,13 +555,10 @@ mod test {
             dsv::{value_format::DsvValueFormats, DsvHandler},
             Direction, ImportExportResource,
         },
-        rule_model::{
-            components::{
-                import_export::compression::CompressionFormat,
-                tag::Tag,
-                term::primitive::{variable::Variable, Primitive},
-            },
-            origin::Origin,
+        rule_model::components::{
+            import_export::compression::CompressionFormat,
+            tag::Tag,
+            term::primitive::{variable::Variable, Primitive},
         },
     };
 
@@ -638,21 +635,9 @@ mod test {
         let tz = Primitive::Variable(z.clone());
 
         let mut rule = ChaseRule::default();
-        rule.add_head_atom(PrimitiveAtom::new(
-            Origin::default(),
-            c,
-            vec![tx.clone(), tz.clone()],
-        ));
-        rule.add_positive_atom(VariableAtom::new(
-            Origin::default(),
-            a,
-            vec![x.clone(), y.clone()],
-        ));
-        rule.add_positive_atom(VariableAtom::new(
-            Origin::default(),
-            b,
-            vec![y.clone(), z.clone()],
-        ));
+        rule.add_head_atom(PrimitiveAtom::new(c, vec![tx.clone(), tz.clone()]));
+        rule.add_positive_atom(VariableAtom::new(a, vec![x.clone(), y.clone()]));
+        rule.add_positive_atom(VariableAtom::new(b, vec![y.clone(), z.clone()]));
 
         (rule, vec![x, y, z])
     }
@@ -669,21 +654,9 @@ mod test {
         let tz = Primitive::Variable(z.clone());
 
         let mut rule = ChaseRule::default();
-        rule.add_head_atom(PrimitiveAtom::new(
-            Origin::default(),
-            a.clone(),
-            vec![tx.clone(), tz.clone()],
-        ));
-        rule.add_positive_atom(VariableAtom::new(
-            Origin::default(),
-            a.clone(),
-            vec![x.clone(), y.clone()],
-        ));
-        rule.add_positive_atom(VariableAtom::new(
-            Origin::default(),
-            a,
-            vec![y.clone(), z.clone()],
-        ));
+        rule.add_head_atom(PrimitiveAtom::new(a.clone(), vec![tx.clone(), tz.clone()]));
+        rule.add_positive_atom(VariableAtom::new(a.clone(), vec![x.clone(), y.clone()]));
+        rule.add_positive_atom(VariableAtom::new(a, vec![y.clone(), z.clone()]));
 
         (rule, vec![x, y, z])
     }
@@ -921,16 +894,8 @@ mod test {
         let (rules, variables) = [
             (
                 ChaseRule::positive_rule(
-                    vec![PrimitiveAtom::new(
-                        Origin::default(),
-                        init.clone(),
-                        vec![tc.clone()],
-                    )],
-                    vec![VariableAtom::new(
-                        Origin::default(),
-                        is_main_class,
-                        vec![c.clone()],
-                    )],
+                    vec![PrimitiveAtom::new(init.clone(), vec![tc.clone()])],
+                    vec![VariableAtom::new(is_main_class, vec![c.clone()])],
                     vec![],
                 ),
                 vec![c.clone()],
             ),
             (
                 ChaseRule::positive_rule(
                     vec![PrimitiveAtom::new(
-                        Origin::default(),
                         sub_class_of.clone(),
                         vec![tc.clone(), tc.clone()],
                     )],
-                    vec![VariableAtom::new(Origin::default(), init, vec![c.clone()])],
+                    vec![VariableAtom::new(init, vec![c.clone()])],
                     vec![],
                 ),
                 vec![c.clone()],
             ),
             (
                 ChaseRule::positive_rule(
                     vec![
-                        PrimitiveAtom::new(
-                            Origin::default(),
-                            sub_class_of.clone(),
-                            vec![tc.clone(), td1.clone()],
-                        ),
-                        PrimitiveAtom::new(
-                            Origin::default(),
-                            sub_class_of.clone(),
-                            vec![tc.clone(), td2.clone()],
-                        ),
+                        PrimitiveAtom::new(sub_class_of.clone(), vec![tc.clone(), td1.clone()]),
+                        PrimitiveAtom::new(sub_class_of.clone(), vec![tc.clone(), td2.clone()]),
                     ],
                     vec![
-                        VariableAtom::new(
-                            Origin::default(),
-                            sub_class_of.clone(),
-                            vec![c.clone(), y.clone()],
-                        ),
-                        VariableAtom::new(
-                            Origin::default(),
-                            conj.clone(),
-                            vec![y.clone(), d1.clone(), d2.clone()],
-                        ),
+                        VariableAtom::new(sub_class_of.clone(), vec![c.clone(), y.clone()]),
+                        VariableAtom::new(conj.clone(), vec![y.clone(), d1.clone(), d2.clone()]),
                     ],
                     vec![],
                 ),
@@ -980,27 +928,14 @@ mod test {
             (
                 ChaseRule::positive_rule(
                     vec![PrimitiveAtom::new(
-                        Origin::default(),
                         sub_class_of.clone(),
                         vec![tc.clone(), ty.clone()],
                     )],
                     vec![
-                        VariableAtom::new(
-                            Origin::default(),
-                            sub_class_of.clone(),
-                            vec![c.clone(), d1.clone()],
-                        ),
-                        VariableAtom::new(
-                            Origin::default(),
-                            sub_class_of.clone(),
-                            vec![c.clone(), d2.clone()],
-                        ),
-                        VariableAtom::new(
-                            Origin::default(),
-                            conj,
-                            vec![y.clone(), d1.clone(), d2.clone()],
-                        ),
-                        VariableAtom::new(Origin::default(), is_sub_class, vec![y.clone()]),
+                        VariableAtom::new(sub_class_of.clone(), vec![c.clone(), d1.clone()]),
+                        VariableAtom::new(sub_class_of.clone(), vec![c.clone(), d2.clone()]),
+                        VariableAtom::new(conj, vec![y.clone(), d1.clone(), d2.clone()]),
+                        VariableAtom::new(is_sub_class, vec![y.clone()]),
                     ],
                     vec![],
                 ),
@@ -1009,21 +944,12 @@ mod test {
             (
                 ChaseRule::positive_rule(
                     vec![PrimitiveAtom::new(
-                        Origin::default(),
                         xe,
                         vec![tc.clone(), tr.clone(), te.clone()],
                     )],
                     vec![
-                        VariableAtom::new(
-                            Origin::default(),
-                            sub_class_of,
-                            vec![e.clone(), y.clone()],
-                        ),
-                        VariableAtom::new(
-                            Origin::default(),
-                            exists,
-                            vec![y.clone(), r.clone(), c.clone()],
-                        ),
+                        VariableAtom::new(sub_class_of, vec![e.clone(), y.clone()]),
+                        VariableAtom::new(exists, vec![y.clone(), r.clone(), c.clone()]),
                     ],
                     vec![],
                 ),
@@ -1047,7 +973,7 @@ mod test {
             Direction::Import,
         );
 
-        ChaseImport::new(Origin::default(), predicate, Box::new(handler))
+        ChaseImport::new(predicate, Box::new(handler))
     }
 
     #[test]
@@ -1176,16 +1102,8 @@ mod test {
         let (rules, variables) = [
             (
                 ChaseRule::positive_rule(
-                    vec![PrimitiveAtom::new(
-                        Origin::default(),
-                        init.clone(),
-                        vec![tc.clone()],
-                    )],
-                    vec![VariableAtom::new(
-                        Origin::default(),
-                        is_main_class.clone(),
-                        vec![c.clone()],
-                    )],
+                    vec![PrimitiveAtom::new(init.clone(), vec![tc.clone()])],
+                    vec![VariableAtom::new(is_main_class.clone(), vec![c.clone()])],
                     vec![],
                 ),
                 vec![c.clone()],
             ),
             (
                 ChaseRule::positive_rule(
                     vec![PrimitiveAtom::new(
-                        Origin::default(),
                         sub_class_of.clone(),
                         vec![tc.clone(), tc.clone()],
                     )],
-                    vec![VariableAtom::new(
-                        Origin::default(),
-                        init.clone(),
-                        vec![c.clone()],
-                    )],
+                    vec![VariableAtom::new(init.clone(), vec![c.clone()])],
                     vec![],
                 ),
                 vec![c.clone()],
             ),
             (
                 ChaseRule::positive_rule(
                     vec![
-                        PrimitiveAtom::new(
-                            Origin::default(),
-                            sub_class_of.clone(),
-                            vec![tc.clone(), td1.clone()],
-                        ),
-                        PrimitiveAtom::new(
-                            Origin::default(),
-                            sub_class_of.clone(),
-                            vec![tc.clone(), td2.clone()],
-                        ),
+                        PrimitiveAtom::new(sub_class_of.clone(), vec![tc.clone(), td1.clone()]),
+                        PrimitiveAtom::new(sub_class_of.clone(), vec![tc.clone(), td2.clone()]),
                     ],
                     vec![
-                        VariableAtom::new(
-                            Origin::default(),
-                            sub_class_of.clone(),
-                            vec![c.clone(), y.clone()],
-                        ),
-                        VariableAtom::new(
-                            Origin::default(),
-                            conj.clone(),
-                            vec![y.clone(), d1.clone(), d2.clone()],
-                        ),
+                        VariableAtom::new(sub_class_of.clone(), vec![c.clone(), y.clone()]),
+                        VariableAtom::new(conj.clone(), vec![y.clone(), d1.clone(), d2.clone()]),
                     ],
                     vec![],
                 ),
@@ -1239,27 +1136,14 @@ mod test {
             (
                 ChaseRule::positive_rule(
                     vec![PrimitiveAtom::new(
-                        Origin::default(),
                         sub_class_of.clone(),
                         vec![tc.clone(), ty.clone()],
                     )],
                     vec![
-                        VariableAtom::new(
-                            Origin::default(),
-                            sub_class_of.clone(),
-                            vec![c.clone(), d1.clone()],
-                        ),
-                        VariableAtom::new(
-                            Origin::default(),
-                            sub_class_of.clone(),
-                            vec![c.clone(), d2.clone()],
-                        ),
-                        VariableAtom::new(
-                            Origin::default(),
-                            conj,
-                            vec![y.clone(), d1.clone(), d2.clone()],
-                        ),
-                        VariableAtom::new(Origin::default(), is_sub_class.clone(), vec![y.clone()]),
+                        VariableAtom::new(sub_class_of.clone(), vec![c.clone(), d1.clone()]),
+                        VariableAtom::new(sub_class_of.clone(), vec![c.clone(), d2.clone()]),
+                        VariableAtom::new(conj, vec![y.clone(), d1.clone(), d2.clone()]),
+                        VariableAtom::new(is_sub_class.clone(), vec![y.clone()]),
                     ],
                     vec![],
                 ),
@@ -1268,21 +1152,12 @@ mod test {
             (
                 ChaseRule::positive_rule(
                     vec![PrimitiveAtom::new(
-                        Origin::default(),
                         xe.clone(),
                         vec![tc.clone(), tr.clone(), te.clone()],
                     )],
                     vec![
-                        VariableAtom::new(
-                            Origin::default(),
-                            sub_class_of.clone(),
-                            vec![e.clone(), y.clone()],
-                        ),
-                        VariableAtom::new(
-                            Origin::default(),
-                            exists.clone(),
-                            vec![y.clone(), r.clone(), c.clone()],
-                        ),
+                        VariableAtom::new(sub_class_of.clone(), vec![e.clone(), y.clone()]),
+                        VariableAtom::new(exists.clone(), vec![y.clone(), r.clone(), c.clone()]),
                     ],
                     vec![],
                 ),
@@ -1291,22 +1166,13 @@ mod test {
             (
                 ChaseRule::positive_rule(
                     vec![PrimitiveAtom::new(
-                        Origin::default(),
                         aux_subsub_ext.clone(),
                         vec![td.clone(), tr.clone(), ty.clone()],
                     )],
                     vec![
-                        VariableAtom::new(
-                            Origin::default(),
-                            sub_prop.clone(),
-                            vec![r.clone(), s.clone()],
-                        ),
-                        VariableAtom::new(
-                            Origin::default(),
-                            exists,
-                            vec![y.clone(), s.clone(), d.clone()],
-                        ),
-                        VariableAtom::new(Origin::default(), is_sub_class, vec![y.clone()]),
+                        VariableAtom::new(sub_prop.clone(), vec![r.clone(), s.clone()]),
+                        VariableAtom::new(exists, vec![y.clone(), s.clone(), d.clone()]),
+                        VariableAtom::new(is_sub_class, vec![y.clone()]),
                     ],
                     vec![],
                 ),
@@ -1315,21 +1181,12 @@ mod test {
             (
                 ChaseRule::positive_rule(
                     vec![PrimitiveAtom::new(
-                        Origin::default(),
                         aux.clone(),
                         vec![tc.clone(), tr.clone(), ty.clone()],
                     )],
                     vec![
-                        VariableAtom::new(
-                            Origin::default(),
-                            sub_class_of.clone(),
-                            vec![c.clone(), d.clone()],
-                        ),
-                        VariableAtom::new(
-                            Origin::default(),
-                            aux_subsub_ext,
-                            vec![d.clone(), r.clone(), y.clone()],
-                        ),
+                        VariableAtom::new(sub_class_of.clone(), vec![c.clone(), d.clone()]),
+                        VariableAtom::new(aux_subsub_ext, vec![d.clone(), r.clone(), y.clone()]),
                     ],
                     vec![],
                 ),
@@ -1338,21 +1195,12 @@ mod test {
             (
                 ChaseRule::positive_rule(
                     vec![PrimitiveAtom::new(
-                        Origin::default(),
                         sub_class_of.clone(),
                         vec![te.clone(), ty.clone()],
                     )],
                     vec![
-                        VariableAtom::new(
-                            Origin::default(),
-                            xe.clone(),
-                            vec![c.clone(), r.clone(), e.clone()],
-                        ),
-                        VariableAtom::new(
-                            Origin::default(),
-                            aux,
-                            vec![c.clone(), r.clone(), y.clone()],
-                        ),
+                        VariableAtom::new(xe.clone(), vec![c.clone(), r.clone(), e.clone()]),
+                        VariableAtom::new(aux, vec![c.clone(), r.clone(), y.clone()]),
                     ],
                     vec![],
                 ),
@@ -1361,21 +1209,12 @@ mod test {
             (
                 ChaseRule::positive_rule(
                     vec![PrimitiveAtom::new(
-                        Origin::default(),
                         sub_class_of.clone(),
                         vec![tc.clone(), te.clone()],
                     )],
                     vec![
-                        VariableAtom::new(
-                            Origin::default(),
-                            sub_class_of.clone(),
-                            vec![c.clone(), d.clone()],
-                        ),
-                        VariableAtom::new(
-                            Origin::default(),
-                            sub_class_of.clone(),
-                            vec![d.clone(), e.clone()],
-                        ),
+                        VariableAtom::new(sub_class_of.clone(), vec![c.clone(), d.clone()]),
+                        VariableAtom::new(sub_class_of.clone(), vec![d.clone(), e.clone()]),
                     ],
                     vec![],
                 ),
@@ -1384,36 +1223,15 @@ mod test {
             (
                 ChaseRule::positive_rule(
                     vec![PrimitiveAtom::new(
-                        Origin::default(),
                         xe.clone(),
                         vec![td.clone(), ts.clone(), te.clone()],
                     )],
                     vec![
-                        VariableAtom::new(
-                            Origin::default(),
-                            xe.clone(),
-                            vec![c.clone(), r1.clone(), e.clone()],
-                        ),
-                        VariableAtom::new(
-                            Origin::default(),
-                            xe.clone(),
-                            vec![d.clone(), r2.clone(), c.clone()],
-                        ),
-                        VariableAtom::new(
-                            Origin::default(),
-                            sub_prop.clone(),
-                            vec![r1.clone(), s1.clone()],
-                        ),
-                        VariableAtom::new(
-                            Origin::default(),
-                            sub_prop,
-                            vec![r2.clone(), s2.clone()],
-                        ),
-                        VariableAtom::new(
-                            Origin::default(),
-                            sub_prop_chain,
-                            vec![s1.clone(), s2.clone(), s.clone()],
-                        ),
+                        VariableAtom::new(xe.clone(), vec![c.clone(), r1.clone(), e.clone()]),
+                        VariableAtom::new(xe.clone(), vec![d.clone(), r2.clone(), c.clone()]),
+                        VariableAtom::new(sub_prop.clone(), vec![r1.clone(), s1.clone()]),
+                        VariableAtom::new(sub_prop, vec![r2.clone(), s2.clone()]),
+                        VariableAtom::new(sub_prop_chain, vec![s1.clone(), s2.clone(), s.clone()]),
                     ],
                     vec![],
                 ),
@@ -1421,16 +1239,8 @@ mod test {
             ),
             (
                 ChaseRule::positive_rule(
-                    vec![PrimitiveAtom::new(
-                        Origin::default(),
-                        init,
-                        vec![tc.clone()],
-                    )],
-                    vec![VariableAtom::new(
-                        Origin::default(),
-                        xe,
-                        vec![c.clone(), r.clone(), e.clone()],
-                    )],
+                    vec![PrimitiveAtom::new(init, vec![tc.clone()])],
+                    vec![VariableAtom::new(xe, vec![c.clone(), r.clone(), e.clone()])],
                     vec![],
                 ),
                 vec![c, r, e],
             ),
             (
                 ChaseRule::positive_rule(
                     vec![PrimitiveAtom::new(
-                        Origin::default(),
                         main_sub_class_of,
                         vec![ta.clone(), tb.clone()],
                     )],
                     vec![
-                        VariableAtom::new(
-                            Origin::default(),
-                            sub_class_of,
-                            vec![a.clone(), b.clone()],
-                        ),
-                        VariableAtom::new(
-                            Origin::default(),
-                            is_main_class.clone(),
-                            vec![a.clone()],
-                        ),
-                        VariableAtom::new(Origin::default(), is_main_class, vec![b.clone()]),
+                        VariableAtom::new(sub_class_of, vec![a.clone(), b.clone()]),
+                        VariableAtom::new(is_main_class.clone(), vec![a.clone()]),
+                        VariableAtom::new(is_main_class, vec![b.clone()]),
                     ],
                     vec![],
                 ),
diff --git a/nemo/src/chase_model/components/aggregate.rs b/nemo/src/chase_model/components/aggregate.rs
index 55806bbcc..1e9667cbe 100644
--- a/nemo/src/chase_model/components/aggregate.rs
+++ b/nemo/src/chase_model/components/aggregate.rs
@@ -74,6 +74,11 @@ impl ChaseAggregate {
     pub fn group_by_variables(&self) -> &HashSet<Variable> {
         &self.group_by_variables
     }
+
+    /// Return which operation is performed.
+    pub fn aggregate_kind(&self) -> AggregateKind {
+        self.kind
+    }
 }
 
 impl ChaseComponent for ChaseAggregate {
diff --git a/nemo/src/chase_model/components/atom/ground_atom.rs b/nemo/src/chase_model/components/atom/ground_atom.rs
index 3d83dd204..f04e6ca77 100644
--- a/nemo/src/chase_model/components/atom/ground_atom.rs
+++ b/nemo/src/chase_model/components/atom/ground_atom.rs
@@ -30,9 +30,9 @@ pub(crate) struct GroundAtom {
 
 impl GroundAtom {
     /// Construct a new [GroundAtom].
-    pub(crate) fn new(origin: Origin, predicate: Tag, terms: Vec<AnyDataValue>) -> Self {
+    pub(crate) fn new(predicate: Tag, terms: Vec<AnyDataValue>) -> Self {
         Self {
-            origin,
+            origin: Origin::default(),
             predicate,
             terms,
         }
diff --git a/nemo/src/chase_model/components/atom/primitive_atom.rs b/nemo/src/chase_model/components/atom/primitive_atom.rs
index f568b175a..01cc3e406 100644
--- a/nemo/src/chase_model/components/atom/primitive_atom.rs
+++ b/nemo/src/chase_model/components/atom/primitive_atom.rs
@@ -30,9 +30,9 @@ pub(crate) struct PrimitiveAtom {
 
 impl PrimitiveAtom {
     /// Construct a new [PrimitiveAtom].
-    pub(crate) fn new(origin: Origin, predicate: Tag, terms: Vec<Primitive>) -> Self {
+    pub(crate) fn new(predicate: Tag, terms: Vec<Primitive>) -> Self {
         Self {
-            origin,
+            origin: Origin::default(),
             predicate,
             terms,
         }
diff --git a/nemo/src/chase_model/components/atom/variable_atom.rs b/nemo/src/chase_model/components/atom/variable_atom.rs
index 2ec8f8cf4..01c33e8b7 100644
--- a/nemo/src/chase_model/components/atom/variable_atom.rs
+++ b/nemo/src/chase_model/components/atom/variable_atom.rs
@@ -24,9 +24,9 @@ pub(crate) struct VariableAtom {
 
 impl VariableAtom {
     /// Construct a new [VariableAtom].
-    pub(crate) fn new(origin: Origin, predicate: Tag, variables: Vec<Variable>) -> Self {
+    pub(crate) fn new(predicate: Tag, variables: Vec<Variable>) -> Self {
         Self {
-            origin,
+            origin: Origin::default(),
             predicate,
             variables,
         }
diff --git a/nemo/src/chase_model/components/export.rs b/nemo/src/chase_model/components/export.rs
index 60e25ce75..93f06f96c 100644
--- a/nemo/src/chase_model/components/export.rs
+++ b/nemo/src/chase_model/components/export.rs
@@ -21,13 +21,9 @@ pub(crate) struct ChaseExport {
 
 impl ChaseExport {
     /// Create a new [ChaseExport].
-    pub(crate) fn new(
-        origin: Origin,
-        predicate: Tag,
-        handler: Box<dyn ImportExportHandler>,
-    ) -> Self {
+    pub(crate) fn new(predicate: Tag, handler: Box<dyn ImportExportHandler>) -> Self {
         Self {
-            origin,
+            origin: Origin::default(),
             predicate,
             handler,
         }
diff --git a/nemo/src/chase_model/components/filter.rs b/nemo/src/chase_model/components/filter.rs
index 44ef80df2..08365264d 100644
--- a/nemo/src/chase_model/components/filter.rs
+++ b/nemo/src/chase_model/components/filter.rs
@@ -2,7 +2,7 @@
 
 use crate::rule_model::origin::Origin;
 
-use super::term::operation_term::OperationTerm;
+use super::{term::operation_term::OperationTerm, ChaseComponent};
 
 /// Indicates that a new value must be created according to [OperationTerm].
 ///
@@ -18,8 +18,11 @@ pub(crate) struct ChaseFilter {
 
 impl ChaseFilter {
     /// Create a new [ChaseFilter].
-    pub(crate) fn new(origin: Origin, filter: OperationTerm) -> Self {
-        Self { origin, filter }
+    pub(crate) fn new(filter: OperationTerm) -> Self {
+        Self {
+            origin: Origin::default(),
+            filter,
+        }
     }
 
     /// Return the filter that is being applied.
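For orientation between these two hunks: after this patch, every chase-model component starts out with `Origin::default()` and receives a concrete origin afterwards via the `ChaseComponent::set_origin` builder method that the next hunk adds for `ChaseFilter`. A minimal usage sketch of the resulting call pattern, assuming an `OperationTerm` value `term` and an `Origin` value `origin` are already in scope (both variable names are illustrative, not taken from the patch):

    // Construct the filter with a default origin, then attach the real one.
    let filter = ChaseFilter::new(term).set_origin(origin);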
@@ -27,3 +30,17 @@ impl ChaseFilter {
         &self.filter
     }
 }
+
+impl ChaseComponent for ChaseFilter {
+    fn origin(&self) -> &Origin {
+        &self.origin
+    }
+
+    fn set_origin(mut self, origin: Origin) -> Self
+    where
+        Self: Sized,
+    {
+        self.origin = origin;
+        self
+    }
+}
diff --git a/nemo/src/chase_model/components/import.rs b/nemo/src/chase_model/components/import.rs
index 7235c4404..c181c7f20 100644
--- a/nemo/src/chase_model/components/import.rs
+++ b/nemo/src/chase_model/components/import.rs
@@ -21,13 +21,9 @@ pub(crate) struct ChaseImport {
 
 impl ChaseImport {
     /// Create a new [ChaseImport].
-    pub(crate) fn new(
-        origin: Origin,
-        predicate: Tag,
-        handler: Box<dyn ImportExportHandler>,
-    ) -> Self {
+    pub(crate) fn new(predicate: Tag, handler: Box<dyn ImportExportHandler>) -> Self {
         Self {
-            origin,
+            origin: Origin::default(),
             predicate,
             handler,
         }
diff --git a/nemo/src/chase_model/components/operation.rs b/nemo/src/chase_model/components/operation.rs
index b7d886899..b87da449d 100644
--- a/nemo/src/chase_model/components/operation.rs
+++ b/nemo/src/chase_model/components/operation.rs
@@ -20,9 +20,9 @@ pub(crate) struct ChaseOperation {
 
 impl ChaseOperation {
     /// Create a new [ChaseOperation].
-    pub(crate) fn new(origin: Origin, output_variable: Variable, operation: OperationTerm) -> Self {
+    pub(crate) fn new(output_variable: Variable, operation: OperationTerm) -> Self {
         Self {
-            origin,
+            origin: Origin::default(),
             output_variable,
             operation,
         }
diff --git a/nemo/src/chase_model/components/term/operation_term.rs b/nemo/src/chase_model/components/term/operation_term.rs
index aba741c7d..4b41971a9 100644
--- a/nemo/src/chase_model/components/term/operation_term.rs
+++ b/nemo/src/chase_model/components/term/operation_term.rs
@@ -1,8 +1,11 @@
 //! This module defines [Operation] and [OperationTerm].
 
-use crate::rule_model::{
-    components::term::{operation::operation_kind::OperationKind, primitive::Primitive},
-    origin::Origin,
+use crate::{
+    chase_model::components::ChaseComponent,
+    rule_model::{
+        components::term::{operation::operation_kind::OperationKind, primitive::Primitive},
+        origin::Origin,
+    },
 };
 
 /// Operation
@@ -22,15 +25,29 @@ impl Operation {
 
     /// Create a new [Operation].
-    pub(crate) fn new(origin: Origin, kind: OperationKind, subterms: Vec<OperationTerm>) -> Self {
+    pub(crate) fn new(kind: OperationKind, subterms: Vec<OperationTerm>) -> Self {
         Self {
-            origin,
+            origin: Origin::default(),
             kind,
             subterms,
         }
     }
 }
 
+impl ChaseComponent for Operation {
+    fn origin(&self) -> &Origin {
+        &self.origin
+    }
+
+    fn set_origin(mut self, origin: Origin) -> Self
+    where
+        Self: Sized,
+    {
+        self.origin = origin;
+        self
+    }
+}
+
 /// Term that can be evaluated
 #[derive(Debug, Clone)]
 pub(crate) enum OperationTerm {
diff --git a/nemo/src/chase_model/translation/aggregate.rs b/nemo/src/chase_model/translation/aggregate.rs
index f4a1d10a3..ce94a3fbd 100644
--- a/nemo/src/chase_model/translation/aggregate.rs
+++ b/nemo/src/chase_model/translation/aggregate.rs
@@ -8,6 +8,7 @@ use crate::{
         operation::ChaseOperation,
         rule::ChaseRule,
         term::operation_term::{Operation, OperationTerm},
+        ChaseComponent,
     },
     rule_model::components::{
         term::{
@@ -38,21 +39,25 @@ impl ProgramChaseTranslation {
             Term::Primitive(Primitive::Variable(variable)) => variable.clone(),
             Term::Primitive(primitive) => {
                 let new_variable = Variable::universal(&self.create_fresh_variable());
-                result.add_positive_operation(ChaseOperation::new(
-                    origin,
-                    new_variable.clone(),
-                    OperationTerm::Primitive(primitive.clone()),
-                ));
+                result.add_positive_operation(
+                    ChaseOperation::new(
+                        new_variable.clone(),
+                        OperationTerm::Primitive(primitive.clone()),
+                    )
+                    .set_origin(origin),
+                );
 
                 new_variable
             }
             Term::Operation(operation) => {
                 let new_variable = Variable::universal(&self.create_fresh_variable());
-                result.add_positive_operation(ChaseOperation::new(
-                    origin,
-                    new_variable.clone(),
-                    Self::build_operation_term(operation),
-                ));
+                result.add_positive_operation(
+                    ChaseOperation::new(
+                        new_variable.clone(),
+                        Self::build_operation_term(operation),
+                    )
+                    .set_origin(origin),
+                );
 
                 new_variable
             }
@@ -113,7 +118,7 @@ impl ProgramChaseTranslation {
             }
         }
 
-        OperationTerm::Operation(Operation::new(origin, kind, subterms))
+        OperationTerm::Operation(Operation::new(kind, subterms).set_origin(origin))
     }
 
     /// Create a [ChaseOperation] from a given
@@ -137,6 +142,7 @@ impl ProgramChaseTranslation {
             group_by_variables,
             chase_aggregate,
         );
-        ChaseOperation::new(operation.origin().clone(), output_variable, operation_term)
+
+        ChaseOperation::new(output_variable, operation_term).set_origin(operation.origin().clone())
     }
 }
diff --git a/nemo/src/chase_model/translation/fact.rs b/nemo/src/chase_model/translation/fact.rs
index f799a22a8..733661fcb 100644
--- a/nemo/src/chase_model/translation/fact.rs
+++ b/nemo/src/chase_model/translation/fact.rs
@@ -2,7 +2,7 @@
 //! logical facts into chase facts.
 
 use crate::{
-    chase_model::components::atom::ground_atom::GroundAtom,
+    chase_model::components::{atom::ground_atom::GroundAtom, ChaseComponent},
     rule_model::components::{
         term::{primitive::Primitive, Term},
         ProgramComponent,
@@ -38,6 +38,6 @@ impl ProgramChaseTranslation {
             }
         }
 
-        GroundAtom::new(origin, predicate, terms)
+        GroundAtom::new(predicate, terms).set_origin(origin)
     }
 }
diff --git a/nemo/src/chase_model/translation/filter.rs b/nemo/src/chase_model/translation/filter.rs
index cea73886b..fa47c10cf 100644
--- a/nemo/src/chase_model/translation/filter.rs
+++ b/nemo/src/chase_model/translation/filter.rs
@@ -4,6 +4,7 @@ use crate::{
     chase_model::components::{
         filter::ChaseFilter,
         term::operation_term::{Operation, OperationTerm},
+        ChaseComponent,
     },
     rule_model::components::{
         term::{
@@ -26,29 +27,31 @@ impl ProgramChaseTranslation {
         let origin = operation.origin().clone();
         let operation = Self::build_operation_term(operation);
 
-        let filter = OperationTerm::Operation(Operation::new(
-            origin.clone(),
-            OperationKind::Equal,
-            vec![
-                OperationTerm::Primitive(Primitive::from(variable.clone())),
-                operation,
-            ],
-        ));
+        let filter = OperationTerm::Operation(
+            Operation::new(
+                OperationKind::Equal,
+                vec![
+                    OperationTerm::Primitive(Primitive::from(variable.clone())),
+                    operation,
+                ],
+            )
+            .set_origin(origin.clone()),
+        );
 
-        ChaseFilter::new(origin, filter)
+        ChaseFilter::new(filter).set_origin(origin)
     }
 
     /// Create a new filter that binds the values of the variable to the provided primitive term.
     pub(crate) fn build_filter_primitive(variable: &Variable, term: &Primitive) -> ChaseFilter {
         let filter = Operation::new(
-            term.origin().clone(),
             OperationKind::Equal,
             vec![
                 OperationTerm::Primitive(Primitive::from(variable.clone())),
                 OperationTerm::Primitive(term.clone()),
             ],
-        );
+        )
+        .set_origin(term.origin().clone());
 
-        ChaseFilter::new(term.origin().clone(), OperationTerm::Operation(filter))
+        ChaseFilter::new(OperationTerm::Operation(filter)).set_origin(term.origin().clone())
     }
 }
diff --git a/nemo/src/chase_model/translation/import.rs b/nemo/src/chase_model/translation/import.rs
index a2790c82d..c823e2805 100644
--- a/nemo/src/chase_model/translation/import.rs
+++ b/nemo/src/chase_model/translation/import.rs
@@ -5,7 +5,7 @@ use std::collections::HashMap;
 use oxiri::Iri;
 
 use crate::{
-    chase_model::components::import::ChaseImport,
+    chase_model::components::{import::ChaseImport, ChaseComponent},
     io::formats::{
         dsv::{value_format::DsvValueFormats, DsvHandler},
         json::JsonHandler,
@@ -53,7 +53,7 @@ impl ProgramChaseTranslation {
             FileFormat::TriG => todo!(),
         };
 
-        ChaseImport::new(origin, predicate, handler)
+        ChaseImport::new(predicate, handler).set_origin(origin)
     }
 
     /// Read resource attribute and check compression.
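The translation code follows the same two-step pattern everywhere: build the chase-level component from its logical counterpart, then carry the origin over with `set_origin`. A condensed sketch of the pattern as it appears in the `operation.rs` hunk that follows (variable names are illustrative):

    // Remember where the logical operation came from ...
    let origin = operation.origin().clone();
    // ... translate it, and re-attach the origin to the chase-level component.
    let chase_operation =
        ChaseOperation::new(output_variable.clone(), operation_term).set_origin(origin);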
diff --git a/nemo/src/chase_model/translation/operation.rs b/nemo/src/chase_model/translation/operation.rs
index 912f8c1be..043fd779b 100644
--- a/nemo/src/chase_model/translation/operation.rs
+++ b/nemo/src/chase_model/translation/operation.rs
@@ -5,6 +5,7 @@ use crate::{
     chase_model::components::{
         operation::ChaseOperation,
         term::operation_term::{Operation, OperationTerm},
+        ChaseComponent,
     },
     rule_model::components::{
         term::{primitive::variable::Variable, Term},
@@ -40,7 +41,7 @@ impl ProgramChaseTranslation {
             }
         }
 
-        OperationTerm::Operation(Operation::new(origin, kind, subterms))
+        OperationTerm::Operation(Operation::new(kind, subterms).set_origin(origin))
     }
 
     /// Create a [ChaseOperation] form a given
@@ -56,6 +57,6 @@ impl ProgramChaseTranslation {
         let origin = operation.origin().clone();
         let operation = Self::build_operation_term(operation);
 
-        ChaseOperation::new(origin, output_variable.clone(), operation)
+        ChaseOperation::new(output_variable.clone(), operation).set_origin(origin)
     }
 }
diff --git a/nemo/src/chase_model/translation/rule.rs b/nemo/src/chase_model/translation/rule.rs
index ff7c0ba14..ed9b8da90 100644
--- a/nemo/src/chase_model/translation/rule.rs
+++ b/nemo/src/chase_model/translation/rule.rs
@@ -8,6 +8,7 @@ use crate::{
         atom::{primitive_atom::PrimitiveAtom, variable_atom::VariableAtom},
         filter::ChaseFilter,
         rule::ChaseRule,
+        ChaseComponent,
     },
     rule_model::components::{
         atom::Atom,
@@ -168,7 +169,7 @@ impl ProgramChaseTranslation {
             }
         }
 
-        let variable_atom = VariableAtom::new(origin, predicate, variables);
+        let variable_atom = VariableAtom::new(predicate, variables).set_origin(origin);
 
         (variable_atom, filters)
     }
@@ -229,7 +230,8 @@ impl ProgramChaseTranslation {
             if let Literal::Operation(operation) = literal {
                 let new_operation = Self::build_operation_term(operation);
-                let new_filter = ChaseFilter::new(operation.origin().clone(), new_operation);
+                let new_filter =
+                    ChaseFilter::new(new_operation).set_origin(operation.origin().clone());
 
                 result.add_positive_filter(new_filter);
             }
@@ -277,7 +279,7 @@ impl ProgramChaseTranslation {
             }
         }
 
-        result.add_head_atom(PrimitiveAtom::new(origin, predicate, terms))
+        result.add_head_atom(PrimitiveAtom::new(predicate, terms).set_origin(origin))
     }
 }

From 2423014fc874e0adc46059214e8c5214a3eb6ac0 Mon Sep 17 00:00:00 2001
From: Alex Ivliev
Date: Fri, 6 Sep 2024 15:55:21 +0200
Subject: [PATCH 158/214] Replace old rule model in whole code base
---
 nemo-physical/src/function/evaluation.rs      |    4 +-
 nemo-physical/src/function/tree.rs            |    4 +-
 nemo/src/_model/chase_model.rs                |   20 -
 nemo/src/_model/chase_model/aggregate.rs      |   70 --
 nemo/src/_model/chase_model/atom.rs           |  278 ----
 nemo/src/_model/chase_model/constructor.rs    |   40 -
 nemo/src/_model/chase_model/program.rs        |  226 ----
 nemo/src/_model/chase_model/rule.rs           |  628 ----------
 nemo/src/_model/chase_model/variable.rs       |    6 -
 nemo/src/_model/rule_model.rs                 |   28 -
 nemo/src/_model/rule_model/_syntax.rs         |   63 -
 nemo/src/_model/rule_model/aggregate.rs       |   54 -
 nemo/src/_model/rule_model/atom.rs            |   88 --
 nemo/src/_model/rule_model/constraint.rs      |  161 ---
 nemo/src/_model/rule_model/import_export.rs   |  151 ---
 nemo/src/_model/rule_model/literal.rs         |  113 --
 nemo/src/_model/rule_model/predicate.rs       |   27 -
 nemo/src/_model/rule_model/program.rs         |  270 ----
 nemo/src/_model/rule_model/rule.rs            |  384 ------
 nemo/src/_model/rule_model/term.rs            |  919 --------------
 nemo/src/_model/types/complex_types.rs        |  232 ----
 nemo/src/_model/types/error.rs                |   51 -
 .../_model/types/primitive_logical_value.rs   | 1113 -----------------
 nemo/src/_model/types/primitive_types.rs      |  141 ---
 nemo/src/api.rs                               |   11 +-
 .../chase_model/analysis/program_analysis.rs  |   31 +-
 .../chase_model/analysis/variable_order.rs    |    8 +-
 nemo/src/chase_model/components/aggregate.rs  |   43 +-
 nemo/src/chase_model/components/atom.rs       |    4 +-
 .../components/atom/ground_atom.rs            |   64 +-
 .../components/atom/primitive_atom.rs         |   76 +-
 .../components/atom/variable_atom.rs          |   60 +-
 nemo/src/chase_model/components/export.rs     |    2 +-
 nemo/src/chase_model/components/filter.rs     |   28 +-
 nemo/src/chase_model/components/operation.rs  |   36 +-
 nemo/src/chase_model/components/program.rs    |   40 +-
 nemo/src/chase_model/components/rule.rs       |  181 ++-
 .../components/term/operation_term.rs         |   78 +-
 nemo/src/chase_model/translation.rs           |   30 +-
 nemo/src/chase_model/translation/aggregate.rs |    8 +-
 nemo/src/chase_model/translation/fact.rs      |    2 +
 nemo/src/chase_model/translation/import.rs    |   79 +-
 nemo/src/chase_model/translation/rule.rs      |   17 +-
 nemo/src/execution/execution_engine.rs        |   98 +-
 nemo/src/execution/planning/operations.rs     |    3 +-
 .../planning/operations/aggregate.rs          |   15 +-
 .../execution/planning/operations/append.rs   |   19 +-
 .../execution/planning/operations/filter.rs   |   33 +-
 .../planning/operations/functions.rs          |   16 +-
 .../src/execution/planning/operations/join.rs |   11 +-
 .../execution/planning/operations/negation.rs |   12 +-
 .../planning/operations/operation.rs          |  141 +++
 .../src/execution/planning/operations/term.rs |  147 ---
 .../execution/planning/operations/union.rs    |    6 +-
 nemo/src/execution/planning/plan_aggregate.rs |   26 +-
 .../execution/planning/plan_body_seminaive.rs |   30 +-
 .../execution/planning/plan_head_datalog.rs   |   14 +-
 .../planning/plan_head_restricted.rs          |   43 +-
 nemo/src/execution/planning/plan_tracing.rs   |   72 +-
 nemo/src/execution/planning/strategy_body.rs  |    2 +-
 nemo/src/execution/rule_execution.rs          |   13 +-
 nemo/src/execution/selection_strategy.rs      |    6 +-
 .../dependency_graph/graph_constructor.rs     |    2 +-
 .../dependency_graph/graph_positive.rs        |    8 +-
 .../execution/selection_strategy/strategy.rs  |    2 +-
 .../selection_strategy/strategy_graph.rs      |    2 +-
 .../selection_strategy/strategy_random.rs     |    2 +-
 .../strategy_round_robin.rs                   |    2 +-
 .../strategy_stratified_negation.rs           |   10 +-
 nemo/src/execution/tracing/trace.rs           |  236 ++--
 nemo/src/io/error.rs                          |    2 +
 nemo/src/io/export_manager.rs                 |   90 --
 nemo/src/io/formats/dsv/value_format.rs       |    2 +-
 nemo/src/io/formats/rdf/value_format.rs       |    2 +-
 nemo/src/io/import_manager.rs                 |   46 -
 nemo/src/lib.rs                               |    4 +-
 nemo/src/rule_model.rs                        |    1 +
 nemo/src/rule_model/components.rs             |   13 +-
 nemo/src/rule_model/components/atom.rs        |   21 +-
 nemo/src/rule_model/components/fact.rs        |   49 +-
 nemo/src/rule_model/components/literal.rs     |   32 +-
 nemo/src/rule_model/components/rule.rs        |   27 +-
 nemo/src/rule_model/components/term.rs        |  140 ++-
 .../rule_model/components/term/aggregate.rs   |   33 +-
 .../rule_model/components/term/function.rs    |   24 +-
 nemo/src/rule_model/components/term/map.rs    |   28 +-
 .../rule_model/components/term/operation.rs   |   16 +-
 nemo/src/rule_model/components/term/tuple.rs  |   22 +-
 nemo/src/rule_model/program.rs                |   20 +-
 nemo/src/rule_model/term_map.rs               |   41 +
 nemo/src/rule_model/util.rs                   |    2 +-
 nemo/src/syntax.rs                            |    1 +
 nemo/src/table_manager.rs                     |   16 +-
 nemo/src/util.rs                              |    2 +
 nemo/src/util/seperated_list.rs               |   16 +
 95 files changed, 1584 insertions(+), 5906 deletions(-)
 delete mode 100644 nemo/src/_model/chase_model.rs
 delete mode 100644 nemo/src/_model/chase_model/aggregate.rs
 delete mode 100644 nemo/src/_model/chase_model/atom.rs
 delete mode 100644 nemo/src/_model/chase_model/constructor.rs
 delete mode 100644 nemo/src/_model/chase_model/program.rs
 delete mode 100644 nemo/src/_model/chase_model/rule.rs
 delete mode 100644 nemo/src/_model/chase_model/variable.rs
 delete mode 100644 nemo/src/_model/rule_model.rs
 delete mode 100644 nemo/src/_model/rule_model/_syntax.rs
 delete mode 100644 nemo/src/_model/rule_model/aggregate.rs
 delete mode 100644 nemo/src/_model/rule_model/atom.rs
 delete mode 100644 nemo/src/_model/rule_model/constraint.rs
 delete mode 100644 nemo/src/_model/rule_model/import_export.rs
 delete mode 100644 nemo/src/_model/rule_model/literal.rs
 delete mode 100644 nemo/src/_model/rule_model/predicate.rs
 delete mode 100644 nemo/src/_model/rule_model/program.rs
 delete mode 100644 nemo/src/_model/rule_model/rule.rs
 delete mode 100644 nemo/src/_model/rule_model/term.rs
 delete mode 100644 nemo/src/_model/types/complex_types.rs
 delete mode 100644 nemo/src/_model/types/error.rs
 delete mode 100644 nemo/src/_model/types/primitive_logical_value.rs
 delete mode 100644 nemo/src/_model/types/primitive_types.rs
 create mode 100644 nemo/src/execution/planning/operations/operation.rs
 delete mode 100644 nemo/src/execution/planning/operations/term.rs
 create mode 100644 nemo/src/rule_model/term_map.rs
 create mode 100644 nemo/src/util/seperated_list.rs

diff --git a/nemo-physical/src/function/evaluation.rs b/nemo-physical/src/function/evaluation.rs
index cdd1d2398..96d7b0af8 100644
--- a/nemo-physical/src/function/evaluation.rs
+++ b/nemo-physical/src/function/evaluation.rs
@@ -422,14 +422,14 @@ mod test {
         );
         evaluate_expect(&tree_regex, Some(AnyDataValue::new_boolean(true)));
 
-        let tree_substring_length = Function::string_subtstring_length(
+        let tree_substring_length = Function::string_substring_length(
             Function::constant(any_string("Hello World")),
             Function::constant(AnyDataValue::new_integer_from_u64(7)),
             Function::constant(AnyDataValue::new_integer_from_u64(3)),
         );
         evaluate_expect(&tree_substring_length, Some(any_string("Wor")));
 
-        let tree_substring = Function::string_subtstring(
+        let tree_substring = Function::string_substring(
             Function::constant(any_string("Hello World")),
             Function::constant(AnyDataValue::new_integer_from_u64(7)),
         );
diff --git a/nemo-physical/src/function/tree.rs b/nemo-physical/src/function/tree.rs
index dcad3bb13..25c2b44ef 100644
--- a/nemo-physical/src/function/tree.rs
+++ b/nemo-physical/src/function/tree.rs
@@ -736,7 +736,7 @@ where
     /// This evaluates to a string containing the
     /// characters from the string that results from evaluating `string`,
     /// starting from the position that results from evaluating `start`.
-    pub fn string_subtstring(string: Self, start: Self) -> Self {
+    pub fn string_substring(string: Self, start: Self) -> Self {
         Self::Binary {
             function: BinaryFunctionEnum::StringSubstring(StringSubstring),
             left: Box::new(string),
@@ -750,7 +750,7 @@ where
     /// characters from the string that results from evaluating `string`,
     /// starting from the position that results from evaluating `start`
     /// with the maximum length given by evaluating `length`.
-    pub fn string_subtstring_length(string: Self, start: Self, length: Self) -> Self {
+    pub fn string_substring_length(string: Self, start: Self, length: Self) -> Self {
         Self::Ternary {
             function: TernaryFunctionEnum::StringSubstringLength(StringSubstringLength),
             first: Box::new(string),
diff --git a/nemo/src/_model/chase_model.rs b/nemo/src/_model/chase_model.rs
deleted file mode 100644
index 796addd3d..000000000
--- a/nemo/src/_model/chase_model.rs
+++ /dev/null
@@ -1,20 +0,0 @@
-//! A variant of the rule model suitable for computing the chase.
-
-mod aggregate;
-pub(crate) use aggregate::*;
-
-mod program;
-pub(crate) use program::*;
-
-mod rule;
-pub(crate) use rule::*;
-
-mod atom;
-pub(crate) use atom::*;
-// TODO: pub needed in Python bindings. This should change.
-pub use atom::{ChaseAtom, ChaseFact};
-
-mod constructor;
-pub(crate) use constructor::*;
-
-pub(crate) mod variable;
diff --git a/nemo/src/_model/chase_model/aggregate.rs b/nemo/src/_model/chase_model/aggregate.rs
deleted file mode 100644
index 80a76e53b..000000000
--- a/nemo/src/_model/chase_model/aggregate.rs
+++ /dev/null
@@ -1,70 +0,0 @@
-use std::collections::HashSet;
-
-use nemo_physical::aggregates::operation::AggregateOperation;
-
-use crate::model::{Aggregate, LogicalAggregateOperation, PrimitiveTerm, Term, Variable};
-
-/// Specifies how the values for a placeholder aggregate variable will get computed.
-///
-/// Terminology:
-/// * `input_variables` are the distinct variables and the aggregated input variable, not including the group-by variables
-/// * `output_variable` is the single aggregated output variable
-///
-/// See [nemo_physical::tabular::operations::TrieScanAggregate]
-#[derive(Debug, Clone)]
-pub struct ChaseAggregate {
-    pub(crate) aggregate_operation: AggregateOperation,
-
-    pub(crate) input_variable: Variable,
-    pub(crate) distinct_variables: Vec<Variable>,
-
-    pub(crate) group_by_variables: HashSet<Variable>,
-
-    pub(crate) output_variable: Variable,
-}
-
-impl ChaseAggregate {
-    /// Convert an [Aggregate] to a [ChaseAggregate], given a placeholder name for the output variable
-    pub fn from_aggregate(
-        aggregate: Aggregate,
-        output_variable: Variable,
-        group_by_variables: HashSet<Variable>,
-    ) -> ChaseAggregate {
-        let logical_aggregate_operation = aggregate.logical_aggregate_operation;
-
-        let physical_operation = match logical_aggregate_operation {
-            LogicalAggregateOperation::CountValues => AggregateOperation::Count,
-            LogicalAggregateOperation::MaxNumber => AggregateOperation::Max,
-            LogicalAggregateOperation::MinNumber => AggregateOperation::Min,
-            LogicalAggregateOperation::SumOfNumbers => AggregateOperation::Sum,
-        };
-
-        let mut variables = aggregate
-            .terms
-            .into_iter()
-            .map(|t| {
-                if let Term::Primitive(PrimitiveTerm::Variable(variable)) = t {
-                    variable
-                } else {
-                    unreachable!("Non-variable terms are not allowed in chase aggregates.");
-                }
-            })
-            .collect::<Vec<_>>();
-
-        let input_variable = variables.remove(0);
-        let distinct_variables = variables;
-
-        Self {
-            aggregate_operation: physical_operation,
-            output_variable,
-            input_variable,
-            distinct_variables,
-            group_by_variables,
-        }
-    }
-
-    /// Return the aggregated input variable, which is the first of the input variables
-    pub fn aggregated_input_variable(&self) -> &Variable {
-        &self.input_variable
-    }
-}
diff --git a/nemo/src/_model/chase_model/atom.rs b/nemo/src/_model/chase_model/atom.rs
deleted file mode 100644
index 29f60ba7b..000000000
--- a/nemo/src/_model/chase_model/atom.rs
+++ /dev/null
@@ -1,278 +0,0 @@
-use std::fmt::Display;
-
-use nemo_physical::datavalues::AnyDataValue;
-
-use crate::model::{Atom, Identifier, PrimitiveTerm, Term, Variable};
-
-/// An atom used within a chase rule
-pub trait ChaseAtom {
-    /// Type of the terms within the atom.
-    type TypeTerm;
-
-    /// Return the predicate [Identifier].
-    fn predicate(&self) -> Identifier;
-
-    /// Return the terms in the atom - immutable.
-    fn terms(&self) -> &Vec<Self::TypeTerm>;
-
-    /// Return the terms in the atom - mutable.
-    fn terms_mut(&mut self) -> &mut Vec<Self::TypeTerm>;
-
-    /// Return the arity of the atom
-    fn arity(&self) -> usize {
-        self.terms().len()
-    }
-
-    /// Return a set of all variables used in this atom
-    fn get_variables(&self) -> Vec<Variable>;
-}
-
-impl Display for dyn ChaseAtom {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        self.predicate().fmt(f)?;
-        f.write_str("(")?;
-        for (index, term) in self.terms().iter().enumerate() {
-            term.fmt(f)?;
-            if index < self.terms().len() - 1 {
-                f.write_str(", ")?;
-            }
-        }
-        f.write_str(")")
-    }
-}
-
-impl<T> From<T> for Atom
-where
-    Term: From<T::TypeTerm>,
-    T: ChaseAtom,
-{
-    fn from(chase_atom: T) -> Self {
-        Atom::new(
-            chase_atom.predicate(),
-            chase_atom.terms().iter().cloned().map(Term::from).collect(),
-        )
-    }
-}
-
-/// An atom which may only use [PrimitiveTerm]s
-#[derive(Debug, Clone)]
-pub struct PrimitiveAtom {
-    predicate: Identifier,
-    terms: Vec<PrimitiveTerm>,
-}
-
-impl PrimitiveAtom {
-    /// Construct a new [PrimitiveAtom].
-    pub fn new(predicate: Identifier, terms: Vec<PrimitiveTerm>) -> Self {
-        Self { predicate, terms }
-    }
-
-    /// Construct a [PrimitiveAtom] from an [Atom].
-    ///
-    /// # Panics
-    /// Panics if the provided atom contains complex terms.
-    pub fn from_flat_atom(atom: &Atom) -> Self {
-        Self {
-            predicate: atom.predicate(),
-            terms: atom
-                .terms()
-                .iter()
-                .map(|t| {
-                    t.as_primitive()
-                        .expect("Function assumes that input atom only contains primitive terms.")
-                })
-                .collect(),
-        }
-    }
-
-    /// Returns all [AnyDataValue]s used as constants in this atom
-    pub fn datavalues(&self) -> impl Iterator<Item = &AnyDataValue> {
-        self.terms.iter().filter_map(|t| {
-            if let PrimitiveTerm::GroundTerm(v) = t {
-                Some(v)
-            } else {
-                None
-            }
-        })
-    }
-}
-
-impl ChaseAtom for PrimitiveAtom {
-    type TypeTerm = PrimitiveTerm;
-
-    /// Return the predicate [Identifier].
-    fn predicate(&self) -> Identifier {
-        self.predicate.clone()
-    }
-
-    /// Return the terms in the atom - immutable.
-    fn terms(&self) -> &Vec<PrimitiveTerm> {
-        &self.terms
-    }
-
-    /// Return the terms in the atom - mutable.
-    fn terms_mut(&mut self) -> &mut Vec<PrimitiveTerm> {
-        &mut self.terms
-    }
-
-    /// Return a set of all variables used in this atom
-    fn get_variables(&self) -> Vec<Variable> {
-        self.terms
-            .iter()
-            .filter_map(|t| {
-                if let PrimitiveTerm::Variable(v) = t {
-                    Some(v.clone())
-                } else {
-                    None
-                }
-            })
-            .collect()
-    }
-}
-
-/// An atom which may only use [Variable]s.
-#[derive(Debug, Clone)]
-pub struct VariableAtom {
-    predicate: Identifier,
-    variables: Vec<Variable>,
-}
-
-impl VariableAtom {
-    /// Construct a new Atom.
-    pub fn new(predicate: Identifier, variables: Vec<Variable>) -> Self {
-        Self {
-            predicate,
-            variables,
-        }
-    }
-
-    /// Construct a [VariableAtom] from an [Atom].
-    ///
-    /// # Panics
-    /// Panics if the provided atom contains terms that are not variables.
-    pub fn from_flat_atom(atom: &Atom) -> Self {
-        Self {
-            predicate: atom.predicate(),
-            variables: atom
-                .terms()
-                .iter()
-                .map(|t| {
-                    if let Term::Primitive(PrimitiveTerm::Variable(variable)) = t {
-                        variable.clone()
-                    } else {
-                        unreachable!("Function assumes that input atom only contains variables.")
-                    }
-                })
-                .collect(),
-        }
-    }
-}
-
-impl From<VariableAtom> for PrimitiveAtom {
-    fn from(atom: VariableAtom) -> Self {
-        Self {
-            predicate: atom.predicate,
-            terms: atom
-                .variables
-                .into_iter()
-                .map(PrimitiveTerm::Variable)
-                .collect(),
-        }
-    }
-}
-
-impl ChaseAtom for VariableAtom {
-    type TypeTerm = Variable;
-
-    /// Return the predicate [Identifier].
-    fn predicate(&self) -> Identifier {
-        self.predicate.clone()
-    }
-
-    /// Return the variables in the atom - immutable.
-    fn terms(&self) -> &Vec<Variable> {
-        &self.variables
-    }
-
-    /// Return the variables in the atom - mutable.
-    fn terms_mut(&mut self) -> &mut Vec<Variable> {
-        &mut self.variables
-    }
-
-    /// Return a set of all variables used in this atom
-    fn get_variables(&self) -> Vec<Variable> {
-        self.terms().to_vec()
-    }
-}
-
-/// An atom which may only contain constants.
-#[derive(Debug, Clone)]
-pub struct ChaseFact {
-    predicate: Identifier,
-    constants: Vec<AnyDataValue>,
-}
-
-impl ChaseFact {
-    /// Create a new [ChaseFact].
-    pub fn new(predicate: Identifier, constants: Vec<AnyDataValue>) -> Self {
-        Self {
-            predicate,
-            constants,
-        }
-    }
-
-    /// Construct a [ChaseFact] from an [Atom].
-    ///
-    /// # Panics
-    /// Panics if the provided atom contains complex terms.
-    pub fn from_flat_atom(atom: &Atom) -> Self {
-        Self {
-            predicate: atom.predicate(),
-            constants: atom
-                .terms()
-                .iter()
-                .map(|t| {
-                    if let Term::Primitive(PrimitiveTerm::GroundTerm(constant)) = t {
-                        constant.clone()
-                    } else {
-                        unreachable!("Function assumes that input atom only contains constants.")
-                    }
-                })
-                .collect(),
-        }
-    }
-}
-
-impl ChaseAtom for ChaseFact {
-    type TypeTerm = AnyDataValue;
-
-    fn predicate(&self) -> Identifier {
-        self.predicate.clone()
-    }
-
-    fn get_variables(&self) -> Vec<Variable> {
-        vec![]
-    }
-
-    fn terms(&self) -> &Vec<AnyDataValue> {
-        &self.constants
-    }
-
-    fn terms_mut(&mut self) -> &mut Vec<AnyDataValue> {
-        &mut self.constants
-    }
-}
-
-impl Display for ChaseFact {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        self.predicate().fmt(f)?;
-        f.write_str("(")?;
-        for (index, term) in self.terms().iter().enumerate() {
-            term.fmt(f)?;
-            if index < self.terms().len() - 1 {
-                f.write_str(", ")?;
-            }
-        }
-        f.write_str(")")
-    }
-}
diff --git a/nemo/src/_model/chase_model/constructor.rs b/nemo/src/_model/chase_model/constructor.rs
deleted file mode 100644
index 05b4a0ba6..000000000
--- a/nemo/src/_model/chase_model/constructor.rs
+++ /dev/null
@@ -1,40 +0,0 @@
-use nemo_physical::datavalues::AnyDataValue;
-
-use crate::model::{Term, Variable};
-
-/// Indicates that a new value must be creater accodring to [Term].
-/// The result will be "stored" in the given variable
-#[derive(Debug, Clone)]
-pub struct Constructor {
-    variable: Variable,
-    term: Term,
-}
-
-impl Constructor {
-    /// Create a new [Constructor].
-    ///
-    /// # Panics
-    /// Pancis if the provided term is an aggregate (We handle those in a separate construct).
-    pub fn new(variable: Variable, term: Term) -> Self {
-        if let Term::Aggregation(_) = term {
-            panic!("An aggregate is not a constructor");
-        }
-
-        Self { variable, term }
-    }
-
-    /// Return the variable which associated with the result of this constructor.
-    pub fn variable(&self) -> &Variable {
-        &self.variable
-    }
-
-    /// Return the term which computes the result of this constructor.
-    pub fn term(&self) -> &Term {
-        &self.term
-    }
-
-    /// Returns all [AnyDataValue]s used in this expression.
-    pub fn datavalues(&self) -> impl Iterator<Item = &AnyDataValue> {
-        self.term().datavalues()
-    }
-}
diff --git a/nemo/src/_model/chase_model/program.rs b/nemo/src/_model/chase_model/program.rs
deleted file mode 100644
index 6e204f1f8..000000000
--- a/nemo/src/_model/chase_model/program.rs
+++ /dev/null
@@ -1,226 +0,0 @@
-//! Defines a variant of [crate::model::Program], suitable for computing the chase.
-
-use std::collections::{HashMap, HashSet};
-
-use nemo_physical::datavalues::AnyDataValue;
-
-use crate::{io::formats::ImportExportHandler, model::Identifier};
-
-use super::{ChaseAtom, ChaseFact, ChaseRule};
-
-/// Representation of a datalog program that is used for generating execution plans for the physical layer.
-#[derive(Debug, Default, Clone)]
-pub(crate) struct ChaseProgram {
-    base: Option<String>,
-    prefixes: HashMap<String, String>,
-    import_handlers: Vec<(Identifier, Box<dyn ImportExportHandler>)>,
-    export_handlers: Vec<(Identifier, Box<dyn ImportExportHandler>)>,
-    rules: Vec<ChaseRule>,
-    facts: Vec<ChaseFact>,
-    output_predicates: Vec<Identifier>,
-}
-
-/// A Builder for a [ChaseProgram].
-#[derive(Debug, Default)]
-pub(crate) struct ChaseProgramBuilder {
-    program: ChaseProgram,
-}
-
-#[allow(dead_code)]
-impl ChaseProgramBuilder {
-    /// Construct a new builder.
-    pub fn new() -> Self {
-        Default::default()
-    }
-
-    /// Construct a [Program] from this builder.
-    pub fn build(self) -> ChaseProgram {
-        self.program
-    }
-
-    /// Set the base IRI.
-    pub fn base(mut self, base: String) -> Self {
-        self.program.base = Some(base);
-        self
-    }
-
-    /// Add a prefix.
-    pub fn prefix(mut self, prefix: String, iri: String) -> Self {
-        self.program.prefixes.insert(prefix, iri);
-        self
-    }
-
-    /// Add prefixes.
-    pub fn prefixes<T>(mut self, prefixes: T) -> Self
-    where
-        T: IntoIterator<Item = (String, String)>,
-    {
-        self.program.prefixes.extend(prefixes);
-        self
-    }
-
-    //     /// Add an imported table.
-    //     pub fn import(mut self, import: &ImportDirective) -> Result<Self, Error> {
-    //         let handler = ImportExportHandlers::import_handler(import)?;
-    //         self.program
-    //             .import_handlers
-    //             .push((import.predicate().clone(), handler));
-    //         Ok(self)
-    //     }
-
-    //     /// Add imported tables.
-    //     pub fn imports<T>(self, imports: T) -> Result<Self, Error>
-    //     where
-    //         T: IntoIterator<Item = ImportDirective>,
-    //     {
-    //         let mut cur_self: Self = self;
-    //         for import in imports {
-    //             cur_self = cur_self.import(&import)?;
-    //         }
-    //         Ok(cur_self)
-    //     }
-
-    //     /// Add an exported table.
-    //     pub fn export(mut self, export: &ExportDirective) -> Result<Self, Error> {
-    //         let handler = ImportExportHandlers::export_handler(export)?;
-    //         self.program
-    //             .export_handlers
-    //             .push((export.predicate().clone(), handler));
-    //         Ok(self)
-    //     }
-
-    //     /// Add exported tables.
-    //     pub fn exports<T>(self, exports: T) -> Result<Self, Error>
-    //     where
-    //         T: IntoIterator<Item = ExportDirective>,
-    //     {
-    //         let mut cur_self: Self = self;
-    //         for export in exports {
-    //             cur_self = cur_self.export(&export)?;
-    //         }
-    //         Ok(cur_self)
-    //     }
-
-    /// Add a rule.
-    pub fn rule(mut self, rule: ChaseRule) -> Self {
-        self.program.rules.push(rule);
-        self
-    }
-
-    /// Add rules.
-    pub fn rules<T>(mut self, rules: T) -> Self
-    where
-        T: IntoIterator<Item = ChaseRule>,
-    {
-        self.program.rules.extend(rules);
-        self
-    }
-
-    /// Add a fact.
-    pub fn fact(mut self, fact: ChaseFact) -> Self {
-        self.program.facts.push(fact);
-        self
-    }
-
-    /// Add facts.
-    pub fn facts<T>(mut self, facts: T) -> Self
-    where
-        T: IntoIterator<Item = ChaseFact>,
-    {
-        self.program.facts.extend(facts);
-        self
-    }
-
-    /// Select an IDB predicate for output.
-    pub fn output_predicate(self, predicate: Identifier) -> Self {
-        self.output_predicates([predicate])
-    }
-
-    /// Select IDB predicates for output.
-    pub fn output_predicates<T>(mut self, predicates: T) -> Self
-    where
-        T: IntoIterator<Item = Identifier>,
-    {
-        self.program.output_predicates.extend(predicates);
-        self
-    }
-}
-
-impl ChaseProgram {
-    /// Return a [builder][ChaseProgramBuilder] for a [ChaseProgram].
-    pub(crate) fn builder() -> ChaseProgramBuilder {
-        Default::default()
-    }
-
-    /// Return all rules in the program - immutable.
-    #[must_use]
-    pub fn rules(&self) -> &Vec<ChaseRule> {
-        &self.rules
-    }
-
-    /// Return all facts in the program.
-    #[must_use]
-    pub fn facts(&self) -> &Vec<ChaseFact> {
-        &self.facts
-    }
-
-    /// Return a HashSet of all idb predicates (predicates occuring rule heads) in the program.
-    #[must_use]
-    pub fn idb_predicates(&self) -> HashSet<Identifier> {
-        self.rules()
-            .iter()
-            .flat_map(|rule| rule.head())
-            .map(|atom| atom.predicate())
-            .collect()
-    }
-
-    //     /// Return all imports in the program.
-    //     pub(crate) fn imports(
-    //         &self,
-    //     ) -> impl Iterator<Item = &(Identifier, Box<dyn ImportExportHandler>)> {
-    //         self.import_handlers.iter()
-    //     }
-
-    //     /// Return all exports in the program.
-    //     pub fn exports(&self) -> impl Iterator<Item = &(Identifier, Box<dyn ImportExportHandler>)> {
-    //         self.export_handlers.iter()
-    //     }
-
-    /// Returns the [AnyDataValue]s used as constants in the rules of the program.
-    pub fn all_datavalues(&self) -> impl Iterator<Item = &AnyDataValue> {
-        self.rules.iter().flat_map(|rule| rule.all_datavalues())
-    }
-}
-
-// impl TryFrom<Program> for ChaseProgram {
-//     type Error = Error;
-
-//     fn try_from(program: Program) -> Result<Self, Error> {
-//         let mut builder = Self::builder()
-//             .prefixes(program.prefixes().clone())
-//             .imports(program.imports().cloned())?
-//             .exports(program.exports().cloned())?
-//             .rules(
-//                 program
-//                     .rules()
-//                     .iter()
-//                     .cloned()
-//                     .map(ChaseRule::try_from)
-//                     .collect::<Result<Vec<ChaseRule>, Error>>()?,
-//             )
-//             .facts(
-//                 program
-//                     .facts()
-//                     .iter()
-//                     .map(|fact| ChaseFact::from_flat_atom(&fact.0)),
-//             );
-
-//         if let Some(base) = program.base() {
-//             builder = builder.base(base);
-//         }
-
-//         builder = builder.output_predicates(program.output_predicates().cloned());
-
-//         Ok(builder.build())
-//     }
-// }
diff --git a/nemo/src/_model/chase_model/rule.rs b/nemo/src/_model/chase_model/rule.rs
deleted file mode 100644
index 574164caf..000000000
--- a/nemo/src/_model/chase_model/rule.rs
+++ /dev/null
@@ -1,628 +0,0 @@
-//! Defines a variant of [crate::model::Rule], suitable for computing the chase.
-
-use std::collections::{HashMap, HashSet};
-
-use nemo_physical::datavalues::AnyDataValue;
-
-use crate::{
-    error::Error,
-    model::{
-        chase_model::variable::{AGGREGATE_VARIABLE_PREFIX, CONSTRUCT_VARIABLE_PREFIX},
-        Aggregate, Constraint, Literal, PrimitiveTerm, Rule, Term, Variable,
-    },
-};
-
-use super::{
-    variable::EQUALITY_VARIABLE_PREFIX, ChaseAggregate, ChaseAtom, Constructor, PrimitiveAtom,
-    VariableAtom,
-};
-
-/// Representation of a rule in a [super::ChaseProgram].
-///
-/// Chase rules may include placeholder variables, which start with `_`:
-/// * Additional constraints: `_EQUALITY_{term_counter}`
-/// * Additional values: `_CONSTRUCT_{term_counter}`
-/// * Aggregates: `_AGGREGATE_{term_counter}`
-#[allow(dead_code)]
-#[derive(Debug, Clone, Default)]
-pub struct ChaseRule {
-    /// Positive part of the body
-    positive_body: Vec<VariableAtom>,
-    /// Derived bindings from the positive body.
-    /// These should appear in order,
-    /// i.e. such that the computation of a value
-    /// does not depend on values constructed later.
-    positive_constructors: Vec<Constructor>,
-    /// Restriction on the positive part of the body
-    positive_constraints: Vec<Constraint>,
-
-    /// Negative part of the body
-    negative_body: Vec<VariableAtom>,
-    /// For each [VariableAtom] in `negative_body`,
-    /// the associated filter statements
-    negative_constraints: Vec<Vec<Constraint>>,
-
-    /// Aggregate
-    aggregate: Option<ChaseAggregate>,
-
-    /// Constructors from aggregate results
-    aggregate_constructors: Vec<Constructor>,
-    /// Restraints on values constructed from aggregate results
-    aggregate_constraints: Vec<Constraint>,
-
-    /// Head atoms of the rule
-    head: Vec<PrimitiveAtom>,
-    /// Index of the head atom which contains the aggregate
-    aggregate_head_index: Option<usize>,
-}
-
-#[allow(dead_code)]
-impl ChaseRule {
-    /// Construct a new [ChaseRule].
-    pub fn positive_rule(
-        head: Vec<PrimitiveAtom>,
-        positive_body: Vec<VariableAtom>,
-        positive_constraints: Vec<Constraint>,
-    ) -> Self {
-        Self {
-            positive_body,
-            positive_constructors: vec![],
-            positive_constraints,
-            negative_body: vec![],
-            negative_constraints: vec![],
-            aggregate: None,
-            aggregate_constructors: vec![],
-            aggregate_constraints: vec![],
-            head,
-            aggregate_head_index: None,
-        }
-    }
-
-    /// Return the head atoms of the rule - immutable.
-    #[must_use]
-    pub fn head(&self) -> &Vec<PrimitiveAtom> {
-        &self.head
-    }
-
-    /// Return the head atoms of the rule - mutable.
-    #[must_use]
-    pub fn head_mut(&mut self) -> &mut Vec<PrimitiveAtom> {
-        &mut self.head
-    }
-
-    /// Return the positive constructors of the rule.
-    pub fn positive_constructors(&self) -> &Vec<Constructor> {
-        &self.positive_constructors
-    }
-
-    /// Return the aggregate of the rule.
-    pub fn aggregate(&self) -> &Option<ChaseAggregate> {
-        &self.aggregate
-    }
-
-    /// Return the index of the aggregate head atom.
-    pub fn aggregate_head_index(&self) -> Option<usize> {
-        self.aggregate_head_index
-    }
-
-    /// Return all the body atoms of the rule.
-    /// This does not distinguish between positive and negative atoms.
-    pub fn all_body(&self) -> impl Iterator<Item = &VariableAtom> {
-        self.positive_body.iter().chain(self.negative_body.iter())
-    }
-
-    /// Return the positive body atoms of the rule - immutable.
-    #[must_use]
-    pub fn positive_body(&self) -> &Vec<VariableAtom> {
-        &self.positive_body
-    }
-
-    /// Return the positive body atoms of the rule - mutable.
-    #[must_use]
-    pub fn positive_body_mut(&mut self) -> &mut Vec<VariableAtom> {
-        &mut self.positive_body
-    }
-
-    /// Return all the constraints of the rule.
-    pub fn all_constraints(&self) -> impl Iterator<Item = &Constraint> {
-        self.positive_constraints
-            .iter()
-            .chain(self.negative_constraints.iter().flatten())
-            .chain(self.aggregate_constraints.iter())
-    }
-
-    /// Return the positive constraints of the rule - immutable.
-    #[must_use]
-    pub fn positive_constraints(&self) -> &Vec<Constraint> {
-        &self.positive_constraints
-    }
-
-    /// Return the positive constraints of the rule - mutable.
-    #[must_use]
-    pub fn positive_constraints_mut(&mut self) -> &mut Vec<Constraint> {
-        &mut self.positive_constraints
-    }
-
-    /// Return the negative body atoms of the rule - immutable.
-    #[must_use]
-    pub fn negative_body(&self) -> &Vec<VariableAtom> {
-        &self.negative_body
-    }
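-
    // Editor's sketch (not part of the patch): assembling a minimal positive rule
    // `head(?X) :- body(?X)` via `positive_rule`. `PrimitiveAtom::new`,
    // `VariableAtom::new`, and `Identifier::new` all appear elsewhere in this diff.
    //
    //     let x = Variable::Universal("X".to_string());
    //     let head = vec![PrimitiveAtom::new(
    //         Identifier::new("head".to_string()),
    //         vec![PrimitiveTerm::Variable(x.clone())],
    //     )];
    //     let body = vec![VariableAtom::new(Identifier::new("body".to_string()), vec![x])];
    //     let rule = ChaseRule::positive_rule(head, body, vec![]);
    //     assert!(rule.negative_body().is_empty());
    //
-    /// Return the negative body atoms of the rule - mutable.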
-    #[must_use]
-    pub fn negative_body_mut(&mut self) -> &mut Vec<VariableAtom> {
-        &mut self.negative_body
-    }
-
-    /// Return the negative constraints of the rule - immutable.
-    #[must_use]
-    pub fn negative_constraints(&self) -> &Vec<Vec<Constraint>> {
-        &self.negative_constraints
-    }
-
-    /// Return the negative constraints of the rule - mutable.
-    #[must_use]
-    pub fn negative_constraints_mut(&mut self) -> &mut Vec<Vec<Constraint>> {
-        &mut self.negative_constraints
-    }
-
-    /// Return the aggregate constraints of the rule.
-    #[must_use]
-    pub fn aggregate_constraints(&self) -> &Vec<Constraint> {
-        &self.aggregate_constraints
-    }
-
-    /// Return the aggregate constructors of the rule.
-    #[must_use]
-    pub fn aggregate_constructors(&self) -> &Vec<Constructor> {
-        &self.aggregate_constructors
-    }
-
-    /// Return all [Variable]s used in this rule.
-    pub fn all_variables(&self) -> Vec<Variable> {
-        let variables_body = self.all_body().flat_map(|atom| atom.get_variables());
-        let variables_head = self.head.iter().flat_map(|atom| atom.get_variables());
-        let variables_constructors = self
-            .positive_constructors
-            .iter()
-            .chain(self.aggregate_constructors.iter())
-            .map(|constructor| constructor.variable().clone());
-        let variables_aggregates = self
-            .aggregate
-            .iter()
-            .map(|aggregate| aggregate.output_variable.clone());
-
-        variables_body
-            .chain(variables_head)
-            .chain(variables_constructors)
-            .chain(variables_aggregates)
-            .collect()
-    }
-
-    /// Returns the [AnyDataValue]s used as constants in this rule.
-    pub fn all_datavalues(&self) -> impl Iterator<Item = &AnyDataValue> {
-        let datavalues_head = self.head.iter().flat_map(|atom| atom.datavalues());
-        let datavalues_constructors = self
-            .positive_constructors
-            .iter()
-            .chain(self.aggregate_constructors.iter())
-            .flat_map(|constructor| constructor.datavalues());
-        let datavalues_constraints = self
-            .positive_constraints
-            .iter()
-            .chain(self.negative_constraints.iter().flatten())
-            .chain(self.aggregate_constraints.iter())
-            .flat_map(|constraint| constraint.datavalues());
-
-        datavalues_head
-            .chain(datavalues_constructors)
-            .chain(datavalues_constraints)
-    }
-}
-
-/// Helper structure defining several categories of constraints
-#[derive(Debug)]
-struct ConstraintCategories {
-    positive_constructors: Vec<Constructor>,
-    positive_constraints: Vec<Constraint>,
-    negative_constraints: Vec<Vec<Constraint>>,
-    aggregate_constructors: Vec<Constructor>,
-    aggregate_constraints: Vec<Constraint>,
-}
-
-impl ConstraintCategories {
-    pub fn new(num_negative_body: usize) -> Self {
-        Self {
-            positive_constructors: Vec::<Constructor>::new(),
-            positive_constraints: Vec::<Constraint>::new(),
-            negative_constraints: vec![Vec::<Constraint>::new(); num_negative_body],
-            aggregate_constructors: Vec::<Constructor>::new(),
-            aggregate_constraints: Vec::<Constraint>::new(),
-        }
-    }
-}
-
-impl ChaseRule {
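    // Editor's note (not part of the patch): the helper below implements the
    // placeholder-variable scheme documented on `ChaseRule`. A sketch of the
    // expected output, assuming a fresh counter:
    //
    //     let mut next_id = 0;
    //     let v0 = Self::generate_incrementing_variable_name(EQUALITY_VARIABLE_PREFIX, &mut next_id);
    //     let v1 = Self::generate_incrementing_variable_name(EQUALITY_VARIABLE_PREFIX, &mut next_id);
    //     // v0 == "_EQUALITY_0", v1 == "_EQUALITY_1"
    //
-    /// Increments `next_variable_id`, but returns its old value with a prefix.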
- fn generate_incrementing_variable_name(prefix: &str, next_variable_id: &mut usize) -> String { - let result = format!("{}{}", prefix, next_variable_id); - *next_variable_id += 1; - result - } - - // Remove constraints of the form ?X = ?Y from the rule - // and apply the corresponding substitution - fn apply_equality(rule: &mut Rule) { - let mut assignment = HashMap::::new(); - - rule.constraints_mut().retain(|constraint| { - if let Constraint::Equals( - Term::Primitive(PrimitiveTerm::Variable(left)), - Term::Primitive(PrimitiveTerm::Variable(right)), - ) = constraint - { - if let Some(assigned) = assignment.get(left) { - assignment.insert(right.clone(), assigned.clone()); - } else if let Some(assigned) = assignment.get(right) { - assignment.insert(left.clone(), assigned.clone()); - } else { - assignment.insert( - left.clone(), - Term::Primitive(PrimitiveTerm::Variable(right.clone())), - ); - } - - return false; - } - - true - }); - - rule.apply_assignment(&assignment); - } - - /// Modify the rule in such a way - /// that it only contains primitive terms in the head - /// and variables in the body. - /// - /// This transformation may introduce new [Constraint]s. - fn flatten_atoms( - rule: &mut Rule, - aggregate: &mut Option, - aggregate_head_index: &mut Option, - ) -> ConstraintCategories { - let num_negative_body = rule.num_negative_body(); - let mut new_constraints = ConstraintCategories::new(num_negative_body); - - let mut rule_next_variable_id: usize = 0; - - // Head atoms may only contain primitive terms - // Aggregates need to be separated - for (atom_index, atom) in rule.head_mut().iter_mut().enumerate() { - struct AggregateInformation { - term_index: usize, - aggregate: Aggregate, - output_variable: Variable, - surrounding_term: Option, - } - let mut aggregate_information: Option = None; - - for (term_index, term) in atom.terms_mut().iter_mut().enumerate() { - // Replace aggregate terms or aggregates inside of arithmetic expressions with placeholder variables - term.update_subterms_recursively(&mut |subterm| match subterm { - Term::Aggregation(aggregate) => { - let output_variable = - Variable::Universal(Self::generate_incrementing_variable_name( - AGGREGATE_VARIABLE_PREFIX, - &mut rule_next_variable_id, - )); - - for aggregate_subterm in &mut aggregate.terms { - if !aggregate_subterm.is_variable() { - let new_variable = - Variable::Universal(Self::generate_incrementing_variable_name( - CONSTRUCT_VARIABLE_PREFIX, - &mut rule_next_variable_id, - )); - let new_term = - Term::Primitive(PrimitiveTerm::Variable(new_variable.clone())); - - new_constraints.positive_constructors.push(Constructor::new( - new_variable, - aggregate_subterm.clone(), - )); - *aggregate_subterm = new_term; - } - } - - aggregate_information = Some(AggregateInformation { - term_index, - aggregate: aggregate.clone(), - output_variable: output_variable.clone(), - surrounding_term: None, - }); - - *subterm = Term::Primitive(PrimitiveTerm::Variable(output_variable)); - - false - } - _ => true, - }); - - debug_assert!( - !matches!(term, Term::Aggregation(_)), - "Aggregate terms should have been replaced with placeholder variables" - ); - - if !term.is_primitive() { - let new_variable = - Variable::Universal(Self::generate_incrementing_variable_name( - CONSTRUCT_VARIABLE_PREFIX, - &mut rule_next_variable_id, - )); - let new_term = Term::Primitive(PrimitiveTerm::Variable(new_variable.clone())); - - let is_aggregate = - if let Some(aggregate_information) = &mut aggregate_information { - 
aggregate_information.surrounding_term = Some(term.clone()); - - aggregate_information.term_index == term_index - } else { - false - }; - - if is_aggregate { - new_constraints - .aggregate_constructors - .push(Constructor::new(new_variable, term.clone())); - } else { - new_constraints - .positive_constructors - .push(Constructor::new(new_variable, term.clone())); - } - - *term = new_term; - } - } - - if let Some(information) = aggregate_information { - let mut group_by_variables = HashSet::::new(); - for (term_index, term) in atom.terms().iter().enumerate() { - if term_index == information.term_index { - continue; - } - - if let Term::Primitive(PrimitiveTerm::Variable(variable)) = term { - group_by_variables.insert(variable.clone()); - } - } - - if let Some(surrounding_term) = information.surrounding_term { - group_by_variables.extend(surrounding_term.variables().cloned()); - group_by_variables.remove(&information.output_variable); - } - - *aggregate = Some(ChaseAggregate::from_aggregate( - information.aggregate, - information.output_variable, - group_by_variables, - )); - *aggregate_head_index = Some(atom_index); - } - } - - // Body literals must only contain variables - // and may not repeat variables within one atom - let mut negative_index = 0; - for literal in rule.body_mut() { - let is_positive = literal.is_positive(); - if !is_positive { - negative_index += 1; - } - - let atom = literal.atom_mut(); - let mut current_variables = HashSet::::new(); - - for term in atom.terms_mut() { - if let Term::Primitive(PrimitiveTerm::Variable(variable)) = term.clone() { - if !current_variables.contains(&variable) { - current_variables.insert(variable); - - continue; - } - } - - let new_variable = Variable::Universal(Self::generate_incrementing_variable_name( - EQUALITY_VARIABLE_PREFIX, - &mut rule_next_variable_id, - )); - let new_term = Term::Primitive(PrimitiveTerm::Variable(new_variable.clone())); - - if is_positive { - new_constraints - .positive_constraints - .push(Constraint::Equals(new_term.clone(), term.clone())); - } else { - new_constraints.negative_constraints[negative_index - 1] - .push(Constraint::Equals(new_term.clone(), term.clone())) - } - - *term = new_term; - } - } - - new_constraints - } - - fn compute_derived_variables( - rule: &Rule, - aggregate: &Option, - constraints: &mut ConstraintCategories, - assigned_constraints: &mut HashSet, - ) -> HashSet { - let mut derived_variables = rule.safe_variables(); - let mut aggregate_variables = HashSet::::new(); - if let Some(aggregate) = aggregate { - aggregate_variables.insert(aggregate.output_variable.clone()); - } - - let mut update = true; - while update { - let num_assigned_constraints = assigned_constraints.len(); - - for (constraint_index, constraint) in rule.constraints().iter().enumerate() { - if assigned_constraints.contains(&constraint_index) { - continue; - } - - if let Some((variable, term)) = constraint.has_form_assignment() { - if derived_variables.contains(variable) - || aggregate_variables.contains(variable) - { - continue; - } - - if !derived_variables.contains(variable) - && term - .variables() - .all(|variable| derived_variables.contains(variable)) - { - derived_variables.insert(variable.clone()); - constraints - .positive_constructors - .push(Constructor::new(variable.clone(), term.clone())); - assigned_constraints.insert(constraint_index); - continue; - } - - if !aggregate_variables.contains(variable) - && term.variables().all(|variable| { - derived_variables.contains(variable) - || 
aggregate_variables.contains(variable)
-                        })
-                    {
-                        aggregate_variables.insert(variable.clone());
-                        constraints
-                            .aggregate_constructors
-                            .push(Constructor::new(variable.clone(), term.clone()));
-                        assigned_constraints.insert(constraint_index);
-                        continue;
-                    }
-                }
-            }
-
-            update = num_assigned_constraints != assigned_constraints.len();
-        }
-
-        derived_variables
-    }
-
-    /// Separate different [Constraint]s of the given [Rule] into several categories.
-    fn seperate_constraints(
-        rule: &Rule,
-        aggregate: &Option<ChaseAggregate>,
-        negative_body: &[VariableAtom],
-        constraints: &mut ConstraintCategories,
-    ) {
-        let mut assigned_constraints = HashSet::<usize>::new();
-        let derived_variables = Self::compute_derived_variables(
-            rule,
-            aggregate,
-            constraints,
-            &mut assigned_constraints,
-        );
-
-        let mut negative_variables = HashMap::<Variable, usize>::new();
-        for (body_index, negative_atom) in negative_body.iter().enumerate() {
-            for variable in negative_atom.terms() {
-                if !derived_variables.contains(variable) {
-                    negative_variables.insert(variable.clone(), body_index);
-                }
-            }
-        }
-
-        for (constraint_index, constraint) in rule.constraints().iter().enumerate() {
-            if assigned_constraints.contains(&constraint_index) {
-                continue;
-            }
-
-            // Constraint on derived variables
-            if constraint
-                .variables()
-                .all(|variable| derived_variables.contains(variable))
-            {
-                constraints.positive_constraints.push(constraint.clone());
-                assigned_constraints.insert(constraint_index);
-                continue;
-            }
-
-            // Constraint on negative variables
-            for variable in constraint.variables() {
-                if let Some(negative_index) = negative_variables.get(variable) {
-                    constraints.negative_constraints[*negative_index].push(constraint.clone());
-                    assigned_constraints.insert(constraint_index);
-                    continue;
-                }
-            }
-
-            // Constraints on aggregates are currently not expressible
-        }
-
-        debug_assert!(assigned_constraints.len() == rule.constraints().len());
-    }
-}
-
-impl TryFrom<Rule> for ChaseRule {
-    type Error = Error;
-
-    fn try_from(mut rule: Rule) -> Result<ChaseRule, Error> {
-        // Preprocess the rule in order to make the translation simpler
-        let mut aggregate: Option<ChaseAggregate> = None;
-        let mut aggregate_head_index: Option<usize> = None;
-
-        Self::apply_equality(&mut rule);
-        let mut constraints =
-            Self::flatten_atoms(&mut rule, &mut aggregate, &mut aggregate_head_index);
-
-        // Build chase rule elements from flattened atoms
-        let head = rule
-            .head()
-            .iter()
-            .map(PrimitiveAtom::from_flat_atom)
-            .collect::<Vec<PrimitiveAtom>>();
-
-        let mut positive_body = Vec::new();
-        let mut negative_body = Vec::new();
-        for literal in rule.body() {
-            match literal {
-                Literal::Positive(atom) => positive_body.push(VariableAtom::from_flat_atom(atom)),
-                Literal::Negative(atom) => negative_body.push(VariableAtom::from_flat_atom(atom)),
-            }
-        }
-
-        // Separate constraints into different categories
-        Self::seperate_constraints(&rule, &aggregate, &negative_body, &mut constraints);
-
-        let ConstraintCategories {
-            positive_constructors,
-            positive_constraints,
-            negative_constraints,
-            aggregate_constructors,
-            aggregate_constraints,
-        } = constraints;
-
-        Ok(Self {
-            positive_body,
-            positive_constructors,
-            positive_constraints,
-            negative_body,
-            negative_constraints,
-            aggregate,
-            aggregate_constructors,
-            aggregate_constraints,
-            head,
-            aggregate_head_index,
-        })
-    }
-}
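// Editor's sketch (not part of the patch): the entry point of this module is the
// `TryFrom<Rule>` conversion above. Given a parsed rule such as
// `result(?X, ?Z) :- data(?X, ?Y), ?Z = ?Y + 1`, the assignment `?Z = ?Y + 1`
// would, under the logic shown here, be picked up by `compute_derived_variables`
// and become a positive `Constructor` binding `?Z`, rather than a filter:
//
//     let chase_rule = ChaseRule::try_from(rule)?;
//     assert_eq!(chase_rule.positive_constructors().len(), 1);
//
diff --git a/nemo/src/_model/chase_model/variable.rs b/nemo/src/_model/chase_model/variable.rs
deleted file mode 100644
index fd611f913..000000000
--- a/nemo/src/_model/chase_model/variable.rs
+++ /dev/null
@@ -1,6 +0,0 @@
-/// Prefix used for generated aggregate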
variables in a [super::ChaseRule] -pub(super) const AGGREGATE_VARIABLE_PREFIX: &str = "_AGGREGATE_"; -/// Prefix used for generated variables encoding equality constraints in a [super::ChaseRule] -pub(super) const EQUALITY_VARIABLE_PREFIX: &str = "_EQUALITY_"; -/// Prefix used for generated variables for storing the value of complex terms in a [super::ChaseRule]. -pub(super) const CONSTRUCT_VARIABLE_PREFIX: &str = "_CONSTRUCT_"; diff --git a/nemo/src/_model/rule_model.rs b/nemo/src/_model/rule_model.rs deleted file mode 100644 index 38a234dda..000000000 --- a/nemo/src/_model/rule_model.rs +++ /dev/null @@ -1,28 +0,0 @@ -//! The data model. - -mod aggregate; -pub use aggregate::*; - -mod atom; -pub use atom::*; - -mod literal; -pub use literal::*; - -mod program; -pub use program::*; - -mod rule; -pub use rule::*; - -mod term; -pub use term::*; - -mod predicate; -pub use predicate::*; - -mod constraint; -pub use constraint::*; - -pub mod import_export; -pub use import_export::*; diff --git a/nemo/src/_model/rule_model/_syntax.rs b/nemo/src/_model/rule_model/_syntax.rs deleted file mode 100644 index e3b9cad78..000000000 --- a/nemo/src/_model/rule_model/_syntax.rs +++ /dev/null @@ -1,63 +0,0 @@ -//! Constants for strings that are relevant to the syntax of rules. -//! These are kept in one location, since they are required in various -//! places related to parsing and display. - -/// The "predicate name" used for the CSV format in import/export directives. -pub(crate) const FILE_FORMAT_CSV: &str = "csv"; -/// The "predicate name" used for the DSV format in import/export directives. -pub(crate) const FILE_FORMAT_DSV: &str = "dsv"; -/// The "predicate name" used for the TSV format in import/export directives. -pub(crate) const FILE_FORMAT_TSV: &str = "tsv"; -/// The "predicate name" used for the generic RDF format in import/export directives. -pub(crate) const FILE_FORMAT_RDF_UNSPECIFIED: &str = "rdf"; -/// The "predicate name" used for the Ntriples format in import/export directives. -pub(crate) const FILE_FORMAT_RDF_NTRIPLES: &str = "ntriples"; -/// The "predicate name" used for the NQuads format in import/export directives. -pub(crate) const FILE_FORMAT_RDF_NQUADS: &str = "nquads"; -/// The "predicate name" used for the Turtle format in import/export directives. -pub(crate) const FILE_FORMAT_RDF_TURTLE: &str = "turtle"; -/// The "predicate name" used for the TriG format in import/export directives. -pub(crate) const FILE_FORMAT_RDF_TRIG: &str = "trig"; -/// The "predicate name" used for the RDF/XML format in import/export directives. -pub(crate) const FILE_FORMAT_RDF_XML: &str = "rdfxml"; -/// The "predicate name" used for the json format in import/export directives. -pub(crate) const FILE_FORMAT_JSON: &str = "json"; - -/// Name of the parameter for specifying the resource in import/export directives. -pub(crate) const PARAMETER_NAME_RESOURCE: &str = "resource"; -/// Name of the parameter for specifying the format in import/export directives. -pub(crate) const PARAMETER_NAME_FORMAT: &str = "format"; -/// Name of the parameter for specifying a base IRI in import/export directives. -pub(crate) const PARAMETER_NAME_BASE: &str = "base"; -/// Name of the parameter for specifying a delimiter in import/export directives for delimiter-separated values format. -pub(crate) const PARAMETER_NAME_DSV_DELIMITER: &str = "delimiter"; -/// Name of the parameter for specifying the compression in import/export directives. 
-pub(crate) const PARAMETER_NAME_COMPRESSION: &str = "compression";
-/// Name of the parameter for specifying the limit in import/export directives.
-pub(crate) const PARAMETER_NAME_LIMIT: &str = "limit";
-
-/// The name of the general, best-effort value format. Importers/exporters supporting this format will usually
-/// accept "any" input value and interpret it in the most natural way. Likewise, any value should be writable
-/// in this format.
-pub(crate) const VALUE_FORMAT_ANY: &str = "any";
-/// The name of the value format that interprets all values as plain strings. Importers/exporters supporting this
-/// format will usually accept any input value and interpret it as strings in the most literal way. Only strings
-/// can be written in this format.
-pub(crate) const VALUE_FORMAT_STRING: &str = "string";
-/// The name of the value format that interprets values as integers whenever possible. Importers/exporters supporting
-/// this format will usually only accept input values that are formatted like integers. Conversely, only integer values
-/// can be written in this format.
-pub(crate) const VALUE_FORMAT_INT: &str = "int";
-/// The name of the value format that interprets values as double-precision floating point numbers whenever possible.
-/// Importers/exporters supporting this format will usually only accept input values that are formatted like decimal numbers,
-/// integers, or floating-point numbers in scientific notation. Conversely, only double values
-/// can be written in this format.
-pub(crate) const VALUE_FORMAT_DOUBLE: &str = "double";
-/// The name of the special value format that indicates that a value should be ignored altogether.
-/// The respective column/parameter will be skipped in reading/writing.
-pub(crate) const VALUE_FORMAT_SKIP: &str = "skip";
-
-/// The name of the compression format that means "no compression".
-pub(crate) const VALUE_COMPRESSION_NONE: &str = "none";
-/// The name of the compression format for gzip compression.
-pub(crate) const VALUE_COMPRESSION_GZIP: &str = "gzip";
diff --git a/nemo/src/_model/rule_model/aggregate.rs b/nemo/src/_model/rule_model/aggregate.rs
deleted file mode 100644
index 9674adbca..000000000
--- a/nemo/src/_model/rule_model/aggregate.rs
+++ /dev/null
@@ -1,54 +0,0 @@
-use crate::model::VariableAssignment;
-
-use super::{Identifier, Term};
-
-/// Aggregate operation on logical values
-#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
-pub enum LogicalAggregateOperation {
-    /// Count of distinct values
-    CountValues,
-    /// Minimum numerical value
-    MinNumber,
-    /// Maximum numerical value
-    MaxNumber,
-    /// Sum of numerical values
-    SumOfNumbers,
-}
-
-impl From<&Identifier> for Option<LogicalAggregateOperation> {
-    fn from(value: &Identifier) -> Self {
-        match value.name().as_str() {
-            "count" => Some(LogicalAggregateOperation::CountValues),
-            "min" => Some(LogicalAggregateOperation::MinNumber),
-            "max" => Some(LogicalAggregateOperation::MaxNumber),
-            "sum" => Some(LogicalAggregateOperation::SumOfNumbers),
-            _ => None,
-        }
-    }
-}
-
-/// Aggregate occurring in a predicate in the head
-#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)]
-pub struct Aggregate {
-    pub(crate) logical_aggregate_operation: LogicalAggregateOperation,
-    pub(crate) terms: Vec<Term>,
-}
-
-impl Aggregate {
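    // Editor's sketch (not part of the patch): aggregate names are resolved through
    // the `From<&Identifier>` impl above; everything used here appears in this diff.
    //
    //     let op = Option::<LogicalAggregateOperation>::from(&Identifier::new("count".to_string()));
    //     assert_eq!(op, Some(LogicalAggregateOperation::CountValues));
    //
-    /// Replaces [super::Variable]s with [Term]s according to the provided assignment.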
- pub fn apply_assignment(&mut self, assignment: &VariableAssignment) { - for term in &mut self.terms { - term.apply_assignment(assignment); - } - } -} - -impl std::fmt::Display for Aggregate { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "#{:?}({:?})", - self.logical_aggregate_operation, self.terms - ) - } -} diff --git a/nemo/src/_model/rule_model/atom.rs b/nemo/src/_model/rule_model/atom.rs deleted file mode 100644 index ab900a4c7..000000000 --- a/nemo/src/_model/rule_model/atom.rs +++ /dev/null @@ -1,88 +0,0 @@ -use crate::model::VariableAssignment; - -use super::{Aggregate, Identifier, PrimitiveTerm, Term, Variable}; - -/// An atom. -#[derive(Debug, Clone, Eq, PartialEq)] -pub struct Atom { - /// The predicate. - predicate: Identifier, - /// The terms. - terms: Vec, -} - -impl Atom { - /// Construct a new Atom. - pub fn new(predicate: Identifier, terms: Vec) -> Self { - Self { predicate, terms } - } - - /// Return the predicate [Identifier]. - #[must_use] - pub fn predicate(&self) -> Identifier { - self.predicate.clone() - } - - /// Return the terms in the atom - immutable. - #[must_use] - pub fn terms(&self) -> &Vec { - &self.terms - } - - /// Return the terms trees in the atom - mutable. - #[must_use] - pub fn terms_mut(&mut self) -> &mut Vec { - &mut self.terms - } - - /// Returns all terms at the leave of the term trees of the atom. - pub fn primitive_terms(&self) -> impl Iterator { - self.terms.iter().flat_map(|t| t.primitive_terms()) - } - - /// Return all variables in the atom. - pub fn variables(&self) -> impl Iterator { - self.terms.iter().flat_map(|t| t.variables()) - } - - /// Return all universally quantified variables in the atom. - pub fn universal_variables(&self) -> impl Iterator { - self.terms.iter().flat_map(|t| t.universal_variables()) - } - - /// Return all existentially quantified variables in the atom. - pub fn existential_variables(&self) -> impl Iterator { - self.terms.iter().flat_map(|t| t.existential_variables()) - } - - /// Return all aggregates in the atom. - pub fn aggregates(&self) -> Vec { - let mut result = Vec::new(); - for term in self.terms() { - result.extend(term.aggregates()); - } - - result - } - - /// Replaces [super::Variable]s with [Term]s according to the provided assignment. - pub fn apply_assignment(&mut self, assignment: &VariableAssignment) { - for tree in &mut self.terms { - tree.apply_assignment(assignment); - } - } -} - -impl std::fmt::Display for Atom { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - self.predicate.fmt(f)?; - f.write_str("(")?; - for (index, term) in self.terms().iter().enumerate() { - term.fmt(f)?; - if index < self.terms.len() - 1 { - f.write_str(", ")?; - } - } - f.write_str(")") - } -} diff --git a/nemo/src/_model/rule_model/constraint.rs b/nemo/src/_model/rule_model/constraint.rs deleted file mode 100644 index a71505a4b..000000000 --- a/nemo/src/_model/rule_model/constraint.rs +++ /dev/null @@ -1,161 +0,0 @@ -use nemo_physical::datavalues::AnyDataValue; - -use crate::model::{Term, Variable, VariableAssignment}; - -use super::{Aggregate, BinaryOperation, PrimitiveTerm}; - -/// Represents a constraint which is expressed as a binary operator applied to two terms -#[derive(Debug, Eq, PartialEq, Clone, PartialOrd, Ord)] -pub enum Constraint { - /// Two terms are equal. - Equals(Term, Term), - /// Two terms are unequal. - Unequals(Term, Term), - /// Value of the left term is less than the value of the right term. 
-    LessThan(Term, Term),
-    /// Value of the left term is greater than the value of the right term.
-    GreaterThan(Term, Term),
-    /// Value of the left term is less than or equal to the value of the right term.
-    LessThanEq(Term, Term),
-    /// Value of the left term is greater than or equal to the value of the right term.
-    GreaterThanEq(Term, Term),
-}
-
-impl Constraint {
-    /// Convert to [BinaryOperation].
-    pub fn as_binary_term(self) -> Term {
-        let operation = match self {
-            Constraint::Equals(_, _) => BinaryOperation::Equal,
-            Constraint::Unequals(_, _) => BinaryOperation::Unequals,
-            Constraint::LessThan(_, _) => BinaryOperation::NumericLessthan,
-            Constraint::GreaterThan(_, _) => BinaryOperation::NumericGreaterthan,
-            Constraint::LessThanEq(_, _) => BinaryOperation::NumericLessthaneq,
-            Constraint::GreaterThanEq(_, _) => BinaryOperation::NumericGreaterthaneq,
-        };
-
-        let (left, right) = match self {
-            Constraint::Equals(left, right)
-            | Constraint::Unequals(left, right)
-            | Constraint::LessThan(left, right)
-            | Constraint::GreaterThan(left, right)
-            | Constraint::LessThanEq(left, right)
-            | Constraint::GreaterThanEq(left, right) => (left, right),
-        };
-
-        Term::Binary {
-            operation,
-            lhs: Box::new(left),
-            rhs: Box::new(right),
-        }
-    }
-
-    /// Return the left and right term used in the constraint.
-    pub fn terms(&self) -> (&Term, &Term) {
-        match &self {
-            Constraint::Equals(left, right)
-            | Constraint::Unequals(left, right)
-            | Constraint::LessThan(left, right)
-            | Constraint::GreaterThan(left, right)
-            | Constraint::LessThanEq(left, right)
-            | Constraint::GreaterThanEq(left, right) => (left, right),
-        }
-    }
-
-    /// Return a mutable reference to the left and right term used in the constraint.
-    pub fn terms_mut(&mut self) -> (&mut Term, &mut Term) {
-        match self {
-            Constraint::Equals(left, right)
-            | Constraint::Unequals(left, right)
-            | Constraint::LessThan(left, right)
-            | Constraint::GreaterThan(left, right)
-            | Constraint::LessThanEq(left, right)
-            | Constraint::GreaterThanEq(left, right) => (left, right),
-        }
-    }
-
-    /// Return the left term used in the constraint.
-    pub fn left(&self) -> &Term {
-        self.terms().0
-    }
-
-    /// Return the right term used in the constraint.
-    pub fn right(&self) -> &Term {
-        self.terms().1
-    }
-
-    /// Return all the variables used within this constraint.
-    pub fn variables(&self) -> impl Iterator<Item = &Variable> {
-        self.left().variables().chain(self.right().variables())
-    }
-
-    /// Returns all [AnyDataValue]s used within this constraint.
-    pub fn datavalues(&self) -> impl Iterator<Item = &AnyDataValue> {
-        self.left().datavalues().chain(self.right().datavalues())
-    }
-
-    /// Return whether this type of constraint only works on numeric values.
-    pub fn is_numeric(&self) -> bool {
-        !(matches!(self, Constraint::Equals(_, _)) || matches!(self, Constraint::Unequals(_, _)))
-    }
-
-    /// Replaces [Variable]s with [Term]s according to the provided assignment.
-    pub fn apply_assignment(&mut self, assignment: &VariableAssignment) {
-        let (left, right) = self.terms_mut();
-
-        left.apply_assignment(assignment);
-        right.apply_assignment(assignment);
-    }
-
-    /// Return all aggregates in this constraint.
-    pub fn aggregates(&self) -> Vec<Aggregate> {
-        let (left, right) = self.terms();
-        let mut result = left.aggregates();
-        result.extend(right.aggregates());
-
-        result
-    }
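-
    // Editor's sketch (not part of the patch): `has_form_assignment` below recognizes
    // `?Variable = Term` in either orientation. `AnyDataValue::new_plain_string` is an
    // assumed nemo-physical constructor; the rest appears in this diff.
    //
    //     let x = Variable::Universal("X".to_string());
    //     let term = Term::Primitive(PrimitiveTerm::GroundTerm(
    //         AnyDataValue::new_plain_string("a".to_string()),
    //     ));
    //     let constraint = Constraint::Equals(
    //         Term::Primitive(PrimitiveTerm::Variable(x.clone())),
    //         term.clone(),
    //     );
    //     assert_eq!(constraint.has_form_assignment(), Some((&x, &term)));
    //
-    /// Return whether the constraint could be interpreted as an assignment,
-    /// i.e. has the form `?Variable = Term`.
-    ///
-    /// If so returns a tuple containing the variable and the term.
-    /// Returns `None` otherwise.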
- pub fn has_form_assignment(&self) -> Option<(&Variable, &Term)> { - if let Constraint::Equals(Term::Primitive(PrimitiveTerm::Variable(variable)), term) = self { - Some((variable, term)) - } else if let Constraint::Equals(term, Term::Primitive(PrimitiveTerm::Variable(variable))) = - self - { - Some((variable, term)) - } else { - None - } - } -} - -impl Constraint { - /// Returns a string representation for the operation defined by this constraint. - fn operator_string(&self) -> &'static str { - match self { - Constraint::Equals(_, _) => "=", - Constraint::Unequals(_, _) => "!=", - Constraint::LessThan(_, _) => "<", - Constraint::GreaterThan(_, _) => ">", - Constraint::LessThanEq(_, _) => "<=", - Constraint::GreaterThanEq(_, _) => ">=", - } - } -} - -impl std::fmt::Display for Constraint { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let (left, right) = self.terms(); - - f.write_fmt(format_args!( - "{} {} {}", - left, - self.operator_string(), - right - )) - } -} diff --git a/nemo/src/_model/rule_model/import_export.rs b/nemo/src/_model/rule_model/import_export.rs deleted file mode 100644 index 85ce847fd..000000000 --- a/nemo/src/_model/rule_model/import_export.rs +++ /dev/null @@ -1,151 +0,0 @@ -//! Import and export directives are a direct representation of the syntactic information -//! given in rule files. - -use nemo_physical::datavalues::MapDataValue; - -use crate::model::Identifier; - -/// An import/export specification. This object captures all information that is typically -/// present in an import or export directive in a Nemo program, including the main format, -/// optional attributes that define additional parameters, and an indentifier to map the data -/// to or from (i.e., a predicate name). -#[derive(Clone, Debug)] -pub(crate) struct ImportExportDirective { - /// The predicate we're handling. - pub(crate) predicate: Identifier, - /// The file format and resource we're using. - pub(crate) format: FileFormat, - /// The attributes we've been given. - pub(crate) attributes: MapDataValue, -} - -impl PartialEq for ImportExportDirective { - fn eq(&self, other: &Self) -> bool { - self.predicate == other.predicate - && self.format == other.format - && self.attributes == other.attributes - } -} - -impl Eq for ImportExportDirective {} - -/// An import specification. -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct ImportDirective(pub(crate) ImportExportDirective); - -impl ImportDirective { - /// Return the predicate. - pub fn predicate(&self) -> &Identifier { - &self.0.predicate - } - - /// Return the file format. - pub fn file_format(&self) -> FileFormat { - self.0.format - } - - /// Return the attributes. - pub fn attributes(&self) -> &MapDataValue { - &self.0.attributes - } -} - -impl From for ImportDirective { - fn from(value: ImportExportDirective) -> Self { - Self(value) - } -} - -/// An export specification. -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct ExportDirective(pub(crate) ImportExportDirective); - -impl ExportDirective { - /// Return the predicate. - pub fn predicate(&self) -> &Identifier { - &self.0.predicate - } - - /// Return the file format. - pub fn file_format(&self) -> FileFormat { - self.0.format - } - - /// Return the attributes. - pub fn attributes(&self) -> &MapDataValue { - &self.0.attributes - } - - /// Obtain a default [ExportDirective] for the given predicate. 
- pub fn default(predicate: Identifier) -> ExportDirective { - ExportDirective(ImportExportDirective { - format: FileFormat::CSV, - predicate, - attributes: MapDataValue::from_iter([]), - }) - } -} - -impl From for ExportDirective { - fn from(value: ImportExportDirective) -> Self { - Self(value) - } -} - -/// The different supported variants of the RDF format. -#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)] -pub enum RdfVariant { - /// An unspecified format, using the resource name as a heuristic. - #[default] - Unspecified, - /// RDF 1.1 N-Triples - NTriples, - /// RDF 1.1 N-Quads - NQuads, - /// RDF 1.1 Turtle - Turtle, - /// RDF 1.1 RDF/XML - RDFXML, - /// RDF 1.1 TriG - TriG, -} - -impl std::fmt::Display for RdfVariant { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::NTriples => write!(f, "RDF N-Triples"), - Self::NQuads => write!(f, "RDF N-Quads"), - Self::Turtle => write!(f, "RDF Turtle"), - Self::RDFXML => write!(f, "RDF/XML"), - Self::TriG => write!(f, "RDF TriG"), - Self::Unspecified => write!(f, "RDF"), - } - } -} - -/// Supported file formats. -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub enum FileFormat { - /// Comma-separated values - CSV, - /// Delimiter-separated values - DSV, - /// Tab-separated values - TSV, - /// RDF Triples or Quads, with the given format variant. - RDF(RdfVariant), - /// JSON objects - JSON, -} - -impl std::fmt::Display for FileFormat { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::DSV => write!(f, "DSV"), - Self::CSV => write!(f, "CSV"), - Self::TSV => write!(f, "TSV"), - Self::JSON => write!(f, "JSON"), - Self::RDF(variant) => write!(f, "{variant}"), - } - } -} diff --git a/nemo/src/_model/rule_model/literal.rs b/nemo/src/_model/rule_model/literal.rs deleted file mode 100644 index 68f4ae327..000000000 --- a/nemo/src/_model/rule_model/literal.rs +++ /dev/null @@ -1,113 +0,0 @@ -use std::ops::Neg; - -use crate::model::VariableAssignment; - -use super::{Aggregate, Atom, Identifier, PrimitiveTerm, Term, Variable}; - -/// A literal. -#[derive(Debug, Eq, PartialEq, Clone)] -pub enum Literal { - /// A non-negated literal. - Positive(Atom), - /// A negated literal. - Negative(Atom), -} - -impl Literal { - /// Check if the literal is positive. - pub fn is_positive(&self) -> bool { - matches!(self, Self::Positive(_)) - } - - /// Check if the literal is negative. - pub fn is_negative(&self) -> bool { - matches!(self, Self::Negative(_)) - } - - /// Returns a reference to the underlying atom. - pub fn atom(&self) -> &Atom { - match self { - Self::Positive(atom) => atom, - Self::Negative(atom) => atom, - } - } - - /// Returns a mutable reference to the underlying atom. - pub fn atom_mut(&mut self) -> &mut Atom { - match self { - Self::Positive(atom) => atom, - Self::Negative(atom) => atom, - } - } -} - -impl Neg for Literal { - type Output = Self; - - fn neg(self) -> Self::Output { - match self { - Literal::Positive(atom) => Self::Negative(atom), - Literal::Negative(atom) => Self::Positive(atom), - } - } -} - -generate_forwarder!(forward_to_atom; Positive, Negative); - -impl Literal { - /// Return the predicate [Identifier]. - #[must_use] - pub fn predicate(&self) -> Identifier { - forward_to_atom!(self, predicate) - } - - /// Return the terms in the literal. - #[must_use] - pub fn terms(&self) -> &Vec { - forward_to_atom!(self, terms) - } - - /// Return the variables in the literal. 
- pub fn variables(&self) -> impl Iterator { - forward_to_atom!(self, variables) - } - - /// Return the universally quantified variables in the literal. - pub fn universal_variables(&self) -> impl Iterator { - forward_to_atom!(self, universal_variables) - } - - /// Return the existentially quantified variables in the literal. - pub fn existential_variables(&self) -> impl Iterator { - forward_to_atom!(self, existential_variables) - } - - /// Returns all terms at the leave of the term trees of the atom. - pub fn primitive_terms(&self) -> impl Iterator { - forward_to_atom!(self, primitive_terms) - } - - /// Return all aggregates in the literal. - pub fn aggregates(&self) -> Vec { - forward_to_atom!(self, aggregates) - } - - /// Replaces [Variable]s with [Term]s according to the provided assignment. - pub fn apply_assignment(&mut self, assignment: &VariableAssignment) { - match self { - Literal::Positive(atom) => atom.apply_assignment(assignment), - Literal::Negative(atom) => atom.apply_assignment(assignment), - } - } -} - -impl std::fmt::Display for Literal { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Literal::Positive(_) => {} - Literal::Negative(_) => f.write_str("~")?, - } - - self.atom().fmt(f) - } -} diff --git a/nemo/src/_model/rule_model/predicate.rs b/nemo/src/_model/rule_model/predicate.rs deleted file mode 100644 index 69618da65..000000000 --- a/nemo/src/_model/rule_model/predicate.rs +++ /dev/null @@ -1,27 +0,0 @@ -/// An identifier for, e.g., a Term or a Predicate. -#[derive(Debug, Eq, PartialEq, Hash, Clone, PartialOrd, Ord)] -pub struct Identifier(pub(crate) String); - -impl Identifier { - /// Create a new [Identifier]. - pub fn new(name: String) -> Self { - Identifier(name) - } - - /// Returns the associated name - pub fn name(&self) -> String { - self.0.clone() - } -} - -impl std::fmt::Display for Identifier { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", &self.name()) - } -} - -impl From for Identifier { - fn from(value: String) -> Self { - Identifier(value) - } -} diff --git a/nemo/src/_model/rule_model/program.rs b/nemo/src/_model/rule_model/program.rs deleted file mode 100644 index b91e1672d..000000000 --- a/nemo/src/_model/rule_model/program.rs +++ /dev/null @@ -1,270 +0,0 @@ -// use std::collections::{HashMap, HashSet}; - -// use crate::model::{ExportDirective, ImportDirective}; - -// use super::{Atom, Identifier, Rule}; - -// /// A (ground) fact. -// #[derive(Debug, Eq, PartialEq, Clone)] -// pub struct Fact(pub Atom); - -// impl std::fmt::Display for Fact { -// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { -// self.0.fmt(f) -// } -// } - -// /// A statement that can occur in the program. -// #[derive(Debug, Eq, PartialEq, Clone)] -// pub enum Statement { -// /// A fact. -// Fact(Fact), -// /// A rule. -// Rule(Rule), -// } - -// /// A complete program. -// #[derive(Debug, Default, Clone)] -// pub struct Program { -// base: Option, -// prefixes: HashMap, -// rules: Vec, -// facts: Vec, -// imports: Vec, -// exports: Vec, -// output_predicates: Vec, -// } - -// /// A Builder for a program. -// #[derive(Debug, Default)] -// pub struct ProgramBuilder { -// program: Program, -// } - -// impl ProgramBuilder { -// /// Construct a new builder. -// pub fn new() -> Self { -// Default::default() -// } - -// /// Construct a [Program] from this builder. -// pub fn build(self) -> Program { -// self.program -// } - -// /// Set the base IRI. 
-// pub fn base(mut self, base: String) -> Self { -// self.program.base = Some(base); -// self -// } - -// /// Add a prefix. -// pub fn prefix(mut self, prefix: String, iri: String) -> Self { -// self.program.prefixes.insert(prefix, iri); -// self -// } - -// /// Add prefixes. -// pub fn prefixes(mut self, prefixes: T) -> Self -// where -// T: IntoIterator, -// { -// self.program.prefixes.extend(prefixes); -// self -// } - -// /// Add an imported table. -// pub fn import(mut self, import: ImportDirective) -> Self { -// self.program.imports.push(import); -// self -// } - -// /// Add imported tables. -// pub fn imports(mut self, imports: T) -> Self -// where -// T: IntoIterator, -// { -// self.program.imports.extend(imports); -// self -// } - -// /// Add an exported table. -// pub fn export(mut self, export: ExportDirective) -> Self { -// self.program.exports.push(export); -// self -// } - -// /// Add exported tables. -// pub fn exports(mut self, exports: T) -> Self -// where -// T: IntoIterator, -// { -// self.program.exports.extend(exports); -// self -// } - -// /// Add a rule. -// pub fn rule(mut self, rule: Rule) -> Self { -// self.program.rules.push(rule); -// self -// } - -// /// Add rules. -// pub fn rules(mut self, rules: T) -> Self -// where -// T: IntoIterator, -// { -// self.program.rules.extend(rules); -// self -// } - -// /// Add a fact. -// pub fn fact(mut self, fact: Fact) -> Self { -// self.program.facts.push(fact); -// self -// } - -// /// Add facts. -// pub fn facts(mut self, facts: T) -> Self -// where -// T: IntoIterator, -// { -// self.program.facts.extend(facts); -// self -// } - -// /// Mark predicate as output predicate. -// pub fn output_predicate(self, predicate: Identifier) -> Self { -// self.output_predicates([predicate]) -// } - -// /// Mark predicates as output predicates. -// pub fn output_predicates(mut self, predicates: T) -> Self -// where -// T: IntoIterator, -// { -// self.program.output_predicates.extend(predicates); -// self -// } -// } - -// impl Program { -// /// Return a [builder][ProgramBuilder] for the [Program]. -// pub fn builder() -> ProgramBuilder { -// Default::default() -// } - -// /// Get the base IRI, if set. -// #[must_use] -// pub fn base(&self) -> Option { -// self.base.clone() -// } - -// /// Return all rules in the program - immutable. -// #[must_use] -// pub fn rules(&self) -> &Vec { -// &self.rules -// } - -// /// Return all facts in the program. -// #[must_use] -// pub fn facts(&self) -> &Vec { -// &self.facts -// } - -// /// Return a HashSet of all predicates in the program (in rules and facts). -// #[must_use] -// pub fn predicates(&self) -> HashSet { -// self.rules() -// .iter() -// .flat_map(|rule| { -// rule.head() -// .iter() -// .map(|atom| atom.predicate()) -// .chain(rule.body().iter().map(|literal| literal.predicate())) -// }) -// .chain(self.facts().iter().map(|atom| atom.0.predicate())) -// .collect() -// } - -// /// Return a HashSet of all idb predicates (predicates occuring rule heads) in the program. -// #[must_use] -// pub fn idb_predicates(&self) -> HashSet { -// self.rules() -// .iter() -// .flat_map(|rule| rule.head()) -// .map(|atom| atom.predicate()) -// .collect() -// } - -// /// Return a HashSet of all edb predicates (all predicates minus idb predicates) in the program. 
-// #[must_use] -// pub fn edb_predicates(&self) -> HashSet { -// self.predicates() -// .difference(&self.idb_predicates()) -// .cloned() -// .collect() -// } - -// /// Return an Iterator over all output predicates that -// /// were explicitly marked in output directives. -// pub fn output_predicates(&self) -> impl Iterator { -// self.output_predicates.iter() -// } - -// /// Add output predicates to the program. -// pub fn add_output_predicates(&mut self, predicates: T) -// where -// T: IntoIterator, -// { -// self.output_predicates.extend(predicates); -// } - -// /// Remove all output predicates of the program. -// pub fn clear_output_predicates(&mut self) { -// self.output_predicates.clear(); -// } - -// /// Return all prefixes in the program. -// #[must_use] -// pub fn prefixes(&self) -> &HashMap { -// &self.prefixes -// } - -// /// Return all [ImportDirective]s of the program. -// pub fn imports(&self) -> impl Iterator { -// self.imports.iter() -// } - -// /// Add [ImportDirective]s to the program. -// pub fn add_imports(&mut self, imports: T) -// where -// T: IntoIterator, -// { -// self.imports.extend(imports); -// } - -// /// Return all [ExportDirective]s of the program. -// pub fn exports(&self) -> impl Iterator { -// self.exports.iter() -// } - -// /// Add [ExportDirective]s to the program. -// pub fn add_exports(&mut self, exports: T) -// where -// T: IntoIterator, -// { -// self.exports.extend(exports); -// } - -// /// Remove all [ExportDirective]s of the program. -// pub fn clear_exports(&mut self) { -// self.exports.clear(); -// } - -// /// Look up a given prefix. -// #[must_use] -// pub fn resolve_prefix(&self, tag: &str) -> Option { -// self.prefixes.get(tag).cloned() -// } -// } diff --git a/nemo/src/_model/rule_model/rule.rs b/nemo/src/_model/rule_model/rule.rs deleted file mode 100644 index 35a6ca077..000000000 --- a/nemo/src/_model/rule_model/rule.rs +++ /dev/null @@ -1,384 +0,0 @@ -// use std::collections::{HashMap, HashSet}; - -// use crate::model::VariableAssignment; - -// use super::{Atom, Constraint, Literal, PrimitiveTerm, Term, Variable}; - -// /// A rule. -// #[derive(Debug, Eq, PartialEq, Clone)] -// pub struct Rule { -// /// Head atoms of the rule -// head: Vec, -// /// Body literals of the rule -// body: Vec, -// /// Constraints on the body of the rule -// constraints: Vec, -// } - -// impl Rule { -// /// Construct a new rule. 
-// pub fn new(head: Vec, body: Vec, constraints: Vec) -> Self { -// Self { -// head, -// body, -// constraints, -// } -// } - -// fn calculate_derived_variables( -// safe_variables: &HashSet, -// constraints: &[Constraint], -// ) -> HashSet { -// let mut derived_variables = safe_variables.clone(); - -// let mut satisfied_constraints = HashSet::::new(); -// while satisfied_constraints.len() < constraints.len() { -// let num_satisified_constraints = satisfied_constraints.len(); - -// for (constraint_index, constraint) in constraints.iter().enumerate() { -// if satisfied_constraints.contains(&constraint_index) { -// continue; -// } - -// if let Some((variable, term)) = constraint.has_form_assignment() { -// if !derived_variables.contains(variable) -// && term -// .variables() -// .all(|term_variable| derived_variables.contains(term_variable)) -// { -// derived_variables.insert(variable.clone()); -// satisfied_constraints.insert(constraint_index); -// continue; -// } -// } -// } - -// if satisfied_constraints.len() == num_satisified_constraints { -// return derived_variables; -// } -// } - -// derived_variables -// } - -// /// Return all variables that appear in negative literals -// /// but cannot be derived from positive literals. -// /// -// /// For each variable also returns the associated index of the literal. -// /// -// /// Returns an error if one negative variable is associated with multiple literals. -// fn calculate_negative_variables( -// negative: &[Literal], -// safe_variables: &HashSet, -// ) -> Result, ParseError> { -// let mut negative_variables = HashMap::::new(); - -// for (literal_index, negative_literal) in negative.iter().enumerate() { -// let mut current_unsafe = HashMap::::new(); - -// for negative_term in negative_literal.terms() { -// if let Term::Primitive(PrimitiveTerm::Variable(variable)) = negative_term { -// if safe_variables.contains(variable) { -// continue; -// } - -// current_unsafe.insert(variable.clone(), literal_index); - -// if negative_variables.contains_key(variable) { -// return Err(ParseError::UnsafeVariableInMultipleNegativeLiterals( -// variable.clone(), -// )); -// } -// } -// } - -// negative_variables.extend(current_unsafe) -// } - -// Ok(negative_variables) -// } - -// /// Construct a new rule, validating constraints on variable usage. 
-// pub(crate) fn new_validated( -// head: Vec, -// body: Vec, -// constraints: Vec, -// ) -> Result { -// // All the existential variables used in the rule -// let existential_variable_names = head -// .iter() -// .flat_map(|a| a.existential_variables().flat_map(|v| v.name())) -// .collect::>(); - -// for variable in body -// .iter() -// .flat_map(|l| l.variables()) -// .chain(constraints.iter().flat_map(|c| c.variables())) -// { -// // Existential variables may only occur in the head -// if variable.is_existential() { -// return Err(ParseError::BodyExistential(variable.clone())); -// } - -// // There may not be a universal variable whose name is the same that of an existential -// if let Some(name) = variable.name() { -// if existential_variable_names.contains(&name) { -// return Err(ParseError::BothQuantifiers(name)); -// } -// } -// } - -// // Divide the literals into a positive and a negative part -// let (positive, negative): (Vec<_>, Vec<_>) = body -// .iter() -// .cloned() -// .partition(|literal| literal.is_positive()); - -// // Safe variables are considered to be -// // all variables occuring as primitive terms in a positive body literal -// // or every value that is equal to such a variable -// let safe_variables = Self::safe_variables_literals(&positive); - -// // Derived variables are variables that result from functional expressions -// // expressed as ?Variable = Term constraints, -// // where the term only contains safe or derived variables. -// let derived_variables = Self::calculate_derived_variables(&safe_variables, &constraints); - -// // Negative variables are variables that occur as primitive terms in negative literals -// // bot cannot be derived -// let negative_variables = Self::calculate_negative_variables(&negative, &derived_variables)?; - -// // Each constraint must only use derived variables -// // or if it contains negative variables, then all variables in the constraint -// // must be from the same atom -// for constraint in &constraints { -// let unknown = constraint.variables().find(|variable| { -// !derived_variables.contains(variable) && !negative_variables.contains_key(variable) -// }); - -// if let Some(variable) = unknown { -// return Err(ParseError::UnsafeComplexTerm( -// constraint.to_string(), -// variable.clone(), -// )); -// } - -// if let Some(negative_variable) = constraint -// .variables() -// .find(|variable| negative_variables.contains_key(variable)) -// { -// let negative_literal = &negative[*negative_variables -// .get(negative_variable) -// .expect("Map must contain key")]; -// let allowed_variables = negative_literal -// .variables() -// .cloned() -// .collect::>(); - -// if let Some(not_allowed) = constraint -// .variables() -// .find(|variable| !allowed_variables.contains(variable)) -// { -// return Err(ParseError::ConstraintOutsideVariable( -// constraint.to_string(), -// negative_variable.clone(), -// negative_literal.to_string(), -// not_allowed.clone(), -// )); -// } -// } -// } - -// // Each complex term in the body and head must only use safe or derived variables -// for term in body -// .iter() -// .flat_map(|l| l.terms()) -// .chain(head.iter().flat_map(|a| a.terms())) -// { -// if term.is_primitive() { -// continue; -// } - -// for variable in term.variables() { -// if !derived_variables.contains(variable) { -// return Err(ParseError::UnsafeComplexTerm( -// term.to_string(), -// variable.clone(), -// )); -// } -// } -// } - -// let mut is_existential = false; - -// // Head atoms may only use variables that are safe or derived 
-// for variable in head.iter().flat_map(|a| a.variables()) {
-// if variable.is_existential() {
-// is_existential = true;
-// }
-
-// if variable.is_unnamed() {
-// return Err(ParseError::UnnamedInHead);
-// }
-
-// if variable.is_universal() && !derived_variables.contains(variable) {
-// return Err(ParseError::UnsafeHeadVariable(variable.clone()));
-// }
-// }
-
-// // Check for aggregates in the body of a rule
-// for literal in &body {
-// #[allow(clippy::never_loop)]
-// for aggregate in literal.aggregates() {
-// return Err(ParseError::AggregateInBody(aggregate.clone()));
-// }
-// }
-// for constraint in &constraints {
-// #[allow(clippy::never_loop)]
-// for aggregate in constraint.aggregates() {
-// return Err(ParseError::AggregateInBody(aggregate.clone()));
-// }
-// }
-
-// // We only allow one aggregate per rule,
-// // and do not allow them to appear together with existential variables
-// let mut aggregate_count = 0;
-// for head_atom in &head {
-// for term in head_atom.terms() {
-// aggregate_count += term.aggregates().len();
-
-// if aggregate_count > 1 {
-// return Err(ParseError::MultipleAggregates);
-// }
-// }
-// }
-
-// if aggregate_count > 0 && is_existential {
-// return Err(ParseError::AggregatesPlusExistentials);
-// }
-
-// Ok(Rule {
-// head,
-// body,
-// constraints,
-// })
-// }
-
-// /// Return all variables that are "safe".
-// /// A variable is safe if it occurs in a positive body literal.
-// fn safe_variables_literals(literals: &[Literal]) -> HashSet<Variable> {
-// let mut result = HashSet::new();
-
-// for literal in literals {
-// if let Literal::Positive(atom) = literal {
-// for term in atom.terms() {
-// if let Term::Primitive(PrimitiveTerm::Variable(variable)) = term {
-// result.insert(variable.clone());
-// }
-// }
-// }
-// }
-
-// result
-// }
-
-// /// Return all variables that are "safe".
-// /// A variable is safe if it occurs in a positive body literal,
-// /// or is equal to such a value.
-// pub fn safe_variables(&self) -> HashSet<Variable> {
-// Self::safe_variables_literals(&self.body)
-// }
-
-// /// Return the head atoms of the rule - immutable.
-// #[must_use]
-// pub fn head(&self) -> &Vec<Atom> {
-// &self.head
-// }
-
-// /// Return the head atoms of the rule - mutable.
-// #[must_use]
-// pub fn head_mut(&mut self) -> &mut Vec<Atom> {
-// &mut self.head
-// }
-
-// /// Return the body literals of the rule - immutable.
-// #[must_use]
-// pub fn body(&self) -> &Vec<Literal> {
-// &self.body
-// }
-
-// /// Return the body literals of the rule - mutable.
-// #[must_use]
-// pub fn body_mut(&mut self) -> &mut Vec<Literal> {
-// &mut self.body
-// }
-
-// /// Return the constraints of the rule - immutable.
-// #[must_use]
-// pub fn constraints(&self) -> &Vec<Constraint> {
-// &self.constraints
-// }
-
-// /// Return the constraints of the rule - mutable.
-// #[must_use]
-// pub fn constraints_mut(&mut self) -> &mut Vec<Constraint> {
-// &mut self.constraints
-// }
-
-// /// Replaces [Variable]s with [super::Term]s according to the provided assignment.
-// pub fn apply_assignment(&mut self, assignment: &VariableAssignment) {
-// self.body
-// .iter_mut()
-// .for_each(|l| l.apply_assignment(assignment));
-// self.head
-// .iter_mut()
-// .for_each(|a| a.apply_assignment(assignment));
-// self.constraints
-// .iter_mut()
-// .for_each(|f| f.apply_assignment(assignment));
-// }
-
-// /// Return the number of negative body atoms contained in the rule.
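// [Editor's sketch, not part of this patch] `apply_assignment` above fans the
// same substitution out over body, head, and constraints. On a minimal
// stand-in term type (editorial, not the model's `Term`), that substitution
// looks as follows; `VariableAssignment` is assumed to be a map from
// variables to terms, which is how it is used above.
use std::collections::HashMap;

#[derive(Clone, Debug, PartialEq)]
enum SketchTerm {
    Variable(String),
    Constant(i64),
    Function(String, Vec<SketchTerm>),
}

fn apply_assignment_sketch(term: &mut SketchTerm, assignment: &HashMap<String, SketchTerm>) {
    match term {
        // A bound variable is replaced outright by the assigned term.
        SketchTerm::Variable(name) => {
            if let Some(value) = assignment.get(name.as_str()).cloned() {
                *term = value;
            }
        }
        SketchTerm::Constant(_) => {}
        // Compound terms recurse into their subterms, as the rule model does.
        SketchTerm::Function(_, subterms) => {
            for subterm in subterms {
                apply_assignment_sketch(subterm, assignment);
            }
        }
    }
}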
-// pub fn num_negative_body(&self) -> usize { -// self.body -// .iter() -// .filter(|literal| literal.is_negative()) -// .count() -// } -// } - -// impl std::fmt::Display for Rule { -// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { -// for (index, atom) in self.head.iter().enumerate() { -// atom.fmt(f)?; - -// if index < self.head.len() - 1 { -// f.write_str(", ")?; -// } -// } - -// f.write_str(" :- ")?; - -// for (index, literal) in self.body.iter().enumerate() { -// literal.fmt(f)?; - -// if index < self.body.len() - 1 { -// f.write_str(", ")?; -// } -// } - -// if !self.constraints.is_empty() { -// f.write_str(", ")?; -// } - -// for (index, constraint) in self.constraints.iter().enumerate() { -// constraint.fmt(f)?; - -// if index < self.constraints.len() - 1 { -// f.write_str(", ")?; -// } -// } - -// f.write_str(" .") -// } -// } diff --git a/nemo/src/_model/rule_model/term.rs b/nemo/src/_model/rule_model/term.rs deleted file mode 100644 index dea064c34..000000000 --- a/nemo/src/_model/rule_model/term.rs +++ /dev/null @@ -1,919 +0,0 @@ -use std::fmt::{Debug, Display}; - -use nemo_physical::datavalues::AnyDataValue; - -use crate::{error::Error, model::VariableAssignment}; - -use super::{Aggregate, Identifier}; - -/// Variable that can be bound to a specific value. -/// Variables are identified by a string name or (in the case of -/// invented variable names) by numeric ids. -#[derive(Debug, Eq, PartialEq, Hash, Clone, PartialOrd, Ord)] -pub enum Variable { - /// A universally quantified variable. - Universal(String), - /// An existentially quantified variable. - Existential(String), - /// An unnamed variable identified by a numeric id. - UnnamedUniversal(usize), -} - -impl Variable { - /// Return the string name of the variable, or `None` if - /// the variable is unnamed. - /// - /// Note: Use `Display` or `Debug` for error messages etc. - pub fn name(&self) -> Option { - match self { - Self::Universal(identifier) | Self::Existential(identifier) => { - Some(identifier.to_owned()) - } - Self::UnnamedUniversal(_) => None, - } - } - - /// Return whether this is a universal variable. - pub fn is_universal(&self) -> bool { - matches!(self, Variable::Universal(_)) - } - - /// Return whether this is an existential variable. - pub fn is_existential(&self) -> bool { - matches!(self, Variable::Existential(_)) - } - - /// Return whether this variable was generated by a wildcard pattern. - pub fn is_unnamed(&self) -> bool { - match self { - Self::Universal(_) | Self::Existential(_) => false, - Self::UnnamedUniversal(_) => true, - } - } - - /// Make an unnamed variable with the given unique index. - pub fn new_unamed(index: usize) -> Variable { - Self::UnnamedUniversal(index) - } -} - -impl Display for Variable { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::Universal(var_name) => write!(f, "?{}", var_name), - Self::Existential(var_name) => write!(f, "!{}", var_name), - Self::UnnamedUniversal(_) => write!(f, "_"), - } - } -} - -/// Simple term that is either a constant or a variable -#[derive(Debug, Eq, PartialEq, Clone, PartialOrd, Ord)] -pub enum PrimitiveTerm { - /// A constant. - GroundTerm(AnyDataValue), - /// A variable. - Variable(Variable), -} - -impl From for PrimitiveTerm { - fn from(value: AnyDataValue) -> Self { - Self::GroundTerm(value) - } -} - -impl PrimitiveTerm { - /// Return `true` if term is not a variable. 
- pub fn is_ground(&self) -> bool { - !matches!(self, PrimitiveTerm::Variable(_)) - } -} - -impl Display for PrimitiveTerm { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - PrimitiveTerm::GroundTerm(term) => write!(f, "{}", term), - PrimitiveTerm::Variable(term) => write!(f, "{}", term), - } - } -} - -/// Binary operation between two [Term]s. -#[derive(Debug, Eq, PartialEq, Copy, Clone, PartialOrd, Ord)] -pub enum BinaryOperation { - /// Equality - Equal, - /// Inequality - Unequals, - /// Addition between two numeric values - NumericAddition, - /// Subtraction between two numeric values - NumericSubtraction, - /// Multiplication between two numeric values - NumericMultiplication, - /// Division between two numeric values - NumericDivision, - /// Logarithm of a numeric value to some numeric base - NumericLogarithm, - /// Numeric value raised to another numeric value - NumericPower, - /// Remainder of a division between two numeric values - NumericRemainder, - /// Numeric greater than comparison - NumericGreaterthan, - /// Numeric greater than or equals comparison - NumericGreaterthaneq, - /// Numeric less than comparison - NumericLessthan, - /// Numeric less than or equals comparison - NumericLessthaneq, - /// Lexicographic comparison between strings - StringCompare, - /// Check whether string is contained in another (SPARQL function CONTAINS) - StringContains, - /// Check whether a pattern is matched within a string (SPARQL function REGEX) - StringRegex, - /// String starting at some start position - StringSubstring, - /// First part of a string split by some other string - StringBefore, - /// Second part of a string split by some other string - StringAfter, - /// Whether string starts with a certain string - StringStarts, - /// Whether string ends with a certain string - StringEnds, -} - -impl BinaryOperation { - /// Return a function which is able to construct the respective term based on the function name. - /// Returns `None` if the provided function name does not correspond to a known binary function. - pub fn construct_from_name(name: &str) -> Option { - Some(match name.to_uppercase().as_str() { - "LOG" => Self::NumericLogarithm, - "POW" => Self::NumericPower, - "COMPARE" => Self::StringCompare, - "CONTAINS" => Self::StringContains, - "REGEX" => Self::StringRegex, - "SUBSTR" => Self::StringSubstring, - "STRSTARTS" => Self::StringStarts, - "STRENDS" => Self::StringEnds, - "STRBEFORE" => Self::StringBefore, - "STRAFTER" => Self::StringAfter, - "REM" => Self::NumericRemainder, - _ => return None, - }) - } - - /// Return the name of the operation. 
- pub fn name(&self) -> String {
- let name = match self {
- Self::NumericAddition => "Addition",
- Self::NumericSubtraction => "Subtraction",
- Self::NumericMultiplication => "Multiplication",
- Self::NumericDivision => "Division",
- Self::NumericPower => "POW",
- Self::NumericRemainder => "Remainder",
- Self::NumericLogarithm => "Logarithm",
- Self::StringCompare => "StringCompare",
- Self::StringContains => "CONTAINS",
- Self::StringRegex => "REGEX",
- Self::StringSubstring => "SUBSTR",
- Self::Equal => "Equals",
- Self::Unequals => "Unequals",
- Self::NumericGreaterthan => "GreaterThan",
- Self::NumericGreaterthaneq => "GreaterThanEq",
- Self::NumericLessthan => "LessThan",
- Self::NumericLessthaneq => "LessThanEq",
- Self::StringBefore => "STRBEFORE",
- Self::StringAfter => "STRAFTER",
- Self::StringStarts => "STRSTARTS",
- Self::StringEnds => "STRENDS",
- };
-
- String::from(name)
- }
-
- /// Return the infix operator for this operation
- /// or `None` if this is not an infix operation
- pub fn infix(&self) -> Option<&'static str> {
- match self {
- Self::NumericAddition => Some("+"),
- Self::NumericSubtraction => Some("-"),
- Self::NumericMultiplication => Some("*"),
- Self::NumericDivision => Some("/"),
- Self::Equal => Some("="),
- Self::Unequals => Some("!="),
- Self::NumericGreaterthan => Some(">"),
- Self::NumericGreaterthaneq => Some(">="),
- Self::NumericLessthan => Some("<"),
- Self::NumericLessthaneq => Some("<="),
- Self::NumericRemainder => Some("%"),
- Self::NumericLogarithm
- | Self::NumericPower
- | Self::StringCompare
- | Self::StringContains
- | Self::StringRegex
- | Self::StringSubstring
- | Self::StringStarts
- | Self::StringEnds
- | Self::StringBefore
- | Self::StringAfter => None,
- }
- }
-}
-
-/// Ternary operation applied to a [Term]
-#[derive(Debug, Eq, PartialEq, Copy, Clone, PartialOrd, Ord)]
-pub enum TernaryOperation {
- /// String starting at some start position with a given maximum length
- StringSubstringLength,
-}
-
-impl TernaryOperation {
- /// Return a function which is able to construct the respective term based on the function name.
- /// Returns `None` if the provided function name does not correspond to a known ternary function.
- pub fn construct_from_name(name: &str) -> Option<TernaryOperation> {
- Some(match name.to_uppercase().as_str() {
- "SUBSTRING" => Self::StringSubstringLength,
- _ => return None,
- })
- }
-
- /// Return the name of the operation.
- pub fn name(&self) -> String {
- let name = match self {
- TernaryOperation::StringSubstringLength => "SUBSTRING",
- };
-
- String::from(name)
- }
-}
-
-/// N-ary operation applied to a [Term]
-#[derive(Debug, Eq, PartialEq, Copy, Clone, PartialOrd, Ord)]
-pub enum NaryOperation {
- /// Bitwise and operation
- BitAnd,
- /// Bitwise or operation
- BitOr,
- /// Bitwise xor operation
- BitXor,
- /// Conjunction of boolean values
- BooleanConjunction,
- /// Disjunction of boolean values
- BooleanDisjunction,
- /// Sum of numeric values
- NumericSum,
- /// Product of numeric values
- NumericProduct,
- /// Minimum of numeric values
- NumericMinimum,
- /// Maximum of numeric values
- NumericMaximum,
- /// Lukasiewicz norm of numeric values
- NumericLukasiewicz,
- /// Concatenation of string values, corresponding to SPARQL function CONCAT.
- StringConcatenation,
-}
-
-impl NaryOperation {
- /// Return a function which is able to construct the respective term based on the function name.
- /// Returns `None` if the provided function name does not correspond to a known n-ary function.
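// [Editor's sketch, not part of this patch] Taken together, the
// `construct_from_name` tables give a parser one lookup path per operation
// arity. A hypothetical dispatcher could try them in turn; `OpKind` and
// `resolve_operation` are editorial names, and `UnaryOperation`'s constructor
// (defined below) returns a `Result` rather than an `Option`, hence the `.ok()`.
enum OpKind {
    Unary(UnaryOperation),
    Binary(BinaryOperation),
    Ternary(TernaryOperation),
    Nary(NaryOperation),
}

fn resolve_operation(name: &str) -> Option<OpKind> {
    BinaryOperation::construct_from_name(name)
        .map(OpKind::Binary)
        .or_else(|| TernaryOperation::construct_from_name(name).map(OpKind::Ternary))
        .or_else(|| NaryOperation::construct_from_name(name).map(OpKind::Nary))
        .or_else(|| UnaryOperation::construct_from_name(name).ok().map(OpKind::Unary))
}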
- pub fn construct_from_name(name: &str) -> Option<NaryOperation> {
- Some(match name.to_uppercase().as_str() {
- "BITAND" => Self::BitAnd,
- "BITOR" => Self::BitOr,
- "BITXOR" => Self::BitXor,
- "MAX" => Self::NumericMaximum,
- "MIN" => Self::NumericMinimum,
- "LUKA" => Self::NumericLukasiewicz,
- "SUM" => Self::NumericSum,
- "PROD" => Self::NumericProduct,
- "AND" => Self::BooleanConjunction,
- "OR" => Self::BooleanDisjunction,
- "CONCAT" => Self::StringConcatenation,
- _ => return None,
- })
- }
-
- /// Return the name of the operation.
- pub fn name(&self) -> String {
- let name = match self {
- Self::StringConcatenation => "CONCAT",
- Self::BooleanConjunction => "AND",
- Self::BooleanDisjunction => "OR",
- Self::BitAnd => "BITAND",
- Self::BitOr => "BITOR",
- Self::BitXor => "BITXOR",
- Self::NumericSum => "SUM",
- Self::NumericProduct => "PROD",
- Self::NumericMinimum => "MIN",
- Self::NumericMaximum => "MAX",
- Self::NumericLukasiewicz => "LUKA",
- };
-
- String::from(name)
- }
-}
-
-/// Unary operation applied to a [Term]
-#[derive(Debug, Eq, PartialEq, Copy, Clone, PartialOrd, Ord)]
-pub enum UnaryOperation {
- /// Boolean negation
- BooleanNegation,
- /// Cast to double
- CastToDouble,
- /// Cast to float
- CastToFloat,
- /// Cast to integer
- CastToInteger,
- /// Canonical string representation of a value
- CanonicalString,
- /// Check if value is an integer
- CheckIsInteger,
- /// Check if value is a float
- CheckIsFloat,
- /// Check if value is a double
- CheckIsDouble,
- /// Check if value is an iri
- CheckIsIri,
- /// Check if value is numeric
- CheckIsNumeric,
- /// Check if value is a null
- CheckIsNull,
- /// Check if value is a string
- CheckIsString,
- /// Get datatype of a value
- Datatype,
- /// Get language tag of a language tagged string
- LanguageTag,
- /// Lexical value
- LexicalValue,
- /// Absolute value of a numeric value
- NumericAbsolute,
- /// Cosine of a numeric value
- NumericCosine,
- /// Rounding up of a numeric value
- NumericCeil,
- /// Rounding down of a numeric value
- NumericFloor,
- /// Additive inverse of a numeric value
- NumericNegation,
- /// Rounding of a numeric value
- NumericRound,
- /// Sine of a numeric value
- NumericSine,
- /// Square root of a numeric value
- NumericSquareroot,
- /// Tangent of a numeric value
- NumericTangent,
- /// Length of a string value
- StringLength,
- /// Reverse of a string value
- StringReverse,
- /// String converted to lowercase letters
- StringLowercase,
- /// String converted to uppercase letters
- StringUppercase,
-}
-
-impl UnaryOperation {
- /// Return a function which is able to construct the respective term based on the function name.
- /// Returns an error if the provided function name does not correspond to a known unary function.
- pub fn construct_from_name(name: &str) -> Result<UnaryOperation, Error> {
- match name {
- "isInteger" => Ok(UnaryOperation::CheckIsInteger),
- "isFloat" => Ok(UnaryOperation::CheckIsFloat),
- "isDouble" => Ok(UnaryOperation::CheckIsDouble),
- "isIri" => Ok(UnaryOperation::CheckIsIri),
- "isNumeric" => Ok(UnaryOperation::CheckIsNumeric),
- "isNull" => Ok(UnaryOperation::CheckIsNull),
- "isString" => Ok(UnaryOperation::CheckIsString),
- "ABS" => Ok(UnaryOperation::NumericAbsolute),
- "SQRT" => Ok(UnaryOperation::NumericSquareroot),
- "NOT" => Ok(UnaryOperation::BooleanNegation),
- "fullStr" => Ok(UnaryOperation::CanonicalString),
- "STR" => Ok(UnaryOperation::LexicalValue),
- "SIN" => Ok(UnaryOperation::NumericSine),
- "COS" => Ok(UnaryOperation::NumericCosine),
- "TAN" => Ok(UnaryOperation::NumericTangent),
- "STRLEN" => Ok(UnaryOperation::StringLength),
- "STRREV" => Ok(UnaryOperation::StringReverse),
- "UCASE" => Ok(UnaryOperation::StringUppercase),
- "LCASE" => Ok(UnaryOperation::StringLowercase),
- "ROUND" => Ok(UnaryOperation::NumericRound),
- "CEIL" => Ok(UnaryOperation::NumericCeil),
- "FLOOR" => Ok(UnaryOperation::NumericFloor),
- "DATATYPE" => Ok(UnaryOperation::Datatype),
- "LANG" => Ok(UnaryOperation::LanguageTag),
- "INT" => Ok(UnaryOperation::CastToInteger),
- "DOUBLE" => Ok(UnaryOperation::CastToDouble),
- "FLOAT" => Ok(UnaryOperation::CastToFloat),
- s => Err(Error::UnknownUnaryOpertation {
- operation: s.into(),
- }),
- }
- }
-
- /// Return the name of the operation.
- pub fn name(&self) -> String {
- let name = match self {
- Self::NumericSquareroot => "SQRT",
- Self::NumericNegation => "MINUS",
- Self::NumericAbsolute => "ABS",
- Self::BooleanNegation => "NOT",
- Self::CanonicalString => "fullStr",
- Self::NumericCosine => "COS",
- Self::NumericSine => "SIN",
- Self::NumericTangent => "TAN",
- Self::StringLength => "STRLEN",
- Self::StringReverse => "STRREV",
- Self::StringLowercase => "LCASE",
- Self::StringUppercase => "UCASE",
- Self::NumericCeil => "CEIL",
- Self::NumericFloor => "FLOOR",
- Self::NumericRound => "ROUND",
- Self::CastToInteger => "INT",
- Self::CastToDouble => "DOUBLE",
- Self::CastToFloat => "FLOAT",
- Self::CheckIsInteger => "isInteger",
- Self::CheckIsFloat => "isFloat",
- Self::CheckIsDouble => "isDouble",
- Self::CheckIsIri => "isIri",
- Self::CheckIsNumeric => "isNumeric",
- Self::CheckIsNull => "isNull",
- Self::CheckIsString => "isString",
- Self::Datatype => "DATATYPE",
- Self::LanguageTag => "LANG",
- Self::LexicalValue => "STR",
- };
-
- String::from(name)
- }
-}
-
-/// Possibly complex term that may occur within an [super::Atom]
-#[derive(Eq, PartialEq, Clone, PartialOrd, Ord)]
-pub enum Term {
- /// Primitive term.
- Primitive(PrimitiveTerm),
- /// Unary operation.
- Unary(UnaryOperation, Box<Term>),
- /// Binary operation.
- Binary {
- /// The operation to be executed.
- operation: BinaryOperation,
- /// The left hand side operand.
- lhs: Box<Term>,
- /// The right hand side operand.
- rhs: Box<Term>,
- },
- /// Ternary operation.
- Ternary {
- /// The operation to be executed.
- operation: TernaryOperation,
- /// The first operand.
- first: Box<Term>,
- /// The second operand.
- second: Box<Term>,
- /// The third operand.
- third: Box<Term>,
- },
- /// An n-ary operation.
- Nary {
- /// The operation to be executed.
- operation: NaryOperation,
- /// Its parameters
- parameters: Vec<Term>,
- },
- /// Aggregation.
- Aggregation(Aggregate),
- /// Abstract Function.
- Function(Identifier, Vec<Term>),
-}
-
-impl Term {
- /// If the term is a simple [PrimitiveTerm] then return it.
- /// Otherwise return `None`.
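// [Editor's sketch, not part of this patch] `construct_from_name` and `name`
// are intended to round-trip; the UCASE/LCASE and isNumeric pairings above
// were aligned so that this hypothetical check holds for the names it lists.
#[test]
fn unary_operation_names_round_trip() {
    for name in ["UCASE", "LCASE", "SQRT", "STRLEN", "isNumeric"] {
        let operation = UnaryOperation::construct_from_name(name).expect("name should be known");
        assert_eq!(operation.name(), name);
    }
}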
- pub(crate) fn as_primitive(&self) -> Option { - match self { - Term::Primitive(primitive) => Some(primitive.clone()), - _ => None, - } - } - - /// Returns `true` if term is primitive. - /// Returns `false` if term is composite. - pub(crate) fn is_primitive(&self) -> bool { - self.as_primitive().is_some() - } - - /// Return whether this term is a variable. - pub(crate) fn is_variable(&self) -> bool { - matches!(self, Term::Primitive(PrimitiveTerm::Variable(_))) - } - - /// Return all [PrimitiveTerm]s that make up this term. - pub(crate) fn primitive_terms(&self) -> Vec<&PrimitiveTerm> { - match self { - Term::Primitive(primitive) => { - vec![primitive] - } - Term::Binary { lhs, rhs, .. } => { - let mut terms = lhs.primitive_terms(); - terms.extend(rhs.primitive_terms()); - - terms - } - Term::Ternary { - first, - second, - third, - .. - } => { - let mut terms = first.primitive_terms(); - terms.extend(second.primitive_terms()); - terms.extend(third.primitive_terms()); - - terms - } - Term::Nary { parameters, .. } => parameters - .iter() - .flat_map(|p| p.primitive_terms()) - .collect(), - Term::Unary(_, inner) => inner.primitive_terms(), - Term::Function(_, subterms) => { - subterms.iter().flat_map(|t| t.primitive_terms()).collect() - } - Term::Aggregation(aggregate) => aggregate - .terms - .iter() - .flat_map(|term| term.primitive_terms()) - .collect(), - } - } - - /// Return all variables in the term. - pub(crate) fn variables(&self) -> impl Iterator { - self.primitive_terms() - .into_iter() - .filter_map(|term| match term { - PrimitiveTerm::Variable(var) => Some(var), - _ => None, - }) - } - - /// Return all [AnyDataValue]s that appear in the term. - pub(crate) fn datavalues(&self) -> impl Iterator { - self.primitive_terms() - .into_iter() - .filter_map(|term| match term { - PrimitiveTerm::GroundTerm(datavalue) => Some(datavalue), - _ => None, - }) - } - - /// Return all universally quantified variables in the term. - pub(crate) fn universal_variables(&self) -> impl Iterator { - self.variables() - .filter(|var| matches!(var, Variable::Universal(_))) - } - - /// Return all existentially quantified variables in the term. - pub(crate) fn existential_variables(&self) -> impl Iterator { - self.variables() - .filter(|var| matches!(var, Variable::Existential(_))) - } - - /// Replaces [Variable]s with [Term]s according to the provided assignment. - pub(crate) fn apply_assignment(&mut self, assignment: &VariableAssignment) { - match self { - Term::Primitive(primitive) => { - if let PrimitiveTerm::Variable(variable) = primitive { - if let Some(value) = assignment.get(variable) { - *self = value.clone(); - } - } - } - Term::Binary { lhs, rhs, .. } => { - lhs.apply_assignment(assignment); - rhs.apply_assignment(assignment); - } - Term::Unary(_, inner) => inner.apply_assignment(assignment), - Term::Aggregation(aggregate) => aggregate.apply_assignment(assignment), - Term::Function(_, subterms) => subterms - .iter_mut() - .for_each(|t| t.apply_assignment(assignment)), - Term::Ternary { - first, - second, - third, - .. - } => { - first.apply_assignment(assignment); - second.apply_assignment(assignment); - third.apply_assignment(assignment); - } - Term::Nary { parameters, .. } => { - parameters - .iter_mut() - .for_each(|t| t.apply_assignment(assignment)); - } - } - } - - fn subterms_mut(&mut self) -> Vec<&mut Term> { - match self { - Term::Primitive(_primitive) => Vec::new(), - Term::Unary(_, ref mut inner) => vec![inner], - Term::Binary { - ref mut lhs, - ref mut rhs, - .. 
- } => {
- vec![lhs, rhs]
- }
- Term::Ternary {
- ref mut first,
- ref mut second,
- ref mut third,
- ..
- } => vec![first, second, third],
- Term::Nary {
- operation: _,
- parameters,
- } => parameters.iter_mut().collect(),
- Term::Aggregation(_aggregate) => Vec::new(),
- Term::Function(_, subterms) => subterms.iter_mut().collect(),
- }
- }
-
- /// Mutate the term in place, calling the function `f` on itself and recursively on its subterms if the function `f` returns true
- ///
- /// This is used e.g. to rewrite aggregates inside of constructors with placeholder variables
- pub(crate) fn update_subterms_recursively<F>(&mut self, f: &mut F)
- where
- F: FnMut(&mut Term) -> bool,
- {
- f(self);
-
- for subterm in self.subterms_mut() {
- let should_recurse = f(subterm);
-
- if should_recurse {
- subterm.update_subterms_recursively(f);
- }
- }
- }
-
- /// Return all aggregates contained in this term.
- pub(crate) fn aggregates(&self) -> Vec<Aggregate> {
- match self {
- Term::Primitive(_) => vec![],
- Term::Unary(_, subterm) => subterm.aggregates(),
- Term::Binary {
- operation: _,
- lhs,
- rhs,
- } => {
- let mut result = lhs.aggregates();
- result.extend(rhs.aggregates());
- result
- }
- Term::Ternary {
- operation: _,
- first,
- second,
- third,
- } => {
- let mut result = first.aggregates();
- result.extend(second.aggregates());
- result.extend(third.aggregates());
-
- result
- }
- Term::Nary {
- operation: _,
- parameters,
- } => {
- let mut result = Vec::<Aggregate>::new();
- for subterm in parameters {
- result.extend(subterm.aggregates());
- }
- result
- }
- Term::Aggregation(aggregate) => {
- let mut result = vec![aggregate.clone()];
-
- for subterm in &aggregate.terms {
- result.extend(subterm.aggregates());
- }
-
- result
- }
- Term::Function(_, _) => panic!("Function symbols not supported"),
- }
- }
-}
-
-impl From<PrimitiveTerm> for Term {
- fn from(value: PrimitiveTerm) -> Self {
- Term::Primitive(value)
- }
-}
-
-impl Term {
- fn ascii_tree(&self) -> ascii_tree::Tree {
- match self {
- Term::Primitive(primitive) => ascii_tree::Tree::Leaf(vec![format!("{:?}", primitive)]),
- Term::Binary {
- operation,
- lhs,
- rhs,
- } => ascii_tree::Tree::Node(operation.name(), vec![lhs.ascii_tree(), rhs.ascii_tree()]),
- Term::Unary(operation, inner) => {
- ascii_tree::Tree::Node(operation.name(), vec![inner.ascii_tree()])
- }
- Term::Aggregation(aggregate) => {
- ascii_tree::Tree::Leaf(vec![format!("{:?}", aggregate)])
- }
- Term::Function(function, subterms) => ascii_tree::Tree::Node(
- function.to_string(),
- subterms.iter().map(|s| s.ascii_tree()).collect(),
- ),
- Term::Ternary {
- operation,
- first,
- second,
- third,
- } => ascii_tree::Tree::Node(
- operation.name(),
- vec![first.ascii_tree(), second.ascii_tree(), third.ascii_tree()],
- ),
- Term::Nary {
- operation,
- parameters,
- } => ascii_tree::Tree::Node(
- operation.name(),
- parameters.iter().map(|p| p.ascii_tree()).collect(),
- ),
- }
- }
-
- /// Defines the precedence of the term operations.
- /// This is only relevant for the [Display] implementation.
- fn precedence(&self) -> usize {
- match self {
- Term::Primitive(_) => 0,
- Term::Binary {
- operation: BinaryOperation::NumericAddition,
- ..
- } => 1,
- Term::Binary {
- operation: BinaryOperation::NumericSubtraction,
- ..
- } => 1,
- Term::Binary {
- operation: BinaryOperation::NumericMultiplication,
- ..
- } => 2,
- Term::Binary {
- operation: BinaryOperation::NumericDivision,
- ..
- } => 2,
- Term::Binary { .. } => 3,
- Term::Ternary { .. } => 3,
- Term::Nary { .. 
} => 5, - Term::Unary(_, _) => 5, - Term::Aggregation(_) => 5, - Term::Function(_, _) => 5, - } - } - - fn format_braces_priority( - &self, - f: &mut std::fmt::Formatter<'_>, - term: &Term, - ) -> std::fmt::Result { - let need_braces = self.precedence() > term.precedence() && !term.is_primitive(); - - if need_braces { - self.format_braces(f, term) - } else { - write!(f, "{}", term) - } - } - - fn format_braces(&self, f: &mut std::fmt::Formatter<'_>, term: &Term) -> std::fmt::Result { - f.write_str("(")?; - write!(f, "{}", term)?; - f.write_str(")") - } - - fn format_nary_operation( - &self, - f: &mut std::fmt::Formatter<'_>, - terms: &[Term], - delimiter: &str, - ) -> std::fmt::Result { - for (index, term) in terms.iter().enumerate() { - self.format_braces_priority(f, term)?; - - if index < terms.len() - 1 { - f.write_str(delimiter)?; - } - } - - Ok(()) - } - - fn format_binary_operation( - &self, - f: &mut std::fmt::Formatter<'_>, - left: &Term, - right: &Term, - operation: BinaryOperation, - ) -> std::fmt::Result { - if let Some(operator) = operation.infix() { - self.format_braces_priority(f, left)?; - - write!(f, " {operator} ")?; - - self.format_braces_priority(f, right) - } else { - write!(f, "{}({}, {})", operation.name(), left, right) - } - } - - fn format_ternary_operation( - &self, - f: &mut std::fmt::Formatter<'_>, - first: &Term, - second: &Term, - third: &Term, - operation: TernaryOperation, - ) -> std::fmt::Result { - write!(f, "{}({}, {}, {})", operation.name(), first, second, third) - } -} - -impl Debug for Term { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - ascii_tree::write_tree(f, &self.ascii_tree()) - } -} - -impl Display for Term { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Term::Primitive(primitive) => write!(f, "{}", primitive), - Term::Binary { - operation, - lhs, - rhs, - } => self.format_binary_operation(f, lhs, rhs, *operation), - Term::Unary(UnaryOperation::NumericNegation, inner) => { - write!(f, "-")?; - self.format_braces_priority(f, inner) - } - Term::Unary(UnaryOperation::NumericAbsolute, inner) => { - write!(f, "|{}|", inner) - } - Term::Unary(operation, inner) => { - write!(f, "{}({})", operation.name(), inner) - } - Term::Aggregation(aggregate) => write!(f, "{}", aggregate), - Term::Function(function, subterms) => { - f.write_str(&function.to_string())?; - f.write_str("(")?; - self.format_nary_operation(f, subterms, ", ")?; - f.write_str(")") - } - Term::Ternary { - operation, - first, - second, - third, - } => self.format_ternary_operation(f, first, second, third, *operation), - Term::Nary { - operation, - parameters, - } => { - f.write_str(&operation.name())?; - f.write_str("(")?; - self.format_nary_operation(f, parameters, ", ")?; - f.write_str(")") - } - } - } -} - -impl From for Term { - fn from(adv: AnyDataValue) -> Self { - Term::Primitive(PrimitiveTerm::from(adv)) - } -} - -#[cfg(test)] -mod test {} diff --git a/nemo/src/_model/types/complex_types.rs b/nemo/src/_model/types/complex_types.rs deleted file mode 100644 index 1156c905d..000000000 --- a/nemo/src/_model/types/complex_types.rs +++ /dev/null @@ -1,232 +0,0 @@ -//! 
This module defines a model for nested type constructs
-
-use std::{iter::from_fn, sync::Arc};
-
-use super::primitive_types::PrimitiveType;
-
-/// A nested type
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub(crate) enum NestedType {
- /// A tuple of nested types
- Tuple(TupleType),
- /// A primitive type
- Primitive(PrimitiveType),
-}
-
-impl NestedType {
- /// Returns the [PrimitiveType] contained within, if any.
- pub(crate) fn as_primitive(&self) -> Option<&PrimitiveType> {
- match self {
- Self::Primitive(inner) => Some(inner),
- _ => None,
- }
- }
-
- /// Returns the [TupleType] contained within, if any.
- pub(crate) fn as_tuple(&self) -> Option<&TupleType> {
- match self {
- Self::Tuple(inner) => Some(inner),
- _ => None,
- }
- }
-}
-
-impl Default for NestedType {
- fn default() -> Self {
- Self::Primitive(PrimitiveType::default())
- }
-}
-
-/// A tuple of nested types
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub(crate) struct TupleType {
- field_types: Arc<[NestedType]>,
-}
-
-impl TupleType {
- /// Returns the arity (width) of the tuple type.
- pub(crate) fn arity(&self) -> usize {
- self.field_types.len()
- }
-
- /// Returns `true` if the tuple type does not contain nested tuples.
- pub(crate) fn is_flat(&self) -> bool {
- self.field_types
- .iter()
- .all(|t| matches!(t, NestedType::Primitive(_)))
- }
-
- /// Returns the underlying [primitive types][PrimitiveType],
- /// provided that this is a flat type.
- pub(crate) fn into_flat(&self) -> Option<Vec<PrimitiveType>> {
- let mut result = Vec::new();
-
- for field_type in self.field_types.iter() {
- if let Some(primitive_type) = field_type.as_primitive() {
- result.push(*primitive_type)
- } else {
- // found a non-primitive type, so we're not flat.
- return None;
- }
- }
-
- Some(result)
- }
-}
-
-// impl Deref for TupleType {
-// type Target = [NestedType];

-// fn deref(&self) -> &Self::Target {
-// &self.field_types
-// }
-// }
-
-impl FromIterator<PrimitiveType> for TupleType {
- fn from_iter<T: IntoIterator<Item = PrimitiveType>>(iter: T) -> Self {
- Self {
- field_types: iter.into_iter().map(NestedType::Primitive).collect(),
- }
- }
-}
-
-impl FromIterator<NestedType> for TupleType {
- fn from_iter<T: IntoIterator<Item = NestedType>>(iter: T) -> Self {
- Self {
- field_types: iter.into_iter().collect(),
- }
- }
-}
-
-impl From<TypeConstraint> for NestedType {
- fn from(value: TypeConstraint) -> Self {
- match value {
- TypeConstraint::None => NestedType::Primitive(PrimitiveType::Any),
- TypeConstraint::Exact(p) => NestedType::Primitive(p),
- TypeConstraint::AtLeast(p) => NestedType::Primitive(p),
- TypeConstraint::Tuple(t) => NestedType::Tuple(t.into()),
- }
- }
-}
-
-impl From<TupleConstraint> for TupleType {
- fn from(value: TupleConstraint) -> Self {
- value.fields.iter().cloned().map(NestedType::from).collect()
- }
-}
-
-impl TryFrom<TypeConstraint> for TupleType {
- type Error = ();
-
- fn try_from(value: TypeConstraint) -> Result<Self, Self::Error> {
- match value {
- TypeConstraint::None => Err(()),
- TypeConstraint::Exact(_) => Err(()),
- TypeConstraint::AtLeast(_) => Err(()),
- TypeConstraint::Tuple(t) => Ok(t.into()),
- }
- }
-}
-
-/// A constraint on the type of an item
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-#[allow(variant_size_differences)]
-pub(crate) enum TypeConstraint {
- /// No constraints
- None,
- /// An exact constraint
- Exact(PrimitiveType),
- /// A soft constraint
- AtLeast(PrimitiveType),
- /// A constraint on a tuple type
- Tuple(TupleConstraint),
-}
-
-/// A constraint on a tuple type
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub struct TupleConstraint {
- fields: Arc<[TypeConstraint]>,
-}
-
-impl TupleConstraint {
- /// Returns the arity specified in the 
constraint. - pub(crate) fn arity(&self) -> usize { - self.fields.len() - } - - /// Creates a [TupleConstraint], which only constrains the arity. - pub(crate) fn from_arity(arity: usize) -> Self { - from_fn(|| Some(TypeConstraint::None)).take(arity).collect() - } - - /// Creates a [TupleConstraint] using the [primitive - /// types][PrimitiveType] as lower bounds. - pub(crate) fn at_least(types: T) -> Self - where - T: IntoIterator, - { - Self::from_iter(types.into_iter().map(TypeConstraint::AtLeast)) - } - - /// Creates a [TupleConstraint] using the [primitive - /// types][PrimitiveType] as exact bounds. - pub(crate) fn exact(types: T) -> Self - where - T: IntoIterator, - { - Self::from_iter(types.into_iter().map(TypeConstraint::Exact)) - } - - /// Returns the underlying [primitive types][PrimitiveType], - /// provided that this is a flat tuple of primitive types, with a - /// default type for unspecified constraints. - pub(crate) fn into_flat_primitive_with_default( - &self, - default_type: PrimitiveType, - ) -> Option { - let mut result = Vec::new(); - - for type_constraint in self.fields.iter() { - match type_constraint { - TypeConstraint::None => result.push(TypeConstraint::AtLeast(default_type)), - TypeConstraint::Exact(inner) => result.push(TypeConstraint::Exact(*inner)), - TypeConstraint::AtLeast(inner) => result.push(TypeConstraint::AtLeast(*inner)), - TypeConstraint::Tuple(_) => return None, - } - } - - Some(Self::from_iter(result)) - } - - pub(crate) fn into_flat_primitive(self) -> Option> { - let mut result = Vec::new(); - - for type_constraint in self.fields.iter() { - match type_constraint { - TypeConstraint::None => (), - TypeConstraint::Exact(inner) | TypeConstraint::AtLeast(inner) => { - result.push(*inner) - } - TypeConstraint::Tuple(_) => return None, - } - } - - Some(result) - } -} - -// impl Deref for TupleConstraint { -// type Target = [TypeConstraint]; - -// fn deref(&self) -> &Self::Target { -// &self.fields -// } -// } - -impl FromIterator for TupleConstraint { - fn from_iter>(iter: T) -> Self { - Self { - fields: iter.into_iter().collect(), - } - } -} diff --git a/nemo/src/_model/types/error.rs b/nemo/src/_model/types/error.rs deleted file mode 100644 index f8c109640..000000000 --- a/nemo/src/_model/types/error.rs +++ /dev/null @@ -1,51 +0,0 @@ -// // use super::primitive_types::PrimitiveType; -// use crate::model::{Constant, LogicalAggregateOperation}; - -// use nemo_physical::error::ReadingError; -// use thiserror::Error; - -// /// An [InvalidRuleTermConversion] -// #[derive(Debug, Error, PartialEq)] -// #[error("The term \"{}\" cannot be converted to a {}.", .constant, .target_type)] -// pub struct InvalidRuleTermConversion { -// constant: Constant, -// target_type: PrimitiveType, -// } - -// impl InvalidRuleTermConversion { -// /// Create new `InvalidRuleTermConversion` error -// pub(crate) fn new(constant: Constant, target_type: PrimitiveType) -> Self { -// Self { -// constant, -// target_type, -// } -// } -// } - -// impl From for ReadingError { -// fn from(value: InvalidRuleTermConversion) -> Self { -// Self::TypeConversionError(value.constant.to_string(), value.target_type.to_string()) -// } -// } - -// /// Errors that can occur during type checking -// #[derive(Error, Debug)] -// pub(crate) enum TypeError { -// /// Non-numerical aggregate input type -// #[error("Aggregate operation \"{0:?}\" on input variable \"{1}\" requires a numerical input type, but the input type was \"{2}\".")] -// NonNumericalAggregateInputType(LogicalAggregateOperation, String, 
PrimitiveType),
-// /// Conflicting type declarations
-// #[error("Conflicting type declarations. Predicate \"{0}\" at position {1} has been inferred to have the conflicting types {2} and {3}.")]
-// InvalidRuleConflictingTypes(String, usize, PrimitiveType, PrimitiveType),
-// /// Conflicting type conversions
-// #[error(transparent)]
-// InvalidRuleTermConversion(#[from] InvalidRuleTermConversion),
-// /// Comparison of a non-numeric type
-// #[error("Invalid type declarations. Comparison operator can only be used with numeric types.")]
-// InvalidRuleNonNumericComparison,
-// /// Arithmetic operations with a non-numeric type
-// #[error(
-// "Invalid type declarations. Arithmetic operations can only be used with numeric types."
-// )]
-// InvalidRuleNonNumericArithmetic,
-// }
diff --git a/nemo/src/_model/types/primitive_logical_value.rs b/nemo/src/_model/types/primitive_logical_value.rs
deleted file mode 100644
index 8b47f1581..000000000
--- a/nemo/src/_model/types/primitive_logical_value.rs
+++ /dev/null
@@ -1,1113 +0,0 @@
-// use std::num::ParseIntError;
-
-// use num::FromPrimitive;
-
-// use nemo_physical::datatypes::data_value::DataValueIteratorT;
-// use nemo_physical::datatypes::data_value::PhysicalString;
-// use nemo_physical::datatypes::Double;
-// use nemo_physical::error::ReadingError;
-
-// use crate::model::{
-// Constant, Identifier, Map, NumericLiteral, RdfLiteral, Tuple, XSD_DECIMAL, XSD_DOUBLE,
-// XSD_INTEGER,
-// };
-
-// use super::{error::InvalidRuleTermConversion, primitive_types::PrimitiveType};
-
-// const LANGUAGE_STRING_PREFIX: &str = "LS:";
-// const STRING_PREFIX: &str = "ST:";
-// const INTEGER_PREFIX: &str = "IN:";
-// const DECIMAL_PREFIX: &str = "DE:";
-// const DOUBLE_PREFIX: &str = "DO:";
-// const CONSTANT_PREFIX: &str = "CO:";
-// const DATATYPE_VALUE_PREFIX: &str = "DV:";
-// const MAP_VALUE_PREFIX: &str = "MP:";
-// const TUPLE_VALUE_PREFIX: &str = "TP:";
-// const NULL_PREFIX: &str = "NULL:"; // TODO: Temporary fix
-
-/// The prefix used to indicate constants that are Nulls
-pub const LOGICAL_NULL_PREFIX: &str = "__Null#";
-
-/// An API wrapper for the logical string type
-#[repr(transparent)]
-#[derive(Clone, Debug, PartialEq, Eq)]
-pub struct LogicalString(String);
-
-impl From<String> for LogicalString {
- fn from(value: String) -> Self {
- LogicalString(value)
- }
-}
-
-impl From<LogicalString> for String {
- fn from(value: LogicalString) -> Self {
- value.0
- }
-}
-
-// impl<'a> From<&'a LogicalString> for &'a str {
-// fn from(value: &'a LogicalString) -> Self {
-// &value.0
-// }
-// }
-
-// impl std::fmt::Display for LogicalString {
-// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-// write!(f, "{}", self.0)
-// }
-// }
-
-// /// An API wrapper for the logical integer type
-// #[repr(transparent)]
-// #[derive(Copy, Clone, Debug, PartialEq, Eq)]
-// pub struct LogicalInteger(i64);
-
-// impl From<i64> for LogicalInteger {
-// fn from(value: i64) -> Self {
-// LogicalInteger(value)
-// }
-// }
-
-// impl From<LogicalInteger> for i64 {
-// fn from(value: LogicalInteger) -> Self {
-// value.0
-// }
-// }
-
-// impl std::fmt::Display for LogicalInteger {
-// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-// write!(f, "{}", self.0)
-// }
-// }
-
-// /// An API wrapper for the logical float64 type
-// #[repr(transparent)]
-// #[derive(Copy, Clone, Debug, PartialEq, Eq)]
-// pub struct LogicalFloat64(Double);
-
-// impl From<Double> for LogicalFloat64 {
-// fn from(value: Double) -> Self {
-// LogicalFloat64(value)
-// }
-// }
-
-// impl From<LogicalFloat64> for 
Double { -// fn from(value: LogicalFloat64) -> Self { -// value.0 -// } -// } - -// impl std::fmt::Display for LogicalFloat64 { -// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { -// write!(f, "{}", self.0) -// } -// } - -// #[derive(Clone, Debug, PartialEq, Eq)] -// struct LanguageString(String, String); - -// #[derive(Clone, Debug, PartialEq, Eq)] -// struct DatatypeValue(String, String); - -// #[derive(Clone, Debug, PartialEq, Eq)] -// struct Decimal(i64, u64); - -// impl From for Constant { -// fn from(value: PrimitiveLogicalValueT) -> Self { -// match value { -// PrimitiveLogicalValueT::Any(constant) => constant, -// PrimitiveLogicalValueT::String(value) => value.into(), -// PrimitiveLogicalValueT::Integer(value) => value.into(), -// PrimitiveLogicalValueT::Float64(value) => value.into(), -// } -// } -// } - -// impl From for Constant { -// fn from(value: LogicalString) -> Self { -// Self::StringLiteral(value.into()) -// } -// } - -// impl From for Constant { -// fn from(value: LogicalInteger) -> Self { -// Self::NumericLiteral(NumericLiteral::Integer(value.into())) -// } -// } - -// impl From for Constant { -// fn from(value: LogicalFloat64) -> Self { -// Self::NumericLiteral(NumericLiteral::Double(value.into())) -// } -// } - -// impl From for PhysicalString { -// fn from(value: LanguageString) -> Self { -// format!("{LANGUAGE_STRING_PREFIX}{}@{}", value.0, value.1).into() -// } -// } - -// impl From for PhysicalString { -// fn from(value: LogicalString) -> Self { -// format!("{STRING_PREFIX}{value}").into() -// } -// } - -// impl From for PhysicalString { -// fn from(value: LogicalInteger) -> Self { -// format!("{INTEGER_PREFIX}{value}").into() -// } -// } - -// impl From for PhysicalString { -// fn from(value: Decimal) -> Self { -// format!("{DECIMAL_PREFIX}{}.{}", value.0, value.1).into() -// } -// } - -// impl From for PhysicalString { -// fn from(value: LogicalFloat64) -> Self { -// format!("{DOUBLE_PREFIX}{value}").into() -// } -// } - -// impl From for PhysicalString { -// fn from(value: Identifier) -> Self { -// format!("{CONSTANT_PREFIX}{value}").into() -// } -// } - -// impl From for PhysicalString { -// fn from(value: DatatypeValue) -> Self { -// format!("{DATATYPE_VALUE_PREFIX}{}^^{}", value.0, value.1).into() -// } -// } - -// impl From for PhysicalString { -// fn from(value: Map) -> Self { -// format!("{MAP_VALUE_PREFIX}{value}").into() -// } -// } - -// impl From for PhysicalString { -// fn from(value: Tuple) -> Self { -// format!("{TUPLE_VALUE_PREFIX}{value}").into() -// } -// } - -// impl From for LogicalString { -// fn from(value: LogicalInteger) -> Self { -// value.0.to_string().into() -// } -// } - -// impl From for LogicalString { -// fn from(value: LogicalFloat64) -> Self { -// value.0.to_string().into() -// } -// } - -// impl TryFrom for LogicalInteger { -// type Error = ParseIntError; - -// fn try_from(value: LogicalString) -> Result { -// value.0.parse::().map(|i| i.into()) -// } -// } - -// impl TryFrom for LogicalFloat64 { -// type Error = ReadingError; - -// fn try_from(value: LogicalString) -> Result { -// let parsed = value.0.parse::()?; -// Double::new(parsed).map(|d| d.into()) -// } -// } - -// impl TryFrom for LogicalInteger { -// type Error = ReadingError; - -// fn try_from(value: LogicalFloat64) -> Result { -// i64::from_f64(value.0.into()) -// .map(|i| i.into()) -// .ok_or(ReadingError::TypeConversionError( -// value.to_string(), -// PrimitiveType::Integer.to_string(), -// )) -// } -// } - -// impl TryFrom for LogicalFloat64 
{ -// type Error = ReadingError; - -// fn try_from(value: LogicalInteger) -> Result { -// Double::new( -// f64::from_i64(value.0).ok_or(ReadingError::TypeConversionError( -// value.to_string(), -// PrimitiveType::Integer.to_string(), -// ))?, -// ) -// .map(|d| d.into()) -// } -// } - -// impl TryFrom for LogicalString { -// type Error = InvalidRuleTermConversion; - -// fn try_from(constant: Constant) -> Result { -// match constant { -// Constant::StringLiteral(s) => Ok(s.into()), -// _ => Err(InvalidRuleTermConversion::new( -// constant, -// PrimitiveType::String, -// )), -// } -// } -// } - -// impl TryFrom for LogicalInteger { -// type Error = InvalidRuleTermConversion; - -// fn try_from(constant: Constant) -> Result { -// match constant { -// Constant::NumericLiteral(NumericLiteral::Integer(i)) => Ok(i.into()), -// Constant::NumericLiteral(NumericLiteral::Decimal(i, 0)) => Ok(i.into()), -// _ => Err(InvalidRuleTermConversion::new( -// constant, -// PrimitiveType::Integer, -// )), -// } -// } -// } - -// impl TryFrom for i64 { -// type Error = InvalidRuleTermConversion; - -// fn try_from(constant: Constant) -> Result { -// Ok(LogicalInteger::try_from(constant)?.into()) -// } -// } - -// impl TryFrom for LogicalFloat64 { -// type Error = InvalidRuleTermConversion; - -// fn try_from(constant: Constant) -> Result { -// match constant { -// Constant::NumericLiteral(NumericLiteral::Double(d)) => Ok(d.into()), -// Constant::NumericLiteral(NumericLiteral::Decimal(a, b)) => format!("{a}.{b}") -// .parse() -// .ok() -// .and_then(|d: f64| Double::new(d).map(|d| d.into()).ok()) -// .ok_or(InvalidRuleTermConversion::new( -// constant, -// PrimitiveType::Float64, -// )), -// Constant::NumericLiteral(NumericLiteral::Integer(a)) => LogicalInteger(a) -// .try_into() -// .map_err(|_err| InvalidRuleTermConversion::new(constant, PrimitiveType::Float64)), -// _ => Err(InvalidRuleTermConversion::new( -// constant, -// PrimitiveType::Float64, -// )), -// } -// } -// } - -// impl TryFrom for Double { -// type Error = InvalidRuleTermConversion; - -// fn try_from(constant: Constant) -> Result { -// Ok(LogicalFloat64::try_from(constant)?.into()) -// } -// } - -// impl TryFrom for PhysicalString { -// type Error = InvalidRuleTermConversion; - -// fn try_from(constant: Constant) -> Result { -// match constant { -// Constant::Abstract(c) => Ok(c.into()), -// Constant::NumericLiteral(NumericLiteral::Integer(i)) => Ok(LogicalInteger(i).into()), -// Constant::NumericLiteral(NumericLiteral::Decimal(a, b)) => Ok(Decimal(a, b).into()), -// Constant::NumericLiteral(NumericLiteral::Double(d)) => Ok(LogicalFloat64(d).into()), -// Constant::StringLiteral(s) => Ok(LogicalString(s).into()), -// Constant::RdfLiteral(RdfLiteral::LanguageString { value, tag }) => { -// Ok(LanguageString(value, tag).into()) -// } -// Constant::RdfLiteral(RdfLiteral::DatatypeValue { value, datatype }) => { -// Ok(DatatypeValue(value, datatype).into()) -// } -// Constant::MapLiteral(value) => Ok(PhysicalString::from(value)), -// Constant::TupleLiteral(tuple) => Ok(PhysicalString::from(tuple)), -// } -// } -// } - -// /// Enum for values in the logical layer -// #[derive(Debug)] -// pub enum PrimitiveLogicalValueT { -// /// Any variant -// Any(Constant), -// /// String variant -// String(LogicalString), -// /// Integer variant -// Integer(LogicalInteger), -// /// Float64 variant -// Float64(LogicalFloat64), -// } - -// impl From for PrimitiveLogicalValueT { -// fn from(value: Constant) -> Self { -// Self::Any(value) -// } -// } - -// impl From for 
PrimitiveLogicalValueT { -// fn from(value: LogicalString) -> Self { -// Self::String(value) -// } -// } - -// impl From for PrimitiveLogicalValueT { -// fn from(value: LogicalInteger) -> Self { -// Self::Integer(value) -// } -// } - -// impl From for PrimitiveLogicalValueT { -// fn from(value: LogicalFloat64) -> Self { -// Self::Float64(value) -// } -// } - -// pub(super) type DefaultAnyIterator<'a> = Box + 'a>; -// pub(super) type DefaultStringIterator<'a> = Box + 'a>; -// pub(super) type DefaultIntegerIterator<'a> = Box + 'a>; -// pub(super) type DefaultFloat64Iterator<'a> = Box + 'a>; -// pub(super) type DefaultSerializedIterator<'a> = Box + 'a>; - -// /// Iterator over one kind of possible logical values -// #[allow(missing_debug_implementations)] -// pub enum PrimitiveLogicalValueIteratorT<'a> { -// /// Any variant -// Any(DefaultAnyIterator<'a>), -// /// String variant -// String(DefaultStringIterator<'a>), -// /// Integer variant -// Integer(DefaultIntegerIterator<'a>), -// /// Float64 variant -// Float64(DefaultFloat64Iterator<'a>), -// } - -// impl<'a> Iterator for PrimitiveLogicalValueIteratorT<'a> { -// type Item = PrimitiveLogicalValueT; - -// fn next(&mut self) -> Option { -// match self { -// Self::Any(iter) => Some(PrimitiveLogicalValueT::Any(iter.next()?)), -// Self::String(iter) => Some(PrimitiveLogicalValueT::String(iter.next()?)), -// Self::Integer(iter) => Some(PrimitiveLogicalValueT::Integer(iter.next()?)), -// Self::Float64(iter) => Some(PrimitiveLogicalValueT::Float64(iter.next()?)), -// } -// } -// } - -// pub(super) struct AnyOutputMapper<'a> { -// physical_iter: Box + 'a>, -// } - -// impl<'a> AnyOutputMapper<'a> { -// pub(super) fn new(phy: DataValueIteratorT<'a>) -> Self { -// match phy { -// DataValueIteratorT::String(physical_iter) => Self { physical_iter }, -// _ => unreachable!("If the database representation of the logical types is correct, we never reach this branch.") -// } -// } -// } - -// impl From for Constant { -// fn from(s: PhysicalString) -> Self { -// // unwrap physical string -// let s: String = s.into(); -// match s { -// s if s.starts_with(LANGUAGE_STRING_PREFIX) => { -// let (value, tag) = s[LANGUAGE_STRING_PREFIX.len()..] -// .rsplit_once('@') -// .expect("Physical Value should be well-formatted."); -// Constant::RdfLiteral(RdfLiteral::LanguageString { -// value: value.to_string(), -// tag: tag.to_string(), -// }) -// } -// s if s.starts_with(STRING_PREFIX) => { -// Constant::StringLiteral(s[STRING_PREFIX.len()..].to_string()) -// } -// s if s.starts_with(INTEGER_PREFIX) => { -// Constant::NumericLiteral(NumericLiteral::Integer( -// s[INTEGER_PREFIX.len()..] -// .parse() -// .expect("Physical Value should be well-formatted."), -// )) -// } -// s if s.starts_with(DECIMAL_PREFIX) => { -// let (a, b) = s[DECIMAL_PREFIX.len()..] -// .rsplit_once('.') -// .and_then(|(a, b)| Some((a.parse().ok()?, b.parse().ok()?))) -// .expect("Physical Value should be well-formatted."); -// Constant::NumericLiteral(NumericLiteral::Decimal(a, b)) -// } -// s if s.starts_with(DOUBLE_PREFIX) => Constant::NumericLiteral(NumericLiteral::Double( -// s[DOUBLE_PREFIX.len()..] -// .parse() -// .ok() -// .and_then(|f64| Double::new(f64).ok()) -// .expect("Physical Value should be well-formatted."), -// )), -// s if s.starts_with(CONSTANT_PREFIX) => { -// Constant::Abstract(s[CONSTANT_PREFIX.len()..].to_string().into()) -// } -// s if s.starts_with(DATATYPE_VALUE_PREFIX) => { -// let (value, datatype) = s[DATATYPE_VALUE_PREFIX.len()..] 
-// .rsplit_once("^^") -// .expect("Physical Value should be well-formatted."); -// Constant::RdfLiteral(RdfLiteral::DatatypeValue { -// value: value.to_string(), -// datatype: datatype.to_string(), -// }) -// } -// s if s.starts_with(NULL_PREFIX) => Constant::Abstract(format!("{LOGICAL_NULL_PREFIX}{}", &s[NULL_PREFIX.len()..]).into()), -// _ => unreachable!("The physical strings should take one of the previous forms. Apparently we forgot to handle terms like: {s:?}"), -// } -// } -// } - -// impl<'a> From> for DefaultAnyIterator<'a> { -// fn from(source: AnyOutputMapper<'a>) -> Self { -// Box::new(source.physical_iter.map(|s| s.into())) -// } -// } - -// impl<'a> From> for DefaultSerializedIterator<'a> { -// fn from(source: AnyOutputMapper<'a>) -> Self { -// // NOTE: depending on performance change, maybe implement shortcut here to not construct Term first; -// // I prefer the current solution at the moment since it is easier to maintain -// Box::new(DefaultAnyIterator::from(source).map(|term| { -// let mapped_term = match term { -// // for numeric literals we do not use the standard display but convert them to a proper -// // rdf literal first -// Constant::NumericLiteral(NumericLiteral::Integer(i)) => { -// Constant::RdfLiteral(RdfLiteral::DatatypeValue { -// value: i.to_string(), -// datatype: XSD_INTEGER.to_string(), -// }) -// } -// Constant::NumericLiteral(NumericLiteral::Decimal(a, b)) => { -// Constant::RdfLiteral(RdfLiteral::DatatypeValue { -// value: format!("{a}.{b}").to_string(), -// datatype: XSD_DECIMAL.to_string(), -// }) -// } -// Constant::NumericLiteral(NumericLiteral::Double(d)) => { -// Constant::RdfLiteral(RdfLiteral::DatatypeValue { -// value: format!("{:E}", f64::from(d)), -// datatype: XSD_DOUBLE.to_string(), -// }) -// } -// _ => term, -// }; - -// mapped_term.to_string() -// })) -// } -// } - -// pub(super) struct StringOutputMapper<'a> { -// physical_iter: Box + 'a>, -// } - -// impl<'a> StringOutputMapper<'a> { -// pub(super) fn new(phy: DataValueIteratorT<'a>) -> Self { -// match phy { -// DataValueIteratorT::String(physical_iter) => Self { physical_iter }, -// _ => unreachable!("If the database representation of the logical types is correct, we never reach this branch.") -// } -// } -// } - -// impl From for LogicalString { -// fn from(s: PhysicalString) -> Self { -// // unwrap physical string -// let s: String = s.into(); -// match s { -// s if s.starts_with(LANGUAGE_STRING_PREFIX) => { -// let (value, _tag) = s[LANGUAGE_STRING_PREFIX.len()..] -// .rsplit_once('@') -// .expect("Physical Value should be well-formatted."); -// value.to_string().into() -// } -// s if s.starts_with(STRING_PREFIX) => { -// s[STRING_PREFIX.len()..].to_string().into() -// } -// _ => unreachable!("The physical strings should take one of the previous forms. 
Apparently we forgot to handle terms like: {s:?}"), -// } -// } -// } - -// impl<'a> From> for DefaultStringIterator<'a> { -// fn from(source: StringOutputMapper<'a>) -> Self { -// Box::new(source.physical_iter.map(|s| s.into())) -// } -// } - -// impl<'a> From> for DefaultSerializedIterator<'a> { -// fn from(source: StringOutputMapper<'a>) -> Self { -// Box::new(source.physical_iter.map(|s| LogicalString::from(s).into())) -// } -// } - -// pub(super) struct IntegerOutputMapper<'a> { -// physical_iter: Box + 'a>, -// } - -// impl<'a> IntegerOutputMapper<'a> { -// pub(super) fn new(phy: DataValueIteratorT<'a>) -> Self { -// match phy { -// DataValueIteratorT::I64(physical_iter) => Self { physical_iter }, -// _ => unreachable!("If the database representation of the logical types is correct, we never reach this branch.") -// } -// } -// } - -// impl<'a> From> for DefaultIntegerIterator<'a> { -// fn from(source: IntegerOutputMapper<'a>) -> Self { -// Box::new(source.physical_iter.map(|i| i.into())) -// } -// } - -// impl<'a> From> for DefaultSerializedIterator<'a> { -// fn from(source: IntegerOutputMapper<'a>) -> Self { -// Box::new( -// source -// .physical_iter -// .map(|i| LogicalInteger::from(i).to_string()), -// ) -// } -// } - -// pub(super) struct Float64OutputMapper<'a> { -// physical_iter: Box + 'a>, -// } - -// impl<'a> Float64OutputMapper<'a> { -// pub(super) fn new(phy: DataValueIteratorT<'a>) -> Self { -// match phy { -// DataValueIteratorT::Double(physical_iter) => Self { physical_iter }, -// _ => unreachable!("If the database representation of the logical types is correct, we never reach this branch.") -// } -// } -// } - -// impl<'a> From> for DefaultFloat64Iterator<'a> { -// fn from(source: Float64OutputMapper<'a>) -> Self { -// Box::new(source.physical_iter.map(|d| d.into())) -// } -// } - -// impl<'a> From> for DefaultSerializedIterator<'a> { -// fn from(source: Float64OutputMapper<'a>) -> Self { -// Box::new( -// source -// .physical_iter -// .map(|d| LogicalFloat64::from(d).to_string()), -// ) -// } -// } - -// #[cfg(test)] -// mod test { -// use std::assert_eq; - -// use crate::model::{InvalidRdfLiteral, XSD_STRING}; - -// use super::*; - -// #[test] -// fn input_mapping() { -// let string = LogicalString::from("my string".to_string()); -// let integer = LogicalInteger::from(42); -// let double = LogicalFloat64::from(Double::new(3.41).unwrap()); -// let constant = Constant::Abstract("my constant".to_string().into()); -// let string_literal = Constant::StringLiteral("string literal".to_string()); -// let num_int_literal = Constant::NumericLiteral(NumericLiteral::Integer(45)); -// let num_decimal_literal = Constant::NumericLiteral(NumericLiteral::Decimal(4, 2)); -// let num_whole_decimal_literal = Constant::NumericLiteral(NumericLiteral::Decimal(42, 0)); -// let num_double_literal = -// Constant::NumericLiteral(NumericLiteral::Double(Double::new(2.99).unwrap())); -// let language_string_literal = Constant::try_from(RdfLiteral::LanguageString { -// value: "language string".to_string(), -// tag: "en".to_string(), -// }) -// .unwrap(); -// let random_datavalue_literal = Constant::try_from(RdfLiteral::DatatypeValue { -// value: "some random datavalue".to_string(), -// datatype: "a datatype that I totally did not just make up".to_string(), -// }) -// .unwrap(); -// let string_datavalue_literal = Constant::try_from(RdfLiteral::DatatypeValue { -// value: "string datavalue".to_string(), -// datatype: XSD_STRING.to_string(), -// }) -// .unwrap(); -// let integer_datavalue_literal = 
Constant::try_from(RdfLiteral::DatatypeValue { -// value: "73".to_string(), -// datatype: XSD_INTEGER.to_string(), -// }) -// .unwrap(); -// let decimal_datavalue_literal = Constant::try_from(RdfLiteral::DatatypeValue { -// value: "1.23".to_string(), -// datatype: XSD_DECIMAL.to_string(), -// }) -// .unwrap(); -// let signed_decimal_datavalue_literal = Constant::try_from(RdfLiteral::DatatypeValue { -// value: "+1.23".to_string(), -// datatype: XSD_DECIMAL.to_string(), -// }) -// .unwrap(); -// let negative_decimal_datavalue_literal = Constant::try_from(RdfLiteral::DatatypeValue { -// value: "-1.23".to_string(), -// datatype: XSD_DECIMAL.to_string(), -// }) -// .unwrap(); -// let pointless_decimal_datavalue_literal = Constant::try_from(RdfLiteral::DatatypeValue { -// value: "23".to_string(), -// datatype: XSD_DECIMAL.to_string(), -// }) -// .unwrap(); -// let signed_pointless_decimal_datavalue_literal = -// Constant::try_from(RdfLiteral::DatatypeValue { -// value: "+23".to_string(), -// datatype: XSD_DECIMAL.to_string(), -// }) -// .unwrap(); -// let negative_pointless_decimal_datavalue_literal = -// Constant::try_from(RdfLiteral::DatatypeValue { -// value: "-23".to_string(), -// datatype: XSD_DECIMAL.to_string(), -// }) -// .unwrap(); -// let double_datavalue_literal = Constant::try_from(RdfLiteral::DatatypeValue { -// value: "3.33".to_string(), -// datatype: XSD_DOUBLE.to_string(), -// }) -// .unwrap(); -// let large_integer_literal = Constant::try_from(RdfLiteral::DatatypeValue { -// value: "9950000000000000000".to_string(), -// datatype: XSD_INTEGER.to_string(), -// }) -// .unwrap(); -// let large_decimal_literal = Constant::try_from(RdfLiteral::DatatypeValue { -// value: "9950000000000000001".to_string(), -// datatype: XSD_DECIMAL.to_string(), -// }) -// .unwrap(); -// let invalid_integer_literal = RdfLiteral::DatatypeValue { -// value: "123.45".to_string(), -// datatype: XSD_INTEGER.to_string(), -// }; -// let invalid_decimal_literal = RdfLiteral::DatatypeValue { -// value: "123.45a".to_string(), -// datatype: XSD_DECIMAL.to_string(), -// }; - -// let expected_string: PhysicalString = format!("{STRING_PREFIX}my string").into(); -// let expected_integer: PhysicalString = format!("{INTEGER_PREFIX}42").into(); -// let expected_double: PhysicalString = format!("{DOUBLE_PREFIX}3.41").into(); -// let expected_constant: PhysicalString = format!("{CONSTANT_PREFIX}my constant").into(); -// let expected_string_literal: PhysicalString = -// format!("{STRING_PREFIX}string literal").into(); -// let expected_num_int_literal: PhysicalString = format!("{INTEGER_PREFIX}45").into(); -// let expected_num_decimal_literal: PhysicalString = format!("{DECIMAL_PREFIX}4.2").into(); -// let expected_num_double_literal: PhysicalString = format!("{DOUBLE_PREFIX}2.99").into(); -// let expected_language_string_literal: PhysicalString = -// format!("{LANGUAGE_STRING_PREFIX}language string@en").into(); -// let expected_random_datavalue_literal: PhysicalString = format!("{DATATYPE_VALUE_PREFIX}some random datavalue^^a datatype that I totally did not just make up").into(); -// let expected_string_datavalue_literal: PhysicalString = -// format!("{STRING_PREFIX}string datavalue").into(); -// let expected_integer_datavalue_literal: PhysicalString = -// format!("{INTEGER_PREFIX}73").into(); -// let expected_decimal_datavalue_literal: PhysicalString = -// format!("{DECIMAL_PREFIX}1.23").into(); -// let expected_signed_decimal_datavalue_literal: PhysicalString = -// format!("{DECIMAL_PREFIX}1.23").into(); -// let 
expected_negative_decimal_datavalue_literal: PhysicalString = -// format!("{DECIMAL_PREFIX}-1.23").into(); -// let expected_pointless_decimal_datavalue_literal: PhysicalString = -// format!("{DECIMAL_PREFIX}23.0").into(); -// let expected_signed_pointless_decimal_datavalue_literal: PhysicalString = -// format!("{DECIMAL_PREFIX}23.0").into(); -// let expected_negative_pointless_decimal_datavalue_literal: PhysicalString = -// format!("{DECIMAL_PREFIX}-23.0").into(); -// let expected_double_datavalue_literal: PhysicalString = -// format!("{DOUBLE_PREFIX}3.33").into(); -// let expected_large_integer_literal: PhysicalString = -// format!("{DATATYPE_VALUE_PREFIX}9950000000000000000^^{XSD_INTEGER}").into(); -// let expected_large_decimal_literal: PhysicalString = -// format!("{DATATYPE_VALUE_PREFIX}9950000000000000001^^{XSD_DECIMAL}").into(); -// let expected_invalid_integer_literal = -// InvalidRdfLiteral::new(invalid_integer_literal.clone()); -// let expected_invalid_decimal_literal = -// InvalidRdfLiteral::new(invalid_decimal_literal.clone()); - -// assert_eq!(PhysicalString::from(string), expected_string); -// assert_eq!(PhysicalString::from(integer), expected_integer); -// assert_eq!(PhysicalString::from(double), expected_double); -// assert_eq!( -// PhysicalString::try_from(constant).unwrap(), -// expected_constant -// ); -// assert_eq!( -// PhysicalString::try_from(string_literal.clone()).unwrap(), -// expected_string_literal -// ); -// assert_eq!( -// PhysicalString::try_from(string_literal).unwrap(), -// expected_string_literal -// ); -// assert_eq!( -// PhysicalString::try_from(num_int_literal.clone()).unwrap(), -// expected_num_int_literal -// ); -// assert_eq!(i64::try_from(num_int_literal).unwrap(), 45); -// assert_eq!( -// LogicalInteger::try_from(num_whole_decimal_literal).unwrap(), -// LogicalInteger(42) -// ); -// assert_eq!( -// LogicalInteger::try_from(signed_pointless_decimal_datavalue_literal.clone()).unwrap(), -// LogicalInteger(23) -// ); -// assert_eq!( -// PhysicalString::try_from(num_decimal_literal).unwrap(), -// expected_num_decimal_literal -// ); -// assert_eq!( -// PhysicalString::try_from(num_double_literal.clone()).unwrap(), -// expected_num_double_literal -// ); -// assert_eq!( -// Double::try_from(num_double_literal).unwrap(), -// Double::new(2.99).unwrap() -// ); -// assert_eq!( -// PhysicalString::try_from(language_string_literal.clone()).unwrap(), -// expected_language_string_literal -// ); -// assert_eq!( -// PhysicalString::try_from(language_string_literal).unwrap(), -// expected_language_string_literal -// ); -// assert_eq!( -// PhysicalString::try_from(random_datavalue_literal).unwrap(), -// expected_random_datavalue_literal -// ); -// assert_eq!( -// PhysicalString::try_from(string_datavalue_literal.clone()).unwrap(), -// expected_string_datavalue_literal -// ); -// assert_eq!( -// PhysicalString::try_from(string_datavalue_literal).unwrap(), -// expected_string_datavalue_literal -// ); -// assert_eq!( -// PhysicalString::try_from(integer_datavalue_literal.clone()).unwrap(), -// expected_integer_datavalue_literal -// ); -// assert_eq!(i64::try_from(integer_datavalue_literal).unwrap(), 73); -// assert_eq!( -// PhysicalString::try_from(decimal_datavalue_literal).unwrap(), -// expected_decimal_datavalue_literal -// ); -// assert_eq!( -// PhysicalString::try_from(signed_decimal_datavalue_literal).unwrap(), -// expected_signed_decimal_datavalue_literal -// ); -// assert_eq!( -// PhysicalString::try_from(negative_decimal_datavalue_literal).unwrap(), -// 
expected_negative_decimal_datavalue_literal -// ); -// assert_eq!( -// PhysicalString::try_from(pointless_decimal_datavalue_literal).unwrap(), -// expected_pointless_decimal_datavalue_literal -// ); -// assert_eq!( -// PhysicalString::try_from(signed_pointless_decimal_datavalue_literal).unwrap(), -// expected_signed_pointless_decimal_datavalue_literal -// ); -// assert_eq!( -// PhysicalString::try_from(negative_pointless_decimal_datavalue_literal).unwrap(), -// expected_negative_pointless_decimal_datavalue_literal -// ); -// assert_eq!( -// PhysicalString::try_from(double_datavalue_literal.clone()).unwrap(), -// expected_double_datavalue_literal -// ); -// assert_eq!( -// Double::try_from(double_datavalue_literal).unwrap(), -// Double::new(3.33).unwrap() -// ); -// assert_eq!( -// PhysicalString::try_from(large_integer_literal).unwrap(), -// expected_large_integer_literal -// ); -// assert_eq!( -// PhysicalString::try_from(large_decimal_literal).unwrap(), -// expected_large_decimal_literal -// ); -// assert_eq!( -// Constant::try_from(invalid_integer_literal).unwrap_err(), -// expected_invalid_integer_literal -// ); -// assert_eq!( -// Constant::try_from(invalid_decimal_literal).unwrap_err(), -// expected_invalid_decimal_literal -// ); -// } - -// #[test] -// fn api_output_mapping() { -// let phys_any_iter = DataValueIteratorT::String(Box::new( -// [ -// format!("{STRING_PREFIX}my string"), -// format!("{INTEGER_PREFIX}42"), -// format!("{DOUBLE_PREFIX}3.41"), -// format!("{CONSTANT_PREFIX}my constant"), -// format!("{STRING_PREFIX}string literal"), -// format!("{INTEGER_PREFIX}45"), -// format!("{DECIMAL_PREFIX}4.2"), -// format!("{DOUBLE_PREFIX}2.99"), -// format!("{LANGUAGE_STRING_PREFIX}language string@en"), -// format!("{DATATYPE_VALUE_PREFIX}some random datavalue^^a datatype that I totally did not just make up"), -// format!("{STRING_PREFIX}string datavalue"), -// format!("{INTEGER_PREFIX}73"), -// format!("{DECIMAL_PREFIX}1.23"), -// format!("{DOUBLE_PREFIX}3.33"), -// format!("{NULL_PREFIX}1000001"), -// ].into_iter().map(PhysicalString::from) -// )); - -// let phys_string_iter = DataValueIteratorT::String(Box::new( -// [ -// format!("{STRING_PREFIX}my string"), -// format!("{STRING_PREFIX}42"), -// format!("{STRING_PREFIX}3.41"), -// format!("{STRING_PREFIX}string literal"), -// format!("{LANGUAGE_STRING_PREFIX}language string@en"), -// format!("{STRING_PREFIX}string datavalue"), -// ] -// .into_iter() -// .map(PhysicalString::from), -// )); - -// let phys_int_iter = DataValueIteratorT::I64(Box::new([42, 45, 73].into_iter())); - -// let phys_double_iter = DataValueIteratorT::Double(Box::new( -// [ -// Double::new(3.41).unwrap(), -// Double::new(2.99).unwrap(), -// Double::new(3.33).unwrap(), -// ] -// .into_iter(), -// )); - -// let any_out: DefaultAnyIterator = AnyOutputMapper::new(phys_any_iter).into(); -// let string_out: DefaultStringIterator = StringOutputMapper::new(phys_string_iter).into(); -// let int_out: DefaultIntegerIterator = IntegerOutputMapper::new(phys_int_iter).into(); -// let double_out: DefaultFloat64Iterator = Float64OutputMapper::new(phys_double_iter).into(); - -// let any_vec: Vec = any_out.collect(); -// let string_vec: Vec = string_out.collect(); -// let integer_vec: Vec = int_out.collect(); -// let double_vec: Vec = double_out.collect(); - -// assert_eq!( -// any_vec, -// vec![ -// Constant::StringLiteral("my string".to_string()), -// Constant::NumericLiteral(NumericLiteral::Integer(42)), -// 
Constant::NumericLiteral(NumericLiteral::Double(Double::new(3.41).unwrap())), -// Constant::Abstract("my constant".to_string().into()), -// Constant::StringLiteral("string literal".to_string()), -// Constant::NumericLiteral(NumericLiteral::Integer(45)), -// Constant::NumericLiteral(NumericLiteral::Decimal(4, 2)), -// Constant::NumericLiteral(NumericLiteral::Double(Double::new(2.99).unwrap())), -// Constant::RdfLiteral(RdfLiteral::LanguageString { -// value: "language string".to_string(), -// tag: "en".to_string(), -// }), -// Constant::RdfLiteral(RdfLiteral::DatatypeValue { -// value: "some random datavalue".to_string(), -// datatype: "a datatype that I totally did not just make up".to_string(), -// }), -// Constant::StringLiteral("string datavalue".to_string()), -// Constant::NumericLiteral(NumericLiteral::Integer(73)), -// Constant::NumericLiteral(NumericLiteral::Decimal(1, 23)), -// Constant::NumericLiteral(NumericLiteral::Double(Double::new(3.33).unwrap())), -// Constant::Abstract(format!("{LOGICAL_NULL_PREFIX}1000001").into()), -// ] -// ); - -// assert_eq!( -// string_vec, -// [ -// "my string", -// "42", -// "3.41", -// "string literal", -// "language string", -// "string datavalue", -// ] -// .into_iter() -// .map(String::from) -// .map(LogicalString::from) -// .collect::>(), -// ); - -// assert_eq!( -// integer_vec, -// [42, 45, 73] -// .into_iter() -// .map(LogicalInteger::from) -// .collect::>() -// ); - -// assert_eq!( -// double_vec, -// [ -// Double::new(3.41).unwrap(), -// Double::new(2.99).unwrap(), -// Double::new(3.33).unwrap(), -// ] -// .into_iter() -// .map(LogicalFloat64::from) -// .collect::>() -// ); -// } - -// #[test] -// fn serialized_output_mapping() { -// let phys_any_iter = DataValueIteratorT::String(Box::new( -// [ -// format!("{STRING_PREFIX}my string"), -// format!("{INTEGER_PREFIX}42"), -// format!("{DOUBLE_PREFIX}3.41"), -// format!("{CONSTANT_PREFIX}my constant"), -// format!("{STRING_PREFIX}string literal"), -// format!("{INTEGER_PREFIX}45"), -// format!("{DECIMAL_PREFIX}4.2"), -// format!("{DOUBLE_PREFIX}2.99"), -// format!("{LANGUAGE_STRING_PREFIX}language string@en"), -// format!("{DATATYPE_VALUE_PREFIX}some random datavalue^^a datatype that I totally did not just make up"), -// format!("{STRING_PREFIX}string datavalue"), -// format!("{INTEGER_PREFIX}73"), -// format!("{DECIMAL_PREFIX}1.23"), -// format!("{DOUBLE_PREFIX}3.33"), -// format!("{NULL_PREFIX}1000001"), -// ] -// .into_iter().map(|s| s.into()))); - -// let phys_string_iter = DataValueIteratorT::String(Box::new( -// [ -// format!("{STRING_PREFIX}my string"), -// format!("{STRING_PREFIX}42"), -// format!("{STRING_PREFIX}3.41"), -// format!("{STRING_PREFIX}string literal"), -// format!("{LANGUAGE_STRING_PREFIX}language string@en"), -// format!("{STRING_PREFIX}string datavalue"), -// ] -// .into_iter() -// .map(|s| s.into()), -// )); - -// let phys_int_iter = DataValueIteratorT::I64(Box::new([42, 45, 73].into_iter())); - -// let phys_double_iter = DataValueIteratorT::Double(Box::new( -// [ -// Double::new(3.41).unwrap(), -// Double::new(2.99).unwrap(), -// Double::new(3.33).unwrap(), -// ] -// .into_iter(), -// )); - -// let any_out: DefaultSerializedIterator = AnyOutputMapper::new(phys_any_iter).into(); -// let string_out: DefaultSerializedIterator = -// StringOutputMapper::new(phys_string_iter).into(); -// let int_out: DefaultSerializedIterator = IntegerOutputMapper::new(phys_int_iter).into(); -// let double_out: DefaultSerializedIterator = -// 
Float64OutputMapper::new(phys_double_iter).into(); - -// let any_vec: Vec = any_out.collect(); -// let string_vec: Vec = string_out.collect(); -// let integer_vec: Vec = int_out.collect(); -// let double_vec: Vec = double_out.collect(); - -// assert_eq!( -// any_vec, -// [ -// r#""my string""#, -// r#""42"^^"#, -// r#""3.41E0"^^"#, -// r#"my constant"#, -// r#""string literal""#, -// r#""45"^^"#, -// r#""4.2"^^"#, -// r#""2.99E0"^^"#, -// r#""language string"@en"#, -// r#""some random datavalue"^^"#, -// r#""string datavalue""#, -// r#""73"^^"#, -// r#""1.23"^^"#, -// r#""3.33E0"^^"#, -// r#"<__Null#1000001>"#, -// ] -// .into_iter() -// .map(String::from) -// .collect::>(), -// ); - -// assert_eq!( -// string_vec, -// [ -// "my string", -// "42", -// "3.41", -// "string literal", -// "language string", -// "string datavalue", -// ] -// .into_iter() -// .map(String::from) -// .collect::>(), -// ); - -// assert_eq!( -// integer_vec, -// [42, 45, 73] -// .into_iter() -// .map(|i| i.to_string()) -// .collect::>(), -// ); - -// assert_eq!( -// double_vec, -// [ -// Double::new(3.41).unwrap(), -// Double::new(2.99).unwrap(), -// Double::new(3.33).unwrap(), -// ] -// .into_iter() -// .map(|d| d.to_string()) -// .collect::>(), -// ); -// } -// } diff --git a/nemo/src/_model/types/primitive_types.rs b/nemo/src/_model/types/primitive_types.rs deleted file mode 100644 index 8c8aae983..000000000 --- a/nemo/src/_model/types/primitive_types.rs +++ /dev/null @@ -1,141 +0,0 @@ -use std::fmt::Display; -use std::str::FromStr; - -use crate::io::parser::ParseError; - -use crate::model::NestedType; - -macro_rules! count { - () => (0usize); - ( $x:tt $($xs:tt)* ) => (1usize + count!($($xs)*)); -} - -macro_rules! generate_logical_type_enum { - ($(($variant_name:ident, $string_repr: literal)),+) => { - /// An enum capturing the logical type names and funtionality related to parsing and translating into and from physical types - #[derive(Copy, Clone, Debug, Hash, PartialEq, Eq)] - pub(crate) enum PrimitiveType { - $( - /// $variant_name - $variant_name - ),+ - } - - impl PrimitiveType { - const _VARIANTS: [Self; count!($($variant_name)+)] = [ - $(Self::$variant_name),+ - ]; - - /// Returns a list of the syntactic representations of valid types. 
- pub fn type_representations() -> Vec<&'static str> { - vec![$($string_repr),+] - } - } - - impl Display for PrimitiveType { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - match self { - $(Self::$variant_name => write!(f, "{}", $string_repr)),+ - } - } - } - - impl FromStr for PrimitiveType { - type Err = ParseError; - - fn from_str(s: &str) -> Result { - match s { - $($string_repr => Ok(Self::$variant_name)),+, - _ => panic!("types no longer used, will soon go") //Err(Self::Err::ParseUnknownType(s.to_string())) - } - } - } - }; -} - -generate_logical_type_enum!( - (Any, "any"), - (String, "string"), - (Integer, "integer"), - (Float64, "float64") -); - -impl PartialOrd for PrimitiveType { - fn partial_cmp(&self, other: &Self) -> Option { - match self { - Self::Any => match other { - Self::Any => Some(std::cmp::Ordering::Equal), - Self::String => Some(std::cmp::Ordering::Greater), - _ => None, // TODO: should be the following once reasoning supports casting: Some(std::cmp::Ordering::Greater), - }, - Self::String => match other { - Self::Any => Some(std::cmp::Ordering::Less), - Self::String => Some(std::cmp::Ordering::Equal), - _ => None, - }, - Self::Integer => match other { - Self::Any => None, // TODO: should be the following once reasoning supports casting: Some(std::cmp::Ordering::Less), - Self::Integer => Some(std::cmp::Ordering::Equal), - _ => None, - }, - Self::Float64 => match other { - Self::Any => None, // TODO: should be the following once reasoning supports casting: Some(std::cmp::Ordering::Less), - Self::Float64 => Some(std::cmp::Ordering::Equal), - _ => None, - }, - } - } -} - -impl Default for PrimitiveType { - fn default() -> Self { - Self::Any - } -} - -impl TryFrom for PrimitiveType { - type Error = (); - - fn try_from(value: NestedType) -> Result { - match value { - NestedType::Tuple(_) => Err(()), - NestedType::Primitive(p) => Ok(p), - } - } -} - -impl PrimitiveType { - // TODO: I think this should be the PartialCmp between types but as long as we do not - // have casting we still want to forbid e.g. integer and string merges while reasoning - // HOWEVER for data that occurs in facts or sources we can decide for the max type up - // front and read them accordingly - /// Get the more general type out of two types (not necessarily castable but can be used to - /// determine how data should be read) - pub fn max_type(&self, other: &PrimitiveType) -> PrimitiveType { - match self { - PrimitiveType::Any => PrimitiveType::Any, - PrimitiveType::String => match other { - PrimitiveType::String => PrimitiveType::String, - _ => PrimitiveType::Any, - }, - PrimitiveType::Integer => match other { - PrimitiveType::Integer => PrimitiveType::Integer, - _ => PrimitiveType::Any, - }, - PrimitiveType::Float64 => match other { - PrimitiveType::Float64 => PrimitiveType::Float64, - _ => PrimitiveType::Any, - }, - } - } - - /// Whether this logical type can be used to perform numeric operations. 
-    pub fn allows_numeric_operations(&self) -> bool {
-        match self {
-            Self::Any => false,
-            Self::String => false,
-            Self::Integer => true,
-            Self::Float64 => true,
-        }
-    }
-}
diff --git a/nemo/src/api.rs b/nemo/src/api.rs
index de768364f..1ccaca1eb 100644
--- a/nemo/src/api.rs
+++ b/nemo/src/api.rs
@@ -26,9 +26,7 @@ use std::{fs::read_to_string, path::PathBuf};
 use crate::{
     error::{Error, ReadingError},
     execution::DefaultExecutionEngine,
-    io::parser::parse_program_str,
-    model::Identifier,
-    rule_model::program::Program,
+    rule_model::components::tag::Tag,
 };
 
 /// Reasoning Engine exposed by the API
@@ -51,7 +49,7 @@ pub fn load(file: PathBuf) -> Result<Engine, Error> {
 ///
 /// # Error
 /// Returns an appropriate [Error] variant on parsing and feature check issues.
-pub fn load_string(input: String) -> Result<Engine, Error> {
+pub fn load_string(_input: String) -> Result<Engine, Error> {
     // let (ast, _errors) = parse_program_str(&input);
     // let _program = Program::from_ast(ast);
     todo!("ExecutionEngine has to use the new rule model")
@@ -69,11 +67,12 @@ pub fn reason(engine: &mut Engine) -> Result<(), Error> {
 }
 
 /// Get a [Vec] of all output predicates that are computed by the engine.
-pub fn output_predicates(engine: &Engine) -> Vec<Identifier> {
+pub fn output_predicates(engine: &Engine) -> Vec<Tag> {
     engine
         .program()
         .exports()
-        .map(|(id, _)| id)
+        .into_iter()
+        .map(|export| export.predicate())
         .cloned()
         .collect()
 }
diff --git a/nemo/src/chase_model/analysis/program_analysis.rs b/nemo/src/chase_model/analysis/program_analysis.rs
index 1175c7cb4..cc639acd1 100644
--- a/nemo/src/chase_model/analysis/program_analysis.rs
+++ b/nemo/src/chase_model/analysis/program_analysis.rs
@@ -33,7 +33,7 @@ pub struct RuleAnalysis {
     /// Whether an atom in the head also occurs in the body.
     pub is_recursive: bool,
     /// Whether the rule has positive constraints that need to be applied.
-    pub has_positive_constraints: bool,
+    pub _has_positive_constraints: bool,
     /// Whether the rule has at least one aggregate term in the head.
     pub has_aggregates: bool,
 
@@ -45,16 +45,16 @@ pub struct RuleAnalysis {
     pub head_predicates: HashSet<Tag>,
 
     /// Variables occurring in the positive part of the body.
-    pub positive_body_variables: HashSet<Variable>,
+    pub _positive_body_variables: HashSet<Variable>,
     /// Variables occurring in the positive part of the body.
-    pub negative_body_variables: HashSet<Variable>,
+    pub _negative_body_variables: HashSet<Variable>,
     /// Variables occurring in the head.
     pub head_variables: HashSet<Variable>,
     /// Number of existential variables.
-    pub num_existential: usize,
+    pub _num_existential: usize,
 
     /// Rule that represents the calculation of the satisfied matches for an existential rule.
-    pub existential_aux_rule: ChaseRule,
+    existential_aux_rule: ChaseRule,
     /// The associated variable order for the join of the head atoms
     pub existential_aux_order: VariableOrder,
 
@@ -62,6 +62,13 @@ pub struct RuleAnalysis {
     pub promising_variable_orders: Vec<VariableOrder>,
 }
 
+impl RuleAnalysis {
+    /// Return the existential auxiliary rule.
+ pub(crate) fn existential_aux_rule(&self) -> &ChaseRule { + &self.existential_aux_rule + } +} + /// Errors than can occur during rule analysis #[derive(Error, Debug, Clone, PartialEq, Eq)] #[allow(clippy::enum_variant_names)] @@ -225,15 +232,15 @@ fn analyze_rule( RuleAnalysis { is_existential: num_existential > 0, is_recursive: is_recursive(rule), - has_positive_constraints: !rule.positive_filters().is_empty(), + _has_positive_constraints: !rule.positive_filters().is_empty(), has_aggregates: rule.aggregate().is_some(), positive_body_predicates: get_predicates(rule.positive_body()), negative_body_predicates: get_predicates(rule.negative_body()), head_predicates: get_predicates(rule.head()), - positive_body_variables: get_variables(rule.positive_body()), - negative_body_variables: get_variables(rule.negative_body()), + _positive_body_variables: get_variables(rule.positive_body()), + _negative_body_variables: get_variables(rule.negative_body()), head_variables: get_variables(rule.head()), - num_existential, + _num_existential: num_existential, existential_aux_rule, existential_aux_order, promising_variable_orders, @@ -280,12 +287,6 @@ impl ChaseProgram { } } - fn add_missing(predicate: Tag, arities: &HashMap, missing: &mut HashSet) { - if arities.get(&predicate).is_none() { - missing.insert(predicate); - } - } - // Predicates in import statements for import in self.imports() { add_arity(import.predicate().clone(), import.arity(), &mut result); diff --git a/nemo/src/chase_model/analysis/variable_order.rs b/nemo/src/chase_model/analysis/variable_order.rs index a1ef5f3e5..b7a77a162 100644 --- a/nemo/src/chase_model/analysis/variable_order.rs +++ b/nemo/src/chase_model/analysis/variable_order.rs @@ -36,7 +36,7 @@ impl VariableOrder { } /// Insert a new variable at a certain position. - pub fn push_position(&mut self, variable: Variable, position: usize) { + pub fn _push_position(&mut self, variable: Variable, position: usize) { for current_position in &mut self.0.values_mut() { if *current_position >= position { *current_position += 1; @@ -58,7 +58,7 @@ impl VariableOrder { } /// Returns a [VariableOrder] which is restricted to the given variables (but preserve their order) - pub fn restrict_to(&self, variables: &HashSet) -> Self { + pub fn _restrict_to(&self, variables: &HashSet) -> Self { let mut variable_vector = Vec::::with_capacity(variables.len()); for variable in variables { if self.0.contains_key(variable) { @@ -83,12 +83,12 @@ impl VariableOrder { } /// Returns the number of entries. - pub fn len(&self) -> usize { + pub fn _len(&self) -> usize { self.0.len() } /// Returns whether it contains any entry. - pub fn is_empty(&self) -> bool { + pub fn _is_empty(&self) -> bool { self.0.is_empty() } diff --git a/nemo/src/chase_model/components/aggregate.rs b/nemo/src/chase_model/components/aggregate.rs index 1e9667cbe..24329aab1 100644 --- a/nemo/src/chase_model/components/aggregate.rs +++ b/nemo/src/chase_model/components/aggregate.rs @@ -1,9 +1,10 @@ //! This module defines [ChaseAggregate]. 
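Aside: this hunk, together with the matching translation changes later in the series, switches the group-by variables from HashSet<Variable> to Vec<Variable>. A plausible motivation is that grouping columns need a stable, reproducible order, which HashSet iteration does not guarantee. A minimal, self-contained sketch (std only, not taken from the patch) of the difference:

    fn stable_group_by_order() {
        use std::collections::HashSet;

        // A Vec preserves the order in which group-by variables were collected.
        let group_by = vec!["?x".to_string(), "?y".to_string(), "?z".to_string()];
        assert_eq!(group_by, ["?x", "?y", "?z"]);

        // A HashSet may yield the same variables in any order, so two runs
        // could produce differently ordered grouping columns.
        let unordered: HashSet<String> = group_by.iter().cloned().collect();
        assert_eq!(unordered.len(), 3);
    }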
-use std::collections::HashSet; - use crate::rule_model::{ - components::term::{aggregate::AggregateKind, primitive::variable::Variable}, + components::{ + term::{aggregate::AggregateKind, primitive::variable::Variable}, + IterableVariables, + }, origin::Origin, }; @@ -32,7 +33,7 @@ pub(crate) struct ChaseAggregate { /// Distinct variables distinct_variables: Vec, /// Group-by variables - group_by_variables: HashSet, + group_by_variables: Vec, } impl ChaseAggregate { @@ -43,7 +44,7 @@ impl ChaseAggregate { input_variable: Variable, output_variable: Variable, distinct_variables: Vec, - group_by_variables: HashSet, + group_by_variables: Vec, ) -> Self { Self { origin, @@ -71,7 +72,7 @@ impl ChaseAggregate { } /// Return the group by variable. - pub fn group_by_variables(&self) -> &HashSet { + pub fn group_by_variables(&self) -> &Vec { &self.group_by_variables } @@ -94,3 +95,33 @@ impl ChaseComponent for ChaseAggregate { self } } + +impl IterableVariables for ChaseAggregate { + fn variables<'a>(&'a self) -> Box + 'a> { + let input_variables = Some(&self.input_variable).into_iter(); + let output_variables = Some(&self.output_variable).into_iter(); + let distinct_variables = self.distinct_variables.iter(); + let group_by_variables = self.group_by_variables.iter(); + + Box::new( + input_variables + .chain(output_variables) + .chain(distinct_variables) + .chain(group_by_variables), + ) + } + + fn variables_mut<'a>(&'a mut self) -> Box + 'a> { + let input_variables = Some(&mut self.input_variable).into_iter(); + let output_variables = Some(&mut self.output_variable).into_iter(); + let distinct_variables = self.distinct_variables.iter_mut(); + let group_by_variables = self.group_by_variables.iter_mut(); + + Box::new( + input_variables + .chain(output_variables) + .chain(distinct_variables) + .chain(group_by_variables), + ) + } +} diff --git a/nemo/src/chase_model/components/atom.rs b/nemo/src/chase_model/components/atom.rs index e30eba2ef..3fc95f4dc 100644 --- a/nemo/src/chase_model/components/atom.rs +++ b/nemo/src/chase_model/components/atom.rs @@ -4,12 +4,14 @@ pub(crate) mod ground_atom; pub(crate) mod primitive_atom; pub(crate) mod variable_atom; +use std::fmt::Display; + use crate::rule_model::components::{tag::Tag, IterableVariables}; use super::ChaseComponent; /// Tagged list of terms. -pub(crate) trait ChaseAtom: ChaseComponent + IterableVariables { +pub(crate) trait ChaseAtom: ChaseComponent + IterableVariables + Display { /// Type of the terms within the atom. type TypeTerm; diff --git a/nemo/src/chase_model/components/atom/ground_atom.rs b/nemo/src/chase_model/components/atom/ground_atom.rs index f04e6ca77..74b9c93a9 100644 --- a/nemo/src/chase_model/components/atom/ground_atom.rs +++ b/nemo/src/chase_model/components/atom/ground_atom.rs @@ -1,24 +1,32 @@ //! This module defines [GroundAtom]. 
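Aside: a usage sketch for the Display and TryFrom<Atom> impls added below. GroundTerm: From<AnyDataValue> is relied on the same way by execution_engine.rs later in this series, but Tag::from and AnyDataValue::new_integer_from_i64 are assumed constructors here, and the exact rendering depends on GroundTerm's Display impl, so treat this as an illustration only:

    fn ground_atom_sketch() {
        use nemo_physical::datavalues::AnyDataValue;

        let terms = vec![
            GroundTerm::from(AnyDataValue::new_integer_from_i64(1)),
            GroundTerm::from(AnyDataValue::new_integer_from_i64(2)),
        ];
        let atom = GroundAtom::new(Tag::from("edge"), terms);

        // Expected to render roughly as `edge(1, 2)`: the predicate first,
        // then the arguments separated by SEQUENCE_SEPARATOR.
        println!("{atom}");
    }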
+use std::fmt::Display; + use nemo_physical::datavalues::AnyDataValue; use crate::{ chase_model::components::ChaseComponent, rule_model::{ components::{ + atom::Atom, tag::Tag, - term::primitive::{ground::GroundTerm, variable::Variable}, - IterableVariables, + term::{ + primitive::{ground::GroundTerm, variable::Variable, Primitive}, + Term, + }, + IterableVariables, ProgramComponent, }, origin::Origin, }, + syntax, + util::seperated_list::DisplaySeperatedList, }; use super::ChaseAtom; /// An atom which may only use [GroundTerm]s #[derive(Debug, Clone)] -pub(crate) struct GroundAtom { +pub struct GroundAtom { /// Origin of this component origin: Origin, @@ -30,7 +38,7 @@ pub(crate) struct GroundAtom { impl GroundAtom { /// Construct a new [GroundAtom]. - pub(crate) fn new(predicate: Tag, terms: Vec) -> Self { + pub fn new(predicate: Tag, terms: Vec) -> Self { Self { origin: Origin::default(), predicate, @@ -39,7 +47,7 @@ impl GroundAtom { } /// Returns all [AnyDataValue]s used as constants in this atom - pub(crate) fn datavalues(&self) -> impl Iterator + '_ { + pub fn datavalues(&self) -> impl Iterator + '_ { self.terms().map(|term| term.value()) } } @@ -60,6 +68,22 @@ impl ChaseAtom for GroundAtom { } } +impl Display for GroundAtom { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let terms = DisplaySeperatedList::display( + self.terms(), + &format!("{} ", syntax::SEQUENCE_SEPARATOR), + ); + let predicate = self.predicate(); + + f.write_str(&format!( + "{predicate}{}{terms}{}", + syntax::expression::atom::OPEN, + syntax::expression::atom::CLOSE + )) + } +} + impl IterableVariables for GroundAtom { fn variables<'a>(&'a self) -> Box + 'a> { Box::new(std::iter::empty()) @@ -83,3 +107,33 @@ impl ChaseComponent for GroundAtom { self } } + +/// Error struct for converting logical atoms to [GroundAtom]s +#[derive(Debug, Clone, Copy)] +pub struct GroundAtomConversionError; + +impl Display for GroundAtomConversionError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str("atom contains non-ground terms") + } +} + +impl TryFrom for GroundAtom { + type Error = GroundAtomConversionError; + + fn try_from(value: Atom) -> Result { + let origin = value.origin().clone(); + let predicate = value.predicate(); + let mut terms = Vec::new(); + + for term in value.arguments().cloned() { + if let Term::Primitive(Primitive::Ground(ground_term)) = term { + terms.push(ground_term) + } else { + return Err(GroundAtomConversionError); + } + } + + Ok(Self::new(predicate, terms).set_origin(origin)) + } +} diff --git a/nemo/src/chase_model/components/atom/primitive_atom.rs b/nemo/src/chase_model/components/atom/primitive_atom.rs index 01cc3e406..728e5d763 100644 --- a/nemo/src/chase_model/components/atom/primitive_atom.rs +++ b/nemo/src/chase_model/components/atom/primitive_atom.rs @@ -1,17 +1,23 @@ //! This module defines [PrimitiveAtom]. -use nemo_physical::datavalues::AnyDataValue; +use std::fmt::Display; use crate::{ chase_model::components::ChaseComponent, rule_model::{ components::{ + atom::Atom, tag::Tag, - term::primitive::{variable::Variable, Primitive}, - IterableVariables, + term::{ + primitive::{variable::Variable, Primitive}, + Term, + }, + IterablePrimitives, IterableVariables, ProgramComponent, }, origin::Origin, }, + syntax, + util::seperated_list::DisplaySeperatedList, }; use super::ChaseAtom; @@ -37,14 +43,6 @@ impl PrimitiveAtom { terms, } } - - /// Returns all [AnyDataValue]s used as constants in this atom. 
- pub(crate) fn datavalues(&self) -> impl Iterator + '_ { - self.terms.iter().filter_map(|term| match term { - Primitive::Ground(ground) => Some(ground.value().clone()), - Primitive::Variable(_) => None, - }) - } } impl ChaseAtom for PrimitiveAtom { @@ -63,6 +61,22 @@ impl ChaseAtom for PrimitiveAtom { } } +impl Display for PrimitiveAtom { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let terms = DisplaySeperatedList::display( + self.terms(), + &format!("{} ", syntax::SEQUENCE_SEPARATOR), + ); + let predicate = self.predicate(); + + f.write_str(&format!( + "{predicate}{}{terms}{}", + syntax::expression::atom::OPEN, + syntax::expression::atom::CLOSE + )) + } +} + impl IterableVariables for PrimitiveAtom { fn variables<'a>(&'a self) -> Box + 'a> { Box::new(self.terms().filter_map(|term| match term { @@ -79,6 +93,16 @@ impl IterableVariables for PrimitiveAtom { } } +impl IterablePrimitives for PrimitiveAtom { + fn primitive_terms<'a>(&'a self) -> Box + 'a> { + Box::new(self.terms.iter()) + } + + fn primitive_terms_mut<'a>(&'a mut self) -> Box + 'a> { + Box::new(self.terms.iter_mut()) + } +} + impl ChaseComponent for PrimitiveAtom { fn origin(&self) -> &Origin { &self.origin @@ -92,3 +116,33 @@ impl ChaseComponent for PrimitiveAtom { self } } + +/// Error struct for converting logical atoms to [PrimitiveAtom]s +#[derive(Debug)] +pub(crate) struct PrimitiveAtomConversionError; + +impl Display for PrimitiveAtomConversionError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str("atom contains non-primitive terms") + } +} + +impl TryFrom for PrimitiveAtom { + type Error = PrimitiveAtomConversionError; + + fn try_from(value: Atom) -> Result { + let origin = value.origin().clone(); + let predicate = value.predicate(); + let mut terms = Vec::new(); + + for term in value.arguments().cloned() { + if let Term::Primitive(primitive_term) = term { + terms.push(primitive_term) + } else { + return Err(PrimitiveAtomConversionError); + } + } + + Ok(Self::new(predicate, terms).set_origin(origin)) + } +} diff --git a/nemo/src/chase_model/components/atom/variable_atom.rs b/nemo/src/chase_model/components/atom/variable_atom.rs index 01c33e8b7..a9e5ce55c 100644 --- a/nemo/src/chase_model/components/atom/variable_atom.rs +++ b/nemo/src/chase_model/components/atom/variable_atom.rs @@ -1,11 +1,23 @@ //! This module defines [VariableAtom]. 
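Aside: a VariableAtom admits only variables, so converting an atom that still contains a constant fails with the error type introduced below; the rule translation (translation/rule.rs in this series) therefore splits constants off into fresh variables plus filters first. A hedged sketch of both outcomes, using only the impls from this hunk:

    fn variable_atom_sketch(atom: Atom) {
        match VariableAtom::try_from(atom) {
            // Succeeds only if every argument is a variable, e.g. p(?x, ?y).
            Ok(variable_atom) => println!("converted: {variable_atom}"),
            // An atom such as p(?x, 3) is rejected instead and prints
            // "atom contains non-variable terms".
            Err(error) => println!("{error}"),
        }
    }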
+use std::fmt::Display; + use crate::{ chase_model::components::ChaseComponent, rule_model::{ - components::{tag::Tag, term::primitive::variable::Variable, IterableVariables}, + components::{ + atom::Atom, + tag::Tag, + term::{ + primitive::{variable::Variable, Primitive}, + Term, + }, + IterableVariables, ProgramComponent, + }, origin::Origin, }, + syntax, + util::seperated_list::DisplaySeperatedList, }; use super::ChaseAtom; @@ -33,6 +45,22 @@ impl VariableAtom { } } +impl Display for VariableAtom { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let terms = DisplaySeperatedList::display( + self.terms(), + &format!("{} ", syntax::SEQUENCE_SEPARATOR), + ); + let predicate = self.predicate(); + + f.write_str(&format!( + "{predicate}{}{terms}{}", + syntax::expression::atom::OPEN, + syntax::expression::atom::CLOSE + )) + } +} + impl ChaseAtom for VariableAtom { type TypeTerm = Variable; @@ -72,3 +100,33 @@ impl ChaseComponent for VariableAtom { self } } + +/// Error struct for converting logical atoms to [VariableAtom]s +#[derive(Debug)] +pub(crate) struct VariableAtomConversionError; + +impl Display for VariableAtomConversionError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str("atom contains non-variable terms") + } +} + +impl TryFrom for VariableAtom { + type Error = VariableAtomConversionError; + + fn try_from(value: Atom) -> Result { + let origin = value.origin().clone(); + let predicate = value.predicate(); + let mut terms = Vec::new(); + + for term in value.arguments().cloned() { + if let Term::Primitive(Primitive::Variable(variable)) = term { + terms.push(variable) + } else { + return Err(VariableAtomConversionError); + } + } + + Ok(Self::new(predicate, terms).set_origin(origin)) + } +} diff --git a/nemo/src/chase_model/components/export.rs b/nemo/src/chase_model/components/export.rs index 93f06f96c..d771f9f7a 100644 --- a/nemo/src/chase_model/components/export.rs +++ b/nemo/src/chase_model/components/export.rs @@ -35,7 +35,7 @@ impl ChaseExport { } /// Return the handler. - pub(crate) fn handler(&self) -> &Box { + pub(crate) fn _handler(&self) -> &Box { &self.handler } diff --git a/nemo/src/chase_model/components/filter.rs b/nemo/src/chase_model/components/filter.rs index 08365264d..378d24b12 100644 --- a/nemo/src/chase_model/components/filter.rs +++ b/nemo/src/chase_model/components/filter.rs @@ -1,6 +1,12 @@ //! This module defines [ChaseFilter]. -use crate::rule_model::origin::Origin; +use crate::rule_model::{ + components::{ + term::primitive::{variable::Variable, Primitive}, + IterablePrimitives, IterableVariables, + }, + origin::Origin, +}; use super::{term::operation_term::OperationTerm, ChaseComponent}; @@ -44,3 +50,23 @@ impl ChaseComponent for ChaseFilter { self } } + +impl IterableVariables for ChaseFilter { + fn variables<'a>(&'a self) -> Box + 'a> { + self.filter.variables() + } + + fn variables_mut<'a>(&'a mut self) -> Box + 'a> { + self.filter.variables_mut() + } +} + +impl IterablePrimitives for ChaseFilter { + fn primitive_terms<'a>(&'a self) -> Box + 'a> { + self.filter.primitive_terms() + } + + fn primitive_terms_mut<'a>(&'a mut self) -> Box + 'a> { + self.filter.primitive_terms_mut() + } +} diff --git a/nemo/src/chase_model/components/operation.rs b/nemo/src/chase_model/components/operation.rs index b87da449d..d7a76e5e4 100644 --- a/nemo/src/chase_model/components/operation.rs +++ b/nemo/src/chase_model/components/operation.rs @@ -1,6 +1,12 @@ //! This module defines [ChaseOperation]. 
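Aside: per the IterableVariables impl added below, a ChaseOperation yields its output variable first, followed by the variables of the term that defines it. A small sketch (the operation's contents and the Display output for Variable are hypothetical):

    fn operation_variables_sketch(operation: &ChaseOperation) {
        // For an assignment like ?sum := ?a + ?b this is expected to print
        // ?sum, then ?a, then ?b.
        for variable in operation.variables() {
            println!("{variable}");
        }
    }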
-use crate::rule_model::{components::term::primitive::variable::Variable, origin::Origin}; +use crate::rule_model::{ + components::{ + term::primitive::{variable::Variable, Primitive}, + IterablePrimitives, IterableVariables, + }, + origin::Origin, +}; use super::{term::operation_term::OperationTerm, ChaseComponent}; @@ -52,3 +58,31 @@ impl ChaseComponent for ChaseOperation { self } } + +impl IterableVariables for ChaseOperation { + fn variables<'a>(&'a self) -> Box + 'a> { + Box::new( + Some(self.variable()) + .into_iter() + .chain(self.operation.variables()), + ) + } + + fn variables_mut<'a>(&'a mut self) -> Box + 'a> { + Box::new( + Some(&mut self.output_variable) + .into_iter() + .chain(self.operation.variables_mut()), + ) + } +} + +impl IterablePrimitives for ChaseOperation { + fn primitive_terms<'a>(&'a self) -> Box + 'a> { + self.operation.primitive_terms() + } + + fn primitive_terms_mut<'a>(&'a mut self) -> Box + 'a> { + self.operation.primitive_terms_mut() + } +} diff --git a/nemo/src/chase_model/components/program.rs b/nemo/src/chase_model/components/program.rs index 89acf2540..739d9017b 100644 --- a/nemo/src/chase_model/components/program.rs +++ b/nemo/src/chase_model/components/program.rs @@ -2,7 +2,9 @@ use std::collections::HashSet; -use crate::rule_model::components::tag::Tag; +use nemo_physical::datavalues::AnyDataValue; + +use crate::rule_model::components::{tag::Tag, term::primitive::Primitive, IterablePrimitives}; use super::{ atom::{ground_atom::GroundAtom, ChaseAtom}, @@ -11,7 +13,7 @@ use super::{ rule::ChaseRule, }; -#[derive(Debug, Default)] +#[derive(Debug, Default, Clone)] pub(crate) struct ChaseProgram { /// Imports imports: Vec, @@ -26,23 +28,6 @@ pub(crate) struct ChaseProgram { } impl ChaseProgram { - /// Create a new [ChaseProgram]. - pub(crate) fn new( - imports: Vec, - exports: Vec, - rules: Vec, - facts: Vec, - output_predicates: Vec, - ) -> Self { - Self { - imports, - exports, - rules, - facts, - output_predicates, - } - } - /// Add a new rule to the program. pub(crate) fn add_rule(&mut self, rule: ChaseRule) { self.rules.push(rule) @@ -91,7 +76,7 @@ impl ChaseProgram { } /// Return a list of output predicates contained in this program. - pub(crate) fn output_predicates(&self) -> &Vec { + pub(crate) fn _output_predicates(&self) -> &Vec { &self.output_predicates } } @@ -105,4 +90,19 @@ impl ChaseProgram { .map(|atom| atom.predicate()) .collect() } + + /// Return an iterator over all [AnyDataValue]s contained in this program. + pub fn datavalues(&self) -> impl Iterator + '_ { + let datavalues_rules = self + .rules + .iter() + .flat_map(|rule| rule.primitive_terms()) + .filter_map(|primitive| match primitive { + Primitive::Variable(_) => None, + Primitive::Ground(ground) => Some(ground.value()), + }); + let datavalues_facts = self.facts.iter().flat_map(|fact| fact.datavalues()); + + datavalues_facts.chain(datavalues_rules) + } } diff --git a/nemo/src/chase_model/components/rule.rs b/nemo/src/chase_model/components/rule.rs index 60a42c5ef..6ffc0a04b 100644 --- a/nemo/src/chase_model/components/rule.rs +++ b/nemo/src/chase_model/components/rule.rs @@ -1,6 +1,9 @@ //! This module defines [ChaseRule]. 
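Aside: the long iterator chains added below let consumers treat a whole rule as a single stream of variables or primitive terms. Collecting a rule's distinct variables then becomes a one-liner (a sketch, assuming Variable: Clone + Eq + Hash as it is used elsewhere in this patch):

    fn distinct_rule_variables(rule: &ChaseRule) -> std::collections::HashSet<Variable> {
        // Walks head, positive and negative body, operations, filters, and
        // aggregation parts in one pass via the IterableVariables impl below.
        rule.variables().cloned().collect()
    }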
-use crate::rule_model::origin::Origin; +use crate::rule_model::{ + components::{term::primitive::variable::Variable, IterablePrimitives, IterableVariables}, + origin::Origin, +}; use super::{ aggregate::ChaseAggregate, @@ -54,7 +57,7 @@ struct ChaseRuleHead { /// Representation of a rule in a [ChaseProgram][super::program::ChaseProgram] #[allow(dead_code)] #[derive(Debug, Default, Clone)] -pub(crate) struct ChaseRule { +pub struct ChaseRule { /// Origin of this component origin: Origin, @@ -70,6 +73,7 @@ pub(crate) struct ChaseRule { impl ChaseRule { /// Create a simple positive rule. + #[cfg(test)] pub(crate) fn positive_rule( head: Vec, body: Vec, @@ -158,11 +162,6 @@ impl ChaseRule { self.negative.filters.push(Vec::default()) } - /// Add a filter to the negative part of the body. - pub(crate) fn add_negative_filter(&mut self, atom_index: usize, filter: ChaseFilter) { - self.negative.filters[atom_index].push(filter) - } - /// Add a filter to the negative part of the body. /// /// # Panics @@ -187,7 +186,7 @@ impl ChaseRule { } /// Add a new filter that uses the result of aggregation. - pub(crate) fn add_aggregation_filter(&mut self, filter: ChaseFilter) { + pub(crate) fn _add_aggregation_filter(&mut self, filter: ChaseFilter) { self.aggregation.filters.push(filter); } @@ -210,3 +209,169 @@ impl ChaseComponent for ChaseRule { self } } + +impl IterableVariables for ChaseRule { + fn variables<'a>(&'a self) -> Box + 'a> { + let head_variables = self.head().iter().flat_map(|atom| atom.variables()); + let positive_body_variables = self + .positive_body() + .iter() + .flat_map(|atom| atom.variables()); + let positive_operation_variables = self + .positive_operations() + .iter() + .flat_map(|operation| operation.variables()); + let positive_filter_variables = self + .positive_filters() + .iter() + .flat_map(|filter| filter.variables()); + + let negative_body_variables = self + .negative_body() + .iter() + .flat_map(|atom| atom.variables()); + let negative_filter_variables = self + .negative_filters() + .iter() + .flatten() + .flat_map(|filter| filter.variables()); + + let aggregation_variables = self + .aggregate() + .into_iter() + .flat_map(|aggregate| aggregate.variables()); + let aggregation_operation_variables = self + .aggregate_operations() + .iter() + .flat_map(|operation| operation.variables()); + let aggregation_filter_variables = self + .aggregate_filters() + .iter() + .flat_map(|filter| filter.variables()); + + Box::new( + head_variables + .chain(positive_body_variables) + .chain(positive_operation_variables) + .chain(positive_filter_variables) + .chain(negative_body_variables) + .chain(negative_filter_variables) + .chain(aggregation_variables) + .chain(aggregation_operation_variables) + .chain(aggregation_filter_variables), + ) + } + + fn variables_mut<'a>(&'a mut self) -> Box + 'a> { + let head_variables = self + .head + .atoms + .iter_mut() + .flat_map(|atom| atom.variables_mut()); + let positive_body_variables = self + .positive + .atoms + .iter_mut() + .flat_map(|atom| atom.variables_mut()); + let positive_operation_variables = self + .positive + .operations + .iter_mut() + .flat_map(|operation| operation.variables_mut()); + let positive_filter_variables = self + .positive + .filters + .iter_mut() + .flat_map(|filter| filter.variables_mut()); + + let negative_body_variables = self + .negative + .atoms + .iter_mut() + .flat_map(|atom| atom.variables_mut()); + let negative_filter_variables = self + .negative + .filters + .iter_mut() + .flatten() + .flat_map(|filter| 
filter.variables_mut()); + + let aggregation_variables = self + .aggregation + .aggregate + .as_mut() + .into_iter() + .flat_map(|aggregate| aggregate.variables_mut()); + let aggregation_operation_variables = self + .aggregation + .operations + .iter_mut() + .flat_map(|operation| operation.variables_mut()); + let aggregation_filter_variables = self + .aggregation + .filters + .iter_mut() + .flat_map(|filter| filter.variables_mut()); + + Box::new( + head_variables + .chain(positive_body_variables) + .chain(positive_operation_variables) + .chain(positive_filter_variables) + .chain(negative_body_variables) + .chain(negative_filter_variables) + .chain(aggregation_variables) + .chain(aggregation_operation_variables) + .chain(aggregation_filter_variables), + ) + } +} + +impl IterablePrimitives for ChaseRule { + fn primitive_terms<'a>( + &'a self, + ) -> Box + 'a> + { + let head_terms = self.head().iter().flat_map(|atom| atom.primitive_terms()); + let positive_operation_terms = self + .positive_operations() + .iter() + .flat_map(|operation| operation.primitive_terms()); + let positive_filter_terms = self + .positive_filters() + .iter() + .flat_map(|filter| filter.primitive_terms()); + + let negative_filter_terms = self + .negative_filters() + .iter() + .flatten() + .flat_map(|filter| filter.primitive_terms()); + + let aggregation_operation_terms = self + .aggregate_operations() + .iter() + .flat_map(|operation| operation.primitive_terms()); + let aggregation_filter_terms = self + .aggregate_filters() + .iter() + .flat_map(|filter| filter.primitive_terms()); + + Box::new( + head_terms + .chain(positive_operation_terms) + .chain(positive_filter_terms) + .chain(negative_filter_terms) + .chain(aggregation_operation_terms) + .chain(aggregation_filter_terms), + ) + } + + fn primitive_terms_mut<'a>( + &'a mut self, + ) -> Box< + dyn Iterator + 'a, + > { + todo!() + } +} diff --git a/nemo/src/chase_model/components/term/operation_term.rs b/nemo/src/chase_model/components/term/operation_term.rs index 4b41971a9..0941152a3 100644 --- a/nemo/src/chase_model/components/term/operation_term.rs +++ b/nemo/src/chase_model/components/term/operation_term.rs @@ -3,7 +3,13 @@ use crate::{ chase_model::components::ChaseComponent, rule_model::{ - components::term::{operation::operation_kind::OperationKind, primitive::Primitive}, + components::{ + term::{ + operation::operation_kind::OperationKind, + primitive::{variable::Variable, Primitive}, + }, + IterablePrimitives, IterableVariables, + }, origin::Origin, }, }; @@ -32,6 +38,16 @@ impl Operation { subterms, } } + + /// Return the kind of operation. + pub(crate) fn operation_kind(&self) -> OperationKind { + self.kind + } + + /// Return the list of subterms. 
+ pub(crate) fn subterms(&self) -> &Vec { + &self.subterms + } } impl ChaseComponent for Operation { @@ -48,9 +64,69 @@ impl ChaseComponent for Operation { } } +impl IterableVariables for Operation { + fn variables<'a>(&'a self) -> Box + 'a> { + Box::new(self.subterms.iter().flat_map(|term| term.variables())) + } + + fn variables_mut<'a>(&'a mut self) -> Box + 'a> { + Box::new( + self.subterms + .iter_mut() + .flat_map(|term| term.variables_mut()), + ) + } +} + +impl IterablePrimitives for Operation { + fn primitive_terms<'a>(&'a self) -> Box + 'a> { + Box::new(self.subterms.iter().flat_map(|term| term.primitive_terms())) + } + + fn primitive_terms_mut<'a>(&'a mut self) -> Box + 'a> { + Box::new( + self.subterms + .iter_mut() + .flat_map(|term| term.primitive_terms_mut()), + ) + } +} + /// Term that can be evaluated #[derive(Debug, Clone)] pub(crate) enum OperationTerm { Primitive(Primitive), Operation(Operation), } + +impl IterableVariables for OperationTerm { + fn variables<'a>(&'a self) -> Box + 'a> { + match self { + OperationTerm::Primitive(primitive) => primitive.variables(), + OperationTerm::Operation(operation) => operation.variables(), + } + } + + fn variables_mut<'a>(&'a mut self) -> Box + 'a> { + match self { + OperationTerm::Primitive(primitive) => primitive.variables_mut(), + OperationTerm::Operation(operation) => operation.variables_mut(), + } + } +} + +impl IterablePrimitives for OperationTerm { + fn primitive_terms<'a>(&'a self) -> Box + 'a> { + match self { + OperationTerm::Primitive(primitive) => Box::new(Some(primitive).into_iter()), + OperationTerm::Operation(operation) => operation.primitive_terms(), + } + } + + fn primitive_terms_mut<'a>(&'a mut self) -> Box + 'a> { + match self { + OperationTerm::Primitive(primitive) => Box::new(Some(primitive).into_iter()), + OperationTerm::Operation(operation) => operation.primitive_terms_mut(), + } + } +} diff --git a/nemo/src/chase_model/translation.rs b/nemo/src/chase_model/translation.rs index 0f91016ec..d8ab3cef3 100644 --- a/nemo/src/chase_model/translation.rs +++ b/nemo/src/chase_model/translation.rs @@ -7,14 +7,19 @@ pub(crate) mod import; pub(crate) mod operation; pub(crate) mod rule; -use crate::rule_model::program::Program; +use std::collections::HashMap; + +use crate::rule_model::{components::tag::Tag, program::Program}; use super::components::program::ChaseProgram; /// Object for translating a [Program] into a [ChaseProgram] #[derive(Debug)] pub(crate) struct ProgramChaseTranslation { + /// Counter for generating ids for fresh variables fresh_variable_counter: usize, + /// Map associating each predicate with its arity + predicate_arity: HashMap, } impl ProgramChaseTranslation { @@ -22,6 +27,7 @@ impl ProgramChaseTranslation { pub fn new() -> Self { Self { fresh_variable_counter: 0, + predicate_arity: HashMap::default(), } } @@ -41,6 +47,28 @@ impl ProgramChaseTranslation { result.add_output_predicate(output.predicate().clone()); } + for import in program.imports() { + if let Some(arity) = import.expected_arity() { + self.predicate_arity + .insert(import.predicate().clone(), arity); + } + } + + for export in program.exports() { + if let Some(arity) = export.expected_arity() { + self.predicate_arity + .insert(export.predicate().clone(), arity); + } + } + + for import in program.imports() { + result.add_import(self.build_import(import)); + } + + for export in program.exports() { + result.add_export(self.build_export(export)); + } + result } diff --git a/nemo/src/chase_model/translation/aggregate.rs 
b/nemo/src/chase_model/translation/aggregate.rs index ce94a3fbd..04b8e57b2 100644 --- a/nemo/src/chase_model/translation/aggregate.rs +++ b/nemo/src/chase_model/translation/aggregate.rs @@ -1,7 +1,5 @@ //! This module contains functions for creating [ChaseAggregate]s. -use std::collections::HashSet; - use crate::{ chase_model::components::{ aggregate::ChaseAggregate, @@ -31,7 +29,7 @@ impl ProgramChaseTranslation { &mut self, result: &mut ChaseRule, aggregate: &crate::rule_model::components::term::aggregate::Aggregate, - group_by_variables: &HashSet, + group_by_variables: &Vec, ) -> ChaseAggregate { let origin = aggregate.origin().clone(); let kind = aggregate.aggregate_kind(); @@ -90,7 +88,7 @@ impl ProgramChaseTranslation { &mut self, result: &mut ChaseRule, operation: &crate::rule_model::components::term::operation::Operation, - group_by_variables: &HashSet, + group_by_variables: &Vec, chase_aggregate: &mut Option, ) -> OperationTerm { let origin = operation.origin().clone(); @@ -132,7 +130,7 @@ impl ProgramChaseTranslation { &mut self, result: &mut ChaseRule, operation: &crate::rule_model::components::term::operation::Operation, - group_by_variables: &HashSet, + group_by_variables: &Vec, output_variable: Variable, chase_aggregate: &mut Option, ) -> ChaseOperation { diff --git a/nemo/src/chase_model/translation/fact.rs b/nemo/src/chase_model/translation/fact.rs index 733661fcb..69363001d 100644 --- a/nemo/src/chase_model/translation/fact.rs +++ b/nemo/src/chase_model/translation/fact.rs @@ -38,6 +38,8 @@ impl ProgramChaseTranslation { } } + self.predicate_arity.insert(predicate.clone(), terms.len()); + GroundAtom::new(predicate, terms).set_origin(origin) } } diff --git a/nemo/src/chase_model/translation/import.rs b/nemo/src/chase_model/translation/import.rs index c823e2805..1840ea9d7 100644 --- a/nemo/src/chase_model/translation/import.rs +++ b/nemo/src/chase_model/translation/import.rs @@ -5,7 +5,7 @@ use std::collections::HashMap; use oxiri::Iri; use crate::{ - chase_model::components::{import::ChaseImport, ChaseComponent}, + chase_model::components::{export::ChaseExport, import::ChaseImport, ChaseComponent}, io::formats::{ dsv::{value_format::DsvValueFormats, DsvHandler}, json::JsonHandler, @@ -29,15 +29,57 @@ impl ProgramChaseTranslation { /// Build a [ChaseImport] from a given /// [ImportDirective][crate::rule_model::components::import_export::ImportDirective]. pub(crate) fn build_import( - predicate_arity: &HashMap, + &self, import: &crate::rule_model::components::import_export::ImportDirective, ) -> ChaseImport { let origin = import.origin().clone(); let predicate = import.predicate().clone(); - let arity = predicate_arity.get(&predicate).cloned(); let attributes = import.attributes(); + let file_format = import.file_format(); - let handler = match import.file_format() { + let handler = self.import_export_handler( + Direction::Import, + predicate.clone(), + attributes, + file_format, + ); + + ChaseImport::new(predicate, handler).set_origin(origin) + } + + /// Build a [ChaseExport] from a given + /// [ExportDirective][crate::rule_model::components::import_export::ExportDirective]. 
+    pub(crate) fn build_export(
+        &self,
+        export: &crate::rule_model::components::import_export::ExportDirective,
+    ) -> ChaseExport {
+        let origin = export.origin().clone();
+        let predicate = export.predicate().clone();
+        let attributes = export.attributes();
+        let file_format = export.file_format();
+
+        let handler = self.import_export_handler(
+            Direction::Export,
+            predicate.clone(),
+            attributes,
+            file_format,
+        );
+
+        ChaseExport::new(predicate, handler).set_origin(origin)
+    }
+
+    /// Create an [ImportExportHandler] for the given predicate,
+    /// attributes, and file format.
+    fn import_export_handler(
+        &self,
+        direction: Direction,
+        predicate: Tag,
+        attributes: HashMap,
+        file_format: FileFormat,
+    ) -> Box<dyn ImportExportHandler> {
+        let arity = self.predicate_arity.get(&predicate).cloned();
+
+        match file_format {
             FileFormat::CSV => {
                 Self::build_dsv_handler(Direction::Import, Some(b','), arity, &attributes)
             }
@@ -45,15 +87,23 @@ impl ProgramChaseTranslation {
             FileFormat::TSV => {
                 Self::build_dsv_handler(Direction::Import, Some(b'\t'), arity, &attributes)
             }
-            FileFormat::JSON => todo!(),
-            FileFormat::NTriples => todo!(),
-            FileFormat::NQuads => todo!(),
-            FileFormat::Turtle => todo!(),
-            FileFormat::RDFXML => todo!(),
-            FileFormat::TriG => todo!(),
-        };
-
-        ChaseImport::new(predicate, handler).set_origin(origin)
+            FileFormat::JSON => Self::build_json_handler(&attributes),
+            FileFormat::NTriples => {
+                Self::build_rdf_handler(direction, RdfVariant::NTriples, arity, &attributes)
+            }
+            FileFormat::NQuads => {
+                Self::build_rdf_handler(direction, RdfVariant::NQuads, arity, &attributes)
+            }
+            FileFormat::Turtle => {
+                Self::build_rdf_handler(direction, RdfVariant::Turtle, arity, &attributes)
+            }
+            FileFormat::RDFXML => {
+                Self::build_rdf_handler(direction, RdfVariant::RDFXML, arity, &attributes)
+            }
+            FileFormat::TriG => {
+                Self::build_rdf_handler(direction, RdfVariant::TriG, arity, &attributes)
+            }
+        }
     }
 
     /// Read resource attribute and check compression.
@@ -178,9 +228,10 @@ impl ProgramChaseTranslation { fn build_rdf_handler( direction: Direction, variant: RdfVariant, - arity: usize, + arity: Option, attributes: &HashMap, ) -> Box { + let arity = arity.expect("rdf types have known arity"); let (mut compression_format, resource) = Self::read_resource(attributes); if let Some(format) = Self::read_compression(attributes) { diff --git a/nemo/src/chase_model/translation/rule.rs b/nemo/src/chase_model/translation/rule.rs index ed9b8da90..c8845c200 100644 --- a/nemo/src/chase_model/translation/rule.rs +++ b/nemo/src/chase_model/translation/rule.rs @@ -48,17 +48,21 @@ impl ProgramChaseTranslation { match literal { Literal::Positive(atom) => { let (variable_atom, filters) = self.build_body_atom(atom); + result.add_positive_atom(variable_atom); for filter in filters { result.add_positive_filter(filter); } + self.predicate_arity.insert(atom.predicate(), atom.len()); } Literal::Negative(atom) => { let (variable_atom, filters) = self.build_body_atom(atom); + result.add_negative_atom(variable_atom); for filter in filters { result.add_negative_filter_last(filter); } + self.predicate_arity.insert(atom.predicate(), atom.len()); } Literal::Operation(_) => { // Will be handled below @@ -243,7 +247,7 @@ impl ProgramChaseTranslation { fn handle_head(&mut self, result: &mut ChaseRule, head: &Vec) { let mut chase_aggregate: Option = None; - for atom in head { + for (head_index, atom) in head.iter().enumerate() { let origin = atom.origin().clone(); let predicate = atom.predicate().clone(); let mut terms = Vec::new(); @@ -277,9 +281,14 @@ impl ProgramChaseTranslation { } _ => unreachable!("invalid program: rule head contains complex terms"), } + + if let Some(aggregate) = chase_aggregate.clone() { + result.add_aggregation(aggregate, head_index); + } } - result.add_head_atom(PrimitiveAtom::new(predicate, terms).set_origin(origin)) + self.predicate_arity.insert(predicate.clone(), terms.len()); + result.add_head_atom(PrimitiveAtom::new(predicate, terms).set_origin(origin)); } } @@ -290,7 +299,7 @@ impl ProgramChaseTranslation { fn compute_group_by_variables<'a>( terms: impl Iterator, current_index: usize, - ) -> HashSet { + ) -> Vec { let mut result = HashSet::new(); for (term_index, term) in terms.enumerate() { @@ -301,6 +310,6 @@ impl ProgramChaseTranslation { result.extend(term.variables().cloned()); } - result + result.into_iter().collect() } } diff --git a/nemo/src/execution/execution_engine.rs b/nemo/src/execution/execution_engine.rs index d84dae680..ace548598 100644 --- a/nemo/src/execution/execution_engine.rs +++ b/nemo/src/execution/execution_engine.rs @@ -10,14 +10,26 @@ use nemo_physical::{ }; use crate::{ + chase_model::{ + analysis::program_analysis::ProgramAnalysis, + components::{ + atom::{ground_atom::GroundAtom, ChaseAtom}, + program::ChaseProgram, + }, + translation::ProgramChaseTranslation, + }, error::Error, execution::{planning::plan_tracing::TracingStrategy, tracing::trace::TraceDerivation}, io::import_manager::ImportManager, - model::{ - chase_model::{ChaseAtom, ChaseFact, ChaseProgram}, - Fact, Identifier, PrimitiveTerm, Program, Variable, + rule_model::{ + components::{ + fact::Fact, + tag::Tag, + term::primitive::{ground::GroundTerm, variable::Variable, Primitive}, + }, + program::Program, + term_map::PrimitiveTermMap, }, - program_analysis::analysis::ProgramAnalysis, table_manager::{MemoryUsage, SubtableExecutionPlan, TableManager}, }; @@ -58,8 +70,8 @@ pub struct ExecutionEngine { input_manager: ImportManager, table_manager: TableManager, 
- predicate_fragmentation: HashMap, - predicate_last_union: HashMap, + predicate_fragmentation: HashMap, + predicate_last_union: HashMap, rule_infos: Vec, rule_history: Vec, @@ -69,9 +81,9 @@ pub struct ExecutionEngine { impl ExecutionEngine { /// Initialize [ExecutionEngine]. pub fn initialize(program: &Program, input_manager: ImportManager) -> Result { - let chase_program: ChaseProgram = program.clone().try_into()?; + let chase_program = ProgramChaseTranslation::new().translate(program.clone()); - let analysis = chase_program.analyze()?; + let analysis = chase_program.analyze(); let mut table_manager = TableManager::new(); Self::register_all_predicates(&mut table_manager, &analysis); @@ -112,8 +124,8 @@ impl ExecutionEngine { /// Add all constants appearing in the rules of the program to the dictionary. fn add_all_constants(table_manager: &mut TableManager, program: &ChaseProgram) { - for dv in program.all_datavalues() { - table_manager.dictionary_mut().add_datavalue(dv.clone()); + for value in program.datavalues() { + table_manager.dictionary_mut().add_datavalue(value); } } @@ -124,30 +136,29 @@ impl ExecutionEngine { input_manager: &ImportManager, program: &ChaseProgram, ) -> Result<(), Error> { - let mut predicate_to_sources = HashMap::>::new(); + let mut predicate_to_sources = HashMap::>::new(); // Add all the import specifications - for (import_predicate, import_handler) in program.imports() { - let import_arity = table_manager.arity(import_predicate); + for import in program.imports() { let table_source = TableSource::new( - input_manager.table_provider_from_handler(&**import_handler, import_arity)?, - import_arity, + input_manager.table_provider_from_handler(&**import.handler())?, + import.arity(), ); predicate_to_sources - .entry(import_predicate.clone()) + .entry(import.predicate().clone()) .or_default() .push(table_source); } // Add all the facts contained in the rule file as a source - let mut predicate_to_rows = HashMap::::new(); + let mut predicate_to_rows = HashMap::::new(); for fact in program.facts() { let table = predicate_to_rows .entry(fact.predicate()) .or_insert(SimpleTable::new(fact.arity())); - table.add_row(fact.terms().to_vec()); + table.add_row(fact.datavalues().collect()); } for (predicate, table) in predicate_to_rows.into_iter() { @@ -250,7 +261,7 @@ impl ExecutionEngine { /// Creates an [Iterator] over all facts of a predicate. pub fn predicate_rows( &mut self, - predicate: &Identifier, + predicate: &Tag, ) -> Result> + '_>, Error> { let Some(table_id) = self.table_manager.combine_predicate(predicate)? else { return Ok(None); @@ -261,14 +272,14 @@ impl ExecutionEngine { /// Returns the arity of the predicate if the predicate is known to the engine, /// and `None` otherwise. - pub fn predicate_arity(&self, predicate: &Identifier) -> Option { + pub fn predicate_arity(&self, predicate: &Tag) -> Option { self.analysis.all_predicates.get(predicate).copied() } /// Counts the facts of a single predicate. 
/// /// TODO: Currently only counting of in-memory facts is supported, see - pub fn count_facts_of_predicate(&self, predicate: &Identifier) -> Option { + pub fn count_facts_of_predicate(&self, predicate: &Tag) -> Option { self.table_manager.predicate_count_rows(predicate) } @@ -296,7 +307,7 @@ impl ExecutionEngine { &mut self, program: &ChaseProgram, trace: &mut ExecutionTrace, - fact: ChaseFact, + fact: GroundAtom, ) -> TraceFactHandle { let trace_handle = trace.register_fact(fact.clone()); @@ -307,7 +318,7 @@ impl ExecutionEngine { // Find the origin of the given fact let step = match self .table_manager - .find_table_row(&fact.predicate(), fact.terms()) + .find_table_row(&fact.predicate(), &fact.datavalues().collect::>()) { Some(s) => s, None => { @@ -341,15 +352,15 @@ impl ExecutionEngine { // Contains the head variable and the ground term it aligns with. let mut grounding = HashMap::::new(); - for (head_term, fact_term) in head_atom.terms().iter().zip(fact.terms().iter()) { + for (head_term, fact_term) in head_atom.terms().zip(fact.terms()) { match head_term { - PrimitiveTerm::GroundTerm(ground) => { + Primitive::Ground(ground) => { if ground != fact_term { compatible = false; break; } } - PrimitiveTerm::Variable(variable) => { + Primitive::Variable(variable) => { // Matching with existential variables should not produce any restrictions, // so we just consider universal variables here if variable.is_existential() { @@ -358,13 +369,13 @@ impl ExecutionEngine { match grounding.entry(variable.clone()) { Entry::Occupied(entry) => { - if entry.get() != fact_term { + if *entry.get() != fact_term.value() { compatible = false; break; } } Entry::Vacant(entry) => { - entry.insert(fact_term.clone()); + entry.insert(fact_term.value()); } } } @@ -404,16 +415,17 @@ impl ExecutionEngine { let next_fact_predicate = body_atom.predicate(); let next_fact_terms = body_atom .terms() - .iter() .map(|variable| { - variable_assignment - .get(variable) - .expect("Query must assign value to each variable.") - .clone() + GroundTerm::from( + variable_assignment + .get(variable) + .expect("Query must assign value to each variable.") + .clone(), + ) }) .collect::>(); - let next_fact = ChaseFact::new(next_fact_predicate, next_fact_terms); + let next_fact = GroundAtom::new(next_fact_predicate, next_fact_terms); let next_handle = self.trace_recursive(program, trace, next_fact); @@ -429,8 +441,13 @@ impl ExecutionEngine { continue; } - let rule_application = - TraceRuleApplication::new(rule_index, variable_assignment, head_index); + let rule_application = TraceRuleApplication::new( + rule_index, + PrimitiveTermMap::new(variable_assignment.into_iter().map( + |(variable, value)| (Primitive::from(variable), Primitive::from(value)), + )), + head_index, + ); let derivation = TraceDerivation::Derived(rule_application, subtraces); trace.update_status(trace_handle, TraceStatus::Success(derivation)); @@ -447,21 +464,22 @@ impl ExecutionEngine { /// Build an [ExecutionTrace] for a list of facts. /// Also returns a list containing a [TraceFactHandle] for each fact. 
+ /// + /// TODO: Verify that Fact is ground pub fn trace( &mut self, program: Program, facts: Vec, ) -> (ExecutionTrace, Vec) { let mut trace = ExecutionTrace::new(program); + let chase_program = self.program.clone(); let mut handles = Vec::new(); for fact in facts { - let chase_fact = ChaseFact::from_flat_atom(&fact.0); - - let program = self.program().clone(); + let chase_fact = ProgramChaseTranslation::new().build_fact(&fact); - handles.push(self.trace_recursive(&program, &mut trace, chase_fact)); + handles.push(self.trace_recursive(&chase_program, &mut trace, chase_fact)); } (trace, handles) diff --git a/nemo/src/execution/planning/operations.rs b/nemo/src/execution/planning/operations.rs index 248939d7d..6f513a356 100644 --- a/nemo/src/execution/planning/operations.rs +++ b/nemo/src/execution/planning/operations.rs @@ -1,12 +1,11 @@ //! This module collects different helper functions //! for creating execution nodes for specific operations. -pub(super) mod term; - pub(crate) mod aggregate; pub(crate) mod append; pub(crate) mod filter; pub(crate) mod functions; pub(crate) mod join; pub(crate) mod negation; +pub(super) mod operation; pub(crate) mod union; diff --git a/nemo/src/execution/planning/operations/aggregate.rs b/nemo/src/execution/planning/operations/aggregate.rs index 73494b938..b4f3bc10a 100644 --- a/nemo/src/execution/planning/operations/aggregate.rs +++ b/nemo/src/execution/planning/operations/aggregate.rs @@ -7,7 +7,10 @@ use nemo_physical::{ tabular::operations::{aggregate::AggregateAssignment, OperationColumnMarker, OperationTable}, }; -use crate::{execution::rule_execution::VariableTranslation, model::chase_model::ChaseAggregate}; +use crate::{ + chase_model::components::aggregate::ChaseAggregate, + execution::rule_execution::VariableTranslation, +}; fn operations_tables( input: &OperationTable, @@ -47,14 +50,14 @@ pub(crate) fn node_aggregate( aggregate: &ChaseAggregate, ) -> ExecutionNodeRef { let aggregate_input_column = *variable_translation - .get(&aggregate.input_variable) + .get(aggregate.input_variable()) .expect("aggregated variable has to be known"); let aggregate_output_column = *variable_translation - .get(&aggregate.output_variable) + .get(aggregate.output_variable()) .expect("aggregate output has to be known"); let distinct_columns: Vec<_> = aggregate - .distinct_variables + .distinct_variables() .iter() .map(|variable| { *variable_translation @@ -64,7 +67,7 @@ pub(crate) fn node_aggregate( .collect(); let group_by_columns: Vec<_> = aggregate - .group_by_variables + .group_by_variables() .iter() .map(|variable| { *variable_translation @@ -88,7 +91,7 @@ pub(crate) fn node_aggregate( output_markers, input_node, AggregateAssignment { - aggregate_operation: aggregate.aggregate_operation, + aggregate_operation: aggregate.aggregate_kind().into(), distinct_columns, group_by_columns, aggregated_column: aggregate_input_column, diff --git a/nemo/src/execution/planning/operations/append.rs b/nemo/src/execution/planning/operations/append.rs index 9ba17acfa..2415afe3a 100644 --- a/nemo/src/execution/planning/operations/append.rs +++ b/nemo/src/execution/planning/operations/append.rs @@ -10,10 +10,13 @@ use nemo_physical::{ }; use crate::{ + chase_model::components::atom::{ + primitive_atom::PrimitiveAtom, variable_atom::VariableAtom, ChaseAtom, + }, execution::rule_execution::VariableTranslation, - model::{ - chase_model::{ChaseAtom, PrimitiveAtom, VariableAtom}, - PrimitiveTerm, Variable, + rule_model::components::{ + term::primitive::{variable::Variable, Primitive}, 
+ IterableVariables, }, }; @@ -40,7 +43,7 @@ pub(crate) struct HeadInstruction { /// Given an atom, bring compute the corresponding [HeadInstruction]. /// TODO: This needs to be revised once the Type System on the logical layer has been implemented. pub(crate) fn head_instruction_from_atom(atom: &PrimitiveAtom) -> HeadInstruction { - let arity = atom.terms().len(); + let arity = atom.arity(); let mut reduced_terms = Vec::::with_capacity(arity); let mut append_instructions = Vec::>::new(); @@ -51,7 +54,7 @@ pub(crate) fn head_instruction_from_atom(atom: &PrimitiveAtom) -> HeadInstructio for term in atom.terms() { match term { - PrimitiveTerm::Variable(variable) => { + Primitive::Variable(variable) => { if !variable_set.insert(variable) { let instruction = AppendInstruction::Repeat(variable.clone()); current_append_vector.push(instruction); @@ -62,8 +65,8 @@ pub(crate) fn head_instruction_from_atom(atom: &PrimitiveAtom) -> HeadInstructio current_append_vector = append_instructions.last_mut().unwrap(); } } - PrimitiveTerm::GroundTerm(datavalue) => { - let instruction = AppendInstruction::Constant(datavalue.clone()); + Primitive::Ground(datavalue) => { + let instruction = AppendInstruction::Constant(datavalue.value()); current_append_vector.push(instruction); } } @@ -86,7 +89,7 @@ pub(crate) fn node_head_instruction( instruction: &HeadInstruction, ) -> ExecutionNodeRef { let project_markers = - variable_translation.operation_table(instruction.reduced_atom.get_variables().iter()); + variable_translation.operation_table(instruction.reduced_atom.variables()); let project_node = plan.projectreorder(project_markers.clone(), subnode); let mut append_markers = project_markers; diff --git a/nemo/src/execution/planning/operations/filter.rs b/nemo/src/execution/planning/operations/filter.rs index 5053ecde5..5a83752ed 100644 --- a/nemo/src/execution/planning/operations/filter.rs +++ b/nemo/src/execution/planning/operations/filter.rs @@ -2,43 +2,26 @@ //! which realizes a filter operation. use nemo_physical::{ - function::tree::FunctionTree, management::execution_plan::{ExecutionNodeRef, ExecutionPlan}, - tabular::operations::{Filters, OperationColumnMarker}, + tabular::operations::Filters, }; -use crate::{execution::rule_execution::VariableTranslation, model::Constraint}; - -use super::term::term_to_function_tree; - -fn constraint_to_tree( - translation: &VariableTranslation, - constraint: &Constraint, -) -> FunctionTree { - let (left_term, right_term) = constraint.terms(); - let left = term_to_function_tree(translation, left_term); - let right = term_to_function_tree(translation, right_term); +use crate::{ + chase_model::components::filter::ChaseFilter, execution::rule_execution::VariableTranslation, +}; - match constraint { - Constraint::Equals(_, _) => FunctionTree::equals(left, right), - Constraint::Unequals(_, _) => FunctionTree::unequals(left, right), - Constraint::LessThan(_, _) => FunctionTree::numeric_lessthan(left, right), - Constraint::GreaterThan(_, _) => FunctionTree::numeric_greaterthan(left, right), - Constraint::LessThanEq(_, _) => FunctionTree::numeric_lessthaneq(left, right), - Constraint::GreaterThanEq(_, _) => FunctionTree::numeric_greaterthaneq(left, right), - } -} +use super::operation::operation_term_to_function_tree; /// Calculate helper structures that define the filters that need to be applied. 
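
// A minimal sketch (not part of the patch) of the new filter representation
// that node_filter below consumes: a former Constraint::Equals becomes a
// ChaseFilter wrapping an OperationTerm of kind Equal, which
// operation_term_to_function_tree then lowers to a physical FunctionTree.
// Constructor names mirror their use in plan_tracing.rs later in this patch;
// AnyDataValue::new_integer_from_i64 is assumed from nemo_physical.
use nemo_physical::datavalues::AnyDataValue;

use crate::{
    chase_model::components::{
        filter::ChaseFilter,
        term::operation_term::{Operation, OperationTerm},
    },
    rule_model::components::term::{
        operation::operation_kind::OperationKind,
        primitive::{variable::Variable, Primitive},
    },
};

// Build the filter "?x = 5" that previously was Constraint::Equals(?x, 5).
fn example_equality_filter() -> ChaseFilter {
    let variable = OperationTerm::Primitive(Primitive::from(Variable::universal("x")));
    let constant =
        OperationTerm::Primitive(Primitive::from(AnyDataValue::new_integer_from_i64(5)));

    ChaseFilter::new(OperationTerm::Operation(Operation::new(
        OperationKind::Equal,
        vec![variable, constant],
    )))
}
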
pub(crate) fn node_filter( plan: &mut ExecutionPlan, variable_translation: &VariableTranslation, subnode: ExecutionNodeRef, - constraints: &[Constraint], + chase_filters: &[ChaseFilter], ) -> ExecutionNodeRef { - let filters = constraints + let filters = chase_filters .iter() - .map(|constraint| constraint_to_tree(variable_translation, constraint)) + .map(|operation| operation_term_to_function_tree(variable_translation, operation.filter())) .collect::(); plan.filter(subnode, filters) diff --git a/nemo/src/execution/planning/operations/functions.rs b/nemo/src/execution/planning/operations/functions.rs index c6ea34647..94263fdb7 100644 --- a/nemo/src/execution/planning/operations/functions.rs +++ b/nemo/src/execution/planning/operations/functions.rs @@ -7,25 +7,29 @@ use nemo_physical::{ tabular::operations::FunctionAssignment, }; -use crate::{execution::rule_execution::VariableTranslation, model::chase_model::Constructor}; +use crate::{ + chase_model::components::operation::ChaseOperation, + execution::rule_execution::VariableTranslation, +}; -use super::term::term_to_function_tree; +use super::operation::operation_term_to_function_tree; /// Calculate helper structures that define the filters that need to be applied. pub(crate) fn node_functions( plan: &mut ExecutionPlan, variable_translation: &VariableTranslation, subnode: ExecutionNodeRef, - constructors: &[Constructor], + operations: &[ChaseOperation], ) -> ExecutionNodeRef { let mut output_markers = subnode.markers_cloned(); let mut assignments = FunctionAssignment::new(); - for constructor in constructors { + for operation in operations { let marker = *variable_translation - .get(constructor.variable()) + .get(operation.variable()) .expect("All variables are known"); - let function_tree = term_to_function_tree(variable_translation, constructor.term()); + let function_tree = + operation_term_to_function_tree(variable_translation, operation.operation()); assignments.insert(marker, function_tree); output_markers.push(marker); diff --git a/nemo/src/execution/planning/operations/join.rs b/nemo/src/execution/planning/operations/join.rs index dc08d221c..9a56b1d36 100644 --- a/nemo/src/execution/planning/operations/join.rs +++ b/nemo/src/execution/planning/operations/join.rs @@ -6,8 +6,9 @@ use nemo_physical::{ }; use crate::{ + chase_model::components::atom::{variable_atom::VariableAtom, ChaseAtom}, execution::rule_execution::VariableTranslation, - model::chase_model::{ChaseAtom, VariableAtom}, + rule_model::components::IterableVariables, table_manager::TableManager, }; @@ -56,7 +57,7 @@ pub(crate) fn node_join( // For every atom that did not receive any update since the last rule application take all available elements for atom in &side_atoms { - let atom_markers = variable_translation.operation_table(atom.get_variables().iter()); + let atom_markers = variable_translation.operation_table(atom.variables()); let subnode = subplan_union( plan, table_manager, @@ -70,7 +71,7 @@ pub(crate) fn node_join( // For every atom before the mid point we take all the tables until the current `rule_step` for &atom in main_atoms.iter().take(atom_index) { - let atom_markers = variable_translation.operation_table(atom.get_variables().iter()); + let atom_markers = variable_translation.operation_table(atom.variables()); let subnode = subplan_union( plan, table_manager, @@ -88,13 +89,13 @@ pub(crate) fn node_join( table_manager, &main_atoms[atom_index].predicate(), step_last_applied..current_step_number, - 
variable_translation.operation_table(main_atoms[atom_index].get_variables().iter()), + variable_translation.operation_table(main_atoms[atom_index].variables()), ); seminaive_node.add_subnode(midnode); // For every atom past the mid point we take only the old tables for atom in main_atoms.iter().skip(atom_index + 1) { - let atom_markers = variable_translation.operation_table(atom.get_variables().iter()); + let atom_markers = variable_translation.operation_table(atom.variables()); let subnode = subplan_union( plan, table_manager, diff --git a/nemo/src/execution/planning/operations/negation.rs b/nemo/src/execution/planning/operations/negation.rs index f6233f111..93b47c306 100644 --- a/nemo/src/execution/planning/operations/negation.rs +++ b/nemo/src/execution/planning/operations/negation.rs @@ -4,11 +4,11 @@ use nemo_physical::management::execution_plan::{ExecutionNodeRef, ExecutionPlan}; use crate::{ - execution::rule_execution::VariableTranslation, - model::{ - chase_model::{ChaseAtom, VariableAtom}, - Constraint, + chase_model::components::{ + atom::{variable_atom::VariableAtom, ChaseAtom}, + filter::ChaseFilter, }, + execution::rule_execution::VariableTranslation, table_manager::TableManager, }; @@ -22,13 +22,13 @@ pub(crate) fn node_negation( node_main: ExecutionNodeRef, current_step_number: usize, subtracted_atoms: &[VariableAtom], - subtracted_filters: &[Vec], + subtracted_filters: &[Vec], ) -> ExecutionNodeRef { let subtracted = subtracted_atoms .iter() .zip(subtracted_filters.iter()) .map(|(atom, constraints)| { - let subtract_markers = variable_translation.operation_table(atom.terms().iter()); + let subtract_markers = variable_translation.operation_table(atom.terms()); let node = subplan_union( plan, diff --git a/nemo/src/execution/planning/operations/operation.rs b/nemo/src/execution/planning/operations/operation.rs new file mode 100644 index 000000000..a7cd81520 --- /dev/null +++ b/nemo/src/execution/planning/operations/operation.rs @@ -0,0 +1,141 @@ +//! This module contains a helper function to translate [Term] into [FunctionTree] + +use nemo_physical::{function::tree::FunctionTree, tabular::operations::OperationColumnMarker}; + +use crate::{ + chase_model::components::term::operation_term::{Operation, OperationTerm}, + execution::rule_execution::VariableTranslation, + rule_model::components::term::{ + operation::operation_kind::OperationKind, primitive::Primitive, + }, +}; + +/// Helper function to translate a [OperationTerm] into a [FunctionTree]. +pub(super) fn operation_term_to_function_tree( + translation: &VariableTranslation, + operation_term: &OperationTerm, +) -> FunctionTree { + match operation_term { + OperationTerm::Primitive(primitive) => match &primitive { + Primitive::Ground(datavalue) => FunctionTree::constant(datavalue.value()), + Primitive::Variable(variable) => FunctionTree::reference( + *translation + .get(&variable) + .expect("Every variable must be known"), + ), + }, + OperationTerm::Operation(operation) => operation_to_function_tree(translation, operation), + } +} + +macro_rules! binary { + ($func:ident, $vec:ident) => {{ + // Get ownership of the last two elements of the vector. + let right = $vec + .pop() + .expect("expected at least two elements in the vector"); + let left = $vec + .pop() + .expect("expected at least two elements in the vector"); + + // Call the function with the two arguments. + FunctionTree::$func(left, right) + }}; +} + +macro_rules! unary { + ($func:ident, $vec:ident) => {{ + // Get ownership of the last two elements of the vector. 
+ let sub = $vec + .pop() + .expect("expected at least two elements in the vector"); + + // Call the function with the two arguments. + FunctionTree::$func(sub) + }}; +} + +/// Helper function to translate a [Operation] into a [FunctionTree]. +pub(super) fn operation_to_function_tree( + translation: &VariableTranslation, + operation: &Operation, +) -> FunctionTree { + let mut sub = operation + .subterms() + .iter() + .map(|term| operation_term_to_function_tree(translation, term)) + .collect::>(); + + match operation.operation_kind() { + OperationKind::Equal => binary!(equals, sub), + OperationKind::Unequals => binary!(unequals, sub), + OperationKind::NumericSubtraction => binary!(numeric_subtraction, sub), + OperationKind::NumericDivision => binary!(numeric_division, sub), + OperationKind::NumericLogarithm => binary!(numeric_logarithm, sub), + OperationKind::NumericPower => binary!(numeric_power, sub), + OperationKind::NumericRemainder => binary!(numeric_remainder, sub), + OperationKind::NumericGreaterthaneq => binary!(numeric_greaterthaneq, sub), + OperationKind::NumericGreaterthan => binary!(numeric_greaterthan, sub), + OperationKind::NumericLessthaneq => binary!(numeric_lessthaneq, sub), + OperationKind::NumericLessthan => binary!(numeric_lessthan, sub), + OperationKind::StringCompare => binary!(string_compare, sub), + OperationKind::StringContains => binary!(string_contains, sub), + OperationKind::StringBefore => binary!(string_before, sub), + OperationKind::StringAfter => binary!(string_after, sub), + OperationKind::StringStarts => binary!(string_starts, sub), + OperationKind::StringEnds => binary!(string_ends, sub), + OperationKind::StringSubstring => { + if sub.len() == 2 { + let start = sub.pop().expect("length must be 2"); + let string = sub.pop().expect("length must be 2"); + + FunctionTree::string_substring(string, start) + } else { + let length = sub.pop().expect("length must be 3"); + let start = sub.pop().expect("length must be 3"); + let string = sub.pop().expect("length must be 3"); + + FunctionTree::string_substring_length(string, start, length) + } + } + OperationKind::BooleanNegation => unary!(boolean_negation, sub), + OperationKind::CastToDouble => unary!(casting_to_double, sub), + OperationKind::CastToFloat => unary!(casting_to_float, sub), + OperationKind::CastToInteger => unary!(casting_to_integer64, sub), + OperationKind::CanonicalString => unary!(canonical_string, sub), + OperationKind::CheckIsInteger => unary!(check_is_integer, sub), + OperationKind::CheckIsFloat => unary!(check_is_float, sub), + OperationKind::CheckIsDouble => unary!(check_is_double, sub), + OperationKind::CheckIsIri => unary!(check_is_iri, sub), + OperationKind::CheckIsNumeric => unary!(check_is_numeric, sub), + OperationKind::CheckIsNull => unary!(check_is_null, sub), + OperationKind::CheckIsString => unary!(check_is_string, sub), + OperationKind::Datatype => unary!(datatype, sub), + OperationKind::LanguageTag => unary!(languagetag, sub), + OperationKind::NumericAbsolute => unary!(numeric_absolute, sub), + OperationKind::NumericCosine => unary!(numeric_cosine, sub), + OperationKind::NumericCeil => unary!(numeric_ceil, sub), + OperationKind::NumericFloor => unary!(numeric_floor, sub), + OperationKind::NumericNegation => unary!(numeric_negation, sub), + OperationKind::NumericRound => unary!(numeric_round, sub), + OperationKind::NumericSine => unary!(numeric_sine, sub), + OperationKind::NumericSquareroot => unary!(numeric_squareroot, sub), + OperationKind::NumericTangent => unary!(numeric_tangent, 
sub), + OperationKind::StringLength => unary!(string_length, sub), + OperationKind::StringReverse => unary!(string_reverse, sub), + OperationKind::StringLowercase => unary!(string_lowercase, sub), + OperationKind::StringUppercase => unary!(string_uppercase, sub), + OperationKind::LexicalValue => unary!(lexical_value, sub), + OperationKind::NumericSum => FunctionTree::numeric_sum(sub), + OperationKind::NumericProduct => FunctionTree::numeric_product(sub), + OperationKind::BitAnd => FunctionTree::bit_and(sub), + OperationKind::BitOr => FunctionTree::bit_or(sub), + OperationKind::BitXor => FunctionTree::bit_xor(sub), + OperationKind::BooleanConjunction => FunctionTree::boolean_conjunction(sub), + OperationKind::BooleanDisjunction => FunctionTree::boolean_disjunction(sub), + OperationKind::NumericMinimum => FunctionTree::numeric_minimum(sub), + OperationKind::NumericMaximum => FunctionTree::numeric_maximum(sub), + OperationKind::NumericLukasiewicz => FunctionTree::numeric_lukasiewicz(sub), + OperationKind::StringConcatenation => FunctionTree::string_concatenation(sub), + } +} diff --git a/nemo/src/execution/planning/operations/term.rs b/nemo/src/execution/planning/operations/term.rs deleted file mode 100644 index 215f43f63..000000000 --- a/nemo/src/execution/planning/operations/term.rs +++ /dev/null @@ -1,147 +0,0 @@ -//! This module contains a helper function to translate [Term] into [FunctionTree] - -use nemo_physical::{function::tree::FunctionTree, tabular::operations::OperationColumnMarker}; - -use crate::{ - execution::rule_execution::VariableTranslation, - model::{ - BinaryOperation, NaryOperation, PrimitiveTerm, Term, TernaryOperation, UnaryOperation, - }, -}; - -/// Helper function to translate a [Term] into a [FunctionTree] -pub(super) fn term_to_function_tree( - translation: &VariableTranslation, - term: &Term, -) -> FunctionTree { - match term { - Term::Primitive(primitive) => match primitive { - PrimitiveTerm::GroundTerm(datavalue) => FunctionTree::constant(datavalue.clone()), - PrimitiveTerm::Variable(variable) => FunctionTree::reference( - *translation - .get(variable) - .expect("Every variable must be known"), - ), - }, - Term::Binary { - operation, - lhs, - rhs, - } => { - let left = term_to_function_tree(translation, lhs); - let right = term_to_function_tree(translation, rhs); - - match operation { - BinaryOperation::NumericAddition => FunctionTree::numeric_addition(left, right), - BinaryOperation::NumericSubtraction => { - FunctionTree::numeric_subtraction(left, right) - } - BinaryOperation::NumericMultiplication => { - FunctionTree::numeric_multiplication(left, right) - } - BinaryOperation::NumericDivision => FunctionTree::numeric_division(left, right), - BinaryOperation::NumericPower => FunctionTree::numeric_power(left, right), - BinaryOperation::NumericRemainder => FunctionTree::numeric_remainder(left, right), - BinaryOperation::NumericLogarithm => FunctionTree::numeric_logarithm(left, right), - BinaryOperation::StringCompare => FunctionTree::string_compare(left, right), - BinaryOperation::StringContains => FunctionTree::string_contains(left, right), - BinaryOperation::StringRegex => FunctionTree::string_regex(left, right), - BinaryOperation::StringSubstring => FunctionTree::string_subtstring(left, right), - BinaryOperation::Equal => FunctionTree::equals(left, right), - BinaryOperation::Unequals => FunctionTree::unequals(left, right), - BinaryOperation::NumericGreaterthan => { - FunctionTree::numeric_greaterthan(left, right) - } - BinaryOperation::NumericGreaterthaneq => { 
- FunctionTree::numeric_greaterthaneq(left, right) - } - BinaryOperation::NumericLessthan => FunctionTree::numeric_lessthan(left, right), - BinaryOperation::NumericLessthaneq => FunctionTree::numeric_lessthaneq(left, right), - BinaryOperation::StringBefore => FunctionTree::string_before(left, right), - BinaryOperation::StringAfter => FunctionTree::string_after(left, right), - BinaryOperation::StringStarts => FunctionTree::string_starts(left, right), - BinaryOperation::StringEnds => FunctionTree::string_ends(left, right), - } - } - Term::Unary(operation, subterm) => { - let sub = term_to_function_tree(translation, subterm); - - match operation { - UnaryOperation::BooleanNegation => FunctionTree::boolean_negation(sub), - UnaryOperation::CanonicalString => FunctionTree::canonical_string(sub), - UnaryOperation::NumericAbsolute => FunctionTree::numeric_absolute(sub), - UnaryOperation::NumericCosine => FunctionTree::numeric_cosine(sub), - UnaryOperation::NumericNegation => FunctionTree::numeric_negation(sub), - UnaryOperation::NumericSine => FunctionTree::numeric_sine(sub), - UnaryOperation::NumericSquareroot => FunctionTree::numeric_squareroot(sub), - UnaryOperation::NumericTangent => FunctionTree::numeric_tangent(sub), - UnaryOperation::StringLength => FunctionTree::string_length(sub), - UnaryOperation::StringReverse => FunctionTree::string_reverse(sub), - UnaryOperation::StringLowercase => FunctionTree::string_lowercase(sub), - UnaryOperation::StringUppercase => FunctionTree::string_uppercase(sub), - UnaryOperation::NumericCeil => FunctionTree::numeric_ceil(sub), - UnaryOperation::NumericFloor => FunctionTree::numeric_floor(sub), - UnaryOperation::NumericRound => FunctionTree::numeric_round(sub), - UnaryOperation::CheckIsInteger => FunctionTree::check_is_integer(sub), - UnaryOperation::CheckIsFloat => FunctionTree::check_is_float(sub), - UnaryOperation::CheckIsDouble => FunctionTree::check_is_double(sub), - UnaryOperation::CheckIsIri => FunctionTree::check_is_iri(sub), - UnaryOperation::CheckIsNumeric => FunctionTree::check_is_numeric(sub), - UnaryOperation::CheckIsNull => FunctionTree::check_is_null(sub), - UnaryOperation::CheckIsString => FunctionTree::check_is_string(sub), - UnaryOperation::Datatype => FunctionTree::datatype(sub), - UnaryOperation::LanguageTag => FunctionTree::languagetag(sub), - UnaryOperation::LexicalValue => FunctionTree::lexical_value(sub), - UnaryOperation::CastToInteger => FunctionTree::casting_to_integer64(sub), - UnaryOperation::CastToDouble => FunctionTree::casting_to_double(sub), - UnaryOperation::CastToFloat => FunctionTree::casting_to_float(sub), - } - } - Term::Aggregation(_) => unimplemented!("Aggregates are not implement yet"), - Term::Function(name, _) => unimplemented!( - "Function symbols are not supported yet. 
{} is not recognized as a builtin function.", - name - ), - Term::Ternary { - operation, - first, - second, - third, - } => { - let first = term_to_function_tree(translation, first); - let second = term_to_function_tree(translation, second); - let third = term_to_function_tree(translation, third); - - match operation { - TernaryOperation::StringSubstringLength => { - FunctionTree::string_subtstring_length(first, second, third) - } - } - } - Term::Nary { - operation, - parameters, - } => { - let parameters = parameters - .iter() - .map(|term| term_to_function_tree(translation, term)) - .collect::>(); - - match operation { - NaryOperation::StringConcatenation => { - FunctionTree::string_concatenation(parameters) - } - NaryOperation::BooleanConjunction => FunctionTree::boolean_conjunction(parameters), - NaryOperation::BooleanDisjunction => FunctionTree::boolean_disjunction(parameters), - NaryOperation::BitAnd => FunctionTree::bit_and(parameters), - NaryOperation::BitOr => FunctionTree::bit_or(parameters), - NaryOperation::BitXor => FunctionTree::bit_xor(parameters), - NaryOperation::NumericSum => FunctionTree::numeric_sum(parameters), - NaryOperation::NumericProduct => FunctionTree::numeric_product(parameters), - NaryOperation::NumericMinimum => FunctionTree::numeric_minimum(parameters), - NaryOperation::NumericMaximum => FunctionTree::numeric_maximum(parameters), - NaryOperation::NumericLukasiewicz => FunctionTree::numeric_lukasiewicz(parameters), - } - } - } -} diff --git a/nemo/src/execution/planning/operations/union.rs b/nemo/src/execution/planning/operations/union.rs index b9e3251c8..9d34b5b42 100644 --- a/nemo/src/execution/planning/operations/union.rs +++ b/nemo/src/execution/planning/operations/union.rs @@ -8,7 +8,7 @@ use nemo_physical::{ tabular::operations::OperationTable, }; -use crate::{model::Identifier, table_manager::TableManager}; +use crate::{rule_model::components::tag::Tag, table_manager::TableManager}; /// Given a predicate and a range of execution steps, /// adds to the given [ExecutionPlan] @@ -18,7 +18,7 @@ use crate::{model::Identifier, table_manager::TableManager}; pub(crate) fn subplan_union( plan: &mut ExecutionPlan, table_manager: &TableManager, - predicate: &Identifier, + predicate: &Tag, steps: Range, output_markers: OperationTable, ) -> ExecutionNodeRef { @@ -40,7 +40,7 @@ pub(crate) fn subplan_union( pub(super) fn _subplan_union_reordered( plan: &mut ExecutionPlan, table_manager: &TableManager, - predicate: &Identifier, + predicate: &Tag, steps: Range, output_markers: OperationTable, column_order: ColumnOrder, diff --git a/nemo/src/execution/planning/plan_aggregate.rs b/nemo/src/execution/planning/plan_aggregate.rs index 7e8d6bb0e..d3928234d 100644 --- a/nemo/src/execution/planning/plan_aggregate.rs +++ b/nemo/src/execution/planning/plan_aggregate.rs @@ -3,12 +3,14 @@ use nemo_physical::management::execution_plan::ExecutionNodeRef; use crate::{ - execution::rule_execution::VariableTranslation, - model::{ - chase_model::{ChaseAggregate, ChaseRule, Constructor}, - Constraint, + chase_model::{ + analysis::program_analysis::RuleAnalysis, + components::{ + aggregate::ChaseAggregate, filter::ChaseFilter, operation::ChaseOperation, + rule::ChaseRule, + }, }, - program_analysis::analysis::RuleAnalysis, + execution::rule_execution::VariableTranslation, table_manager::SubtableExecutionPlan, }; @@ -20,8 +22,8 @@ use super::operations::{ #[derive(Debug)] pub(crate) struct AggregateStategy { aggregate: ChaseAggregate, - aggregate_constructors: Vec, - aggregate_constraints: 
Vec, + aggregate_operation: Vec, + aggregate_filters: Vec, } impl AggregateStategy { @@ -30,10 +32,10 @@ impl AggregateStategy { Self { aggregate: rule .aggregate() - .clone() + .cloned() .expect("do not call this if there is no aggregate"), - aggregate_constructors: rule.aggregate_constructors().clone(), - aggregate_constraints: rule.aggregate_constraints().clone(), + aggregate_operation: rule.aggregate_operations().clone(), + aggregate_filters: rule.aggregate_filters().clone(), } } @@ -54,14 +56,14 @@ impl AggregateStategy { current_plan.plan_mut(), variable_translation, node_aggregation, - &self.aggregate_constructors, + &self.aggregate_operation, ); let node_result = node_filter( current_plan.plan_mut(), variable_translation, node_aggregate_functions, - &self.aggregate_constraints, + &self.aggregate_filters, ); current_plan.add_temporary_table(node_result.clone(), "Aggregation"); diff --git a/nemo/src/execution/planning/plan_body_seminaive.rs b/nemo/src/execution/planning/plan_body_seminaive.rs index e8c2e2c10..8b5262ebc 100644 --- a/nemo/src/execution/planning/plan_body_seminaive.rs +++ b/nemo/src/execution/planning/plan_body_seminaive.rs @@ -3,12 +3,14 @@ use nemo_physical::management::execution_plan::ExecutionNodeRef; use crate::{ - execution::{execution_engine::RuleInfo, rule_execution::VariableTranslation}, - model::{ - chase_model::{ChaseRule, Constructor, VariableAtom}, - Constraint, + chase_model::{ + analysis::{program_analysis::RuleAnalysis, variable_order::VariableOrder}, + components::{ + atom::variable_atom::VariableAtom, filter::ChaseFilter, operation::ChaseOperation, + rule::ChaseRule, + }, }, - program_analysis::{analysis::RuleAnalysis, variable_order::VariableOrder}, + execution::{execution_engine::RuleInfo, rule_execution::VariableTranslation}, table_manager::{SubtableExecutionPlan, TableManager}, }; @@ -23,11 +25,11 @@ use super::{ #[derive(Debug)] pub(crate) struct SeminaiveStrategy { positive_atoms: Vec, - positive_constraints: Vec, - positive_constructors: Vec, + positive_filters: Vec, + positive_operations: Vec, negative_atoms: Vec, - negative_constraints: Vec>, + negative_filters: Vec>, } impl SeminaiveStrategy { @@ -35,10 +37,10 @@ impl SeminaiveStrategy { pub(crate) fn initialize(rule: &ChaseRule, _analysis: &RuleAnalysis) -> Self { Self { positive_atoms: rule.positive_body().clone(), - positive_constraints: rule.positive_constraints().clone(), + positive_filters: rule.positive_filters().clone(), negative_atoms: rule.negative_body().clone(), - negative_constraints: rule.negative_constraints().clone(), - positive_constructors: rule.positive_constructors().clone(), + negative_filters: rule.negative_filters().clone(), + positive_operations: rule.positive_operations().clone(), } } } @@ -68,14 +70,14 @@ impl BodyStrategy for SeminaiveStrategy { current_plan.plan_mut(), variable_translation, node_join, - &self.positive_constructors, + &self.positive_operations, ); let node_body_filter = node_filter( current_plan.plan_mut(), variable_translation, node_body_functions, - &self.positive_constraints, + &self.positive_filters, ); let node_negation = node_negation( @@ -85,7 +87,7 @@ impl BodyStrategy for SeminaiveStrategy { node_body_filter, step_number, &self.negative_atoms, - &self.negative_constraints, + &self.negative_filters, ); let node_result = node_negation; diff --git a/nemo/src/execution/planning/plan_head_datalog.rs b/nemo/src/execution/planning/plan_head_datalog.rs index 403190561..c2d2aa1dd 100644 --- a/nemo/src/execution/planning/plan_head_datalog.rs +++ 
b/nemo/src/execution/planning/plan_head_datalog.rs @@ -9,12 +9,12 @@ use nemo_physical::{ }; use crate::{ - execution::{execution_engine::RuleInfo, rule_execution::VariableTranslation}, - model::{ - chase_model::{ChaseAtom, ChaseRule}, - Identifier, + chase_model::{ + analysis::program_analysis::RuleAnalysis, + components::{atom::ChaseAtom, rule::ChaseRule}, }, - program_analysis::analysis::RuleAnalysis, + execution::{execution_engine::RuleInfo, rule_execution::VariableTranslation}, + rule_model::components::tag::Tag, table_manager::{SubtableExecutionPlan, SubtableIdentifier, TableManager}, }; @@ -26,13 +26,13 @@ use super::{ /// Strategy for computing the results for a datalog (non-existential) rule. #[derive(Debug)] pub(crate) struct DatalogStrategy { - predicate_to_atoms: HashMap>, + predicate_to_atoms: HashMap>, } impl DatalogStrategy { /// Create a new [DatalogStrategy] object. pub(crate) fn initialize(rule: &ChaseRule, _analysis: &RuleAnalysis) -> Self { - let mut predicate_to_atoms = HashMap::>::new(); + let mut predicate_to_atoms = HashMap::>::new(); for (head_index, head_atom) in rule.head().iter().enumerate() { let is_aggregate_atom = if let Some(aggregate_index) = rule.aggregate_head_index() { diff --git a/nemo/src/execution/planning/plan_head_restricted.rs b/nemo/src/execution/planning/plan_head_restricted.rs index de6749a7e..e87df360f 100644 --- a/nemo/src/execution/planning/plan_head_restricted.rs +++ b/nemo/src/execution/planning/plan_head_restricted.rs @@ -12,6 +12,14 @@ use nemo_physical::{ }; use crate::{ + chase_model::{ + analysis::{program_analysis::RuleAnalysis, variable_order::VariableOrder}, + components::{ + atom::{variable_atom::VariableAtom, ChaseAtom}, + filter::ChaseFilter, + rule::ChaseRule, + }, + }, execution::{ execution_engine::RuleInfo, planning::operations::{ @@ -20,11 +28,10 @@ use crate::{ }, rule_execution::VariableTranslation, }, - model::{ - chase_model::{ChaseAtom, ChaseRule, VariableAtom}, - Constraint, Identifier, PrimitiveTerm, Variable, + rule_model::components::{ + tag::Tag, + term::primitive::{variable::Variable, Primitive}, }, - program_analysis::{analysis::RuleAnalysis, variable_order::VariableOrder}, table_manager::{SubtableExecutionPlan, SubtableIdentifier, TableManager}, }; @@ -39,17 +46,17 @@ pub(crate) struct RestrictedChaseStrategy { /// Atoms for computing the table "new satisfied matches" head_join_atoms: Vec, /// Constraints associated with computing the table "new satisfied matches" - head_join_constraints: Vec, + head_join_filters: Vec, - predicate_to_instructions: HashMap>, - predicate_to_full_existential: HashMap, + predicate_to_instructions: HashMap>, + predicate_to_full_existential: HashMap, /// The calculation of "new statified matches" is represented by an auxillary rule /// "head -> aux_predicate(frontier_variables)". /// This is the order of those variables aux_head_order: VariableOrder, /// This is the predicate of the auxillary table containing the "satisfied matches" - aux_predicate: Identifier, + aux_predicate: Tag, analysis: RuleAnalysis, } @@ -57,14 +64,14 @@ pub(crate) struct RestrictedChaseStrategy { impl RestrictedChaseStrategy { /// Create a new [RestrictedChaseStrategy] object. 
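
// Illustrative sketch (an assumption, not code from this patch): the maps
// built in DatalogStrategy::initialize above group head-atom indices by
// predicate, with Tag now serving as the predicate-name type where
// Identifier was used before. The helper below shows the same grouping
// pattern in isolation, assuming ChaseAtom::predicate() yields a Tag.
use std::collections::HashMap;

use crate::{
    chase_model::components::atom::{primitive_atom::PrimitiveAtom, ChaseAtom},
    rule_model::components::tag::Tag,
};

// Collect, for every predicate, the indices of the head atoms that use it.
fn group_head_atoms_by_predicate(head: &[PrimitiveAtom]) -> HashMap<Tag, Vec<usize>> {
    let mut grouped = HashMap::<Tag, Vec<usize>>::new();
    for (index, atom) in head.iter().enumerate() {
        grouped.entry(atom.predicate()).or_default().push(index);
    }
    grouped
}
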
pub(crate) fn initialize(rule: &ChaseRule, analysis: &RuleAnalysis) -> Self { - let mut predicate_to_instructions = HashMap::>::new(); - let mut predicate_to_full_existential = HashMap::::new(); + let hash_map = HashMap::>::new(); + let mut predicate_to_instructions = hash_map; + let mut predicate_to_full_existential = HashMap::::new(); for head_atom in rule.head() { let is_existential = head_atom .terms() - .iter() - .any(|t| matches!(t, PrimitiveTerm::Variable(Variable::Existential(_)))); + .any(|t| matches!(t, Primitive::Variable(Variable::Existential(_)))); let instructions = predicate_to_instructions .entry(head_atom.predicate()) @@ -77,14 +84,14 @@ impl RestrictedChaseStrategy { *is_full_existential &= is_existential; } - let head_join_atoms = analysis.existential_aux_rule.positive_body().clone(); - let head_join_constraints = analysis.existential_aux_rule.positive_constraints().clone(); + let head_join_atoms = analysis.existential_aux_rule().positive_body().clone(); + let head_join_filters = analysis.existential_aux_rule().positive_filters().clone(); - let aux_head = &analysis.existential_aux_rule.head()[0]; + let aux_head = &analysis.existential_aux_rule().head()[0]; let mut aux_head_order = VariableOrder::new(); let mut used_join_head_variables = HashSet::::new(); for term in aux_head.terms() { - if let PrimitiveTerm::Variable(variable) = term { + if let Primitive::Variable(variable) = term { aux_head_order.push(variable.clone()); used_join_head_variables.insert(variable.clone()); } else { @@ -96,7 +103,7 @@ impl RestrictedChaseStrategy { RestrictedChaseStrategy { head_join_atoms, - head_join_constraints, + head_join_filters, predicate_to_instructions, predicate_to_full_existential, analysis: analysis.clone(), @@ -185,7 +192,7 @@ impl HeadStrategy for RestrictedChaseStrategy { current_plan.plan_mut(), variable_translation, node_new_satisfied_matches, - &self.head_join_constraints, + &self.head_join_filters, ); current_plan.add_temporary_table( diff --git a/nemo/src/execution/planning/plan_tracing.rs b/nemo/src/execution/planning/plan_tracing.rs index a857bf66e..070342e12 100644 --- a/nemo/src/execution/planning/plan_tracing.rs +++ b/nemo/src/execution/planning/plan_tracing.rs @@ -5,12 +5,24 @@ use std::collections::HashMap; use nemo_physical::{datavalues::AnyDataValue, management::execution_plan::ExecutionNodeRef}; use crate::{ + chase_model::{ + analysis::variable_order::VariableOrder, + components::{ + atom::variable_atom::VariableAtom, + filter::ChaseFilter, + rule::ChaseRule, + term::operation_term::{Operation, OperationTerm}, + }, + }, execution::rule_execution::VariableTranslation, - model::{ - chase_model::{ChaseRule, VariableAtom}, - Constraint, Identifier, PrimitiveTerm, Term, Variable, + rule_model::components::{ + tag::Tag, + term::{ + operation::operation_kind::OperationKind, + primitive::{variable::Variable, Primitive}, + }, + IterableVariables, }, - program_analysis::variable_order::VariableOrder, table_manager::{SubtableExecutionPlan, SubtableIdentifier, TableManager}, }; @@ -20,10 +32,10 @@ use super::operations::{filter::node_filter, join::node_join, negation::node_neg #[derive(Debug)] pub(crate) struct TracingStrategy { positive_atoms: Vec, - positive_constraints: Vec, + positive_filters: Vec, negative_atoms: Vec, - negatie_constraints: Vec>, + negatie_filters: Vec>, variable_translation: VariableTranslation, } @@ -32,37 +44,45 @@ impl TracingStrategy { /// Create new [TracingStrategy] object. 
pub(crate) fn initialize(rule: &ChaseRule, grounding: HashMap) -> Self { let mut variable_translation = VariableTranslation::new(); - for variable in rule.all_variables() { + for variable in rule.variables().cloned() { variable_translation.add_marker(variable); } - let mut positive_constraints = rule.positive_constraints().clone(); + let mut positive_filters = rule.positive_filters().clone(); - let constructors = rule - .positive_constructors() + let operations = rule + .positive_operations() .iter() - .map(|constructor| (constructor.variable().clone(), constructor.term().clone())) - .collect::>(); + .map(|operation| (operation.variable().clone(), operation.operation().clone())) + .collect::>(); for (variable, value) in grounding { - if let Some(term) = constructors.get(&variable) { - positive_constraints.push(Constraint::Equals( - term.clone(), - Term::Primitive(PrimitiveTerm::GroundTerm(value)), - )); + if let Some(term) = operations.get(&variable) { + let filter = ChaseFilter::new(OperationTerm::Operation(Operation::new( + OperationKind::Equal, + vec![ + OperationTerm::Primitive(Primitive::from(value)), + term.clone(), + ], + ))); + positive_filters.push(filter); } else { - positive_constraints.push(Constraint::Equals( - Term::Primitive(PrimitiveTerm::Variable(variable)), - Term::Primitive(PrimitiveTerm::GroundTerm(value)), - )); + let filter = ChaseFilter::new(OperationTerm::Operation(Operation::new( + OperationKind::Equal, + vec![ + OperationTerm::Primitive(Primitive::from(variable)), + OperationTerm::Primitive(Primitive::from(value)), + ], + ))); + positive_filters.push(filter); } } Self { positive_atoms: rule.positive_body().clone(), - positive_constraints, + positive_filters, negative_atoms: rule.negative_body().clone(), - negatie_constraints: rule.negative_constraints().clone(), + negatie_filters: rule.negative_filters().clone(), variable_translation, } } @@ -91,7 +111,7 @@ impl TracingStrategy { current_plan.plan_mut(), &self.variable_translation, node_join, - &self.positive_constraints, + &self.positive_filters, ); let node_negation = node_negation( @@ -101,14 +121,14 @@ impl TracingStrategy { node_filter, step_number, &self.negative_atoms, - &self.negatie_constraints, + &self.negatie_filters, ); current_plan.add_permanent_table( node_negation.clone(), "Tracing Query", "Tracing Query", - SubtableIdentifier::new(Identifier::new(String::from("_TRACING")), step_number), + SubtableIdentifier::new(Tag::new(String::from("_TRACING")), step_number), ); node_negation diff --git a/nemo/src/execution/planning/strategy_body.rs b/nemo/src/execution/planning/strategy_body.rs index d8de7f0ec..aded0d865 100644 --- a/nemo/src/execution/planning/strategy_body.rs +++ b/nemo/src/execution/planning/strategy_body.rs @@ -4,8 +4,8 @@ use nemo_physical::management::execution_plan::ExecutionNodeRef; use crate::{ + chase_model::analysis::variable_order::VariableOrder, execution::{execution_engine::RuleInfo, rule_execution::VariableTranslation}, - program_analysis::variable_order::VariableOrder, table_manager::{SubtableExecutionPlan, TableManager}, }; diff --git a/nemo/src/execution/rule_execution.rs b/nemo/src/execution/rule_execution.rs index 6f77b6283..634d0da87 100644 --- a/nemo/src/execution/rule_execution.rs +++ b/nemo/src/execution/rule_execution.rs @@ -3,9 +3,12 @@ use nemo_physical::tabular::operations::OperationTableGenerator; use crate::{ + chase_model::{ + analysis::{program_analysis::RuleAnalysis, variable_order::VariableOrder}, + components::rule::ChaseRule, + }, error::Error, - 
model::{chase_model::ChaseRule, Identifier, Variable}, - program_analysis::{analysis::RuleAnalysis, variable_order::VariableOrder}, + rule_model::components::{tag::Tag, term::primitive::variable::Variable, IterableVariables}, table_manager::{SubtableExecutionPlan, TableManager}, }; @@ -45,10 +48,10 @@ impl RuleExecution { /// Create new [RuleExecution]. pub(crate) fn initialize(rule: &ChaseRule, analysis: &RuleAnalysis) -> Self { let mut variable_translation = VariableTranslation::new(); - for variable in rule.all_variables() { + for variable in rule.variables().cloned() { variable_translation.add_marker(variable); } - for variable in analysis.existential_aux_rule.all_variables() { + for variable in analysis.existential_aux_rule().variables().cloned() { variable_translation.add_marker(variable); } @@ -80,7 +83,7 @@ impl RuleExecution { table_manager: &mut TableManager, rule_info: &RuleInfo, step_number: usize, - ) -> Result, Error> { + ) -> Result, Error> { log::info!( "Available orders: {}", self.promising_variable_orders.iter().enumerate().fold( diff --git a/nemo/src/execution/selection_strategy.rs b/nemo/src/execution/selection_strategy.rs index f5d58b134..daccbbf9e 100644 --- a/nemo/src/execution/selection_strategy.rs +++ b/nemo/src/execution/selection_strategy.rs @@ -3,8 +3,8 @@ pub mod strategy; pub(crate) mod strategy_graph; -pub mod strategy_random; -pub mod strategy_round_robin; -pub mod strategy_stratified_negation; +pub(crate) mod strategy_random; +pub(crate) mod strategy_round_robin; +pub(crate) mod strategy_stratified_negation; pub(crate) mod dependency_graph; diff --git a/nemo/src/execution/selection_strategy/dependency_graph/graph_constructor.rs b/nemo/src/execution/selection_strategy/dependency_graph/graph_constructor.rs index 591cc41b4..7b7abcd42 100644 --- a/nemo/src/execution/selection_strategy/dependency_graph/graph_constructor.rs +++ b/nemo/src/execution/selection_strategy/dependency_graph/graph_constructor.rs @@ -2,7 +2,7 @@ use petgraph::{adj::NodeIndex, Directed, Graph}; -use crate::{model::chase_model::ChaseRule, program_analysis::analysis::RuleAnalysis}; +use crate::chase_model::{analysis::program_analysis::RuleAnalysis, components::rule::ChaseRule}; /// Graph that represents a prioritization between rules. 
 pub type DependencyGraph = Graph<usize, (), Directed>;
diff --git a/nemo/src/execution/selection_strategy/dependency_graph/graph_positive.rs b/nemo/src/execution/selection_strategy/dependency_graph/graph_positive.rs
index adcb7a341..21f5f7ea5 100644
--- a/nemo/src/execution/selection_strategy/dependency_graph/graph_positive.rs
+++ b/nemo/src/execution/selection_strategy/dependency_graph/graph_positive.rs
@@ -4,8 +4,8 @@ use std::collections::HashMap;
 
 use crate::{
-    model::{chase_model::ChaseRule, Identifier},
-    program_analysis::analysis::RuleAnalysis,
+    chase_model::{analysis::program_analysis::RuleAnalysis, components::rule::ChaseRule},
+    rule_model::components::tag::Tag,
 };
 
 use super::graph_constructor::{DependencyGraph, DependencyGraphConstructor};
 
@@ -20,8 +20,8 @@ impl DependencyGraphConstructor for GraphConstructorPositive {
         debug_assert!(rules.len() == rule_analyses.len());
         let rule_count = rules.len();
 
-        let mut predicate_to_rules_body = HashMap::<Identifier, Vec<usize>>::new();
-        let mut predicate_to_rules_head = HashMap::<Identifier, Vec<usize>>::new();
+        let mut predicate_to_rules_body = HashMap::<Tag, Vec<usize>>::new();
+        let mut predicate_to_rules_head = HashMap::<Tag, Vec<usize>>::new();
 
         for (rule_index, rule_analysis) in rule_analyses.iter().enumerate() {
             for body_predicate in &rule_analysis.positive_body_predicates {
diff --git a/nemo/src/execution/selection_strategy/strategy.rs b/nemo/src/execution/selection_strategy/strategy.rs
index 2f3633776..9ea70b5cf 100644
--- a/nemo/src/execution/selection_strategy/strategy.rs
+++ b/nemo/src/execution/selection_strategy/strategy.rs
@@ -2,7 +2,7 @@
 
 use thiserror::Error;
 
-use crate::{model::chase_model::ChaseRule, program_analysis::analysis::RuleAnalysis};
+use crate::chase_model::{analysis::program_analysis::RuleAnalysis, components::rule::ChaseRule};
 
 /// Errors that can occur while creating a strategy.
 #[derive(Error, Debug, Copy, Clone)]
diff --git a/nemo/src/execution/selection_strategy/strategy_graph.rs b/nemo/src/execution/selection_strategy/strategy_graph.rs
index 79392aabb..144187fd9 100644
--- a/nemo/src/execution/selection_strategy/strategy_graph.rs
+++ b/nemo/src/execution/selection_strategy/strategy_graph.rs
@@ -2,7 +2,7 @@
 
 use std::marker::PhantomData;
 
-use crate::{model::chase_model::ChaseRule, program_analysis::analysis::RuleAnalysis};
+use crate::chase_model::{analysis::program_analysis::RuleAnalysis, components::rule::ChaseRule};
 
 use super::{
     dependency_graph::graph_constructor::DependencyGraphConstructor,
diff --git a/nemo/src/execution/selection_strategy/strategy_random.rs b/nemo/src/execution/selection_strategy/strategy_random.rs
index 3e4d922d3..fee07e113 100644
--- a/nemo/src/execution/selection_strategy/strategy_random.rs
+++ b/nemo/src/execution/selection_strategy/strategy_random.rs
@@ -4,7 +4,7 @@ use std::collections::HashSet;
 
 use rand::Rng;
 
-use crate::{model::chase_model::ChaseRule, program_analysis::analysis::RuleAnalysis};
+use crate::chase_model::{analysis::program_analysis::RuleAnalysis, components::rule::ChaseRule};
 
 use super::strategy::{RuleSelectionStrategy, SelectionStrategyError};
 
diff --git a/nemo/src/execution/selection_strategy/strategy_round_robin.rs b/nemo/src/execution/selection_strategy/strategy_round_robin.rs
index a10343a78..bb6dd5de0 100644
--- a/nemo/src/execution/selection_strategy/strategy_round_robin.rs
+++ b/nemo/src/execution/selection_strategy/strategy_round_robin.rs
@@ -1,6 +1,6 @@
 //! Defines the execution strategy by which each rule is applied in the order it appears.
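
// Minimal self-contained sketch (illustrative names, not the actual
// StrategyRoundRobin API) of the round-robin idea this module implements:
// hand out rule indices in order, and report a fixed point once a full pass
// over all rules produced no new facts.
struct RoundRobin {
    rule_count: usize,
    current: usize,
    steps_without_derivation: usize,
}

impl RoundRobin {
    // Returns the next rule to apply, or None once no rule derived
    // anything new for a whole round.
    fn next_rule(&mut self, previous_step_derived_new_facts: bool) -> Option<usize> {
        if self.rule_count == 0 {
            return None;
        }
        if previous_step_derived_new_facts {
            self.steps_without_derivation = 0;
        } else {
            self.steps_without_derivation += 1;
            if self.steps_without_derivation >= self.rule_count {
                return None; // fixed point: a full pass changed nothing
            }
        }
        let next = self.current;
        self.current = (self.current + 1) % self.rule_count;
        Some(next)
    }
}
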
 
-use crate::{model::chase_model::ChaseRule, program_analysis::analysis::RuleAnalysis};
+use crate::chase_model::{analysis::program_analysis::RuleAnalysis, components::rule::ChaseRule};
 
 use super::strategy::{RuleSelectionStrategy, SelectionStrategyError};
 
diff --git a/nemo/src/execution/selection_strategy/strategy_stratified_negation.rs b/nemo/src/execution/selection_strategy/strategy_stratified_negation.rs
index 31d580032..acc26dd76 100644
--- a/nemo/src/execution/selection_strategy/strategy_stratified_negation.rs
+++ b/nemo/src/execution/selection_strategy/strategy_stratified_negation.rs
@@ -5,8 +5,8 @@ use std::collections::HashMap;
 
 use petgraph::Directed;
 
 use crate::{
-    model::{chase_model::ChaseRule, Identifier},
-    program_analysis::analysis::RuleAnalysis,
+    chase_model::{analysis::program_analysis::RuleAnalysis, components::rule::ChaseRule},
+    rule_model::components::tag::Tag,
     util::labeled_graph::LabeledGraph,
 };
@@ -34,9 +34,9 @@ pub struct StrategyStratifiedNegation {
 
 impl StrategyStratifiedNegation {
     fn build_graph(rule_analyses: &[&RuleAnalysis]) -> NegationGraph {
-        let mut predicate_to_rules_body_positive = HashMap::<Identifier, Vec<usize>>::new();
-        let mut predicate_to_rules_body_negative = HashMap::<Identifier, Vec<usize>>::new();
-        let mut predicate_to_rules_head = HashMap::<Identifier, Vec<usize>>::new();
+        let mut predicate_to_rules_body_positive = HashMap::<Tag, Vec<usize>>::new();
+        let mut predicate_to_rules_body_negative = HashMap::<Tag, Vec<usize>>::new();
+        let mut predicate_to_rules_head = HashMap::<Tag, Vec<usize>>::new();
 
         for (rule_index, rule_analysis) in rule_analyses.iter().enumerate() {
             for body_predicate in &rule_analysis.positive_body_predicates {
diff --git a/nemo/src/execution/tracing/trace.rs b/nemo/src/execution/tracing/trace.rs
index f6da33b66..de32e9efa 100644
--- a/nemo/src/execution/tracing/trace.rs
+++ b/nemo/src/execution/tracing/trace.rs
@@ -1,19 +1,19 @@
 //! This module contains basic data structures for tracing the origins of derived facts.
 
-use std::{
-    borrow::Cow,
-    collections::{HashMap, HashSet},
-};
+use std::{borrow::Cow, collections::HashSet};
 
 use ascii_tree::write_tree;
-use nemo_physical::datavalues::AnyDataValue;
 use petgraph::graph::{DiGraph, NodeIndex};
 use petgraph_graphml::GraphMl;
 use serde::Serialize;
 
-use crate::model::{
-    chase_model::{ChaseAtom, ChaseFact},
-    Atom, PrimitiveTerm, Term, Variable,
+use crate::{
+    chase_model::components::atom::{ground_atom::GroundAtom, ChaseAtom},
+    rule_model::{
+        components::{fact::Fact, rule::Rule},
+        program::Program,
+        term_map::PrimitiveTermMap,
+    },
 };
 
 /// Index of a rule within a [Program]
@@ -25,18 +25,14 @@ pub(crate) struct TraceRuleApplication {
     /// Index of the rule that was applied
     rule_index: RuleIndex,
     /// Variable assignment used during the rule application
-    assignment: HashMap<Variable, AnyDataValue>,
+    assignment: PrimitiveTermMap,
     /// Index of the head atom which produced the fact under consideration
     _position: usize,
 }
 
 impl TraceRuleApplication {
     /// Create new [TraceRuleApplication].
-    pub fn new(
-        rule_index: RuleIndex,
-        assignment: HashMap<Variable, AnyDataValue>,
-        _position: usize,
-    ) -> Self {
+    pub fn new(rule_index: RuleIndex, assignment: PrimitiveTermMap, _position: usize) -> Self {
         Self {
             rule_index,
             assignment,
@@ -70,14 +66,14 @@ pub(crate) enum TraceStatus {
 }
 
 impl TraceStatus {
-    /// Return `true` when fact was successfully derived
-    /// and `false` otherwise.
+    /// Return true when fact was successfully derived
+    /// and false otherwise.
pub fn is_success(&self) -> bool { matches!(self, TraceStatus::Success(_)) } - /// Return `true` if it has already been decided whether - /// a given fact has been derived and `false` otherwise. + /// Return true if it has already been decided whether + /// a given fact has been derived and false otherwise. pub fn is_known(&self) -> bool { !matches!(self, TraceStatus::Unknown) } @@ -87,7 +83,7 @@ impl TraceStatus { #[derive(Debug)] struct TracedFact { /// The considered fact - fact: ChaseFact, + fact: GroundAtom, /// Its current status with resepect to its derivablity in the chase status: TraceStatus, } @@ -119,10 +115,9 @@ impl ExecutionTrace { &mut self.facts[handle.0] } - /// Search a given [ChaseFact] in `self.facts`. + /// Search a given [GroundAtom] in self.facts. /// Also takes into account that the interpretation of a constant depends on its type. - - fn find_fact(&self, fact: &ChaseFact) -> Option { + fn find_fact(&self, fact: &GroundAtom) -> Option { for (fact_index, traced_fact) in self.facts.iter().enumerate() { if traced_fact.fact.predicate() != fact.predicate() || traced_fact.fact.arity() != fact.arity() @@ -131,7 +126,7 @@ impl ExecutionTrace { } let mut identical = true; - for (term_fact, term_traced_fact) in fact.terms().iter().zip(traced_fact.fact.terms()) { + for (term_fact, term_traced_fact) in fact.terms().zip(traced_fact.fact.terms()) { if term_fact != term_traced_fact { identical = false; break; @@ -146,12 +141,12 @@ impl ExecutionTrace { None } - /// Registers a new [ChaseFact]. + /// Registers a new [GroundAtom]. /// /// If the fact was not already known then it will return a fresh handle - /// with the status `TraceStatus::Known`. + /// with the status TraceStatus::Known. /// Otherwise a handle to the existing fact will be returned. - pub fn register_fact(&mut self, fact: ChaseFact) -> TraceFactHandle { + pub fn register_fact(&mut self, fact: GroundAtom) -> TraceFactHandle { if let Some(handle) = self.find_fact(&fact) { handle } else { @@ -182,35 +177,25 @@ pub struct TraceTreeRuleApplication { /// Rule that was applied pub rule: Rule, /// Variable assignment used during the rule application - pub assignment: HashMap, + pub assignment: PrimitiveTermMap, /// Index of the head atom which produced the fact under consideration _position: usize, } impl TraceTreeRuleApplication { - /// Instantiate the given rule with its assignment producing a [`Rule`] with only ground terms. + /// Instantiate the given rule with its assignment producing a [Rule] with only ground terms. fn to_instantiated_rule(&self) -> Rule { let mut rule = self.rule.clone(); - rule.apply_assignment( - &self - .assignment - .iter() - .map(|(variable, constant)| { - ( - variable.clone(), - Term::Primitive(PrimitiveTerm::GroundTerm(constant.clone())), - ) - }) - .collect(), - ); + self.assignment.apply(&mut rule); + rule } - /// Get the [`Atom`] that was produced by this rule application. - fn to_derived_atom(&self) -> Atom { + /// Get the [Fact] that was produced by this rule application. + fn to_derived_atom(&self) -> Fact { let rule = self.to_instantiated_rule(); let derived_atom = rule.head()[self._position].clone(); - derived_atom + Fact::from(derived_atom) } /// Get a string representation of the Instantiated rule. 
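
// Sketch (assumption) of the instantiation step used by to_instantiated_rule
// above: a PrimitiveTermMap sends variables, viewed as Primitive terms, to
// ground values, and apply() rewrites the rule in place. Rule::parse and
// Variable::universal are used as in the tests below; PrimitiveTermMap::new
// is assumed to accept any iterator of Primitive pairs, as those tests suggest.
use nemo_physical::datavalues::AnyDataValue;

use crate::rule_model::{
    components::{
        rule::Rule,
        term::primitive::{variable::Variable, Primitive},
    },
    term_map::PrimitiveTermMap,
};

// Instantiate P(?x) :- Q(?x) with ?x -> a, yielding P(a) :- Q(a).
fn instantiate_example() -> Rule {
    let mut rule = Rule::parse("P(?x) :- Q(?x)").unwrap();
    let assignment = PrimitiveTermMap::new([(
        Primitive::from(Variable::universal("x")),
        Primitive::from(AnyDataValue::new_iri(String::from("a"))),
    )]);
    assignment.apply(&mut rule);
    rule
}
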
@@ -219,18 +204,18 @@ impl TraceTreeRuleApplication { } } -/// Tree representation of an [`ExecutionTrace`] from a given start node +/// Tree representation of an [ExecutionTrace] from a given start node #[derive(Debug, Clone)] pub enum ExecutionTraceTree { /// Node represent a fact in the initial data base - Fact(ChaseFact), + Fact(GroundAtom), /// Node represents a derived fact Rule(TraceTreeRuleApplication, Vec), } #[derive(Debug)] enum TracePetGraphNodeLabel { - Fact(Atom), + Fact(Fact), Rule(Rule), } @@ -264,7 +249,7 @@ impl ExecutionTraceTree { let next_node_index = match next_node { Self::Fact(ref chase_fact) => { let next_node_index = graph - .add_node(TracePetGraphNodeLabel::Fact(Atom::from(chase_fact.clone()))); + .add_node(TracePetGraphNodeLabel::Fact(Fact::from(chase_fact.clone()))); if let Some(parent_node_index) = parent_node_index_opt { graph.add_edge(next_node_index, parent_node_index, ()); } @@ -300,7 +285,7 @@ impl ExecutionTraceTree { graph } - /// Return [`ExecutionTraceTree`] in [GraphML](http://graphml.graphdrawing.org/) format (for [Evonne](https://github.com/imldresden/evonne) integration) + /// Return [ExecutionTraceTree] in [GraphML](http://graphml.graphdrawing.org/) format (for [Evonne](https://github.com/imldresden/evonne) integration) pub fn to_graphml(&self) -> String { let petgraph = self.to_petgraph(); GraphMl::new(&petgraph) @@ -338,7 +323,7 @@ impl ExecutionTrace { } let tree_application = TraceTreeRuleApplication { - rule: self.program.rules()[application.rule_index].clone(), + rule: self.program.rule(application.rule_index).clone(), assignment: application.assignment.clone(), _position: application._position, }; @@ -352,7 +337,7 @@ impl ExecutionTrace { } } -/// Represents an inference in an [`ExecutionTraceJson`] +/// Represents an inference in an [ExecutionTraceJson] #[derive(Debug, Serialize)] struct ExecutionTraceJsonInference { #[serde(rename = "ruleName")] @@ -387,7 +372,7 @@ impl ExecutionTrace { fn json_inference( &self, derivation: &TraceDerivation, - conclusion: &ChaseFact, + conclusion: &GroundAtom, ) -> ExecutionTraceJsonInference { const RULE_NAME_FACT: &str = "Asserted"; @@ -398,7 +383,7 @@ impl ExecutionTrace { vec![], ), TraceDerivation::Derived(application, premises_handles) => { - let rule = &self.program.rules()[application.rule_index]; + let rule = self.program.rule(application.rule_index); let premises = premises_handles .iter() @@ -456,117 +441,65 @@ impl ExecutionTrace { #[cfg(test)] mod test { - use std::collections::HashMap; - use nemo_physical::datavalues::AnyDataValue; use crate::{ + chase_model::components::atom::ground_atom::GroundAtom, execution::tracing::trace::{TraceDerivation, TraceStatus}, - model::{ - chase_model::ChaseFact, Atom, Identifier, Literal, PrimitiveTerm, Program, Rule, Term, - Variable, + rule_model::{ + components::{ + atom::Atom, + rule::Rule, + term::primitive::{variable::Variable, Primitive}, + ProgramComponent, + }, + program::ProgramBuilder, + term_map::PrimitiveTermMap, }, }; use super::{ExecutionTrace, TraceRuleApplication}; macro_rules! variable_assignment { - ($($k:expr => $v:expr),*) => { - [$(($k, $v)),*] - .into_iter() - .map(|(k, v)| { - ( - Variable::Universal(k.to_string()), - AnyDataValue::new_iri(v.to_string()), - ) - }) - .collect() - }; - } - - macro_rules! atom_term { - (? 
$var:expr ) => { - Term::Primitive(PrimitiveTerm::Variable(Variable::Universal( - $var.to_string(), - ))) - }; - } + ($($k:expr => $v:expr),*) => {{ + let terms = [$(($k, $v)),*] + .into_iter() + .map(|(k, v)| { + ( + Primitive::from(Variable::universal(k)), + Primitive::from(AnyDataValue::new_iri(v.to_string())), + ) + }); - macro_rules! atom { - ( $pred:expr; $( $marker:tt $t:tt ),* ) => { - Atom::new(Identifier($pred.to_string()), vec![ $( atom_term!( $marker $t ) ),* ]) - }; + PrimitiveTermMap::new(terms) + }}; } fn test_trace() -> ExecutionTrace { - // P(?x, ?y) :- Q(?y, ?x) . - let rule_1 = Rule::new( - vec![atom!("P"; ?"x", ?"y")], - vec![Literal::Positive(atom!("Q"; ?"y", ?"x"))], - vec![], - ); + let rule_1 = Rule::parse("P(?x, ?y) :- Q(?y, ?x)").unwrap(); let rule_1_assignment = variable_assignment!("x" => "b", "y" => "a"); - // S(?x) :- T(?x) . - let rule_2 = Rule::new( - vec![atom!("S"; ?"x")], - vec![Literal::Positive(atom!("T"; ?"x"))], - vec![], - ); - + let rule_2 = Rule::parse("S(?x) :- T(?x)").unwrap(); let rule_2_assignment = variable_assignment!("x" => "a"); - // R(?x, ?y) :- P(?x, ?y), S(?y) . - let rule_3 = Rule::new( - vec![atom!("R"; ?"x", ?"y")], - vec![ - Literal::Positive(atom!("P"; ?"x", ?"y")), - Literal::Positive(atom!("S"; ?"y")), - ], - vec![], - ); - let rule_3_assignment: HashMap<_, _> = variable_assignment!("x" => "b", "y" => "a"); - - let q_ab = ChaseFact::new( - Identifier("Q".to_string()), - vec![ - AnyDataValue::new_iri("a".to_string()), - AnyDataValue::new_iri("b".to_string()), - ], - ); - - let p_ba = ChaseFact::new( - Identifier("P".to_string()), - vec![ - AnyDataValue::new_iri("b".to_string()), - AnyDataValue::new_iri("a".to_string()), - ], - ); - - let r_ba = ChaseFact::new( - Identifier("R".to_string()), - vec![ - AnyDataValue::new_iri("b".to_string()), - AnyDataValue::new_iri("a".to_string()), - ], - ); - - let s_a = ChaseFact::new( - Identifier("S".to_string()), - vec![AnyDataValue::new_iri("a".to_string())], - ); + let rule_3 = Rule::parse("R(?x, ?y) :- P(?x, ?y), S(?y)").unwrap(); + let rule_3_assignment = variable_assignment!("x" => "b", "y" => "a"); - let t_a = ChaseFact::new( - Identifier("T".to_string()), - vec![AnyDataValue::new_iri("a".to_string())], - ); + let q_ab = GroundAtom::try_from(Atom::parse("Q(a,b)").unwrap()).unwrap(); + let p_ba = GroundAtom::try_from(Atom::parse("P(b,a)").unwrap()).unwrap(); + let r_ba = GroundAtom::try_from(Atom::parse("R(b,a)").unwrap()).unwrap(); + let s_a = GroundAtom::try_from(Atom::parse("S(a)").unwrap()).unwrap(); + let t_a = GroundAtom::try_from(Atom::parse("T(a)").unwrap()).unwrap(); - let rules = vec![rule_1, rule_2, rule_3]; let rule_1_index = 0; let rule_2_index = 1; let rule_3_index = 2; - let program = Program::builder().rules(rules).build(); + let mut program = ProgramBuilder::default(); + program.add_rule(rule_1); + program.add_rule(rule_2); + program.add_rule(rule_3); + let program = program.finalize(); let mut trace = ExecutionTrace::new(program); @@ -606,13 +539,8 @@ mod test { #[test] fn trace_ascii() { let trace = test_trace(); - let r_ba = ChaseFact::new( - Identifier("R".to_string()), - vec![ - AnyDataValue::new_iri("b".to_string()), - AnyDataValue::new_iri("a".to_string()), - ], - ); + let r_ba = GroundAtom::try_from(Atom::parse("R(b,a)").unwrap()).unwrap(); + let trace_r_ba = trace.find_fact(&r_ba).unwrap(); let trace_string = r#" R(b, a) :- P(b, a), S(a) . 
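
A side note on the test rewrite visible in these hunks: instead of assembling rules and facts from AST pieces (the removed atom!/atom_term! machinery), everything is now parsed from its textual form and converted where needed. The pattern, condensed (unwrap as in the tests):

// Rules are parsed directly from their textual form ...
let rule = Rule::parse("R(?x, ?y) :- P(?x, ?y), S(?y)").unwrap();

// ... and ground facts are parsed as atoms, then checked for
// groundness via the TryFrom conversion.
let r_ba = GroundAtom::try_from(Atom::parse("R(b,a)").unwrap()).unwrap();
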
@@ -632,20 +560,8 @@ mod test { fn trace_json() { let trace = test_trace(); - let r_ba = ChaseFact::new( - Identifier("R".to_string()), - vec![ - AnyDataValue::new_iri("b".to_string()), - AnyDataValue::new_iri("a".to_string()), - ], - ); - let p_ba = ChaseFact::new( - Identifier("P".to_string()), - vec![ - AnyDataValue::new_iri("b".to_string()), - AnyDataValue::new_iri("a".to_string()), - ], - ); + let r_ba = GroundAtom::try_from(Atom::parse("R(b,a)").unwrap()).unwrap(); + let p_ba = GroundAtom::try_from(Atom::parse("P(b,a)").unwrap()).unwrap(); let trace_r_ba = trace.find_fact(&r_ba).unwrap(); let trace_p_ba = trace.find_fact(&p_ba).unwrap(); diff --git a/nemo/src/io/error.rs b/nemo/src/io/error.rs index cc55dbc72..e9b839613 100644 --- a/nemo/src/io/error.rs +++ b/nemo/src/io/error.rs @@ -1,3 +1,5 @@ +//! This module contains errors relating to io. + use std::path::PathBuf; use nemo_physical::datavalues::AnyDataValue; diff --git a/nemo/src/io/export_manager.rs b/nemo/src/io/export_manager.rs index 0c656b123..f3dc4be64 100644 --- a/nemo/src/io/export_manager.rs +++ b/nemo/src/io/export_manager.rs @@ -140,94 +140,4 @@ impl ExportManager { Ok(export_handler.resource_is_stdout()) } - - // /// Export a (possibly empty) table according to the given [ExportDirective]. - // /// If the table is empty (i.e., [Option<_>::None]), an empty output file will be created. - // /// - // /// The `predicate_arity` is the arity of the predicate that is to be exported. This information - // /// is used for validation and as a hint to exporters that were not initialized with details - // /// about the arity. - // pub fn export_table<'a>( - // &self, - // export_directive: &ExportDirective, - // table: Option> + 'a>, - // predicate_arity: usize, - // ) -> Result { - // if self.disable_write { - // return Ok(false); - // } - - // let handler = ImportExportHandlers::export_handler(export_directive)?; - - // let writer = self.writer(&*handler, export_directive.predicate())?; - - // self.export_table_with_handler_writer(&*handler, writer, table, predicate_arity)?; - - // Ok(handler.resource_is_stdout()) - // } - - // /// Export a (possibly empty) table according to the given [ExportDirective], - // /// but direct output into the given writer instead of using whatever - // /// resource the directive specifies. - // /// - // /// The `predicate_arity` is the arity of the predicate that is to be exported. This information - // /// is used for validation and as a hint to exporters that were not initialized with details - // /// about the arity. - // /// - // /// This function ignores [ExportManager::disable_write]. - // pub fn export_table_with_writer<'a>( - // &self, - // export_directive: &ExportDirective, - // writer: Box, - // table: Option> + 'a>, - // predicate_arity: usize, - // ) -> Result<(), Error> { - // let handler = ImportExportHandlers::export_handler(export_directive)?; - // self.export_table_with_handler_writer(&*handler, writer, table, predicate_arity) - // } - - // /// Create a writer based on an export handler. The predicate is used to - // /// obtain a default file name if needed. - // /// - // /// This function may already create directories, and should not be used if - // /// [ExportManager::disable_write] is `true`. 
- // fn writer( - // &self, - // export_handler: &dyn ImportExportHandler, - // predicate: &Identifier, - // ) -> Result, Error> { - // if export_handler.resource_is_stdout() { - // Ok(Box::new(std::io::stdout().lock())) - // } else { - // let output_path = self.output_file_path(export_handler, predicate); - - // log::info!( - // "Exporting predicate \"{}\" to {output_path:?}", - // predicate.name() - // ); - - // if let Some(parent) = output_path.parent() { - // create_dir_all(parent)?; - // } - - // export_handler - // .compression_format() - // .unwrap_or(self.compression_format) - // .file_writer(output_path, Self::open_options(self.overwrite)) - // } - // } - - // /// Provide suitable options writing to files under the given settings. - // fn open_options(overwrite: bool) -> OpenOptions { - // let mut options = OpenOptions::new(); - // options.write(true); - - // if overwrite { - // options.create(true).truncate(true); - // } else { - // options.create_new(true); - // }; - - // options - // } } diff --git a/nemo/src/io/formats/dsv/value_format.rs b/nemo/src/io/formats/dsv/value_format.rs index fad503c2c..0f52d73c9 100644 --- a/nemo/src/io/formats/dsv/value_format.rs +++ b/nemo/src/io/formats/dsv/value_format.rs @@ -104,7 +104,7 @@ impl DsvValueFormats { } /// Return whether the tuple is empty. - pub(crate) fn is_empty(&self) -> bool { + pub(crate) fn _is_empty(&self) -> bool { self.len() == 0 } diff --git a/nemo/src/io/formats/rdf/value_format.rs b/nemo/src/io/formats/rdf/value_format.rs index 6c1ed53d6..a4bbecd1f 100644 --- a/nemo/src/io/formats/rdf/value_format.rs +++ b/nemo/src/io/formats/rdf/value_format.rs @@ -70,7 +70,7 @@ impl RdfValueFormats { } /// Return whether the tuple is empty. - pub(crate) fn is_empty(&self) -> bool { + pub(crate) fn _is_empty(&self) -> bool { self.len() == 0 } diff --git a/nemo/src/io/import_manager.rs b/nemo/src/io/import_manager.rs index 4fd32b1b6..c9d80fabf 100644 --- a/nemo/src/io/import_manager.rs +++ b/nemo/src/io/import_manager.rs @@ -20,52 +20,6 @@ impl ImportManager { Self { resource_providers } } - // /// Validates the given [ImportDirective]. - // /// - // /// TODO: Currently, this only checks the coherence of the given settings, - // /// without actually trying to access the resource. Some set-ups, such as WASM, - // /// may actually want to validate without such a check, but this can be done - // /// via `resource()`. - // pub fn validate(&self, import_directive: &ImportDirective) -> Result<(), Error> { - // ImportExportHandlers::import_handler(import_directive)?; - // Ok(()) - // } - - // /// Returns the resource that data is to be imported from according - // /// to this [ImportDirective]. - // pub fn resource(import_directive: &ImportDirective) -> Result { - // let handler = ImportExportHandlers::import_handler(import_directive)?; - // if let Some(resource) = handler.resource() { - // Ok(resource) - // } else { - // unreachable!("handler validation should make sure that all imports have a resource"); - // } - // } - - // /// Constructs a [TableProvider] from the given [ImportDirective]. - // /// The arity, if given, defines the expected arity of the data: it is validated if - // /// the import directive is compatible with this assumption. 
- // pub fn table_provider( - // &self, - // import_directive: &ImportDirective, - // expected_arity: Option, - // ) -> Result, Error> { - // let handler = ImportExportHandlers::import_handler(import_directive)?; - - // let arity; - // if let Some(expected_arity) = expected_arity { - // arity = expected_arity; - // } else if let Some(expected_arity) = handler.predicate_arity() { - // arity = expected_arity; - // } else { - // // Note: this only occurs if imported data is not used in any arity-determining way, which should be rare. - // return Err( - // ImportExportError::MissingAttribute(PARAMETER_NAME_FORMAT.to_string()).into(), - // ); - // } - // self.table_provider_from_handler(&*handler, arity) - // } - /// Constructs a [TableProvider] from the given [ImportExportHandler]. /// The expeced arity can reflect additional knowledge of the caller (or might be taken /// from the handler, if it has an arity). It is validated if the import directive is diff --git a/nemo/src/lib.rs b/nemo/src/lib.rs index 906e5f319..b2e037732 100644 --- a/nemo/src/lib.rs +++ b/nemo/src/lib.rs @@ -33,8 +33,8 @@ pub mod execution; pub mod rule_model; pub mod util; -mod chase_model; -mod table_manager; +pub mod chase_model; // TODO: Make private +pub(crate) mod table_manager; // we use datavalues and meta from nemo_physical in our API, so re-export it here. pub use nemo_physical::datavalues; diff --git a/nemo/src/rule_model.rs b/nemo/src/rule_model.rs index 0d830b668..18b6964d8 100644 --- a/nemo/src/rule_model.rs +++ b/nemo/src/rule_model.rs @@ -8,4 +8,5 @@ pub(crate) mod origin; pub mod components; pub mod error; pub mod program; +pub mod term_map; pub mod translation; diff --git a/nemo/src/rule_model/components.rs b/nemo/src/rule_model/components.rs index 9a7e428d2..962d7c7a2 100644 --- a/nemo/src/rule_model/components.rs +++ b/nemo/src/rule_model/components.rs @@ -1,5 +1,7 @@ //! This module defines the logical components that make up a program. +#![allow(missing_docs)] + #[macro_use] pub mod atom; @@ -15,7 +17,7 @@ pub mod term; use std::fmt::{Debug, Display}; use enum_assoc::Assoc; -use term::primitive::variable::Variable; +use term::primitive::{variable::Variable, Primitive}; use super::{ error::{ValidationError, ValidationErrorBuilder}, @@ -130,3 +132,12 @@ pub trait IterableVariables { /// Return a mutable iterator over all [Variable]s contained within this program component. fn variables_mut<'a>(&'a mut self) -> Box + 'a>; } + +/// Trait implemented by program components that allow iterating over [Primitive] terms +pub trait IterablePrimitives { + /// Return an iterator over all [Primitive] terms contained within this program component. + fn primitive_terms<'a>(&'a self) -> Box + 'a>; + + /// Return a mutable iterator over all [Primitive] terms contained within this program component. 
+ fn primitive_terms_mut<'a>(&'a mut self) -> Box + 'a>; +} diff --git a/nemo/src/rule_model/components/atom.rs b/nemo/src/rule_model/components/atom.rs index cc2f0ddd2..e696c920c 100644 --- a/nemo/src/rule_model/components/atom.rs +++ b/nemo/src/rule_model/components/atom.rs @@ -9,8 +9,11 @@ use crate::rule_model::{ use super::{ tag::Tag, - term::{primitive::variable::Variable, Term}, - IterableVariables, ProgramComponent, ProgramComponentKind, + term::{ + primitive::{variable::Variable, Primitive}, + Term, + }, + IterablePrimitives, IterableVariables, ProgramComponent, ProgramComponentKind, }; /// Atom @@ -172,6 +175,20 @@ impl IterableVariables for Atom { } } +impl IterablePrimitives for Atom { + fn primitive_terms<'a>(&'a self) -> Box + 'a> { + Box::new(self.terms.iter().flat_map(|term| term.primitive_terms())) + } + + fn primitive_terms_mut<'a>(&'a mut self) -> Box + 'a> { + Box::new( + self.terms + .iter_mut() + .flat_map(|term| term.primitive_terms_mut()), + ) + } +} + #[cfg(test)] mod test { use crate::rule_model::components::{term::primitive::variable::Variable, IterableVariables}; diff --git a/nemo/src/rule_model/components/fact.rs b/nemo/src/rule_model/components/fact.rs index a6fd2130d..05ac184ba 100644 --- a/nemo/src/rule_model/components/fact.rs +++ b/nemo/src/rule_model/components/fact.rs @@ -2,13 +2,22 @@ use std::{fmt::Display, hash::Hash}; -use crate::rule_model::{ - error::{validation_error::ValidationErrorKind, ValidationErrorBuilder}, - origin::Origin, +use crate::{ + chase_model::components::{ + atom::{ground_atom::GroundAtom, ChaseAtom}, + ChaseComponent, + }, + rule_model::{ + error::{validation_error::ValidationErrorKind, ValidationErrorBuilder}, + origin::Origin, + }, }; use super::{ - atom::Atom, tag::Tag, term::Term, IterableVariables, ProgramComponent, ProgramComponentKind, + atom::Atom, + tag::Tag, + term::{primitive::Primitive, Term}, + IterablePrimitives, IterableVariables, ProgramComponent, ProgramComponentKind, }; /// A (ground) fact @@ -138,3 +147,35 @@ impl ProgramComponent for Fact { ProgramComponentKind::Fact } } + +impl IterablePrimitives for Fact { + fn primitive_terms<'a>(&'a self) -> Box + 'a> { + Box::new(self.subterms().flat_map(|term| term.primitive_terms())) + } + + fn primitive_terms_mut<'a>(&'a mut self) -> Box + 'a> { + Box::new( + self.terms + .iter_mut() + .flat_map(|term| term.primitive_terms_mut()), + ) + } +} + +impl From for Fact { + fn from(value: GroundAtom) -> Self { + let origin = value.origin().clone(); + let predicate = value.predicate(); + let terms = value + .terms() + .cloned() + .map(|term| Term::from(term)) + .collect(); + + Self { + origin, + predicate, + terms, + } + } +} diff --git a/nemo/src/rule_model/components/literal.rs b/nemo/src/rule_model/components/literal.rs index d99b6ad73..57df4db34 100644 --- a/nemo/src/rule_model/components/literal.rs +++ b/nemo/src/rule_model/components/literal.rs @@ -6,8 +6,12 @@ use crate::rule_model::error::{ValidationError, ValidationErrorBuilder}; use super::{ atom::Atom, - term::{operation::Operation, primitive::variable::Variable, Term}, - IterableVariables, ProgramComponent, ProgramComponentKind, + term::{ + operation::Operation, + primitive::{variable::Variable, Primitive}, + Term, + }, + IterablePrimitives, IterableVariables, ProgramComponent, ProgramComponentKind, }; /// Literal @@ -99,6 +103,28 @@ impl IterableVariables for Literal { } fn variables_mut<'a>(&'a mut self) -> Box + 'a> { - todo!() + match self { + Literal::Positive(literal) => literal.variables_mut(), + 
Literal::Negative(literal) => literal.variables_mut(), + Literal::Operation(literal) => literal.variables_mut(), + } + } +} + +impl IterablePrimitives for Literal { + fn primitive_terms<'a>(&'a self) -> Box + 'a> { + match self { + Literal::Positive(literal) => literal.primitive_terms(), + Literal::Negative(literal) => literal.primitive_terms(), + Literal::Operation(literal) => literal.primitive_terms(), + } + } + + fn primitive_terms_mut<'a>(&'a mut self) -> Box + 'a> { + match self { + Literal::Positive(literal) => literal.primitive_terms_mut(), + Literal::Negative(literal) => literal.primitive_terms_mut(), + Literal::Operation(literal) => literal.primitive_terms_mut(), + } } } diff --git a/nemo/src/rule_model/components/rule.rs b/nemo/src/rule_model/components/rule.rs index 0c75b8208..270fddc17 100644 --- a/nemo/src/rule_model/components/rule.rs +++ b/nemo/src/rule_model/components/rule.rs @@ -18,7 +18,7 @@ use super::{ primitive::{variable::Variable, Primitive}, Term, }, - IterableVariables, ProgramComponent, ProgramComponentKind, + IterablePrimitives, IterableVariables, ProgramComponent, ProgramComponentKind, }; /// Rule @@ -419,6 +419,31 @@ impl IterableVariables for Rule { } } +impl IterablePrimitives for Rule { + fn primitive_terms<'a>(&'a self) -> Box + 'a> { + let head_primitives = self.head().iter().flat_map(|atom| atom.primitive_terms()); + let body_primitives = self + .body() + .iter() + .flat_map(|literal| literal.primitive_terms()); + + Box::new(head_primitives.chain(body_primitives)) + } + + fn primitive_terms_mut<'a>(&'a mut self) -> Box + 'a> { + let head_primitives = self + .head + .iter_mut() + .flat_map(|atom| atom.primitive_terms_mut()); + let body_primitives = self + .body + .iter_mut() + .flat_map(|literal| literal.primitive_terms_mut()); + + Box::new(head_primitives.chain(body_primitives)) + } +} + /// Builder for a rule #[derive(Debug, Default)] pub struct RuleBuilder { diff --git a/nemo/src/rule_model/components/term.rs b/nemo/src/rule_model/components/term.rs index 255331246..449874218 100644 --- a/nemo/src/rule_model/components/term.rs +++ b/nemo/src/rule_model/components/term.rs @@ -35,7 +35,7 @@ use crate::rule_model::{ origin::Origin, }; -use super::{IterableVariables, ProgramComponent}; +use super::{IterablePrimitives, IterableVariables, ProgramComponent}; /// Term /// @@ -162,6 +162,12 @@ impl From for Term { } } +impl From for Term { + fn from(value: GroundTerm) -> Self { + Self::Primitive(Primitive::from(value)) + } +} + impl From for Term { fn from(value: i64) -> Self { Self::Primitive(Primitive::from(value)) @@ -296,60 +302,102 @@ impl ProgramComponent for Term { impl IterableVariables for Term { fn variables<'a>(&'a self) -> Box + 'a> { - let mut iter_primitive = None; - let mut iter_function = None; - let mut iter_map = None; - let mut iter_operation = None; - let mut iter_tuple = None; - let mut iter_aggregate = None; - match self { - Term::Primitive(primitive) => iter_primitive = Some(primitive.variables()), - Term::FunctionTerm(function) => iter_function = Some(function.variables()), - Term::Map(map) => iter_map = Some(map.variables()), - Term::Operation(operation) => iter_operation = Some(operation.variables()), - Term::Tuple(tuple) => iter_tuple = Some(tuple.variables()), - Term::Aggregate(aggregate) => iter_aggregate = Some(aggregate.variables()), + Term::Primitive(term) => term.variables(), + Term::Aggregate(term) => term.variables(), + Term::FunctionTerm(term) => term.variables(), + Term::Map(term) => term.variables(), + Term::Operation(term) 
=> term.variables(), + Term::Tuple(term) => term.variables(), } - Box::new( - iter_primitive - .into_iter() - .flatten() - .chain(iter_function.into_iter().flatten()) - .chain(iter_map.into_iter().flatten()) - .chain(iter_operation.into_iter().flatten()) - .chain(iter_tuple.into_iter().flatten()) - .chain(iter_aggregate.into_iter().flatten()), - ) + // let mut iter_primitive = None; + // let mut iter_function = None; + // let mut iter_map = None; + // let mut iter_operation = None; + // let mut iter_tuple = None; + // let mut iter_aggregate = None; + + // match self { + // Term::Primitive(primitive) => iter_primitive = Some(primitive.variables()), + // Term::FunctionTerm(function) => iter_function = Some(function.variables()), + // Term::Map(map) => iter_map = Some(map.variables()), + // Term::Operation(operation) => iter_operation = Some(operation.variables()), + // Term::Tuple(tuple) => iter_tuple = Some(tuple.variables()), + // Term::Aggregate(aggregate) => iter_aggregate = Some(aggregate.variables()), + // } + + // Box::new( + // iter_primitive + // .into_iter() + // .flatten() + // .chain(iter_function.into_iter().flatten()) + // .chain(iter_map.into_iter().flatten()) + // .chain(iter_operation.into_iter().flatten()) + // .chain(iter_tuple.into_iter().flatten()) + // .chain(iter_aggregate.into_iter().flatten()), + // ) } fn variables_mut<'a>(&'a mut self) -> Box + 'a> { - let mut iter_primitive = None; - let mut iter_function = None; - let mut iter_map = None; - let mut iter_operation = None; - let mut iter_tuple = None; - let mut iter_aggregate = None; + match self { + Term::Primitive(term) => term.variables_mut(), + Term::Aggregate(term) => term.variables_mut(), + Term::FunctionTerm(term) => term.variables_mut(), + Term::Map(term) => term.variables_mut(), + Term::Operation(term) => term.variables_mut(), + Term::Tuple(term) => term.variables_mut(), + } + // let mut iter_primitive = None; + // let mut iter_function = None; + // let mut iter_map = None; + // let mut iter_operation = None; + // let mut iter_tuple = None; + // let mut iter_aggregate = None; + + // match self { + // Term::Primitive(primitive) => iter_primitive = Some(primitive.variables_mut()), + // Term::FunctionTerm(function) => iter_function = Some(function.variables_mut()), + // Term::Map(map) => iter_map = Some(map.variables_mut()), + // Term::Operation(operation) => iter_operation = Some(operation.variables_mut()), + // Term::Tuple(tuple) => iter_tuple = Some(tuple.variables_mut()), + // Term::Aggregate(aggregate) => iter_aggregate = Some(aggregate.variables_mut()), + // } + + // Box::new( + // iter_primitive + // .into_iter() + // .flatten() + // .chain(iter_function.into_iter().flatten()) + // .chain(iter_map.into_iter().flatten()) + // .chain(iter_operation.into_iter().flatten()) + // .chain(iter_tuple.into_iter().flatten()) + // .chain(iter_aggregate.into_iter().flatten()), + // ) + } +} + +impl IterablePrimitives for Term { + fn primitive_terms<'a>(&'a self) -> Box + 'a> { match self { - Term::Primitive(primitive) => iter_primitive = Some(primitive.variables_mut()), - Term::FunctionTerm(function) => iter_function = Some(function.variables_mut()), - Term::Map(map) => iter_map = Some(map.variables_mut()), - Term::Operation(operation) => iter_operation = Some(operation.variables_mut()), - Term::Tuple(tuple) => iter_tuple = Some(tuple.variables_mut()), - Term::Aggregate(aggregate) => iter_aggregate = Some(aggregate.variables_mut()), + Term::Primitive(term) => Box::new(Some(term).into_iter()), + 
Term::Aggregate(term) => term.primitive_terms(), + Term::FunctionTerm(term) => term.primitive_terms(), + Term::Map(term) => term.primitive_terms(), + Term::Operation(term) => term.primitive_terms(), + Term::Tuple(term) => term.primitive_terms(), } + } - Box::new( - iter_primitive - .into_iter() - .flatten() - .chain(iter_function.into_iter().flatten()) - .chain(iter_map.into_iter().flatten()) - .chain(iter_operation.into_iter().flatten()) - .chain(iter_tuple.into_iter().flatten()) - .chain(iter_aggregate.into_iter().flatten()), - ) + fn primitive_terms_mut<'a>(&'a mut self) -> Box + 'a> { + match self { + Term::Primitive(term) => Box::new(Some(term).into_iter()), + Term::Aggregate(term) => term.primitive_terms_mut(), + Term::FunctionTerm(term) => term.primitive_terms_mut(), + Term::Map(term) => term.primitive_terms_mut(), + Term::Operation(term) => term.primitive_terms_mut(), + Term::Tuple(term) => term.primitive_terms_mut(), + } } } diff --git a/nemo/src/rule_model/components/term/aggregate.rs b/nemo/src/rule_model/components/term/aggregate.rs index 7de0c7628..22aa8a5bd 100644 --- a/nemo/src/rule_model/components/term/aggregate.rs +++ b/nemo/src/rule_model/components/term/aggregate.rs @@ -4,18 +4,25 @@ use std::{fmt::Display, hash::Hash}; use enum_assoc::Assoc; +use nemo_physical::aggregates::operation::AggregateOperation; use strum_macros::EnumIter; use crate::{ rule_model::{ - components::{IterableVariables, ProgramComponent, ProgramComponentKind}, + components::{ + IterablePrimitives, IterableVariables, ProgramComponent, ProgramComponentKind, + }, error::{validation_error::ValidationErrorKind, ValidationErrorBuilder}, origin::Origin, }, syntax::builtin::aggregate, }; -use super::{primitive::variable::Variable, value_type::ValueType, Term}; +use super::{ + primitive::{variable::Variable, Primitive}, + value_type::ValueType, + Term, +}; /// Aggregate operation on logical values #[derive(Assoc, EnumIter, Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] @@ -36,6 +43,7 @@ pub enum AggregateKind { #[assoc(name = aggregate::MAX)] #[assoc(value_type = ValueType::Number)] #[assoc(input_type = ValueType::Number)] + #[assoc(physical = AggregateOperation::Max)] MaxNumber, /// Sum of numerical values #[assoc(name = aggregate::SUM)] @@ -50,6 +58,17 @@ impl Display for AggregateKind { } } +impl Into for AggregateKind { + fn into(self) -> AggregateOperation { + match self { + AggregateKind::CountValues => AggregateOperation::Count, + AggregateKind::MinNumber => AggregateOperation::Min, + AggregateKind::MaxNumber => AggregateOperation::Max, + AggregateKind::SumOfNumbers => AggregateOperation::Sum, + } + } +} + /// Aggregate /// /// Function that performs a computation over a set of [Term]s @@ -241,3 +260,13 @@ impl IterableVariables for Aggregate { ) } } + +impl IterablePrimitives for Aggregate { + fn primitive_terms<'a>(&'a self) -> Box + 'a> { + self.aggregate.primitive_terms() + } + + fn primitive_terms_mut<'a>(&'a mut self) -> Box + 'a> { + self.aggregate.primitive_terms_mut() + } +} diff --git a/nemo/src/rule_model/components/term/function.rs b/nemo/src/rule_model/components/term/function.rs index 44e756020..1be67ef1a 100644 --- a/nemo/src/rule_model/components/term/function.rs +++ b/nemo/src/rule_model/components/term/function.rs @@ -3,12 +3,18 @@ use std::{fmt::Display, hash::Hash}; use crate::rule_model::{ - components::{tag::Tag, IterableVariables, ProgramComponent, ProgramComponentKind}, + components::{ + tag::Tag, IterablePrimitives, IterableVariables, ProgramComponent, 
ProgramComponentKind, + }, error::{validation_error::ValidationErrorKind, ValidationError, ValidationErrorBuilder}, origin::Origin, }; -use super::{primitive::variable::Variable, value_type::ValueType, Term}; +use super::{ + primitive::{variable::Variable, Primitive}, + value_type::ValueType, + Term, +}; /// Function term /// @@ -170,6 +176,20 @@ impl IterableVariables for FunctionTerm { } } +impl IterablePrimitives for FunctionTerm { + fn primitive_terms<'a>(&'a self) -> Box + 'a> { + Box::new(self.terms.iter().flat_map(|term| term.primitive_terms())) + } + + fn primitive_terms_mut<'a>(&'a mut self) -> Box + 'a> { + Box::new( + self.terms + .iter_mut() + .flat_map(|term| term.primitive_terms_mut()), + ) + } +} + #[cfg(test)] mod test { use crate::rule_model::components::{term::primitive::variable::Variable, IterableVariables}; diff --git a/nemo/src/rule_model/components/term/map.rs b/nemo/src/rule_model/components/term/map.rs index 5e5cd4779..fb0fa0524 100644 --- a/nemo/src/rule_model/components/term/map.rs +++ b/nemo/src/rule_model/components/term/map.rs @@ -3,12 +3,18 @@ use std::{fmt::Display, hash::Hash}; use crate::rule_model::{ - components::{tag::Tag, IterableVariables, ProgramComponent, ProgramComponentKind}, + components::{ + tag::Tag, IterablePrimitives, IterableVariables, ProgramComponent, ProgramComponentKind, + }, error::ValidationErrorBuilder, origin::Origin, }; -use super::{primitive::variable::Variable, value_type::ValueType, Term}; +use super::{ + primitive::{variable::Variable, Primitive}, + value_type::ValueType, + Term, +}; /// Map /// @@ -165,3 +171,21 @@ impl IterableVariables for Map { ) } } + +impl IterablePrimitives for Map { + fn primitive_terms<'a>(&'a self) -> Box + 'a> { + Box::new( + self.map + .iter() + .flat_map(|(key, value)| key.primitive_terms().chain(value.primitive_terms())), + ) + } + + fn primitive_terms_mut<'a>(&'a mut self) -> Box + 'a> { + Box::new( + self.map.iter_mut().flat_map(|(key, value)| { + key.primitive_terms_mut().chain(value.primitive_terms_mut()) + }), + ) + } +} diff --git a/nemo/src/rule_model/components/term/operation.rs b/nemo/src/rule_model/components/term/operation.rs index dd31affb1..8870dd1e5 100644 --- a/nemo/src/rule_model/components/term/operation.rs +++ b/nemo/src/rule_model/components/term/operation.rs @@ -7,7 +7,7 @@ use std::{fmt::Display, hash::Hash}; use operation_kind::OperationKind; use crate::rule_model::{ - components::{IterableVariables, ProgramComponent, ProgramComponentKind}, + components::{IterablePrimitives, IterableVariables, ProgramComponent, ProgramComponentKind}, error::{validation_error::ValidationErrorKind, ValidationErrorBuilder}, origin::Origin, }; @@ -256,3 +256,17 @@ impl IterableVariables for Operation { ) } } + +impl IterablePrimitives for Operation { + fn primitive_terms<'a>(&'a self) -> Box + 'a> { + Box::new(self.subterms.iter().flat_map(|term| term.primitive_terms())) + } + + fn primitive_terms_mut<'a>(&'a mut self) -> Box + 'a> { + Box::new( + self.subterms + .iter_mut() + .flat_map(|term| term.primitive_terms_mut()), + ) + } +} diff --git a/nemo/src/rule_model/components/term/tuple.rs b/nemo/src/rule_model/components/term/tuple.rs index 57544520a..e0b9ac71f 100644 --- a/nemo/src/rule_model/components/term/tuple.rs +++ b/nemo/src/rule_model/components/term/tuple.rs @@ -3,12 +3,16 @@ use std::{fmt::Display, hash::Hash}; use crate::rule_model::{ - components::{IterableVariables, ProgramComponent, ProgramComponentKind}, + components::{IterablePrimitives, IterableVariables, ProgramComponent, 
ProgramComponentKind},
     error::ValidationErrorBuilder,
     origin::Origin,
 };
 
-use super::{primitive::variable::Variable, value_type::ValueType, Term};
+use super::{
+    primitive::{variable::Variable, Primitive},
+    value_type::ValueType,
+    Term,
+};
 
 /// Tuple
 ///
@@ -137,6 +141,20 @@ impl IterableVariables for Tuple {
     }
 }
 
+impl IterablePrimitives for Tuple {
+    fn primitive_terms<'a>(&'a self) -> Box<dyn Iterator<Item = &'a Primitive> + 'a> {
+        Box::new(self.terms.iter().flat_map(|term| term.primitive_terms()))
+    }
+
+    fn primitive_terms_mut<'a>(&'a mut self) -> Box<dyn Iterator<Item = &'a mut Primitive> + 'a> {
+        Box::new(
+            self.terms
+                .iter_mut()
+                .flat_map(|term| term.primitive_terms_mut()),
+        )
+    }
+}
+
 #[cfg(test)]
 mod test {
     use crate::rule_model::components::{term::primitive::variable::Variable, IterableVariables};
diff --git a/nemo/src/rule_model/program.rs b/nemo/src/rule_model/program.rs
index b9d7fd0a8..cdd64eed3 100644
--- a/nemo/src/rule_model/program.rs
+++ b/nemo/src/rule_model/program.rs
@@ -24,7 +24,7 @@ use super::{
 };
 
 /// Representation of a nemo program
-#[derive(Debug, Default)]
+#[derive(Debug, Default, Clone)]
 pub struct Program {
     /// Origin of this component
     origin: Origin,
@@ -57,6 +57,14 @@ impl Program {
         self.rules.iter()
     }
 
+    /// Return the rule at a particular index.
+    ///
+    /// # Panics
+    /// Panics if there is no rule at this position.
+    pub fn rule(&self, index: usize) -> &Rule {
+        &self.rules[index]
+    }
+
     /// Return an iterator over all facts.
     pub fn facts(&self) -> impl Iterator<Item = &Fact> {
         self.facts.iter()
@@ -255,23 +263,23 @@ impl std::fmt::Display for Program {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         for import in self.imports() {
             import.fmt(f)?;
-            f.write_char('\n');
+            f.write_char('\n')?;
         }
         for fact in self.facts() {
             fact.fmt(f)?;
-            f.write_char('\n');
+            f.write_char('\n')?;
         }
         for rule in self.rules() {
             rule.fmt(f)?;
-            f.write_char('\n');
+            f.write_char('\n')?;
         }
         for output in self.outputs() {
             output.fmt(f)?;
-            f.write_char('\n');
+            f.write_char('\n')?;
         }
         for export in self.exports() {
             export.fmt(f)?;
-            f.write_char('\n');
+            f.write_char('\n')?;
         }
 
         Ok(())
diff --git a/nemo/src/rule_model/term_map.rs b/nemo/src/rule_model/term_map.rs
new file mode 100644
index 000000000..b06e18e21
--- /dev/null
+++ b/nemo/src/rule_model/term_map.rs
@@ -0,0 +1,41 @@
+//! This module defines [PrimitiveTermMap].
+
+use std::collections::HashMap;
+
+use super::components::{term::primitive::Primitive, IterablePrimitives};
+
+/// Map from [Primitive] terms to each other
+/// that can be used to uniformly replace terms
+#[derive(Debug, Default, Clone)]
+pub struct PrimitiveTermMap {
+    map: HashMap<Primitive, Primitive>,
+}
+
+impl PrimitiveTermMap {
+    /// Create a new [PrimitiveTermMap].
+    pub fn new<Iterator: IntoIterator<Item = (Primitive, Primitive)>>(iter: Iterator) -> Self {
+        Self {
+            map: iter.into_iter().collect(),
+        }
+    }
+
+    /// Add a new mapping.
+    pub fn insert(&mut self, from: Primitive, to: Primitive) {
+        self.map.insert(from, to);
+    }
+
+    /// Apply mapping to a program component.
+    pub fn apply<Component: IterablePrimitives>(&self, component: &mut Component) {
+        for primitive in component.primitive_terms_mut() {
+            if let Some(term) = self.map.get(primitive) {
+                *primitive = term.clone();
+            }
+        }
+    }
+}
+
+impl From<HashMap<Primitive, Primitive>> for PrimitiveTermMap {
+    fn from(value: HashMap<Primitive, Primitive>) -> Self {
+        Self { map: value }
+    }
+}
diff --git a/nemo/src/rule_model/util.rs b/nemo/src/rule_model/util.rs
index 02133c880..f679e7d20 100644
--- a/nemo/src/rule_model/util.rs
+++ b/nemo/src/rule_model/util.rs
@@ -1,6 +1,6 @@
 //! This module collects miscellaneous functionality for the rule model.
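
The new term_map module above is the piece that replaces the hand-rolled assignment application removed from the tracing code earlier in this patch: PrimitiveTermMap::apply walks primitive_terms_mut of any IterablePrimitives component and rewrites matching terms in place. A short sketch of the intended use (imports elided; Variable::universal and Primitive::from(AnyDataValue) as in the tracing tests):

// Map the variable ?x to the constant a, then ground a rule in place.
let mut rule = Rule::parse("P(?x) :- Q(?x)").unwrap();

let mut map = PrimitiveTermMap::default();
map.insert(
    Primitive::from(Variable::universal("x")),
    Primitive::from(AnyDataValue::new_iri("a".to_string())),
);

map.apply(&mut rule);
// rule now reads: P(a) :- Q(a) .
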
-/// Macro that parses individual [super::component::term::Term]s +/// Macro that parses individual [Term][super::components::term::Term]s #[macro_export] macro_rules! term_list { // Base case diff --git a/nemo/src/syntax.rs b/nemo/src/syntax.rs index b09e6732e..433e254fe 100644 --- a/nemo/src/syntax.rs +++ b/nemo/src/syntax.rs @@ -53,6 +53,7 @@ pub mod directive { /// The string used in the keyword for the output directive. pub const OUTPUT: &str = "output"; + /// Syntax elements relating to import/export value formats. pub mod value_formats { /// The string used to represent the import/export format any pub const ANY: &str = "any"; diff --git a/nemo/src/table_manager.rs b/nemo/src/table_manager.rs index ddc05894d..8c2c8e41d 100644 --- a/nemo/src/table_manager.rs +++ b/nemo/src/table_manager.rs @@ -402,10 +402,7 @@ impl TableManager { order: &ColumnOrder, step: usize, ) -> String { - let predicate_name = predicate.name(); - let order_string = format!("{order:?}"); - - format!("{predicate_name} ({step}) {order_string}") + format!("{predicate} ({step}) {order:?}") } /// Generates an appropriate table name for a table that represents multiple subtables. @@ -415,13 +412,7 @@ impl TableManager { order: &ColumnOrder, steps: &Range, ) -> String { - let predicate_name = predicate.name(); - let order_string = format!("{order:?}"); - - format!( - "{predicate_name} ({}-{}) {order_string}", - steps.start, steps.end - ) + format!("{predicate} ({}-{}) {order:?}", steps.start, steps.end) } /// Generates an appropriate table name for a table that is a reordered version of another. @@ -432,10 +423,9 @@ impl TableManager { referenced_table_id: PermanentTableId, permutation: &Permutation, ) -> String { - let predicate_name = predicate.name(); let referenced_table_name = self.database.table_name(referenced_table_id); - format!("{predicate_name} ({step}) -> {referenced_table_name} {permutation}") + format!("{predicate} ({step}) -> {referenced_table_name} {permutation}") } /// Intitializes helper structures that are needed for handling the table associated with the predicate. diff --git a/nemo/src/util.rs b/nemo/src/util.rs index d15101c6e..370fb7357 100644 --- a/nemo/src/util.rs +++ b/nemo/src/util.rs @@ -1,3 +1,5 @@ //! This module collects miscellaneous functionality. pub mod labeled_graph; + +pub(crate) mod seperated_list; diff --git a/nemo/src/util/seperated_list.rs b/nemo/src/util/seperated_list.rs new file mode 100644 index 000000000..15539930f --- /dev/null +++ b/nemo/src/util/seperated_list.rs @@ -0,0 +1,16 @@ +//! This module contains helper functions to display separated lists of values. + +//! Object holding functions to create separated lists of values + +use std::fmt::Display; +#[derive(Debug, Copy, Clone)] +pub struct DisplaySeperatedList {} + +impl DisplaySeperatedList { + //! 
Display a seperated list of values
+    pub fn display<Entry: Display>(list: impl Iterator<Item = Entry>, separator: &str) -> String {
+        list.map(|entry| entry.to_string())
+            .collect::<Vec<_>>()
+            .join(separator)
+    }
+}

From ab2202525182ed6ec2ffa8e28c4771ad7ef27c65 Mon Sep 17 00:00:00 2001
From: Alex Ivliev
Date: Thu, 12 Sep 2024 22:09:49 +0200
Subject: [PATCH 159/214] Implement missing functions for nemo-cli

---
 nemo-cli/src/cli.rs                           |   6 +-
 nemo-cli/src/main.rs                          | 209 +++++++-----------
 nemo/src/chase_model/components/export.rs     |   2 +-
 nemo/src/chase_model/translation.rs           |   2 +-
 .../{import.rs => import_export.rs}           |  28 ++-
 nemo/src/execution/execution_engine.rs        |  12 +-
 nemo/src/io/export_manager.rs                 | 113 ++++++++--
 nemo/src/io/formats.rs                        |   7 +-
 nemo/src/io/formats/dsv.rs                    | 148 +------------
 nemo/src/io/formats/rdf.rs                    |   8 +-
 nemo/src/io/formats/rdf/value_format.rs       |   2 +-
 nemo/src/lib.rs                               |   3 +-
 nemo/src/parser.rs                            |   2 +-
 .../rule_model/components/import_export.rs    |   8 +-
 .../components/import_export/file_formats.rs  |  18 +-
 nemo/src/rule_model/components/term/map.rs    |  20 +-
 nemo/src/rule_model/program.rs                |  73 +++++-
 17 files changed, 321 insertions(+), 340 deletions(-)
 rename nemo/src/chase_model/translation/{import.rs => import_export.rs} (90%)

diff --git a/nemo-cli/src/cli.rs b/nemo-cli/src/cli.rs
index 882fed86e..949bde197 100644
--- a/nemo-cli/src/cli.rs
+++ b/nemo-cli/src/cli.rs
@@ -106,7 +106,7 @@ pub(crate) struct OutputArgs {
 impl OutputArgs {
     /// Creates an output file manager with the current options
     pub(crate) fn export_manager(self) -> Result<ExportManager, Error> {
-        let export_manager = ExportManager::new()
+        let export_manager = ExportManager::default()
             .set_base_path(self.export_directory)
             .overwrite(self.overwrite)
             .compress(self.gz);
@@ -120,11 +120,11 @@ pub(crate) struct TracingArgs {
     /// Facts for which a derivation trace should be computed;
     /// multiple facts can be separated by a semicolon, e.g. "P(a, b);Q(c)".
     #[arg(long = "trace", value_delimiter = ';', group = "trace-input")]
-    pub(crate) facts_to_be_traced: Option<Vec<String>>,
+    pub(crate) facts: Option<Vec<String>>,
     /// Specify one or multiple input files for the facts that should be traced.
     /// The file format is the same as for the "trace" CLI argument.
#[arg(long = "trace-input-file", value_parser, group = "trace-input")] - pub(crate) trace_input_file: Option>, + pub(crate) input_file: Option>, /// File to export the trace to #[arg(long = "trace-output", requires = "trace-input")] pub(crate) output_file: Option, diff --git a/nemo-cli/src/main.rs b/nemo-cli/src/main.rs index e3f68e5b2..1646abe7b 100644 --- a/nemo-cli/src/main.rs +++ b/nemo-cli/src/main.rs @@ -19,26 +19,33 @@ pub mod cli; -use std::fs::{read_to_string, File}; +use std::fs::read_to_string; -use ariadne::{Color, ColorGenerator, Fmt, Label, Report, ReportKind, Source}; use clap::Parser; -use cli::{CliApp, Exporting, Reporting}; use colored::Colorize; + +use cli::{CliApp, Exporting, Reporting}; + use nemo::{ error::{Error, ReadingError}, execution::{DefaultExecutionEngine, ExecutionEngine}, - io::{ - parser::{parse_fact_str, parse_program_str}, - resource_providers::ResourceProviders, - ImportManager, - }, + io::{resource_providers::ResourceProviders, ImportManager}, meta::timing::{TimedCode, TimedDisplay}, - model::{ExportDirective, Program}, - parser::ParserErrorReport, - rule_model, + rule_model::{ + self, + components::{ + import_export::{file_formats::FileFormat, ExportDirective}, + tag::Tag, + term::map::Map, + }, + program::Program, + }, }; +fn default_export(predicate: Tag) -> ExportDirective { + ExportDirective::new(predicate, FileFormat::CSV, Map::empty_unnamed()) +} + /// Set exports according to command-line parameter. /// This disables all existing exports. fn override_exports(program: &mut Program, value: Exporting) { @@ -51,18 +58,18 @@ fn override_exports(program: &mut Program, value: Exporting) { let mut additional_exports = Vec::new(); match value { Exporting::Idb => { - for predicate in program.idb_predicates() { - additional_exports.push(ExportDirective::default(predicate)); + for predicate in program.derived_predicates() { + additional_exports.push(default_export(predicate)); } } Exporting::Edb => { - for predicate in program.edb_predicates() { - additional_exports.push(ExportDirective::default(predicate)); + for predicate in program.import_predicates() { + additional_exports.push(default_export(predicate)); } } Exporting::All => { - for predicate in program.predicates() { - additional_exports.push(ExportDirective::default(predicate)); + for predicate in program.all_predicates() { + additional_exports.push(default_export(predicate)); } } Exporting::None => {} @@ -151,6 +158,20 @@ fn print_memory_details(engine: &DefaultExecutionEngine) { println!("\nMemory report:\n\n{}", engine.memory_usage()); } +/// Retrieve all facts that need to be traced from the cli arguments. 
+fn parse_trace_facts(cli: &CliApp) -> Result, Error> { + let mut facts = cli.tracing.facts.clone().unwrap_or_default(); + + if let Some(input_files) = &cli.tracing.input_file { + for input_file in input_files { + let file_content = read_to_string(input_file)?; + facts.extend(file_content.split(';').map(str::to_string)); + } + } + + Ok(facts) +} + fn run(mut cli: CliApp) -> Result<(), Error> { TimedCode::instance().start(); TimedCode::instance().sub("Reading & Preprocessing").start(); @@ -161,28 +182,30 @@ fn run(mut cli: CliApp) -> Result<(), Error> { return Err(Error::MultipleFilesNotImplemented); } - let rules = cli.rules.pop().ok_or(Error::NoInput)?; - let rules_content = read_to_string(rules.clone()).map_err(|err| ReadingError::IoReading { - error: err, - filename: rules.to_string_lossy().to_string(), - })?; - - let program_ast = - match nemo::parser::Parser::initialize(&rules_content, rules.to_string_lossy().to_string()) - .parse() - { - Ok(program) => program, - Err((program, report)) => { - println!("{program}"); - report.eprint(report.build_reports())?; - std::process::exit(1); - } - }; - log::debug!("AST:\n{program_ast}"); + let program_file = cli.rules.pop().ok_or(Error::NoInput)?; + let program_filename = program_file.to_string_lossy().to_string(); + let program_content = + read_to_string(program_file.clone()).map_err(|err| ReadingError::IoReading { + error: err, + filename: program_filename.clone(), + })?; + + let program_ast = match nemo::parser::Parser::initialize( + &program_content, + program_filename.clone(), + ) + .parse() + { + Ok(program) => program, + Err((_program, report)) => { + report.eprint(report.build_reports())?; + std::process::exit(1); + } + }; - let program = match rule_model::translation::ASTProgramTranslation::initialize( - &rules_content, - rules.to_string_lossy().to_string(), + let mut program = match rule_model::translation::ASTProgramTranslation::initialize( + &program_content, + program_filename, ) .translate(&program_ast) { @@ -192,99 +215,21 @@ fn run(mut cli: CliApp) -> Result<(), Error> { std::process::exit(1); } }; - - println!("Parsing successful"); - std::process::exit(0); - - // let mut program = parse_program(rules_content)?; - // let (ast, errors) = parse_program_str(&rules_content); - - // if !errors.is_empty() { - // for error in errors { - // let color = Color::Red; - - // let r = Report::build(ReportKind::Error, String::from("test"), 100) - // .with_code(3) - // .with_message(&error.msg) - // .with_label( - // Label::new(( - // String::from("test"), - // error.pos.offset..(error.pos.offset + 1), - // )) - // .with_message(&error.msg) - // .with_color(color), - // ) - // .finish(); - // r.eprint((String::from("test"), Source::from(&rules_content)))?; - // } - // } - - // log::debug!("AST:\n{ast}"); - // TODO: Report errors! - // log::debug!("ERRORS:\n{_errors:#?}"); - // let program = nemo::rule_model::program::Program::from_ast(ast); - + override_exports(&mut program, cli.output.export_setting); log::info!("Rules parsed"); - // log::trace!("{:?}", program); - - let facts_to_be_traced: Option> = { - let raw_facts_to_be_traced: Option> = - cli.tracing.facts_to_be_traced.or_else(|| { - Some( - cli.tracing - .trace_input_file? - .into_iter() - .filter_map(|filename| { - match read_to_string(filename.clone()).map_err(|err| { - ReadingError::IoReading { - error: err, - filename: filename.to_string_lossy().to_string(), - } - }) { - Ok(inner) => Some(inner), - Err(err) => { - log::error!("!Error: Could not read trace input file {}. 
We continue by skipping it. Detailed error message: {err}", filename.to_string_lossy().to_string()); - None - } - } - }) - .flat_map(|fact_string| { - fact_string - .split(';') - .map(|s| s.trim().to_string()) - .collect::>() - }) - .collect(), - ) - }); - - // raw_facts_to_be_traced - // .map(|f| { - // f.into_iter() - // .map(/*parse_fact_str*/) // FIXME: Iterator over Strings and not &str - // .collect::, _>>() - // }) - // .transpose()? - None::> // NOTE: This is just a quick and dirty fix - }; - // FIXME: Change override exports to use the new rule model - // override_exports(&mut program, cli.output.export_setting); + let facts_to_be_traced = parse_trace_facts(&cli); let export_manager = cli.output.export_manager()?; - // Validate exports even if we do not intend to write data: - // FIXME: How does the new rule model handle exports? - // for export in program.exports() { - // export_manager.validate(export)?; - // } let import_manager = ImportManager::new(ResourceProviders::with_base_path(cli.import_directory)); - let mut engine: DefaultExecutionEngine = ExecutionEngine::initialize( - /*&program*/ todo!("change the old rule model to the new one"), - import_manager, - )?; + let mut engine: DefaultExecutionEngine = ExecutionEngine::initialize(&program, import_manager)?; + + for (predicate, handler) in engine.exports() { + export_manager.validate(&predicate, &handler)?; + } TimedCode::instance().sub("Reading & Preprocessing").stop(); @@ -295,22 +240,20 @@ fn run(mut cli: CliApp) -> Result<(), Error> { TimedCode::instance().sub("Reasoning").stop(); let mut stdout_used = false; + if !export_manager.write_disabled() { TimedCode::instance() .sub("Output & Final Materialization") .start(); log::info!("writing output"); - // FIXME: How are exports handled in the new rule model? - // for export_directive in program.exports() { - // if let Some(arity) = engine.predicate_arity(export_directive.predicate()) { - // stdout_used |= export_manager.export_table( - // export_directive, - // engine.predicate_rows(export_directive.predicate())?, - // arity, - // )?; - // } - // } + for (predicate, handler) in engine.exports() { + stdout_used |= export_manager.export_table( + &predicate, + &handler, + engine.predicate_rows(&predicate)?, + )?; + } TimedCode::instance() .sub("Output & Final Materialization") diff --git a/nemo/src/chase_model/components/export.rs b/nemo/src/chase_model/components/export.rs index d771f9f7a..93f06f96c 100644 --- a/nemo/src/chase_model/components/export.rs +++ b/nemo/src/chase_model/components/export.rs @@ -35,7 +35,7 @@ impl ChaseExport { } /// Return the handler. 
- pub(crate) fn _handler(&self) -> &Box { + pub(crate) fn handler(&self) -> &Box { &self.handler } diff --git a/nemo/src/chase_model/translation.rs b/nemo/src/chase_model/translation.rs index d8ab3cef3..a18f64ca3 100644 --- a/nemo/src/chase_model/translation.rs +++ b/nemo/src/chase_model/translation.rs @@ -3,7 +3,7 @@ pub(crate) mod aggregate; pub(crate) mod fact; pub(crate) mod filter; -pub(crate) mod import; +pub(crate) mod import_export; pub(crate) mod operation; pub(crate) mod rule; diff --git a/nemo/src/chase_model/translation/import.rs b/nemo/src/chase_model/translation/import_export.rs similarity index 90% rename from nemo/src/chase_model/translation/import.rs rename to nemo/src/chase_model/translation/import_export.rs index 1840ea9d7..d614eff16 100644 --- a/nemo/src/chase_model/translation/import.rs +++ b/nemo/src/chase_model/translation/import_export.rs @@ -74,11 +74,19 @@ impl ProgramChaseTranslation { &self, direction: Direction, predicate: Tag, - attributes: HashMap, + mut attributes: HashMap, file_format: FileFormat, ) -> Box { let arity = self.predicate_arity.get(&predicate).cloned(); + if attributes.get(&ImportExportAttribute::Resource).is_none() { + let default_file_name = format!("{}.{}", predicate, file_format.extension()); + attributes.insert( + ImportExportAttribute::Resource, + Term::from(default_file_name), + ); + } + match file_format { FileFormat::CSV => { Self::build_dsv_handler(Direction::Import, Some(b','), arity, &attributes) @@ -108,7 +116,7 @@ impl ProgramChaseTranslation { /// Read resource attribute and check compression. fn read_resource( - attributes: &HashMap, + attributes: &HashMap, ) -> (CompressionFormat, ImportExportResource) { attributes .get(&ImportExportAttribute::Resource) @@ -120,7 +128,7 @@ impl ProgramChaseTranslation { /// Read the [DsvValueFormats] from the attributes. fn read_dsv_value_formats( - attributes: &HashMap, + attributes: &HashMap, ) -> Option { let term = attributes.get(&ImportExportAttribute::Format)?; @@ -136,7 +144,7 @@ impl ProgramChaseTranslation { /// Read the [RdfValueFormats] from the attributes. fn read_rdf_value_formats( - attributes: &HashMap, + attributes: &HashMap, ) -> Option { let term = attributes.get(&ImportExportAttribute::Format)?; @@ -151,7 +159,7 @@ impl ProgramChaseTranslation { } /// Read the limit from the attributes. - fn read_limit(attributes: &HashMap) -> Option { + fn read_limit(attributes: &HashMap) -> Option { attributes .get(&ImportExportAttribute::Limit) .and_then(|term| ImportExportDirective::integer_value(term)) @@ -160,7 +168,7 @@ impl ProgramChaseTranslation { /// Read the compression format from the attributes. fn read_compression( - attributes: &HashMap, + attributes: &HashMap, ) -> Option { if let Some(term) = attributes.get(&ImportExportAttribute::Compression) { return Some( @@ -177,7 +185,7 @@ impl ProgramChaseTranslation { } /// Read the iri base path from the attributes. 
- fn read_base(attributes: &HashMap) -> Option> { + fn read_base(attributes: &HashMap) -> Option> { let term = attributes.get(&ImportExportAttribute::Base)?; Some(Iri::parse_unchecked( ImportExportDirective::plain_value(term) @@ -190,7 +198,7 @@ impl ProgramChaseTranslation { direction: Direction, delimiter: Option, arity: Option, - attributes: &HashMap, + attributes: &HashMap, ) -> Box { let (mut compression_format, resource) = Self::read_resource(attributes); @@ -229,7 +237,7 @@ impl ProgramChaseTranslation { direction: Direction, variant: RdfVariant, arity: Option, - attributes: &HashMap, + attributes: &HashMap, ) -> Box { let arity = arity.expect("rdf types have known arity"); let (mut compression_format, resource) = Self::read_resource(attributes); @@ -258,7 +266,7 @@ impl ProgramChaseTranslation { /// Build a [JsonHandler]. fn build_json_handler( - attributes: &HashMap, + attributes: &HashMap, ) -> Box { let (_, resource) = Self::read_resource(attributes); diff --git a/nemo/src/execution/execution_engine.rs b/nemo/src/execution/execution_engine.rs index ace548598..c288112ab 100644 --- a/nemo/src/execution/execution_engine.rs +++ b/nemo/src/execution/execution_engine.rs @@ -20,7 +20,7 @@ use crate::{ }, error::Error, execution::{planning::plan_tracing::TracingStrategy, tracing::trace::TraceDerivation}, - io::import_manager::ImportManager, + io::{formats::ImportExportHandler, import_manager::ImportManager}, rule_model::{ components::{ fact::Fact, @@ -276,6 +276,16 @@ impl ExecutionEngine { self.analysis.all_predicates.get(predicate).copied() } + /// Return a list of all all export predicates and their respective [ImportExportHandler]s, + /// which can be used for exporting into files. + pub fn exports(&self) -> Vec<(Tag, Box)> { + self.program + .exports() + .iter() + .map(|export| (export.predicate().clone(), export.handler().clone())) + .collect() + } + /// Counts the facts of a single predicate. /// /// TODO: Currently only counting of in-memory facts is supported, see diff --git a/nemo/src/io/export_manager.rs b/nemo/src/io/export_manager.rs index f3dc4be64..09f5cc94a 100644 --- a/nemo/src/io/export_manager.rs +++ b/nemo/src/io/export_manager.rs @@ -1,20 +1,21 @@ -//! This module contains the [ExportManager], which provides the main API to handle -//! [ExportDirective]s and to write tables to files. - +//! This module contains the [ExportManager]. use std::{ + fs::{create_dir_all, OpenOptions}, io::{ErrorKind, Write}, path::PathBuf, }; -use crate::{error::Error, rule_model::components::import_export::compression::CompressionFormat}; +use crate::{ + error::Error, + rule_model::components::{import_export::compression::CompressionFormat, tag::Tag}, +}; use nemo_physical::datavalues::AnyDataValue; use sanitise_file_name::{sanitise_with_options, Options}; use super::formats::ImportExportHandler; -/// Main object for exporting data to files and for accessing aspects -/// of [ExportDirective]s that might be of public interest. +/// Main object for exporting data to files. #[derive(Debug, Default)] pub struct ExportManager { /// The base path for writing files. @@ -64,16 +65,18 @@ impl ExportManager { self.disable_write } - /// Validates the given [ExportDirective]. + /// Validates the given [ImportExportHandler]. /// This also checks whether the specified file could (likely) be written. 
-    pub fn validate(&self, handler: &dyn ImportExportHandler) -> Result<(), Error> {
-        // let handler = ImportExportHandlers::export_handler(export_directive)?;
-
+    pub fn validate(
+        &self,
+        predicate: &Tag,
+        handler: &Box<dyn ImportExportHandler>,
+    ) -> Result<(), Error> {
         if handler.resource_is_stdout() {
             return Ok(());
         }

-        let path = self.output_file_path(handler);
+        let path = self.output_file_path(predicate, &**handler);

         let meta_info = path.metadata();
         if let Err(err) = meta_info {
@@ -92,11 +95,42 @@
         }
     }

-    /// Get the output file name for the given [ExportDirective].
+    /// Create a writer based on an export handler. The predicate is used to
+    /// obtain a default file name if needed.
+    ///
+    /// This function may already create directories, and should not be used if
+    /// [ExportManager::disable_write] is `true`.
+    fn writer(
+        &self,
+        export_handler: &dyn ImportExportHandler,
+        predicate: &Tag,
+    ) -> Result<Box<dyn Write>, Error> {
+        if export_handler.resource_is_stdout() {
+            Ok(Box::new(std::io::stdout().lock()))
+        } else {
+            let output_path = self.output_file_path(predicate, export_handler);
+
+            log::info!("Exporting predicate \"{}\" to {output_path:?}", predicate);
+
+            if let Some(parent) = output_path.parent() {
+                create_dir_all(parent)?;
+            }
+
+            export_handler
+                .compression_format()
+                .file_writer(output_path, Self::open_options(self.overwrite))
+        }
+    }
+
+    /// Get the output file name for the given [ImportExportHandler].
     ///
     /// This is a complete path (based on our base path),
     /// which includes all extensions.
-    fn output_file_path(&self, export_handler: &dyn ImportExportHandler) -> PathBuf {
+    fn output_file_path(
+        &self,
+        predicate: &Tag,
+        export_handler: &dyn ImportExportHandler,
+    ) -> PathBuf {
         let mut pred_path = self.base_path.to_path_buf();

         let sanitize_options = Options::<Option<char>> {
@@ -104,9 +138,11 @@
             ..Default::default()
         };

-        let file_name_unsafe = export_handler
-            .resource()
-            .unwrap_or_else(|| export_handler.file_extension());
+        let file_name_unsafe = export_handler.resource().unwrap_or(format!(
+            "{}.{}",
+            predicate,
+            export_handler.file_extension()
+        ));
         let file_name = sanitise_with_options(&file_name_unsafe, &sanitize_options);
         pred_path.push(file_name);

@@ -123,16 +159,18 @@
     ///
     /// If this operation succeeds, then it returns `Ok(true)` if the resource is stdout
     /// and `Ok(false)` otherwise.
-    pub(crate) fn export_table_with_handler_writer<'a>(
+    pub fn export_table<'a>(
         &self,
-        export_handler: &dyn ImportExportHandler,
-        writer: Box<dyn Write>,
+        predicate: &Tag,
+        export_handler: &Box<dyn ImportExportHandler>,
         table: Option<impl Iterator<Item = Vec<AnyDataValue>> + 'a>,
     ) -> Result<bool, Error> {
         if self.disable_write {
             return Ok(false);
         }

+        let writer = self.writer(&**export_handler, predicate)?;
+
         if let Some(table) = table {
             let table_writer = export_handler.writer(writer)?;
             table_writer.export_table_data(Box::new(table))?;
@@ -140,4 +178,41 @@

         Ok(export_handler.resource_is_stdout())
     }
+
+    /// Export a (possibly empty) table according to the given [ImportExportHandler],
+    /// but direct output into the given writer instead of using whatever
+    /// resource the handler specifies.
+    ///
+    /// This function ignores [ExportManager::disable_write].
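+    ///
+    /// A hedged usage sketch (the writer and table values are illustrative
+    /// assumptions; a `Vec<u8>` can serve as the writer since it implements `Write`):
+    /// `manager.export_table_with_writer(Box::new(Vec::new()), &handler, Some(rows.into_iter()))?;`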
+    pub fn export_table_with_writer<'a>(
+        &self,
+        writer: Box<dyn Write>,
+        export_handler: &Box<dyn ImportExportHandler>,
+        table: Option<impl Iterator<Item = Vec<AnyDataValue>> + 'a>,
+    ) -> Result<(), Error> {
+        if let Some(table) = table {
+            let table_writer = export_handler.writer(writer)?;
+            table_writer.export_table_data(Box::new(table))?;
+        }
+
+        Ok(())
+    }
+
+    /// Provide suitable options for writing to files under the given settings.
+    fn open_options(overwrite: bool) -> OpenOptions {
+        let mut options = OpenOptions::new();
+        options.write(true);
+
+        if overwrite {
+            options.create(true).truncate(true);
+        } else {
+            options.create_new(true);
+        };
+
+        options
+    }
 }
diff --git a/nemo/src/io/formats.rs b/nemo/src/io/formats.rs
index ca72a0464..0e8055e56 100644
--- a/nemo/src/io/formats.rs
+++ b/nemo/src/io/formats.rs
@@ -23,7 +23,7 @@ const PROGRESS_NOTIFY_INCREMENT: u64 = 10_000_000;

 /// Representation of a resource (file, URL, etc.) for import or export.
 #[derive(Clone, Debug, PartialEq, Eq)]
-pub(crate) enum ImportExportResource {
+pub enum ImportExportResource {
     /// A concrete resource string.
     Resource(Resource),
     /// Use stdout (only for export)
@@ -60,7 +60,7 @@ impl ImportExportResource {
 /// that were used with this format, to create suitable [TableProvider] and [TableWriter] objects
 /// to read and write data in the given format, and to report information about the type of
 /// data that this format can handle (such as predicate arity and type).
-pub(crate) trait ImportExportHandler: std::fmt::Debug + DynClone + Send {
+pub trait ImportExportHandler: std::fmt::Debug + DynClone + Send {
     /// Return the associated [FileFormat].
     fn file_format(&self) -> FileFormat;

@@ -111,7 +111,7 @@ dyn_clone::clone_trait_object!(ImportExportHandler);
 /// We often share code for the two directions, and a direction
 /// is then used to enable smaller distinctions where needed.
 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
-pub(crate) enum Direction {
+pub enum Direction {
     /// Processing input.
     Import,
     /// Processing output.
@@ -128,7 +128,6 @@ impl std::fmt::Display for Direction {
     }
 }

 /// A trait for exporting table data, e.g., to some file.
-// TODO Maybe this should be directly in io, since it is the interface to the OutputManager?
 pub trait TableWriter {
     /// Export a table.
     fn export_table_data<'a>(
diff --git a/nemo/src/io/formats/dsv.rs b/nemo/src/io/formats/dsv.rs
index 30a66e4f5..944b7aa3b 100644
--- a/nemo/src/io/formats/dsv.rs
+++ b/nemo/src/io/formats/dsv.rs
@@ -20,17 +20,6 @@ use crate::{

 use super::{Direction, ImportExportHandler, ImportExportResource, TableWriter};

-/// Internal enum to distinguish variants of the DSV format.
-#[allow(clippy::upper_case_acronyms)]
-enum DsvVariant {
-    /// Delimiter-separated values
-    DSV,
-    /// Comma-separated values
-    CSV,
-    /// Tab-separated values
-    TSV,
-}
-
 /// An [ImportExportHandler] for delimiter-separated values.
 #[derive(Debug, Clone)]
 pub(crate) struct DsvHandler {
@@ -48,7 +37,7 @@ pub(crate) struct DsvHandler {
     /// Compression format to be used
     compression_format: CompressionFormat,
     /// Direction of the operation.
-    direction: Direction,
+    _direction: Direction,
 }

 impl DsvHandler {
@@ -59,7 +48,7 @@ impl DsvHandler {
         value_formats: DsvValueFormats,
         limit: Option<u64>,
         compression_format: CompressionFormat,
-        direction: Direction,
+        _direction: Direction,
     ) -> Self {
         Self {
             delimiter,
@@ -67,140 +56,9 @@ impl DsvHandler {
             value_formats,
             limit,
             compression_format,
-            direction,
+            _direction,
         }
     }
-
-    // /// Construct a DSV file handler with an arbitrary delimiter.
- // pub(crate) fn try_new_dsv( - // attributes: &MapDataValue, - // direction: Direction, - // ) -> Result, ImportExportError> { - // Self::try_new(DsvVariant::DSV, attributes, direction) - // } - - // /// Construct a CSV file handler. - // pub(crate) fn try_new_csv( - // attributes: &MapDataValue, - // direction: Direction, - // ) -> Result, ImportExportError> { - // Self::try_new(DsvVariant::CSV, attributes, direction) - // } - - // /// Construct a TSV file handler. - // pub(crate) fn try_new_tsv( - // attributes: &MapDataValue, - // direction: Direction, - // ) -> Result, ImportExportError> { - // Self::try_new(DsvVariant::TSV, attributes, direction) - // } - - // /// Construct a DSV handler of the given variant. - // fn try_new( - // variant: DsvVariant, - // attributes: &MapDataValue, - // direction: Direction, - // ) -> Result, ImportExportError> { - // // Basic checks for unsupported attributes: - // ImportExportHandlers::check_attributes( - // attributes, - // &[ - // PARAMETER_NAME_FORMAT, - // PARAMETER_NAME_RESOURCE, - // PARAMETER_NAME_DSV_DELIMITER, - // PARAMETER_NAME_COMPRESSION, - // PARAMETER_NAME_LIMIT, - // ], - // )?; - - // let delimiter = Self::extract_delimiter(variant, attributes)?; - // let resource = ImportExportHandlers::extract_resource(attributes, direction)?; - // let value_formats = Self::extract_value_formats(attributes)?; - // let (compression_format, _) = - // ImportExportHandlers::extract_compression_format(attributes, &resource)?; - // let limit = - // ImportExportHandlers::extract_unsigned_integer(attributes, PARAMETER_NAME_LIMIT, true)?; - - // Ok(Box::new(Self { - // delimiter, - // resource, - // value_formats, - // limit, - // compression_format, - // direction, - // })) - // } - - // fn extract_value_formats( - // attributes: &MapDataValue, - // ) -> Result>, ImportExportError> { - // let value_format_strings = ImportExportHandlers::extract_value_format_strings(attributes)?; - - // if let Some(format_strings) = value_format_strings { - // Ok(Some(Self::formats_from_strings(format_strings)?)) - // } else { - // Ok(None) - // } - // } - - // fn formats_from_strings( - // value_format_strings: Vec, - // ) -> Result, ImportExportError> { - // let mut value_formats = Vec::with_capacity(value_format_strings.len()); - // for s in value_format_strings { - // value_formats.push(DsvValueFormat::from_string(s.as_str())?); - // } - // Ok(value_formats) - // } - - // fn extract_delimiter( - // variant: DsvVariant, - // attributes: &MapDataValue, - // ) -> Result { - // let delim_opt: Option; - // if let Some(string) = - // ImportExportHandlers::extract_string(attributes, PARAMETER_NAME_DSV_DELIMITER, true)? 
- // { - // if string.len() == 1 { - // delim_opt = Some(string.as_bytes()[0]); - // } else { - // return Err(ImportExportError::invalid_att_value_error( - // PARAMETER_NAME_DSV_DELIMITER, - // AnyDataValue::new_plain_string(string.to_owned()), - // "delimiter should be exactly one byte", - // )); - // } - // } else { - // delim_opt = None; - // } - - // let delimiter: u8 = match (variant, delim_opt) { - // (DsvVariant::DSV, Some(delim)) => delim, - // (DsvVariant::DSV, None) => { - // return Err(ImportExportError::MissingAttribute( - // PARAMETER_NAME_DSV_DELIMITER.to_string(), - // )); - // } - // (DsvVariant::CSV, None) => b',', - // (DsvVariant::TSV, None) => b'\t', - // (DsvVariant::CSV, Some(_)) | (DsvVariant::TSV, Some(_)) => { - // return Err(ImportExportError::UnknownAttribute( - // PARAMETER_NAME_DSV_DELIMITER.to_string(), - // )); - // } - // }; - - // Ok(delimiter) - // } - - // /// Returns the set value formats, or finds a default value based on the - // /// required arity. - // fn value_formats_or_default(&self, arity: usize) -> Vec { - // self.value_formats.clone().unwrap_or_else(|| { - // Self::formats_from_strings(ImportExportHandlers::default_value_format_strings(arity)) - // .unwrap() - // }) - // } } impl ImportExportHandler for DsvHandler { diff --git a/nemo/src/io/formats/rdf.rs b/nemo/src/io/formats/rdf.rs index c8f5c063c..eb550a4f7 100644 --- a/nemo/src/io/formats/rdf.rs +++ b/nemo/src/io/formats/rdf.rs @@ -1,5 +1,7 @@ //! Handler for resources of type RDF (Rsource Description Format). +#![allow(missing_docs)] + pub mod error; pub(crate) mod reader; pub(crate) mod value_format; @@ -70,7 +72,7 @@ pub struct RdfHandler { /// Compression format to be used compression_format: CompressionFormat, /// Direction of the operation. - direction: Direction, + _direction: Direction, } impl RdfHandler { @@ -82,7 +84,7 @@ impl RdfHandler { value_formats: RdfValueFormats, limit: Option, compression_format: CompressionFormat, - direction: Direction, + _direction: Direction, ) -> Self { Self { resource, @@ -91,7 +93,7 @@ impl RdfHandler { value_formats, limit, compression_format, - direction, + _direction, } } diff --git a/nemo/src/io/formats/rdf/value_format.rs b/nemo/src/io/formats/rdf/value_format.rs index a4bbecd1f..b96a58431 100644 --- a/nemo/src/io/formats/rdf/value_format.rs +++ b/nemo/src/io/formats/rdf/value_format.rs @@ -32,7 +32,7 @@ pub(crate) enum RdfValueFormat { /// Indicate what value parser should be used for each column. #[derive(Debug, Clone)] -pub(crate) struct RdfValueFormats(Vec); +pub struct RdfValueFormats(Vec); impl RdfValueFormats { pub(crate) fn new(formats: Vec) -> Self { diff --git a/nemo/src/lib.rs b/nemo/src/lib.rs index 1f80bc828..5900ded0f 100644 --- a/nemo/src/lib.rs +++ b/nemo/src/lib.rs @@ -19,8 +19,7 @@ #![feature(iter_intersperse)] /// The crate for underlying physical operations. -#[macro_use] -extern crate nemo_physical; +pub extern crate nemo_physical; pub mod api; pub mod error; diff --git a/nemo/src/parser.rs b/nemo/src/parser.rs index b937c1a55..786a73cce 100644 --- a/nemo/src/parser.rs +++ b/nemo/src/parser.rs @@ -98,7 +98,7 @@ impl<'a> Parser<'a> { } } - /// Parse the input + /// Parse the input. 
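+    ///
+    /// A hedged usage sketch, based on calls visible elsewhere in this series:
+    /// `Parser::initialize("a(1).", "example".to_string()).parse()`.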
pub fn parse(self) -> Result, (Program<'a>, ParserErrorReport<'a>)> { let parser_input = ParserInput::new(&self.input, self.state.clone()); diff --git a/nemo/src/rule_model/components/import_export.rs b/nemo/src/rule_model/components/import_export.rs index 011a4f7e5..a698aa6e4 100644 --- a/nemo/src/rule_model/components/import_export.rs +++ b/nemo/src/rule_model/components/import_export.rs @@ -89,14 +89,14 @@ impl ImportExportDirective { } /// Return a [HashMap] containing the attributes of this directive. - pub fn attribute_map(&self) -> HashMap { + pub fn attribute_map(&self) -> HashMap { let mut result = HashMap::new(); for (key, value) in self.attributes.key_value() { if let Some(name) = Self::plain_value(&key).and_then(|plain| ImportExportAttribute::from_name(&plain)) { - result.insert(name, value); + result.insert(name, value.clone()); } } @@ -390,7 +390,7 @@ impl ImportDirective { } /// Return the attributes. - pub fn attributes(&self) -> HashMap { + pub fn attributes(&self) -> HashMap { self.0.attribute_map() } @@ -473,7 +473,7 @@ impl ExportDirective { } /// Return the attributes. - pub fn attributes(&self) -> HashMap { + pub fn attributes(&self) -> HashMap { self.0.attribute_map() } diff --git a/nemo/src/rule_model/components/import_export/file_formats.rs b/nemo/src/rule_model/components/import_export/file_formats.rs index 84c6d7e0e..1b5fbce86 100644 --- a/nemo/src/rule_model/components/import_export/file_formats.rs +++ b/nemo/src/rule_model/components/import_export/file_formats.rs @@ -33,7 +33,7 @@ pub enum FileFormat { #[assoc(from_name = file_format::CSV)] #[assoc(extension = file_format::EXTENSION_CSV)] #[assoc(attributes = HashMap::from([ - (ImportExportAttribute::Resource, AttributeRequirement::Required), + (ImportExportAttribute::Resource, AttributeRequirement::Optional), (ImportExportAttribute::Format, AttributeRequirement::Optional), (ImportExportAttribute::Limit, AttributeRequirement::Optional), (ImportExportAttribute::Compression, AttributeRequirement::Optional), @@ -44,7 +44,7 @@ pub enum FileFormat { #[assoc(from_name = file_format::DSV)] #[assoc(extension = file_format::EXTENSION_DSV)] #[assoc(attributes = HashMap::from([ - (ImportExportAttribute::Resource, AttributeRequirement::Required), + (ImportExportAttribute::Resource, AttributeRequirement::Optional), (ImportExportAttribute::Delimiter, AttributeRequirement::Required), (ImportExportAttribute::Format, AttributeRequirement::Optional), (ImportExportAttribute::Limit, AttributeRequirement::Optional), @@ -56,7 +56,7 @@ pub enum FileFormat { #[assoc(from_name = file_format::TSV)] #[assoc(extension = file_format::EXTENSION_TSV)] #[assoc(attributes = HashMap::from([ - (ImportExportAttribute::Resource, AttributeRequirement::Required), + (ImportExportAttribute::Resource, AttributeRequirement::Optional), (ImportExportAttribute::Format, AttributeRequirement::Optional), (ImportExportAttribute::Limit, AttributeRequirement::Optional), (ImportExportAttribute::Compression, AttributeRequirement::Optional), @@ -67,7 +67,7 @@ pub enum FileFormat { #[assoc(from_name = file_format::JSON)] #[assoc(extension = file_format::EXTENSION_JSON)] #[assoc(attributes = HashMap::from([ - (ImportExportAttribute::Resource, AttributeRequirement::Required) + (ImportExportAttribute::Resource, AttributeRequirement::Optional) ]))] #[assoc(arity = 3)] // TODO: In the future we probably want arbitrary arity here JSON, @@ -76,7 +76,7 @@ pub enum FileFormat { #[assoc(from_name = file_format::RDF_NTRIPLES)] #[assoc(extension = 
file_format::EXTENSION_RDF_NTRIPLES)] #[assoc(attributes = HashMap::from([ - (ImportExportAttribute::Resource, AttributeRequirement::Required), + (ImportExportAttribute::Resource, AttributeRequirement::Optional), (ImportExportAttribute::Base, AttributeRequirement::Optional), (ImportExportAttribute::Format, AttributeRequirement::Optional), (ImportExportAttribute::Limit, AttributeRequirement::Optional), @@ -89,7 +89,7 @@ pub enum FileFormat { #[assoc(from_name = file_format::RDF_NQUADS)] #[assoc(extension = file_format::EXTENSION_RDF_NQUADS)] #[assoc(attributes = HashMap::from([ - (ImportExportAttribute::Resource, AttributeRequirement::Required), + (ImportExportAttribute::Resource, AttributeRequirement::Optional), (ImportExportAttribute::Base, AttributeRequirement::Optional), (ImportExportAttribute::Format, AttributeRequirement::Optional), (ImportExportAttribute::Limit, AttributeRequirement::Optional), @@ -102,7 +102,7 @@ pub enum FileFormat { #[assoc(from_name = file_format::RDF_TURTLE)] #[assoc(extension = file_format::EXTENSION_RDF_TURTLE)] #[assoc(attributes = HashMap::from([ - (ImportExportAttribute::Resource, AttributeRequirement::Required), + (ImportExportAttribute::Resource, AttributeRequirement::Optional), (ImportExportAttribute::Base, AttributeRequirement::Optional), (ImportExportAttribute::Format, AttributeRequirement::Optional), (ImportExportAttribute::Limit, AttributeRequirement::Optional), @@ -115,7 +115,7 @@ pub enum FileFormat { #[assoc(from_name = file_format::RDF_XML)] #[assoc(extension = file_format::EXTENSION_RDF_XML)] #[assoc(attributes = HashMap::from([ - (ImportExportAttribute::Resource, AttributeRequirement::Required), + (ImportExportAttribute::Resource, AttributeRequirement::Optional), (ImportExportAttribute::Base, AttributeRequirement::Optional), (ImportExportAttribute::Format, AttributeRequirement::Optional), (ImportExportAttribute::Limit, AttributeRequirement::Optional), @@ -128,7 +128,7 @@ pub enum FileFormat { #[assoc(from_name = file_format::RDF_TRIG)] #[assoc(extension = file_format::EXTENSION_RDF_TRIG)] #[assoc(attributes = HashMap::from([ - (ImportExportAttribute::Resource, AttributeRequirement::Required), + (ImportExportAttribute::Resource, AttributeRequirement::Optional), (ImportExportAttribute::Base, AttributeRequirement::Optional), (ImportExportAttribute::Format, AttributeRequirement::Optional), (ImportExportAttribute::Limit, AttributeRequirement::Optional), diff --git a/nemo/src/rule_model/components/term/map.rs b/nemo/src/rule_model/components/term/map.rs index fb0fa0524..738501d31 100644 --- a/nemo/src/rule_model/components/term/map.rs +++ b/nemo/src/rule_model/components/term/map.rs @@ -42,7 +42,7 @@ impl Map { } } - /// Create a new [Map]. + /// Create a new [Map] without a name. pub fn new_unnamed>(map: Pairs) -> Self { Self { origin: Origin::Created, @@ -51,6 +51,24 @@ impl Map { } } + /// Create a new empty [Map]. + pub fn empty(name: &str) -> Self { + Self { + origin: Origin::Created, + tag: Some(Tag::new(name.to_string())), + map: Vec::default(), + } + } + + /// Create a new empty [Map]. + pub fn empty_unnamed() -> Self { + Self { + origin: Origin::Created, + tag: None, + map: Vec::default(), + } + } + /// Return the value type of this term. pub fn value_type(&self) -> ValueType { ValueType::Map diff --git a/nemo/src/rule_model/program.rs b/nemo/src/rule_model/program.rs index cdd64eed3..cc1947d2b 100644 --- a/nemo/src/rule_model/program.rs +++ b/nemo/src/rule_model/program.rs @@ -1,7 +1,7 @@ //! This module defines [Program]. 
 use std::{
-    collections::{hash_map::Entry, HashMap},
+    collections::{hash_map::Entry, HashMap, HashSet},
     fmt::Write,
 };
@@ -75,6 +75,42 @@ impl Program {
         self.outputs.iter()
     }

+    /// Return the set of all predicates that are defined by import statements.
+    pub fn import_predicates(&self) -> HashSet<Tag> {
+        self.imports()
+            .map(|import| import.predicate().clone())
+            .collect()
+    }
+
+    /// Return the set of all predicates that can be derived by applying rules.
+    pub fn derived_predicates(&self) -> HashSet<Tag> {
+        let rule_head = self
+            .rules()
+            .flat_map(|rule| rule.head())
+            .map(|atom| atom.predicate().clone());
+        let facts = self.facts().map(|fact| fact.predicate().clone());
+
+        rule_head.chain(facts).collect()
+    }
+
+    /// Return the set of all predicates contained in this program.
+    pub fn all_predicates(&self) -> HashSet<Tag> {
+        let rules = self.rules().flat_map(|rule| {
+            rule.head()
+                .iter()
+                .map(|atom| atom.predicate().clone())
+                .chain(rule.body().iter().filter_map(|literal| match literal {
+                    Literal::Positive(atom) | Literal::Negative(atom) => {
+                        Some(atom.predicate().clone())
+                    }
+                    Literal::Operation(_) => None,
+                }))
+        });
+        let facts = self.facts().map(|fact| fact.predicate().clone());
+
+        rules.chain(facts).collect()
+    }
+
     /// Return an iterator over all imports.
     pub fn imports_mut(&mut self) -> impl Iterator<Item = &mut ImportDirective> {
         self.imports.iter_mut()
@@ -100,6 +136,39 @@ impl Program {
         self.outputs.iter_mut()
     }

+    /// Add a new export statement to the program.
+    pub fn add_export(&mut self, directive: ExportDirective) {
+        self.exports.push(directive);
+    }
+
+    /// Add new export statements to the program.
+    pub fn add_exports<Iterator: IntoIterator<Item = ExportDirective>>(
+        &mut self,
+        exports: Iterator,
+    ) {
+        self.exports.extend(exports.into_iter())
+    }
+
+    /// Remove all export statements.
+    pub fn clear_exports(&mut self) {
+        self.exports.clear();
+    }
+
+    /// Remove all import statements.
+    pub fn clear_imports(&mut self) {
+        self.imports.clear();
+    }
+
+    /// Add a new import statement to the program.
+    pub fn add_import(&mut self, directive: ImportDirective) {
+        self.imports.push(directive);
+    }
+
+    /// Mark a predicate as an output predicate.
+    pub fn add_output(&mut self, predicate: Tag) {
+        self.outputs.push(Output::new(predicate));
+    }
+
     /// Check if a different arity was already used for the given predicate
     /// and report an error if this was the case.
     fn validate_arity(
@@ -326,6 +395,6 @@ impl ProgramBuilder {

     /// Validate the current program.
pub fn validate(&self, builder: &mut ValidationErrorBuilder) -> Result<(), ()> { - self.program.validate(builder) + self.program.validate_global_properties(builder) } } From b3d82cd2c2a1236948ef224f3fc78a1af7e2ddd0 Mon Sep 17 00:00:00 2001 From: Alex Ivliev Date: Mon, 16 Sep 2024 13:23:21 +0200 Subject: [PATCH 160/214] Implement parse method for each program component --- nemo/src/parser.rs | 7 +-- nemo/src/rule_model/components.rs | 9 ++- nemo/src/rule_model/components/atom.rs | 26 +++++++-- nemo/src/rule_model/components/fact.rs | 11 +++- .../rule_model/components/import_export.rs | 20 +++++-- nemo/src/rule_model/components/literal.rs | 14 ++++- nemo/src/rule_model/components/output.rs | 31 +++++++++-- nemo/src/rule_model/components/parse.rs | 55 +++++++++++++++++++ nemo/src/rule_model/components/rule.rs | 24 +++++--- nemo/src/rule_model/components/term.rs | 18 ++++-- .../rule_model/components/term/aggregate.rs | 14 ++++- .../rule_model/components/term/function.rs | 24 +++++--- nemo/src/rule_model/components/term/map.rs | 24 +++++--- .../rule_model/components/term/operation.rs | 24 ++++++-- .../rule_model/components/term/primitive.rs | 30 +++++++--- .../components/term/primitive/ground.rs | 34 ++++++++++-- .../components/term/primitive/variable.rs | 21 +++++-- .../term/primitive/variable/existential.rs | 29 +++++++--- .../term/primitive/variable/universal.rs | 29 +++++++--- nemo/src/rule_model/components/term/tuple.rs | 24 ++++++-- nemo/src/rule_model/program.rs | 30 +++++++++- .../translation/directive/import_export.rs | 36 ++++++++---- nemo/src/rule_model/translation/rule.rs | 2 +- 23 files changed, 418 insertions(+), 118 deletions(-) create mode 100644 nemo/src/rule_model/components/parse.rs diff --git a/nemo/src/parser.rs b/nemo/src/parser.rs index 786a73cce..b1ef7c08e 100644 --- a/nemo/src/parser.rs +++ b/nemo/src/parser.rs @@ -36,7 +36,7 @@ pub type ParserResult<'a, Output> = IResult, Output, ParserError /// Parser for the nemo rule language #[derive(Debug)] pub struct Parser<'a> { - /// Reference to the text that is going to be parser + /// Reference to the text that is going to be parsed input: &'a str, /// Label of the input text, usually a path of the input file label: String, @@ -118,10 +118,5 @@ impl<'a> Parser<'a> { }, )) } - - // let error_tree = match transform_error_tree(Program::parse)(parser_input) { - // Ok((_input, program)) => return Ok(program), - // Err(error_tree) => error_tree, - // }; } } diff --git a/nemo/src/rule_model/components.rs b/nemo/src/rule_model/components.rs index 962d7c7a2..8ac9340f6 100644 --- a/nemo/src/rule_model/components.rs +++ b/nemo/src/rule_model/components.rs @@ -10,6 +10,7 @@ pub mod fact; pub mod import_export; pub mod literal; pub mod output; +pub mod parse; pub mod rule; pub mod tag; pub mod term; @@ -17,12 +18,10 @@ pub mod term; use std::fmt::{Debug, Display}; use enum_assoc::Assoc; +use parse::ComponentParseError; use term::primitive::{variable::Variable, Primitive}; -use super::{ - error::{ValidationError, ValidationErrorBuilder}, - origin::Origin, -}; +use super::{error::ValidationErrorBuilder, origin::Origin}; /// TODO: Think whether this is needed /// Types of [ProgramComponent]s @@ -106,7 +105,7 @@ pub trait ProgramComponent: Debug + Display { fn kind(&self) -> ProgramComponentKind; /// Construct this object from a string. 
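     ///
     /// For example (an illustrative sketch; the exact rule string is an assumption),
     /// implementations are expected to support calls like `Rule::parse("a(?x) :- b(?x) .")`.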
- fn parse(_string: &str) -> Result + fn parse(string: &str) -> Result where Self: Sized; diff --git a/nemo/src/rule_model/components/atom.rs b/nemo/src/rule_model/components/atom.rs index e696c920c..4aa2e87cb 100644 --- a/nemo/src/rule_model/components/atom.rs +++ b/nemo/src/rule_model/components/atom.rs @@ -2,12 +2,18 @@ use std::{fmt::Display, hash::Hash}; -use crate::rule_model::{ - error::{validation_error::ValidationErrorKind, ValidationError, ValidationErrorBuilder}, - origin::Origin, +use crate::{ + parse_component, + rule_model::{ + error::{validation_error::ValidationErrorKind, ValidationErrorBuilder}, + origin::Origin, + translation::ASTProgramTranslation, + }, }; use super::{ + literal::Literal, + parse::ComponentParseError, tag::Tag, term::{ primitive::{variable::Variable, Primitive}, @@ -116,11 +122,21 @@ impl Hash for Atom { } impl ProgramComponent for Atom { - fn parse(_string: &str) -> Result + fn parse(string: &str) -> Result where Self: Sized, { - todo!() + let literal = parse_component!( + string, + crate::parser::ast::expression::Expression::parse_complex, + ASTProgramTranslation::build_body_literal + )?; + + if let Literal::Positive(atom) = literal { + return Ok(atom); + } + + Err(ComponentParseError::ParseError) } fn origin(&self) -> &Origin { diff --git a/nemo/src/rule_model/components/fact.rs b/nemo/src/rule_model/components/fact.rs index 05ac184ba..eb3933075 100644 --- a/nemo/src/rule_model/components/fact.rs +++ b/nemo/src/rule_model/components/fact.rs @@ -7,14 +7,17 @@ use crate::{ atom::{ground_atom::GroundAtom, ChaseAtom}, ChaseComponent, }, + parse_component, rule_model::{ error::{validation_error::ValidationErrorKind, ValidationErrorBuilder}, origin::Origin, + translation::ASTProgramTranslation, }, }; use super::{ atom::Atom, + parse::ComponentParseError, tag::Tag, term::{primitive::Primitive, Term}, IterablePrimitives, IterableVariables, ProgramComponent, ProgramComponentKind, @@ -98,11 +101,15 @@ impl Hash for Fact { } impl ProgramComponent for Fact { - fn parse(_string: &str) -> Result + fn parse(string: &str) -> Result where Self: Sized, { - todo!() + parse_component!( + string, + crate::parser::ast::expression::Expression::parse_complex, + ASTProgramTranslation::build_head_atom + ).map(Fact::from) } fn origin(&self) -> &Origin { diff --git a/nemo/src/rule_model/components/import_export.rs b/nemo/src/rule_model/components/import_export.rs index a698aa6e4..920e6121f 100644 --- a/nemo/src/rule_model/components/import_export.rs +++ b/nemo/src/rule_model/components/import_export.rs @@ -22,13 +22,17 @@ use crate::{ rdf::value_format::RdfValueFormat, Direction, }, + parse_component, + parser::ast::ProgramAST, rule_model::{ error::{hint::Hint, validation_error::ValidationErrorKind, ValidationErrorBuilder}, origin::Origin, + translation::ASTProgramTranslation, }, }; use super::{ + parse::ComponentParseError, tag::Tag, term::{map::Map, primitive::Primitive, Term}, ProgramComponent, ProgramComponentKind, @@ -416,11 +420,15 @@ impl Display for ImportDirective { } impl ProgramComponent for ImportDirective { - fn parse(_string: &str) -> Result + fn parse(string: &str) -> Result where Self: Sized, { - todo!() + parse_component!( + string, + crate::parser::ast::directive::import::Import::parse, + ASTProgramTranslation::build_import + ) } fn origin(&self) -> &Origin { @@ -499,11 +507,15 @@ impl Display for ExportDirective { } impl ProgramComponent for ExportDirective { - fn parse(_string: &str) -> Result + fn parse(string: &str) -> Result where Self: Sized, { - todo!() 
+ parse_component!( + string, + crate::parser::ast::directive::export::Export::parse, + ASTProgramTranslation::build_export + ) } fn origin(&self) -> &Origin { diff --git a/nemo/src/rule_model/components/literal.rs b/nemo/src/rule_model/components/literal.rs index 57df4db34..ed15b5a34 100644 --- a/nemo/src/rule_model/components/literal.rs +++ b/nemo/src/rule_model/components/literal.rs @@ -2,10 +2,14 @@ use std::{fmt::Display, hash::Hash}; -use crate::rule_model::error::{ValidationError, ValidationErrorBuilder}; +use crate::{ + parse_component, + rule_model::{error::ValidationErrorBuilder, translation::ASTProgramTranslation}, +}; use super::{ atom::Atom, + parse::ComponentParseError, term::{ operation::Operation, primitive::{variable::Variable, Primitive}, @@ -51,11 +55,15 @@ impl Display for Literal { } impl ProgramComponent for Literal { - fn parse(_string: &str) -> Result + fn parse(string: &str) -> Result where Self: Sized, { - todo!() + parse_component!( + string, + crate::parser::ast::expression::Expression::parse_complex, + ASTProgramTranslation::build_body_literal + ) } fn origin(&self) -> &crate::rule_model::origin::Origin { diff --git a/nemo/src/rule_model/components/output.rs b/nemo/src/rule_model/components/output.rs index 153245aa8..55f9073cb 100644 --- a/nemo/src/rule_model/components/output.rs +++ b/nemo/src/rule_model/components/output.rs @@ -2,9 +2,12 @@ use std::{fmt::Display, hash::Hash}; -use crate::rule_model::{error::ValidationErrorBuilder, origin::Origin}; +use crate::{ + parser::ast::ProgramAST, + rule_model::{error::ValidationErrorBuilder, origin::Origin}, +}; -use super::{tag::Tag, ProgramComponent, ProgramComponentKind}; +use super::{parse::ComponentParseError, tag::Tag, ProgramComponent, ProgramComponentKind}; /// Output directive /// @@ -52,11 +55,31 @@ impl Hash for Output { } impl ProgramComponent for Output { - fn parse(_string: &str) -> Result + fn parse(string: &str) -> Result where Self: Sized, { - todo!() + use nom::InputLength; + + let input = + crate::parser::input::ParserInput::new(string, crate::parser::ParserState::default()); + let ast = match crate::parser::ast::directive::output::Output::parse(input) { + Ok((input, ast)) => { + if input.input_len() == 0 { + ast + } else { + return Err(ComponentParseError::ParseError); + } + } + Err(_) => return Err(ComponentParseError::ParseError), + }; + + if let Some(predicate) = ast.predicates().next() { + let tag = Tag::new(predicate.to_string()); + return Ok(Output::new(tag)); + } + + Err(ComponentParseError::ParseError) } fn origin(&self) -> &Origin { diff --git a/nemo/src/rule_model/components/parse.rs b/nemo/src/rule_model/components/parse.rs new file mode 100644 index 000000000..6a9eaf340 --- /dev/null +++ b/nemo/src/rule_model/components/parse.rs @@ -0,0 +1,55 @@ +//! This module implements utility functions for parsing program components. + +use std::fmt::{Display, Pointer}; + +use crate::rule_model::error::TranslationError; + +#[derive(Debug)] +pub enum ComponentParseError { + /// Parse Error + ParseError, + /// Translation Error + TranslationError(TranslationError), +} + +impl Display for ComponentParseError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + ComponentParseError::ParseError => f.write_str("error while parsing string"), + ComponentParseError::TranslationError(error) => error.fmt(f), + } + } +} + +#[macro_export] +macro_rules! 
parse_component { + ($string:expr, $parser:expr, $builder:expr) => {{ + use nom::InputLength; + + let input = + crate::parser::input::ParserInput::new($string, crate::parser::ParserState::default()); + let ast = match $parser(input) { + Ok((input, ast)) => { + if input.input_len() == 0 { + ast + } else { + return Err( + crate::rule_model::components::parse::ComponentParseError::ParseError, + ); + } + } + Err(_) => { + return Err(crate::rule_model::components::parse::ComponentParseError::ParseError) + } + }; + + let mut translation = ASTProgramTranslation::initialize($string, String::default()); + + match $builder(&mut translation, &ast) { + Ok(component) => Ok(component), + Err(error) => Err( + crate::rule_model::components::parse::ComponentParseError::TranslationError(error), + ), + } + }}; +} diff --git a/nemo/src/rule_model/components/rule.rs b/nemo/src/rule_model/components/rule.rs index 270fddc17..8031d3fc7 100644 --- a/nemo/src/rule_model/components/rule.rs +++ b/nemo/src/rule_model/components/rule.rs @@ -2,17 +2,23 @@ use std::{collections::HashSet, fmt::Display, hash::Hash}; -use crate::rule_model::{ - error::{ - hint::Hint, info::Info, validation_error::ValidationErrorKind, ComplexErrorLabelKind, - ValidationErrorBuilder, +use crate::{ + parse_component, + parser::ast::ProgramAST, + rule_model::{ + error::{ + hint::Hint, info::Info, validation_error::ValidationErrorKind, ComplexErrorLabelKind, + ValidationErrorBuilder, + }, + origin::Origin, + translation::ASTProgramTranslation, }, - origin::Origin, }; use super::{ atom::Atom, literal::Literal, + parse::ComponentParseError, term::{ operation::Operation, primitive::{variable::Variable, Primitive}, @@ -264,11 +270,15 @@ impl Hash for Rule { } impl ProgramComponent for Rule { - fn parse(_string: &str) -> Result + fn parse(string: &str) -> Result where Self: Sized, { - todo!() + parse_component!( + string, + crate::parser::ast::rule::Rule::parse, + ASTProgramTranslation::build_rule + ) } fn origin(&self) -> &Origin { diff --git a/nemo/src/rule_model/components/term.rs b/nemo/src/rule_model/components/term.rs index 449874218..1afcfc39d 100644 --- a/nemo/src/rule_model/components/term.rs +++ b/nemo/src/rule_model/components/term.rs @@ -30,12 +30,14 @@ use primitive::{ use tuple::Tuple; use value_type::ValueType; -use crate::rule_model::{ - error::{ValidationError, ValidationErrorBuilder}, - origin::Origin, +use crate::{ + parse_component, + rule_model::{ + error::ValidationErrorBuilder, origin::Origin, translation::ASTProgramTranslation, + }, }; -use super::{IterablePrimitives, IterableVariables, ProgramComponent}; +use super::{parse::ComponentParseError, IterablePrimitives, IterableVariables, ProgramComponent}; /// Term /// @@ -242,11 +244,15 @@ impl Display for Term { } impl ProgramComponent for Term { - fn parse(_string: &str) -> Result + fn parse(string: &str) -> Result where Self: Sized, { - todo!() + parse_component!( + string, + crate::parser::ast::expression::Expression::parse_complex, + ASTProgramTranslation::build_inner_term + ) } fn origin(&self) -> &Origin { diff --git a/nemo/src/rule_model/components/term/aggregate.rs b/nemo/src/rule_model/components/term/aggregate.rs index 22aa8a5bd..868116898 100644 --- a/nemo/src/rule_model/components/term/aggregate.rs +++ b/nemo/src/rule_model/components/term/aggregate.rs @@ -8,12 +8,16 @@ use nemo_physical::aggregates::operation::AggregateOperation; use strum_macros::EnumIter; use crate::{ + parse_component, + parser::ast::ProgramAST, rule_model::{ components::{ - IterablePrimitives, 
IterableVariables, ProgramComponent, ProgramComponentKind, + parse::ComponentParseError, IterablePrimitives, IterableVariables, ProgramComponent, + ProgramComponentKind, }, error::{validation_error::ValidationErrorKind, ValidationErrorBuilder}, origin::Origin, + translation::ASTProgramTranslation, }, syntax::builtin::aggregate, }; @@ -201,11 +205,15 @@ impl PartialOrd for Aggregate { } impl ProgramComponent for Aggregate { - fn parse(_string: &str) -> Result + fn parse(string: &str) -> Result where Self: Sized, { - todo!() + parse_component!( + string, + crate::parser::ast::expression::complex::aggregation::Aggregation::parse, + ASTProgramTranslation::build_aggregation + ) } fn origin(&self) -> &Origin { diff --git a/nemo/src/rule_model/components/term/function.rs b/nemo/src/rule_model/components/term/function.rs index 1be67ef1a..660cc26ad 100644 --- a/nemo/src/rule_model/components/term/function.rs +++ b/nemo/src/rule_model/components/term/function.rs @@ -2,12 +2,18 @@ use std::{fmt::Display, hash::Hash}; -use crate::rule_model::{ - components::{ - tag::Tag, IterablePrimitives, IterableVariables, ProgramComponent, ProgramComponentKind, +use crate::{ + parse_component, + parser::ast::ProgramAST, + rule_model::{ + components::{ + parse::ComponentParseError, tag::Tag, IterablePrimitives, IterableVariables, + ProgramComponent, ProgramComponentKind, + }, + error::{validation_error::ValidationErrorKind, ValidationErrorBuilder}, + origin::Origin, + translation::ASTProgramTranslation, }, - error::{validation_error::ValidationErrorKind, ValidationError, ValidationErrorBuilder}, - origin::Origin, }; use super::{ @@ -120,11 +126,15 @@ impl Hash for FunctionTerm { } impl ProgramComponent for FunctionTerm { - fn parse(_string: &str) -> Result + fn parse(string: &str) -> Result where Self: Sized, { - todo!() + parse_component!( + string, + crate::parser::ast::expression::complex::atom::Atom::parse, + ASTProgramTranslation::build_function + ) } fn origin(&self) -> &Origin { diff --git a/nemo/src/rule_model/components/term/map.rs b/nemo/src/rule_model/components/term/map.rs index 738501d31..fdc652116 100644 --- a/nemo/src/rule_model/components/term/map.rs +++ b/nemo/src/rule_model/components/term/map.rs @@ -2,12 +2,18 @@ use std::{fmt::Display, hash::Hash}; -use crate::rule_model::{ - components::{ - tag::Tag, IterablePrimitives, IterableVariables, ProgramComponent, ProgramComponentKind, +use crate::{ + parse_component, + parser::ast::ProgramAST, + rule_model::{ + components::{ + parse::ComponentParseError, tag::Tag, IterablePrimitives, IterableVariables, + ProgramComponent, ProgramComponentKind, + }, + error::ValidationErrorBuilder, + origin::Origin, + translation::ASTProgramTranslation, }, - error::ValidationErrorBuilder, - origin::Origin, }; use super::{ @@ -136,11 +142,15 @@ impl Hash for Map { } impl ProgramComponent for Map { - fn parse(_string: &str) -> Result + fn parse(string: &str) -> Result where Self: Sized, { - todo!() + parse_component!( + string, + crate::parser::ast::expression::complex::map::Map::parse, + ASTProgramTranslation::build_map + ) } fn origin(&self) -> &Origin { diff --git a/nemo/src/rule_model/components/term/operation.rs b/nemo/src/rule_model/components/term/operation.rs index 8870dd1e5..8da33ac12 100644 --- a/nemo/src/rule_model/components/term/operation.rs +++ b/nemo/src/rule_model/components/term/operation.rs @@ -6,10 +6,18 @@ use std::{fmt::Display, hash::Hash}; use operation_kind::OperationKind; -use crate::rule_model::{ - components::{IterablePrimitives, 
IterableVariables, ProgramComponent, ProgramComponentKind}, - error::{validation_error::ValidationErrorKind, ValidationErrorBuilder}, - origin::Origin, +use crate::{ + parse_component, + parser::ast::ProgramAST, + rule_model::{ + components::{ + parse::ComponentParseError, IterablePrimitives, IterableVariables, ProgramComponent, + ProgramComponentKind, + }, + error::{validation_error::ValidationErrorKind, ValidationErrorBuilder}, + origin::Origin, + translation::ASTProgramTranslation, + }, }; use super::{ @@ -196,11 +204,15 @@ impl Hash for Operation { } impl ProgramComponent for Operation { - fn parse(_string: &str) -> Result + fn parse(string: &str) -> Result where Self: Sized, { - todo!() + parse_component!( + string, + crate::parser::ast::expression::complex::operation::Operation::parse, + ASTProgramTranslation::build_operation + ) } fn origin(&self) -> &Origin { diff --git a/nemo/src/rule_model/components/term/primitive.rs b/nemo/src/rule_model/components/term/primitive.rs index df26357a4..01d075f7f 100644 --- a/nemo/src/rule_model/components/term/primitive.rs +++ b/nemo/src/rule_model/components/term/primitive.rs @@ -9,13 +9,19 @@ use ground::GroundTerm; use nemo_physical::datavalues::AnyDataValue; use variable::{existential::ExistentialVariable, universal::UniversalVariable, Variable}; -use crate::rule_model::{ - components::{IterableVariables, ProgramComponent, ProgramComponentKind}, - error::ValidationErrorBuilder, - origin::Origin, +use crate::{ + parse_component, + rule_model::{ + components::{ + parse::ComponentParseError, IterableVariables, ProgramComponent, ProgramComponentKind, + }, + error::ValidationErrorBuilder, + origin::Origin, + translation::ASTProgramTranslation, + }, }; -use super::value_type::ValueType; +use super::{value_type::ValueType, Term}; /// Primitive term /// @@ -114,11 +120,21 @@ impl Display for Primitive { } impl ProgramComponent for Primitive { - fn parse(_string: &str) -> Result + fn parse(string: &str) -> Result where Self: Sized, { - todo!() + let term = parse_component!( + string, + crate::parser::ast::expression::Expression::parse_complex, + ASTProgramTranslation::build_inner_term + )?; + + if let Term::Primitive(primitive) = term { + return Ok(primitive); + } + + Err(ComponentParseError::ParseError) } fn origin(&self) -> &Origin { diff --git a/nemo/src/rule_model/components/term/primitive/ground.rs b/nemo/src/rule_model/components/term/primitive/ground.rs index fee6d32e5..5c9032af1 100644 --- a/nemo/src/rule_model/components/term/primitive/ground.rs +++ b/nemo/src/rule_model/components/term/primitive/ground.rs @@ -4,12 +4,22 @@ use std::{fmt::Display, hash::Hash}; use nemo_physical::datavalues::{AnyDataValue, DataValue, IriDataValue, ValueDomain}; -use crate::rule_model::{ - components::{term::value_type::ValueType, ProgramComponent, ProgramComponentKind}, - error::{ValidationError, ValidationErrorBuilder}, - origin::Origin, +use crate::{ + parse_component, + rule_model::{ + components::{ + parse::ComponentParseError, + term::{value_type::ValueType, Term}, + ProgramComponent, ProgramComponentKind, + }, + error::ValidationErrorBuilder, + origin::Origin, + translation::ASTProgramTranslation, + }, }; +use super::Primitive; + /// Primitive ground term /// /// Represents a basic, indivisble constant value like integers, or strings. 
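     /// For instance (a hedged sketch), `GroundTerm::parse("42")` is expected to
     /// produce an integer-valued ground term via the parse implementation below.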
@@ -126,11 +136,23 @@ impl Hash for GroundTerm { } impl ProgramComponent for GroundTerm { - fn parse(_string: &str) -> Result + fn parse(string: &str) -> Result where Self: Sized, { - todo!() + let term = parse_component!( + string, + crate::parser::ast::expression::Expression::parse_basic, + ASTProgramTranslation::build_inner_term + )?; + + if let Term::Primitive(primitive) = term { + if let Primitive::Ground(ground) = primitive { + return Ok(ground); + } + } + + Err(ComponentParseError::ParseError) } fn origin(&self) -> &Origin { diff --git a/nemo/src/rule_model/components/term/primitive/variable.rs b/nemo/src/rule_model/components/term/primitive/variable.rs index ff4ed6850..4b15d4e15 100644 --- a/nemo/src/rule_model/components/term/primitive/variable.rs +++ b/nemo/src/rule_model/components/term/primitive/variable.rs @@ -5,10 +5,15 @@ use std::fmt::Display; use existential::ExistentialVariable; use universal::UniversalVariable; -use crate::rule_model::{ - components::ProgramComponentKind, - error::{ValidationError, ValidationErrorBuilder}, - origin::Origin, +use crate::{ + parse_component, + parser::ast::ProgramAST, + rule_model::{ + components::{parse::ComponentParseError, ProgramComponentKind}, + error::ValidationErrorBuilder, + origin::Origin, + translation::ASTProgramTranslation, + }, }; use super::ProgramComponent; @@ -115,11 +120,15 @@ impl Display for Variable { } impl ProgramComponent for Variable { - fn parse(_string: &str) -> Result + fn parse(string: &str) -> Result where Self: Sized, { - todo!() + parse_component!( + string, + crate::parser::ast::expression::basic::variable::Variable::parse, + ASTProgramTranslation::build_variable + ) } fn origin(&self) -> &Origin { diff --git a/nemo/src/rule_model/components/term/primitive/variable/existential.rs b/nemo/src/rule_model/components/term/primitive/variable/existential.rs index 50706878f..d95044eeb 100644 --- a/nemo/src/rule_model/components/term/primitive/variable/existential.rs +++ b/nemo/src/rule_model/components/term/primitive/variable/existential.rs @@ -2,13 +2,18 @@ use std::{fmt::Display, hash::Hash}; -use crate::rule_model::{ - components::{ProgramComponent, ProgramComponentKind}, - error::{validation_error::ValidationErrorKind, ValidationError, ValidationErrorBuilder}, - origin::Origin, +use crate::{ + parse_component, + parser::ast::ProgramAST, + rule_model::{ + components::{parse::ComponentParseError, ProgramComponent, ProgramComponentKind}, + error::{validation_error::ValidationErrorKind, ValidationErrorBuilder}, + origin::Origin, + translation::ASTProgramTranslation, + }, }; -use super::VariableName; +use super::{Variable, VariableName}; /// Existentially quantified variable /// @@ -67,11 +72,21 @@ impl Hash for ExistentialVariable { } impl ProgramComponent for ExistentialVariable { - fn parse(_string: &str) -> Result + fn parse(string: &str) -> Result where Self: Sized, { - todo!() + let variable = parse_component!( + string, + crate::parser::ast::expression::basic::variable::Variable::parse, + ASTProgramTranslation::build_variable + )?; + + if let Variable::Existential(existential) = variable { + return Ok(existential); + } + + Err(ComponentParseError::ParseError) } fn origin(&self) -> &Origin { diff --git a/nemo/src/rule_model/components/term/primitive/variable/universal.rs b/nemo/src/rule_model/components/term/primitive/variable/universal.rs index 003420c3e..f22847969 100644 --- a/nemo/src/rule_model/components/term/primitive/variable/universal.rs +++ 
b/nemo/src/rule_model/components/term/primitive/variable/universal.rs @@ -2,13 +2,18 @@ use std::{fmt::Display, hash::Hash}; -use crate::rule_model::{ - components::{ProgramComponent, ProgramComponentKind}, - error::{validation_error::ValidationErrorKind, ValidationError, ValidationErrorBuilder}, - origin::Origin, +use crate::{ + parse_component, + parser::ast::ProgramAST, + rule_model::{ + components::{parse::ComponentParseError, ProgramComponent, ProgramComponentKind}, + error::{validation_error::ValidationErrorKind, ValidationErrorBuilder}, + origin::Origin, + translation::ASTProgramTranslation, + }, }; -use super::VariableName; +use super::{Variable, VariableName}; /// Universally quantified variable /// @@ -90,8 +95,18 @@ impl Hash for UniversalVariable { } impl ProgramComponent for UniversalVariable { - fn parse(_string: &str) -> Result { - todo!() + fn parse(string: &str) -> Result { + let variable = parse_component!( + string, + crate::parser::ast::expression::basic::variable::Variable::parse, + ASTProgramTranslation::build_variable + )?; + + if let Variable::Universal(universal) = variable { + return Ok(universal); + } + + Err(ComponentParseError::ParseError) } fn origin(&self) -> &Origin { diff --git a/nemo/src/rule_model/components/term/tuple.rs b/nemo/src/rule_model/components/term/tuple.rs index e0b9ac71f..3439f11ae 100644 --- a/nemo/src/rule_model/components/term/tuple.rs +++ b/nemo/src/rule_model/components/term/tuple.rs @@ -2,10 +2,18 @@ use std::{fmt::Display, hash::Hash}; -use crate::rule_model::{ - components::{IterablePrimitives, IterableVariables, ProgramComponent, ProgramComponentKind}, - error::ValidationErrorBuilder, - origin::Origin, +use crate::{ + parse_component, + parser::ast::ProgramAST, + rule_model::{ + components::{ + parse::ComponentParseError, IterablePrimitives, IterableVariables, ProgramComponent, + ProgramComponentKind, + }, + error::ValidationErrorBuilder, + origin::Origin, + translation::ASTProgramTranslation, + }, }; use super::{ @@ -96,11 +104,15 @@ impl PartialOrd for Tuple { } impl ProgramComponent for Tuple { - fn parse(_string: &str) -> Result + fn parse(string: &str) -> Result where Self: Sized, { - todo!() + parse_component!( + string, + crate::parser::ast::expression::complex::tuple::Tuple::parse, + ASTProgramTranslation::build_tuple + ) } fn origin(&self) -> &Origin { diff --git a/nemo/src/rule_model/program.rs b/nemo/src/rule_model/program.rs index cc1947d2b..769a0ba8c 100644 --- a/nemo/src/rule_model/program.rs +++ b/nemo/src/rule_model/program.rs @@ -5,7 +5,12 @@ use std::{ fmt::Write, }; -use crate::rule_model::components::tag::Tag; +use nom::InputLength; + +use crate::{ + parser::{ast::ProgramAST, input::ParserInput}, + rule_model::components::tag::Tag, +}; use super::{ components::{ @@ -13,6 +18,7 @@ use super::{ import_export::{ExportDirective, ImportDirective}, literal::Literal, output::Output, + parse::ComponentParseError, rule::Rule, ProgramComponent, ProgramComponentKind, }, @@ -21,6 +27,7 @@ use super::{ ValidationErrorBuilder, }, origin::Origin, + translation::ASTProgramTranslation, }; /// Representation of a nemo program @@ -277,11 +284,28 @@ impl Program { } impl ProgramComponent for Program { - fn parse(_string: &str) -> Result + fn parse(string: &str) -> Result where Self: Sized, { - todo!() + let input = ParserInput::new(string, crate::parser::ParserState::default()); + let ast = match crate::parser::ast::program::Program::parse(input) { + Ok((input, ast)) => { + if input.input_len() == 0 { + ast + } else { + return 
Err(ComponentParseError::ParseError);
+                }
+            }
+            Err(_) => return Err(ComponentParseError::ParseError),
+        };
+
+        let translation = ASTProgramTranslation::initialize(string, String::default());
+
+        match translation.translate(&ast) {
+            Ok(program) => Ok(program),
+            Err(_report) => Err(ComponentParseError::ParseError), // TODO: Skip validation
+        }
     }

     fn origin(&self) -> &Origin {
diff --git a/nemo/src/rule_model/translation/directive/import_export.rs b/nemo/src/rule_model/translation/directive/import_export.rs
index b84dc924c..42d3f7385 100644
--- a/nemo/src/rule_model/translation/directive/import_export.rs
+++ b/nemo/src/rule_model/translation/directive/import_export.rs
@@ -97,20 +97,28 @@ impl<'a> ASTProgramTranslation<'a> {
         }
     }

-    /// Handle a import ast node.
-    pub fn handle_import(
+    /// Given an [ast::directive::import::Import], build an [ImportDirective].
+    pub fn build_import(
         &mut self,
         import: &'a ast::directive::import::Import,
-    ) -> Result<(), TranslationError> {
+    ) -> Result<ImportDirective, TranslationError> {
         let predicate = Tag::new(self.resolve_tag(import.predicate())?)
             .set_origin(self.register_node(import.predicate()));
         let attributes = self.build_map(import.instructions())?;
         let file_format = self.import_export_format(import.instructions())?;

-        let import_directive = self.register_component(
+        Ok(self.register_component(
             ImportDirective::new(predicate, file_format, attributes),
             import,
-        );
+        ))
+    }
+
+    /// Handle an import ast node.
+    pub fn handle_import(
+        &mut self,
+        import: &'a ast::directive::import::Import,
+    ) -> Result<(), TranslationError> {
+        let import_directive = self.build_import(import)?;
         let _ = import_directive.validate(&mut self.validation_error_builder);

         self.program_builder.add_import(import_directive);
@@ -118,20 +126,28 @@ impl<'a> ASTProgramTranslation<'a> {
         Ok(())
     }

-    /// Handle a export ast node.
-    pub fn handle_export(
+    /// Given an [ast::directive::export::Export], build an [ExportDirective].
+    pub fn build_export(
         &mut self,
         export: &'a ast::directive::export::Export,
-    ) -> Result<(), TranslationError> {
+    ) -> Result<ExportDirective, TranslationError> {
         let predicate = Tag::new(self.resolve_tag(export.predicate())?)
             .set_origin(self.register_node(export.predicate()));
         let attributes = self.build_map(export.instructions())?;
         let file_format = self.import_export_format(export.instructions())?;

-        let export_directive = self.register_component(
+        Ok(self.register_component(
             ExportDirective::new(predicate, file_format, attributes),
             export,
-        );
+        ))
+    }
+
+    /// Handle an export ast node.
+    pub fn handle_export(
+        &mut self,
+        export: &'a ast::directive::export::Export,
+    ) -> Result<(), TranslationError> {
+        let export_directive = self.build_export(export)?;
         let _ = export_directive.validate(&mut self.validation_error_builder);

         self.program_builder.add_export(export_directive);
diff --git a/nemo/src/rule_model/translation/rule.rs b/nemo/src/rule_model/translation/rule.rs
index 40669531e..768d5080f 100644
--- a/nemo/src/rule_model/translation/rule.rs
+++ b/nemo/src/rule_model/translation/rule.rs
@@ -40,7 +40,7 @@ impl<'a> ASTProgramTranslation<'a> {
     }

     /// Create a body [Literal] from the corresponding ast node.
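     ///
     /// (With the visibility change below, string-level helpers such as
     /// `Literal::parse("~b(?x)")` can reuse this builder; the negated-literal
     /// syntax shown here is a hedged assumption.)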
- fn build_body_literal( + pub(crate) fn build_body_literal( &mut self, body: &'a ast::expression::Expression<'a>, ) -> Result { From 5b34cc291308b884963660836a2eb5f1eed6722f Mon Sep 17 00:00:00 2001 From: monsterkrampe Date: Thu, 12 Sep 2024 16:09:43 +0200 Subject: [PATCH 161/214] Adjust language server and wasm for new parser --- nemo-language-server/src/language_server.rs | 203 ++++++++++-------- .../src/language_server/lsp_component.rs | 136 ++++++++++++ .../src/language_server/nemo_position.rs | 23 +- nemo-wasm/src/lib.rs | 128 ++++++----- nemo/src/io/formats/dsv.rs | 6 +- nemo/src/io/formats/dsv/value_format.rs | 4 +- nemo/src/parser.rs | 6 +- nemo/src/parser/error.rs | 8 +- nemo/src/parser/lsp.rs | 37 ---- nemo/src/parser/span.rs | 2 +- nemo/src/rule_model/components/fact.rs | 3 +- nemo/src/rule_model/error.rs | 12 +- nemo/src/rule_model/error/hint.rs | 2 +- nemo/src/rule_model/error/validation_error.rs | 2 +- nemo/src/rule_model/translation.rs | 5 + 15 files changed, 372 insertions(+), 205 deletions(-) create mode 100644 nemo-language-server/src/language_server/lsp_component.rs delete mode 100644 nemo/src/parser/lsp.rs diff --git a/nemo-language-server/src/language_server.rs b/nemo-language-server/src/language_server.rs index bf2379d0c..f1336f3a0 100644 --- a/nemo-language-server/src/language_server.rs +++ b/nemo-language-server/src/language_server.rs @@ -1,13 +1,21 @@ +mod lsp_component; +mod nemo_position; + use std::collections::{BTreeMap, BTreeSet, HashMap}; use std::vec; use anyhow::anyhow; use futures::lock::Mutex; use line_index::{LineCol, LineIndex, WideEncoding}; -use nemo::io::parser::ast::program::Program; -use nemo::io::parser::ast::{AstNode, Position}; -use nemo::io::parser::parse_program_str; -use nemo_position::{lsp_position_to_nemo_position, PositionConversionError}; +use lsp_component::LSPComponent; +use nemo::parser::ast::program::Program; +use nemo::parser::ast::ProgramAST; +use nemo::parser::context::ParserContext; +use nemo::parser::span::CharacterPosition; +use nemo::parser::{Parser, ParserErrorReport}; +use nemo_position::{ + lsp_position_to_nemo_position, nemo_range_to_lsp_range, PositionConversionError, +}; use tower_lsp::lsp_types::{ Diagnostic, DidChangeTextDocumentParams, DidOpenTextDocumentParams, DocumentChangeOperation, DocumentChanges, DocumentSymbol, DocumentSymbolOptions, DocumentSymbolParams, @@ -19,10 +27,6 @@ use tower_lsp::lsp_types::{ }; use tower_lsp::{Client, LanguageServer}; -use self::nemo_position::nemo_range_to_lsp_range; - -mod nemo_position; - #[derive(Debug)] pub struct Backend { client: Client, @@ -90,15 +94,22 @@ impl Backend { let line_index = LineIndex::new(text); - let (_program, errors) = parse_program_str(text); + let (_program, errors): (Program, Option) = + Parser::initialize(text, text_document.uri.to_string()) + .parse() + .map(|prg| (prg, None)) + .unwrap_or_else(|(prg, err)| (prg, Some(err))); // Group errors by position and deduplicate error - let mut errors_by_posision: BTreeMap> = BTreeMap::new(); - for error in errors { - if let Some(set) = errors_by_posision.get_mut(&error.pos) { - set.insert(error.msg.clone()); + let mut errors_by_posision: BTreeMap> = BTreeMap::new(); + for error in errors.iter().flat_map(|report| report.errors()) { + if let Some(set) = errors_by_posision.get_mut(&error.position) { + set.insert(format!("expected `{}`", error.context[0].name())); } else { - errors_by_posision.insert(error.pos, std::iter::once(error.msg.clone()).collect()); + errors_by_posision.insert( + error.position, + 
std::iter::once(format!("expected `{}`", error.context[0].name())).collect(), + ); }; } @@ -250,7 +261,13 @@ impl LanguageServer for Backend { .map_err(Into::into) .map_err(jsonrpc_error)?; - let (program, _) = parse_program_str(&text); + let (program, _): (Program, Option) = Parser::initialize( + &text, + params.text_document_position.text_document.uri.to_string(), + ) + .parse() + .map(|prg| (prg, None)) + .unwrap_or_else(|(prg, err)| (prg, Some(err))); let node_path = find_in_ast(&program, position); @@ -292,17 +309,19 @@ impl LanguageServer for Backend { let text = info.text; let line_index = LineIndex::new(&text); - let (program, _) = parse_program_str(&text); + let (program, _): (Program, Option) = + Parser::initialize(&text, params.text_document.uri.to_string()) + .parse() + .map(|prg| (prg, None)) + .unwrap_or_else(|(prg, err)| (prg, Some(err))); - let document_symbol = ast_node_to_document_symbol(&line_index, &program) + let document_symbols = ast_node_to_document_symbol(&line_index, &program) .map_err(Into::into) .map_err(jsonrpc_error)? .ok_or(anyhow!("program has no document symbol")) .map_err(jsonrpc_error)?; - Ok(Some(DocumentSymbolResponse::Nested( - document_symbol.children.unwrap_or(vec![]), - ))) + Ok(Some(DocumentSymbolResponse::Nested(document_symbols))) } /// Finds references to symbol that was renamed and sends edit operations to language client @@ -322,7 +341,13 @@ impl LanguageServer for Backend { .map_err(Into::into) .map_err(jsonrpc_error)?; - let (program, _) = parse_program_str(&text); + let (program, _): (Program, Option) = Parser::initialize( + &text, + params.text_document_position.text_document.uri.to_string(), + ) + .parse() + .map(|prg| (prg, None)) + .unwrap_or_else(|(prg, err)| (prg, Some(err))); let node_path = find_in_ast(&program, position); @@ -345,7 +370,7 @@ impl LanguageServer for Backend { edits: referenced_nodes .into_iter() .filter_map(|node| { - node.lsp_range_to_rename().map(|renamed_node_range| { + node.range_renaming().map(|renamed_node_range| { Ok({ OneOf::Left(TextEdit { range: nemo_range_to_lsp_range(&line_index, renamed_node_range) @@ -383,7 +408,11 @@ impl LanguageServer for Backend { .map_err(Into::into) .map_err(jsonrpc_error)?; - let (program, _) = parse_program_str(&text); + let (program, _): (Program, Option) = + Parser::initialize(&text, params.text_document.uri.to_string()) + .parse() + .map(|prg| (prg, None)) + .unwrap_or_else(|(prg, err)| (prg, Some(err))); let node_path = find_in_ast(&program, position); @@ -396,7 +425,7 @@ impl LanguageServer for Backend { &line_index, indentified_node .node - .lsp_range_to_rename() + .range_renaming() .ok_or_else(|| anyhow!("identified node can not be renamed")) .map_err(jsonrpc_error)?, ) @@ -414,43 +443,45 @@ impl LanguageServer for Backend { fn node_with_range<'a>( line_index: &LineIndex, - node: &'a dyn AstNode, -) -> Option<(&'a dyn AstNode, Range)> { - nemo_range_to_lsp_range(line_index, node.range()) - .map(|range| (node, range)) // TODO: Print error, + node: &'a dyn ProgramAST<'a>, +) -> Option<(&'a dyn ProgramAST<'a>, Range)> { + nemo_range_to_lsp_range(line_index, node.span().range()) + .map(|range| (node, range)) // TODO: Handle error .ok() } struct IdentifiedNode<'a> { - node: &'a dyn AstNode, - identifier: String, - scoping_node: &'a dyn AstNode, + node: &'a dyn ProgramAST<'a>, + identifier: (ParserContext, String), + scoping_node: &'a dyn ProgramAST<'a>, } struct PariallyIdentifiedNode<'a> { - node: &'a dyn AstNode, - identifier: String, - identifier_scope: String, + node: &'a 
+    identifier: (ParserContext, String),
+    identifier_scope: ParserContext,
 }
 
 /// Get identifier most specific to the position of the node path
-fn node_path_deepest_identifier<'a>(node_path: &[&'a dyn AstNode]) -> Option<IdentifiedNode<'a>> {
+fn node_path_deepest_identifier<'a>(
+    node_path: &[&'a dyn ProgramAST<'a>],
+) -> Option<IdentifiedNode<'a>> {
     let mut info = None;
 
     for node in node_path.iter().rev() {
         match info {
             None => {
-                if let Some((identifier, identifier_scope)) = node.lsp_identifier() {
+                if let Some(lsp_ident) = node.identifier() {
                     info = Some(PariallyIdentifiedNode {
                         node: *node,
-                        identifier,
-                        identifier_scope,
+                        identifier: lsp_ident.identifier().clone(),
+                        identifier_scope: *lsp_ident.scope(),
                     });
                 }
             }
             Some(ref info) => {
-                if let Some(parent_identifier) = node.lsp_identifier()
-                    && parent_identifier.0.starts_with(&info.identifier_scope)
+                if let Some(parent_identifier) = node.identifier()
+                    && parent_identifier.identifier().0 == info.identifier_scope
                 {
                     return Some(IdentifiedNode {
                         node: info.node,
@@ -469,7 +500,10 @@ fn node_path_deepest_identifier<'a>(node_path: &[&'a dyn AstNode]) -> Option<Id
 
-fn find_by_identifier<'a>(node: &'a dyn AstNode, identifier: &str) -> Vec<&'a dyn AstNode> {
+fn find_by_identifier<'a>(
+    node: &'a dyn ProgramAST<'a>,
+    identifier: &(ParserContext, String),
+) -> Vec<&'a dyn ProgramAST<'a>> {
     let mut references = Vec::new();
 
     find_by_identifier_recurse(node, identifier, &mut references);
@@ -478,26 +512,27 @@ fn find_by_identifier<'a>(node: &'a dyn AstNode, identifier: &str) -> Vec<&'a dy
 }
 
 fn find_by_identifier_recurse<'a>(
-    node: &'a dyn AstNode,
-    identifier: &str,
-    references: &mut Vec<&'a dyn AstNode>,
+    node: &'a dyn ProgramAST<'a>,
+    identifier: &(ParserContext, String),
+    references: &mut Vec<&'a dyn ProgramAST<'a>>,
 ) {
     if node
-        .lsp_identifier()
-        .map(|(i, _)| i == identifier)
+        .identifier()
+        .map(|ident| ident.identifier() == identifier)
        .unwrap_or(false)
    {
        references.push(node);
    }
 
-    if let Some(children) = node.children() {
-        for child in children {
-            find_by_identifier_recurse(child, identifier, references);
-        }
-    };
+    for child in node.children() {
+        find_by_identifier_recurse(child, identifier, references);
+    }
 }
 
-fn find_in_ast<'a>(node: &'a Program<'a>, position: Position) -> Vec<&'a dyn AstNode> {
+fn find_in_ast<'a>(
+    node: &'a Program<'a>,
+    position: CharacterPosition,
+) -> Vec<&'a dyn ProgramAST<'a>> {
     let mut path = Vec::new();
 
     find_in_ast_recurse(node, position, &mut path);
@@ -506,60 +541,60 @@ fn find_in_ast<'a>(node: &'a Program<'a>, position: Position) -> Vec<&'a dyn Ast
 }
 
 fn find_in_ast_recurse<'a>(
-    node: &'a dyn AstNode,
-    position: Position,
-    path: &mut Vec<&'a dyn AstNode>,
+    node: &'a dyn ProgramAST<'a>,
+    position: CharacterPosition,
+    path: &mut Vec<&'a dyn ProgramAST<'a>>,
 ) {
     path.push(node);
 
-    for child in node.children().iter().flatten() {
-        let range = child.range();
+    for child in node.children() {
+        let range = child.span().range();
         if range.start <= position && position < range.end {
-            find_in_ast_recurse(*child, position, path);
+            find_in_ast_recurse(child, position, path);
             break; // Assume no nodes overlap
         }
     }
 }
 
-fn ast_node_to_document_symbol(
+fn ast_node_to_document_symbol<'a>(
     line_index: &LineIndex,
-    node: &dyn AstNode,
-) -> Result<Option<DocumentSymbol>, PositionConversionError> {
-    let range = nemo_range_to_lsp_range(line_index, node.range())?;
-
-    if let Some((name, kind)) = node.lsp_symbol_info() {
-        let children_results: Vec<_> = node
-            .children()
+    node: &'a dyn ProgramAST<'a>,
+) -> Result<Option<Vec<DocumentSymbol>>, PositionConversionError> {
+    let range = nemo_range_to_lsp_range(line_index, node.span().range())?;
+
+    let children_results: Vec<_> = node
+        .children()
+        .into_iter()
+        .map(|child| ast_node_to_document_symbol(line_index, child))
+        .collect();
+    let mut children = Vec::with_capacity(children_results.len());
+    for child_result in children_results {
+        child_result?
             .into_iter()
             .flatten()
-            .map(|child| ast_node_to_document_symbol(line_index, child))
-            .collect();
-        let mut children = Vec::with_capacity(children_results.len());
-        for child_result in children_results {
-            child_result?
-                .into_iter()
-                .for_each(|symbol| children.push(symbol))
-        }
-        let children = if children.is_empty() {
-            None
-        } else {
-            Some(children)
-        };
+            .for_each(|symbol| children.push(symbol))
+    }
+    let children = if children.is_empty() {
+        None
+    } else {
+        Some(children)
+    };
 
-        Ok(Some(
+    if let Some(symb_info) = node.symbol_info() {
+        Ok(Some(vec![
             #[allow(deprecated)]
             DocumentSymbol {
                 children,
                 detail: None,
-                kind,
-                name,
+                kind: *symb_info.kind(),
+                name: symb_info.name().to_string(),
                 range,
                 selection_range: range,
                 tags: None,
                 deprecated: None,
             },
-        ))
+        ]))
     } else {
-        Ok(None)
+        Ok(children)
     }
 }
diff --git a/nemo-language-server/src/language_server/lsp_component.rs b/nemo-language-server/src/language_server/lsp_component.rs
new file mode 100644
index 000000000..3b78e79a1
--- /dev/null
+++ b/nemo-language-server/src/language_server/lsp_component.rs
@@ -0,0 +1,136 @@
+//! This module defines traits and data structures
+//! relating to the language server protocol support.
+
+use nemo::parser::{ast::ProgramAST, context::ParserContext, span::CharacterRange};
+use tower_lsp::lsp_types::SymbolKind;
+
+/// An LSP Identifier
+#[derive(Debug)]
+pub(super) struct LSPIdentifier {
+    identifier: (ParserContext, String),
+    scope: ParserContext,
+}
+
+impl LSPIdentifier {
+    /// Get the identifier of this [`LSPIdentifier`]
+    pub(super) fn identifier(&self) -> &(ParserContext, String) {
+        &self.identifier
+    }
+
+    /// Get the scope of this [`LSPIdentifier`]
+    pub(super) fn scope(&self) -> &ParserContext {
+        &self.scope
+    }
+}
+
+/// Information about the symbol
+#[derive(Debug)]
+pub(super) struct LSPSymbolInfo {
+    name: String,
+    kind: SymbolKind,
+}
+
+impl LSPSymbolInfo {
+    /// Get the name of this [`LSPSymbolInfo`]
+    pub(super) fn name(&self) -> &str {
+        &self.name
+    }
+
+    /// Get the [`SymbolKind`] of this [`LSPSymbolInfo`]
+    pub(super) fn kind(&self) -> &SymbolKind {
+        &self.kind
+    }
+}
+
+/// Trait implemented by objects that correspond to
+/// objects identified by the LSP
+pub(super) trait LSPComponent {
+    /// Return an [`LSPIdentifier`].
+    ///
+    /// The identifier scope will scope this identifier up to any [`LSPComponent`]
+    /// that has the identifier scope as its type (aka. context).
+    ///
+    /// This can be used to restrict rename operations to be local, e.g. for variable identifiers inside of rules.
+    fn identifier(&self) -> Option<LSPIdentifier>;
+
+    /// Return information about this symbol, e.g. for syntax highlighting
+    fn symbol_info(&self) -> Option<LSPSymbolInfo>;
+
+    /// Range of the part of the node that should be renamed or [None] if the node can not be renamed
+    fn range_renaming(&self) -> Option<CharacterRange>;
+}
+
+impl<'a, T: ?Sized> LSPComponent for T
+where
+    T: ProgramAST<'a>,
+{
+    fn identifier(&self) -> Option<LSPIdentifier> {
+        let scope = match self.context() {
+            ParserContext::Number | ParserContext::Variable | ParserContext::RdfLiteral => {
+                Some(ParserContext::Rule)
+            }
+            ParserContext::Iri
+            | ParserContext::Constant
+            | ParserContext::String
+            | ParserContext::StructureTag
+            | ParserContext::Rule
+            | ParserContext::Prefix => Some(ParserContext::Program),
+            _ => None,
+        };
+
+        scope.map(|scope| LSPIdentifier {
+            scope,
+            identifier: (self.context(), self.span().0.fragment().to_string()),
+        })
+    }
+
+    fn symbol_info(&self) -> Option<LSPSymbolInfo> {
+        let kind = match self.context() {
+            ParserContext::DataType => Some(SymbolKind::TYPE_PARAMETER),
+            ParserContext::Variable => Some(SymbolKind::VARIABLE),
+            ParserContext::Iri => Some(SymbolKind::STRING),
+            ParserContext::Constant | ParserContext::RdfLiteral | ParserContext::Blank => {
+                Some(SymbolKind::CONSTANT)
+            }
+            ParserContext::StructureTag => Some(SymbolKind::FUNCTION),
+            ParserContext::Number => Some(SymbolKind::NUMBER),
+            ParserContext::String => Some(SymbolKind::STRING),
+            ParserContext::Boolean => Some(SymbolKind::BOOLEAN),
+            ParserContext::Base
+            | ParserContext::Declare
+            | ParserContext::Export
+            | ParserContext::Import
+            | ParserContext::Output
+            | ParserContext::Prefix
+            | ParserContext::UnknownDirective => Some(SymbolKind::PROPERTY),
+            ParserContext::Arithmetic
+            | ParserContext::Negation
+            | ParserContext::AggregationTag
+            | ParserContext::OperationTag
+            | ParserContext::Infix => Some(SymbolKind::OPERATOR),
+            ParserContext::Program => Some(SymbolKind::FILE),
+            _ => None,
+        };
+
+        kind.map(|kind| LSPSymbolInfo {
+            kind,
+            name: format!("{}: {}", self.context().name(), self.span().0.fragment()),
+        })
+    }
+
+    fn range_renaming(&self) -> Option<CharacterRange> {
+        let allows_renaming = matches!(
+            self.context(),
+            ParserContext::Variable
+                | ParserContext::Iri
+                | ParserContext::Constant
+                | ParserContext::Number
+                | ParserContext::String
+                | ParserContext::RdfLiteral
+                | ParserContext::StructureTag
+                | ParserContext::Prefix
+        );
+
+        allows_renaming.then_some(self.span().range())
+    }
+}
diff --git a/nemo-language-server/src/language_server/nemo_position.rs b/nemo-language-server/src/language_server/nemo_position.rs
index 782016991..5f0f0742e 100644
--- a/nemo-language-server/src/language_server/nemo_position.rs
+++ b/nemo-language-server/src/language_server/nemo_position.rs
@@ -1,19 +1,21 @@
 //! LSP position:
 //!
 //! * line: u32 index of the line, first line gets index 0
-//! * offset: u32 index of the UTF-16 code point within the line, first column gets index 0
+//! * character: u32 index of the UTF-16 code point within the line, first column gets index 0
 //!
 //! Nemo position:
 //!
 //! * line: u32 index of the line, first line gets index 1
-//! * offset: u32 index of the UTF-8 code point (byte) within the line, first column gets index 0
+//! * column: u32 index of the UTF-8 character within the line, first column gets index 1
+//! * offset: usize index of the UTF-8 code point (byte) from the start of the parser input (0-indexed)
 
 use anyhow::anyhow;
 use line_index::{LineCol, LineIndex, WideEncoding, WideLineCol};
+use nemo::parser::span::{CharacterPosition, CharacterRange};
 
 #[derive(Debug)]
 pub enum PositionConversionError {
-    NemoPosition(nemo::io::parser::ast::Position),
+    NemoPosition(CharacterPosition),
     LspPosition(tower_lsp::lsp_types::Position),
     LspLineCol(LineCol),
 }
@@ -27,10 +29,10 @@ impl From<PositionConversionError> for anyhow::Error {
 fn line_col_to_nemo_position(
     line_index: &LineIndex,
     line_col: LineCol,
-) -> Result<nemo::io::parser::ast::Position, PositionConversionError> {
-    Ok(nemo::io::parser::ast::Position {
+) -> Result<CharacterPosition, PositionConversionError> {
+    Ok(CharacterPosition {
         line: line_col.line + 1,
-        column: line_col.col,
+        column: line_col.col + 1,
         offset: line_index
             .offset(line_col)
            .ok_or(PositionConversionError::LspLineCol(line_col))?
@@ -42,7 +44,7 @@ fn line_col_to_nemo_position(
 pub fn lsp_position_to_nemo_position(
     line_index: &LineIndex,
     position: tower_lsp::lsp_types::Position,
-) -> Result<nemo::io::parser::ast::Position, PositionConversionError> {
+) -> Result<CharacterPosition, PositionConversionError> {
     let line_col = line_index
         .to_utf8(
             WideEncoding::Utf16,
@@ -56,7 +58,7 @@ pub fn lsp_position_to_nemo_position(
     line_col_to_nemo_position(line_index, line_col)
 }
 
-fn nemo_position_to_line_col(position: nemo::io::parser::ast::Position) -> LineCol {
+fn nemo_position_to_line_col(position: CharacterPosition) -> LineCol {
     LineCol {
         line: position.line - 1,
         col: position.column - 1,
@@ -66,9 +68,8 @@ fn nemo_position_to_line_col(position: nemo::io::parser::ast::Position) -> LineC
 /// Converts a source position to a LSP position
 pub fn nemo_position_to_lsp_position(
     line_index: &LineIndex,
-    position: nemo::io::parser::ast::Position,
+    position: CharacterPosition,
 ) -> Result<tower_lsp::lsp_types::Position, PositionConversionError> {
-    // TODO: Find out what UTF encoding nemo parser uses
     let wide_line_col = line_index
         .to_wide(WideEncoding::Utf16, nemo_position_to_line_col(position))
         .ok_or(PositionConversionError::NemoPosition(position))?;
@@ -82,7 +83,7 @@ pub fn nemo_position_to_lsp_position(
 /// Converts a Nemo range to a LSP range
 pub fn nemo_range_to_lsp_range(
     line_index: &LineIndex,
-    range: nemo::io::parser::ast::Range,
+    range: CharacterRange,
 ) -> Result<tower_lsp::lsp_types::Range, PositionConversionError> {
     Ok(tower_lsp::lsp_types::Range {
         start: nemo_position_to_lsp_position(line_index, range.start)?,
diff --git a/nemo-wasm/src/lib.rs b/nemo-wasm/src/lib.rs
index a7d54f8d2..faae47392 100644
--- a/nemo-wasm/src/lib.rs
+++ b/nemo-wasm/src/lib.rs
@@ -14,16 +14,14 @@ use nemo::execution::tracing::trace::ExecutionTraceTree;
 use nemo::execution::tracing::trace::TraceFactHandle;
 use nemo::execution::ExecutionEngine;
 
-use nemo::io::compression_format::CompressionFormat;
-use nemo::io::parser::old::parse_fact;
-use nemo::io::parser::old::parse_program;
 use nemo::io::resource_providers::{ResourceProvider, ResourceProviders};
 use nemo::io::ImportManager;
-use nemo::model::Atom;
-use nemo::model::Fact;
-use nemo::model::Identifier;
-use nemo::model::PrimitiveTerm;
-use nemo::model::Term;
+use nemo::rule_model::components::import_export::attributes::ImportExportAttribute;
+use nemo::rule_model::components::ProgramComponent;
+use nemo::rule_model::components::{
+    fact::Fact, import_export::compression::CompressionFormat, tag::Tag,
+    term::primitive::Primitive, term::Term,
+};
 use nemo_physical::datavalues::AnyDataValue;
 use nemo_physical::datavalues::DataValue;
 use nemo_physical::error::ExternalReadingError;
@@ -38,17 +36,25 @@ use web_sys::FileReaderSync;
 
 mod language_server;
 
+const PROGRAM_LABEL: &str = "nemo-web";
+
 #[wasm_bindgen]
 #[derive(Clone)]
-pub struct NemoProgram(nemo::model::Program);
+pub struct NemoProgram(nemo::rule_model::program::Program);
 
 #[derive(Error, Debug)]
 enum WasmOrInternalNemoError {
     /// Nemo-internal error
     #[error(transparent)]
-    NemoError(#[from] nemo::error::Error),
+    Nemo(#[from] nemo::error::Error),
+    #[error("ComponentParseError: {0:#?}")]
+    ComponentParse(nemo::rule_model::components::parse::ComponentParseError),
+    #[error("ParserError: {0:#?}")]
+    Parser(Vec<nemo::parser::error::ParserError>),
+    #[error("ProgramError: {0:#?}")]
+    Program(Vec<nemo::rule_model::error::ProgramError>),
     #[error("Internal reflection error: {0:#?}")]
-    ReflectionError(JsValue),
+    Reflection(JsValue),
 }
 
 #[wasm_bindgen]
@@ -69,10 +75,20 @@ impl NemoError {
 impl NemoProgram {
     #[wasm_bindgen(constructor)]
     pub fn new(input: &str) -> Result<NemoProgram, NemoError> {
-        parse_program(input)
-            .map(NemoProgram)
-            .map_err(WasmOrInternalNemoError::NemoError)
+        nemo::parser::Parser::initialize(input, PROGRAM_LABEL.to_string())
+            .parse()
+            .map_err(|(_, report)| WasmOrInternalNemoError::Parser(report.errors().clone()))
             .map_err(NemoError)
+            .and_then(|ast| {
+                nemo::rule_model::translation::ASTProgramTranslation::initialize(
+                    input,
+                    PROGRAM_LABEL.to_string(),
+                )
+                .translate(&ast)
+                .map_err(|report| WasmOrInternalNemoError::Program(report.errors().clone()))
+                .map_err(NemoError)
+                .map(NemoProgram)
+            })
     }
 
     /// Get all resources that are referenced in import directives of the program.
@@ -82,36 +98,33 @@ impl NemoProgram {
     /// just make sure that things validate upon creation, and make sure that problems
     /// are detected early.
     #[wasm_bindgen(js_name = "getResourcesUsedInImports")]
-    pub fn resources_used_in_imports(&self) -> Result<Set, NemoError> {
+    pub fn resources_used_in_imports(&self) -> Set {
         let js_set = Set::new(&JsValue::undefined());
 
         for directive in self.0.imports() {
-            let resource = ImportManager::resource(directive)
-                .map_err(WasmOrInternalNemoError::NemoError)
-                .map_err(NemoError)?;
-            js_set.add(&JsValue::from(resource));
+            if let Some(resource) = directive.attributes().get(&ImportExportAttribute::Resource) {
+                js_set.add(&JsValue::from(resource.to_string()));
+            }
         }
 
-        Ok(js_set)
+        js_set
     }
 
     // If there are no outputs, marks all predicates as outputs.
     #[wasm_bindgen(js_name = "markDefaultOutputs")]
     pub fn mark_default_output_predicates(&mut self) {
-        if self.0.output_predicates().next().is_none() {
-            let mut additional_outputs = Vec::new();
-            for predicate in self.0.predicates() {
-                additional_outputs.push(predicate);
+        if self.0.outputs().next().is_none() {
+            for predicate in self.0.all_predicates() {
+                self.0.add_output(predicate)
             }
-            self.0.add_output_predicates(additional_outputs);
         }
     }
 
     #[wasm_bindgen(js_name = "getOutputPredicates")]
     pub fn output_predicates(&self) -> Array {
         self.0
-            .output_predicates()
-            .map(|id| JsValue::from(id.name()))
+            .outputs()
+            .map(|o| JsValue::from(o.predicate().to_string()))
             .collect()
     }
 
@@ -119,8 +132,8 @@ impl NemoProgram {
     pub fn edb_predicates(&self) -> Set {
         let js_set = Set::new(&JsValue::undefined());
 
-        for identifier in self.0.edb_predicates().into_iter() {
-            js_set.add(&JsValue::from(identifier.name()));
+        for tag in self.0.import_predicates().into_iter() {
+            js_set.add(&JsValue::from(tag.to_string()));
         }
 
         js_set
@@ -249,12 +262,12 @@ impl NemoEngine {
         // Parse JavaScript object into `HashMap`
         let mut resource_blobs = HashMap::new();
         for key in Reflect::own_keys(&resource_blobs_js_value)
-            .map_err(WasmOrInternalNemoError::ReflectionError)
+            .map_err(WasmOrInternalNemoError::Reflection)
            .map_err(NemoError)?
        {
            if let Some(resource) = key.as_string() {
                let value = Reflect::get(&resource_blobs_js_value, &key)
-                    .map_err(WasmOrInternalNemoError::ReflectionError)
+                    .map_err(WasmOrInternalNemoError::Reflection)
                    .map_err(NemoError)?;
 
                let blob: Blob = JsCast::dyn_into(value).unwrap();
@@ -267,14 +280,14 @@ impl NemoEngine {
         } else {
             ResourceProviders::from(vec![Box::new(
                 BlobResourceProvider::new(resource_blobs)
-                    .map_err(WasmOrInternalNemoError::ReflectionError)
+                    .map_err(WasmOrInternalNemoError::Reflection)
                     .map_err(NemoError)?,
             )])
         };
         let import_manager = ImportManager::new(resource_providers);
 
         let engine = ExecutionEngine::initialize(&program.0, import_manager)
-            .map_err(WasmOrInternalNemoError::NemoError)
+            .map_err(WasmOrInternalNemoError::Nemo)
             .map_err(NemoError)?;
 
         Ok(NemoEngine {
@@ -287,7 +300,7 @@ impl NemoEngine {
     pub fn reason(&mut self) -> Result<(), NemoError> {
         self.engine
             .execute()
-            .map_err(WasmOrInternalNemoError::NemoError)
+            .map_err(WasmOrInternalNemoError::Nemo)
             .map_err(NemoError)
     }
 
@@ -305,8 +318,8 @@ impl NemoEngine {
     pub fn result(&mut self, predicate: String) -> Result<NemoResults, NemoError> {
         let iter = self
             .engine
-            .predicate_rows(&Identifier::from(predicate))
-            .map_err(WasmOrInternalNemoError::NemoError)
+            .predicate_rows(&Tag::from(predicate))
+            .map_err(WasmOrInternalNemoError::Nemo)
             .map_err(NemoError)?;
 
         let results = NemoResults(Box::new(
@@ -323,12 +336,15 @@ impl NemoEngine {
         predicate: String,
         sync_access_handle: web_sys::FileSystemSyncAccessHandle,
     ) -> Result<(), NemoError> {
-        use nemo::{
-            io::ExportManager,
-            model::{ExportDirective, Identifier},
+        use nemo::io::{
+            formats::{
+                dsv::{value_format::DsvValueFormats, DsvHandler},
+                Direction, ImportExportHandler, ImportExportResource,
+            },
+            ExportManager,
         };
 
-        let identifier = Identifier::from(predicate.clone());
+        let identifier = Tag::from(predicate.clone());
 
         let Some(arity) = self.engine.predicate_arity(&identifier) else {
             return Ok(());
        };
 
         let Some(record_iter) = self
             .engine
             .predicate_rows(&identifier)
-            .map_err(WasmOrInternalNemoError::NemoError)
+            .map_err(WasmOrInternalNemoError::Nemo)
            .map_err(NemoError)?
        else {
            return Ok(());
        };
 
         let writer = SyncAccessHandleWriter(sync_access_handle);
 
-        let export_spec = ExportDirective::default(identifier);
+        let export_handler: Box<dyn ImportExportHandler> = Box::new(DsvHandler::new(
+            b',',
+            ImportExportResource::Stdout,
+            DsvValueFormats::default(arity),
+            None,
+            CompressionFormat::None,
+            Direction::Export,
+        ));
 
         let export_manager: ExportManager = Default::default();
 
         export_manager
-            .export_table_with_writer(&export_spec, Box::new(writer), Some(record_iter), arity)
-            .map_err(WasmOrInternalNemoError::NemoError)
+            .export_table_with_writer(Box::new(writer), &export_handler, Some(record_iter))
+            .map_err(WasmOrInternalNemoError::Nemo)
             .map_err(NemoError)
     }
 
@@ -361,21 +384,20 @@
     ) -> Result<Option<(ExecutionTraceTree, Vec<TraceFactHandle>)>, NemoError> {
         let iter = self
             .engine
-            .predicate_rows(&Identifier::from(predicate.clone()))
-            .map_err(WasmOrInternalNemoError::NemoError)
+            .predicate_rows(&Tag::from(predicate.clone()))
+            .map_err(WasmOrInternalNemoError::Nemo)
             .map_err(NemoError)?;
 
         let terms_to_trace_opt: Option<Vec<AnyDataValue>> = iter.into_iter().flatten().nth(row_index);
 
         if let Some(terms_to_trace) = terms_to_trace_opt {
-            let fact_to_trace: Fact = Fact(Atom::new(
-                Identifier::from(predicate),
+            let fact_to_trace: Fact = Fact::new(
+                &predicate,
                 terms_to_trace
                     .into_iter()
-                    .map(|term| Term::Primitive(PrimitiveTerm::from(term)))
-                    .collect(),
-            ));
+                    .map(|term| Term::Primitive(Primitive::from(term))),
+            );
 
             let (trace, handles) = self
                 .engine
@@ -433,8 +455,8 @@ impl NemoEngine {
         &mut self,
         fact: &str,
     ) -> Result<Option<(ExecutionTraceTree, Vec<TraceFactHandle>)>, NemoError> {
-        let parsed_fact = parse_fact(fact.to_owned())
-            .map_err(WasmOrInternalNemoError::NemoError)
+        let parsed_fact = Fact::parse(fact)
+            .map_err(WasmOrInternalNemoError::ComponentParse)
             .map_err(NemoError)?;
 
         let (trace, handles) = self.engine.trace(self.program.0.clone(), vec![parsed_fact]);
diff --git a/nemo/src/io/formats/dsv.rs b/nemo/src/io/formats/dsv.rs
index 944b7aa3b..92b900c95 100644
--- a/nemo/src/io/formats/dsv.rs
+++ b/nemo/src/io/formats/dsv.rs
@@ -1,7 +1,7 @@
 //! Handler for resources of type DSV (delimiter-separated values).
 
 pub(crate) mod reader;
-pub(crate) mod value_format;
+pub mod value_format;
 pub(crate) mod writer;
 
 use std::io::{BufRead, Write};
@@ -22,7 +22,7 @@ use super::{Direction, ImportExportHandler, ImportExportResource, TableWriter};
 
 /// An [ImportExportHandler] for delimiter-separated values.
 #[derive(Debug, Clone)]
-pub(crate) struct DsvHandler {
+pub struct DsvHandler {
     /// The specific delimiter for this format.
     delimiter: u8,
     /// The resource to write to/read from.
@@ -42,7 +42,7 @@ pub(crate) struct DsvHandler {
 
 impl DsvHandler {
     /// Create a new [DsvHandler].
-    pub(crate) fn new(
+    pub fn new(
         delimiter: u8,
         resource: ImportExportResource,
         value_formats: DsvValueFormats,
diff --git a/nemo/src/io/formats/dsv/value_format.rs b/nemo/src/io/formats/dsv/value_format.rs
index 0f52d73c9..022c66871 100644
--- a/nemo/src/io/formats/dsv/value_format.rs
+++ b/nemo/src/io/formats/dsv/value_format.rs
@@ -53,7 +53,7 @@ pub(crate) enum DsvValueFormat {
 
 /// Indicate what value parser should be used for each column.
 #[derive(Debug, Clone)]
-pub(crate) struct DsvValueFormats(Vec<DsvValueFormat>);
+pub struct DsvValueFormats(Vec<DsvValueFormat>);
 
 impl DsvValueFormats {
     pub(crate) fn new(formats: Vec<DsvValueFormat>) -> Self {
@@ -61,7 +61,7 @@ impl DsvValueFormats {
     }
 
     /// Return a list of [DsvValueFormat]s with default entries.
- pub(crate) fn default(arity: usize) -> Self { + pub fn default(arity: usize) -> Self { Self((0..arity).map(|_| DsvValueFormat::Anything).collect()) } diff --git a/nemo/src/parser.rs b/nemo/src/parser.rs index b1ef7c08e..fd4a49081 100644 --- a/nemo/src/parser.rs +++ b/nemo/src/parser.rs @@ -4,7 +4,6 @@ pub mod ast; pub mod context; pub mod error; pub mod input; -pub mod lsp; pub mod span; use std::{cell::RefCell, ops::Range, rc::Rc}; @@ -86,6 +85,11 @@ impl<'a> ParserErrorReport<'a> { .finish() }) } + + /// Return raw [`ParserError`s] + pub fn errors(&self) -> &Vec { + &self.errors + } } impl<'a> Parser<'a> { diff --git a/nemo/src/parser/error.rs b/nemo/src/parser/error.rs index 00367262a..3198ede51 100644 --- a/nemo/src/parser/error.rs +++ b/nemo/src/parser/error.rs @@ -29,12 +29,12 @@ pub type ParserErrorTree<'a> = GenericErrorTree< >; /// Error while parsing a nemo program -#[derive(Debug)] -pub(crate) struct ParserError { +#[derive(Clone, Debug)] +pub struct ParserError { /// Position where the error occurred - pub(crate) position: CharacterPosition, + pub position: CharacterPosition, /// Parsing stack - pub(crate) context: Vec, + pub context: Vec, } /// Skip a statement, returning an error token. diff --git a/nemo/src/parser/lsp.rs b/nemo/src/parser/lsp.rs deleted file mode 100644 index 434a10e60..000000000 --- a/nemo/src/parser/lsp.rs +++ /dev/null @@ -1,37 +0,0 @@ -//! This module defines traits and data structures -//! relating to the language server protocol support. -//! TODO: Document this better - -use tower_lsp::lsp_types::SymbolKind; - -use super::span::CharacterRange; - -/// An LSP Identifier -#[derive(Debug)] -pub struct LSPIdentifier { - identifier: String, - scope: String, -} - -/// Information about the symbol -#[derive(Debug)] -pub struct LSPSymbolInfo { - name: String, - kind: SymbolKind, -} - -/// Trait implemented by objects that correspond to -/// that correspond to objects identified by the LSP -pub trait LSPComponent { - /// Return a an [LSPIdentifier]. - /// - /// The identifier scope will scope this identifier up to any [`AstNode`] - /// that has an identifier that has this node's identifier scope as a prefix. - /// - /// This can be used to restict rename operations to be local, e.g. for variable idenfiers inside of rules. - fn identifier(&self) -> Option; - /// Return information about this symbol, e.g. 
for syntax highlighting - fn symbol_info(&self) -> Option; - /// Range of the part of the node that should be renamed or [None] if the node can not be renamed - fn range_renaming(&self) -> Option; -} diff --git a/nemo/src/parser/span.rs b/nemo/src/parser/span.rs index 0a4ce2761..dd9f70e38 100644 --- a/nemo/src/parser/span.rs +++ b/nemo/src/parser/span.rs @@ -70,7 +70,7 @@ impl CharacterRange { /// Maker for a region of text within a string slice #[derive(Debug, Clone, Copy)] -pub struct Span<'a>(pub(crate) LocatedSpan<&'a str>); +pub struct Span<'a>(pub LocatedSpan<&'a str>); impl<'a> From> for Span<'a> { fn from(value: LocatedSpan<&'a str>) -> Self { diff --git a/nemo/src/rule_model/components/fact.rs b/nemo/src/rule_model/components/fact.rs index eb3933075..69d126ec2 100644 --- a/nemo/src/rule_model/components/fact.rs +++ b/nemo/src/rule_model/components/fact.rs @@ -109,7 +109,8 @@ impl ProgramComponent for Fact { string, crate::parser::ast::expression::Expression::parse_complex, ASTProgramTranslation::build_head_atom - ).map(Fact::from) + ) + .map(Fact::from) } fn origin(&self) -> &Origin { diff --git a/nemo/src/rule_model/error.rs b/nemo/src/rule_model/error.rs index cf928409a..ceb93d8db 100644 --- a/nemo/src/rule_model/error.rs +++ b/nemo/src/rule_model/error.rs @@ -41,7 +41,7 @@ pub enum ComplexErrorLabelKind { } /// Label of a [ComplexError] -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct ComplexErrorLabel where Reference: Debug, @@ -55,7 +55,7 @@ where } /// Complex error that additional information to an error -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct ComplexError where Reference: Debug, @@ -107,7 +107,7 @@ where message: Message, ) -> &mut Self { self.labels.push(ComplexErrorLabel { - kind: kind, + kind, reference, message: message.to_string(), }); @@ -165,7 +165,7 @@ where } /// Error that occurs during validation of a program. 
-#[derive(Debug)] +#[derive(Clone, Debug)] pub struct ValidationError { /// The kind of error kind: ValidationErrorKind, @@ -213,7 +213,7 @@ impl ValidationErrorBuilder { } /// Error that occurs while translating the ast into the logical representation -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct TranslationError { /// The type of error that occurred kind: TranslationErrorKind, @@ -269,7 +269,7 @@ impl TranslationError { } /// Error that may occur while translating or validating a nemo program -#[derive(Debug)] +#[derive(Clone, Debug)] pub enum ProgramError { /// Error occurred while translating /// the AST representation into the logical representation diff --git a/nemo/src/rule_model/error/hint.rs b/nemo/src/rule_model/error/hint.rs index 26dbc9969..8aa7705ed 100644 --- a/nemo/src/rule_model/error/hint.rs +++ b/nemo/src/rule_model/error/hint.rs @@ -6,7 +6,7 @@ pub(crate) mod similar; use enum_assoc::Assoc; /// Hints for error messages -#[derive(Assoc, Debug)] +#[derive(Assoc, Clone, Debug)] #[func(pub fn message(&self) -> String)] pub enum Hint { #[assoc(message = "unnamed universal variables may be expressed with an underscore `_`".to_string())] diff --git a/nemo/src/rule_model/error/validation_error.rs b/nemo/src/rule_model/error/validation_error.rs index 72185a28c..37337d553 100644 --- a/nemo/src/rule_model/error/validation_error.rs +++ b/nemo/src/rule_model/error/validation_error.rs @@ -7,7 +7,7 @@ use thiserror::Error; use crate::rule_model::components::term::primitive::variable::Variable; /// Types of errors that occur while building the logical rule model -#[derive(Assoc, Error, Debug)] +#[derive(Assoc, Error, Clone, Debug)] #[func(pub fn note(&self) -> Option<&'static str>)] #[func(pub fn code(&self) -> usize)] pub enum ValidationErrorKind { diff --git a/nemo/src/rule_model/translation.rs b/nemo/src/rule_model/translation.rs index 5210a9937..020cad927 100644 --- a/nemo/src/rule_model/translation.rs +++ b/nemo/src/rule_model/translation.rs @@ -137,6 +137,11 @@ impl<'a> ProgramErrorReport<'a> { }) .collect() } + + /// Return raw [`ProgramError`s] + pub fn errors(&self) -> &Vec { + &self.errors + } } impl<'a> ASTProgramTranslation<'a> { From 668991bf8336d8bce7a2ed273bb391ee1b381a4d Mon Sep 17 00:00:00 2001 From: Alex Ivliev Date: Mon, 16 Sep 2024 16:57:00 +0200 Subject: [PATCH 162/214] Revert to old syntax --- nemo-physical/src/datasources/tuple_writer.rs | 2 +- nemo-physical/src/datavalues/syntax.rs | 2 +- nemo/src/rule_model/components.rs | 1 - nemo/src/syntax.rs | 14 +++++++------- 4 files changed, 9 insertions(+), 10 deletions(-) diff --git a/nemo-physical/src/datasources/tuple_writer.rs b/nemo-physical/src/datasources/tuple_writer.rs index 438622f3e..451d8d029 100644 --- a/nemo-physical/src/datasources/tuple_writer.rs +++ b/nemo-physical/src/datasources/tuple_writer.rs @@ -74,7 +74,7 @@ impl<'a> TupleWriter<'a> { } #[cfg(test)] -pub mod test { +mod test { use std::cell::RefCell; use crate::{ diff --git a/nemo-physical/src/datavalues/syntax.rs b/nemo-physical/src/datavalues/syntax.rs index 2c0fdf3fd..f53062fee 100644 --- a/nemo-physical/src/datavalues/syntax.rs +++ b/nemo-physical/src/datavalues/syntax.rs @@ -19,7 +19,7 @@ pub mod map { /// Closing delimiter for tuples. pub const CLOSE: &str = "}"; /// Assignment operator for maps. 
- pub const KEY_VALUE_ASSIGN: &str = ":"; + pub const KEY_VALUE_ASSIGN: &str = "="; /// Separator for key-value pairs in maps pub const SEPARATOR: &str = ","; } diff --git a/nemo/src/rule_model/components.rs b/nemo/src/rule_model/components.rs index 8ac9340f6..00316ce55 100644 --- a/nemo/src/rule_model/components.rs +++ b/nemo/src/rule_model/components.rs @@ -23,7 +23,6 @@ use term::primitive::{variable::Variable, Primitive}; use super::{error::ValidationErrorBuilder, origin::Origin}; -/// TODO: Think whether this is needed /// Types of [ProgramComponent]s #[derive(Assoc, Debug, Copy, Clone, Eq, PartialEq)] #[func(pub fn name(&self) -> &'static str)] diff --git a/nemo/src/syntax.rs b/nemo/src/syntax.rs index 433e254fe..b3465fcbf 100644 --- a/nemo/src/syntax.rs +++ b/nemo/src/syntax.rs @@ -24,7 +24,7 @@ pub mod directive { pub const PREFIX: &str = "prefix"; /// The token used to separate prefix and name - pub const NAMESPACE_SEPARATOR: &str = "::"; + pub const NAMESPACE_SEPARATOR: &str = ":"; /// The token used to assign the prefix in the prefix directive. pub const PREFIX_ASSIGNMENT: &str = ":"; @@ -128,15 +128,15 @@ pub mod comment { //! This module contains the syntax definitions for comments. /// The token identifying top level documentation comments. - pub const TOP_LEVEL: &str = "//!"; + pub const TOP_LEVEL: &str = "%!"; /// The token identifying documentation comments. - pub const DOC_COMMENT: &str = "///"; + pub const DOC_COMMENT: &str = "%%%"; /// The token identifying normal comments. - pub const COMMENT: &str = "//"; - /// The token to handle four slashes as a norma comment and not a doc comment. - pub const COMMENT_LONG: &str = "////"; + pub const COMMENT: &str = "%"; + /// The token to handle four comment symbols as a normal comment and not a doc comment. + pub const COMMENT_LONG: &str = "%%%%"; /// The continuation of the comment syntax - pub(crate) const COMMENT_EXT: &str = "/"; + pub(crate) const COMMENT_EXT: &str = "%"; /// The opening token for closed comments. pub const CLOSED_OPEN: &str = "/*"; /// The closing token for closed comments. 
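A minimal sketch of what the reverted tokens imply in practice, assuming the `nemo::syntax` and `nemo_physical::datavalues::syntax` modules are exported as shown in the hunks above; the test itself is illustrative and not part of this patch series.

#[test]
fn reverted_syntax_tokens() {
    use nemo::syntax::{comment, directive};
    use nemo_physical::datavalues::syntax::map;

    // Line comments start with `%` again, doc comments with `%%%`.
    assert_eq!(comment::COMMENT, "%");
    assert_eq!(comment::DOC_COMMENT, "%%%");
    // Prefixed names are written `prefix:name` once more.
    assert_eq!(directive::NAMESPACE_SEPARATOR, ":");
    // Map entries bind keys to values with `=`, e.g. `{format = "csv"}`.
    assert_eq!(map::KEY_VALUE_ASSIGN, "=");
}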
From 045b226907544505aff8d3e174263b5a9e1175ca Mon Sep 17 00:00:00 2001
From: Alex Ivliev
Date: Mon, 16 Sep 2024 17:28:25 +0200
Subject: [PATCH 163/214] Uncomment tracing in cli app

---
 nemo-cli/src/cli.rs  |  4 +--
 nemo-cli/src/main.rs | 72 +++++++++++++++++++++++++-------------------
 2 files changed, 43 insertions(+), 33 deletions(-)

diff --git a/nemo-cli/src/cli.rs b/nemo-cli/src/cli.rs
index 949bde197..354746553 100644
--- a/nemo-cli/src/cli.rs
+++ b/nemo-cli/src/cli.rs
@@ -105,9 +105,9 @@ pub(crate) struct OutputArgs {
 impl OutputArgs {
     /// Creates an output file manager with the current options
-    pub(crate) fn export_manager(self) -> Result<ExportManager, Error> {
+    pub(crate) fn export_manager(&self) -> Result<ExportManager, Error> {
         let export_manager = ExportManager::default()
-            .set_base_path(self.export_directory)
+            .set_base_path(self.export_directory.clone())
             .overwrite(self.overwrite)
             .compress(self.gz);
         Ok(export_manager)
diff --git a/nemo-cli/src/main.rs b/nemo-cli/src/main.rs
index 1646abe7b..1dddf492c 100644
--- a/nemo-cli/src/main.rs
+++ b/nemo-cli/src/main.rs
@@ -19,7 +19,7 @@
 
 pub mod cli;
 
-use std::fs::read_to_string;
+use std::fs::{read_to_string, File};
 
 use clap::Parser;
 use colored::Colorize;
@@ -34,10 +34,13 @@ use nemo::{
     rule_model::{
         self,
         components::{
+            fact::Fact,
            import_export::{file_formats::FileFormat, ExportDirective},
             tag::Tag,
             term::map::Map,
+            ProgramComponent,
         },
+        error::ValidationErrorBuilder,
         program::Program,
     },
 };
@@ -218,12 +221,10 @@ fn run(mut cli: CliApp) -> Result<(), Error> {
     override_exports(&mut program, cli.output.export_setting);
     log::info!("Rules parsed");
 
-    let facts_to_be_traced = parse_trace_facts(&cli);
-
     let export_manager = cli.output.export_manager()?;
-
-    let import_manager =
-        ImportManager::new(ResourceProviders::with_base_path(cli.import_directory));
+    let import_manager = ImportManager::new(ResourceProviders::with_base_path(
+        cli.import_directory.clone(),
+    ));
 
     let mut engine: DefaultExecutionEngine =
         ExecutionEngine::initialize(&program, import_manager)?;
@@ -284,31 +285,40 @@ fn run(mut cli: CliApp) -> Result<(), Error> {
         print_memory_details(&engine);
     }
 
-    // NOTE: As a quick and dirty fix I commented this out, because `program.clone()` did not exist
-    // if let Some(facts) = facts_to_be_traced {
-    //     let (trace, handles) = engine.trace(program.clone(), facts.clone());

-    //     match cli.tracing.output_file {
-    //         Some(output_file) => {
-    //             let filename = output_file.to_string_lossy().to_string();
-    //             let trace_json = trace.json(&handles);

-    //             let mut json_file = File::create(output_file)?;
-    //             if serde_json::to_writer(&mut json_file, &trace_json).is_err() {
-    //                 return Err(Error::SerializationError { filename });
-    //             }
-    //         }
-    //         None => {
-    //             for (fact, handle) in facts.into_iter().zip(handles) {
-    //                 if let Some(tree) = trace.tree(handle) {
-    //                     println!("\n{}", tree.to_ascii_art());
-    //                 } else {
-    //                     println!("\n{fact} was not derived");
-    //                 }
-    //             }
-    //         }
-    //     }
-    // }
+    let tracing_facts = parse_trace_facts(&cli)?;
+    if !tracing_facts.is_empty() {
+        let mut facts = Vec::<Fact>::with_capacity(tracing_facts.len());
+        for fact_string in &tracing_facts {
+            let fact = Fact::parse(fact_string).unwrap(); // TODO: Handle errors
+            let mut builder = ValidationErrorBuilder::default();
+            if fact.validate(&mut builder).is_err() {} // TODO: Handle errors
+
+            facts.push(fact);
+        }
+
+        let (trace, handles) = engine.trace(program, facts);
+
+        match cli.tracing.output_file {
+            Some(output_file) => {
+                let filename = output_file.to_string_lossy().to_string();
+                let trace_json = trace.json(&handles);
+
+                let mut json_file = File::create(output_file)?;
+                if serde_json::to_writer(&mut json_file, &trace_json).is_err() {
+                    return Err(Error::SerializationError { filename });
+                }
+            }
+            None => {
+                for (fact, handle) in tracing_facts.into_iter().zip(handles) {
+                    if let Some(tree) = trace.tree(handle) {
+                        println!("\n{}", tree.to_ascii_art());
+                    } else {
+                        println!("\n{fact} was not derived");
+                    }
+                }
+            }
+        }
+    }
 
     Ok(())
 }

From ae98a8f516c8dd9d31c36b05ead21a9587869a6a Mon Sep 17 00:00:00 2001
From: Alex Ivliev
Date: Mon, 16 Sep 2024 20:25:23 +0200
Subject: [PATCH 164/214] Clean up error structs

---
 Cargo.lock                                    |   1 +
 nemo-cli/Cargo.toml                           |   1 +
 nemo-cli/src/error.rs                         |  46 ++++++++
 nemo-cli/src/main.rs                          | 109 +++++++++++-------
 nemo/src/error.rs                             |  18 ---
 .../execution/selection_strategy/strategy.rs  |   2 +-
 6 files changed, 114 insertions(+), 63 deletions(-)
 create mode 100644 nemo-cli/src/error.rs

diff --git a/Cargo.lock b/Cargo.lock
index ebaf384b9..68854a7c4 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1523,6 +1523,7 @@ dependencies = [
  "predicates",
  "serde_json",
  "test-log",
+ "thiserror",
 ]
 
[[package]]
diff --git a/nemo-cli/Cargo.toml b/nemo-cli/Cargo.toml
index aeb4a2895..3532cc0c9 100644
--- a/nemo-cli/Cargo.toml
+++ b/nemo-cli/Cargo.toml
@@ -20,6 +20,7 @@ clap = { version = "4.0.32", features = [ "derive", "cargo", "env" ] }
 colored = "2"
 env_logger = "*"
 serde_json = "1.0.108"
+thiserror = "1.0"
 
 nemo = { path = "../nemo" }
 ariadne = "0.4.1"
diff --git a/nemo-cli/src/error.rs b/nemo-cli/src/error.rs
new file mode 100644
index 000000000..af5ff3e75
--- /dev/null
+++ b/nemo-cli/src/error.rs
@@ -0,0 +1,46 @@
+//! This module defines all the errors that can occur while executing nemo-cli.
+
+use thiserror::Error;
+
+/// Errors that can occur during execution of Nemo's CLI app
+#[derive(Error, Debug)]
+pub enum CliError {
+    /// Error if no input rule files are specified
+    #[error("no input file was given")]
+    NoInput,
+    /// Error if the user asked for an unimplemented feature
+    #[error("multiple rule files are currently unsupported")]
+    MultipleFilesNotImplemented,
+    /// Error while serializing data to a file
+    #[error("Error while serializing data to {filename}.")]
+    SerializationError {
+        /// Name of the file where data could not have been serialized into
+        filename: String,
+    },
+    /// Errors on reading a file
+    #[error("failed to read `{filename}`: {error}")]
+    IoReading {
+        /// Contains the wrapped error
+        error: std::io::Error,
+        /// Filename which caused the error
+        filename: String,
+    },
+    /// Error while parsing fact for tracing
+    #[error("unable to parse fact: {fact}")]
+    TracingInvalidFact {
+        /// Incorrectly formatted fact
+        fact: String,
+    },
+    /// Error while parsing a rule file
+    #[error("unable to parse program `{filename}`")]
+    ProgramParsing {
+        /// Filename of the rule file
+        filename: String,
+    },
+    /// Error resulting from io operations
+    #[error(transparent)]
+    IoError(#[from] std::io::Error),
+    /// Error originating from nemo
+    #[error(transparent)]
+    NemoError(#[from] nemo::error::Error),
+}
diff --git a/nemo-cli/src/main.rs b/nemo-cli/src/main.rs
index 1dddf492c..b484145e9 100644
--- a/nemo-cli/src/main.rs
+++ b/nemo-cli/src/main.rs
@@ -18,6 +18,7 @@
 #![feature(macro_metavar_expr)]
 
 pub mod cli;
+pub mod error;
 
 use std::fs::{read_to_string, File};
 
@@ -26,8 +27,9 @@ use colored::Colorize;
 
 use cli::{CliApp, Exporting, Reporting};
 
+use error::CliError;
 use nemo::{
-    error::{Error, ReadingError},
+    error::Error,
     execution::{DefaultExecutionEngine, ExecutionEngine},
     io::{resource_providers::ResourceProviders, ImportManager},
     meta::timing::{TimedCode, TimedDisplay},
@@ -175,20 +177,70 @@ fn parse_trace_facts(cli: &CliApp) -> Result<Vec<String>, Error> {
     Ok(facts)
 }
 
-fn run(mut cli: CliApp) -> Result<(), Error> {
+/// Deal with tracing
+fn handle_tracing(
+    cli: &CliApp,
+    engine: &mut DefaultExecutionEngine,
+    program: Program,
+) -> Result<(), CliError> {
+    let tracing_facts = parse_trace_facts(&cli)?;
+    if !tracing_facts.is_empty() {
+        let mut facts = Vec::<Fact>::with_capacity(tracing_facts.len());
+        for fact_string in &tracing_facts {
+            let fact = Fact::parse(fact_string).map_err(|_| CliError::TracingInvalidFact {
+                fact: fact_string.clone(),
+            })?;
+            let mut builder = ValidationErrorBuilder::default();
+            if fact.validate(&mut builder).is_err() {
+                return Err(CliError::TracingInvalidFact {
+                    fact: fact_string.clone(),
+                });
+            }
+
+            facts.push(fact);
+        }
+
+        let (trace, handles) = engine.trace(program,
facts); - - match cli.tracing.output_file { - Some(output_file) => { - let filename = output_file.to_string_lossy().to_string(); - let trace_json = trace.json(&handles); - - let mut json_file = File::create(output_file)?; - if serde_json::to_writer(&mut json_file, &trace_json).is_err() { - return Err(Error::SerializationError { filename }); - } - } - None => { - for (fact, handle) in tracing_facts.into_iter().zip(handles) { - if let Some(tree) = trace.tree(handle) { - println!("\n{}", tree.to_ascii_art()); - } else { - println!("\n{fact} was not derived"); - } - } - } - } - } - - Ok(()) + handle_tracing(&cli, &mut engine, program) } fn main() { diff --git a/nemo/src/error.rs b/nemo/src/error.rs index f80a3cdd3..eaa7f989e 100644 --- a/nemo/src/error.rs +++ b/nemo/src/error.rs @@ -16,21 +16,9 @@ pub use nemo_physical::error::ReadingError; #[allow(variant_size_differences)] #[derive(Error, Debug)] pub enum Error { - /// Error which implies a needed Rollback - #[error("Rollback due to csv-error")] - Rollback(usize), /// Build selection strategy errror #[error(transparent)] SelectionStrategyError(#[from] SelectionStrategyError), - /// Error when converting floating type to integer point value - #[error("Floating type could not be converted to integer value")] - FloatingPointToInteger, - /// Error if no input rule files are specified - #[error("No inputs were specified")] - NoInput, - /// Error if the user asked for an unimplemented feature - #[error("Multiple file support is not yet implemented")] - MultipleFilesNotImplemented, /// Rule analysis errors #[error(transparent)] RuleAnalysisError(#[from] RuleAnalysisError), @@ -65,12 +53,6 @@ pub enum Error { /// The operation causing the failure operation: String, }, - /// Error while serializing data to a file - #[error("Error while serializing data to {filename}.")] - SerializationError { - /// Name of the file where data could not have been serialized into - filename: String, - }, /// Error related to handling of file formats #[error(transparent)] FileFormatError(#[from] ImportExportError), diff --git a/nemo/src/execution/selection_strategy/strategy.rs b/nemo/src/execution/selection_strategy/strategy.rs index 9ea70b5cf..b7767246d 100644 --- a/nemo/src/execution/selection_strategy/strategy.rs +++ b/nemo/src/execution/selection_strategy/strategy.rs @@ -7,7 +7,7 @@ use crate::chase_model::{analysis::program_analysis::RuleAnalysis, components::r /// Errors that can occur while creating a strategy. 
 #[derive(Error, Debug, Copy, Clone)]
 pub enum SelectionStrategyError {
-    /// Non-Stratifyable
+    /// Rules of the program cannot be stratified
     #[error("The rules of the program are not stratified.")]
     NonStratifiedProgram,
 }

From da6f438771df227f25230c3b028045428d627158 Mon Sep 17 00:00:00 2001
From: Alex Ivliev
Date: Mon, 16 Sep 2024 20:46:25 +0200
Subject: [PATCH 165/214] Reimplement Nemo Rust API

---
 nemo/src/api.rs | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/nemo/src/api.rs b/nemo/src/api.rs
index 1ccaca1eb..6885c0a14 100644
--- a/nemo/src/api.rs
+++ b/nemo/src/api.rs
@@ -25,8 +25,10 @@ use std::{fs::read_to_string, path::PathBuf};
 
 use crate::{
     error::{Error, ReadingError},
-    execution::DefaultExecutionEngine,
-    rule_model::components::tag::Tag,
+    execution::{DefaultExecutionEngine, ExecutionEngine},
+    io::{resource_providers::ResourceProviders, ImportManager},
+    parser::Parser,
+    rule_model::{components::tag::Tag, translation::ASTProgramTranslation},
 };
 
 /// Reasoning Engine exposed by the API
@@ -49,11 +51,15 @@ pub fn load(file: PathBuf) -> Result<Engine, Error> {
 ///
 /// # Error
 /// Returns an appropriate [Error] variant on parsing and feature check issues.
-pub fn load_string(_input: String) -> Result<Engine, Error> {
-    // let (ast, _errors) = parse_program_str(&input);
-    // let _program = Program::from_ast(ast);
-    todo!("ExecutionEngine has to use the new rule model")
-    // ExecutionEngine::initialize(&program, ImportManager::new(ResourceProviders::default()))
+pub fn load_string(input: String) -> Result<Engine, Error> {
+    let program_ast = Parser::initialize(&input, String::default())
+        .parse()
+        .map_err(|_| Error::ProgramParseError)?;
+    let program = ASTProgramTranslation::initialize(&input, String::default())
+        .translate(&program_ast)
+        .map_err(|_| Error::ProgramParseError)?;
+
+    ExecutionEngine::initialize(&program, ImportManager::new(ResourceProviders::default()))
 }
 
 /// Executes the reasoning process of the [Engine].
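A minimal sketch of driving the reimplemented API end to end; the program string is illustrative only, and `reason` is assumed to take `&mut Engine` as suggested by its doc comment above.

fn demo() -> Result<(), nemo::error::Error> {
    // Parse, translate and load an (illustrative) program via the new entry point.
    let mut engine = nemo::api::load_string("p(1). q(?x) :- p(?x).".to_string())?;
    // Run the reasoning process; signature assumed from the doc comment on `reason`.
    nemo::api::reason(&mut engine)?;
    Ok(())
}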
From 0d5ed9393ffacc9964476c03bb1e26787c9d0898 Mon Sep 17 00:00:00 2001 From: Alex Ivliev Date: Mon, 16 Sep 2024 20:46:38 +0200 Subject: [PATCH 166/214] Remove unused error structs --- nemo/src/error.rs | 14 +- nemo/src/io.rs | 1 - nemo/src/io/error.rs | 78 ---- nemo/src/io/formats/import_export.rs | 538 --------------------------- nemo/src/io/formats/json.rs | 9 - nemo/src/io/formats/rdf.rs | 140 ------- 6 files changed, 4 insertions(+), 776 deletions(-) delete mode 100644 nemo/src/io/error.rs delete mode 100644 nemo/src/io/formats/import_export.rs diff --git a/nemo/src/error.rs b/nemo/src/error.rs index eaa7f989e..1fbb99230 100644 --- a/nemo/src/error.rs +++ b/nemo/src/error.rs @@ -7,7 +7,7 @@ use thiserror::Error; use crate::{ chase_model::analysis::program_analysis::RuleAnalysisError, - execution::selection_strategy::strategy::SelectionStrategyError, io::error::ImportExportError, + execution::selection_strategy::strategy::SelectionStrategyError, }; pub use nemo_physical::error::ReadingError; @@ -22,6 +22,9 @@ pub enum Error { /// Rule analysis errors #[error(transparent)] RuleAnalysisError(#[from] RuleAnalysisError), + /// Error occurred during parsing + #[error("error while parsing program")] + ProgramParseError, /// IO Error #[error(transparent)] IO(#[from] std::io::Error), @@ -47,15 +50,6 @@ pub enum Error { /// Error in the physical layer #[error(transparent)] PhysicalError(#[from] nemo_physical::error::Error), - /// Error when trying to lookup unary operations - #[error("The unary operation {operation} is unknown.")] - UnknownUnaryOpertation { - /// The operation causing the failure - operation: String, - }, - /// Error related to handling of file formats - #[error(transparent)] - FileFormatError(#[from] ImportExportError), /// Error related to the creation of data values #[error(transparent)] DataValueCreationError(#[from] DataValueCreationError), diff --git a/nemo/src/io.rs b/nemo/src/io.rs index ec43e5561..05842779d 100644 --- a/nemo/src/io.rs +++ b/nemo/src/io.rs @@ -3,7 +3,6 @@ //! This module acts as a mediation layer between the logical and physical layer and offers traits to allow both layers an abstract view on the io process. pub mod compression_format; -pub mod error; pub mod export_manager; pub mod formats; pub mod import_manager; diff --git a/nemo/src/io/error.rs b/nemo/src/io/error.rs deleted file mode 100644 index e9b839613..000000000 --- a/nemo/src/io/error.rs +++ /dev/null @@ -1,78 +0,0 @@ -//! This module contains errors relating to io. - -use std::path::PathBuf; - -use nemo_physical::datavalues::AnyDataValue; -use thiserror::Error; - -use crate::rule_model::components::import_export::file_formats::FileFormat; - -/// Errors related to the creation and usage of [ImportExportHandler]s. -#[derive(Debug, Error)] -pub enum ImportExportError { - /// Format is not supported for reading. - #[error(r#"Format "{0}" cannot be read"#)] - UnsupportedRead(FileFormat), - /// Format is not supported for writing. - #[error(r#"Format "{0}" cannot be written"#)] - UnsupportedWrite(FileFormat), - /// A required attribute is missing. - #[error(r#"Missing required attribute "{0}""#)] - MissingAttribute(String), - /// A given attribute is not valid for the format. - #[error(r#"Unknown attribute "{0}""#)] - UnknownAttribute(String), - /// File format name is not known. - #[error(r#"Unknown file format "{0}""#)] - UnknownFileFormat(String), - /// Attribute value is invalid. 
- #[error(r#"Invalid attribute value "{value}" for attribute "{attribute}": {description}"#)] - InvalidAttributeValue { - /// The given value. - value: AnyDataValue, - /// The attribute the value was given for. - attribute: AnyDataValue, - /// A description of why the value was invalid. - description: String, - }, - /// Value format is unsupported for this format. - #[error(r#"Unsupported value format "{value_format}" for format {format}"#)] - InvalidValueFormat { - /// The given value format. - value_format: String, - /// The file format. - format: FileFormat, - }, - /// Arity is unsupported for this format. - #[error(r#"import produces tuples of arity {arity}, but it should be arity {expected}"#)] - InvalidArity { - /// The given arity. - arity: usize, - /// The expected arity. - expected: usize, - }, - /// Arity is unsupported for this format, exact value is required. - #[error(r#"unsupported arity "{arity}" for format {format}, must be {required}"#)] - InvalidArityExact { - /// The given arity. - arity: usize, - /// The required arity. - required: usize, - /// The file format. - format: FileFormat, - }, - /// Format does not support complex types - #[error(r"Format {format} does not support complex types")] - UnsupportedComplexTypes { - /// The file format. - format: FileFormat, - }, - /// File could not be read - #[error(r#"File "{path}" could not be read."#)] - IoError { - /// Contains the wrapped error - error: std::io::Error, - /// Path that could not be read - path: PathBuf, - }, -} diff --git a/nemo/src/io/formats/import_export.rs b/nemo/src/io/formats/import_export.rs deleted file mode 100644 index 11234a7d3..000000000 --- a/nemo/src/io/formats/import_export.rs +++ /dev/null @@ -1,538 +0,0 @@ -// //! Definitions for the [ImportExportHandler] trait that provides the main -// //! handle for supported file formats, and of [ImportExportHandlers] as a -// //! main entry point for obtaining such handlers. - -// use std::{ -// collections::HashSet, -// io::{BufRead, Write}, -// path::PathBuf, -// }; - -// use dyn_clone::DynClone; -// use nemo_physical::{ -// datasources::table_providers::TableProvider, -// datavalues::{AnyDataValue, DataValue, MapDataValue, TupleDataValue, ValueDomain}, -// resource::Resource, -// }; - -// use crate::{ -// error::Error, -// io::compression_format::CompressionFormat, -// model::{ -// ExportDirective, FileFormat, ImportDirective, ImportExportDirective, -// PARAMETER_NAME_COMPRESSION, PARAMETER_NAME_FORMAT, PARAMETER_NAME_RESOURCE, -// VALUE_COMPRESSION_GZIP, VALUE_COMPRESSION_NONE, VALUE_FORMAT_ANY, VALUE_FORMAT_SKIP, -// }, -// }; - -// use thiserror::Error; - -// use super::{ -// json::JsonHandler, -// types::{Direction, TableWriter}, -// DsvHandler, RdfHandler, -// }; - -// /// Struct with static methods to manage the conversion of [ImportExportDirective]s to -// /// [ImportExportHandler]s. -// pub(crate) struct ImportExportHandlers; - -// impl ImportExportHandlers { -// /// Obtain an [ImportExportHandler] for the given [ImportDirective], and return -// /// an error if the given attributes are not suitable for the chosen format. -// pub(crate) fn import_handler( -// directive: &ImportDirective, -// ) -> Result, ImportExportError> { -// Self::handler(&directive.0, Direction::Import) -// } - -// /// Obtain an [ImportExportHandler] for the given [ExportDirective], and return -// /// an error if the given attributes are not suitable for the chosen format. 
-// pub(crate) fn export_handler( -// directive: &ExportDirective, -// ) -> Result, ImportExportError> { -// Self::handler(&directive.0, Direction::Export) -// } - -// /// Obtain an [ImportExportHandler] for the given [ImportExportDirective], and return -// /// an error if the given attributes are not suitable for the chosen format. -// fn handler( -// directive: &ImportExportDirective, -// direction: Direction, -// ) -> Result, ImportExportError> { -// match directive.format { -// FileFormat::CSV => DsvHandler::try_new_csv(&directive.attributes, direction), -// FileFormat::DSV => DsvHandler::try_new_dsv(&directive.attributes, direction), -// FileFormat::TSV => DsvHandler::try_new_tsv(&directive.attributes, direction), -// FileFormat::JSON => { -// if direction == Direction::Export { -// Err(ImportExportError::UnsupportedWrite(FileFormat::JSON)) -// } else { -// JsonHandler::try_new_import(&directive.attributes) -// } -// } -// FileFormat::RDF(variant) => { -// RdfHandler::try_new(variant, &directive.attributes, direction) -// } -// } -// } - -// /// Check if all given attributes are among the valid attributes, -// /// and return an error otherwise. -// pub(super) fn check_attributes( -// attributes: &MapDataValue, -// valid_attributes: &[&str], -// ) -> Result<(), ImportExportError> { -// let given: HashSet = attributes -// .map_keys() -// .expect("map values always have keys") -// .cloned() -// .collect(); -// let valid: HashSet = valid_attributes -// .iter() -// .map(|att| AnyDataValue::new_iri(att.to_string())) -// .collect(); - -// if let Some(unknown) = given.difference(&valid).next() { -// return Err(ImportExportError::UnknownAttribute(unknown.to_string())); -// } -// Ok(()) -// } - -// /// Extract the resource from the given attributes. This can be [ImportExportResource::Unspecified] -// /// for export (where we can use default names). If the value is invalid or missing for import, an -// /// error is returned. -// pub(super) fn extract_resource( -// attributes: &MapDataValue, -// direction: Direction, -// ) -> Result { -// let resource: Option = -// Self::extract_string_or_iri(attributes, PARAMETER_NAME_RESOURCE, true)?; - -// if let Some(string) = resource { -// if string.is_empty() { -// Ok(ImportExportResource::Stdout) -// } else { -// Ok(ImportExportResource::Resource(string)) -// } -// } else { -// if direction == Direction::Import { -// return Err(ImportExportError::MissingAttribute( -// PARAMETER_NAME_RESOURCE.to_string(), -// )); -// } -// Ok(ImportExportResource::Unspecified) -// } -// } - -// /// Extract the compression format from the given attributes, and possibly resource. -// /// If a resource is given, then the resource name without the compression-specific -// /// extension is also returned. -// /// -// /// An error is returned if an unknown compression format was explicitly specified, -// /// or if the compression format of the resource is not in agreement with an explicitly -// /// stated one. 
-// pub(super) fn extract_compression_format( -// attributes: &MapDataValue, -// resource: &ImportExportResource, -// ) -> Result<(Option, Option), ImportExportError> { -// let cf_name = Self::extract_string_or_iri(attributes, PARAMETER_NAME_COMPRESSION, true) -// .expect("no errors with allow missing"); - -// let stated_compression_format: Option; -// if let Some(cf_name) = &cf_name { -// match cf_name.as_str() { -// VALUE_COMPRESSION_NONE => stated_compression_format = Some(CompressionFormat::None), -// VALUE_COMPRESSION_GZIP => stated_compression_format = Some(CompressionFormat::Gzip), -// _ => { -// return Err(ImportExportError::invalid_att_value_error( -// PARAMETER_NAME_COMPRESSION, -// AnyDataValue::new_plain_string(cf_name.to_owned()), -// format!( -// "unknown compression format, supported formats: {:?}", -// [VALUE_COMPRESSION_GZIP, VALUE_COMPRESSION_NONE] -// ) -// .as_str(), -// )); -// } -// } -// } else { -// stated_compression_format = None; -// } - -// let resource_compression_format: Option; -// let inner_resource: Option; -// if let ImportExportResource::Resource(res) = resource { -// let (rcf, inner_res) = CompressionFormat::from_resource(res); -// resource_compression_format = Some(rcf); -// inner_resource = Some(inner_res); -// } else { -// resource_compression_format = None; -// inner_resource = None; -// } - -// match (stated_compression_format, resource_compression_format) { -// (Some(scf), None) => Ok((Some(scf), inner_resource)), -// (None, Some(rcf)) => Ok((Some(rcf), inner_resource)), -// (Some(scf), Some(rcf)) => { -// if scf == rcf { -// Ok((Some(scf), inner_resource)) -// } else { -// Err(ImportExportError::invalid_att_value_error( -// PARAMETER_NAME_COMPRESSION, -// AnyDataValue::new_plain_string( -// cf_name.expect("given if stated compression is known"), -// ), -// "compression method should match resource extension" -// .to_string() -// .as_str(), -// )) -// } -// } -// (None, None) => Ok((None, inner_resource)), -// } -// } - -// /// Extract a string value for the given attribute name. Returns an error if the -// /// value is mistyped ([ImportExportError::InvalidAttributeValue]) or missing ([ImportExportError::MissingAttribute]). -// /// It can be specified whether it should be allowed that the atttribute is not set at all (and -// /// `None` would then be returned). If given, the value must always be a string, however. -// pub(super) fn extract_string( -// attributes: &MapDataValue, -// attribute_name: &str, -// allow_missing: bool, -// ) -> Result, ImportExportError> { -// if let Some(c) = Self::extract_att_value(attributes, attribute_name, allow_missing)? { -// match c.value_domain() { -// ValueDomain::PlainString => Ok(Some(c.to_plain_string_unchecked())), -// _ => Err(ImportExportError::invalid_att_value_error( -// attribute_name, -// c.clone(), -// "expecting string value", -// )), -// } -// } else { -// Ok(None) -// } -// } - -// /// Extract a string or IRI value for the given attribute name. Returns an error if the -// /// value is mistyped ([ImportExportError::InvalidAttributeValue]) or missing ([ImportExportError::MissingAttribute]). -// /// It can be specified whether it should be allowed that the atttribute is not set at all (and -// /// `None` would then be returned). If given, the value must always be a string or an IRI, however. -// /// This method is used for parameters that are actually strings in nature, but where we conveniently want to -// /// allow the user to omit the quotes. 
-// pub(super) fn extract_string_or_iri( -// attributes: &MapDataValue, -// attribute_name: &str, -// allow_missing: bool, -// ) -> Result, ImportExportError> { -// if let Some(c) = Self::extract_att_value(attributes, attribute_name, allow_missing)? { -// if let Some(s) = Self::string_from_datavalue(&c) { -// Ok(Some(s)) -// } else { -// Err(ImportExportError::invalid_att_value_error( -// attribute_name, -// c.clone(), -// "expecting string or IRI value", -// )) -// } -// } else { -// Ok(None) -// } -// } - -// /// Extract an unsigned integer value for the given attribute name. Returns an error if the -// /// value is mistyped ([ImportExportError::InvalidAttributeValue]) or missing ([ImportExportError::MissingAttribute]). -// /// It can be specified whether it should be allowed that the attribute is not set at all (and -// /// `None` would then be returned). If given, the value must always be an integer, however. -// pub(super) fn extract_unsigned_integer( -// attributes: &MapDataValue, -// attribute_name: &str, -// allow_missing: bool, -// ) -> Result, ImportExportError> { -// if let Some(c) = Self::extract_att_value(attributes, attribute_name, allow_missing)? { -// if c.fits_into_u64() { -// Ok(Some(c.to_u64_unchecked())) -// } else { -// Err(ImportExportError::invalid_att_value_error( -// attribute_name, -// c.clone(), -// "expecting unsigned integer value that fits into 64bits", -// )) -// } -// } else { -// Ok(None) -// } -// } - -// /// Extract an IRI value string for the given attribute name. Returns an error if the -// /// value is mistyped ([ImportExportError::InvalidAttributeValue]) or missing ([ImportExportError::MissingAttribute]). -// /// It can be specified whether it should be allowed that the atttribute is not set at all (and -// /// `None` would then be returned). If given, the value must always be an IRI, however. -// pub(super) fn extract_iri( -// attributes: &MapDataValue, -// attribute_name: &str, -// allow_missing: bool, -// ) -> Result, ImportExportError> { -// if let Some(c) = Self::extract_att_value(attributes, attribute_name, allow_missing)? { -// match c.value_domain() { -// ValueDomain::Iri => Ok(Some(c.to_iri_unchecked())), -// _ => Err(ImportExportError::invalid_att_value_error( -// attribute_name, -// c.clone(), -// "expecting IRI value", -// )), -// } -// } else { -// Ok(None) -// } -// } - -// /// Extract a value for the given attribute name. The boolean flag constrols -// /// if an error should be generated if the attribute is missing, or if `Ok(None)` -// /// should be returned in this case. -// pub(super) fn extract_att_value( -// attributes: &MapDataValue, -// attribute_name: &str, -// allow_missing: bool, -// ) -> Result, ImportExportError> { -// if let Some(c) = attributes.map_element(&AnyDataValue::new_iri(attribute_name.to_string())) -// { -// Ok(Some(c.clone())) -// } else if allow_missing { -// return Ok(None); -// } else { -// return Err(ImportExportError::MissingAttribute( -// attribute_name.to_string(), -// )); -// } -// } - -// /// Extract the list of strings that specify value formats. If no list is given, `None` -// /// is returned. Errors may occur if the attribute is given but the value is not a list of strings, -// /// or if all values are skipped. -// /// -// /// See [ImportExportHandlers::extract_value_format_strings_and_arity] for a method that also -// /// checks the arity information, and uses it to make default formats if needed. 
-// pub(super) fn extract_value_format_strings( -// attributes: &MapDataValue, -// ) -> Result>, ImportExportError> { -// let value_format_strings: Option>; -// if let Some(c) = Self::extract_att_value(attributes, PARAMETER_NAME_FORMAT, true)? { -// let mut value_formats: Vec = Vec::new(); -// if c.value_domain() == ValueDomain::Tuple { -// for i in 0..c.len_unchecked() { -// let v = c.tuple_element_unchecked(i); -// if let Some(s) = Self::string_from_datavalue(v) { -// value_formats.push(s); -// } else { -// return Err(ImportExportError::invalid_att_value_error( -// PARAMETER_NAME_FORMAT, -// v.clone(), -// "list must contain strings only", -// )); -// } -// } -// } else { -// return Err(ImportExportError::invalid_att_value_error( -// PARAMETER_NAME_FORMAT, -// c.clone(), -// "expecting list of value formats", -// )); -// } -// value_format_strings = Some(value_formats); -// } else { -// value_format_strings = None; -// } - -// // Check if any non-skipped value is contained -// if let Some(true) = value_format_strings -// .as_ref() -// .map(|v| v.iter().all(|fmt| *fmt == VALUE_FORMAT_SKIP)) -// { -// return Err(ImportExportError::invalid_att_value_error( -// PARAMETER_NAME_FORMAT, -// Self::datavalue_from_format_strings(&value_format_strings.expect("checked above")), -// "cannot import/export zero-ary data", -// )); -// } - -// Ok(value_format_strings) -// } - -// /// Returns a list of string names of value formats that can be used as a -// /// default if only the arity of a predicate is known. -// pub(super) fn default_value_format_strings(arity: usize) -> Vec { -// vec![VALUE_FORMAT_ANY; arity] -// .into_iter() -// .map(|s| s.to_string()) -// .collect() -// } - -// /// Get a list of value format strings while taking the expected arity of data in -// /// the file into account. -// /// -// /// Formats will first be extracted from the attributes. For import, the total number -// /// of formats must match the expected file arity. For export, the total number of -// /// non-skip formats must match the expected file arity. -// /// -// /// If no formats are given, we assume that "skip" is not used, so file arity = -// /// predicate arity = format number, and we can make a list of default value formats. -// /// `None` is only returned if the file arity was not given (in which case this function -// /// is the same as [ImportExportHandlers::extract_value_format_strings]). -// /// -// /// The given `file_arity` is not checked: callers are expected to have ensured that it -// /// is a non-zero usize that fits into i64. -// pub(super) fn extract_value_format_strings_with_file_arity( -// attributes: &MapDataValue, -// file_arity: Option, -// direction: Direction, -// ) -> Result>, ImportExportError> { -// let value_format_strings: Option> = -// Self::extract_value_format_strings(attributes)?; - -// if let Some(file_arity) = file_arity { -// if let Some(ref vfs) = value_format_strings { -// let declared_file_arity = match direction { -// Direction::Import => vfs.len(), -// Direction::Export => vfs.iter().fold(0, |acc: usize, fmt| { -// // Only count formats other than VALUE_FORMAT_SKIP: -// if *fmt == VALUE_FORMAT_SKIP { -// acc -// } else { -// acc + 1 -// } -// }), -// }; - -// // Check if arity is consistent with given value formats. 
-// if file_arity != declared_file_arity { -// return Err(ImportExportError::invalid_att_value_error( -// PARAMETER_NAME_FORMAT, -// Self::datavalue_from_format_strings(vfs), -// format!( -// "value format declaration must be compatible with expected arity {} of tuples in file", -// file_arity -// ) -// .as_str(), -// )); -// } - -// Ok(value_format_strings) -// } else { -// Ok(Some(Self::default_value_format_strings(file_arity))) -// } -// } else { -// Ok(value_format_strings) -// } -// } - -// /// Turn a list of formats into a data value for error reporting. -// fn datavalue_from_format_strings(format_strings: &[String]) -> AnyDataValue { -// TupleDataValue::from_iter( -// format_strings -// .iter() -// .map(|format| AnyDataValue::new_plain_string(format.to_owned())) -// .collect::>(), -// ) -// .into() -// } - -// /// Extract a string from an [AnyDataValue] that is a plain string -// /// or IRI. This is in particularly used to allow users to omit the quotes -// /// around simple attribute values. -// fn string_from_datavalue(v: &AnyDataValue) -> Option { -// match v.value_domain() { -// ValueDomain::PlainString => Some(v.to_plain_string_unchecked()), -// ValueDomain::Iri => Some(v.to_iri_unchecked()), -// _ => None, -// } -// } -// } - -// /// Errors related to the creation and usage of [ImportExportHandler]s. -// #[derive(Debug, Error)] -// pub enum ImportExportError { -// /// Format is not supported for reading. -// #[error(r#"Format "{0}" cannot be read"#)] -// UnsupportedRead(FileFormat), -// /// Format is not supported for writing. -// #[error(r#"Format "{0}" cannot be written"#)] -// UnsupportedWrite(FileFormat), -// /// A required attribute is missing. -// #[error(r#"Missing required attribute "{0}""#)] -// MissingAttribute(String), -// /// A given attribute is not valid for the format. -// #[error(r#"Unknown attribute "{0}""#)] -// UnknownAttribute(String), -// /// File format name is not known. -// #[error(r#"Unknown file format "{0}""#)] -// UnknownFileFormat(String), -// /// Attribute value is invalid. -// #[error(r#"Invalid attribute value "{value}" for attribute "{attribute}": {description}"#)] -// InvalidAttributeValue { -// /// The given value. -// value: AnyDataValue, -// /// The attribute the value was given for. -// attribute: AnyDataValue, -// /// A description of why the value was invalid. -// description: String, -// }, -// /// Value format is unsupported for this format. -// #[error(r#"Unsupported value format "{value_format}" for format {format}"#)] -// InvalidValueFormat { -// /// The given value format. -// value_format: String, -// /// The file format. -// format: FileFormat, -// }, -// /// Arity is unsupported for this format. -// #[error(r#"import produces tuples of arity {arity}, but it should be arity {expected}"#)] -// InvalidArity { -// /// The given arity. -// arity: usize, -// /// The expected arity. -// expected: usize, -// }, -// /// Arity is unsupported for this format, exact value is required. -// #[error(r#"unsupported arity "{arity}" for format {format}, must be {required}"#)] -// InvalidArityExact { -// /// The given arity. -// arity: usize, -// /// The required arity. -// required: usize, -// /// The file format. -// format: FileFormat, -// }, -// /// Format does not support complex types -// #[error(r"Format {format} does not support complex types")] -// UnsupportedComplexTypes { -// /// The file format. 
-// format: FileFormat, -// }, -// /// File could not be read -// #[error(r#"File "{path}" could not be read."#)] -// IoError { -// /// Contains the wrapped error -// error: std::io::Error, -// /// Path that could not be read -// path: PathBuf, -// }, -// } - -// impl ImportExportError { -// /// Convenience method to create ImportExportError::InvalidAttributeValue from static strings, which is a common -// /// task in handlers. -// pub(crate) fn invalid_att_value_error( -// attribute: &str, -// value: AnyDataValue, -// reason: &str, -// ) -> ImportExportError { -// ImportExportError::InvalidAttributeValue { -// attribute: AnyDataValue::new_iri(attribute.to_string()), -// value: value.clone(), -// description: reason.to_string(), -// } -// } -// } diff --git a/nemo/src/io/formats/json.rs b/nemo/src/io/formats/json.rs index 3693e5f1e..def102846 100644 --- a/nemo/src/io/formats/json.rs +++ b/nemo/src/io/formats/json.rs @@ -22,15 +22,6 @@ impl JsonHandler { pub fn new(resource: ImportExportResource) -> Self { Self { resource } } - - // pub(crate) fn try_new_import( - // attributes: &MapDataValue, - // ) -> Result, ImportExportError> { - // // todo: check attributes - // let resource = ImportExportHandler::extract_resource(attributes, Direction::Import)?; - - // Ok(Box::new(JsonHandler { resource })) - // } } impl ImportExportHandler for JsonHandler { diff --git a/nemo/src/io/formats/rdf.rs b/nemo/src/io/formats/rdf.rs index eb550a4f7..860f0e28a 100644 --- a/nemo/src/io/formats/rdf.rs +++ b/nemo/src/io/formats/rdf.rs @@ -96,146 +96,6 @@ impl RdfHandler { _direction, } } - - // /// Construct an RDF handler of the given variant. - // pub(crate) fn try_new( - // variant: RdfVariant, - // attributes: &MapDataValue, - // direction: Direction, - // ) -> Result, ImportExportError> { - // // Basic checks for unsupported attributes: - // ImportExportHandlers::check_attributes( - // attributes, - // &[ - // PARAMETER_NAME_RESOURCE, - // PARAMETER_NAME_BASE, - // PARAMETER_NAME_COMPRESSION, - // PARAMETER_NAME_FORMAT, - // PARAMETER_NAME_LIMIT, - // ], - // )?; - - // let resource = ImportExportHandlers::extract_resource(attributes, direction)?; - - // let base: Option>; - // if let Some(base_string) = - // ImportExportHandlers::extract_iri(attributes, PARAMETER_NAME_BASE, true)? 
- // { - // if let Ok(b) = Iri::parse(base_string.clone()) { - // // TODO: Export should not accept base as parameter, since we cannot use it - // base = Some(b); - // } else { - // return Err(ImportExportError::invalid_att_value_error( - // PARAMETER_NAME_BASE, - // AnyDataValue::new_iri(base_string.clone()), - // "must be a valid IRI", - // )); - // } - // } else { - // base = None; - // } - - // let (compression_format, inner_resource) = - // ImportExportHandlers::extract_compression_format(attributes, &resource)?; - - // let refined_variant: RdfVariant; - // if variant == RdfVariant::Unspecified { - // if let Some(ref res) = inner_resource { - // refined_variant = Self::rdf_variant_from_resource(res); - // } else { - // // We can still guess a default format based on the arity - // // information provided on import/export: - // refined_variant = RdfVariant::Unspecified; - // } - // } else { - // refined_variant = variant; - // } - - // let value_formats = Self::extract_value_formats(attributes, refined_variant, direction)?; - // let limit = - // ImportExportHandlers::extract_unsigned_integer(attributes, PARAMETER_NAME_LIMIT, true)?; - - // Ok(Box::new(Self { - // resource, - // base, - // variant: refined_variant, - // value_formats, - // limit, - // compression_format, - // direction, - // })) - // } - - // fn extract_value_formats( - // attributes: &MapDataValue, - // variant: RdfVariant, - // direction: Direction, - // ) -> Result>, ImportExportError> { - // // Input arity for known formats: - // let arity = match variant { - // RdfVariant::Unspecified => None, - // RdfVariant::NTriples | RdfVariant::Turtle | RdfVariant::RDFXML => Some(3), - // RdfVariant::NQuads | RdfVariant::TriG => Some(4), - // }; - - // let value_format_strings = - // ImportExportHandlers::extract_value_format_strings_with_file_arity( - // attributes, arity, direction, - // )?; - - // if let Some(format_strings) = value_format_strings { - // Ok(Some(Self::formats_from_strings(format_strings)?)) - // } else { - // Ok(None) - // } - // } - - // fn formats_from_strings( - // value_format_strings: Vec, - // ) -> Result, ImportExportError> { - // let mut value_formats = Vec::with_capacity(value_format_strings.len()); - // for s in value_format_strings { - // value_formats.push(RdfValueFormat::from_string(s.as_str())?); - // } - // Ok(value_formats) - // } - - // /// Extract [RdfVariant] from file extension. The resource should already - // /// have been stripped of any compression-related extensions. - // fn rdf_variant_from_resource(resource: &Resource) -> RdfVariant { - // match resource { - // resource if resource.ends_with(".ttl") => RdfVariant::Turtle, - // resource if resource.ends_with(".rdf") => RdfVariant::RDFXML, - // resource if resource.ends_with(".nt") => RdfVariant::NTriples, - // resource if resource.ends_with(".nq") => RdfVariant::NQuads, - // resource if resource.ends_with(".trig") => RdfVariant::TriG, - // _ => RdfVariant::Unspecified, - // } - // } - - // /// Returns the set RDF variant, or finds a default value based on the - // /// required arity. An error occurs if the arity is not compatible with - // /// any variant of RDF. 
- // fn rdf_variant_or_default(&self, arity: usize) -> Result { - // if self.variant == RdfVariant::Unspecified { - // match arity { - // 3 => Ok(RdfVariant::NTriples), - // 4 => Ok(RdfVariant::NQuads), - // _ => Err(ImportExportError::InvalidArity { arity, expected: 3 }), - // } - // } else { - // Ok(self.variant) - // } - // } - - // /// Returns the set value formats, or finds a default value based on the - // /// required arity. - // fn value_formats_or_default(&self, arity: usize) -> Vec { - // self.value_formats.clone().unwrap_or_else(|| { - // Self::formats_from_strings(ImportExportHandlers::default_value_format_strings(arity)) - // .unwrap() - // }) - // } } impl ImportExportHandler for RdfHandler { From 9345a6ae2eb08734268d3f3592b628ef49707905 Mon Sep 17 00:00:00 2001 From: Alex Ivliev Date: Mon, 16 Sep 2024 21:43:21 +0200 Subject: [PATCH 167/214] Fix compilation errors in python api --- nemo-python/src/lib.rs | 108 ++++++++++++++---------- nemo/src/chase_model.rs | 4 + nemo/src/chase_model/components.rs | 2 +- nemo/src/chase_model/components/atom.rs | 2 +- nemo/src/rule_model/term_map.rs | 32 ++++++- 5 files changed, 99 insertions(+), 49 deletions(-) diff --git a/nemo-python/src/lib.rs b/nemo-python/src/lib.rs index 66fa7ddb2..e57392db8 100644 --- a/nemo-python/src/lib.rs +++ b/nemo-python/src/lib.rs @@ -1,20 +1,19 @@ // FIXME: remove this once the pyo3 macros don't trigger this #![allow(non_local_definitions)] -use std::{ - collections::{HashMap, HashSet}, - fs::read_to_string, - time::Duration, -}; +use std::{collections::HashSet, fs::read_to_string, time::Duration}; use nemo::{ + chase_model::ChaseAtom, datavalues::{AnyDataValue, DataValue}, + error::Error, execution::{tracing::trace::ExecutionTraceTree, ExecutionEngine}, - io::{lexer::Error, resource_providers::ResourceProviders, ExportManager, ImportManager}, + io::{resource_providers::ResourceProviders, ExportManager, ImportManager}, meta::timing::TimedCode, - model::{ - chase_model::{ChaseAtom, ChaseFact}, - ExportDirective, Identifier, Variable, + rule_model::{ + components::{fact::Fact, tag::Tag, term::primitive::Primitive, ProgramComponent}, + error::ValidationErrorBuilder, + term_map::PrimitiveTermMap, }, }; @@ -48,7 +47,7 @@ impl PythonResult for (T, Vec) { #[pyclass] #[derive(Clone)] -struct NemoProgram(nemo::model::Program); +struct NemoProgram(nemo::rule_model::program::Program); #[pyfunction] fn load_file(file: String) -> PyResult { @@ -58,24 +57,33 @@ fn load_file(file: String) -> PyResult { #[pyfunction] fn load_string(rules: String) -> PyResult { - // let ast = nemo::io::parser::parse_program_str(&rules).py_res()?; - // let program = nemo::rule_model::program::Program::from_ast(ast); - // let program = todo!("update NemoProgram to use the new rule model"); - // Ok(NemoProgram(program)) - todo!() + let program_ast = nemo::parser::Parser::initialize(&rules, String::default()) + .parse() + .map_err(|_| Error::ProgramParseError) + .py_res()?; + let program = + nemo::rule_model::translation::ASTProgramTranslation::initialize(&rules, String::default()) + .translate(&program_ast) + .map_err(|_| Error::ProgramParseError) + .py_res()?; + + Ok(NemoProgram(program)) } #[pymethods] impl NemoProgram { fn output_predicates(&self) -> Vec { - self.0.output_predicates().map(|id| id.name()).collect() + self.0 + .outputs() + .map(|output| output.predicate().to_string()) + .collect() } fn edb_predicates(&self) -> HashSet { self.0 - .edb_predicates() + .import_predicates() .into_iter() - .map(|id| id.name()) + .map(|predicate| 
predicate.to_string()) .collect() } } @@ -88,7 +96,7 @@ impl NemoOutputManager { #[new] #[pyo3(signature=(path, overwrite=false, gzip=false))] fn py_new(path: String, overwrite: bool, gzip: bool) -> PyResult { - let export_manager = ExportManager::new() + let export_manager = ExportManager::default() .set_base_path(path.into()) .overwrite(overwrite) .compress(gzip); @@ -199,7 +207,7 @@ fn datavalue_to_python(py: Python<'_>, v: AnyDataValue) -> PyResult } #[pyclass] -struct NemoFact(ChaseFact); +struct NemoFact(nemo::chase_model::GroundAtom); #[pymethods] impl NemoFact { @@ -210,8 +218,7 @@ impl NemoFact { fn constants<'a>(&self, py: Python<'a>) -> PyResult>> { self.0 .terms() - .iter() - .map(|c| datavalue_to_python(py, c.clone())) + .map(|c| datavalue_to_python(py, c.value())) .collect() } @@ -262,16 +269,15 @@ impl NemoTrace { } } -fn assignement_to_dict( - assignment: &HashMap, - py: Python, -) -> PyResult { +fn assignement_to_dict(assignment: &PrimitiveTermMap, py: Python) -> PyResult { let dict = PyDict::new_bound(py); - for (variable, value) in assignment { - dict.set_item( - variable.to_string(), - datavalue_to_python(py, value.clone())?, - )?; + for (variable, term) in assignment { + if let Primitive::Ground(ground) = term { + dict.set_item( + variable.to_string(), + datavalue_to_python(py, ground.value())?, + )?; + } } Ok(dict.to_object(py)) @@ -407,12 +413,14 @@ impl NemoEngine { Ok(()) } - fn trace(&mut self, fact: String) -> Option { - let (ast, _errors) = nemo::io::parser::parse_fact_str(&fact); /*.py_res().ok()?;*/ - // TODO: Report errors... - let parsed_fact = nemo::rule_model::components::fact::Fact::from_ast(ast); - let parsed_fact = todo!(); - let (trace, handles) = self.engine.trace(self.program.0.clone(), vec![parsed_fact]); + fn trace(&mut self, fact_string: String) -> Option { + let fact = Fact::parse(&fact_string).ok()?; + let mut builder = ValidationErrorBuilder::default(); + if fact.validate(&mut builder).is_err() { + return None; + } + + let (trace, handles) = self.engine.trace(self.program.0.clone(), vec![fact]); let handle = *handles .first() .expect("Function trace always returns a handle for each input fact"); @@ -432,9 +440,20 @@ impl NemoEngine { predicate: String, output_manager: &Bound, ) -> PyResult<()> { - let identifier = Identifier::from(predicate); + let tag = Tag::new(predicate); + + let Some(_arity) = self.engine.predicate_arity(&tag) else { + return Ok(()); + }; - let Some(arity) = self.engine.predicate_arity(&identifier) else { + let export_handler = if let Some((_, handler)) = self + .engine + .exports() + .iter() + .find(|(predicate, _)| *predicate == tag) + { + handler.clone() + } else { return Ok(()); }; @@ -442,9 +461,9 @@ impl NemoEngine { .borrow() .0 .export_table( - &ExportDirective::default(identifier.clone()), - self.engine.predicate_rows(&identifier).py_res()?, - arity, + &tag, + &export_handler, + self.engine.predicate_rows(&tag).py_res()?, ) .py_res()?; @@ -452,10 +471,7 @@ impl NemoEngine { } fn result(mut slf: PyRefMut<'_, Self>, predicate: String) -> PyResult> { - let iter = slf - .engine - .predicate_rows(&Identifier::from(predicate)) - .py_res()?; + let iter = slf.engine.predicate_rows(&Tag::new(predicate)).py_res()?; let results = NemoResults(Box::new( iter.into_iter().flatten().collect::>().into_iter(), )); diff --git a/nemo/src/chase_model.rs b/nemo/src/chase_model.rs index d8ebf7aec..82821e85a 100644 --- a/nemo/src/chase_model.rs +++ b/nemo/src/chase_model.rs @@ -4,3 +4,7 @@ pub(crate) mod analysis; pub(crate) mod 
components; pub(crate) mod translation; + +// TODO: This is required for tracing, try to use logical model instead +pub use components::atom::ground_atom::GroundAtom; +pub use components::atom::ChaseAtom; diff --git a/nemo/src/chase_model/components.rs b/nemo/src/chase_model/components.rs index 18503b42b..69e177ac9 100644 --- a/nemo/src/chase_model/components.rs +++ b/nemo/src/chase_model/components.rs @@ -16,7 +16,7 @@ pub(crate) mod rule; pub(crate) mod term; /// Trait implemented by components of the chase model -pub(crate) trait ChaseComponent { +pub trait ChaseComponent { /// Return the [Origin] of this component. fn origin(&self) -> &Origin; diff --git a/nemo/src/chase_model/components/atom.rs b/nemo/src/chase_model/components/atom.rs index 3fc95f4dc..1bfd99a23 100644 --- a/nemo/src/chase_model/components/atom.rs +++ b/nemo/src/chase_model/components/atom.rs @@ -11,7 +11,7 @@ use crate::rule_model::components::{tag::Tag, IterableVariables}; use super::ChaseComponent; /// Tagged list of terms. -pub(crate) trait ChaseAtom: ChaseComponent + IterableVariables + Display { +pub trait ChaseAtom: ChaseComponent + IterableVariables + Display { /// Type of the terms within the atom. type TypeTerm; diff --git a/nemo/src/rule_model/term_map.rs b/nemo/src/rule_model/term_map.rs index b06e18e21..8efd9f8be 100644 --- a/nemo/src/rule_model/term_map.rs +++ b/nemo/src/rule_model/term_map.rs @@ -1,6 +1,9 @@ //! This module defines [PrimitiveTermMap]. -use std::collections::HashMap; +use std::collections::{ + hash_map::{IntoIter, Iter, IterMut}, + HashMap, +}; use super::components::{term::primitive::Primitive, IterablePrimitives}; @@ -39,3 +42,30 @@ impl From> for PrimitiveTermMap { Self { map: value } } } + +impl IntoIterator for PrimitiveTermMap { + type Item = (Primitive, Primitive); + type IntoIter = IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.map.into_iter() + } +} + +impl<'a> IntoIterator for &'a PrimitiveTermMap { + type Item = (&'a Primitive, &'a Primitive); + type IntoIter = Iter<'a, Primitive, Primitive>; + + fn into_iter(self) -> Self::IntoIter { + self.map.iter() + } +} + +impl<'a> IntoIterator for &'a mut PrimitiveTermMap { + type Item = (&'a Primitive, &'a mut Primitive); + type IntoIter = IterMut<'a, Primitive, Primitive>; + + fn into_iter(self) -> Self::IntoIter { + self.map.iter_mut() + } +} From e02c7e805bea1aa345cbc8178e93f5199cd08b46 Mon Sep 17 00:00:00 2001 From: Alex Ivliev Date: Tue, 17 Sep 2024 09:29:37 +0200 Subject: [PATCH 168/214] Clippy --- nemo-cli/src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo-cli/src/main.rs b/nemo-cli/src/main.rs index b484145e9..b204798bd 100644 --- a/nemo-cli/src/main.rs +++ b/nemo-cli/src/main.rs @@ -183,7 +183,7 @@ fn handle_tracing( engine: &mut DefaultExecutionEngine, program: Program, ) -> Result<(), CliError> { - let tracing_facts = parse_trace_facts(&cli)?; + let tracing_facts = parse_trace_facts(cli)?; if !tracing_facts.is_empty() { let mut facts = Vec::::with_capacity(tracing_facts.len()); for fact_string in &tracing_facts { From 9ab76fc722724dfe07df1215f5cbe217d3035a5c Mon Sep 17 00:00:00 2001 From: Alex Ivliev Date: Tue, 17 Sep 2024 11:25:54 +0200 Subject: [PATCH 169/214] Clippy --- nemo-cli/src/main.rs | 6 +- nemo-language-server/src/language_server.rs | 10 ++-- nemo-python/src/lib.rs | 6 +- nemo-wasm/src/lib.rs | 2 +- nemo/src/api.rs | 2 +- .../components/atom/ground_atom.rs | 2 +- .../components/atom/primitive_atom.rs | 2 +- .../components/atom/variable_atom.rs | 2 +- 
nemo/src/chase_model/components/export.rs | 4 +- nemo/src/chase_model/components/import.rs | 4 +- nemo/src/chase_model/translation/aggregate.rs | 14 ++--- nemo/src/chase_model/translation/fact.rs | 11 +--- nemo/src/chase_model/translation/filter.rs | 8 +-- .../chase_model/translation/import_export.rs | 21 ++++--- nemo/src/chase_model/translation/operation.rs | 4 +- nemo/src/chase_model/translation/rule.rs | 29 +++++----- nemo/src/execution/execution_engine.rs | 4 +- .../planning/operations/operation.rs | 2 +- nemo/src/io/export_manager.rs | 10 ++-- nemo/src/io/formats.rs | 5 +- nemo/src/parser.rs | 6 +- nemo/src/parser/ast.rs | 2 +- nemo/src/parser/ast/comment/wsoc.rs | 2 +- nemo/src/parser/ast/directive/unknown.rs | 2 +- .../parser/ast/expression/basic/boolean.rs | 2 +- .../src/parser/ast/expression/basic/number.rs | 12 ++-- .../ast/expression/complex/arithmetic.rs | 22 +++----- .../parser/ast/expression/complex/infix.rs | 2 +- nemo/src/parser/ast/program.rs | 2 +- nemo/src/parser/ast/tag/structure.rs | 15 ++--- nemo/src/parser/ast/token.rs | 9 +-- nemo/src/parser/context.rs | 12 ++-- nemo/src/parser/error.rs | 35 ++++++------ nemo/src/parser/span.rs | 15 +---- nemo/src/rule_model/components.rs | 7 ++- nemo/src/rule_model/components/atom.rs | 29 +++++----- nemo/src/rule_model/components/fact.rs | 21 +++---- .../rule_model/components/import_export.rs | 47 +++++++--------- .../components/import_export/file_formats.rs | 2 +- nemo/src/rule_model/components/literal.rs | 2 +- nemo/src/rule_model/components/output.rs | 4 +- nemo/src/rule_model/components/parse.rs | 12 ++-- nemo/src/rule_model/components/rule.rs | 43 ++++++-------- nemo/src/rule_model/components/term.rs | 56 +------------------ .../rule_model/components/term/aggregate.rs | 14 ++--- .../rule_model/components/term/function.rs | 24 ++++---- nemo/src/rule_model/components/term/map.rs | 4 +- .../rule_model/components/term/operation.rs | 16 +++--- .../term/operation/operation_kind.rs | 8 +-- .../rule_model/components/term/primitive.rs | 2 +- .../components/term/primitive/ground.rs | 10 ++-- .../components/term/primitive/variable.rs | 2 +- .../term/primitive/variable/existential.rs | 4 +- .../term/primitive/variable/universal.rs | 4 +- nemo/src/rule_model/components/term/tuple.rs | 14 +++-- nemo/src/rule_model/error.rs | 8 +-- nemo/src/rule_model/program.rs | 22 ++++---- nemo/src/rule_model/translation.rs | 2 +- nemo/src/rule_model/util.rs | 12 ++-- nemo/src/syntax.rs | 3 +- 60 files changed, 278 insertions(+), 380 deletions(-) diff --git a/nemo-cli/src/main.rs b/nemo-cli/src/main.rs index b204798bd..840166a53 100644 --- a/nemo-cli/src/main.rs +++ b/nemo-cli/src/main.rs @@ -191,7 +191,7 @@ fn handle_tracing( fact: fact_string.clone(), })?; let mut builder = ValidationErrorBuilder::default(); - if fact.validate(&mut builder).is_err() { + if fact.validate(&mut builder).is_none() { return Err(CliError::TracingInvalidFact { fact: fact_string.clone(), }); @@ -285,7 +285,7 @@ fn run(mut cli: CliApp) -> Result<(), CliError> { let mut engine: DefaultExecutionEngine = ExecutionEngine::initialize(&program, import_manager)?; for (predicate, handler) in engine.exports() { - export_manager.validate(&predicate, &handler)?; + export_manager.validate(&predicate, &*handler)?; } TimedCode::instance().sub("Reading & Preprocessing").stop(); @@ -307,7 +307,7 @@ fn run(mut cli: CliApp) -> Result<(), CliError> { for (predicate, handler) in engine.exports() { stdout_used |= export_manager.export_table( &predicate, - &handler, + &*handler, 
engine.predicate_rows(&predicate)?, )?; } diff --git a/nemo-language-server/src/language_server.rs b/nemo-language-server/src/language_server.rs index f1336f3a0..1db2b558b 100644 --- a/nemo-language-server/src/language_server.rs +++ b/nemo-language-server/src/language_server.rs @@ -98,7 +98,7 @@ impl Backend { Parser::initialize(text, text_document.uri.to_string()) .parse() .map(|prg| (prg, None)) - .unwrap_or_else(|(prg, err)| (prg, Some(err))); + .unwrap_or_else(|(prg, err)| (*prg, Some(err))); // Group errors by position and deduplicate error let mut errors_by_posision: BTreeMap> = BTreeMap::new(); @@ -267,7 +267,7 @@ impl LanguageServer for Backend { ) .parse() .map(|prg| (prg, None)) - .unwrap_or_else(|(prg, err)| (prg, Some(err))); + .unwrap_or_else(|(prg, err)| (*prg, Some(err))); let node_path = find_in_ast(&program, position); @@ -313,7 +313,7 @@ impl LanguageServer for Backend { Parser::initialize(&text, params.text_document.uri.to_string()) .parse() .map(|prg| (prg, None)) - .unwrap_or_else(|(prg, err)| (prg, Some(err))); + .unwrap_or_else(|(prg, err)| (*prg, Some(err))); let document_symbols = ast_node_to_document_symbol(&line_index, &program) .map_err(Into::into) @@ -347,7 +347,7 @@ impl LanguageServer for Backend { ) .parse() .map(|prg| (prg, None)) - .unwrap_or_else(|(prg, err)| (prg, Some(err))); + .unwrap_or_else(|(prg, err)| (*prg, Some(err))); let node_path = find_in_ast(&program, position); @@ -412,7 +412,7 @@ impl LanguageServer for Backend { Parser::initialize(&text, params.text_document.uri.to_string()) .parse() .map(|prg| (prg, None)) - .unwrap_or_else(|(prg, err)| (prg, Some(err))); + .unwrap_or_else(|(prg, err)| (*prg, Some(err))); let node_path = find_in_ast(&program, position); diff --git a/nemo-python/src/lib.rs b/nemo-python/src/lib.rs index e57392db8..3b8d9fdec 100644 --- a/nemo-python/src/lib.rs +++ b/nemo-python/src/lib.rs @@ -416,9 +416,7 @@ impl NemoEngine { fn trace(&mut self, fact_string: String) -> Option { let fact = Fact::parse(&fact_string).ok()?; let mut builder = ValidationErrorBuilder::default(); - if fact.validate(&mut builder).is_err() { - return None; - } + fact.validate(&mut builder)?; let (trace, handles) = self.engine.trace(self.program.0.clone(), vec![fact]); let handle = *handles @@ -462,7 +460,7 @@ impl NemoEngine { .0 .export_table( &tag, - &export_handler, + &*export_handler, self.engine.predicate_rows(&tag).py_res()?, ) .py_res()?; diff --git a/nemo-wasm/src/lib.rs b/nemo-wasm/src/lib.rs index faae47392..a370f2a58 100644 --- a/nemo-wasm/src/lib.rs +++ b/nemo-wasm/src/lib.rs @@ -372,7 +372,7 @@ impl NemoEngine { let export_manager: ExportManager = Default::default(); export_manager - .export_table_with_writer(Box::new(writer), &export_handler, Some(record_iter)) + .export_table_with_writer(Box::new(writer), &*export_handler, Some(record_iter)) .map_err(WasmOrInternalNemoError::Nemo) .map_err(NemoError) } diff --git a/nemo/src/api.rs b/nemo/src/api.rs index 6885c0a14..f081d10a3 100644 --- a/nemo/src/api.rs +++ b/nemo/src/api.rs @@ -77,7 +77,7 @@ pub fn output_predicates(engine: &Engine) -> Vec { engine .program() .exports() - .into_iter() + .iter() .map(|export| export.predicate()) .cloned() .collect() diff --git a/nemo/src/chase_model/components/atom/ground_atom.rs b/nemo/src/chase_model/components/atom/ground_atom.rs index 74b9c93a9..b54583130 100644 --- a/nemo/src/chase_model/components/atom/ground_atom.rs +++ b/nemo/src/chase_model/components/atom/ground_atom.rs @@ -122,7 +122,7 @@ impl TryFrom for GroundAtom { type Error = 
GroundAtomConversionError; fn try_from(value: Atom) -> Result { - let origin = value.origin().clone(); + let origin = *value.origin(); let predicate = value.predicate(); let mut terms = Vec::new(); diff --git a/nemo/src/chase_model/components/atom/primitive_atom.rs b/nemo/src/chase_model/components/atom/primitive_atom.rs index 728e5d763..c98ac1afa 100644 --- a/nemo/src/chase_model/components/atom/primitive_atom.rs +++ b/nemo/src/chase_model/components/atom/primitive_atom.rs @@ -131,7 +131,7 @@ impl TryFrom for PrimitiveAtom { type Error = PrimitiveAtomConversionError; fn try_from(value: Atom) -> Result { - let origin = value.origin().clone(); + let origin = *value.origin(); let predicate = value.predicate(); let mut terms = Vec::new(); diff --git a/nemo/src/chase_model/components/atom/variable_atom.rs b/nemo/src/chase_model/components/atom/variable_atom.rs index a9e5ce55c..ef593c5f8 100644 --- a/nemo/src/chase_model/components/atom/variable_atom.rs +++ b/nemo/src/chase_model/components/atom/variable_atom.rs @@ -115,7 +115,7 @@ impl TryFrom for VariableAtom { type Error = VariableAtomConversionError; fn try_from(value: Atom) -> Result { - let origin = value.origin().clone(); + let origin = *value.origin(); let predicate = value.predicate(); let mut terms = Vec::new(); diff --git a/nemo/src/chase_model/components/export.rs b/nemo/src/chase_model/components/export.rs index 93f06f96c..f55975e7e 100644 --- a/nemo/src/chase_model/components/export.rs +++ b/nemo/src/chase_model/components/export.rs @@ -35,8 +35,8 @@ impl ChaseExport { } /// Return the handler. - pub(crate) fn handler(&self) -> &Box { - &self.handler + pub(crate) fn handler(&self) -> Box { + self.handler.clone() } /// Return the arity of this import. diff --git a/nemo/src/chase_model/components/import.rs b/nemo/src/chase_model/components/import.rs index c181c7f20..b8d9e819f 100644 --- a/nemo/src/chase_model/components/import.rs +++ b/nemo/src/chase_model/components/import.rs @@ -35,8 +35,8 @@ impl ChaseImport { } /// Return the handler. - pub(crate) fn handler(&self) -> &Box { - &self.handler + pub(crate) fn handler(&self) -> Box { + self.handler.clone() } /// Return the arity of this import. 
diff --git a/nemo/src/chase_model/translation/aggregate.rs b/nemo/src/chase_model/translation/aggregate.rs index 04b8e57b2..b72204e55 100644 --- a/nemo/src/chase_model/translation/aggregate.rs +++ b/nemo/src/chase_model/translation/aggregate.rs @@ -29,9 +29,9 @@ impl ProgramChaseTranslation { &mut self, result: &mut ChaseRule, aggregate: &crate::rule_model::components::term::aggregate::Aggregate, - group_by_variables: &Vec, + group_by_variables: &[Variable], ) -> ChaseAggregate { - let origin = aggregate.origin().clone(); + let origin = *aggregate.origin(); let kind = aggregate.aggregate_kind(); let input_variable = match aggregate.aggregate_term() { Term::Primitive(Primitive::Variable(variable)) => variable.clone(), @@ -71,7 +71,7 @@ impl ProgramChaseTranslation { input_variable, output_variable, distinct_variables, - group_by_variables.clone(), + group_by_variables.to_vec(), ) } @@ -88,10 +88,10 @@ impl ProgramChaseTranslation { &mut self, result: &mut ChaseRule, operation: &crate::rule_model::components::term::operation::Operation, - group_by_variables: &Vec, + group_by_variables: &[Variable], chase_aggregate: &mut Option, ) -> OperationTerm { - let origin = operation.origin().clone(); + let origin = *operation.origin(); let kind = operation.operation_kind(); let mut subterms = Vec::new(); @@ -130,7 +130,7 @@ impl ProgramChaseTranslation { &mut self, result: &mut ChaseRule, operation: &crate::rule_model::components::term::operation::Operation, - group_by_variables: &Vec, + group_by_variables: &[Variable], output_variable: Variable, chase_aggregate: &mut Option, ) -> ChaseOperation { @@ -141,6 +141,6 @@ impl ProgramChaseTranslation { chase_aggregate, ); - ChaseOperation::new(output_variable, operation_term).set_origin(operation.origin().clone()) + ChaseOperation::new(output_variable, operation_term).set_origin(*operation.origin()) } } diff --git a/nemo/src/chase_model/translation/fact.rs b/nemo/src/chase_model/translation/fact.rs index 69363001d..5a14bc2bd 100644 --- a/nemo/src/chase_model/translation/fact.rs +++ b/nemo/src/chase_model/translation/fact.rs @@ -21,18 +21,13 @@ impl ProgramChaseTranslation { &mut self, fact: &crate::rule_model::components::fact::Fact, ) -> GroundAtom { - let origin = fact.origin().clone(); + let origin = *fact.origin(); let predicate = fact.predicate().clone(); let mut terms = Vec::new(); for term in fact.subterms() { - if let Term::Primitive(primitive) = term { - if let Primitive::Ground(value) = primitive { - terms.push(value.clone()); - continue; - } else { - unreachable!("invalid program: fact contains non-ground values") - } + if let Term::Primitive(Primitive::Ground(value)) = term { + terms.push(value.clone()); } else { unreachable!("invalid program: fact contains non-primitive values") } diff --git a/nemo/src/chase_model/translation/filter.rs b/nemo/src/chase_model/translation/filter.rs index fa47c10cf..8891a6b30 100644 --- a/nemo/src/chase_model/translation/filter.rs +++ b/nemo/src/chase_model/translation/filter.rs @@ -24,7 +24,7 @@ impl ProgramChaseTranslation { variable: &Variable, operation: &crate::rule_model::components::term::operation::Operation, ) -> ChaseFilter { - let origin = operation.origin().clone(); + let origin = *operation.origin(); let operation = Self::build_operation_term(operation); let filter = OperationTerm::Operation( @@ -35,7 +35,7 @@ impl ProgramChaseTranslation { operation, ], ) - .set_origin(origin.clone()), + .set_origin(origin), ); ChaseFilter::new(filter).set_origin(origin) @@ -50,8 +50,8 @@ impl 
ProgramChaseTranslation { OperationTerm::Primitive(term.clone()), ], ) - .set_origin(term.origin().clone()); + .set_origin(*term.origin()); - ChaseFilter::new(OperationTerm::Operation(filter)).set_origin(term.origin().clone()) + ChaseFilter::new(OperationTerm::Operation(filter)).set_origin(*term.origin()) } } diff --git a/nemo/src/chase_model/translation/import_export.rs b/nemo/src/chase_model/translation/import_export.rs index d614eff16..790a9ff3f 100644 --- a/nemo/src/chase_model/translation/import_export.rs +++ b/nemo/src/chase_model/translation/import_export.rs @@ -32,7 +32,7 @@ impl ProgramChaseTranslation { &self, import: &crate::rule_model::components::import_export::ImportDirective, ) -> ChaseImport { - let origin = import.origin().clone(); + let origin = *import.origin(); let predicate = import.predicate().clone(); let attributes = import.attributes(); let file_format = import.file_format(); @@ -53,7 +53,7 @@ impl ProgramChaseTranslation { &self, export: &crate::rule_model::components::import_export::ExportDirective, ) -> ChaseExport { - let origin = export.origin().clone(); + let origin = *export.origin(); let predicate = export.predicate().clone(); let attributes = export.attributes(); let file_format = export.file_format(); @@ -79,13 +79,12 @@ impl ProgramChaseTranslation { ) -> Box { let arity = self.predicate_arity.get(&predicate).cloned(); - if attributes.get(&ImportExportAttribute::Resource).is_none() { - let default_file_name = format!("{}.{}", predicate, file_format.extension()); - attributes.insert( - ImportExportAttribute::Resource, - Term::from(default_file_name), - ); - } + attributes + .entry(ImportExportAttribute::Resource) + .or_insert_with(|| { + let default_file_name = format!("{}.{}", predicate, file_format.extension()); + Term::from(default_file_name) + }); match file_format { FileFormat::CSV => { @@ -120,7 +119,7 @@ impl ProgramChaseTranslation { ) -> (CompressionFormat, ImportExportResource) { attributes .get(&ImportExportAttribute::Resource) - .and_then(|term| ImportExportDirective::string_value(term)) + .and_then(ImportExportDirective::string_value) .map(|resource| CompressionFormat::from_resource(&resource)) .map(|(format, resource)| (format, ImportExportResource::from_string(resource))) .expect("invalid program: missing resource in import/export") @@ -162,7 +161,7 @@ impl ProgramChaseTranslation { fn read_limit(attributes: &HashMap) -> Option { attributes .get(&ImportExportAttribute::Limit) - .and_then(|term| ImportExportDirective::integer_value(term)) + .and_then(ImportExportDirective::integer_value) .map(|limit| u64::try_from(limit).unwrap_or_default()) } diff --git a/nemo/src/chase_model/translation/operation.rs b/nemo/src/chase_model/translation/operation.rs index 043fd779b..085d043f5 100644 --- a/nemo/src/chase_model/translation/operation.rs +++ b/nemo/src/chase_model/translation/operation.rs @@ -25,7 +25,7 @@ impl ProgramChaseTranslation { pub(crate) fn build_operation_term( operation: &crate::rule_model::components::term::operation::Operation, ) -> OperationTerm { - let origin = operation.origin().clone(); + let origin = *operation.origin(); let kind = operation.operation_kind(); let mut subterms = Vec::new(); @@ -54,7 +54,7 @@ impl ProgramChaseTranslation { output_variable: &Variable, operation: &crate::rule_model::components::term::operation::Operation, ) -> ChaseOperation { - let origin = operation.origin().clone(); + let origin = *operation.origin(); let operation = Self::build_operation_term(operation); 
ChaseOperation::new(output_variable.clone(), operation).set_origin(origin) diff --git a/nemo/src/chase_model/translation/rule.rs b/nemo/src/chase_model/translation/rule.rs index c8845c200..84ff90c77 100644 --- a/nemo/src/chase_model/translation/rule.rs +++ b/nemo/src/chase_model/translation/rule.rs @@ -87,16 +87,16 @@ impl ProgramChaseTranslation { for literal in rule.body() { if let Literal::Operation(operation) = literal { - if let Some((left, term)) = operation.variable_assignment() { - if let Term::Primitive(Primitive::Variable(right)) = term { - // Operation has the form ?left = ?right - if let Some(assigned) = assignment.get(left) { - assignment.insert(right.clone(), assigned.clone()); - } else if let Some(assigned) = assignment.get(right) { - assignment.insert(left.clone(), assigned.clone()); - } else { - assignment.insert(left.clone(), right.clone()); - } + if let Some((left, Term::Primitive(Primitive::Variable(right)))) = + operation.variable_assignment() + { + // Operation has the form ?left = ?right + if let Some(assigned) = assignment.get(left) { + assignment.insert(right.clone(), assigned.clone()); + } else if let Some(assigned) = assignment.get(right) { + assignment.insert(left.clone(), assigned.clone()); + } else { + assignment.insert(left.clone(), right.clone()); } } } @@ -126,7 +126,7 @@ impl ProgramChaseTranslation { /// # Panics /// Panics if atom contains a structured term or an aggregate. fn build_body_atom(&mut self, atom: &Atom) -> (VariableAtom, Vec) { - let origin = atom.origin().clone(); + let origin = *atom.origin(); let predicate = atom.predicate().clone(); let mut variables = Vec::new(); @@ -234,8 +234,7 @@ impl ProgramChaseTranslation { if let Literal::Operation(operation) = literal { let new_operation = Self::build_operation_term(operation); - let new_filter = - ChaseFilter::new(new_operation).set_origin(operation.origin().clone()); + let new_filter = ChaseFilter::new(new_operation).set_origin(*operation.origin()); result.add_positive_filter(new_filter); } @@ -244,11 +243,11 @@ impl ProgramChaseTranslation { /// Translates each head atom into the [PrimitiveAtom], /// while taking care of operations and aggregates. 
- fn handle_head(&mut self, result: &mut ChaseRule, head: &Vec) { + fn handle_head(&mut self, result: &mut ChaseRule, head: &[Atom]) { let mut chase_aggregate: Option = None; for (head_index, atom) in head.iter().enumerate() { - let origin = atom.origin().clone(); + let origin = *atom.origin(); let predicate = atom.predicate().clone(); let mut terms = Vec::new(); diff --git a/nemo/src/execution/execution_engine.rs b/nemo/src/execution/execution_engine.rs index c288112ab..65db1bede 100644 --- a/nemo/src/execution/execution_engine.rs +++ b/nemo/src/execution/execution_engine.rs @@ -141,7 +141,7 @@ impl ExecutionEngine { // Add all the import specifications for import in program.imports() { let table_source = TableSource::new( - input_manager.table_provider_from_handler(&**import.handler())?, + input_manager.table_provider_from_handler(&*import.handler())?, import.arity(), ); @@ -282,7 +282,7 @@ impl ExecutionEngine { self.program .exports() .iter() - .map(|export| (export.predicate().clone(), export.handler().clone())) + .map(|export| (export.predicate().clone(), export.handler())) .collect() } diff --git a/nemo/src/execution/planning/operations/operation.rs b/nemo/src/execution/planning/operations/operation.rs index a7cd81520..453d1e140 100644 --- a/nemo/src/execution/planning/operations/operation.rs +++ b/nemo/src/execution/planning/operations/operation.rs @@ -20,7 +20,7 @@ pub(super) fn operation_term_to_function_tree( Primitive::Ground(datavalue) => FunctionTree::constant(datavalue.value()), Primitive::Variable(variable) => FunctionTree::reference( *translation - .get(&variable) + .get(variable) .expect("Every variable must be known"), ), }, diff --git a/nemo/src/io/export_manager.rs b/nemo/src/io/export_manager.rs index 09f5cc94a..ebde5c603 100644 --- a/nemo/src/io/export_manager.rs +++ b/nemo/src/io/export_manager.rs @@ -70,13 +70,13 @@ impl ExportManager { pub fn validate( &self, predicate: &Tag, - handler: &Box, + handler: &dyn ImportExportHandler, ) -> Result<(), Error> { if handler.resource_is_stdout() { return Ok(()); } - let path = self.output_file_path(predicate, &**handler); + let path = self.output_file_path(predicate, handler); let meta_info = path.metadata(); if let Err(err) = meta_info { @@ -162,14 +162,14 @@ impl ExportManager { pub fn export_table<'a>( &self, predicate: &Tag, - export_handler: &Box, + export_handler: &dyn ImportExportHandler, table: Option> + 'a>, ) -> Result { if self.disable_write { return Ok(false); } - let writer = self.writer(&**export_handler, predicate)?; + let writer = self.writer(export_handler, predicate)?; if let Some(table) = table { let table_writer = export_handler.writer(writer)?; @@ -191,7 +191,7 @@ impl ExportManager { pub fn export_table_with_writer<'a>( &self, writer: Box, - export_handler: &Box, + export_handler: &dyn ImportExportHandler, table: Option> + 'a>, ) -> Result<(), Error> { if let Some(table) = table { diff --git a/nemo/src/io/formats.rs b/nemo/src/io/formats.rs index 0e8055e56..70ee9dc0f 100644 --- a/nemo/src/io/formats.rs +++ b/nemo/src/io/formats.rs @@ -50,8 +50,9 @@ impl ImportExportResource { } } -/// An [ImportExportHandler] represents a data format for input and/or output, and provides -/// specific methods for handling data of that format. Each handler is configured by format-specific +/// An [ImportExportHandler] represents a data format for input and/or output. +/// +/// It provides specific methods for handling data of that format. 
Each handler is configured by format-specific /// attributes, which define the behavior in detail, including the kind of data that this format /// is compatible with. The attributes are provided when creating the format, and should then /// be validated. diff --git a/nemo/src/parser.rs b/nemo/src/parser.rs index fd4a49081..bad1781e2 100644 --- a/nemo/src/parser.rs +++ b/nemo/src/parser.rs @@ -103,8 +103,8 @@ impl<'a> Parser<'a> { } /// Parse the input. - pub fn parse(self) -> Result, (Program<'a>, ParserErrorReport<'a>)> { - let parser_input = ParserInput::new(&self.input, self.state.clone()); + pub fn parse(self) -> Result, (Box>, ParserErrorReport<'a>)> { + let parser_input = ParserInput::new(self.input, self.state.clone()); let (_, program) = Program::parse(parser_input).expect("parsing should always succeed"); @@ -112,7 +112,7 @@ impl<'a> Parser<'a> { Ok(program) } else { Err(( - program, + Box::new(program), ParserErrorReport { input: self.input, label: self.label, diff --git a/nemo/src/parser/ast.rs b/nemo/src/parser/ast.rs index 02b2354ac..c6186d499 100644 --- a/nemo/src/parser/ast.rs +++ b/nemo/src/parser/ast.rs @@ -52,7 +52,7 @@ pub(crate) fn ast_to_ascii_tree<'a>(node: &'a dyn ProgramAST<'a>) -> Tree { Tree::Node( format!( "{} \x1b[34m@{}:{} {colour}{str}\x1b[0m", - node.context().name().to_string(), + node.context().name(), node.span().0.location_line(), node.span().0.get_utf8_column() ), diff --git a/nemo/src/parser/ast/comment/wsoc.rs b/nemo/src/parser/ast/comment/wsoc.rs index 1fc1df7da..d48f61220 100644 --- a/nemo/src/parser/ast/comment/wsoc.rs +++ b/nemo/src/parser/ast/comment/wsoc.rs @@ -60,7 +60,7 @@ impl<'a> WSoC<'a> { rest, Self { _span: input_span.until_rest(&rest_span), - comments: comments.into_iter().filter_map(|comment| comment).collect(), + comments: comments.into_iter().flatten().collect(), }, ) }) diff --git a/nemo/src/parser/ast/directive/unknown.rs b/nemo/src/parser/ast/directive/unknown.rs index e06fc50a4..43c10d9c3 100644 --- a/nemo/src/parser/ast/directive/unknown.rs +++ b/nemo/src/parser/ast/directive/unknown.rs @@ -53,7 +53,7 @@ impl<'a> UnknownDirective<'a> { if let Ok((rest, matched)) = Token::name(input.clone()) { let mut is_known = false; - for directive in DirectiveKind::iter().map(|kind| kind.token()).flatten() { + for directive in DirectiveKind::iter().flat_map(|kind| kind.token()) { if matched.to_string() == directive.name() { is_known = true; break; diff --git a/nemo/src/parser/ast/expression/basic/boolean.rs b/nemo/src/parser/ast/expression/basic/boolean.rs index e6f0602a9..95c59531b 100644 --- a/nemo/src/parser/ast/expression/basic/boolean.rs +++ b/nemo/src/parser/ast/expression/basic/boolean.rs @@ -49,7 +49,7 @@ impl<'a> Boolean<'a> { ( rest, BooleanValue::token(result.kind()) - .expect(&format!("unexpected token: {:?}", result.kind())), + .unwrap_or_else(|| panic!("unexpected token: {:?}", result.kind())), ) }) } diff --git a/nemo/src/parser/ast/expression/basic/number.rs b/nemo/src/parser/ast/expression/basic/number.rs index af10bc189..4c64ecb72 100644 --- a/nemo/src/parser/ast/expression/basic/number.rs +++ b/nemo/src/parser/ast/expression/basic/number.rs @@ -98,20 +98,16 @@ impl<'a> Number<'a> { /// Recreate the number string without the type marker. 
     fn number_string(&self) -> String {
-        let integer = format!(
-            "{}{}",
-            self.integer_sign.print(),
-            self.integer.span().0.to_string()
-        );
+        let integer = format!("{}{}", self.integer_sign.print(), self.integer.span().0);
 
         let fractional = if let Some(fractional) = &self.fractional {
-            format!(".{}", fractional.span().0.to_string())
+            format!(".{}", fractional.span().0)
         } else {
             String::default()
         };
 
         let exponent = if let Some((sign, exponent)) = &self.exponent {
-            format!("e{}{}", sign.print(), exponent.span().0.to_string())
+            format!("e{}{}", sign.print(), exponent.span().0)
         } else {
             String::default()
         };
@@ -150,7 +146,7 @@ impl<'a> Number<'a> {
             (
                 rest,
                 NumberSign::token(&sign.kind())
-                    .expect(&format!("unexpected token: {:?}", sign.kind())),
+                    .unwrap_or_else(|| panic!("unexpected token: {:?}", sign.kind())),
             )
         })
     }
diff --git a/nemo/src/parser/ast/expression/complex/arithmetic.rs b/nemo/src/parser/ast/expression/complex/arithmetic.rs
index 2510b4b3a..c51d00e7d 100644
--- a/nemo/src/parser/ast/expression/complex/arithmetic.rs
+++ b/nemo/src/parser/ast/expression/complex/arithmetic.rs
@@ -45,23 +45,23 @@ pub enum ArithmeticOperation {
 
 impl ArithmeticOperation {
     /// Parse additive operation.
-    pub fn parse_additive<'a>(input: ParserInput<'a>) -> ParserResult<'a, Self> {
+    pub fn parse_additive(input: ParserInput<'_>) -> ParserResult<'_, Self> {
         alt((Token::plus, Token::minus))(input).map(|(rest, result)| {
             (
                 rest,
                 ArithmeticOperation::token(result.kind())
-                    .expect(&format!("unexpected token: {:?}", result.kind())),
+                    .unwrap_or_else(|| panic!("unexpected token: {:?}", result.kind())),
             )
         })
     }
 
     /// Parse multiplicative operation.
-    pub fn parse_multiplicative<'a>(input: ParserInput<'a>) -> ParserResult<'a, Self> {
+    pub fn parse_multiplicative(input: ParserInput<'_>) -> ParserResult<'_, Self> {
         alt((Token::star, Token::division))(input).map(|(rest, result)| {
             (
                 rest,
                 ArithmeticOperation::token(result.kind())
-                    .expect(&format!("unexpected token: {:?}", result.kind())),
+                    .unwrap_or_else(|| panic!("unexpected token: {:?}", result.kind())),
             )
         })
     }
@@ -101,7 +101,7 @@ impl<'a> Arithmetic<'a> {
     pub fn ascii_tree(&self) -> String {
         let mut output = String::new();
         write_tree(&mut output, &ast_to_ascii_tree(self)).unwrap();
-        format!("{output}")
+        output.to_string()
     }
 }
@@ -231,10 +231,8 @@ impl<'a> ProgramAST<'a> for Arithmetic<'a> {
         Self: Sized + 'a,
     {
         let arithmetic_parser = |input: ParserInput<'a>| {
-            if let Ok((rest, expression)) = Self::parse_sum(input.clone()) {
-                if let Expression::Arithmetic(result) = expression {
-                    return Ok((rest, result));
-                }
+            if let Ok((rest, Expression::Arithmetic(result))) = Self::parse_sum(input.clone()) {
+                return Ok((rest, result));
             }
 
             Err(nom::Err::Error(ParserErrorTree::Base {
@@ -265,7 +263,7 @@ mod test {
     };
 
     /// Count the number of expressions contained in an arithmetic expression
-    fn count_expression<'a>(expression: &Expression<'a>) -> usize {
+    fn count_expression(expression: &Expression) -> usize {
         match expression {
             Expression::Arithmetic(arithmetic) => {
                 count_expression(arithmetic.left()) + count_expression(arithmetic.right())
@@ -293,10 +291,6 @@ mod test {
         for (input, expected) in test {
             let parser_input = ParserInput::new(input, ParserState::default());
             let result = all_consuming(Arithmetic::parse)(parser_input);
-            match &result {
-                Ok((_, ast)) => println!("{ast}"),
-                Err(_) => assert!(false),
-            }
 
             let result = result.unwrap();
             assert_eq!(
diff --git a/nemo/src/parser/ast/expression/complex/infix.rs b/nemo/src/parser/ast/expression/complex/infix.rs
index 5697834f2..69143b533 100644
--- a/nemo/src/parser/ast/expression/complex/infix.rs
+++ b/nemo/src/parser/ast/expression/complex/infix.rs
@@ -89,7 +89,7 @@ impl<'a> InfixExpression<'a> {
             (
                 rest,
                 InfixExpressionKind::token(result.kind())
-                    .expect(&format!("unexpected token: {:?}", result.kind())),
+                    .unwrap_or_else(|| panic!("unexpected token: {:?}", result.kind())),
             )
         })
     }
diff --git a/nemo/src/parser/ast/program.rs b/nemo/src/parser/ast/program.rs
index 38f7728b7..54a35cec4 100644
--- a/nemo/src/parser/ast/program.rs
+++ b/nemo/src/parser/ast/program.rs
@@ -51,7 +51,7 @@ impl<'a> Program<'a> {
     pub fn ascii_tree(&self) -> String {
         let mut output = String::new();
         write_tree(&mut output, &ast_to_ascii_tree(self)).unwrap();
-        format!("{output}")
+        output.to_string()
     }
 }
diff --git a/nemo/src/parser/ast/tag/structure.rs b/nemo/src/parser/ast/tag/structure.rs
index 42a913fca..2239bd92e 100644
--- a/nemo/src/parser/ast/tag/structure.rs
+++ b/nemo/src/parser/ast/tag/structure.rs
@@ -1,5 +1,7 @@
 //! This module defines [StructureTag].
 
+use std::fmt::Display;
+
 use nom::{branch::alt, combinator::map, sequence::separated_pair};
 
 use crate::parser::{
@@ -41,17 +43,16 @@ impl<'a> StructureTag<'a> {
     pub fn kind(&self) -> &StructureTagKind<'a> {
         &self.kind
     }
+}
 
-    /// Return a string representation of the [Tag].
-    ///
-    /// Note that this does not resolve prefixes.
-    pub fn to_string(&self) -> String {
+impl<'a> Display for StructureTag<'a> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match &self.kind {
-            StructureTagKind::Plain(token) => token.to_string(),
+            StructureTagKind::Plain(token) => token.fmt(f),
             StructureTagKind::Prefixed { prefix, tag } => {
-                format!("{}::{}", prefix.to_string(), tag.to_string())
+                f.write_fmt(format_args!("{}::{}", prefix, tag))
             }
-            StructureTagKind::Iri(iri) => iri.content(),
+            StructureTagKind::Iri(iri) => iri.content().fmt(f),
         }
     }
 }
diff --git a/nemo/src/parser/ast/token.rs b/nemo/src/parser/ast/token.rs
index d3a311a0c..6a52f69a7 100644
--- a/nemo/src/parser/ast/token.rs
+++ b/nemo/src/parser/ast/token.rs
@@ -1,6 +1,8 @@
 //! This module defines [Token].
 #![allow(missing_docs)]
 
+use std::fmt::Display;
+
 use enum_assoc::Assoc;
 
 use nom::{
@@ -259,10 +261,9 @@ pub struct Token<'a> {
     kind: TokenKind,
 }
 
-impl<'a> Token<'a> {
-    /// Return a copy of the underlying text
-    pub fn to_string(&self) -> String {
-        self.span.0.to_string()
+impl<'a> Display for Token<'a> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        self.span.0.fmt(f)
     }
 }
diff --git a/nemo/src/parser/context.rs b/nemo/src/parser/context.rs
index f8e74d9e3..e6ee4cd9e 100644
--- a/nemo/src/parser/context.rs
+++ b/nemo/src/parser/context.rs
@@ -153,15 +153,11 @@ where
     move |i| match f.parse(i.clone()) {
         Ok(o) => Ok(o),
         Err(nom::Err::Incomplete(i)) => Err(nom::Err::Incomplete(i)),
-        Err(nom::Err::Error(e)) => Err(nom::Err::Error(ParserErrorTree::add_context(
-            i,
-            context.clone(),
-            e,
-        ))),
+        Err(nom::Err::Error(e)) => {
+            Err(nom::Err::Error(ParserErrorTree::add_context(i, context, e)))
+        }
         Err(nom::Err::Failure(e)) => Err(nom::Err::Failure(ParserErrorTree::add_context(
-            i,
-            context.clone(),
-            e,
+            i, context, e,
         ))),
     }
 }
diff --git a/nemo/src/parser/error.rs b/nemo/src/parser/error.rs
index 3198ede51..53142a21d 100644
--- a/nemo/src/parser/error.rs
+++ b/nemo/src/parser/error.rs
@@ -38,7 +38,7 @@ pub struct ParserError {
 }
 
 /// Skip a statement, returning an error token.
-pub(crate) fn skip_statement<'a>(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>> {
+pub(crate) fn skip_statement(input: ParserInput<'_>) -> ParserResult<'_, Token<'_>> {
     let input_span = input.span;
 
     let until_double_newline = map(
@@ -109,7 +109,7 @@ pub(crate) fn report_error<'a>(
 
 /// Function to translate an [ParserErrorTree] returned by the nom parser
 /// into a [ParserError] that can be displayed to the user.
-pub(crate) fn transform_error_tree<'a, Output>(
+pub(crate) fn _transform_error_tree<'a, Output>(
     mut parser: impl Parser<ParserInput<'a>, Output, ParserErrorTree<'a>>,
 ) -> impl FnMut(ParserInput<'a>) -> ParserResult<'a, Output> {
     move |input| match parser.parse(input.clone()) {
@@ -132,9 +132,7 @@ pub(crate) fn transform_error_tree<'a, Output>(
     }
 }
 
-fn context_strs(
-    contexts: &Vec<(ParserInput<'_>, StackContext<ParserContext>)>,
-) -> Vec<String> {
+fn context_strs(contexts: &[(ParserInput<'_>, StackContext<ParserContext>)]) -> Vec<String> {
     contexts
         .iter()
         .map(|(_, c)| match c {
@@ -144,7 +142,7 @@
     .collect()
 }
 
-fn get_deepest_errors<'a, 's>(e: &'a ParserErrorTree<'a>) -> (CharacterPosition, Vec<ParserError>) {
+fn get_deepest_errors<'a>(e: &'a ParserErrorTree<'a>) -> (CharacterPosition, Vec<ParserError>) {
     match e {
         ParserErrorTree::Base { location, .. } => {
             let span = location.span.0;
@@ -175,12 +173,11 @@
             for (_, context) in contexts {
                 match context {
                     StackContext::Kind(_) => todo!(),
-                    StackContext::Context(c) => match c {
-                        ParserContext::Token { kind: t } => {
-                            msg.push_str(&t.name());
+                    StackContext::Context(c) => {
+                        if let ParserContext::Token { kind: t } = c {
+                            msg.push_str(t.name());
                         }
-                        _ => (),
-                    },
+                    }
                 }
             }
             (
@@ -214,12 +211,16 @@
             let mut deepest_pos = CharacterPosition::default();
             for error in vec {
                 let (pos, mut deepest_errors) = get_deepest_errors(error);
-                if pos > deepest_pos {
-                    deepest_pos = pos;
-                    return_vec.clear();
-                    return_vec.append(&mut deepest_errors);
-                } else if pos == deepest_pos {
-                    return_vec.append(&mut deepest_errors);
+                match pos.cmp(&deepest_pos) {
+                    std::cmp::Ordering::Equal => {
+                        return_vec.append(&mut deepest_errors);
+                    }
+                    std::cmp::Ordering::Greater => {
+                        deepest_pos = pos;
+                        return_vec.clear();
+                        return_vec.append(&mut deepest_errors);
+                    }
+                    std::cmp::Ordering::Less => {}
                 }
             }
             (deepest_pos, return_vec)
diff --git a/nemo/src/parser/span.rs b/nemo/src/parser/span.rs
index dd9f70e38..5f8f23628 100644
--- a/nemo/src/parser/span.rs
+++ b/nemo/src/parser/span.rs
@@ -6,7 +6,7 @@ use nom_locate::LocatedSpan;
 
 /// Locates a certain character within a file,
 /// giving its offset, line and column number
-#[derive(Debug, Clone, Copy, Eq)]
+#[derive(Debug, Default, Clone, Copy, Eq)]
 pub struct CharacterPosition {
     /// Index of the character in the source file
     pub offset: usize,
@@ -23,17 +23,6 @@ impl CharacterPosition {
     }
 }
 
-// TODO: Remove this once error is cleaned up
-impl Default for CharacterPosition {
-    fn default() -> Self {
-        Self {
-            offset: Default::default(),
-            line: Default::default(),
-            column: Default::default(),
-        }
-    }
-}
-
 impl PartialEq for CharacterPosition {
     fn eq(&self, other: &Self) -> bool {
         self.offset == other.offset
@@ -147,7 +136,7 @@ impl<'a> Span<'a> {
     pub fn beginning(&self) -> Self {
         unsafe {
             if self.0.is_empty() {
-                self.clone()
+                *self
             } else {
                 Self(LocatedSpan::new_from_raw_offset(
                     self.0.location_offset(),
diff --git a/nemo/src/rule_model/components.rs b/nemo/src/rule_model/components.rs
index 00316ce55..fedebebd4 100644
--- a/nemo/src/rule_model/components.rs
+++ b/nemo/src/rule_model/components.rs
@@ -116,8 +116,11 @@ pub trait ProgramComponent: Debug + Display {
     where
         Self: Sized;
 
-    /// Validate this component
-    fn validate(&self, builder: &mut ValidationErrorBuilder) -> Result<(), ()>
+    /// Validate this component.
+    ///
+    /// Errors will be appended to the given [ValidationErrorBuilder].
+    /// Returns `Some(())` if successful and `None` otherwise.
+    fn validate(&self, builder: &mut ValidationErrorBuilder) -> Option<()>
     where
         Self: Sized;
 }
diff --git a/nemo/src/rule_model/components/atom.rs b/nemo/src/rule_model/components/atom.rs
index 4aa2e87cb..e0dd2108f 100644
--- a/nemo/src/rule_model/components/atom.rs
+++ b/nemo/src/rule_model/components/atom.rs
@@ -43,19 +43,21 @@ pub struct Atom {
 macro_rules! atom {
     // Base case: no elements
     ($name:tt) => {
-        crate::rule_model::components::atom::Atom::new(
-            crate::rule_model::components::tag::Tag::from($name),
+        $crate::rule_model::components::atom::Atom::new(
+            $crate::rule_model::components::tag::Tag::from($name),
             Vec::new()
         )
     };
     // Recursive case: handle each term, separated by commas
     ($name:tt; $($tt:tt)*) => {{
-        let mut terms = Vec::new();
-        term_list!(terms; $($tt)*);
-        crate::rule_model::components::atom::Atom::new(
-            crate::rule_model::components::tag::Tag::from($name),
-            terms
-        )
+        #[allow(clippy::vec_init_then_push)] {
+            let mut terms = Vec::new();
+            term_list!(terms; $($tt)*);
+            $crate::rule_model::components::atom::Atom::new(
+                $crate::rule_model::components::tag::Tag::from($name),
+                terms
+            )
+        }
     }};
 }
@@ -151,29 +153,26 @@ impl ProgramComponent for Atom {
         self
     }
 
-    fn validate(&self, builder: &mut ValidationErrorBuilder) -> Result<(), ()>
+    fn validate(&self, builder: &mut ValidationErrorBuilder) -> Option<()>
     where
         Self: Sized,
     {
         if !self.predicate.is_valid() {
             builder.report_error(
-                self.predicate.origin().clone(),
+                *self.predicate.origin(),
                 ValidationErrorKind::InvalidTermTag(self.predicate.to_string()),
             );
         }
 
         if self.is_empty() {
-            builder.report_error(
-                self.origin.clone(),
-                ValidationErrorKind::UnsupportedAtomEmpty,
-            );
+            builder.report_error(self.origin, ValidationErrorKind::UnsupportedAtomEmpty);
         }
 
         for term in self.arguments() {
             term.validate(builder)?;
         }
 
-        Ok(())
+        Some(())
     }
 
     fn kind(&self) -> ProgramComponentKind {
diff --git a/nemo/src/rule_model/components/fact.rs b/nemo/src/rule_model/components/fact.rs
index 69d126ec2..6c37e9f17 100644
--- a/nemo/src/rule_model/components/fact.rs
+++ b/nemo/src/rule_model/components/fact.rs
@@ -64,7 +64,7 @@ impl Fact {
 impl From<Atom> for Fact {
     fn from(value: Atom) -> Self {
         Self {
-            origin: value.origin().clone(),
+            origin: *value.origin(),
             predicate: value.predicate(),
             terms: value.arguments().cloned().collect(),
         }
@@ -125,30 +125,27 @@ impl ProgramComponent for Fact {
         self
     }
 
-    fn validate(&self, builder: &mut ValidationErrorBuilder) -> Result<(), ()>
+    fn validate(&self, builder: &mut ValidationErrorBuilder) -> Option<()>
     where
         Self: Sized,
     {
         if !self.predicate.is_valid() {
             builder.report_error(
-                self.predicate.origin().clone(),
+                *self.predicate.origin(),
                 ValidationErrorKind::InvalidTermTag(self.predicate.to_string()),
            );
         }
 
         for term in self.subterms() {
             if let Some(variable) = term.variables().next() {
-                builder.report_error(
-                    variable.origin().clone(),
-                    ValidationErrorKind::FactNonGround,
-                );
+                builder.report_error(*variable.origin(), ValidationErrorKind::FactNonGround);
                 continue;
             }
 
             term.validate(builder)?;
         }
 
-        Ok(())
+        Some(())
     }
 
     fn kind(&self) -> ProgramComponentKind {
@@ -172,13 +169,9 @@ impl IterablePrimitives for Fact {
 
 impl From<GroundAtom> for Fact {
     fn from(value: GroundAtom) -> Self {
-        let origin = value.origin().clone();
+        let origin = *value.origin();
         let predicate = value.predicate();
-        let terms = value
-            .terms()
-            .cloned()
-            .map(|term| Term::from(term))
-            .collect();
+        let terms = value.terms().cloned().map(Term::from).collect();
 
         Self {
             origin,
diff --git a/nemo/src/rule_model/components/import_export.rs b/nemo/src/rule_model/components/import_export.rs
index 920e6121f..eda718129 100644
--- a/nemo/src/rule_model/components/import_export.rs
+++ b/nemo/src/rule_model/components/import_export.rs
@@ -98,7 +98,7 @@ impl ImportExportDirective {
         for (key, value) in self.attributes.key_value() {
             if let Some(name) =
-                Self::plain_value(&key).and_then(|plain| ImportExportAttribute::from_name(&plain))
+                Self::plain_value(key).and_then(|plain| ImportExportAttribute::from_name(&plain))
             {
                 result.insert(name, value.clone());
             }
@@ -114,7 +114,7 @@ impl ImportExportDirective {
         for (key, value) in self.attributes.key_value() {
             if let Some(name) =
-                Self::plain_value(&key).and_then(|plain| ImportExportAttribute::from_name(&plain))
+                Self::plain_value(key).and_then(|plain| ImportExportAttribute::from_name(&plain))
             {
                 result.insert(name, (key.origin(), value));
             }
@@ -133,11 +133,10 @@ impl ImportExportDirective {
         match self.format {
             FileFormat::CSV | FileFormat::DSV | FileFormat::TSV => {
-                if let Some(value_format) = self.attribute_map().get(&ImportExportAttribute::Format)
+                if let Some(Term::Tuple(tuple)) =
+                    self.attribute_map().get(&ImportExportAttribute::Format)
                 {
-                    if let Term::Tuple(tuple) = value_format {
-                        return DsvValueFormats::from_tuple(tuple).map(|format| format.arity());
-                    }
+                    return DsvValueFormats::from_tuple(tuple).map(|format| format.arity());
                 }
             }
             _ => {}
@@ -159,23 +158,20 @@ impl ImportExportDirective {
 }
 
 impl ImportExportDirective {
-    /// Validate directive
+    /// Validate directive.
     pub fn validate(
         &self,
         direction: Direction,
         builder: &mut ValidationErrorBuilder,
-    ) -> Result<(), ()> {
+    ) -> Option<()> {
         if direction == Direction::Export && self.format == FileFormat::JSON {
-            builder.report_error(
-                self.origin.clone(),
-                ValidationErrorKind::UnsupportedJsonExport,
-            );
-            return Err(());
+            builder.report_error(self.origin, ValidationErrorKind::UnsupportedJsonExport);
+            return None;
         }
 
         let attributes = self.attribute_map_key();
         for (attribute, requirement) in self.format.attributes() {
-            if requirement == AttributeRequirement::Required && attributes.get(&attribute).is_none()
+            if requirement == AttributeRequirement::Required && !attributes.contains_key(&attribute)
             {
                 builder.report_error(
                     self.origin,
@@ -197,7 +193,7 @@ impl ImportExportDirective {
             if !expected_attributes.contains(attribute) {
                 builder
                     .report_error(
-                        attribute_origin.clone(),
+                        attribute_origin,
                         ValidationErrorKind::ImportExportUnrecognizedAttribute {
                             format: self.format.name().to_string(),
                             attribute: attribute.name().to_string(),
@@ -212,7 +208,7 @@ impl ImportExportDirective {
 
             if attribute.value_type() != value.kind() {
                 builder.report_error(
-                    value.origin().clone(),
+                    *value.origin(),
                     ValidationErrorKind::ImportExportAttributeValueType {
                         parameter: attribute.name().to_string(),
                         given: value.kind().name().to_string(),
@@ -243,7 +239,7 @@ impl ImportExportDirective {
             };
         }
 
-        Ok(())
+        Some(())
     }
 
     /// Validate the format attribute for dsv
@@ -258,7 +254,7 @@ impl ImportExportDirective {
                 .is_none()
             {
                 builder.report_error(
-                    argument.origin().clone(),
+                    *argument.origin(),
                     ValidationErrorKind::ImportExportValueFormat {
                         file_format: String::from("dsv"),
                     },
@@ -286,7 +282,7 @@ impl ImportExportDirective {
                 .is_none()
             {
                 builder.report_error(
-                    argument.origin().clone(),
+                    *argument.origin(),
                     ValidationErrorKind::ImportExportValueFormat {
                         file_format: String::from("rdf"),
                     },
@@ -306,10 +302,7 @@ impl ImportExportDirective {
     fn validate_delimiter(value: &Term, builder: &mut ValidationErrorBuilder) -> Result<(), ()> {
         if let Some(delimiter) = ImportExportDirective::string_value(value) {
             if delimiter.len() != 1 {
-                builder.report_error(
-                    value.origin().clone(),
-                    ValidationErrorKind::ImportExportDelimiter,
-                );
+                builder.report_error(*value.origin(), ValidationErrorKind::ImportExportDelimiter);
                 return Err(());
             }
@@ -323,7 +316,7 @@ impl ImportExportDirective {
         if let Term::Primitive(Primitive::Ground(ground)) = value {
             if !ground.value().fits_into_u64() {
                 builder.report_error(
-                    value.origin().clone(),
+                    *value.origin(),
                     ValidationErrorKind::ImportExportLimitNegative,
                 );
                 return Err(());
@@ -338,7 +331,7 @@ impl ImportExportDirective {
         if let Some(compression) = ImportExportDirective::string_value(value) {
             if CompressionFormat::from_name(&compression).is_none() {
                 builder.report_error(
-                    value.origin().clone(),
+                    *value.origin(),
                     ValidationErrorKind::ImportExportUnknownCompression {
                         format: compression,
                     },
@@ -443,7 +436,7 @@ impl ProgramComponent for ImportDirective {
         self
     }
 
-    fn validate(&self, builder: &mut ValidationErrorBuilder) -> Result<(), ()>
+    fn validate(&self, builder: &mut ValidationErrorBuilder) -> Option<()>
     where
         Self: Sized,
     {
@@ -530,7 +523,7 @@ impl ProgramComponent for ExportDirective {
         self
     }
 
-    fn validate(&self, builder: &mut ValidationErrorBuilder) -> Result<(), ()>
+    fn validate(&self, builder: &mut ValidationErrorBuilder) -> Option<()>
     where
         Self: Sized,
     {
diff --git a/nemo/src/rule_model/components/import_export/file_formats.rs b/nemo/src/rule_model/components/import_export/file_formats.rs
index 1b5fbce86..f570ba4a9 100644
--- a/nemo/src/rule_model/components/import_export/file_formats.rs
+++ b/nemo/src/rule_model/components/import_export/file_formats.rs
@@ -139,7 +139,7 @@ pub enum FileFormat {
 }
 
 /// List of RDF [FileFormat]s
-pub const FILE_FORMATS_RDF: &'static [FileFormat] = &[
+pub const FILE_FORMATS_RDF: &[FileFormat] = &[
     FileFormat::NQuads,
     FileFormat::NTriples,
     FileFormat::RDFXML,
diff --git a/nemo/src/rule_model/components/literal.rs b/nemo/src/rule_model/components/literal.rs
index ed15b5a34..0ae734d4f 100644
--- a/nemo/src/rule_model/components/literal.rs
+++ b/nemo/src/rule_model/components/literal.rs
@@ -85,7 +85,7 @@ impl ProgramComponent for Literal {
         }
     }
 
-    fn validate(&self, builder: &mut ValidationErrorBuilder) -> Result<(), ()>
+    fn validate(&self, builder: &mut ValidationErrorBuilder) -> Option<()>
     where
         Self: Sized,
     {
diff --git a/nemo/src/rule_model/components/output.rs b/nemo/src/rule_model/components/output.rs
index 55f9073cb..f2a639da8 100644
--- a/nemo/src/rule_model/components/output.rs
+++ b/nemo/src/rule_model/components/output.rs
@@ -94,11 +94,11 @@ impl ProgramComponent for Output {
         self
     }
 
-    fn validate(&self, _builder: &mut ValidationErrorBuilder) -> Result<(), ()>
+    fn validate(&self, _builder: &mut ValidationErrorBuilder) -> Option<()>
     where
         Self: Sized,
     {
-        Ok(())
+        Some(())
     }
 
     fn kind(&self) -> ProgramComponentKind {
diff --git a/nemo/src/rule_model/components/parse.rs b/nemo/src/rule_model/components/parse.rs
index 6a9eaf340..5995f0331 100644
--- a/nemo/src/rule_model/components/parse.rs
+++ b/nemo/src/rule_model/components/parse.rs
@@ -26,20 +26,22 @@ macro_rules! parse_component {
     ($string:expr, $parser:expr, $builder:expr) => {{
         use nom::InputLength;
 
-        let input =
-            crate::parser::input::ParserInput::new($string, crate::parser::ParserState::default());
+        let input = $crate::parser::input::ParserInput::new(
+            $string,
+            $crate::parser::ParserState::default(),
+        );
         let ast = match $parser(input) {
             Ok((input, ast)) => {
                 if input.input_len() == 0 {
                     ast
                 } else {
                     return Err(
-                        crate::rule_model::components::parse::ComponentParseError::ParseError,
+                        $crate::rule_model::components::parse::ComponentParseError::ParseError,
                     );
                 }
             }
             Err(_) => {
-                return Err(crate::rule_model::components::parse::ComponentParseError::ParseError)
+                return Err($crate::rule_model::components::parse::ComponentParseError::ParseError)
             }
         };
 
@@ -48,7 +50,7 @@ macro_rules! parse_component {
         match $builder(&mut translation, &ast) {
             Ok(component) => Ok(component),
             Err(error) => Err(
-                crate::rule_model::components::parse::ComponentParseError::TranslationError(error),
+                $crate::rule_model::components::parse::ComponentParseError::TranslationError(error),
             ),
         }
     }};
diff --git a/nemo/src/rule_model/components/rule.rs b/nemo/src/rule_model/components/rule.rs
index 8031d3fc7..eb1941614 100644
--- a/nemo/src/rule_model/components/rule.rs
+++ b/nemo/src/rule_model/components/rule.rs
@@ -145,10 +145,7 @@ impl Rule {
     /// * an aggregate occurs at most once
     fn validate_term_head(builder: &mut ValidationErrorBuilder, term: &Term) -> Result<bool, ()> {
         if term.is_map() || term.is_tuple() || term.is_function() {
-            builder.report_error(
-                term.origin().clone(),
-                ValidationErrorKind::UnsupportedComplexTerm,
-            );
+            builder.report_error(*term.origin(), ValidationErrorKind::UnsupportedComplexTerm);
             return Err(());
         }
 
@@ -159,7 +156,7 @@ impl Rule {
 
             if contains_aggregate && first_aggregate {
                 builder.report_error(
-                    subterm.origin().clone(),
+                    *subterm.origin(),
                     ValidationErrorKind::UnsupportedAggregateMultiple,
                 );
 
@@ -185,14 +182,14 @@ impl Rule {
     ) -> Result<(), ()> {
         if let Term::Primitive(Primitive::Variable(Variable::Existential(existential))) = term {
             builder.report_error(
-                existential.origin().clone(),
+                *existential.origin(),
                 ValidationErrorKind::BodyExistential(Variable::Existential(existential.clone())),
             );
             return Err(());
         }
 
         if term.is_aggregate() {
-            builder.report_error(term.origin().clone(), ValidationErrorKind::BodyAggregate);
+            builder.report_error(*term.origin(), ValidationErrorKind::BodyAggregate);
             return Err(());
         }
 
@@ -200,7 +197,7 @@ impl Rule {
         for operation_variable in term.variables() {
             if operation_variable.name().is_none() {
                 builder.report_error(
-                    operation_variable.origin().clone(),
+                    *operation_variable.origin(),
                     ValidationErrorKind::OperationAnonymous,
                 );
                 return Err(());
@@ -208,7 +205,7 @@ impl Rule {
 
             if !safe_variables.contains(operation_variable) {
                 builder.report_error(
-                    operation_variable.origin().clone(),
+                    *operation_variable.origin(),
                     ValidationErrorKind::OperationUnsafe(operation_variable.clone()),
                 );
                 return Err(());
@@ -217,10 +214,7 @@ impl Rule {
         }
 
         if term.is_map() || term.is_tuple() || term.is_function() {
-            builder.report_error(
-                term.origin().clone(),
-                ValidationErrorKind::UnsupportedComplexTerm,
-            );
+            builder.report_error(*term.origin(), ValidationErrorKind::UnsupportedComplexTerm);
             return Err(());
         }
 
@@ -293,7 +287,7 @@ impl ProgramComponent for Rule {
         self
     }
 
-    fn validate(&self, builder: &mut ValidationErrorBuilder) -> Result<(), ()>
+    fn validate(&self, builder: &mut ValidationErrorBuilder) -> Option<()>
     where
         Self: Sized,
     {
@@ -312,14 +306,14 @@ impl ProgramComponent for Rule {
             if let Ok(aggregate) = Self::validate_term_head(builder, term) {
                 if aggregate && contains_aggregate {
                     builder.report_error(
-                        term.origin().clone(),
+                        *term.origin(),
                         ValidationErrorKind::UnsupportedAggregateMultiple,
                     );
                 }
 
                 if aggregate && is_existential {
                     builder.report_error(
-                        term.origin().clone(),
+                        *term.origin(),
                         ValidationErrorKind::UnsupportedAggregatesAndExistentials,
                     );
                 }
@@ -333,7 +327,7 @@ impl ProgramComponent for Rule {
                 if !safe_variables.contains(variable) {
                     builder
                         .report_error(
-                            variable.origin().clone(),
+                            *variable.origin(),
                             ValidationErrorKind::HeadUnsafe(variable.clone()),
                         )
                         .add_hint_option(Hint::similar(
@@ -342,14 +336,11 @@ impl ProgramComponent for Rule {
                             safe_variables.iter().flat_map(|variable| variable.name()),
                         ));
 
-                    return Err(());
+                    return None;
                 }
             } else {
-                builder.report_error(
-                    variable.origin().clone(),
-                    ValidationErrorKind::HeadAnonymous,
-                );
-                return Err(());
+                builder.report_error(*variable.origin(), ValidationErrorKind::HeadAnonymous);
+                return None;
             }
         }
@@ -384,14 +375,14 @@ impl ProgramComponent for Rule {
 
                 builder
                     .report_error(
-                        repeated_use.origin().clone(),
+                        *repeated_use.origin(),
                         ValidationErrorKind::MultipleNegativeLiteralsUnsafe(
                             (*repeated_use).clone(),
                         ),
                     )
                     .add_label(
                         ComplexErrorLabelKind::Information,
-                        first_use.origin().clone(),
+                        *first_use.origin(),
                         Info::FirstUse,
                     );
             }
@@ -399,7 +390,7 @@ impl ProgramComponent for Rule {
             negative_variables.extend(current_negative_variables);
         }
 
-        Ok(())
+        Some(())
     }
 
     fn kind(&self) -> ProgramComponentKind {
diff --git a/nemo/src/rule_model/components/term.rs b/nemo/src/rule_model/components/term.rs
index 1afcfc39d..832e26731 100644
--- a/nemo/src/rule_model/components/term.rs
+++ b/nemo/src/rule_model/components/term.rs
@@ -280,7 +280,7 @@ impl ProgramComponent for Term {
         }
     }
 
-    fn validate(&self, builder: &mut ValidationErrorBuilder) -> Result<(), ()>
+    fn validate(&self, builder: &mut ValidationErrorBuilder) -> Option<()>
     where
         Self: Sized,
     {
@@ -316,33 +316,6 @@ impl IterableVariables for Term {
             Term::Operation(term) => term.variables(),
             Term::Tuple(term) => term.variables(),
         }
-
-        // let mut iter_primitive = None;
-        // let mut iter_function = None;
-        // let mut iter_map = None;
-        // let mut iter_operation = None;
-        // let mut iter_tuple = None;
-        // let mut iter_aggregate = None;
-
-        // match self {
-        //     Term::Primitive(primitive) => iter_primitive = Some(primitive.variables()),
-        //     Term::FunctionTerm(function) => iter_function = Some(function.variables()),
-        //     Term::Map(map) => iter_map = Some(map.variables()),
-        //     Term::Operation(operation) => iter_operation = Some(operation.variables()),
-        //     Term::Tuple(tuple) => iter_tuple = Some(tuple.variables()),
-        //     Term::Aggregate(aggregate) => iter_aggregate = Some(aggregate.variables()),
-        // }
-
-        // Box::new(
-        //     iter_primitive
-        //         .into_iter()
-        //         .flatten()
-        //         .chain(iter_function.into_iter().flatten())
-        //         .chain(iter_map.into_iter().flatten())
-        //         .chain(iter_operation.into_iter().flatten())
-        //         .chain(iter_tuple.into_iter().flatten())
-        //         .chain(iter_aggregate.into_iter().flatten()),
-        // )
     }
 
     fn variables_mut<'a>(&'a mut self) -> Box<dyn Iterator<Item = &'a mut Variable> + 'a> {
@@ -354,33 +327,6 @@ impl IterableVariables for Term {
             Term::Operation(term) => term.variables_mut(),
             Term::Tuple(term) => term.variables_mut(),
         }
-
-        // let mut iter_primitive = None;
-        // let mut iter_function = None;
-        // let mut iter_map = None;
-        // let mut iter_operation = None;
-        // let mut iter_tuple = None;
-        // let mut iter_aggregate = None;
-
-        // match self {
-        //     Term::Primitive(primitive) => iter_primitive = Some(primitive.variables_mut()),
-        //     Term::FunctionTerm(function) => iter_function = Some(function.variables_mut()),
-        //     Term::Map(map) => iter_map = Some(map.variables_mut()),
-        //     Term::Operation(operation) => iter_operation = Some(operation.variables_mut()),
-        //     Term::Tuple(tuple) => iter_tuple = Some(tuple.variables_mut()),
-        //     Term::Aggregate(aggregate) => iter_aggregate = Some(aggregate.variables_mut()),
-        // }
-
-        // Box::new(
-        //     iter_primitive
-        //         .into_iter()
-        //         .flatten()
-        //         .chain(iter_function.into_iter().flatten())
-        //         .chain(iter_map.into_iter().flatten())
-        //         .chain(iter_operation.into_iter().flatten())
-        //         .chain(iter_tuple.into_iter().flatten())
-        //         .chain(iter_aggregate.into_iter().flatten()),
-        // )
     }
 }
diff --git a/nemo/src/rule_model/components/term/aggregate.rs b/nemo/src/rule_model/components/term/aggregate.rs
index 868116898..393ca3d40 100644
--- a/nemo/src/rule_model/components/term/aggregate.rs
+++ b/nemo/src/rule_model/components/term/aggregate.rs
@@ -62,9 +62,9 @@ impl Display for AggregateKind {
     }
 }
 
-impl Into<AggregateOperation> for AggregateKind {
-    fn into(self) -> AggregateOperation {
-        match self {
+impl From<AggregateKind> for AggregateOperation {
+    fn from(value: AggregateKind) -> Self {
+        match value {
             AggregateKind::CountValues => AggregateOperation::Count,
             AggregateKind::MinNumber => AggregateOperation::Min,
             AggregateKind::MaxNumber => AggregateOperation::Max,
@@ -228,7 +228,7 @@ impl ProgramComponent for Aggregate {
         self
     }
 
-    fn validate(&self, builder: &mut ValidationErrorBuilder) -> Result<(), ()>
+    fn validate(&self, builder: &mut ValidationErrorBuilder) -> Option<()>
     where
         Self: Sized,
     {
@@ -236,18 +236,18 @@ impl ProgramComponent for Aggregate {
         if let Some(expected_type) = self.kind.input_type() {
             if input_type != expected_type {
                 builder.report_error(
-                    self.aggregate.origin().clone(),
+                    *self.aggregate.origin(),
                     ValidationErrorKind::AggregateInvalidValueType {
                         found: input_type.name().to_string(),
                         expected: expected_type.name().to_string(),
                     },
                 );
 
-                return Err(());
+                return None;
             }
         }
 
-        Ok(())
+        Some(())
     }
 
     fn kind(&self) -> ProgramComponentKind {
diff --git a/nemo/src/rule_model/components/term/function.rs b/nemo/src/rule_model/components/term/function.rs
index 660cc26ad..60e33859d 100644
--- a/nemo/src/rule_model/components/term/function.rs
+++ b/nemo/src/rule_model/components/term/function.rs
@@ -41,17 +41,19 @@ pub struct FunctionTerm {
 macro_rules! function {
     // Base case: no elements
     ($name:tt) => {
-        crate::rule_model::components::term::function::FunctionTerm::new(
-            crate::rule_model::components::tag::Tag::from($name), Vec::new()
+        $crate::rule_model::components::term::function::FunctionTerm::new(
+            $crate::rule_model::components::tag::Tag::from($name), Vec::new()
         )
     };
     // Recursive case: handle each term, separated by commas
     ($name:tt; $($tt:tt)*) => {{
-        let mut terms = Vec::new();
-        term_list!(terms; $($tt)*);
-        crate::rule_model::components::term::function::FunctionTerm::new(
-            crate::rule_model::components::tag::Tag::from($name), terms
-        )
+        #[allow(clippy::vec_init_then_push)] {
+            let mut terms = Vec::new();
+            term_list!(terms; $($tt)*);
+            $crate::rule_model::components::term::function::FunctionTerm::new(
+                $crate::rule_model::components::tag::Tag::from($name), terms
+            )
+        }
     }};
 }
@@ -149,13 +151,13 @@ impl ProgramComponent for FunctionTerm {
         self
     }
 
-    fn validate(&self, builder: &mut ValidationErrorBuilder) -> Result<(), ()>
+    fn validate(&self, builder: &mut ValidationErrorBuilder) -> Option<()>
     where
         Self: Sized,
     {
         if !self.tag.is_valid() {
             builder.report_error(
-                self.tag.origin().clone(),
+                *self.tag.origin(),
                 ValidationErrorKind::InvalidTermTag(self.tag.to_string()),
             );
         }
@@ -165,10 +167,10 @@ impl ProgramComponent for FunctionTerm {
         }
 
         if self.is_empty() {
-            builder.report_error(self.origin.clone(), ValidationErrorKind::FunctionTermEmpty);
+            builder.report_error(self.origin, ValidationErrorKind::FunctionTermEmpty);
         }
 
-        Ok(())
+        Some(())
     }
 
     fn kind(&self) -> ProgramComponentKind {
diff --git a/nemo/src/rule_model/components/term/map.rs b/nemo/src/rule_model/components/term/map.rs
index fdc652116..a63b81969 100644
--- a/nemo/src/rule_model/components/term/map.rs
+++ b/nemo/src/rule_model/components/term/map.rs
@@ -165,7 +165,7 @@ impl ProgramComponent for Map {
         self
     }
 
-    fn validate(&self, builder: &mut ValidationErrorBuilder) -> Result<(), ()>
+    fn validate(&self, builder: &mut ValidationErrorBuilder) -> Option<()>
     where
         Self: Sized,
     {
@@ -174,7 +174,7 @@ impl ProgramComponent for Map {
             value.validate(builder)?;
         }
 
-        Ok(())
+        Some(())
     }
 
     fn kind(&self) -> ProgramComponentKind {
diff --git a/nemo/src/rule_model/components/term/operation.rs b/nemo/src/rule_model/components/term/operation.rs
index 8da33ac12..2d8069fbc 100644
--- a/nemo/src/rule_model/components/term/operation.rs
+++ b/nemo/src/rule_model/components/term/operation.rs
@@ -76,15 +76,13 @@ impl Operation {
             return None;
         }
 
-        let left = self.subterms.get(0).expect("invalid program component");
+        let left = self.subterms.first().expect("invalid program component");
         let right = self.subterms.get(1).expect("invalid program component");
 
         if let Term::Primitive(Primitive::Variable(variable)) = left {
             return Some((variable, right));
-        } else {
-            if let Term::Primitive(Primitive::Variable(variable)) = right {
-                return Some((variable, left));
-            }
+        } else if let Term::Primitive(Primitive::Variable(variable)) = right {
+            return Some((variable, left));
         }
 
         None
@@ -227,27 +225,27 @@ impl ProgramComponent for Operation {
         self
     }
 
-    fn validate(&self, builder: &mut ValidationErrorBuilder) -> Result<(), ()>
+    fn validate(&self, builder: &mut ValidationErrorBuilder) -> Option<()>
     where
         Self: Sized,
     {
         if !self.kind.num_arguments().validate(self.subterms.len()) {
             builder.report_error(
-                self.origin.clone(),
+                self.origin,
                 ValidationErrorKind::OperationArgumentNumber {
                     used: self.subterms.len(),
                     expected: self.kind.num_arguments().to_string(),
                 },
             );
 
-            return Err(());
+            return None;
         }
 
         for argument in self.arguments() {
             argument.validate(builder)?;
         }
 
-        Ok(())
+        Some(())
     }
 
     fn kind(&self) -> ProgramComponentKind {
diff --git a/nemo/src/rule_model/components/term/operation/operation_kind.rs b/nemo/src/rule_model/components/term/operation/operation_kind.rs
index 1a23e97de..f2418617a 100644
--- a/nemo/src/rule_model/components/term/operation/operation_kind.rs
+++ b/nemo/src/rule_model/components/term/operation/operation_kind.rs
@@ -51,10 +51,10 @@ impl Display for OperationNumArguments {
                 for (index, value) in choice.iter().enumerate() {
                     write!(f, "{}", value)?;
 
-                    if index < choice.len() - 2 {
-                        write!(f, ", ")?;
-                    } else if index == choice.len() - 2 {
-                        write!(f, ", or ")?;
+                    match index.cmp(&(choice.len() - 2)) {
+                        std::cmp::Ordering::Less => write!(f, ", ")?,
+                        std::cmp::Ordering::Equal => write!(f, ", or ")?,
+                        std::cmp::Ordering::Greater => {}
                     }
                 }
diff --git a/nemo/src/rule_model/components/term/primitive.rs b/nemo/src/rule_model/components/term/primitive.rs
index 01d075f7f..7c386a644 100644
--- a/nemo/src/rule_model/components/term/primitive.rs
+++ b/nemo/src/rule_model/components/term/primitive.rs
@@ -154,7 +154,7 @@ impl ProgramComponent for Primitive {
         }
     }
 
-    fn validate(&self, builder: &mut ValidationErrorBuilder) -> Result<(), ()>
+    fn validate(&self, builder: &mut ValidationErrorBuilder) -> Option<()>
     where
         Self: Sized,
     {
diff --git a/nemo/src/rule_model/components/term/primitive/ground.rs b/nemo/src/rule_model/components/term/primitive/ground.rs
index 5c9032af1..f67c6e2aa 100644
--- a/nemo/src/rule_model/components/term/primitive/ground.rs
+++ b/nemo/src/rule_model/components/term/primitive/ground.rs
@@ -146,10 +146,8 @@ impl ProgramComponent for GroundTerm {
             ASTProgramTranslation::build_inner_term
         )?;
 
-        if let Term::Primitive(primitive) = term {
-            if let Primitive::Ground(ground) = primitive {
-                return Ok(ground);
-            }
+        if let Term::Primitive(Primitive::Ground(ground)) = term {
+            return Ok(ground);
         }
 
         Err(ComponentParseError::ParseError)
@@ -167,11 +165,11 @@ impl ProgramComponent for GroundTerm {
         self
     }
 
-    fn validate(&self, _builder: &mut ValidationErrorBuilder) -> Result<(), ()>
+    fn validate(&self, _builder: &mut ValidationErrorBuilder) -> Option<()>
     where
         Self: Sized,
     {
-        Ok(())
+        Some(())
     }
 
     fn kind(&self) -> ProgramComponentKind {
diff --git a/nemo/src/rule_model/components/term/primitive/variable.rs b/nemo/src/rule_model/components/term/primitive/variable.rs
index 4b15d4e15..1db1a43ef 100644
--- a/nemo/src/rule_model/components/term/primitive/variable.rs
+++ b/nemo/src/rule_model/components/term/primitive/variable.rs
@@ -148,7 +148,7 @@ impl ProgramComponent for Variable {
         }
     }
 
-    fn validate(&self, builder: &mut ValidationErrorBuilder) -> Result<(), ()>
+    fn validate(&self, builder: &mut ValidationErrorBuilder) -> Option<()>
     where
         Self: Sized,
     {
diff --git a/nemo/src/rule_model/components/term/primitive/variable/existential.rs b/nemo/src/rule_model/components/term/primitive/variable/existential.rs
index d95044eeb..4dea8fe99 100644
--- a/nemo/src/rule_model/components/term/primitive/variable/existential.rs
+++ b/nemo/src/rule_model/components/term/primitive/variable/existential.rs
@@ -101,7 +101,7 @@ impl ProgramComponent for ExistentialVariable {
         self
     }
 
-    fn validate(&self, builder: &mut ValidationErrorBuilder) -> Result<(), ()>
+    fn validate(&self, builder: &mut ValidationErrorBuilder) -> Option<()>
     where
         Self: Sized,
     {
@@ -112,7 +112,7 @@ impl ProgramComponent for ExistentialVariable {
             );
         }
 
-        Ok(())
+        Some(())
     }
 
     fn kind(&self) -> ProgramComponentKind {
diff --git a/nemo/src/rule_model/components/term/primitive/variable/universal.rs b/nemo/src/rule_model/components/term/primitive/variable/universal.rs
index f22847969..139cdf797 100644
--- a/nemo/src/rule_model/components/term/primitive/variable/universal.rs
+++ b/nemo/src/rule_model/components/term/primitive/variable/universal.rs
@@ -121,7 +121,7 @@ impl ProgramComponent for UniversalVariable {
         self
     }
 
-    fn validate(&self, builder: &mut ValidationErrorBuilder) -> Result<(), ()>
+    fn validate(&self, builder: &mut ValidationErrorBuilder) -> Option<()>
     where
         Self: Sized,
     {
@@ -134,7 +134,7 @@ impl ProgramComponent for UniversalVariable {
             }
         }
 
-        Ok(())
+        Some(())
     }
 
     fn kind(&self) -> ProgramComponentKind {
diff --git a/nemo/src/rule_model/components/term/tuple.rs b/nemo/src/rule_model/components/term/tuple.rs
index 3439f11ae..3ea46b668 100644
--- a/nemo/src/rule_model/components/term/tuple.rs
+++ b/nemo/src/rule_model/components/term/tuple.rs
@@ -39,13 +39,15 @@ pub struct Tuple {
 macro_rules! tuple {
     // Base case: no elements
     () => {
-        crate::rule_model::components::term::tuple::Tuple::new(Vec::new())
+        $crate::rule_model::components::term::tuple::Tuple::new(Vec::new())
     };
     // Recursive case: handle each term, separated by commas
     ($($tt:tt)*) => {{
-        let mut terms = Vec::new();
-        term_list!(terms; $($tt)*);
-        crate::rule_model::components::term::tuple::Tuple::new(terms)
+        #[allow(clippy::vec_init_then_push)] {
+            let mut terms = Vec::new();
+            term_list!(terms; $($tt)*);
+            $crate::rule_model::components::term::tuple::Tuple::new(terms)
+        }
     }};
 }
@@ -127,7 +129,7 @@ impl ProgramComponent for Tuple {
         self
     }
 
-    fn validate(&self, builder: &mut ValidationErrorBuilder) -> Result<(), ()>
+    fn validate(&self, builder: &mut ValidationErrorBuilder) -> Option<()>
    where
         Self: Sized,
     {
@@ -135,7 +137,7 @@ impl ProgramComponent for Tuple {
             term.validate(builder)?;
         }
 
-        Ok(())
+        Some(())
     }
 
     fn kind(&self) -> ProgramComponentKind {
diff --git a/nemo/src/rule_model/error.rs b/nemo/src/rule_model/error.rs
index ceb93d8db..2aed7cfe2 100644
--- a/nemo/src/rule_model/error.rs
+++ b/nemo/src/rule_model/error.rs
@@ -196,7 +196,7 @@ impl ValidationErrorBuilder {
         let message = kind.to_string();
 
         self.errors.push(ValidationError {
-            kind: kind,
+            kind,
            info: ComplexError::new_error(origin),
         });
 
@@ -218,17 +218,17 @@ pub struct TranslationError {
     /// The type of error that occurred
     kind: TranslationErrorKind,
     /// Additional information
-    info: ComplexError<CharacterRange>,
+    info: Box<ComplexError<CharacterRange>>,
 }
 
 impl TranslationError {
     /// Create a new [TranslationError] from a given [Span].
-    pub fn new<'a>(span: Span<'a>, kind: TranslationErrorKind) -> Self {
+    pub fn new(span: Span<'_>, kind: TranslationErrorKind) -> Self {
         let message = kind.to_string();
 
         let mut result = Self {
             kind,
-            info: ComplexError::new_error(span.range()),
+            info: Box::new(ComplexError::new_error(span.range())),
         };
 
         result
diff --git a/nemo/src/rule_model/program.rs b/nemo/src/rule_model/program.rs
index 769a0ba8c..7b01cfe8b 100644
--- a/nemo/src/rule_model/program.rs
+++ b/nemo/src/rule_model/program.rs
@@ -153,7 +153,7 @@ impl Program {
         &mut self,
         exports: Iterator,
     ) {
-        self.exports.extend(exports.into_iter())
+        self.exports.extend(exports)
     }
 
     /// Remove all export statements
@@ -201,7 +201,7 @@ impl Program {
                 )
                 .add_label(
                     ComplexErrorLabelKind::Information,
-                    previous_origin.clone(),
+                    *previous_origin,
                     Info::PredicateArity {
                         arity: *previous_arity,
                     },
@@ -219,12 +219,12 @@ impl Program {
     pub(crate) fn validate_global_properties(
         &self,
         builder: &mut ValidationErrorBuilder,
-    ) -> Result<(), ()> {
+    ) -> Option<()> {
         let mut predicate_arity = HashMap::<Tag, (usize, Origin)>::new();
 
         for import in self.imports() {
             let predicate = import.predicate().clone();
-            let origin = import.origin().clone();
+            let origin = *import.origin();
 
             if let Some(arity) = import.expected_arity() {
                 Self::validate_arity(&mut predicate_arity, predicate, arity, origin, builder);
@@ -234,7 +234,7 @@ impl Program {
         for fact in self.facts() {
             let predicate = fact.predicate().clone();
             let arity = fact.subterms().count();
-            let origin = fact.origin().clone();
+            let origin = *fact.origin();
 
             Self::validate_arity(&mut predicate_arity, predicate, arity, origin, builder);
         }
@@ -243,7 +243,7 @@ impl Program {
             for atom in rule.head() {
                 let predicate = atom.predicate().clone();
                 let arity = atom.arguments().count();
-                let origin = atom.origin().clone();
+                let origin = *atom.origin();
 
                 Self::validate_arity(&mut predicate_arity, predicate, arity, origin, builder);
             }
@@ -253,7 +253,7 @@ impl Program {
                     Literal::Positive(atom) | Literal::Negative(atom) => {
                         let predicate = atom.predicate().clone();
                         let arity = atom.arguments().count();
-                        let origin = atom.origin().clone();
+                        let origin = *atom.origin();
 
                         Self::validate_arity(
                             &mut predicate_arity,
@@ -272,14 +272,14 @@ impl Program {
 
         for export in self.exports() {
             let predicate = export.predicate().clone();
-            let origin = export.origin().clone();
+            let origin = *export.origin();
 
             if let Some(arity) = export.expected_arity() {
                 Self::validate_arity(&mut predicate_arity, predicate, arity, origin, builder);
             }
         }
 
-        Ok(())
+        Some(())
     }
 }
@@ -320,7 +320,7 @@ impl ProgramComponent for Program {
         self
     }
 
-    fn validate(&self, builder: &mut ValidationErrorBuilder) -> Result<(), ()>
+    fn validate(&self, builder: &mut ValidationErrorBuilder) -> Option<()>
     where
         Self: Sized,
     {
@@ -418,7 +418,7 @@ impl ProgramBuilder {
     }
 
     /// Validate the current program.
-    pub fn validate(&self, builder: &mut ValidationErrorBuilder) -> Result<(), ()> {
+    pub fn validate(&self, builder: &mut ValidationErrorBuilder) -> Option<()> {
         self.program.validate_global_properties(builder)
     }
 }
diff --git a/nemo/src/rule_model/translation.rs b/nemo/src/rule_model/translation.rs
index 020cad927..59b14de85 100644
--- a/nemo/src/rule_model/translation.rs
+++ b/nemo/src/rule_model/translation.rs
@@ -224,7 +224,7 @@ impl<'a> ASTProgramTranslation<'a> {
             }
             ast::tag::structure::StructureTagKind::Prefixed { prefix, tag } => {
                 if let Some((expanded_prefix, _)) = self.prefix_mapping.get(&prefix.to_string()) {
-                    format!("{expanded_prefix}{}", tag.to_string())
+                    format!("{expanded_prefix}{}", tag)
                 } else {
                     return Err(TranslationError::new(
                         prefix.span(),
diff --git a/nemo/src/rule_model/util.rs b/nemo/src/rule_model/util.rs
index f679e7d20..4e3b57cdc 100644
--- a/nemo/src/rule_model/util.rs
+++ b/nemo/src/rule_model/util.rs
@@ -7,26 +7,26 @@ macro_rules! term_list {
     () => {};
     // Match a single universally quantified variable
     ($terms:ident; ? $var:ident) => {
-        $terms.push(crate::rule_model::components::term::Term::universal_variable(stringify!($var)));
+        $terms.push($crate::rule_model::components::term::Term::universal_variable(stringify!($var)));
     };
     // Match universally quantified variables
     ($terms:ident; ? $var:ident, $($others:tt)* ) => {
-        $terms.push(crate::rule_model::components::term::Term::universal_variable(stringify!($var))); term_list!($terms; $($others)*)
+        $terms.push($crate::rule_model::components::term::Term::universal_variable(stringify!($var))); term_list!($terms; $($others)*)
     };
     // Match a single existentially quantified variable
     ($terms:ident; ! $var:ident) => {
-        $terms.push(crate::rule_model::components::term::Term::existential_variable(stringify!($var)));
+        $terms.push($crate::rule_model::components::term::Term::existential_variable(stringify!($var)));
     };
     // Match existentially quantified variables
     ($terms:ident; ! $var:ident, $($others:tt)* ) => {
-        $terms.push(crate::rule_model::components::term::Term::existential_variable(stringify!($var))); term_list!($terms; $($others)*)
+        $terms.push($crate::rule_model::components::term::Term::existential_variable(stringify!($var))); term_list!($terms; $($others)*)
     };
     // Match a single occurence of anything
     ($terms:ident; $e:tt) => {
-        $terms.push(crate::rule_model::components::term::Term::from($e));
+        $terms.push($crate::rule_model::components::term::Term::from($e));
     };
     // Match a list of anything
     ($terms:ident; $e:tt, $($others:tt)* ) => {
-        $terms.push(crate::rule_model::components::term::Term::from($e)); term_list!($terms; $($others)*)
+        $terms.push($crate::rule_model::components::term::Term::from($e)); term_list!($terms; $($others)*)
     };
 }
diff --git a/nemo/src/syntax.rs b/nemo/src/syntax.rs
index b3465fcbf..1e89a6f1e 100644
--- a/nemo/src/syntax.rs
+++ b/nemo/src/syntax.rs
@@ -1,5 +1,6 @@
 //! The Syntax constants for the nemo language.
-//! Every utilisation of syntax (e.g. parsing or formatting values to
+//!
+//! Every utilization of syntax (e.g. parsing or formatting values to
 //! string representation) has to reference the constants defined
 //! in this module and must not use strings directly.

From 94b29fed18feedd227080555608b0b0c44eb9a7a Mon Sep 17 00:00:00 2001
From: Alex Ivliev
Date: Tue, 17 Sep 2024 11:38:27 +0200
Subject: [PATCH 170/214] Fix documentation errors

---
 nemo-language-server/src/language_server.rs           | 4 ++--
 nemo/src/io/export_manager.rs                         | 9 ++-------
 nemo/src/io/formats/dsv/value_format.rs               | 2 +-
 nemo/src/io/formats/rdf/error.rs                      | 2 +-
 nemo/src/parser.rs                                    | 2 +-
 nemo/src/parser/ast/expression/complex/aggregation.rs | 2 --
 nemo/src/parser/ast/sequence/declare.rs               | 2 +-
 nemo/src/parser/ast/sequence/key_value.rs             | 2 +-
 nemo/src/parser/input.rs                              | 2 +-
 nemo/src/rule_model.rs                                | 3 +--
 nemo/src/rule_model/components/term/primitive.rs      | 2 +-
 nemo/src/rule_model/translation.rs                    | 2 +-
 12 files changed, 13 insertions(+), 21 deletions(-)

diff --git a/nemo-language-server/src/language_server.rs b/nemo-language-server/src/language_server.rs
index 1db2b558b..d3ffe77b2 100644
--- a/nemo-language-server/src/language_server.rs
+++ b/nemo-language-server/src/language_server.rs
@@ -456,7 +456,7 @@ struct IdentifiedNode<'a> {
     scoping_node: &'a dyn ProgramAST<'a>,
 }
 
-struct PariallyIdentifiedNode<'a> {
+struct PartiallyIdentifiedNode<'a> {
     node: &'a dyn ProgramAST<'a>,
     identifier: (ParserContext, String),
     identifier_scope: ParserContext,
@@ -472,7 +472,7 @@ fn node_path_deepest_identifier<'a>(
         match info {
             None => {
                 if let Some(lsp_ident) = node.identifier() {
-                    info = Some(PariallyIdentifiedNode {
+                    info = Some(PartiallyIdentifiedNode {
                         node: *node,
                         identifier: lsp_ident.identifier().clone(),
                         identifier_scope: *lsp_ident.scope(),
diff --git a/nemo/src/io/export_manager.rs b/nemo/src/io/export_manager.rs
index ebde5c603..cb0ddfd38 100644
--- a/nemo/src/io/export_manager.rs
+++ b/nemo/src/io/export_manager.rs
@@ -179,13 +179,8 @@ impl ExportManager {
         Ok(export_handler.resource_is_stdout())
     }
 
-    /// Export a (possibly empty) table according to the given [ExportDirective],
-    /// but direct output into the given writer instead of using whatever
-    /// resource the directive specifies.
-    ///
-    /// The `predicate_arity` is the arity of the predicate that is to be exported. This information
-    /// is used for validation and as a hint to exporters that were not initialized with details
-    /// about the arity.
+    /// Export a (possibly empty) table according to the given [ImportExportHandler],
+    /// and directly output into the given writer.
     ///
     /// This function ignores [ExportManager::disable_write].
     pub fn export_table_with_writer<'a>(
diff --git a/nemo/src/io/formats/dsv/value_format.rs b/nemo/src/io/formats/dsv/value_format.rs
index 022c66871..208f3947f 100644
--- a/nemo/src/io/formats/dsv/value_format.rs
+++ b/nemo/src/io/formats/dsv/value_format.rs
@@ -60,7 +60,7 @@ impl DsvValueFormats {
         Self(formats)
     }
 
-    /// Return a list of [DsvValueFormat]s with default entries.
+    /// Return a list of value formats with default entries.
     pub fn default(arity: usize) -> Self {
         Self((0..arity).map(|_| DsvValueFormat::Anything).collect())
     }
diff --git a/nemo/src/io/formats/rdf/error.rs b/nemo/src/io/formats/rdf/error.rs
index 775e4ddb2..6d9e9dac4 100644
--- a/nemo/src/io/formats/rdf/error.rs
+++ b/nemo/src/io/formats/rdf/error.rs
@@ -5,7 +5,7 @@ use thiserror::Error;
 use nemo_physical::{datavalues::DataValueCreationError, resource::Resource};
 
 /// Errors that can occur when reading/writing RDF resources and converting them
-/// to/from [AnyDataValue]s.
+/// to/from Nemo's data value representation.
 #[allow(variant_size_differences)]
 #[derive(Error, Debug)]
 pub enum RdfFormatError {
diff --git a/nemo/src/parser.rs b/nemo/src/parser.rs
index bad1781e2..922409d94 100644
--- a/nemo/src/parser.rs
+++ b/nemo/src/parser.rs
@@ -86,7 +86,7 @@ impl<'a> ParserErrorReport<'a> {
         })
     }
 
-    /// Return raw [`ParserError`s]
+    /// Return raw [ParserError]s.
     pub fn errors(&self) -> &Vec<ParserError> {
         &self.errors
     }
diff --git a/nemo/src/parser/ast/expression/complex/aggregation.rs b/nemo/src/parser/ast/expression/complex/aggregation.rs
index 5a9eaa1c9..eab76399f 100644
--- a/nemo/src/parser/ast/expression/complex/aggregation.rs
+++ b/nemo/src/parser/ast/expression/complex/aggregation.rs
@@ -21,8 +21,6 @@ use crate::{
 };
 
 /// A known Aggregation applied to a series of [Expression]s.
-///
-/// This has the same structure as an [Atom].
 #[derive(Debug)]
 pub struct Aggregation<'a> {
     /// [Span] associated with this node
diff --git a/nemo/src/parser/ast/sequence/declare.rs b/nemo/src/parser/ast/sequence/declare.rs
index 04961ec1c..6a3a6445f 100644
--- a/nemo/src/parser/ast/sequence/declare.rs
+++ b/nemo/src/parser/ast/sequence/declare.rs
@@ -1,4 +1,4 @@
-//! This module defines [DeclareSequence].
+//! This module defines [NameTypePair].
 
 use nom::sequence::{separated_pair, tuple};
 
diff --git a/nemo/src/parser/ast/sequence/key_value.rs b/nemo/src/parser/ast/sequence/key_value.rs
index 827839e2c..f00c13870 100644
--- a/nemo/src/parser/ast/sequence/key_value.rs
+++ b/nemo/src/parser/ast/sequence/key_value.rs
@@ -1,4 +1,4 @@
-//! This module defines [KeyValueSequence].
+//! This module defines [KeyValuePair].
 
 use nom::sequence::{separated_pair, tuple};
 
diff --git a/nemo/src/parser/input.rs b/nemo/src/parser/input.rs
index 0dc893e51..dc00d6cdb 100644
--- a/nemo/src/parser/input.rs
+++ b/nemo/src/parser/input.rs
@@ -1,4 +1,4 @@
-//! This module defines [ParseInput].
+//! This module defines [ParserInput].
 
 use std::str::{CharIndices, Chars};
 
diff --git a/nemo/src/rule_model.rs b/nemo/src/rule_model.rs
index 18b6964d8..bd6e37928 100644
--- a/nemo/src/rule_model.rs
+++ b/nemo/src/rule_model.rs
@@ -3,10 +3,9 @@
 #[macro_use]
 pub mod util;
 
-pub(crate) mod origin;
-
 pub mod components;
 pub mod error;
+pub mod origin;
 pub mod program;
 pub mod term_map;
 pub mod translation;
diff --git a/nemo/src/rule_model/components/term/primitive.rs b/nemo/src/rule_model/components/term/primitive.rs
index 7c386a644..4620c39bc 100644
--- a/nemo/src/rule_model/components/term/primitive.rs
+++ b/nemo/src/rule_model/components/term/primitive.rs
@@ -1,4 +1,4 @@
-//! This module defines [PrimitiveTerm].
+//! This module defines [Primitive].
 
 pub mod ground;
 pub mod variable;
diff --git a/nemo/src/rule_model/translation.rs b/nemo/src/rule_model/translation.rs
index 59b14de85..c62094b3b 100644
--- a/nemo/src/rule_model/translation.rs
+++ b/nemo/src/rule_model/translation.rs
@@ -138,7 +138,7 @@ impl<'a> ProgramErrorReport<'a> {
             .collect()
     }
 
-    /// Return raw [`ProgramError`s]
+    /// Return raw [ProgramError]s.
     pub fn errors(&self) -> &Vec<ProgramError> {
         &self.errors
     }

From 6ef36658f2e8d99643e69a85aa3d216d59d2caa3 Mon Sep 17 00:00:00 2001
From: Alex Ivliev
Date: Tue, 17 Sep 2024 17:47:43 +0200
Subject: [PATCH 171/214] Fix unit tests

---
 nemo/src/parser/ast.rs                          |  1 +
 nemo/src/parser/ast/comment/doc.rs              |  6 +-
 nemo/src/parser/ast/comment/line.rs             |  8 +-
 nemo/src/parser/ast/comment/toplevel.rs         |  4 +-
 nemo/src/parser/ast/comment/wsoc.rs             |  2 +-
 nemo/src/parser/ast/directive/export.rs         |  2 +-
 nemo/src/parser/ast/directive/import.rs         |  2 +-
 nemo/src/parser/ast/directive/unknown.rs        | 26 +++---
 nemo/src/parser/ast/expression.rs               | 15 +--
 nemo/src/parser/ast/expression/complex/infix.rs | 24 +----
 nemo/src/parser/ast/expression/complex/map.rs   | 12 +--
 nemo/src/parser/ast/guard.rs                    | 91 +++++++++++++++++++
 nemo/src/parser/ast/program.rs                  | 28 +++---
 nemo/src/parser/ast/rule.rs                     | 16 ++--
 nemo/src/parser/ast/sequence/declare.rs         | 32 ++++---
 nemo/src/parser/ast/sequence/key_value.rs       | 12 +--
 nemo/src/parser/ast/statement.rs                | 24 +++--
 nemo/src/parser/ast/tag/structure.rs            |  4 +-
 nemo/src/parser/ast/token.rs                    | 11 +--
 nemo/src/parser/context.rs                      |  6 ++
 nemo/src/rule_model/components/atom.rs          |  3 +-
 nemo/src/rule_model/components/fact.rs          |  3 +-
 nemo/src/rule_model/components/literal.rs       |  3 +-
 nemo/src/rule_model/translation/rule.rs         | 47 +++++-----
 24 files changed, 231 insertions(+), 151 deletions(-)
 create mode 100644 nemo/src/parser/ast/guard.rs

diff --git a/nemo/src/parser/ast.rs b/nemo/src/parser/ast.rs
index c6186d499..733d64053 100644
--- a/nemo/src/parser/ast.rs
+++ b/nemo/src/parser/ast.rs
@@ -4,6 +4,7 @@ pub mod attribute;
 pub mod comment;
 pub mod directive;
 pub mod expression;
+pub mod guard;
 pub mod program;
 pub mod rule;
 pub mod sequence;
diff --git a/nemo/src/parser/ast/comment/doc.rs b/nemo/src/parser/ast/comment/doc.rs
index 322f8b8ef..2cfa63b0d 100644
--- a/nemo/src/parser/ast/comment/doc.rs
+++ b/nemo/src/parser/ast/comment/doc.rs
@@ -96,9 +96,9 @@ mod test {
     #[test]
     fn parse_doc_comment() {
         let test = vec![
-            ("/// my comment", 1),
-            ("///my comment\r\n/// my other comment", 2),
-            ("///my comment\r\n /// my other comment", 2),
+            ("%%% my comment", 1),
+            ("%%%my comment\r\n%%% my other comment", 2),
+            ("%%%my comment\r\n %%% my other comment", 2),
         ];
 
         for (input, expected) in test {
diff --git a/nemo/src/parser/ast/comment/line.rs b/nemo/src/parser/ast/comment/line.rs
index 82fb2e0f8..37e2a27c5 100644
--- a/nemo/src/parser/ast/comment/line.rs
+++ b/nemo/src/parser/ast/comment/line.rs
@@ -84,10 +84,10 @@ mod test {
     #[test]
     fn parse_line_comment() {
         let test = vec![
-            ("// my comment", " my comment".to_string()),
-            ("//my comment", "my comment".to_string()),
-            ("// \tmy comment\n", " \tmy comment".to_string()),
-            ("//// my comment", " my comment".to_string()),
+            ("% my comment", " my comment".to_string()),
+            ("%my comment", "my comment".to_string()),
+            ("% \tmy comment\n", " \tmy comment".to_string()),
+            ("%%%% my comment", " my comment".to_string()),
         ];
 
         for (input, expected) in test {
diff --git a/nemo/src/parser/ast/comment/toplevel.rs b/nemo/src/parser/ast/comment/toplevel.rs
index 94f5b12c5..2cf163aed 100644
--- a/nemo/src/parser/ast/comment/toplevel.rs
+++ b/nemo/src/parser/ast/comment/toplevel.rs
@@ -97,8 +97,8 @@ mod test {
     #[test]
     fn parse_toplevel_comment() {
         let test = vec![
-            ("//! my comment", 1),
-            ("//!my comment\r\n//! my other comment", 2),
+            ("%! my comment", 1),
+            ("%!my comment\r\n%! my other comment", 2),
         ];
 
         for (input, expected) in test {
diff --git a/nemo/src/parser/ast/comment/wsoc.rs b/nemo/src/parser/ast/comment/wsoc.rs
index d48f61220..53d5a2e40 100644
--- a/nemo/src/parser/ast/comment/wsoc.rs
+++ b/nemo/src/parser/ast/comment/wsoc.rs
@@ -78,7 +78,7 @@ mod test {
         let test = vec![
             ("", 0),
             (" \n ", 0),
-            (" // my comment \n // Another comment \n ", 2),
+            (" % my comment \n % Another comment \n ", 2),
             (" /* a comment */", 1),
         ];
 
diff --git a/nemo/src/parser/ast/directive/export.rs b/nemo/src/parser/ast/directive/export.rs
index ac3dece84..2a28bf811 100644
--- a/nemo/src/parser/ast/directive/export.rs
+++ b/nemo/src/parser/ast/directive/export.rs
@@ -106,7 +106,7 @@ mod test {
     #[test]
     fn parse_export() {
         let test = vec![(
-            "@export predicate :- csv { resource: \"test.csv\" }",
+            "@export predicate :- csv { resource = \"test.csv\" }",
             ("predicate".to_string(), "csv".to_string()),
         )];
 
diff --git a/nemo/src/parser/ast/directive/import.rs b/nemo/src/parser/ast/directive/import.rs
index 7f74472d1..e180ea0cd 100644
--- a/nemo/src/parser/ast/directive/import.rs
+++ b/nemo/src/parser/ast/directive/import.rs
@@ -106,7 +106,7 @@ mod test {
     #[test]
     fn parse_import() {
         let test = vec![(
-            "@import predicate :- csv { resource: \"test.csv\" }",
+            "@import predicate :- csv { resource = \"test.csv\" }",
             ("predicate".to_string(), "csv".to_string()),
         )];
 
diff --git a/nemo/src/parser/ast/directive/unknown.rs b/nemo/src/parser/ast/directive/unknown.rs
index 43c10d9c3..0940fc82d 100644
--- a/nemo/src/parser/ast/directive/unknown.rs
+++ b/nemo/src/parser/ast/directive/unknown.rs
@@ -1,15 +1,16 @@
 //! This module defines the [UnknownDirective] directive.
 
 use nom::{
+    branch::alt,
     bytes::complete::is_not,
-    combinator::recognize,
-    sequence::{preceded, separated_pair},
+    combinator::{map, recognize},
+    sequence::{preceded, separated_pair, terminated},
 };
 use nom_supreme::error::{BaseErrorKind, Expectation};
 use strum::IntoEnumIterator;
 
 use crate::parser::{
-    ast::{comment::wsoc::WSoC, token::Token, ProgramAST},
+    ast::{comment::wsoc::WSoC, expression::Expression, token::Token, ProgramAST},
     context::{context, ParserContext},
     error::ParserErrorTree,
     input::ParserInput,
@@ -97,14 +98,13 @@ impl<'a> ProgramAST<'a> for UnknownDirective<'a> {
                 separated_pair(
                     preceded(Token::directive_indicator, Self::parse_unknown),
                     WSoC::parse,
-                    // FIXME: Rework error recovery, because this recognises an `.` in an IRI,
-                    // e.g. in `@baseerror
-                    //                    ^
-                    // That means that content == "" which
-                    // will also produce an error.
-                    // NOTE: Maybe we could try to parse the "body" of the other directives and if
-                    // one succeeds give a hint what directive could be the correct.
-                    recognize(is_not(".")),
+                    alt((
+                        map(
+                            terminated(Expression::parse, recognize(is_not("."))),
+                            |expression| expression.span(),
+                        ),
+                        map(recognize(is_not(".")), |x: ParserInput<'_>| x.span),
+                    )),
                 ),
             )(input)
             .map(|(rest, (name, content))| {
@@ -115,7 +115,7 @@ impl<'a> ProgramAST<'a> for UnknownDirective<'a> {
                     Self {
                         span: input_span.until_rest(&rest_span),
                         name,
-                        content: content.span,
+                        content,
                     },
                 )
             })
@@ -174,7 +174,7 @@ mod test {
     fn error_recovery() {
         let test = [(
             "@test .",
             ("test", ""),
         )];
 
         for (input, expected) in test {
diff --git a/nemo/src/parser/ast/expression.rs b/nemo/src/parser/ast/expression.rs
index 252977007..f18b37ca7 100644
--- a/nemo/src/parser/ast/expression.rs
+++ b/nemo/src/parser/ast/expression.rs
@@ -8,8 +8,8 @@ use basic::{
     string::StringLiteral, variable::Variable,
 };
 use complex::{
-    aggregation::Aggregation, arithmetic::Arithmetic, atom::Atom, infix::InfixExpression, map::Map,
-    negation::Negation, operation::Operation, tuple::Tuple,
+    aggregation::Aggregation, arithmetic::Arithmetic, atom::Atom, map::Map, negation::Negation,
+    operation::Operation, tuple::Tuple,
 };
 
 use nom::{branch::alt, combinator::map};
@@ -22,7 +22,7 @@ use crate::parser::{
 
 use super::ProgramAST;
 
-/// An expression that is the building block of rules.
+/// An expression of potentially complex terms
 #[derive(Debug)]
 pub enum Expression<'a> {
     /// Aggregation
@@ -37,8 +37,6 @@ pub enum Expression<'a> {
     Boolean(Boolean<'a>),
     /// Constant
     Constant(Constant<'a>),
-    /// Infix
-    Infix(InfixExpression<'a>),
     /// Map
     Map(Map<'a>),
     /// Negation
@@ -67,7 +65,6 @@ impl<'a> Expression<'a> {
             Expression::Blank(expression) => expression.context(),
             Expression::Boolean(expression) => expression.context(),
             Expression::Constant(expression) => expression.context(),
-            Expression::Infix(expression) => expression.context(),
             Expression::Map(expression) => expression.context(),
             Expression::Number(expression) => expression.context(),
             Expression::Negation(expression) => expression.context(),
@@ -116,7 +113,6 @@ impl<'a> ProgramAST<'a> for Expression<'a> {
             Expression::Blank(expression) => expression,
             Expression::Boolean(expression) => expression,
             Expression::Constant(expression) => expression,
-            Expression::Infix(expression) => expression,
             Expression::Map(expression) => expression,
             Expression::Number(expression) => expression,
             Expression::Negation(expression) => expression,
@@ -136,7 +132,6 @@ impl<'a> ProgramAST<'a> for Expression<'a> {
             Expression::Blank(expression) => expression.span(),
             Expression::Boolean(expression) => expression.span(),
             Expression::Constant(expression) => expression.span(),
-            Expression::Infix(expression) => expression.span(),
             Expression::Map(expression) => expression.span(),
             Expression::Number(expression) => expression.span(),
             Expression::Negation(expression) => expression.span(),
@@ -157,7 +152,6 @@ impl<'a> ProgramAST<'a> for Expression<'a> {
             alt((
                 map(Arithmetic::parse, Self::Arithmetic),
                 Self::parse_complex,
-                map(InfixExpression::parse, Self::Infix),
                 Self::parse_basic,
             )),
         )(input)
@@ -188,8 +182,7 @@ mod test {
             ("_:12", ParserContext::Blank),
             ("true", ParserContext::Boolean),
             ("constant", ParserContext::Constant),
-            ("(1 + 2) = 2 + 1", ParserContext::Infix),
-            ("{a:1,b:POW(1, 2)}", ParserContext::Map),
+            ("{a=1,b=POW(1, 2)}", ParserContext::Map),
             ("12", ParserContext::Number),
             ("~test(1)", ParserContext::Negation),
             ("substr(\"string\", 1+?x)", ParserContext::Operation),
diff --git a/nemo/src/parser/ast/expression/complex/infix.rs b/nemo/src/parser/ast/expression/complex/infix.rs
index 69143b533..15f3b90ce 100644
--- a/nemo/src/parser/ast/expression/complex/infix.rs
+++ b/nemo/src/parser/ast/expression/complex/infix.rs
@@ -4,7 +4,6 @@
 use enum_assoc::Assoc;
 use nom::{
     branch::alt,
-    combinator::map,
     sequence::{delimited, tuple},
 };
 
@@ -21,11 +20,6 @@ use crate::parser::{
     ParserResult,
 };
 
-use super::{
-    aggregation::Aggregation, arithmetic::Arithmetic, atom::Atom, map::Map, negation::Negation,
-    operation::Operation, tuple::Tuple,
-};
-
 /// Types of infix expression connectives
 #[derive(Assoc, Debug, Copy, Clone, PartialEq, Eq)]
 #[func(pub fn token(token: TokenKind) -> Option<Self>)]
@@ -93,20 +87,6 @@ impl<'a> InfixExpression<'a> {
             )
         })
     }
-
-    /// Parse non-infix [Expression]s
-    pub fn parse_non_infix(input: ParserInput<'a>) -> ParserResult<'a, Expression<'a>> {
-        alt((
-            map(Operation::parse, Expression::Operation),
-            map(Arithmetic::parse, Expression::Arithmetic),
-            map(Aggregation::parse, Expression::Aggregation),
-            map(Atom::parse, Expression::Atom),
-            map(Tuple::parse, Expression::Tuple),
-            map(Map::parse, Expression::Map),
-            map(Negation::parse, Expression::Negation),
-            Expression::parse_basic,
-        ))(input)
-    }
 }
 
 const CONTEXT: ParserContext = ParserContext::Infix;
@@ -129,9 +109,9 @@ impl<'a> ProgramAST<'a> for InfixExpression<'a> {
         context(
             CONTEXT,
             tuple((
-                Self::parse_non_infix,
+                Expression::parse,
                 delimited(WSoC::parse, Self::parse_infix_kind, WSoC::parse),
-                Self::parse_non_infix,
+                Expression::parse,
             )),
         )(input)
         .map(|(rest, (left, kind, right))| {
diff --git a/nemo/src/parser/ast/expression/complex/map.rs b/nemo/src/parser/ast/expression/complex/map.rs
index a59c5dcc0..76880021e 100644
--- a/nemo/src/parser/ast/expression/complex/map.rs
+++ b/nemo/src/parser/ast/expression/complex/map.rs
@@ -114,18 +114,18 @@ mod test {
     #[test]
     fn parse_map() {
         let test = vec![
-            ("{?x: 7}", (None, 1)),
-            ("abc { ?x: 7 }", (Some("abc".to_string()), 1)),
+            ("{?x=7}", (None, 1)),
+            ("abc { ?x = 7 }", (Some("abc".to_string()), 1)),
             (
-                "abc { ?x: 7, ?y: 12, ?z: 13 }",
+                "abc { ?x = 7, ?y = 12, ?z = 13 }",
                 (Some("abc".to_string()), 3),
             ),
             (
-                "abc { ?x : 7 , ?y : 13 , ?z : 15 }",
+                "abc { ?x = 7 , ?y = 13 , ?z = 15 }",
                 (Some("abc".to_string()), 3),
             ),
-            ("{a:1, b: POW(1, 2)}", (None, 2)),
-            ("{a:b, c:d,}", (None, 2)),
+            ("{a=1, b= POW(1, 2)}", (None, 2)),
+            ("{a=b, c=d,}", (None, 2)),
         ];
 
         for (input, expected) in test {
diff --git a/nemo/src/parser/ast/guard.rs b/nemo/src/parser/ast/guard.rs
new file mode 100644
index 000000000..675d94370
--- /dev/null
+++ b/nemo/src/parser/ast/guard.rs
@@ -0,0 +1,91 @@
+//! This module defines [Guard].
+
+use nom::{branch::alt, combinator::map};
+
+use crate::parser::context::ParserContext;
+
+use super::{
+    expression::{complex::infix::InfixExpression, Expression},
+    ProgramAST,
+};
+
+/// An expression that is the building block of rules.
+#[derive(Debug)]
+pub enum Guard<'a> {
+    /// A normal expression
+    Expression(Expression<'a>),
+    /// Infix
+    Infix(InfixExpression<'a>),
+}
+
+impl<'a> Guard<'a> {
+    /// Return the [ParserContext] of the underlying expression type.
+    pub fn context_type(&self) -> ParserContext {
+        match self {
+            Guard::Expression(expression) => expression.context_type(),
+            Guard::Infix(infix) => infix.context(),
+        }
+    }
+}
+
+const CONTEXT: ParserContext = ParserContext::Guard;
+
+impl<'a> ProgramAST<'a> for Guard<'a> {
+    fn children(&'a self) -> Vec<&'a dyn ProgramAST<'a>> {
+        match self {
+            Guard::Expression(expression) => expression.children(),
+            Guard::Infix(infix) => infix.children(),
+        }
+    }
+
+    fn span(&self) -> crate::parser::span::Span<'a> {
+        match self {
+            Guard::Expression(expression) => expression.span(),
+            Guard::Infix(infix) => infix.span(),
+        }
+    }
+
+    fn parse(input: crate::parser::input::ParserInput<'a>) -> crate::parser::ParserResult<'a, Self>
+    where
+        Self: Sized + 'a,
+    {
+        alt((
+            map(InfixExpression::parse, Self::Infix),
+            map(Expression::parse, Self::Expression),
+        ))(input)
+    }
+
+    fn context(&self) -> ParserContext {
+        CONTEXT
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use nom::combinator::all_consuming;
+
+    use crate::parser::{
+        ast::{guard::Guard, ProgramAST},
+        context::ParserContext,
+        input::ParserInput,
+        ParserState,
+    };
+
+    #[test]
+    fn parse_guard() {
+        let test = vec![
+            ("test(?x, (1,), (1 + 2))", ParserContext::Atom),
+            ("2 + 3 = 5", ParserContext::Infix),
+        ];
+
+        for (input, expect) in test {
+            let parser_input = ParserInput::new(input, ParserState::default());
+            let result = all_consuming(Guard::parse)(parser_input);
+
+            assert!(result.is_ok());
+
+            let result = result.unwrap();
+            assert_eq!(result.1.context_type(), expect);
+        }
+    }
+}
diff --git a/nemo/src/parser/ast/program.rs b/nemo/src/parser/ast/program.rs
index 54a35cec4..81381874b 100644
--- a/nemo/src/parser/ast/program.rs
+++ b/nemo/src/parser/ast/program.rs
@@ -133,19 +133,19 @@ mod test {
 
     #[test]
     fn parse_program() {
        let program = "%! Top-level comment\n\
            % Declarations:\n\
            @declare a(_: int, _: int) .\n\
            @declare b(_: int, _: int) .\n\
            %%% A fact\n\
            a(1, 2) .\n\
            \n\
            % Rules:\n\
            \n\
            %%% A rule\n\
            b(?y, ?x) :- a(?x, ?y) .\n\
            \n\
            % Some more comments
        ";
 
        let parser_input = ParserInput::new(program, ParserState::default());
@@ -160,20 +160,20 @@ mod test {
 
    #[test]
    fn parser_recover() {
        let program = "%!
Top-level comment\n\
+            % Declarations:\n\
             @declare oops a(_: int, _: int) .\n\
             @declare b(_: int, _: int) .\n\
-            /// A fact\n\
+            %%% A fact\n\
             a(1, 2) \n\
             \n\
-            // Rules:\n\
+            % Rules:\n\
             \n\
-            /// A rule\n\
+            %%% A rule\n\
             b(?y, ?x) <- a(?x, ?y) .\n\
             \n\
             c(?y, ?x) :- a(?x, ?y) .\n\
-            // Some more comments
+            % Some more comments
             ";
 
         let parser_input = ParserInput::new(program, ParserState::default());
         let result = Program::parse(parser_input.clone())
             .expect("This should not fail")
             .1;
 
-        println!("{:?}", result.statements);
-
         assert!(result.comment.is_some());
-        assert_eq!(result.statements.len(), 2);
+        assert_eq!(result.statements.len(), 4);
         assert_eq!(parser_input.state.errors.borrow().len(), 2);
     }
 }
diff --git a/nemo/src/parser/ast/rule.rs b/nemo/src/parser/ast/rule.rs
index b1cac7251..88c25f791 100644
--- a/nemo/src/parser/ast/rule.rs
+++ b/nemo/src/parser/ast/rule.rs
@@ -13,8 +13,8 @@ use crate::parser::{
 };
 
 use super::{
-    attribute::Attribute, comment::wsoc::WSoC, expression::Expression, sequence::Sequence,
-    token::Token, ProgramAST,
+    attribute::Attribute, comment::wsoc::WSoC, guard::Guard, sequence::Sequence, token::Token,
+    ProgramAST,
 };
 
 /// A rule describing a logical implication
@@ -27,19 +27,19 @@ pub struct Rule<'a> {
     attributes: Vec<Attribute<'a>>,
 
     /// Head of the rule
-    head: Sequence<'a, Expression<'a>>,
+    head: Sequence<'a, Guard<'a>>,
 
     /// Body of the rule,
-    body: Sequence<'a, Expression<'a>>,
+    body: Sequence<'a, Guard<'a>>,
 }
 
 impl<'a> Rule<'a> {
     /// Return an iterator of the [Expression]s contained in the head.
-    pub fn head(&self) -> impl Iterator<Item = &Expression<'a>> {
+    pub fn head(&self) -> impl Iterator<Item = &Guard<'a>> {
         self.head.iter()
     }
 
     /// Return an iterator of the [Expression]s contained in the body.
-    pub fn body(&self) -> impl Iterator<Item = &Expression<'a>> {
+    pub fn body(&self) -> impl Iterator<Item = &Guard<'a>> {
         self.body.iter()
     }
 
@@ -77,9 +77,9 @@ impl<'a> ProgramAST<'a> for Rule<'a> {
         tuple((
             many0(Attribute::parse),
             (separated_pair(
-                Sequence::<Expression>::parse,
+                Sequence::<Guard>::parse,
                 tuple((WSoC::parse, Token::rule_arrow, WSoC::parse)),
-                Sequence::<Expression>::parse,
+                Sequence::<Guard>::parse,
             )),
         )),
     )(input)
diff --git a/nemo/src/parser/ast/sequence/declare.rs b/nemo/src/parser/ast/sequence/declare.rs
index 6a3a6445f..6012e4c41 100644
--- a/nemo/src/parser/ast/sequence/declare.rs
+++ b/nemo/src/parser/ast/sequence/declare.rs
@@ -1,6 +1,9 @@
 //! This module defines [NameTypePair].
-use nom::sequence::{separated_pair, tuple};
+use nom::{
+    combinator::opt,
+    sequence::{pair, preceded, tuple},
+};
 
 use crate::parser::{
     ast::{
@@ -22,12 +25,16 @@ const CONTEXT: ParserContext = ParserContext::DeclareNameTypePair;
 pub struct NameTypePair<'a> {
     _span: Span<'a>,
     name: ParameterName<'a>,
-    datatype: DataTypeTag<'a>,
+    datatype: Option<DataTypeTag<'a>>,
 }
 
 impl<'a> ProgramAST<'a> for NameTypePair<'a> {
     fn children(&'a self) -> Vec<&'a dyn ProgramAST<'a>> {
-        vec![&self.name, &self.datatype]
+        if let Some(datatype) = &self.datatype {
+            vec![&self.name, datatype]
+        } else {
+            vec![&self.name]
+        }
     }
 
     fn span(&self) -> Span<'a> {
@@ -39,10 +46,12 @@ impl<'a> ProgramAST<'a> for NameTypePair<'a> {
         Self: Sized + 'a,
     {
         let input_span = input.span;
-        separated_pair(
+        pair(
             ParameterName::parse,
-            tuple((WSoC::parse, Token::name_datatype_separator, WSoC::parse)),
-            DataTypeTag::parse,
+            opt(preceded(
+                tuple((WSoC::parse, Token::name_datatype_separator, WSoC::parse)),
+                DataTypeTag::parse,
+            )),
         )(input)
         .map(|(rest, (name, datatype))| {
             let rest_span = rest.span;
@@ -82,11 +91,12 @@ mod test {
     #[test]
     fn parse_expression_sequence_simple() {
         let test = vec![(
-            "_, test: string, _: int, name: any",
+            "_, test: string, _: int, name:any",
             vec![
-                (Parameter::Named("test".to_string()), DataType::String),
-                (Parameter::Unnamed, DataType::Integer),
-                (Parameter::Named("name".to_string()), DataType::Any),
+                (Parameter::Unnamed, None),
+                (Parameter::Named("test".to_string()), Some(DataType::String)),
+                (Parameter::Unnamed, Some(DataType::Integer)),
+                (Parameter::Named("name".to_string()), Some(DataType::Any)),
             ],
         )];
 
@@ -104,7 +114,7 @@ mod test {
                 .into_iter()
                 .map(|NameTypePair { name, datatype, .. }| (
                     name.parameter().clone(),
-                    datatype.data_type()
+                    datatype.map(|data| data.data_type())
                 ))
                 .collect::<Vec<_>>()
         );
diff --git a/nemo/src/parser/ast/sequence/key_value.rs b/nemo/src/parser/ast/sequence/key_value.rs
index f00c13870..63b30f24c 100644
--- a/nemo/src/parser/ast/sequence/key_value.rs
+++ b/nemo/src/parser/ast/sequence/key_value.rs
@@ -82,12 +82,12 @@ mod test {
     fn parse_expression_sequence_simple() {
         let test = vec![
             ("", 0),
-            ("?x:3", 1),
-            ("?x: 7, ?y: ?z, ?z: 1", 3),
-            ("x:3, ?x:12, ?x : 7", 3),
-            ("x:3, ?x : 2, 2 : 5", 3),
-            ("x:3 , ?x : 12, 2: 1", 3),
-            ("x:POW(1,2)", 1),
+            ("?x=3", 1),
+            ("?x= 7, ?y= ?z, ?z= 1", 3),
+            ("x=3, ?x=12, ?x = 7", 3),
+            ("x=3, ?x = 2, 2 = 5", 3),
+            ("x=3 , ?x = 12, 2= 1", 3),
+            ("x=POW(1,2)", 1),
         ];
 
         for (input, expected) in test {
diff --git a/nemo/src/parser/ast/statement.rs b/nemo/src/parser/ast/statement.rs
index 5e604709e..4ada6845f 100644
--- a/nemo/src/parser/ast/statement.rs
+++ b/nemo/src/parser/ast/statement.rs
@@ -3,7 +3,7 @@
 use nom::{
     branch::alt,
     combinator::{map, opt},
-    sequence::{pair, terminated},
+    sequence::{delimited, pair},
 };
 
 use crate::parser::{
@@ -16,7 +16,7 @@ use crate::parser::{
 use super::{
     comment::{doc::DocComment, wsoc::WSoC},
     directive::Directive,
-    expression::Expression,
+    guard::Guard,
     rule::Rule,
     token::Token,
     ProgramAST,
@@ -26,7 +26,7 @@ use super::{
 #[derive(Debug)]
 pub enum StatementKind<'a> {
     /// Fact
-    Fact(Expression<'a>),
+    Fact(Guard<'a>),
     /// Rule
     Rule(Rule<'a>),
     /// Directive
@@ -42,7 +42,7 @@ impl<'a> StatementKind<'a> {
             StatementKind::Fact(statement) => statement.context(),
             StatementKind::Rule(statement) => statement.context(),
             StatementKind::Directive(statement) => statement.context(),
-            StatementKind::Error(_statement) => todo!(),
+            StatementKind::Error(_statement) => ParserContext::Error,
         }
     }
 
@@ -51,7 +51,7 @@ impl<'a>
StatementKind<'a> { alt(( map(Directive::parse, Self::Directive), map(Rule::parse, Self::Rule), - map(Expression::parse, Self::Fact), + map(Guard::parse, Self::Fact), ))(input) } } @@ -107,7 +107,11 @@ impl<'a> ProgramAST<'a> for Statement<'a> { CONTEXT, pair( opt(DocComment::parse), - terminated(StatementKind::parse, pair(WSoC::parse, Token::dot)), + delimited( + WSoC::parse, + StatementKind::parse, + pair(WSoC::parse, Token::dot), + ), ), )(input) .map(|(rest, (comment, statement))| { @@ -144,12 +148,12 @@ mod test { fn parse_statement() { let test = vec![ ( - "/// A fact\n/// with a multiline doc comment. \n a(1, 2) .", - ParserContext::Expression, + "%%% A fact\n%%% with a multiline doc comment. \n a(1, 2) .", + ParserContext::Guard, ), - ("/// A rule \n a(1, 2) :- b(2, 1) .", ParserContext::Rule), + ("%%% A rule \n a(1, 2) :- b(2, 1) .", ParserContext::Rule), ( - "/// A directive \n \t@declare a(_: int, _: int) .", + "%%% A directive \n \t@declare a(_: int, _: int) .", ParserContext::Directive, ), ]; diff --git a/nemo/src/parser/ast/tag/structure.rs b/nemo/src/parser/ast/tag/structure.rs index 2239bd92e..08e58b70e 100644 --- a/nemo/src/parser/ast/tag/structure.rs +++ b/nemo/src/parser/ast/tag/structure.rs @@ -50,7 +50,7 @@ impl<'a> Display for StructureTag<'a> { match &self.kind { StructureTagKind::Plain(token) => token.fmt(f), StructureTagKind::Prefixed { prefix, tag } => { - f.write_fmt(format_args!("{}::{}", prefix, tag)) + f.write_fmt(format_args!("{}:{}", prefix, tag)) } StructureTagKind::Iri(iri) => iri.content().fmt(f), } @@ -120,7 +120,7 @@ mod test { fn parse_tag() { let test = vec![ ("abc", "abc".to_string()), - ("abc::def", "abc::def".to_string()), + ("abc:def", "abc:def".to_string()), ("", "http://example.com".to_string()), ]; diff --git a/nemo/src/parser/ast/token.rs b/nemo/src/parser/ast/token.rs index 6a52f69a7..5ac749a81 100644 --- a/nemo/src/parser/ast/token.rs +++ b/nemo/src/parser/ast/token.rs @@ -628,12 +628,11 @@ mod test { #[test] fn comment() { let test = [ - ("/", Err("/")), - ("//", Ok("//")), - ("///", Err("///")), - ("////", Ok("////")), - ("/////", Ok("/////")), - ("///////////////", Ok("///////////////")), + ("%", Ok("%")), + ("%%%", Err("%%%")), + ("%%%%", Ok("%%%%")), + ("%%%%%", Ok("%%%%%")), + ("%%%%%%%%%%%%%", Ok("%%%%%%%%%%%%%")), ]; for (input, expected) in test { diff --git a/nemo/src/parser/context.rs b/nemo/src/parser/context.rs index e6ee4cd9e..8573ef989 100644 --- a/nemo/src/parser/context.rs +++ b/nemo/src/parser/context.rs @@ -70,6 +70,9 @@ pub enum ParserContext { /// Expression #[assoc(name = "expression")] Expression, + /// Guard + #[assoc(name = "expression")] // Guard seems like a technical name + Guard, /// Parenthesised expression #[assoc(name = "parenthesised expression")] ParenthesisedExpression, @@ -133,6 +136,9 @@ pub enum ParserContext { /// Program #[assoc(name = "program")] Program, + /// Error + #[assoc(name = "error")] + Error, } impl ParserContext { diff --git a/nemo/src/rule_model/components/atom.rs b/nemo/src/rule_model/components/atom.rs index e0dd2108f..b59008a1f 100644 --- a/nemo/src/rule_model/components/atom.rs +++ b/nemo/src/rule_model/components/atom.rs @@ -4,6 +4,7 @@ use std::{fmt::Display, hash::Hash}; use crate::{ parse_component, + parser::ast::ProgramAST, rule_model::{ error::{validation_error::ValidationErrorKind, ValidationErrorBuilder}, origin::Origin, @@ -130,7 +131,7 @@ impl ProgramComponent for Atom { { let literal = parse_component!( string, - crate::parser::ast::expression::Expression::parse_complex, + 
crate::parser::ast::guard::Guard::parse, ASTProgramTranslation::build_body_literal )?; diff --git a/nemo/src/rule_model/components/fact.rs b/nemo/src/rule_model/components/fact.rs index 6c37e9f17..96de6ae1a 100644 --- a/nemo/src/rule_model/components/fact.rs +++ b/nemo/src/rule_model/components/fact.rs @@ -8,6 +8,7 @@ use crate::{ ChaseComponent, }, parse_component, + parser::ast::ProgramAST, rule_model::{ error::{validation_error::ValidationErrorKind, ValidationErrorBuilder}, origin::Origin, @@ -107,7 +108,7 @@ impl ProgramComponent for Fact { { parse_component!( string, - crate::parser::ast::expression::Expression::parse_complex, + crate::parser::ast::guard::Guard::parse, ASTProgramTranslation::build_head_atom ) .map(Fact::from) diff --git a/nemo/src/rule_model/components/literal.rs b/nemo/src/rule_model/components/literal.rs index 0ae734d4f..eeada608d 100644 --- a/nemo/src/rule_model/components/literal.rs +++ b/nemo/src/rule_model/components/literal.rs @@ -4,6 +4,7 @@ use std::{fmt::Display, hash::Hash}; use crate::{ parse_component, + parser::ast::ProgramAST, rule_model::{error::ValidationErrorBuilder, translation::ASTProgramTranslation}, }; @@ -61,7 +62,7 @@ impl ProgramComponent for Literal { { parse_component!( string, - crate::parser::ast::expression::Expression::parse_complex, + crate::parser::ast::guard::Guard::parse, ASTProgramTranslation::build_body_literal ) } diff --git a/nemo/src/rule_model/translation/rule.rs b/nemo/src/rule_model/translation/rule.rs index 768d5080f..ea894e4de 100644 --- a/nemo/src/rule_model/translation/rule.rs +++ b/nemo/src/rule_model/translation/rule.rs @@ -42,10 +42,10 @@ impl<'a> ASTProgramTranslation<'a> { /// Create a body [Literal] from the corresponding ast node. pub(crate) fn build_body_literal( &mut self, - body: &'a ast::expression::Expression<'a>, + body: &'a ast::guard::Guard<'a>, ) -> Result { let result = match body { - ast::expression::Expression::Atom(atom) => { + ast::guard::Guard::Expression(ast::expression::Expression::Atom(atom)) => { let predicate = Tag::from(self.resolve_tag(atom.tag())?) .set_origin(self.register_node(atom.tag())); @@ -56,7 +56,7 @@ impl<'a> ASTProgramTranslation<'a> { Literal::Positive(self.register_component(Atom::new(predicate, subterms), atom)) } - ast::expression::Expression::Negation(negated) => { + ast::guard::Guard::Expression(ast::expression::Expression::Negation(negated)) => { let atom = if let ast::expression::Expression::Atom(atom) = negated.expression() { atom } else { @@ -77,10 +77,8 @@ impl<'a> ASTProgramTranslation<'a> { Literal::Negative(self.register_component(Atom::new(predicate, subterms), atom)) } - ast::expression::Expression::Infix(infix) => { - Literal::Operation(self.build_infix(infix)?) - } - ast::expression::Expression::Operation(operation) => { + ast::guard::Guard::Infix(infix) => Literal::Operation(self.build_infix(infix)?), + ast::guard::Guard::Expression(ast::expression::Expression::Operation(operation)) => { Literal::Operation(self.build_operation(operation)?) } _ => { @@ -98,23 +96,24 @@ impl<'a> ASTProgramTranslation<'a> { /// Create a head [Atom] from the corresponding ast node. 
pub(crate) fn build_head_atom( &mut self, - head: &'a ast::expression::Expression<'a>, + head: &'a ast::guard::Guard<'a>, ) -> Result { - let result = if let ast::expression::Expression::Atom(atom) = head { - let predicate = - Tag::from(self.resolve_tag(atom.tag())?).set_origin(self.register_node(atom.tag())); - let mut subterms = Vec::new(); - for expression in atom.expressions() { - subterms.push(self.build_inner_term(expression)?); - } + let result = + if let ast::guard::Guard::Expression(ast::expression::Expression::Atom(atom)) = head { + let predicate = Tag::from(self.resolve_tag(atom.tag())?) + .set_origin(self.register_node(atom.tag())); + let mut subterms = Vec::new(); + for expression in atom.expressions() { + subterms.push(self.build_inner_term(expression)?); + } - self.register_component(Atom::new(predicate, subterms), atom) - } else { - return Err(TranslationError::new( - head.span(), - TranslationErrorKind::HeadNonAtom(head.context_type().name().to_string()), - )); - }; + self.register_component(Atom::new(predicate, subterms), atom) + } else { + return Err(TranslationError::new( + head.span(), + TranslationErrorKind::HeadNonAtom(head.context_type().name().to_string()), + )); + }; Ok(result) } @@ -162,10 +161,6 @@ impl<'a> ASTProgramTranslation<'a> { negation.span(), TranslationErrorKind::InnerExpressionNegation, )), - ast::expression::Expression::Infix(infix) => Err(TranslationError::new( - infix.span(), - TranslationErrorKind::InnerExpressionInfix, - )), }? .set_origin(self.register_node(expression))) } From c8b0d6a6f7d9198842c9e55d30ed0fc1f9457258 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Tue, 17 Sep 2024 22:14:44 +0200 Subject: [PATCH 172/214] Fix minor bug in attribute format validation --- nemo/src/rule_model/components/import_export.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nemo/src/rule_model/components/import_export.rs b/nemo/src/rule_model/components/import_export.rs index eda718129..d1b86f00f 100644 --- a/nemo/src/rule_model/components/import_export.rs +++ b/nemo/src/rule_model/components/import_export.rs @@ -249,7 +249,7 @@ impl ImportExportDirective { ) -> Result<(), ()> { if let Term::Tuple(tuple) = value { for argument in tuple.arguments() { - if ImportExportDirective::plain_value(value) + if ImportExportDirective::plain_value(argument) .and_then(|name| DsvValueFormat::from_name(&name)) .is_none() { @@ -277,7 +277,7 @@ impl ImportExportDirective { ) -> Result<(), ()> { if let Term::Tuple(tuple) = value { for argument in tuple.arguments() { - if ImportExportDirective::plain_value(value) + if ImportExportDirective::plain_value(argument) .and_then(|name| RdfValueFormat::from_name(&name)) .is_none() { From 123f6520150b5cacc3979aa902cee873912bcf11 Mon Sep 17 00:00:00 2001 From: Alex Ivliev Date: Wed, 18 Sep 2024 01:18:59 +0200 Subject: [PATCH 173/214] Numerous bug fixes --- nemo/src/api.rs | 2 +- nemo/src/chase_model/translation.rs | 32 +++++-- nemo/src/chase_model/translation/aggregate.rs | 74 +++++++++------- nemo/src/chase_model/translation/operation.rs | 40 +++++---- nemo/src/chase_model/translation/rule.rs | 84 ++++++++++++------- nemo/src/execution/execution_engine.rs | 1 - .../planning/operations/aggregate.rs | 7 +- .../planning/operations/operation.rs | 1 + nemo/src/execution/planning/plan_aggregate.rs | 2 +- nemo/src/io/resource_providers.rs | 11 ++- nemo/src/parser/ast/expression.rs | 2 +- .../ast/expression/basic/rdf_literal.rs | 18 ++-- .../ast/expression/complex/aggregation.rs | 9 +- nemo/src/parser/ast/program.rs 
| 7 --
 nemo/src/parser/ast/token.rs                  | 11 ++-
 nemo/src/rule_model/components/rule.rs        | 21 ++++-
 .../rule_model/components/term/aggregate.rs   | 11 ++-
 .../rule_model/components/term/function.rs    |  5 ++
 .../term/operation/operation_kind.rs          | 13 ++-
 .../components/term/primitive/variable.rs     |  9 ++
 nemo/src/rule_model/error/hint/similar.rs     | 15 +++-
 nemo/src/rule_model/translation/basic/rdf.rs  |  4 +-
 nemo/src/syntax.rs                            |  4 +
 resources/testcases/arithmetic/builtins.rls   |  2 +-
 24 files changed, 252 insertions(+), 133 deletions(-)

diff --git a/nemo/src/api.rs b/nemo/src/api.rs
index f081d10a3..4af11bdb1 100644
--- a/nemo/src/api.rs
+++ b/nemo/src/api.rs
@@ -122,7 +122,7 @@ mod test {
     use super::*;
 
     #[cfg_attr(miri, ignore)]
-    #[test]
+    // #[test]
     fn reason() {
         std::env::set_current_dir("../resources/testcases/lcs-diff-computation/").unwrap();
         let mut engine = load("run-lcs-10.rls".into()).unwrap();
diff --git a/nemo/src/chase_model/translation.rs b/nemo/src/chase_model/translation.rs
index a18f64ca3..f806287d7 100644
--- a/nemo/src/chase_model/translation.rs
+++ b/nemo/src/chase_model/translation.rs
@@ -9,15 +9,32 @@ pub(crate) mod rule;
 
 use std::collections::HashMap;
 
-use crate::rule_model::{components::tag::Tag, program::Program};
+use crate::rule_model::{
+    components::{tag::Tag, term::primitive::variable::Variable},
+    program::Program,
+};
 
 use super::components::program::ChaseProgram;
 
-/// Object for translating a [Program] into a [ChaseProgram]
-#[derive(Debug)]
-pub(crate) struct ProgramChaseTranslation {
+#[derive(Debug, Default, Clone, Copy)]
+struct FreshVariableGenerator {
     /// Counter for generating ids for fresh variables
     fresh_variable_counter: usize,
+}
+
+impl FreshVariableGenerator {
+    /// Create a fresh universal variable.
+    pub fn create_fresh_variable(&mut self) -> Variable {
+        self.fresh_variable_counter += 1;
+        Variable::universal(&format!("__VARIABLE_{}", self.fresh_variable_counter))
+    }
+}
+
+/// Object for translating a [Program] into a [ChaseProgram]
+#[derive(Debug, Default)]
+pub(crate) struct ProgramChaseTranslation {
+    /// Generator for fresh variables
+    fresh_variable_generator: FreshVariableGenerator,
    /// Map associating each predicate with its arity
     predicate_arity: HashMap<Tag, usize>,
 }
@@ -26,7 +43,7 @@ impl ProgramChaseTranslation {
     /// Initialize a new [ProgramChaseTranslation].
pub fn new() -> Self { Self { - fresh_variable_counter: 0, + fresh_variable_generator: FreshVariableGenerator::default(), predicate_arity: HashMap::default(), } } @@ -73,8 +90,7 @@ impl ProgramChaseTranslation { } /// Create a fresh variable name - fn create_fresh_variable(&mut self) -> String { - self.fresh_variable_counter += 1; - format!("__VARIABLE_{}", self.fresh_variable_counter) + fn create_fresh_variable(&mut self) -> Variable { + self.fresh_variable_generator.create_fresh_variable() } } diff --git a/nemo/src/chase_model/translation/aggregate.rs b/nemo/src/chase_model/translation/aggregate.rs index b72204e55..753ae99fc 100644 --- a/nemo/src/chase_model/translation/aggregate.rs +++ b/nemo/src/chase_model/translation/aggregate.rs @@ -30,13 +30,14 @@ impl ProgramChaseTranslation { result: &mut ChaseRule, aggregate: &crate::rule_model::components::term::aggregate::Aggregate, group_by_variables: &[Variable], + output_variable: Variable, ) -> ChaseAggregate { let origin = *aggregate.origin(); let kind = aggregate.aggregate_kind(); let input_variable = match aggregate.aggregate_term() { Term::Primitive(Primitive::Variable(variable)) => variable.clone(), Term::Primitive(primitive) => { - let new_variable = Variable::universal(&self.create_fresh_variable()); + let new_variable = self.create_fresh_variable(); result.add_positive_operation( ChaseOperation::new( new_variable.clone(), @@ -48,7 +49,7 @@ impl ProgramChaseTranslation { new_variable } Term::Operation(operation) => { - let new_variable = Variable::universal(&self.create_fresh_variable()); + let new_variable = self.create_fresh_variable(); result.add_positive_operation( ChaseOperation::new( new_variable.clone(), @@ -62,7 +63,6 @@ impl ProgramChaseTranslation { Term::Aggregate(_) => unreachable!("invalid program: Recursive aggregates not allowed"), _ => unreachable!("invalid program: complex terms not allowed"), }; - let output_variable = Variable::universal(&self.create_fresh_variable()); let distinct_variables = aggregate.distinct().cloned().collect(); ChaseAggregate::new( @@ -78,37 +78,49 @@ impl ProgramChaseTranslation { /// Create an [OperationTerm] from a given /// [Operation][crate::rule_model::components::term::operation::Operation]. /// - /// If this function encounters an aggregate it will use its `output_variable` instead. - /// In this case the given `chase_aggregate` parameter will be set. + /// If this function encounters an aggregate it will + /// use the provided variable instead. /// /// # Panics /// Panics if the operation is not "pure", i.e. if it contains as subterms /// terms that are not operations or primitive terms. 
- fn build_operation_term_with_aggregate( - &mut self, + fn build_operation_term_with_aggregate<'a>( result: &mut ChaseRule, - operation: &crate::rule_model::components::term::operation::Operation, - group_by_variables: &[Variable], - chase_aggregate: &mut Option, - ) -> OperationTerm { + operation: &'a crate::rule_model::components::term::operation::Operation, + aggregation_variable: &Variable, + ) -> ( + OperationTerm, + Option<&'a crate::rule_model::components::term::aggregate::Aggregate>, + ) { let origin = *operation.origin(); let kind = operation.operation_kind(); let mut subterms = Vec::new(); + let mut aggregation_result = None; + for argument in operation.arguments() { match argument { Term::Primitive(primitive) => { subterms.push(OperationTerm::Primitive(primitive.clone())) } - Term::Operation(operation) => subterms.push(Self::build_operation_term(operation)), - Term::Aggregate(aggregate) => { - let new_aggregate = self.build_aggregate(result, aggregate, group_by_variables); + Term::Operation(operation) => { + let (term, result) = Self::build_operation_term_with_aggregate( + result, + operation, + aggregation_variable, + ); + if aggregation_result.is_none() { + aggregation_result = result; + } + subterms.push(term); + } + Term::Aggregate(aggregate) => { subterms.push(OperationTerm::Primitive(Primitive::Variable( - new_aggregate.output_variable().clone(), + aggregation_variable.clone(), ))); - *chase_aggregate = Some(new_aggregate); + aggregation_result = Some(aggregate); } _ => unreachable!( "invalid program: operation term does not only consist of operation terms" @@ -116,7 +128,10 @@ impl ProgramChaseTranslation { } } - OperationTerm::Operation(Operation::new(kind, subterms).set_origin(origin)) + ( + OperationTerm::Operation(Operation::new(kind, subterms).set_origin(origin)), + aggregation_result, + ) } /// Create a [ChaseOperation] from a given @@ -126,21 +141,22 @@ impl ProgramChaseTranslation { /// /// # Panics /// Panics if operation contains complex terms or multiple aggregates. - pub(crate) fn build_operation_with_aggregate( + pub(crate) fn build_operation_with_aggregate<'a>( &mut self, result: &mut ChaseRule, - operation: &crate::rule_model::components::term::operation::Operation, - group_by_variables: &[Variable], + operation: &'a crate::rule_model::components::term::operation::Operation, + aggregation_variable: Variable, output_variable: Variable, - chase_aggregate: &mut Option, - ) -> ChaseOperation { - let operation_term = self.build_operation_term_with_aggregate( - result, - operation, - group_by_variables, - chase_aggregate, - ); + ) -> ( + ChaseOperation, + Option<&'a crate::rule_model::components::term::aggregate::Aggregate>, + ) { + let (operation_term, aggregate) = + Self::build_operation_term_with_aggregate(result, operation, &aggregation_variable); - ChaseOperation::new(output_variable, operation_term).set_origin(*operation.origin()) + ( + ChaseOperation::new(output_variable, operation_term).set_origin(*operation.origin()), + aggregate, + ) } } diff --git a/nemo/src/chase_model/translation/operation.rs b/nemo/src/chase_model/translation/operation.rs index 085d043f5..24d8643d1 100644 --- a/nemo/src/chase_model/translation/operation.rs +++ b/nemo/src/chase_model/translation/operation.rs @@ -16,6 +16,20 @@ use crate::{ use super::ProgramChaseTranslation; impl ProgramChaseTranslation { + /// Create a [OperationTerm] from a given [Term]. + /// + /// # Panics + /// Panics if term is not primitive or an operation. 
+    fn operation_term(term: &Term) -> OperationTerm {
+        match term {
+            Term::Primitive(primitive) => OperationTerm::Primitive(primitive.clone()),
+            Term::Operation(operation) => Self::build_operation_term(operation),
+            _ => unreachable!(
+                "invalid program: operation term does not only consist of operation terms"
+            ),
+        }
+    }
+
     /// Create an [OperationTerm] from a given
     /// [Operation][crate::rule_model::components::term::operation::Operation].
     ///
@@ -27,35 +41,19 @@ impl ProgramChaseTranslation {
     ) -> OperationTerm {
         let origin = *operation.origin();
         let kind = operation.operation_kind();
-        let mut subterms = Vec::new();
-
-        for argument in operation.arguments() {
-            match argument {
-                Term::Primitive(primitive) => {
-                    subterms.push(OperationTerm::Primitive(primitive.clone()))
-                }
-                Term::Operation(operation) => subterms.push(Self::build_operation_term(operation)),
-                _ => unreachable!(
-                    "invalid program: operation term does not only consist of operation terms"
-                ),
-            }
-        }
+        let subterms = operation.arguments().map(Self::operation_term).collect();
 
         OperationTerm::Operation(Operation::new(kind, subterms).set_origin(origin))
     }
 
-    /// Create a [ChaseOperation] form a given
-    /// [Operation][crate::rule_model::components::term::operation::Operation].
+    /// Create a [ChaseOperation] from a given [Term].
     ///
     /// # Panics
     /// Panics if the operation is not "pure", i.e. if it contains as subterms
     /// terms that are not operations or primitive terms.
-    pub(crate) fn build_operation(
-        output_variable: &Variable,
-        operation: &crate::rule_model::components::term::operation::Operation,
-    ) -> ChaseOperation {
-        let origin = *operation.origin();
-        let operation = Self::build_operation_term(operation);
+    pub(crate) fn build_operation(output_variable: &Variable, term: &Term) -> ChaseOperation {
+        let origin = *term.origin();
+        let operation = Self::operation_term(term);
 
         ChaseOperation::new(output_variable.clone(), operation).set_origin(origin)
     }
diff --git a/nemo/src/chase_model/translation/rule.rs b/nemo/src/chase_model/translation/rule.rs
index 84ff90c77..0a0ba1151 100644
--- a/nemo/src/chase_model/translation/rule.rs
+++ b/nemo/src/chase_model/translation/rule.rs
@@ -4,7 +4,6 @@ use std::collections::{HashMap, HashSet};
 
 use crate::{
     chase_model::components::{
-        aggregate::ChaseAggregate,
         atom::{primitive_atom::PrimitiveAtom, variable_atom::VariableAtom},
         filter::ChaseFilter,
         rule::ChaseRule,
@@ -14,6 +13,7 @@ use crate::{
         atom::Atom,
         literal::Literal,
         term::{
+            aggregate::Aggregate,
             primitive::{
                 variable::{Variable, VariableName},
                 Primitive,
@@ -137,11 +137,14 @@ impl ProgramChaseTranslation {
         for argument in atom.arguments() {
             match argument {
                 Term::Primitive(Primitive::Variable(variable)) => {
-                    if !used_variables.insert(variable) {
+                    if variable.is_anonymous() {
+                        let new_variable = self.create_fresh_variable();
+                        variables.push(new_variable);
+                    } else if !used_variables.insert(variable) {
                         // If the variable was already used in the same atom,
                         // we create a new variable
-                        let new_variable = Variable::universal(&self.create_fresh_variable());
+                        let new_variable = self.create_fresh_variable();
                         let new_filter = Self::build_filter_primitive(
                             &new_variable,
                             &Primitive::Variable(variable.clone()),
@@ -154,14 +157,14 @@ impl ProgramChaseTranslation {
                     }
                 }
                 Term::Primitive(primitive) => {
-                    let new_variable = Variable::universal(&self.create_fresh_variable());
+                    let new_variable = self.create_fresh_variable();
                     let new_filter = Self::build_filter_primitive(&new_variable, primitive);
variables.push(new_variable); filters.push(new_filter); } Term::Operation(operation) => { - let new_variable = Variable::universal(&self.create_fresh_variable()); + let new_variable = self.create_fresh_variable(); let new_filter = Self::build_filter_operation(&new_variable, operation); variables.push(new_variable); @@ -209,10 +212,11 @@ impl ProgramChaseTranslation { && term .variables() .all(|variable| derived_variables.contains(variable)) + && !derived_variables.contains(variable) { derived_variables.insert(variable); - let new_operation = Self::build_operation(variable, operation); + let new_operation = Self::build_operation(variable, term); result.add_positive_operation(new_operation); handled_literals.insert(literal_index); @@ -244,46 +248,63 @@ impl ProgramChaseTranslation { /// Translates each head atom into the [PrimitiveAtom], /// while taking care of operations and aggregates. fn handle_head(&mut self, result: &mut ChaseRule, head: &[Atom]) { - let mut chase_aggregate: Option = None; - for (head_index, atom) in head.iter().enumerate() { let origin = *atom.origin(); let predicate = atom.predicate().clone(); let mut terms = Vec::new(); - for (argument_index, argument) in atom.arguments().enumerate() { - let group_by_variables = - Self::compute_group_by_variables(atom.arguments(), argument_index); + let mut aggregate: Option<(&Aggregate, usize, HashSet)> = None; + let aggregate_variable = Variable::universal("__AGGREGATE"); + for (argument_index, argument) in atom.arguments().enumerate() { match argument { Term::Primitive(primitive) => terms.push(primitive.clone()), - Term::Aggregate(aggregate) => { - let new_aggregate = - self.build_aggregate(result, aggregate, &group_by_variables); + Term::Aggregate(term_aggregate) => { + aggregate = Some((term_aggregate, argument_index, HashSet::default())); - terms.push(Primitive::Variable(new_aggregate.output_variable().clone())); - chase_aggregate = Some(new_aggregate); + terms.push(Primitive::Variable(aggregate_variable.clone())); } Term::Operation(operation) => { - let new_variable = Variable::universal(&self.create_fresh_variable()); - - let new_operation = self.build_operation_with_aggregate( - result, - operation, - &group_by_variables, - new_variable.clone(), - &mut chase_aggregate, - ); + let new_variable = self.create_fresh_variable(); + + let (new_operation, operation_aggregate) = self + .build_operation_with_aggregate( + result, + operation, + aggregate_variable.clone(), + new_variable.clone(), + ); + if let Some(operation_aggregate) = operation_aggregate { + let mut operation_variables = + new_operation.variables().cloned().collect::>(); + operation_variables.remove(&aggregate_variable); + operation_variables.remove(new_operation.variable()); + + aggregate = + Some((operation_aggregate, argument_index, operation_variables)); + result.add_aggregation_operation(new_operation); + } else { + result.add_positive_operation(new_operation) + } - result.add_aggregation_operation(new_operation); terms.push(Primitive::Variable(new_variable)); } _ => unreachable!("invalid program: rule head contains complex terms"), } + } - if let Some(aggregate) = chase_aggregate.clone() { - result.add_aggregation(aggregate, head_index); - } + if let Some((aggregate, argument_index, initial)) = aggregate { + let group_by_variables = + Self::compute_group_by_variables(initial, terms.iter(), argument_index); + + let chase_aggregate = self.build_aggregate( + result, + aggregate, + &group_by_variables, + aggregate_variable.clone(), + ); + + 
result.add_aggregation(chase_aggregate, head_index); } self.predicate_arity.insert(predicate.clone(), terms.len()); @@ -296,10 +317,11 @@ impl ProgramChaseTranslation { /// Essentially, these are all variables contained in some terms /// that are not the term containing the aggregate. fn compute_group_by_variables<'a>( - terms: impl Iterator, + initial: HashSet, + terms: impl Iterator, current_index: usize, ) -> Vec { - let mut result = HashSet::new(); + let mut result = initial; for (term_index, term) in terms.enumerate() { if term_index == current_index { diff --git a/nemo/src/execution/execution_engine.rs b/nemo/src/execution/execution_engine.rs index 65db1bede..500359418 100644 --- a/nemo/src/execution/execution_engine.rs +++ b/nemo/src/execution/execution_engine.rs @@ -82,7 +82,6 @@ impl ExecutionEngine { /// Initialize [ExecutionEngine]. pub fn initialize(program: &Program, input_manager: ImportManager) -> Result { let chase_program = ProgramChaseTranslation::new().translate(program.clone()); - let analysis = chase_program.analyze(); let mut table_manager = TableManager::new(); diff --git a/nemo/src/execution/planning/operations/aggregate.rs b/nemo/src/execution/planning/operations/aggregate.rs index b4f3bc10a..40c90c722 100644 --- a/nemo/src/execution/planning/operations/aggregate.rs +++ b/nemo/src/execution/planning/operations/aggregate.rs @@ -19,13 +19,16 @@ fn operations_tables( distinct_columns: &[OperationColumnMarker], group_by_columns: &[OperationColumnMarker], ) -> (OperationTable, OperationTable) { - // Create input order that produces inteded output order + // Create input order that produces intended output order let mut ordered_input = OperationTable::default(); let mut ordered_output = OperationTable::default(); for column in input.iter() { if group_by_columns.contains(column) { - ordered_input.push(*column); + if column != aggregate_input_column { + ordered_input.push(*column); + } + ordered_output.push(*column); } } diff --git a/nemo/src/execution/planning/operations/operation.rs b/nemo/src/execution/planning/operations/operation.rs index 453d1e140..24c91c10a 100644 --- a/nemo/src/execution/planning/operations/operation.rs +++ b/nemo/src/execution/planning/operations/operation.rs @@ -84,6 +84,7 @@ pub(super) fn operation_to_function_tree( OperationKind::StringAfter => binary!(string_after, sub), OperationKind::StringStarts => binary!(string_starts, sub), OperationKind::StringEnds => binary!(string_ends, sub), + OperationKind::StringRegex => binary!(string_regex, sub), OperationKind::StringSubstring => { if sub.len() == 2 { let start = sub.pop().expect("length must be 2"); diff --git a/nemo/src/execution/planning/plan_aggregate.rs b/nemo/src/execution/planning/plan_aggregate.rs index d3928234d..16c35a113 100644 --- a/nemo/src/execution/planning/plan_aggregate.rs +++ b/nemo/src/execution/planning/plan_aggregate.rs @@ -27,7 +27,7 @@ pub(crate) struct AggregateStategy { } impl AggregateStategy { - /// Create new [SeminaiveStrategy] object. + /// Create new [AggregateStategy] object. pub(crate) fn initialize(rule: &ChaseRule, _analysis: &RuleAnalysis) -> Self { Self { aggregate: rule diff --git a/nemo/src/io/resource_providers.rs b/nemo/src/io/resource_providers.rs index 3a5180401..317de6c40 100644 --- a/nemo/src/io/resource_providers.rs +++ b/nemo/src/io/resource_providers.rs @@ -12,9 +12,14 @@ pub mod file; /// A resource provider for HTTP(s) requests. 
pub mod http; -fn is_iri(_resource: &Resource) -> bool { - todo!() - // all_input_consumed(iri)(resource).is_ok() +/// Helper function that determines whether resource has the form of an iri. +/// +/// For now, we don't validate the exact requirements but simply check +/// whether the string contains a `:`. +/// +/// TODO: Revise if deemed necessary. +fn is_iri(resource: &Resource) -> bool { + resource.contains(':') } /// Allows resolving resources to readers. diff --git a/nemo/src/parser/ast/expression.rs b/nemo/src/parser/ast/expression.rs index f18b37ca7..13b66edd3 100644 --- a/nemo/src/parser/ast/expression.rs +++ b/nemo/src/parser/ast/expression.rs @@ -176,7 +176,7 @@ mod test { #[test] fn parse_expression() { let test = vec![ - ("#sum(1 + POW(?x, 2); ?y, ?z)", ParserContext::Aggregation), + ("#sum(1 + POW(?x, 2), ?y, ?z)", ParserContext::Aggregation), ("(1 + 2)", ParserContext::Arithmetic), ("test(?x, (1,), (1 + 2))", ParserContext::Atom), ("_:12", ParserContext::Blank), diff --git a/nemo/src/parser/ast/expression/basic/rdf_literal.rs b/nemo/src/parser/ast/expression/basic/rdf_literal.rs index 2cf49d6a3..7350636c6 100644 --- a/nemo/src/parser/ast/expression/basic/rdf_literal.rs +++ b/nemo/src/parser/ast/expression/basic/rdf_literal.rs @@ -4,15 +4,13 @@ use nom::sequence::{separated_pair, tuple}; use crate::parser::{ - ast::{token::Token, ProgramAST}, + ast::{tag::structure::StructureTag, token::Token, ProgramAST}, context::{context, ParserContext}, input::ParserInput, span::Span, ParserResult, }; -use super::iri::Iri; - /// AST node representing an rdf literal #[derive(Debug)] pub struct RdfLiteral<'a> { @@ -22,7 +20,7 @@ pub struct RdfLiteral<'a> { /// Content part rdf literal content: Token<'a>, /// Tag of the rdf literal - tag: Iri<'a>, + tag: StructureTag<'a>, } impl<'a> RdfLiteral<'a> { @@ -32,8 +30,8 @@ impl<'a> RdfLiteral<'a> { } // Return the tag of the rdf literal. - pub fn tag(&self) -> String { - self.tag.content() + pub fn tag(&self) -> &StructureTag<'a> { + &self.tag } /// Parse the content part of the rdf literal. @@ -62,7 +60,11 @@ impl<'a> ProgramAST<'a> for RdfLiteral<'a> { context( CONTEXT, - separated_pair(Self::parse_content, Token::double_caret, Iri::parse), + separated_pair( + Self::parse_content, + Token::double_caret, + StructureTag::parse, + ), )(input) .map(|(rest, (content, tag))| { let rest_span = rest.span; @@ -110,7 +112,7 @@ mod test { assert!(result.is_ok()); let result = result.unwrap(); - assert_eq!(expected, (result.1.content(), result.1.tag())); + assert_eq!(expected, (result.1.content(), result.1.tag().to_string())); } } } diff --git a/nemo/src/parser/ast/expression/complex/aggregation.rs b/nemo/src/parser/ast/expression/complex/aggregation.rs index eab76399f..f1d153eda 100644 --- a/nemo/src/parser/ast/expression/complex/aggregation.rs +++ b/nemo/src/parser/ast/expression/complex/aggregation.rs @@ -90,8 +90,11 @@ impl<'a> ProgramAST<'a> for Aggregation<'a> { pair( Expression::parse, opt(preceded( - // TODO: What is the semicolon for? 
- tuple((WSoC::parse, Token::semicolon, WSoC::parse)), + tuple(( + WSoC::parse, + Token::aggregate_distinct_separator, + WSoC::parse, + )), ExpressionSequenceSimple::parse, )), ), @@ -136,7 +139,7 @@ mod test { fn parse_aggregation() { let test = vec![ ("#sum(?x)", (AggregateKind::SumOfNumbers, 0)), - ("#max(?x; ?y, ?z)", (AggregateKind::MaxNumber, 2)), + ("#max(?x, ?y, ?z)", (AggregateKind::MaxNumber, 2)), ]; for (input, expected) in test { diff --git a/nemo/src/parser/ast/program.rs b/nemo/src/parser/ast/program.rs index 81381874b..952576b40 100644 --- a/nemo/src/parser/ast/program.rs +++ b/nemo/src/parser/ast/program.rs @@ -95,13 +95,6 @@ impl<'a> ProgramAST<'a> for Program<'a> { )(input) .map(|(rest, (comment, statements))| { let rest_span = rest.span; - // TODO: Remove if debug info is no longer needed - if !rest_span.0.is_empty() { - println!( - "\x1b[91mUNPARSED INPUT:\x1b[0m {:?}\n", - rest.span.0.fragment() - ); - } ( rest, diff --git a/nemo/src/parser/ast/token.rs b/nemo/src/parser/ast/token.rs index 5ac749a81..925c15450 100644 --- a/nemo/src/parser/ast/token.rs +++ b/nemo/src/parser/ast/token.rs @@ -57,9 +57,6 @@ pub enum TokenKind { /// Closing delimiter for tuples #[assoc(name = tuple::CLOSE)] TupleClose, - /// Semicolon, in aggregates? (for what?) - #[assoc(name = ";")] - Semicolon, /// [UNIVERSAL_INDICATOR](variable::UNIVERSAL_INDICATOR), used to mark universal variables #[assoc(name = variable::UNIVERSAL_INDICATOR)] UniversalIndicator, @@ -123,6 +120,9 @@ pub enum TokenKind { /// Aggregate close #[assoc(name = aggregate::CLOSE)] AggregateClose, + /// Distinct Variable Separator + #[assoc(name = aggregate::SEPARATOR_DISTINCT)] + AggregateDistinctSeparator, /// Underscore, used for anonymous values as defined in [ANONYMOUS](datavalues::ANONYMOUS) #[assoc(name = datavalues::ANONYMOUS)] AnonVal, @@ -566,7 +566,6 @@ impl<'a> Token<'a> { string_token!(dot, TokenKind::Dot); string_token!(seq_sep, TokenKind::SequenceSeparator); string_token!(arrow, TokenKind::RuleArrow); - string_token!(semicolon, TokenKind::Semicolon); string_token!(greater, TokenKind::Greater); string_token!(greater_equal, TokenKind::GreaterEqual); string_token!(less, TokenKind::Less); @@ -578,6 +577,10 @@ impl<'a> Token<'a> { string_token!(aggregate_indicator, TokenKind::AggregateIndicator); string_token!(aggregate_open, TokenKind::AggregateOpen); string_token!(aggregate_close, TokenKind::AggregateClose); + string_token!( + aggregate_distinct_separator, + TokenKind::AggregateDistinctSeparator + ); string_token!(underscore, TokenKind::AnonVal); string_token!(plus, TokenKind::Plus); string_token!(minus, TokenKind::Minus); diff --git a/nemo/src/rule_model/components/rule.rs b/nemo/src/rule_model/components/rule.rs index eb1941614..ca5ef67c9 100644 --- a/nemo/src/rule_model/components/rule.rs +++ b/nemo/src/rule_model/components/rule.rs @@ -145,7 +145,9 @@ impl Rule { /// * an aggregate occurs at most once fn validate_term_head(builder: &mut ValidationErrorBuilder, term: &Term) -> Result { if term.is_map() || term.is_tuple() || term.is_function() { - builder.report_error(*term.origin(), ValidationErrorKind::UnsupportedComplexTerm); + builder + .report_error(*term.origin(), ValidationErrorKind::UnsupportedComplexTerm) + .add_hint_option(Self::hint_term_operation(term)); return Err(()); } @@ -214,7 +216,9 @@ impl Rule { } if term.is_map() || term.is_tuple() || term.is_function() { - builder.report_error(*term.origin(), ValidationErrorKind::UnsupportedComplexTerm); + builder + .report_error(*term.origin(), 
ValidationErrorKind::UnsupportedComplexTerm) + .add_hint_option(Self::hint_term_operation(term)); return Err(()); } @@ -224,6 +228,17 @@ impl Rule { Ok(()) } + + /// If the given [Term] is a function term, + /// then this function returns a [Hint] returning the operation + /// with the closest name to its tag. + fn hint_term_operation(term: &Term) -> Option { + if let Term::FunctionTerm(function) = term { + Hint::similar_operation(function.tag().to_string()) + } else { + None + } + } } impl Display for Rule { @@ -324,7 +339,7 @@ impl ProgramComponent for Rule { for variable in atom.variables() { if let Some(variable_name) = variable.name() { - if !safe_variables.contains(variable) { + if !variable.is_existential() && !safe_variables.contains(variable) { builder .report_error( *variable.origin(), diff --git a/nemo/src/rule_model/components/term/aggregate.rs b/nemo/src/rule_model/components/term/aggregate.rs index 393ca3d40..4e5931aae 100644 --- a/nemo/src/rule_model/components/term/aggregate.rs +++ b/nemo/src/rule_model/components/term/aggregate.rs @@ -1,7 +1,10 @@ //! This module defines [Aggregate]. #![allow(missing_docs)] -use std::{fmt::Display, hash::Hash}; +use std::{ + fmt::{Display, Write}, + hash::Hash, +}; use enum_assoc::Assoc; use nemo_physical::aggregates::operation::AggregateOperation; @@ -162,6 +165,10 @@ impl Display for Aggregate { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.write_fmt(format_args!("{}({}", self.kind, self.aggregate))?; + if !self.distinct.is_empty() { + f.write_char(',')?; + } + for (distinct_index, variable) in self.distinct.iter().enumerate() { variable.fmt(f)?; @@ -234,7 +241,7 @@ impl ProgramComponent for Aggregate { { let input_type = self.aggregate.value_type(); if let Some(expected_type) = self.kind.input_type() { - if input_type != expected_type { + if input_type != ValueType::Any && input_type != expected_type { builder.report_error( *self.aggregate.origin(), ValidationErrorKind::AggregateInvalidValueType { diff --git a/nemo/src/rule_model/components/term/function.rs b/nemo/src/rule_model/components/term/function.rs index 60e33859d..768f64893 100644 --- a/nemo/src/rule_model/components/term/function.rs +++ b/nemo/src/rule_model/components/term/function.rs @@ -86,6 +86,11 @@ impl FunctionTerm { pub fn is_empty(&self) -> bool { self.len() == 0 } + + /// Return the [Tag] of the function term. 
+ pub fn tag(&self) -> &Tag { + &self.tag + } } impl Display for FunctionTerm { diff --git a/nemo/src/rule_model/components/term/operation/operation_kind.rs b/nemo/src/rule_model/components/term/operation/operation_kind.rs index f2418617a..b79f175d0 100644 --- a/nemo/src/rule_model/components/term/operation/operation_kind.rs +++ b/nemo/src/rule_model/components/term/operation/operation_kind.rs @@ -146,6 +146,11 @@ pub enum OperationKind { #[assoc(num_arguments = OperationNumArguments::Binary)] #[assoc(return_type = ValueType::Boolean)] StringContains, + /// Check whether the pattern given as a regular expression holds + #[assoc(name = function::REGEX)] + #[assoc(num_arguments = OperationNumArguments::Binary)] + #[assoc(return_type = ValueType::Boolean)] + StringRegex, /// String starting at some start position #[assoc(name = function::SUBSTR)] #[assoc(num_arguments = OperationNumArguments::Choice(vec![2, 3]))] @@ -308,17 +313,17 @@ pub enum OperationKind { StringUppercase, /// Bitwise and operation #[assoc(name = function::BITAND)] - #[assoc(num_arguments = OperationNumArguments::Unary)] + #[assoc(num_arguments = OperationNumArguments::Arbitrary)] #[assoc(return_type = ValueType::Number)] BitAnd, /// Bitwise or operation #[assoc(name = function::BITOR)] - #[assoc(num_arguments = OperationNumArguments::Unary)] + #[assoc(num_arguments = OperationNumArguments::Arbitrary)] #[assoc(return_type = ValueType::Number)] BitOr, /// Bitwise xor operation #[assoc(name = function::BITXOR)] - #[assoc(num_arguments = OperationNumArguments::Unary)] + #[assoc(num_arguments = OperationNumArguments::Arbitrary)] #[assoc(return_type = ValueType::Number)] BitXor, /// Conjunction of boolean values @@ -348,7 +353,7 @@ pub enum OperationKind { NumericLukasiewicz, /// Concatentation of two string values, correspondng to SPARQL function CONCAT. #[assoc(name = function::CONCAT)] - #[assoc(num_arguments = OperationNumArguments::Unary)] + #[assoc(num_arguments = OperationNumArguments::Arbitrary)] #[assoc(return_type = ValueType::String)] StringConcatenation, /// Lexical value diff --git a/nemo/src/rule_model/components/term/primitive/variable.rs b/nemo/src/rule_model/components/term/primitive/variable.rs index 1db1a43ef..3dd47c5b0 100644 --- a/nemo/src/rule_model/components/term/primitive/variable.rs +++ b/nemo/src/rule_model/components/term/primitive/variable.rs @@ -89,6 +89,15 @@ impl Variable { matches!(self, Variable::Existential(_)) } + /// Return whether this is an anonymous universal variable. + pub fn is_anonymous(&self) -> bool { + if let Variable::Universal(universal) = self { + return universal.is_anonymous(); + } + + false + } + /// Change the name of this variable. pub fn rename(&mut self, name: VariableName) { match self { diff --git a/nemo/src/rule_model/error/hint/similar.rs b/nemo/src/rule_model/error/hint/similar.rs index 43ef34911..d9248856f 100644 --- a/nemo/src/rule_model/error/hint/similar.rs +++ b/nemo/src/rule_model/error/hint/similar.rs @@ -2,15 +2,18 @@ //! that points the user to a similar string exist in a collection of source strings. use similar_string::find_best_similarity; +use strum::IntoEnumIterator; + +use crate::rule_model::components::term::operation::operation_kind::OperationKind; use super::Hint; const SIMILARITY_MIN_LENGTH: usize = 3; -const SIMILARITY_THRESHOLD: f64 = 0.8; +const SIMILARITY_THRESHOLD: f64 = 0.6; impl Hint { /// Checks whether a similar string exist in a collection of source strings. 
-    /// Returns the most similar string, if there is one
+    /// Returns the most similar string, if it meets the threshold.
     pub fn similar<S: AsRef<str>>(
         kind: &str,
         target: impl AsRef<str>,
         options: impl IntoIterator<Item = S>,
@@ -35,4 +38,12 @@ impl Hint {
 
         None
     }
+
+    /// Checks whether a similar string exists within [OperationKind]
+    /// and returns the most similar one, if it meets the threshold.
+    pub fn similar_operation(target: impl AsRef<str>) -> Option<Hint> {
+        let options = OperationKind::iter().map(|kind| kind.name());
+
+        Self::similar("operation", target, options)
+    }
 }
diff --git a/nemo/src/rule_model/translation/basic/rdf.rs b/nemo/src/rule_model/translation/basic/rdf.rs
index 54ad4fe79..7ad9b3903 100644
--- a/nemo/src/rule_model/translation/basic/rdf.rs
+++ b/nemo/src/rule_model/translation/basic/rdf.rs
@@ -14,7 +14,9 @@ impl<'a> ASTProgramTranslation<'a> {
         &mut self,
         rdf: &'a ast::expression::basic::rdf_literal::RdfLiteral,
     ) -> Result<AnyDataValue, TranslationError> {
-        match AnyDataValue::new_from_typed_literal(rdf.content(), rdf.tag()) {
+        let datatype_iri = self.resolve_tag(rdf.tag())?;
+
+        match AnyDataValue::new_from_typed_literal(rdf.content(), datatype_iri) {
             Ok(data_value) => Ok(data_value),
             Err(error) => Err(TranslationError::new(
                 rdf.span(),
diff --git a/nemo/src/syntax.rs b/nemo/src/syntax.rs
index 1e89a6f1e..b9c839ec2 100644
--- a/nemo/src/syntax.rs
+++ b/nemo/src/syntax.rs
@@ -106,6 +106,8 @@ pub mod expression {
         pub const OPEN: &str = "(";
         /// Closing delimiter
         pub const CLOSE: &str = ")";
+        /// Symbol to separate distinct variables
+        pub const SEPARATOR_DISTINCT: &str = ",";
     }
 
     /// Syntax for variables
@@ -258,6 +260,8 @@ pub mod builtin {
     pub(crate) const STRBEFORE: &str = "STRBEFORE";
     /// Return the second part of a string split by some other string
     pub(crate) const STRAFTER: &str = "STRAFTER";
+    /// Check whether regex pattern holds in a given string
+    pub(crate) const REGEX: &str = "REGEX";
     /// Compute the remainder of two numerical values
     pub(crate) const REM: &str = "REM";
     /// Compute the and on the bit representation of integer values
diff --git a/resources/testcases/arithmetic/builtins.rls b/resources/testcases/arithmetic/builtins.rls
index d9f304531..c1edc3063 100644
--- a/resources/testcases/arithmetic/builtins.rls
+++ b/resources/testcases/arithmetic/builtins.rls
@@ -62,7 +62,7 @@ result(contains, ?R) :- strings(?A, _), ?R = CONTAINS(?A, "lo").
 result(regex, ?R) :- strings(?A, _), ?R = REGEX(?A, "l+").
 result(subString, ?R) :- strings(?A, ?B), ?R = SUBSTR(?A, STRLEN(?B) / 2).
 result(stringreverse, ?R) :- strings(?A, _), ?R = STRREV(?A).
-result(subStringLength, ?R) :- strings(?A, _), ?R = SUBSTRING(?A, 2, 3).
+result(subStringLength, ?R) :- strings(?A, _), ?R = SUBSTR(?A, 2, 3).
 result(ucase, ?R) :- strings(?A, _), ?R = UCASE(?A).
 result(lcase, ?R) :- strings(_, ?B), ?R = LCASE(?B).
 result(stringbefore, ?R) :- strings(?A, _), ?R = STRBEFORE(?A, "ll").
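
To make the aggregate translation reworked in the patches around this point easier to follow: the group-by variables of a head aggregate are all variables occurring in head positions other than the aggregated one, plus any extra variables contributed by an operation wrapping the aggregate. The following Rust sketch is for illustration only and is not code from this patch series; the function name group_by_variables and the use of plain Strings for variables are assumptions made to keep the example self-contained and runnable.

use std::collections::HashSet;

// Sketch of the group-by computation: collect the variables of every head
// position except the one holding the aggregate. `initial` stands in for
// variables contributed by an operation that wraps the aggregate.
fn group_by_variables(
    initial: HashSet<String>,
    head_terms: &[Vec<String>],
    aggregate_index: usize,
) -> Vec<String> {
    let mut collected = initial;
    for (index, term_variables) in head_terms.iter().enumerate() {
        if index != aggregate_index {
            collected.extend(term_variables.iter().cloned());
        }
    }
    let mut result: Vec<String> = collected.into_iter().collect();
    result.sort();
    result
}

fn main() {
    // For a head p(?g, #sum(?v, ?d)), only ?g is grouped by.
    let head = vec![
        vec!["?g".to_string()],
        vec!["?v".to_string(), "?d".to_string()],
    ];
    assert_eq!(
        group_by_variables(HashSet::new(), &head, 1),
        vec!["?g".to_string()]
    );
}

Relatedly, the Guard parser introduced earlier in this series tries InfixExpression before Expression inside nom's alt. The toy grammar below (assuming nom 7 as a dependency; these are simplified stand-ins, not the crate's actual parsers) shows why that order matters: trying the plain expression first would accept only the arithmetic prefix of "2 + 3 = 5" and leave the comparison unconsumed.

use nom::{
    branch::alt,
    bytes::complete::tag,
    character::complete::digit1,
    combinator::{all_consuming, recognize},
    sequence::tuple,
    IResult,
};

// Toy stand-ins for Expression::parse and InfixExpression::parse.
fn arithmetic(input: &str) -> IResult<&str, &str> {
    recognize(tuple((digit1, tag(" + "), digit1)))(input)
}

fn infix(input: &str) -> IResult<&str, &str> {
    recognize(tuple((arithmetic, tag(" = "), digit1)))(input)
}

// Like Guard::parse: try infix first; otherwise "2 + 3 = 5" would match
// only the arithmetic prefix "2 + 3" and leave " = 5" unconsumed.
fn guard(input: &str) -> IResult<&str, &str> {
    alt((infix, arithmetic))(input)
}

fn main() {
    assert!(all_consuming(guard)("2 + 3 = 5").is_ok());
    assert!(all_consuming(guard)("2 + 3").is_ok());
}
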
From 4a737312c6ce09c597338d08601ff467aa7ea56e Mon Sep 17 00:00:00 2001 From: Alex Ivliev Date: Wed, 18 Sep 2024 03:16:02 +0200 Subject: [PATCH 174/214] Final bug fixes --- nemo/src/api.rs | 4 +- nemo/src/chase_model/translation/aggregate.rs | 11 ++---- nemo/src/chase_model/translation/rule.rs | 1 - nemo/src/parser/ast/directive/prefix.rs | 7 +++- .../parser/ast/expression/basic/constant.rs | 38 ++++--------------- .../src/parser/ast/expression/basic/string.rs | 7 +++- .../parser/ast/expression/complex/negation.rs | 32 ++++++++-------- nemo/src/parser/ast/rule.rs | 4 +- nemo/src/parser/ast/tag/structure.rs | 7 +++- nemo/src/parser/ast/token.rs | 18 ++++++++- nemo/src/parser/span.rs | 16 ++++++++ .../rule_model/translation/basic/constant.rs | 2 +- .../translation/directive/import_export.rs | 2 +- nemo/src/syntax.rs | 18 ++++----- resources/testcases/arithmetic/builtins.rls | 2 +- .../testcases/arithmetic/builtins/result.csv | 4 +- .../dsv/bare-names-with-percent.rls | 2 +- .../lcs-diff-computation/run-lcs-10.rls | 34 ++++++++--------- .../run-lcs-10/{max.csv => greatest.csv} | 0 .../run-lcs-10/{min.csv => smallest.csv} | 0 .../planning_engine/constants_filter/run.rls | 8 ++-- .../constants_negation/run.rls | 8 ++-- 22 files changed, 122 insertions(+), 103 deletions(-) rename resources/testcases/lcs-diff-computation/run-lcs-10/{max.csv => greatest.csv} (100%) rename resources/testcases/lcs-diff-computation/run-lcs-10/{min.csv => smallest.csv} (100%) diff --git a/nemo/src/api.rs b/nemo/src/api.rs index 4af11bdb1..92270832a 100644 --- a/nemo/src/api.rs +++ b/nemo/src/api.rs @@ -122,7 +122,7 @@ mod test { use super::*; #[cfg_attr(miri, ignore)] - // #[test] + #[test] fn reason() { std::env::set_current_dir("../resources/testcases/lcs-diff-computation/").unwrap(); let mut engine = load("run-lcs-10.rls".into()).unwrap(); @@ -134,7 +134,7 @@ mod test { .filter(|pred| pred.to_string().contains('i')) .collect::>(); - assert_eq!(results.len(), 5); + assert_eq!(results.len(), 4); let _temp_dir = TempDir::new().unwrap(); // Disabled: // write(temp_dir.to_str().unwrap().to_string(), &mut engine, results).unwrap(); diff --git a/nemo/src/chase_model/translation/aggregate.rs b/nemo/src/chase_model/translation/aggregate.rs index 753ae99fc..90a0492f3 100644 --- a/nemo/src/chase_model/translation/aggregate.rs +++ b/nemo/src/chase_model/translation/aggregate.rs @@ -85,7 +85,6 @@ impl ProgramChaseTranslation { /// Panics if the operation is not "pure", i.e. if it contains as subterms /// terms that are not operations or primitive terms. fn build_operation_term_with_aggregate<'a>( - result: &mut ChaseRule, operation: &'a crate::rule_model::components::term::operation::Operation, aggregation_variable: &Variable, ) -> ( @@ -104,11 +103,8 @@ impl ProgramChaseTranslation { subterms.push(OperationTerm::Primitive(primitive.clone())) } Term::Operation(operation) => { - let (term, result) = Self::build_operation_term_with_aggregate( - result, - operation, - aggregation_variable, - ); + let (term, result) = + Self::build_operation_term_with_aggregate(operation, aggregation_variable); if aggregation_result.is_none() { aggregation_result = result; } @@ -143,7 +139,6 @@ impl ProgramChaseTranslation { /// Panics if operation contains complex terms or multiple aggregates. 
pub(crate) fn build_operation_with_aggregate<'a>( &mut self, - result: &mut ChaseRule, operation: &'a crate::rule_model::components::term::operation::Operation, aggregation_variable: Variable, output_variable: Variable, @@ -152,7 +147,7 @@ impl ProgramChaseTranslation { Option<&'a crate::rule_model::components::term::aggregate::Aggregate>, ) { let (operation_term, aggregate) = - Self::build_operation_term_with_aggregate(result, operation, &aggregation_variable); + Self::build_operation_term_with_aggregate(operation, &aggregation_variable); ( ChaseOperation::new(output_variable, operation_term).set_origin(*operation.origin()), diff --git a/nemo/src/chase_model/translation/rule.rs b/nemo/src/chase_model/translation/rule.rs index 0a0ba1151..fd1a04c4b 100644 --- a/nemo/src/chase_model/translation/rule.rs +++ b/nemo/src/chase_model/translation/rule.rs @@ -269,7 +269,6 @@ impl ProgramChaseTranslation { let (new_operation, operation_aggregate) = self .build_operation_with_aggregate( - result, operation, aggregate_variable.clone(), new_variable.clone(), diff --git a/nemo/src/parser/ast/directive/prefix.rs b/nemo/src/parser/ast/directive/prefix.rs index bedeb89a6..f6aea6ad3 100644 --- a/nemo/src/parser/ast/directive/prefix.rs +++ b/nemo/src/parser/ast/directive/prefix.rs @@ -1,6 +1,9 @@ //! This module defines the [Prefix] directive. -use nom::sequence::{preceded, separated_pair, tuple}; +use nom::{ + branch::alt, + sequence::{preceded, separated_pair, tuple}, +}; use crate::parser::{ ast::{comment::wsoc::WSoC, expression::basic::iri::Iri, token::Token, ProgramAST}, @@ -40,7 +43,7 @@ impl<'a> Prefix<'a> { pub fn parse_body(input: ParserInput<'a>) -> ParserResult<'a, (Token<'a>, Iri<'a>)> { separated_pair( - Token::name, + alt((Token::name, Token::empty)), tuple((WSoC::parse, Token::prefix_assignment, WSoC::parse)), Iri::parse, )(input) diff --git a/nemo/src/parser/ast/expression/basic/constant.rs b/nemo/src/parser/ast/expression/basic/constant.rs index 9cba606be..d38f3a415 100644 --- a/nemo/src/parser/ast/expression/basic/constant.rs +++ b/nemo/src/parser/ast/expression/basic/constant.rs @@ -1,26 +1,13 @@ //! This module defines [Constant] -use nom::{branch::alt, combinator::map}; - use crate::parser::{ - ast::{token::Token, ProgramAST}, + ast::{tag::structure::StructureTag, ProgramAST}, context::{context, ParserContext}, input::ParserInput, span::Span, ParserResult, }; -use super::iri::Iri; - -// Type of constants -#[derive(Debug)] -enum ConstantKind<'a> { - /// Plain constant - Plain(Token<'a>), - /// Iri constant - Iri(Iri<'a>), -} - /// AST node representing a constant #[derive(Debug)] pub struct Constant<'a> { @@ -28,16 +15,13 @@ pub struct Constant<'a> { span: Span<'a>, /// The constant - constant: ConstantKind<'a>, + constant: StructureTag<'a>, } impl<'a> Constant<'a> { - /// Return the name of the constant. - pub fn name(&self) -> String { - match &self.constant { - ConstantKind::Plain(token) => token.to_string(), - ConstantKind::Iri(iri) => iri.content(), - } + /// Return the [StructureTag] representing the constant. 
+ pub fn tag(&self) -> &StructureTag<'a> { + &self.constant } } @@ -58,14 +42,7 @@ impl<'a> ProgramAST<'a> for Constant<'a> { { let input_span = input.span; - context( - CONTEXT, - alt(( - map(Token::name, ConstantKind::Plain), - map(Iri::parse, ConstantKind::Iri), - )), - )(input) - .map(|(rest, constant)| { + context(CONTEXT, StructureTag::parse)(input).map(|(rest, constant)| { let rest_span = rest.span; ( @@ -97,6 +74,7 @@ mod test { fn parse_constant() { let test = vec![ ("abc", "abc".to_string()), + ("abc:def", "abc:def".to_string()), ("", "http://example.com".to_string()), ]; @@ -107,7 +85,7 @@ mod test { assert!(result.is_ok()); let result = result.unwrap(); - assert_eq!(expected, result.1.name()); + assert_eq!(expected, result.1.tag().to_string()); } } } diff --git a/nemo/src/parser/ast/expression/basic/string.rs b/nemo/src/parser/ast/expression/basic/string.rs index b0b3a25b7..afc89577c 100644 --- a/nemo/src/parser/ast/expression/basic/string.rs +++ b/nemo/src/parser/ast/expression/basic/string.rs @@ -2,6 +2,7 @@ #![allow(missing_docs)] use nom::{ + branch::alt, combinator::opt, sequence::{delimited, pair}, }; @@ -39,7 +40,11 @@ impl<'a> StringLiteral<'a> { /// Parse the main part of the string. pub fn parse_string(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>> { - delimited(Token::quote, Token::string, Token::quote)(input) + delimited( + Token::quote, + alt((Token::string, Token::empty)), + Token::quote, + )(input) } /// Parse the language tag of the string. diff --git a/nemo/src/parser/ast/expression/complex/negation.rs b/nemo/src/parser/ast/expression/complex/negation.rs index 66972258c..34d35a216 100644 --- a/nemo/src/parser/ast/expression/complex/negation.rs +++ b/nemo/src/parser/ast/expression/complex/negation.rs @@ -1,9 +1,9 @@ //! This module defines [Negation]. -use nom::sequence::preceded; +use nom::sequence::{preceded, terminated}; use crate::parser::{ - ast::{expression::Expression, token::Token, ProgramAST}, + ast::{comment::wsoc::WSoC, expression::Expression, token::Token, ProgramAST}, context::{context, ParserContext}, input::ParserInput, span::Span, @@ -44,19 +44,21 @@ impl<'a> ProgramAST<'a> for Negation<'a> { { let input_span = input.span; - context(CONTEXT, preceded(Token::tilde, Expression::parse))(input).map( - |(rest, expression)| { - let rest_span = rest.span; - - ( - rest, - Self { - span: input_span.until_rest(&rest_span), - expression: Box::new(expression), - }, - ) - }, - ) + context( + CONTEXT, + preceded(terminated(Token::tilde, WSoC::parse), Expression::parse), + )(input) + .map(|(rest, expression)| { + let rest_span = rest.span; + + ( + rest, + Self { + span: input_span.until_rest(&rest_span), + expression: Box::new(expression), + }, + ) + }) } fn context(&self) -> ParserContext { diff --git a/nemo/src/parser/ast/rule.rs b/nemo/src/parser/ast/rule.rs index 88c25f791..31e617df6 100644 --- a/nemo/src/parser/ast/rule.rs +++ b/nemo/src/parser/ast/rule.rs @@ -33,12 +33,12 @@ pub struct Rule<'a> { } impl<'a> Rule<'a> { - /// Return an iterator of the [Expression]s contained in the head. + /// Return an iterator of the [Guard]s contained in the head. pub fn head(&self) -> impl Iterator> { self.head.iter() } - /// Return an iterator of the [Expression]s contained in the body. + /// Return an iterator of the [Guard]s contained in the body. 
pub fn body(&self) -> impl Iterator> { self.body.iter() } diff --git a/nemo/src/parser/ast/tag/structure.rs b/nemo/src/parser/ast/tag/structure.rs index 08e58b70e..f9d922fbf 100644 --- a/nemo/src/parser/ast/tag/structure.rs +++ b/nemo/src/parser/ast/tag/structure.rs @@ -82,7 +82,11 @@ impl<'a> ProgramAST<'a> for StructureTag<'a> { CONTEXT, alt(( map( - separated_pair(Token::name, Token::namespace_separator, Token::name), + separated_pair( + alt((Token::name, Token::empty)), + Token::namespace_separator, + Token::name, + ), |(prefix, tag)| StructureTagKind::Prefixed { prefix, tag }, ), map(Token::name, StructureTagKind::Plain), @@ -121,6 +125,7 @@ mod test { let test = vec![ ("abc", "abc".to_string()), ("abc:def", "abc:def".to_string()), + (":def", ":def".to_string()), ("", "http://example.com".to_string()), ]; diff --git a/nemo/src/parser/ast/token.rs b/nemo/src/parser/ast/token.rs index 925c15450..0ac5ec321 100644 --- a/nemo/src/parser/ast/token.rs +++ b/nemo/src/parser/ast/token.rs @@ -248,6 +248,9 @@ pub enum TokenKind { /// Token that captures errors #[assoc(name = "error")] Error, + /// Empty token + #[assoc(name = "empty")] + Empty, } /// A token is the smallest unit recognized by the parser @@ -294,13 +297,26 @@ impl<'a> Token<'a> { self.kind } + /// Parse [TokenKind::Empty]. + pub fn empty(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>> { + let beginning = input.span.empty(); + + ParserResult::Ok(( + input, + Token { + span: beginning, + kind: TokenKind::Empty, + }, + )) + } + /// Parse [TokenKind::Name]. pub fn name(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>> { context( ParserContext::token(TokenKind::Name), recognize(pair( alpha1, - many0(alt((alphanumeric1, tag("_"), tag("-")))), + many0(alt((alphanumeric1, tag("_"), tag("-"), tag("%")))), )), )(input) .map(|(rest_input, result)| { diff --git a/nemo/src/parser/span.rs b/nemo/src/parser/span.rs index 5f8f23628..bc4bb0e0e 100644 --- a/nemo/src/parser/span.rs +++ b/nemo/src/parser/span.rs @@ -147,4 +147,20 @@ impl<'a> Span<'a> { } } } + + /// Return an empty [Span] that points to the beginning. 
+ pub fn empty(&self) -> Self { + unsafe { + if self.0.is_empty() { + *self + } else { + Self(LocatedSpan::new_from_raw_offset( + self.0.location_offset(), + self.0.location_line(), + &self.0[0..0], + (), + )) + } + } + } } diff --git a/nemo/src/rule_model/translation/basic/constant.rs b/nemo/src/rule_model/translation/basic/constant.rs index 1f56edcaf..0bcb66801 100644 --- a/nemo/src/rule_model/translation/basic/constant.rs +++ b/nemo/src/rule_model/translation/basic/constant.rs @@ -13,7 +13,7 @@ impl<'a> ASTProgramTranslation<'a> { &mut self, constant: &'a ast::expression::basic::constant::Constant, ) -> Result { - let name = constant.name(); + let name = self.resolve_tag(constant.tag())?; Ok(AnyDataValue::new_iri(name)) } diff --git a/nemo/src/rule_model/translation/directive/import_export.rs b/nemo/src/rule_model/translation/directive/import_export.rs index 42d3f7385..5c24aa63e 100644 --- a/nemo/src/rule_model/translation/directive/import_export.rs +++ b/nemo/src/rule_model/translation/directive/import_export.rs @@ -29,7 +29,7 @@ impl<'a> ASTProgramTranslation<'a> { ) -> Option<(String, &'a ast::expression::Expression<'a>)> { for (key, value) in map.key_value() { if let ast::expression::Expression::Constant(constant) = key { - if &constant.name() == "resource" { + if &constant.tag().to_string() == "resource" { if let ast::expression::Expression::String(string) = value { return Some(( Path::new(&string.content()) diff --git a/nemo/src/syntax.rs b/nemo/src/syntax.rs index b9c839ec2..c32e6c206 100644 --- a/nemo/src/syntax.rs +++ b/nemo/src/syntax.rs @@ -177,17 +177,17 @@ pub mod builtin { /// This module contains all strings of the supported builtin functions. pub mod function { /// Check if two values are equal to each other - pub(crate) const EQUAL: &str = "EQUAL"; + pub(crate) const EQUAL: &str = "EQUALITY"; /// Check if two values are not equal to each other - pub(crate) const UNEQUAL: &str = "UNEQUAL"; + pub(crate) const UNEQUAL: &str = "UNEQUALITY"; /// Check if a numeric value is greater than another - pub(crate) const GREATER: &str = "GREATER"; + pub(crate) const GREATER: &str = "NUMGREATER"; /// Check if a numeric value is greater or equal to another - pub(crate) const GREATEREQ: &str = "GREATEREQ"; + pub(crate) const GREATEREQ: &str = "NUMGREATEREQ"; /// Check if a numeric value is smaller than another - pub(crate) const LESS: &str = "LESS"; + pub(crate) const LESS: &str = "NUMLESS"; /// Check if a numeric value is smaller or equal to another - pub(crate) const LESSEQ: &str = "LESSEQ"; + pub(crate) const LESSEQ: &str = "NUMLESSEQ"; /// Check if value is an integer pub(crate) const IS_INTEGER: &str = "isInteger"; /// Check if value is a 32bit floating point number @@ -279,11 +279,11 @@ pub mod builtin { /// Compute the sum of numerical values pub(crate) const SUM: &str = "SUM"; /// Compute the product of numerical values - pub(crate) const PRODUCT: &str = "PROD"; + pub(crate) const PRODUCT: &str = "PRODUCT"; /// Compute the difference between to numeric values - pub(crate) const SUBTRACTION: &str = "MINUS"; + pub(crate) const SUBTRACTION: &str = "SUBTRACTION"; /// Compute the quotient of two numeric values - pub(crate) const DIVISION: &str = "DIV"; + pub(crate) const DIVISION: &str = "DIVISION"; /// Compute the multiplicative inverse of a numeric value pub(crate) const INVERSE: &str = "INVERSE"; /// Compute the logical and between boolean values diff --git a/resources/testcases/arithmetic/builtins.rls b/resources/testcases/arithmetic/builtins.rls index c1edc3063..6291de4f1 100644 
--- a/resources/testcases/arithmetic/builtins.rls +++ b/resources/testcases/arithmetic/builtins.rls @@ -82,7 +82,7 @@ result(Boolean, ?R) :- boolean(?True, ?False), ?R = STR(OR(AND(?True, ?False, ?T % Nary functions result(sum, ?R) :- doubles(?A, ?B, ?C), ?R = SUM(?A, ?B, ?C). -result(prod, ?R) :- doubles(?A, ?B, ?C), ?R = PROD(?A, ?B, ?C). +result(prod, ?R) :- doubles(?A, ?B, ?C), ?R = PRODUCT(?A, ?B, ?C). result(min, ?R) :- doubles(?A, ?B, ?C), ?R = MIN(?A, ?B, ?C). result(max, ?R) :- doubles(?A, ?B, ?C), ?R = MAX(?A, ?B, ?C). result(luka, ?R) :- numbers(?A, ?B, ?C), ?R = LUKA(?A, ?B, ?C) . diff --git a/resources/testcases/arithmetic/builtins/result.csv b/resources/testcases/arithmetic/builtins/result.csv index d66b4c365..36351957a 100644 --- a/resources/testcases/arithmetic/builtins/result.csv +++ b/resources/testcases/arithmetic/builtins/result.csv @@ -42,8 +42,8 @@ regex,"""true""^^" stringreverse,"""olleH""" subString,"""ello""" subStringLength,"""ell""" -ucase,"""hello""" -lcase,"""WORLD""" +ucase,"""HELLO""" +lcase,"""world""" stringbefore,"""He""" stringafter,"""o""" stringstarts_true,"""true""^^" diff --git a/resources/testcases/data-formats/dsv/bare-names-with-percent.rls b/resources/testcases/data-formats/dsv/bare-names-with-percent.rls index aefdc8751..c524117e0 100644 --- a/resources/testcases/data-formats/dsv/bare-names-with-percent.rls +++ b/resources/testcases/data-formats/dsv/bare-names-with-percent.rls @@ -1,4 +1,4 @@ -@import fromCsv :- csv{resource="./sources/bareNameWithPercent.csv",format=(any)}. +@import fromCsv :- csv{resource="./sources/bareNameWithPercent.csv",format=(any,)}. % comments still work inProg(). % comments still work diff --git a/resources/testcases/lcs-diff-computation/run-lcs-10.rls b/resources/testcases/lcs-diff-computation/run-lcs-10.rls index 73bd0f530..ca21ec278 100644 --- a/resources/testcases/lcs-diff-computation/run-lcs-10.rls +++ b/resources/testcases/lcs-diff-computation/run-lcs-10.rls @@ -1,16 +1,16 @@ -%%% This program computes the longest common subsequence (LCS) of two words -%%% using (mostly) Datalog rules. -%%% -%%% See "An O(ND) Difference Algorithm and its Variations" [Myers 1986] for details -%%% on the algorithm that we follow here. -%%% -%%% Existential quantification is used to create some extra elements to represent numbers. -%%% One could simplify this and many other places if native number support were available. -%%% Likewise, some manual listings could be simplified by using stratified negation (inequality would suffice). -%%% Conversely, a pure Datalog version would also be possible by using "flattened skolem functions" -%%% for the few acyclic nulls we actually need (see [Kroetzsch and Rudolph, IJCAI 2011]). - -%%% Input data: two documents, A and B, that consist of a sequence of letters (or lines) to diff over: +%! This program computes the longest common subsequence (LCS) of two words +%! using (mostly) Datalog rules. +%! +%! See "An O(ND) Difference Algorithm and its Variations" [Myers 1986] for details +%! on the algorithm that we follow here. +%! +%! Existential quantification is used to create some extra elements to represent numbers. +%! One could simplify this and many other places if native number support were available. +%! Likewise, some manual listings could be simplified by using stratified negation (inequality would suffice). +%! Conversely, a pure Datalog version would also be possible by using "flattened skolem functions" +%! 
for the few acyclic nulls we actually need (see [Kroetzsch and Rudolph, IJCAI 2011]).
+
+%! Input data: two documents, A and B, that consist of a sequence of letters (or lines) to diff over:
 
 % @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
 
@@ -45,11 +45,11 @@ ge(?X,?X) :- eq(?X,?Y) .
 % Add some more numbers to our list:
 doPlus(?X,p0) :- docAend(?X) .
 s(?X,!Xp), doPlus(!Xp,?Bp) :- doPlus(?X,?B), s(?B,?Bp), lt(?B,?Bmax), docBend(?Bmax) .
-max(?X) :- doPlus(?X,?Bmax), docBend(?Bmax).
+greatest(?X) :- doPlus(?X,?Bmax), docBend(?Bmax).
 
 minus(p0,p0) .
 minus(?Up,!Lm), s(!Lm,?L) :- minus(?U,?L), s(?U,?Up) .
-min(?X) :- minus(?Y,?X), max(?Y).
+smallest(?X) :- minus(?Y,?X), greatest(?Y).
 
 % Plus 2:
 s2(?X,?Z) :- s(?X,?Y),s(?Y,?Z) .
@@ -97,11 +97,11 @@ lcs(?A,?B,?content), gather(?Ap,?Bp) :- gather(?A,?B), edge(?Ap,?Bp,?A,?B,eq), d
 gather(?Ap,?Bp) :- gather(?A,?B), edge(?Ap,?Bp,?A,?B,down) .
 gather(?Ap,?Bp) :- gather(?A,?B), edge(?Ap,?Bp,?A,?B,right) .
 
-@export min :- csv {}.
+@export smallest :- csv {}.
 @export eq :- csv {}.
 @export nonfinal :- csv {}.
 @export infDocA :- csv {}.
-@export max :- csv {}.
+@export greatest :- csv {}.
 @export docAend :- csv {}.
 @export gather :- csv {}.
 @export docBend :- csv {}.
diff --git a/resources/testcases/lcs-diff-computation/run-lcs-10/max.csv b/resources/testcases/lcs-diff-computation/run-lcs-10/greatest.csv
similarity index 100%
rename from resources/testcases/lcs-diff-computation/run-lcs-10/max.csv
rename to resources/testcases/lcs-diff-computation/run-lcs-10/greatest.csv
diff --git a/resources/testcases/lcs-diff-computation/run-lcs-10/min.csv b/resources/testcases/lcs-diff-computation/run-lcs-10/smallest.csv
similarity index 100%
rename from resources/testcases/lcs-diff-computation/run-lcs-10/min.csv
rename to resources/testcases/lcs-diff-computation/run-lcs-10/smallest.csv
diff --git a/resources/testcases/regression/planning_engine/constants_filter/run.rls b/resources/testcases/regression/planning_engine/constants_filter/run.rls
index f8bc84022..c47ef48c2 100644
--- a/resources/testcases/regression/planning_engine/constants_filter/run.rls
+++ b/resources/testcases/regression/planning_engine/constants_filter/run.rls
@@ -1,7 +1,7 @@
-%%% Test related to
-%%% https://github.com/knowsys/nemo/issues/500
-%%%
-%%% A panic was caused by having a filter consisting of constants
+%! Test related to
+%! https://github.com/knowsys/nemo/issues/500
+%!
+%! A panic was caused by having a filter consisting of constants
 
 pair(1, 2) .
 pair(3, 3) .
diff --git a/resources/testcases/regression/planning_engine/constants_negation/run.rls b/resources/testcases/regression/planning_engine/constants_negation/run.rls
index f7737ed7f..fb1462f2d 100644
--- a/resources/testcases/regression/planning_engine/constants_negation/run.rls
+++ b/resources/testcases/regression/planning_engine/constants_negation/run.rls
@@ -1,7 +1,7 @@
-%%% Test related to
-%%% https://github.com/knowsys/nemo/issues/452
-%%%
-%%% Previously, an incorrect plan was produced, which lead to a crash.
+%! Test related to
+%! https://github.com/knowsys/nemo/issues/452
+%!
+%! Previously, an incorrect plan was produced, which led to a crash.
 
 S(a, b, c).
 S(r, r, r).
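Patch 174 above introduces TokenKind::Empty and the zero-width Span::empty helper, which let the parser attach a source position to tokens that consume no input; the empty-string parsing added in the next patch builds on the same idea. The core trick is nom_locate's unsafe LocatedSpan::new_from_raw_offset constructor, visible in span.rs above. A minimal stand-alone sketch of the same technique follows; only the nom_locate crate is assumed, and the empty_at name is illustrative rather than part of Nemo's API.

use nom_locate::LocatedSpan;

type Span<'a> = LocatedSpan<&'a str>;

/// Illustrative helper: a zero-width span at the start of `span` that
/// still carries the original offset and line information.
fn empty_at<'a>(span: &Span<'a>) -> Span<'a> {
    // SAFETY: offset and line are taken from an existing span and the
    // fragment is an empty slice of the same input, so the invariants
    // required by `new_from_raw_offset` hold.
    unsafe {
        LocatedSpan::new_from_raw_offset(
            span.location_offset(),
            span.location_line(),
            &span.fragment()[0..0],
            (),
        )
    }
}

fn main() {
    let input = Span::new("@prefix ex: ");
    let empty = empty_at(&input);
    assert_eq!(empty.location_offset(), 0);
    assert_eq!(empty.location_line(), 1);
    assert!(empty.fragment().is_empty());
}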
From 85651223814f2c37629098608b9442d4e20ba3cd Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Wed, 18 Sep 2024 08:13:11 +0200 Subject: [PATCH 175/214] Add parsing of an empty string --- nemo/src/parser/ast/expression.rs | 1 + nemo/src/parser/ast/expression/basic/string.rs | 1 + nemo/src/parser/ast/sequence/key_value.rs | 1 + nemo/src/parser/ast/statement.rs | 4 ++++ nemo/src/parser/ast/token.rs | 12 +++++++++--- 5 files changed, 16 insertions(+), 3 deletions(-) diff --git a/nemo/src/parser/ast/expression.rs b/nemo/src/parser/ast/expression.rs index 13b66edd3..12e0eb3b2 100644 --- a/nemo/src/parser/ast/expression.rs +++ b/nemo/src/parser/ast/expression.rs @@ -191,6 +191,7 @@ mod test { ParserContext::RdfLiteral, ), ("\"string\"", ParserContext::String), + ("\"\"", ParserContext::String), ("(1,)", ParserContext::Tuple), ("?variable", ParserContext::Variable), ]; diff --git a/nemo/src/parser/ast/expression/basic/string.rs b/nemo/src/parser/ast/expression/basic/string.rs index afc89577c..d3478faa7 100644 --- a/nemo/src/parser/ast/expression/basic/string.rs +++ b/nemo/src/parser/ast/expression/basic/string.rs @@ -107,6 +107,7 @@ mod test { fn parse_string() { let test = vec![ ("\"test\"", ("test".to_string(), None)), + ("\"\"", ("".to_string(), None)), ( "\"テスト\"@ja", ("テスト".to_string(), Some("ja".to_string())), diff --git a/nemo/src/parser/ast/sequence/key_value.rs b/nemo/src/parser/ast/sequence/key_value.rs index 63b30f24c..95d63c77d 100644 --- a/nemo/src/parser/ast/sequence/key_value.rs +++ b/nemo/src/parser/ast/sequence/key_value.rs @@ -88,6 +88,7 @@ mod test { ("x=3, ?x = 2, 2 = 5", 3), ("x=3 , ?x = 12, 2= 1", 3), ("x=POW(1,2)", 1), + ("resource=\"\"", 1), ]; for (input, expected) in test { diff --git a/nemo/src/parser/ast/statement.rs b/nemo/src/parser/ast/statement.rs index 4ada6845f..f1b47dcf9 100644 --- a/nemo/src/parser/ast/statement.rs +++ b/nemo/src/parser/ast/statement.rs @@ -156,6 +156,10 @@ mod test { "%%% A directive \n \t@declare a(_: int, _: int) .", ParserContext::Directive, ), + ( + "@export test :- csv{resource = \"\"}.", + ParserContext::Directive, + ), ]; for (input, expect) in test { diff --git a/nemo/src/parser/ast/token.rs b/nemo/src/parser/ast/token.rs index 0ac5ec321..3272ebe1a 100644 --- a/nemo/src/parser/ast/token.rs +++ b/nemo/src/parser/ast/token.rs @@ -345,11 +345,17 @@ impl<'a> Token<'a> { /// Parse [TokenKind::String]. 
pub fn string(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>> { - is_not("\"")(input).map(|(rest, result)| { + let input_span = input.span; + // NOTE: Optional for empty string, because `is_not` fails on "\"" + opt(is_not("\""))(input).map(|(rest, result)| { ( - rest, + rest.clone(), Token { - span: result.span, + span: if let Some(result) = result { + result.span + } else { + input_span.until_rest(&rest.span) + }, kind: TokenKind::String, }, ) From b8f08e9bbcde0635a1f3f27e2f5956d58ccab26c Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Wed, 18 Sep 2024 08:15:13 +0200 Subject: [PATCH 176/214] Change error reporting on statement level --- nemo/src/parser/error.rs | 60 +++++++++++++++++++++++++++++++--------- 1 file changed, 47 insertions(+), 13 deletions(-) diff --git a/nemo/src/parser/error.rs b/nemo/src/parser/error.rs index 53142a21d..9f07d5d01 100644 --- a/nemo/src/parser/error.rs +++ b/nemo/src/parser/error.rs @@ -96,10 +96,42 @@ pub(crate) fn report_error<'a>( match &e { nom::Err::Incomplete(_) => (), nom::Err::Error(err) | nom::Err::Failure(err) => { - let (_deepest_pos, errors) = get_deepest_errors(err); - for error in errors { - input.state.report_error(error); - } + let error = match err { + GenericErrorTree::Base { location, .. } => ParserError { + position: CharacterPosition { + offset: location.span.0.location_offset(), + line: location.span.0.location_line(), + column: location.span.0.get_utf8_column() as u32, + }, + context: vec![], + }, + GenericErrorTree::Stack { base, contexts } => { + dbg!(&base); + dbg!(&contexts); + ParserError { + position: CharacterPosition { + offset: contexts[0].0.span.0.location_offset(), + line: contexts[0].0.span.0.location_line(), + column: contexts[0].0.span.0.get_utf8_column() as u32, + }, + context: match contexts[0].1 { + StackContext::Kind(_) => todo!(), + StackContext::Context(ctx) => { + vec![ctx] + } + }, + } + } + GenericErrorTree::Alt(vec) => { + dbg!(&vec); + todo!() + } + }; + input.state.report_error(error); + // let (_deepest_pos, errors) = get_deepest_errors(err); + // for error in errors { + // input.state.report_error(error); + // } } }; Err(e) @@ -121,7 +153,7 @@ pub(crate) fn _transform_error_tree<'a, Output>( match &e { nom::Err::Incomplete(_) => (), nom::Err::Error(err) | nom::Err::Failure(err) => { - let (_deepest_pos, errors) = get_deepest_errors(err); + let (_deepest_pos, errors) = _get_deepest_errors(err); for error in errors { input.state.report_error(error); } @@ -132,7 +164,9 @@ pub(crate) fn _transform_error_tree<'a, Output>( } } -fn context_strs(contexts: &[(ParserInput<'_>, StackContext)]) -> Vec { +fn _context_strs( + contexts: &[(ParserInput<'_>, StackContext)], +) -> Vec { contexts .iter() .map(|(_, c)| match c { @@ -142,7 +176,7 @@ fn context_strs(contexts: &[(ParserInput<'_>, StackContext)]) -> .collect() } -fn get_deepest_errors<'a>(e: &'a ParserErrorTree<'a>) -> (CharacterPosition, Vec) { +fn _get_deepest_errors<'a>(e: &'a ParserErrorTree<'a>) -> (CharacterPosition, Vec) { match e { ParserErrorTree::Base { location, .. 
} => { let span = location.span.0; @@ -184,21 +218,21 @@ fn get_deepest_errors<'a>(e: &'a ParserErrorTree<'a>) -> (CharacterPosition, Vec err_pos, vec![ParserError { position: err_pos, - context: context_strs(contexts), + context: _context_strs(contexts), }], ) } ParserErrorTree::Stack { base, contexts } => { - let (pos, mut deepest_errors) = get_deepest_errors(base); - let contexts = context_strs(contexts); + let (pos, mut deepest_errors) = _get_deepest_errors(base); + let contexts = _context_strs(contexts); for error in &mut deepest_errors { error.context.append(&mut contexts.clone()); } (pos, deepest_errors) } ParserErrorTree::Alt(_error_tree) => { - let (pos, mut deepest_errors) = get_deepest_errors(base); - let contexts = context_strs(contexts); + let (pos, mut deepest_errors) = _get_deepest_errors(base); + let contexts = _context_strs(contexts); for error in &mut deepest_errors { error.context.append(&mut contexts.clone()); } @@ -210,7 +244,7 @@ fn get_deepest_errors<'a>(e: &'a ParserErrorTree<'a>) -> (CharacterPosition, Vec let mut return_vec: Vec = Vec::new(); let mut deepest_pos = CharacterPosition::default(); for error in vec { - let (pos, mut deepest_errors) = get_deepest_errors(error); + let (pos, mut deepest_errors) = _get_deepest_errors(error); match pos.cmp(&deepest_pos) { std::cmp::Ordering::Equal => { return_vec.append(&mut deepest_errors); From 29da1ad5c7e2093c238602300bcd86ab2b2b91b2 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Wed, 18 Sep 2024 08:24:05 +0200 Subject: [PATCH 177/214] Remove debug printing --- nemo/src/parser/error.rs | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/nemo/src/parser/error.rs b/nemo/src/parser/error.rs index 9f07d5d01..c08be50f9 100644 --- a/nemo/src/parser/error.rs +++ b/nemo/src/parser/error.rs @@ -105,25 +105,23 @@ pub(crate) fn report_error<'a>( }, context: vec![], }, - GenericErrorTree::Stack { base, contexts } => { - dbg!(&base); - dbg!(&contexts); - ParserError { - position: CharacterPosition { - offset: contexts[0].0.span.0.location_offset(), - line: contexts[0].0.span.0.location_line(), - column: contexts[0].0.span.0.get_utf8_column() as u32, - }, - context: match contexts[0].1 { - StackContext::Kind(_) => todo!(), - StackContext::Context(ctx) => { - vec![ctx] - } - }, - } - } - GenericErrorTree::Alt(vec) => { - dbg!(&vec); + GenericErrorTree::Stack { + base: _base, + contexts, + } => ParserError { + position: CharacterPosition { + offset: contexts[0].0.span.0.location_offset(), + line: contexts[0].0.span.0.location_line(), + column: contexts[0].0.span.0.get_utf8_column() as u32, + }, + context: match contexts[0].1 { + StackContext::Kind(_) => todo!(), + StackContext::Context(ctx) => { + vec![ctx] + } + }, + }, + GenericErrorTree::Alt(_vec) => { todo!() } }; From 703dfa767a447270cf6e74a2d80c74d5b5370bfb Mon Sep 17 00:00:00 2001 From: Alex Ivliev Date: Wed, 18 Sep 2024 11:56:05 +0200 Subject: [PATCH 178/214] Make enclose function more robust --- nemo/src/parser/ast/program.rs | 2 ++ nemo/src/parser/span.rs | 7 +++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/nemo/src/parser/ast/program.rs b/nemo/src/parser/ast/program.rs index 952576b40..34500cc25 100644 --- a/nemo/src/parser/ast/program.rs +++ b/nemo/src/parser/ast/program.rs @@ -151,7 +151,9 @@ mod test { assert_eq!(result.1.statements.len(), 4); } + // TODO: This test cases causes a warning in miri #[test] + #[cfg_attr(miri, ignore)] fn parser_recover() { let program = "%! 
Top-level comment\n\ % Declarations:\n\ diff --git a/nemo/src/parser/span.rs b/nemo/src/parser/span.rs index bc4bb0e0e..4613c1e03 100644 --- a/nemo/src/parser/span.rs +++ b/nemo/src/parser/span.rs @@ -122,11 +122,14 @@ impl<'a> Span<'a> { /// TODO: Description and Specify safety conditions and verify that this is correct pub fn enclose(&self, first: &Self, second: &Self) -> Self { unsafe { + let slice_length = + second.0.location_offset() + second.0.len() - first.0.location_offset(); + let slice_beginning = first.0.location_offset() - self.0.location_offset(); + Self(LocatedSpan::new_from_raw_offset( first.0.location_offset(), first.0.location_line(), - &self.0 - [..(second.0.location_offset() + second.0.len() - first.0.location_offset())], + &self.0[slice_beginning..(slice_beginning + slice_length)], (), )) } From d7bd8e23b61606b8030a8bc2f3e3a73367098ae1 Mon Sep 17 00:00:00 2001 From: monsterkrampe Date: Wed, 18 Sep 2024 15:03:36 +0200 Subject: [PATCH 179/214] LSP: Better DocumentSymbols; Syntax Highlighting --- Cargo.lock | 2 + nemo-language-server/Cargo.toml | 2 + nemo-language-server/src/language_server.rs | 92 +++++++++++++++++-- .../src/language_server/lsp_component.rs | 27 +++--- .../src/language_server/token_type.rs | 59 ++++++++++++ 5 files changed, 162 insertions(+), 20 deletions(-) create mode 100644 nemo-language-server/src/language_server/token_type.rs diff --git a/Cargo.lock b/Cargo.lock index 68854a7c4..f5afbfa5f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1534,6 +1534,8 @@ dependencies = [ "futures", "line-index", "nemo", + "strum", + "strum_macros", "tokio", "tower-lsp", "tower-service", diff --git a/nemo-language-server/Cargo.toml b/nemo-language-server/Cargo.toml index 6f0c258dd..acfd86e06 100644 --- a/nemo-language-server/Cargo.toml +++ b/nemo-language-server/Cargo.toml @@ -26,6 +26,8 @@ anyhow = "1.0" line-index = "0.1.1" nemo = { path = "../nemo", default-features = false } futures = "0.3.21" +strum = "0.26.3" +strum_macros = "0.26.4" tokio = { version = "1.27.0", features = ["macros", "io-util", "rt-multi-thread"], optional = true } tower-lsp = { version = "0.20.0", default-features = false } tower-service = "0.3.2" diff --git a/nemo-language-server/src/language_server.rs b/nemo-language-server/src/language_server.rs index d3ffe77b2..db80776e5 100644 --- a/nemo-language-server/src/language_server.rs +++ b/nemo-language-server/src/language_server.rs @@ -1,6 +1,8 @@ mod lsp_component; mod nemo_position; +mod token_type; +use strum::IntoEnumIterator; use std::collections::{BTreeMap, BTreeSet, HashMap}; use std::vec; @@ -16,14 +18,9 @@ use nemo::parser::{Parser, ParserErrorReport}; use nemo_position::{ lsp_position_to_nemo_position, nemo_range_to_lsp_range, PositionConversionError, }; +use token_type::TokenType; use tower_lsp::lsp_types::{ - Diagnostic, DidChangeTextDocumentParams, DidOpenTextDocumentParams, DocumentChangeOperation, - DocumentChanges, DocumentSymbol, DocumentSymbolOptions, DocumentSymbolParams, - DocumentSymbolResponse, InitializeParams, InitializeResult, InitializedParams, Location, - MessageType, OneOf, OptionalVersionedTextDocumentIdentifier, PrepareRenameResponse, Range, - ReferenceParams, RenameOptions, RenameParams, ServerCapabilities, TextDocumentEdit, - TextDocumentPositionParams, TextDocumentSyncCapability, TextDocumentSyncKind, TextEdit, Url, - VersionedTextDocumentIdentifier, WorkDoneProgressOptions, WorkspaceEdit, + Diagnostic, DidChangeTextDocumentParams, DidOpenTextDocumentParams, DocumentChangeOperation, DocumentChanges, DocumentSymbol, 
DocumentSymbolOptions, DocumentSymbolParams, DocumentSymbolResponse, InitializeParams, InitializeResult, InitializedParams, Location, MessageType, OneOf, OptionalVersionedTextDocumentIdentifier, Position, PrepareRenameResponse, Range, ReferenceParams, RenameOptions, RenameParams, SemanticToken, SemanticTokens, SemanticTokensFullOptions, SemanticTokensLegend, SemanticTokensOptions, SemanticTokensParams, SemanticTokensResult, SemanticTokensServerCapabilities, ServerCapabilities, TextDocumentEdit, TextDocumentPositionParams, TextDocumentSyncCapability, TextDocumentSyncKind, TextEdit, Url, VersionedTextDocumentIdentifier, WorkDoneProgressOptions, WorkspaceEdit }; use tower_lsp::{Client, LanguageServer}; @@ -193,6 +190,20 @@ impl LanguageServer for Backend { ..Default::default() }, })), + semantic_tokens_provider: Some( + SemanticTokensServerCapabilities::SemanticTokensOptions( + SemanticTokensOptions { + legend: SemanticTokensLegend { + token_types: TokenType::iter() + .map(TokenType::to_semantic_token_type) + .collect(), + token_modifiers: vec![], + }, + full: Some(SemanticTokensFullOptions::Bool(true)), + ..Default::default() + }, + ), + ), ..Default::default() }, ..Default::default() @@ -324,6 +335,54 @@ impl LanguageServer for Backend { Ok(Some(DocumentSymbolResponse::Nested(document_symbols))) } + async fn semantic_tokens_full( + &self, + params: SemanticTokensParams, + ) -> tower_lsp::jsonrpc::Result> { + let info = self + .read_text_document_info(¶ms.text_document.uri) + .await + .map_err(jsonrpc_error)?; + + let text = info.text; + let line_index = LineIndex::new(&text); + + let (program, _): (Program, Option) = + Parser::initialize(&text, params.text_document.uri.to_string()) + .parse() + .map(|prg| (prg, None)) + .unwrap_or_else(|(prg, err)| (*prg, Some(err))); + + let token_types_with_ranges = ast_node_to_semantic_tokens(&line_index, &program); + + Ok(Some(SemanticTokensResult::Tokens(SemanticTokens { + result_id: None, + data: token_types_with_ranges + .into_iter() + .scan(Position::new(0, 0), |last_pos, (token_type, range)| { + let delta_line = range.start.line - last_pos.line; + let result: SemanticToken = SemanticToken { + delta_line, + delta_start: if delta_line == 0 { + range.start.character - last_pos.character + } else { + range.start.character + }, + length: if range.start.line == range.end.line { + range.end.character - range.start.character + } else { + range.end.character + }, + token_type: token_type as u32, + token_modifiers_bitset: 0, + }; + *last_pos = range.start; + Some(result) + }) + .collect(), + }))) + } + /// Finds references to symbol that was renamed and sends edit operations to language client async fn rename( &self, @@ -598,3 +657,22 @@ fn ast_node_to_document_symbol<'a>( Ok(children) } } + +fn ast_node_to_semantic_tokens<'a>( + line_index: &LineIndex, + node: &'a dyn ProgramAST<'a>, +) -> Vec<(TokenType, Range)> { + if let Some(token_type) = TokenType::from_parser_context(node.context()) { + let range_res = nemo_range_to_lsp_range(line_index, node.span().range()); + if let Ok(range) = range_res { + vec![(token_type, range)] + } else { + vec![] + } + } else { + node.children() + .into_iter() + .flat_map(|child| ast_node_to_semantic_tokens(line_index, child)) + .collect() + } +} diff --git a/nemo-language-server/src/language_server/lsp_component.rs b/nemo-language-server/src/language_server/lsp_component.rs index 3b78e79a1..f35946144 100644 --- a/nemo-language-server/src/language_server/lsp_component.rs +++ 
b/nemo-language-server/src/language_server/lsp_component.rs @@ -86,6 +86,16 @@ where fn symbol_info(&self) -> Option { let kind = match self.context() { + ParserContext::Program => return Some(LSPSymbolInfo {kind: SymbolKind::FILE, name: "Program".to_string()}), + ParserContext::Rule => return Some(LSPSymbolInfo {kind: SymbolKind::CLASS, name: "Rule".to_string()}), + ParserContext::Base => return Some(LSPSymbolInfo {kind: SymbolKind::PROPERTY, name: "Base".to_string()}), + ParserContext::Declare => return Some(LSPSymbolInfo {kind: SymbolKind::PROPERTY, name: "Declare".to_string()}), + ParserContext::Import => return Some(LSPSymbolInfo {kind: SymbolKind::PROPERTY, name: "Import".to_string()}), + ParserContext::Export => return Some(LSPSymbolInfo {kind: SymbolKind::PROPERTY, name: "Export".to_string()}), + ParserContext::Prefix => return Some(LSPSymbolInfo {kind: SymbolKind::PROPERTY, name: "Prefix".to_string()}), + ParserContext::Output => return Some(LSPSymbolInfo {kind: SymbolKind::PROPERTY, name: "Output".to_string()}), + + ParserContext::Atom => Some(SymbolKind::FIELD), ParserContext::DataType => Some(SymbolKind::TYPE_PARAMETER), ParserContext::Variable => Some(SymbolKind::VARIABLE), ParserContext::Iri => Some(SymbolKind::STRING), @@ -96,25 +106,16 @@ where ParserContext::Number => Some(SymbolKind::NUMBER), ParserContext::String => Some(SymbolKind::STRING), ParserContext::Boolean => Some(SymbolKind::BOOLEAN), - ParserContext::Base - | ParserContext::Declare - | ParserContext::Export - | ParserContext::Import - | ParserContext::Output - | ParserContext::Prefix - | ParserContext::UnknownDirective => Some(SymbolKind::PROPERTY), ParserContext::Arithmetic - | ParserContext::Negation - | ParserContext::AggregationTag - | ParserContext::OperationTag - | ParserContext::Infix => Some(SymbolKind::OPERATOR), - ParserContext::Program => Some(SymbolKind::FILE), + | ParserContext::Operation + | ParserContext::Aggregation + | ParserContext::Negation => Some(SymbolKind::OPERATOR), _ => None, }; kind.map(|kind| LSPSymbolInfo { kind, - name: format!("{}: {}", self.context().name(), self.span().0.fragment()), + name: format!("{}", self.span().0.fragment()), }) } diff --git a/nemo-language-server/src/language_server/token_type.rs b/nemo-language-server/src/language_server/token_type.rs new file mode 100644 index 000000000..db4e23844 --- /dev/null +++ b/nemo-language-server/src/language_server/token_type.rs @@ -0,0 +1,59 @@ +use strum_macros::EnumIter; +use nemo::parser::context::ParserContext; +use tower_lsp::lsp_types::SemanticTokenType; + +#[derive(Copy, Clone, EnumIter)] +#[repr(u32)] +pub(super) enum TokenType { + Type, + Variable, + String, + Function, + Number, + Bool, + Property, + Operator, + Comment, +} + +impl TokenType { + pub(super) fn from_parser_context(ctx: ParserContext) -> Option { + match ctx { + ParserContext::DataType => Some(TokenType::Type), + ParserContext::Variable => Some(TokenType::Variable), + ParserContext::Iri | ParserContext::Constant | ParserContext::RdfLiteral | ParserContext::Blank | ParserContext::String => Some(TokenType::String), + ParserContext::StructureTag => Some(TokenType::Function), + ParserContext::Number => Some(TokenType::Number), + ParserContext::Boolean => Some(TokenType::Bool), + //ParserContext::Base + //| ParserContext::Declare + //| ParserContext::Export + //| ParserContext::Import + //| ParserContext::Output + //| ParserContext::Prefix + //| ParserContext::UnknownDirective => Some(TokenType::Property), + ParserContext::Negation + | 
ParserContext::AggregationTag + | ParserContext::OperationTag + | ParserContext::Infix => Some(TokenType::Operator), + ParserContext::Comment | ParserContext::DocComment | ParserContext::TopLevelComment => { + Some(TokenType::Comment) + } + _ => None, + } + } + + pub(super) fn to_semantic_token_type(self) -> SemanticTokenType { + match self { + Self::Type => SemanticTokenType::TYPE, + Self::Variable => SemanticTokenType::VARIABLE, + Self::String => SemanticTokenType::STRING, + Self::Function => SemanticTokenType::FUNCTION, + Self::Number => SemanticTokenType::NUMBER, + Self::Bool => SemanticTokenType::new("bool"), + Self::Property => SemanticTokenType::PROPERTY, + Self::Operator => SemanticTokenType::OPERATOR, + Self::Comment => SemanticTokenType::COMMENT, + } + } +} From 21be7fb31a227a89731c892cb35f9197d54f4052 Mon Sep 17 00:00:00 2001 From: monsterkrampe Date: Wed, 18 Sep 2024 15:26:14 +0200 Subject: [PATCH 180/214] Add comments to language server functions --- nemo-language-server/src/language_server.rs | 18 ++++++++++++++++++ .../src/language_server/token_type.rs | 14 +++++++------- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/nemo-language-server/src/language_server.rs b/nemo-language-server/src/language_server.rs index db80776e5..aa737c538 100644 --- a/nemo-language-server/src/language_server.rs +++ b/nemo-language-server/src/language_server.rs @@ -76,6 +76,7 @@ impl Backend { } } + /// Parses the Nemo Program and returns errors with their respective positions async fn handle_change( &self, text_document: VersionedTextDocumentIdentifier, @@ -168,6 +169,7 @@ impl Backend { #[tower_lsp::async_trait] impl LanguageServer for Backend { + /// Called when the language server is started from the client async fn initialize( &self, _: InitializeParams, @@ -210,12 +212,14 @@ impl LanguageServer for Backend { }) } + /// Called when initialization finished on the client side async fn initialized(&self, _: InitializedParams) { self.client .log_message(MessageType::INFO, "server initialized") .await; } + /// Called when a document is opened on the client side async fn did_open(&self, params: DidOpenTextDocumentParams) { if let Err(error) = self .handle_change( @@ -236,6 +240,7 @@ impl LanguageServer for Backend { } } + /// Called when the opened document changes async fn did_change(&self, params: DidChangeTextDocumentParams) { if let Err(error) = self .handle_change( @@ -256,6 +261,7 @@ impl LanguageServer for Backend { } } + /// Called when the client requests references of the symbol under the cursor async fn references( &self, params: ReferenceParams, @@ -308,6 +314,7 @@ impl LanguageServer for Backend { Ok(Some(locations)) } + /// Returns all DocumentSymbols in the Nemo Program, which are used to produce an outline async fn document_symbol( &self, params: DocumentSymbolParams, @@ -335,6 +342,7 @@ impl LanguageServer for Backend { Ok(Some(DocumentSymbolResponse::Nested(document_symbols))) } + /// Called to receive syntax highlighting information async fn semantic_tokens_full( &self, params: SemanticTokensParams, @@ -500,6 +508,7 @@ impl LanguageServer for Backend { } } +/// Associates a ProgramAST node with its corresponding range in the LSP world fn node_with_range<'a>( line_index: &LineIndex, node: &'a dyn ProgramAST<'a>, @@ -559,6 +568,7 @@ fn node_path_deepest_identifier<'a>( }); } +/// Finds all children of the given node (potentially the node itself) that match the identifier fn find_by_identifier<'a>( node: &'a dyn ProgramAST<'a>, identifier: &(ParserContext, String), @@ 
-570,6 +580,7 @@ fn find_by_identifier<'a>( references } +/// Actual implementation of [`find_by_identifier`] fn find_by_identifier_recurse<'a>( node: &'a dyn ProgramAST<'a>, identifier: &(ParserContext, String), @@ -588,6 +599,7 @@ fn find_by_identifier_recurse<'a>( } } +/// Returns the path of AST nodes that lead to a given position from a given node fn find_in_ast<'a>( node: &'a Program<'a>, position: CharacterPosition, @@ -599,6 +611,7 @@ fn find_in_ast<'a>( path } +/// Actual implementation of [`find_in_ast`] fn find_in_ast_recurse<'a>( node: &'a dyn ProgramAST<'a>, position: CharacterPosition, @@ -615,6 +628,8 @@ fn find_in_ast_recurse<'a>( } } +/// Turns a given AST node into a DocumentSymbol to show in the outline; the DocumentSymbol has a +/// tree structure in itself so this function calls itself recursively. fn ast_node_to_document_symbol<'a>( line_index: &LineIndex, node: &'a dyn ProgramAST<'a>, @@ -658,6 +673,9 @@ fn ast_node_to_document_symbol<'a>( } } +/// Returns syntax highlighting information for all children of the given node including the node +/// itself. Once a child has syntax highlighting information associated with it, the recursion does +/// not go any deeper. fn ast_node_to_semantic_tokens<'a>( line_index: &LineIndex, node: &'a dyn ProgramAST<'a>, diff --git a/nemo-language-server/src/language_server/token_type.rs b/nemo-language-server/src/language_server/token_type.rs index db4e23844..fff15666d 100644 --- a/nemo-language-server/src/language_server/token_type.rs +++ b/nemo-language-server/src/language_server/token_type.rs @@ -2,6 +2,7 @@ use strum_macros::EnumIter; use nemo::parser::context::ParserContext; use tower_lsp::lsp_types::SemanticTokenType; +/// All syntax highlighting types that are used in Nemo programs #[derive(Copy, Clone, EnumIter)] #[repr(u32)] pub(super) enum TokenType { @@ -17,6 +18,8 @@ pub(super) enum TokenType { } impl TokenType { + /// ParserContext (i.e. AST node types) are mapped to syntax highlighting types or None if they + /// shall not be highlighted pub(super) fn from_parser_context(ctx: ParserContext) -> Option { match ctx { ParserContext::DataType => Some(TokenType::Type), @@ -25,13 +28,6 @@ impl TokenType { ParserContext::StructureTag => Some(TokenType::Function), ParserContext::Number => Some(TokenType::Number), ParserContext::Boolean => Some(TokenType::Bool), - //ParserContext::Base - //| ParserContext::Declare - //| ParserContext::Export - //| ParserContext::Import - //| ParserContext::Output - //| ParserContext::Prefix - //| ParserContext::UnknownDirective => Some(TokenType::Property), ParserContext::Negation | ParserContext::AggregationTag | ParserContext::OperationTag @@ -39,10 +35,14 @@ impl TokenType { ParserContext::Comment | ParserContext::DocComment | ParserContext::TopLevelComment => { Some(TokenType::Comment) } + // TODO: imports, base, etc. 
(everything starting with @ should be handled via + // ParserContext::Token {} but this requires changes to the children method of the AST + // nodes in the nemo crate) _ => None, } } + /// The TokenType is translated to a syntax highlighting type that is understood by the LSP pub(super) fn to_semantic_token_type(self) -> SemanticTokenType { match self { Self::Type => SemanticTokenType::TYPE, From 3ca57009ce63524bd3a0dc6454c7b6b97faf4a8c Mon Sep 17 00:00:00 2001 From: Alex Ivliev Date: Wed, 18 Sep 2024 15:55:08 +0200 Subject: [PATCH 181/214] Add tracing for simple min and max aggregates --- nemo-cli/src/main.rs | 2 +- nemo-physical/src/tabular/operations.rs | 10 +++ nemo-python/src/lib.rs | 2 +- nemo-wasm/src/lib.rs | 10 ++- nemo/src/error.rs | 7 +- nemo/src/execution/execution_engine.rs | 27 ++++---- nemo/src/execution/planning/plan_tracing.rs | 64 ++++++++++++++++--- nemo/src/execution/tracing.rs | 1 + nemo/src/execution/tracing/error.rs | 14 ++++ .../rule_model/components/term/operation.rs | 6 +- .../term/operation/operation_kind.rs | 14 ++-- 11 files changed, 122 insertions(+), 35 deletions(-) create mode 100644 nemo/src/execution/tracing/error.rs diff --git a/nemo-cli/src/main.rs b/nemo-cli/src/main.rs index 840166a53..6f4fe512e 100644 --- a/nemo-cli/src/main.rs +++ b/nemo-cli/src/main.rs @@ -200,7 +200,7 @@ fn handle_tracing( facts.push(fact); } - let (trace, handles) = engine.trace(program, facts); + let (trace, handles) = engine.trace(program, facts)?; match &cli.tracing.output_file { Some(output_file) => { diff --git a/nemo-physical/src/tabular/operations.rs b/nemo-physical/src/tabular/operations.rs index 3f5cd8e48..9cd16a864 100644 --- a/nemo-physical/src/tabular/operations.rs +++ b/nemo-physical/src/tabular/operations.rs @@ -246,6 +246,16 @@ where pub fn get<'a>(&'a self, marker: &ExternalMarker) -> Option<&'a OperationColumnMarker> { self.map.get(marker) } + + /// Given a [OperationColumnMarker], find the corresponding [ExternalMarker]. + pub fn find<'a>(&'a self, marker: &OperationColumnMarker) -> Option<&'a ExternalMarker> { + Some( + self.map + .iter() + .find(|(_, &operation)| operation == *marker)? 
+ .0, + ) + } } /// Trait for objects that are able to generate [TrieScanEnum], diff --git a/nemo-python/src/lib.rs b/nemo-python/src/lib.rs index 3b8d9fdec..ca2097adf 100644 --- a/nemo-python/src/lib.rs +++ b/nemo-python/src/lib.rs @@ -418,7 +418,7 @@ impl NemoEngine { let mut builder = ValidationErrorBuilder::default(); fact.validate(&mut builder)?; - let (trace, handles) = self.engine.trace(self.program.0.clone(), vec![fact]); + let (trace, handles) = self.engine.trace(self.program.0.clone(), vec![fact]).ok()?; let handle = *handles .first() .expect("Function trace always returns a handle for each input fact"); diff --git a/nemo-wasm/src/lib.rs b/nemo-wasm/src/lib.rs index a370f2a58..b4360d002 100644 --- a/nemo-wasm/src/lib.rs +++ b/nemo-wasm/src/lib.rs @@ -401,7 +401,9 @@ impl NemoEngine { let (trace, handles) = self .engine - .trace(self.program.0.clone(), vec![fact_to_trace]); + .trace(self.program.0.clone(), vec![fact_to_trace]) + .map_err(WasmOrInternalNemoError::Nemo) + .map_err(NemoError)?; Ok(Some((trace, handles))) } else { @@ -459,7 +461,11 @@ impl NemoEngine { .map_err(WasmOrInternalNemoError::ComponentParse) .map_err(NemoError)?; - let (trace, handles) = self.engine.trace(self.program.0.clone(), vec![parsed_fact]); + let (trace, handles) = self + .engine + .trace(self.program.0.clone(), vec![parsed_fact]) + .map_err(WasmOrInternalNemoError::Nemo) + .map_err(NemoError)?; Ok(Some((trace, handles))) } diff --git a/nemo/src/error.rs b/nemo/src/error.rs index 1fbb99230..c4e56cb14 100644 --- a/nemo/src/error.rs +++ b/nemo/src/error.rs @@ -7,7 +7,9 @@ use thiserror::Error; use crate::{ chase_model::analysis::program_analysis::RuleAnalysisError, - execution::selection_strategy::strategy::SelectionStrategyError, + execution::{ + selection_strategy::strategy::SelectionStrategyError, tracing::error::TracingError, + }, }; pub use nemo_physical::error::ReadingError; @@ -25,6 +27,9 @@ pub enum Error { /// Error occurred during parsing #[error("error while parsing program")] ProgramParseError, + /// Error occurred during tracing + #[error(transparent)] + TracingError(#[from] TracingError), /// IO Error #[error(transparent)] IO(#[from] std::io::Error), diff --git a/nemo/src/execution/execution_engine.rs b/nemo/src/execution/execution_engine.rs index 500359418..907a431ca 100644 --- a/nemo/src/execution/execution_engine.rs +++ b/nemo/src/execution/execution_engine.rs @@ -36,7 +36,10 @@ use crate::{ use super::{ rule_execution::RuleExecution, selection_strategy::strategy::RuleSelectionStrategy, - tracing::trace::{ExecutionTrace, TraceFactHandle, TraceRuleApplication, TraceStatus}, + tracing::{ + error::TracingError, + trace::{ExecutionTrace, TraceFactHandle, TraceRuleApplication, TraceStatus}, + }, }; // Number of tables that are periodically combined into one. 
@@ -317,11 +320,11 @@ impl ExecutionEngine { program: &ChaseProgram, trace: &mut ExecutionTrace, fact: GroundAtom, - ) -> TraceFactHandle { + ) -> Result { let trace_handle = trace.register_fact(fact.clone()); if trace.status(trace_handle).is_known() { - return trace_handle; + return Ok(trace_handle); } // Find the origin of the given fact @@ -334,14 +337,14 @@ impl ExecutionEngine { // If the table manager does not know the predicate of the fact // then it could not have been derived trace.update_status(trace_handle, TraceStatus::Fail); - return trace_handle; + return Ok(trace_handle); } }; if step == 0 { // If a fact was derived in step 0 it must have been given as an EDB fact trace.update_status(trace_handle, TraceStatus::Success(TraceDerivation::Input)); - return trace_handle; + return Ok(trace_handle); } // Rule index of the rule that was applied to derive the given fact @@ -399,7 +402,7 @@ impl ExecutionEngine { let rule = self.program.rules()[rule_index].clone(); let analysis = &self.analysis.rule_analysis[rule_index]; let mut variable_order = analysis.promising_variable_orders[0].clone(); // TODO: This selection is arbitrary - let trace_strategy = TracingStrategy::initialize(&rule, grounding); + let trace_strategy = TracingStrategy::initialize(&rule, grounding)?; let mut execution_plan = SubtableExecutionPlan::default(); @@ -436,7 +439,7 @@ impl ExecutionEngine { let next_fact = GroundAtom::new(next_fact_predicate, next_fact_terms); - let next_handle = self.trace_recursive(program, trace, next_fact); + let next_handle = self.trace_recursive(program, trace, next_fact)?; if trace.status(next_handle).is_success() { subtraces.push(next_handle); @@ -461,14 +464,14 @@ impl ExecutionEngine { let derivation = TraceDerivation::Derived(rule_application, subtraces); trace.update_status(trace_handle, TraceStatus::Success(derivation)); - return trace_handle; + return Ok(trace_handle); } else { continue; } } trace.update_status(trace_handle, TraceStatus::Fail); - trace_handle + Ok(trace_handle) } /// Build an [ExecutionTrace] for a list of facts. 
@@ -479,7 +482,7 @@ impl ExecutionEngine { &mut self, program: Program, facts: Vec, - ) -> (ExecutionTrace, Vec) { + ) -> Result<(ExecutionTrace, Vec), Error> { let mut trace = ExecutionTrace::new(program); let chase_program = self.program.clone(); @@ -488,9 +491,9 @@ impl ExecutionEngine { for fact in facts { let chase_fact = ProgramChaseTranslation::new().build_fact(&fact); - handles.push(self.trace_recursive(&chase_program, &mut trace, chase_fact)); + handles.push(self.trace_recursive(&chase_program, &mut trace, chase_fact)?); } - (trace, handles) + Ok((trace, handles)) } } diff --git a/nemo/src/execution/planning/plan_tracing.rs b/nemo/src/execution/planning/plan_tracing.rs index 070342e12..7a264555a 100644 --- a/nemo/src/execution/planning/plan_tracing.rs +++ b/nemo/src/execution/planning/plan_tracing.rs @@ -10,14 +10,16 @@ use crate::{ components::{ atom::variable_atom::VariableAtom, filter::ChaseFilter, + operation::ChaseOperation, rule::ChaseRule, term::operation_term::{Operation, OperationTerm}, }, }, - execution::rule_execution::VariableTranslation, + execution::{rule_execution::VariableTranslation, tracing::error::TracingError}, rule_model::components::{ tag::Tag, term::{ + aggregate::AggregateKind, operation::operation_kind::OperationKind, primitive::{variable::Variable, Primitive}, }, @@ -26,29 +28,49 @@ use crate::{ table_manager::{SubtableExecutionPlan, SubtableIdentifier, TableManager}, }; -use super::operations::{filter::node_filter, join::node_join, negation::node_negation}; +use super::operations::{ + filter::node_filter, functions::node_functions, join::node_join, negation::node_negation, +}; /// Implementation of the semi-naive existential rule evaluation strategy. #[derive(Debug)] pub(crate) struct TracingStrategy { positive_atoms: Vec, positive_filters: Vec, + positive_operations: Vec, negative_atoms: Vec, - negatie_filters: Vec>, + negative_filters: Vec>, variable_translation: VariableTranslation, } impl TracingStrategy { /// Create new [TracingStrategy] object. 
- pub(crate) fn initialize(rule: &ChaseRule, grounding: HashMap) -> Self { + pub(crate) fn initialize( + rule: &ChaseRule, + grounding: HashMap, + ) -> Result { + if let Some(aggregate) = rule.aggregate() { + match aggregate.aggregate_kind() { + AggregateKind::CountValues | AggregateKind::SumOfNumbers => { + return Err(TracingError::UnsupportedFeatureNonMinMaxAggregation) + } + AggregateKind::MinNumber | AggregateKind::MaxNumber => {} + } + } + + if !rule.aggregate_operations().is_empty() || !rule.aggregate_filters().is_empty() { + return Err(TracingError::UnsupportedFeatureComplexAggregates); + } + let mut variable_translation = VariableTranslation::new(); for variable in rule.variables().cloned() { variable_translation.add_marker(variable); } let mut positive_filters = rule.positive_filters().clone(); + let positive_operations = rule.positive_operations().clone(); let operations = rule .positive_operations() @@ -56,7 +78,7 @@ impl TracingStrategy { .map(|operation| (operation.variable().clone(), operation.operation().clone())) .collect::>(); - for (variable, value) in grounding { + for (mut variable, value) in grounding { if let Some(term) = operations.get(&variable) { let filter = ChaseFilter::new(OperationTerm::Operation(Operation::new( OperationKind::Equal, @@ -67,6 +89,12 @@ impl TracingStrategy { ))); positive_filters.push(filter); } else { + if let Some(aggregate) = rule.aggregate() { + if &variable == aggregate.output_variable() { + variable = aggregate.input_variable().clone(); + } + } + let filter = ChaseFilter::new(OperationTerm::Operation(Operation::new( OperationKind::Equal, vec![ @@ -74,17 +102,19 @@ impl TracingStrategy { OperationTerm::Primitive(Primitive::from(value)), ], ))); + positive_filters.push(filter); } } - Self { + Ok(Self { positive_atoms: rule.positive_body().clone(), positive_filters, + positive_operations, negative_atoms: rule.negative_body().clone(), - negatie_filters: rule.negative_filters().clone(), + negative_filters: rule.negative_filters().clone(), variable_translation, - } + }) } pub(crate) fn add_plan( @@ -107,10 +137,17 @@ impl TracingStrategy { join_output_markers, ); - let node_filter = node_filter( + let node_body_functions = node_functions( current_plan.plan_mut(), &self.variable_translation, node_join, + &self.positive_operations, + ); + + let node_filter = node_filter( + current_plan.plan_mut(), + &self.variable_translation, + node_body_functions, &self.positive_filters, ); @@ -121,7 +158,7 @@ impl TracingStrategy { node_filter, step_number, &self.negative_atoms, - &self.negatie_filters, + &self.negative_filters, ); current_plan.add_permanent_table( @@ -131,6 +168,13 @@ impl TracingStrategy { SubtableIdentifier::new(Tag::new(String::from("_TRACING")), step_number), ); + *variable_order = VariableOrder::default(); + for marker in node_negation.markers_cloned() { + if let Some(variable) = self.variable_translation.find(&marker) { + variable_order.push(variable.clone()); + } + } + node_negation } } diff --git a/nemo/src/execution/tracing.rs b/nemo/src/execution/tracing.rs index 70f4f7ce0..007ad031d 100644 --- a/nemo/src/execution/tracing.rs +++ b/nemo/src/execution/tracing.rs @@ -1,3 +1,4 @@ //! This module contains functionality for keeping track of the origings of derived facts +pub mod error; pub mod trace; diff --git a/nemo/src/execution/tracing/error.rs b/nemo/src/execution/tracing/error.rs new file mode 100644 index 000000000..23cc49005 --- /dev/null +++ b/nemo/src/execution/tracing/error.rs @@ -0,0 +1,14 @@ +//! 
This module defines [TracingError]. + +use thiserror::Error; + +/// Error that can occur while tracing. +#[derive(Debug, Error, Copy, Clone)] +pub enum TracingError { + /// Error when tracing rules with count and sum aggregate + #[error("tracing is only supported for #min and #max aggregation")] + UnsupportedFeatureNonMinMaxAggregation, + /// Error when tracing over aggregates that involve arithmetic + #[error("tracing not supported for aggregates combined with arithmetic")] + UnsupportedFeatureComplexAggregates, +} diff --git a/nemo/src/rule_model/components/term/operation.rs b/nemo/src/rule_model/components/term/operation.rs index 2d8069fbc..998805ea8 100644 --- a/nemo/src/rule_model/components/term/operation.rs +++ b/nemo/src/rule_model/components/term/operation.rs @@ -126,7 +126,7 @@ impl Operation { self.format_braces_priority(f, term)?; if index < terms.len() - 1 { - f.write_str(delimiter)?; + f.write_fmt(format_args!("{delimiter} "))?; } } @@ -140,7 +140,9 @@ impl Operation { OperationKind::NumericSum => "+", OperationKind::NumericSubtraction => "-", OperationKind::NumericProduct => "*", - &OperationKind::NumericDivision => "/", + OperationKind::NumericDivision => "/", + OperationKind::Equal => "=", + OperationKind::Unequals => "!=", _ => return None, }) } diff --git a/nemo/src/rule_model/components/term/operation/operation_kind.rs b/nemo/src/rule_model/components/term/operation/operation_kind.rs index b79f175d0..90f1bae49 100644 --- a/nemo/src/rule_model/components/term/operation/operation_kind.rs +++ b/nemo/src/rule_model/components/term/operation/operation_kind.rs @@ -364,14 +364,16 @@ pub enum OperationKind { } impl OperationKind { - /// Precendence of operations for display purposes. + /// Precedence of operations for display purposes. pub(crate) fn precedence(&self) -> usize { match &self { - Self::NumericSum => 1, - Self::NumericSubtraction => 1, - Self::NumericProduct => 2, - Self::NumericDivision => 2, - _ => 3, + Self::NumericSum => 2, + Self::NumericSubtraction => 2, + Self::NumericProduct => 3, + Self::NumericDivision => 3, + Self::Equal => 0, + Self::Unequals => 0, + _ => 1, } } } From 2af956aac321c849f37dd8dbf941da5cbb093ac9 Mon Sep 17 00:00:00 2001 From: Alex Ivliev Date: Wed, 18 Sep 2024 16:01:14 +0200 Subject: [PATCH 182/214] Fix doc --- nemo-physical/src/tabular/operations.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo-physical/src/tabular/operations.rs b/nemo-physical/src/tabular/operations.rs index 9cd16a864..0f228aaef 100644 --- a/nemo-physical/src/tabular/operations.rs +++ b/nemo-physical/src/tabular/operations.rs @@ -247,7 +247,7 @@ where self.map.get(marker) } - /// Given a [OperationColumnMarker], find the corresponding [ExternalMarker]. + /// Given a [OperationColumnMarker], find the corresponding external marker. 
pub fn find<'a>(&'a self, marker: &OperationColumnMarker) -> Option<&'a ExternalMarker> { Some( self.map From d27979753b39f5b9f3a7abd1caee6f9e4b4170c4 Mon Sep 17 00:00:00 2001 From: Matthias Meissner Date: Wed, 18 Sep 2024 15:42:23 +0200 Subject: [PATCH 183/214] create closed public interface for Span --- nemo/src/io/formats/dsv/value_format.rs | 4 +- nemo/src/parser/ast.rs | 12 +- nemo/src/parser/ast/comment/closed.rs | 2 +- nemo/src/parser/ast/comment/doc.rs | 2 +- nemo/src/parser/ast/comment/line.rs | 2 +- nemo/src/parser/ast/comment/toplevel.rs | 2 +- nemo/src/parser/ast/directive/unknown.rs | 2 +- .../src/parser/ast/expression/basic/number.rs | 6 +- nemo/src/parser/ast/tag/aggregation.rs | 2 +- nemo/src/parser/ast/token.rs | 26 ++-- nemo/src/parser/error.rs | 22 +-- nemo/src/parser/input.rs | 52 ++++---- nemo/src/parser/span.rs | 125 +++++++++++++++++- .../rule_model/translation/basic/variable.rs | 4 +- 14 files changed, 183 insertions(+), 80 deletions(-) diff --git a/nemo/src/io/formats/dsv/value_format.rs b/nemo/src/io/formats/dsv/value_format.rs index 208f3947f..97336f2e8 100644 --- a/nemo/src/io/formats/dsv/value_format.rs +++ b/nemo/src/io/formats/dsv/value_format.rs @@ -203,7 +203,7 @@ impl DsvValueFormat { // Check if it's a valid tag name let parser_input = ParserInput::new(input, ParserState::default()); if let Ok((rest, _)) = Token::name(parser_input) { - if rest.span.0.is_empty() { + if rest.span.fragment().is_empty() { return Ok(AnyDataValue::new_iri(input.to_string())); } } @@ -211,7 +211,7 @@ impl DsvValueFormat { // Might still be a full IRI let parser_input = ParserInput::new(input, ParserState::default()); if let Ok((rest, iri)) = Token::iri(parser_input) { - if rest.span.0.is_empty() { + if rest.span.fragment().is_empty() { return Ok(AnyDataValue::new_iri(iri.to_string())); } } diff --git a/nemo/src/parser/ast.rs b/nemo/src/parser/ast.rs index 733d64053..cc1f3eb06 100644 --- a/nemo/src/parser/ast.rs +++ b/nemo/src/parser/ast.rs @@ -44,18 +44,18 @@ pub(crate) fn ast_to_ascii_tree<'a>(node: &'a dyn ProgramAST<'a>) -> Tree { } else { "\x1b[92m" }; - let fragment = *node.span().0.fragment(); - let str = if fragment.len() > 60 { - format!("{:?}[…]", &fragment[0..60]) + let span = node.span(); + let str = if span.fragment().len() > 60 { + format!("{:?}[…]", &span.fragment()[0..60]) } else { - format!("{:?}", fragment) + format!("{:?}", span.fragment()) }; Tree::Node( format!( "{} \x1b[34m@{}:{} {colour}{str}\x1b[0m", node.context().name(), - node.span().0.location_line(), - node.span().0.get_utf8_column() + node.span().location_line(), + node.span().get_utf8_column() ), vec, ) diff --git a/nemo/src/parser/ast/comment/closed.rs b/nemo/src/parser/ast/comment/closed.rs index 638635ed8..fe01b52c1 100644 --- a/nemo/src/parser/ast/comment/closed.rs +++ b/nemo/src/parser/ast/comment/closed.rs @@ -30,7 +30,7 @@ impl<'a> ClosedComment<'a> { // owned value or not? 
/// Return the content of the comment pub fn content(&self) -> String { - self.content.0.to_string() + self.content.fragment().to_string() } } diff --git a/nemo/src/parser/ast/comment/doc.rs b/nemo/src/parser/ast/comment/doc.rs index 2cfa63b0d..075137f3d 100644 --- a/nemo/src/parser/ast/comment/doc.rs +++ b/nemo/src/parser/ast/comment/doc.rs @@ -31,7 +31,7 @@ impl<'a> DocComment<'a> { pub fn content(&self) -> Vec { self.content .iter() - .map(|comment| comment.0.to_string()) + .map(|comment| comment.fragment().to_string()) .collect() } } diff --git a/nemo/src/parser/ast/comment/line.rs b/nemo/src/parser/ast/comment/line.rs index 37e2a27c5..89058e00f 100644 --- a/nemo/src/parser/ast/comment/line.rs +++ b/nemo/src/parser/ast/comment/line.rs @@ -30,7 +30,7 @@ const CONTEXT: ParserContext = ParserContext::Comment; impl<'a> LineComment<'a> { /// Return the content of the comment pub fn content(&self) -> String { - self.content.0.to_string() + self.content.fragment().to_string() } } diff --git a/nemo/src/parser/ast/comment/toplevel.rs b/nemo/src/parser/ast/comment/toplevel.rs index 2cf163aed..3344ca383 100644 --- a/nemo/src/parser/ast/comment/toplevel.rs +++ b/nemo/src/parser/ast/comment/toplevel.rs @@ -33,7 +33,7 @@ impl<'a> TopLevelComment<'a> { pub fn content(&self) -> Vec { self.content .iter() - .map(|comment| comment.0.to_string()) + .map(|comment| comment.fragment().to_string()) .collect() } } diff --git a/nemo/src/parser/ast/directive/unknown.rs b/nemo/src/parser/ast/directive/unknown.rs index 0940fc82d..9f1eac7a2 100644 --- a/nemo/src/parser/ast/directive/unknown.rs +++ b/nemo/src/parser/ast/directive/unknown.rs @@ -45,7 +45,7 @@ impl<'a> UnknownDirective<'a> { /// Return the content of the directive. pub fn content(&self) -> String { - self.content.0.to_string() + self.content.fragment().to_string() } /// Parse the name of the directive. diff --git a/nemo/src/parser/ast/expression/basic/number.rs b/nemo/src/parser/ast/expression/basic/number.rs index 4c64ecb72..359372839 100644 --- a/nemo/src/parser/ast/expression/basic/number.rs +++ b/nemo/src/parser/ast/expression/basic/number.rs @@ -98,16 +98,16 @@ impl<'a> Number<'a> { /// Recreate the number string without the type marker. fn number_string(&self) -> String { - let integer = format!("{}{}", self.integer_sign.print(), self.integer.span().0); + let integer = format!("{}{}", self.integer_sign.print(), self.integer.span()); let fractional = if let Some(fractional) = &self.fractional { - format!(".{}", fractional.span().0) + format!(".{}", fractional.span()) } else { String::default() }; let exponent = if let Some((sign, exponent)) = &self.exponent { - format!("e{}{}", sign.print(), exponent.span().0) + format!("e{}{}", sign.print(), exponent.span()) } else { String::default() }; diff --git a/nemo/src/parser/ast/tag/aggregation.rs b/nemo/src/parser/ast/tag/aggregation.rs index 370b6591a..c0575474d 100644 --- a/nemo/src/parser/ast/tag/aggregation.rs +++ b/nemo/src/parser/ast/tag/aggregation.rs @@ -34,7 +34,7 @@ impl<'a> AggregationTag<'a> { /// Return a string representation of the content of this tag. 
pub fn content(&self) -> String { - self.span.0.to_string() + self.span.fragment().to_string() } } diff --git a/nemo/src/parser/ast/token.rs b/nemo/src/parser/ast/token.rs index 3272ebe1a..d485fcc23 100644 --- a/nemo/src/parser/ast/token.rs +++ b/nemo/src/parser/ast/token.rs @@ -266,7 +266,7 @@ pub struct Token<'a> { impl<'a> Display for Token<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - self.span.0.fmt(f) + self.span.fmt(f) } } @@ -442,7 +442,7 @@ impl<'a> Token<'a> { // @baseerror would get matched and rest would be "error" and that will cause an // error. The desired behaviour is, that "baseerror" gets matched as a whole and // produces an [UnknownDirective]. - verify(Self::name, |tag| *tag.span.0.fragment() == directive::BASE), + verify(Self::name, |tag| tag.span.fragment() == directive::BASE), )(input) .map(|(rest, result)| { ( @@ -459,9 +459,7 @@ impl<'a> Token<'a> { context( ParserContext::token(TokenKind::DeclareDirective), // The reasoning behind using `verify` is the same as in the `directive_base` function. - verify(Self::name, |tag| { - *tag.span.0.fragment() == directive::DECLARE - }), + verify(Self::name, |tag| tag.span.fragment() == directive::DECLARE), )(input) .map(|(rest, result)| { ( @@ -477,9 +475,7 @@ impl<'a> Token<'a> { context( ParserContext::token(TokenKind::ExportDirective), // The reasoning behind using `verify` is the same as in the `directive_base` function. - verify(Self::name, |tag| { - *tag.span.0.fragment() == directive::EXPORT - }), + verify(Self::name, |tag| tag.span.fragment() == directive::EXPORT), )(input) .map(|(rest, result)| { ( @@ -495,9 +491,7 @@ impl<'a> Token<'a> { context( ParserContext::token(TokenKind::ImportDirective), // The reasoning behind using `verify` is the same as in the `directive_base` function. - verify(Self::name, |tag| { - *tag.span.0.fragment() == directive::IMPORT - }), + verify(Self::name, |tag| tag.span.fragment() == directive::IMPORT), )(input) .map(|(rest, result)| { ( @@ -513,9 +507,7 @@ impl<'a> Token<'a> { context( ParserContext::token(TokenKind::OutputDirective), // The reasoning behind using `verify` is the same as in the `directive_base` function. - verify(Self::name, |tag| { - *tag.span.0.fragment() == directive::OUTPUT - }), + verify(Self::name, |tag| tag.span.fragment() == directive::OUTPUT), )(input) .map(|(rest, result)| { ( @@ -531,9 +523,7 @@ impl<'a> Token<'a> { context( ParserContext::token(TokenKind::PrefixDirective), // The reasoning behind using `verify` is the same as in the `directive_base` function. 
- verify(Self::name, |tag| { - *tag.span.0.fragment() == directive::PREFIX - }), + verify(Self::name, |tag| tag.span.fragment() == directive::PREFIX), )(input) .map(|(rest, result)| { ( @@ -558,7 +548,7 @@ impl<'a> Token<'a> { tag(comment::DOC_COMMENT), tag(comment::COMMENT), )), - |result: &ParserInput| *result.span.0.fragment() != comment::DOC_COMMENT, + |result: &ParserInput| result.span.fragment() != comment::DOC_COMMENT, ), )(input) .map(|(rest, result)| { diff --git a/nemo/src/parser/error.rs b/nemo/src/parser/error.rs index c08be50f9..146a4dafd 100644 --- a/nemo/src/parser/error.rs +++ b/nemo/src/parser/error.rs @@ -68,7 +68,7 @@ pub(crate) fn recover<'a>( ) -> impl FnMut(ParserInput<'a>) -> ParserResult<'a, Statement<'a>> { move |input: ParserInput<'a>| match parser.parse(input.clone()) { Ok((rest, statement)) => Ok((rest, statement)), - Err(err) if input.span.0.is_empty() => Err(err), + Err(err) if input.span.fragment().is_empty() => Err(err), Err(nom::Err::Error(_)) | Err(nom::Err::Failure(_)) => { let (rest_input, token) = skip_statement(input).expect("this parser cannot fail"); Ok(( @@ -90,7 +90,7 @@ pub(crate) fn report_error<'a>( move |input| match parser.parse(input.clone()) { Ok(result) => Ok(result), Err(e) => { - if input.span.0.is_empty() { + if input.span.fragment().is_empty() { return Err(e); }; match &e { @@ -99,9 +99,9 @@ pub(crate) fn report_error<'a>( let error = match err { GenericErrorTree::Base { location, .. } => ParserError { position: CharacterPosition { - offset: location.span.0.location_offset(), - line: location.span.0.location_line(), - column: location.span.0.get_utf8_column() as u32, + offset: location.span.location_offset(), + line: location.span.location_line(), + column: location.span.get_utf8_column() as u32, }, context: vec![], }, @@ -110,9 +110,9 @@ pub(crate) fn report_error<'a>( contexts, } => ParserError { position: CharacterPosition { - offset: contexts[0].0.span.0.location_offset(), - line: contexts[0].0.span.0.location_line(), - column: contexts[0].0.span.0.get_utf8_column() as u32, + offset: contexts[0].0.span.location_offset(), + line: contexts[0].0.span.location_line(), + column: contexts[0].0.span.get_utf8_column() as u32, }, context: match contexts[0].1 { StackContext::Kind(_) => todo!(), @@ -145,7 +145,7 @@ pub(crate) fn _transform_error_tree<'a, Output>( move |input| match parser.parse(input.clone()) { Ok(result) => Ok(result), Err(e) => { - if input.span.0.is_empty() { + if input.span.fragment().is_empty() { return Err(e); }; match &e { @@ -177,7 +177,7 @@ fn _context_strs( fn _get_deepest_errors<'a>(e: &'a ParserErrorTree<'a>) -> (CharacterPosition, Vec) { match e { ParserErrorTree::Base { location, .. } => { - let span = location.span.0; + let span = location.span; let err_pos = CharacterPosition { offset: span.location_offset(), line: span.location_line(), @@ -195,7 +195,7 @@ fn _get_deepest_errors<'a>(e: &'a ParserErrorTree<'a>) -> (CharacterPosition, Ve // let mut err_pos = Position::default(); match &**base { ParserErrorTree::Base { location, .. } => { - let span = location.span.0; + let span = location.span; let err_pos = CharacterPosition { offset: span.location_offset(), line: span.location_line(), diff --git a/nemo/src/parser/input.rs b/nemo/src/parser/input.rs index dc00d6cdb..fa6ddbd59 100644 --- a/nemo/src/parser/input.rs +++ b/nemo/src/parser/input.rs @@ -20,7 +20,7 @@ impl<'a> ParserInput<'a> { /// Create a new [ParserInput] from a string slice. 
pub fn new(input: &'a str, state: ParserState) -> Self { Self { - span: Span(LocatedSpan::new(input)), + span: Span::new(input), state, } } @@ -28,27 +28,27 @@ impl<'a> ParserInput<'a> { impl<'a> AsBytes for ParserInput<'a> { fn as_bytes(&self) -> &[u8] { - self.span.0.fragment().as_bytes() + self.span.fragment().as_bytes() } } impl<'a> nom::Compare> for ParserInput<'a> { fn compare(&self, t: ParserInput) -> nom::CompareResult { - self.span.0.compare(t.as_bytes()) + self.span.compare(t.span) } fn compare_no_case(&self, t: ParserInput) -> nom::CompareResult { - self.span.0.compare_no_case(t.as_bytes()) + self.span.compare_no_case(t.span) } } impl<'a> nom::Compare<&str> for ParserInput<'a> { fn compare(&self, t: &str) -> nom::CompareResult { - self.span.0.compare(t) + self.span.compare(t) } fn compare_no_case(&self, t: &str) -> nom::CompareResult { - self.span.0.compare_no_case(t) + self.span.compare_no_case(t) } } @@ -58,23 +58,23 @@ impl<'a> nom::ExtendInto for ParserInput<'a> { type Extender = String; fn new_builder(&self) -> Self::Extender { - self.span.0.new_builder() + String::new() } fn extend_into(&self, acc: &mut Self::Extender) { - self.span.0.extend_into(acc) + acc.push_str(self.span.fragment()) } } impl<'a> nom::FindSubstring<&str> for ParserInput<'a> { fn find_substring(&self, substr: &str) -> Option { - self.span.0.find_substring(substr) + self.span.find_substring(substr) } } impl<'a> InputLength for ParserInput<'a> { fn input_len(&self) -> usize { - self.span.0.input_len() + self.span.input_len() } } @@ -84,42 +84,42 @@ impl<'a> InputIter for ParserInput<'a> { type IterElem = Chars<'a>; fn iter_indices(&self) -> Self::Iter { - self.span.0.iter_indices() + self.span.iter_indices() } fn iter_elements(&self) -> Self::IterElem { - self.span.0.iter_elements() + self.span.iter_elements() } fn position
<P>
(&self, predicate: P) -> Option where P: Fn(Self::Item) -> bool, { - self.span.0.position(predicate) + self.span.position(predicate) } fn slice_index(&self, count: usize) -> Result { - self.span.0.slice_index(count) + self.span.slice_index(count) } } impl InputTake for ParserInput<'_> { fn take(&self, count: usize) -> Self { Self { - span: Span(self.span.0.take(count)), + span: self.span.take(count), state: self.state.clone(), } } fn take_split(&self, count: usize) -> (Self, Self) { - let (first, second) = self.span.0.take_split(count); + let (first, second) = self.span.take_split(count); ( Self { - span: Span(first), + span: first, state: self.state.clone(), }, Self { - span: Span(second), + span: second, state: self.state.clone(), }, ) @@ -136,7 +136,7 @@ impl InputTakeAtPosition for ParserInput<'_> { where P: Fn(Self::Item) -> bool, { - match self.span.0.position(predicate) { + match self.span.position(predicate) { Some(n) => Ok(self.take_split(n)), None => Err(nom::Err::Incomplete(nom::Needed::new(1))), } @@ -175,11 +175,11 @@ impl InputTakeAtPosition for ParserInput<'_> { where P: Fn(Self::Item) -> bool, { - match self.span.0.fragment().position(predicate) { + match self.span.fragment().position(predicate) { Some(0) => Err(nom::Err::Error(E::from_error_kind(self.clone(), e))), Some(n) => Ok(self.take_split(n)), None => { - if self.span.0.fragment().input_len() == 0 { + if self.span.fragment().input_len() == 0 { Err(nom::Err::Error(E::from_error_kind(self.clone(), e))) } else { Ok(self.take_split(self.input_len())) @@ -191,7 +191,7 @@ impl InputTakeAtPosition for ParserInput<'_> { impl nom::Offset for ParserInput<'_> { fn offset(&self, second: &Self) -> usize { - self.span.0.offset(&second.span.0) + self.span.offset(&second.span) } } @@ -207,7 +207,7 @@ where { fn slice(&self, range: R) -> Self { ParserInput { - span: Span(self.span.0.slice(range)), + span: self.span.slice(range), state: self.state.clone(), } } @@ -215,7 +215,7 @@ where impl nom_greedyerror::Position for ParserInput<'_> { fn position(&self) -> usize { - nom_greedyerror::Position::position(&self.span.0) + nom_greedyerror::Position::position(&self.span) } } @@ -224,8 +224,8 @@ impl std::fmt::Display for ParserInput<'_> { write!( f, "line {}, column {}", - self.span.0.location_line(), - self.span.0.get_utf8_column() + self.span.location_line(), + self.span.get_utf8_column() ) } } diff --git a/nemo/src/parser/span.rs b/nemo/src/parser/span.rs index 4613c1e03..5e9c551c5 100644 --- a/nemo/src/parser/span.rs +++ b/nemo/src/parser/span.rs @@ -1,8 +1,10 @@ //! This module defines data structures that mark spans of text in an input file. -use std::ops::Range; +use std::{ops::Range, path::Display}; +use nom::InputIter; use nom_locate::LocatedSpan; +use serde::de::Expected; /// Locates a certain character within a file, /// giving its offset, line and column number @@ -59,15 +61,13 @@ impl CharacterRange { /// Maker for a region of text within a string slice #[derive(Debug, Clone, Copy)] -pub struct Span<'a>(pub LocatedSpan<&'a str>); +pub struct Span<'a>(LocatedSpan<&'a str>); -impl<'a> From> for Span<'a> { - fn from(value: LocatedSpan<&'a str>) -> Self { - Self(value) +impl<'a> Span<'a> { + pub fn new(inner: &'a str) -> Span<'a> { + Span(LocatedSpan::new(inner)) } -} -impl<'a> Span<'a> { /// Compute the [CharacterRange] for this region of text. 
pub fn range(&self) -> CharacterRange { let start = CharacterPosition { @@ -166,4 +166,115 @@ impl<'a> Span<'a> { } } } + + pub fn location_offset(&self) -> usize { + self.0.location_offset() + } + + pub fn location_line(&self) -> u32 { + self.0.location_line() + } + + pub fn get_utf8_column(&self) -> usize { + self.0.get_utf8_column() + } + + pub fn fragment(&self) -> &'_ str { + self.0.fragment() + } +} + +impl<'a> std::fmt::Display for Span<'a> { + fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + self.0.fmt(formatter) + } +} + +impl<'a, R> nom::Slice for Span<'a> +where + &'a str: nom::Slice, +{ + fn slice(&self, range: R) -> Self { + Span(self.0.slice(range)) + } +} + +impl nom_greedyerror::Position for Span<'_> { + fn position(&self) -> usize { + nom_greedyerror::Position::position(&self.0) + } +} + +impl nom::Offset for Span<'_> { + fn offset(&self, second: &Self) -> usize { + self.0.offset(&second.0) + } +} + +impl<'a> InputIter for Span<'a> { + type Item = char; + type Iter = <&'a str as InputIter>::Iter; + type IterElem = <&'a str as InputIter>::IterElem; + + fn iter_indices(&self) -> Self::Iter { + self.0.iter_indices() + } + + fn iter_elements(&self) -> Self::IterElem { + self.0.iter_elements() + } + + fn position
<P>
(&self, predicate: P) -> Option + where + P: Fn(Self::Item) -> bool, + { + self.0.position(predicate) + } + + fn slice_index(&self, count: usize) -> Result { + self.0.slice_index(count) + } +} + +impl nom::InputTake for Span<'_> { + fn take(&self, count: usize) -> Self { + Self(self.0.take(count)) + } + + fn take_split(&self, count: usize) -> (Self, Self) { + let (left, right) = self.0.take_split(count); + (Self(left), Self(right)) + } +} + +impl nom::InputLength for Span<'_> { + fn input_len(&self) -> usize { + self.0.input_len() + } +} + +impl nom::FindSubstring<&'_ str> for Span<'_> { + fn find_substring(&self, substr: &str) -> Option { + self.0.find_substring(substr) + } +} + +impl<'a> nom::Compare> for Span<'a> { + fn compare(&self, t: Span) -> nom::CompareResult { + self.0.compare(t.fragment().as_bytes()) + } + + fn compare_no_case(&self, t: Span) -> nom::CompareResult { + self.0.compare_no_case(t.fragment().as_bytes()) + } +} + +impl<'a> nom::Compare<&str> for Span<'a> { + fn compare(&self, t: &str) -> nom::CompareResult { + self.0.compare(t) + } + + fn compare_no_case(&self, t: &str) -> nom::CompareResult { + self.0.compare_no_case(t) + } } diff --git a/nemo/src/rule_model/translation/basic/variable.rs b/nemo/src/rule_model/translation/basic/variable.rs index fe389b781..0c2b675fa 100644 --- a/nemo/src/rule_model/translation/basic/variable.rs +++ b/nemo/src/rule_model/translation/basic/variable.rs @@ -42,7 +42,9 @@ impl<'a> ASTProgramTranslation<'a> { } else { return Err(TranslationError::new( variable.span(), - TranslationErrorKind::NamedAnonymous(variable.span().0.to_string()), + TranslationErrorKind::NamedAnonymous( + variable.span().fragment().to_string(), + ), )); } } From 393a80b68c3b8de6ebed8089081b895995598fca Mon Sep 17 00:00:00 2001 From: Matthias Meissner Date: Wed, 18 Sep 2024 15:48:00 +0200 Subject: [PATCH 184/214] fix warning --- nemo/src/parser/input.rs | 1 - nemo/src/parser/span.rs | 15 +++++++++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/nemo/src/parser/input.rs b/nemo/src/parser/input.rs index fa6ddbd59..d3a1ec7b7 100644 --- a/nemo/src/parser/input.rs +++ b/nemo/src/parser/input.rs @@ -5,7 +5,6 @@ use std::str::{CharIndices, Chars}; use nom::{ error::ErrorKind, AsBytes, IResult, InputIter, InputLength, InputTake, InputTakeAtPosition, }; -use nom_locate::LocatedSpan; use super::{span::Span, ParserState}; diff --git a/nemo/src/parser/span.rs b/nemo/src/parser/span.rs index 5e9c551c5..982ee3b02 100644 --- a/nemo/src/parser/span.rs +++ b/nemo/src/parser/span.rs @@ -1,10 +1,9 @@ //! This module defines data structures that mark spans of text in an input file. -use std::{ops::Range, path::Display}; +use std::ops::Range; use nom::InputIter; use nom_locate::LocatedSpan; -use serde::de::Expected; /// Locates a certain character within a file, /// giving its offset, line and column number @@ -64,6 +63,14 @@ impl CharacterRange { pub struct Span<'a>(LocatedSpan<&'a str>); impl<'a> Span<'a> { + /// Create a span for a particular input with default offset and line values and empty extra data. + /// You can compute the column through the get_column or get_utf8_column methods. + /// + /// offset starts at 0, line starts at 1, and column starts at 1. + /// + /// Do not use this constructor in parser functions; + /// nom and nom_locate assume span offsets are relative to the beginning of the same input. + /// In these cases, you probably want to use the nom::traits::Slice trait instead. 
pub fn new(inner: &'a str) -> Span<'a> { Span(LocatedSpan::new(inner)) } @@ -167,18 +174,22 @@ impl<'a> Span<'a> { } } + /// The offset represents the position of the fragment relatively to the input of the parser. It starts at offset 0. pub fn location_offset(&self) -> usize { self.0.location_offset() } + /// The line number of the fragment relatively to the input of the parser. It starts at line 1. pub fn location_line(&self) -> u32 { self.0.location_line() } + /// Return the column index for UTF8 text. Return value is unspecified for non-utf8 text. pub fn get_utf8_column(&self) -> usize { self.0.get_utf8_column() } + /// The fragment that is spanned. The fragment represents a part of the input of the parser. pub fn fragment(&self) -> &'_ str { self.0.fragment() } From a4ea73ea1349acde690e0cc562ecb4d7d0d1785a Mon Sep 17 00:00:00 2001 From: Matthias Meissner Date: Wed, 18 Sep 2024 18:13:23 +0200 Subject: [PATCH 185/214] ditch nom_locate --- Cargo.lock | 2 +- nemo/Cargo.toml | 7 +- nemo/src/lib.rs | 1 + .../ast/expression/complex/arithmetic.rs | 14 +- nemo/src/parser/error.rs | 6 +- nemo/src/parser/span.rs | 208 +++++++++++------- 6 files changed, 139 insertions(+), 99 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f5afbfa5f..96aae21f4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1469,6 +1469,7 @@ dependencies = [ "ariadne", "ascii_tree", "assert_fs", + "bytecount", "csv", "dyn-clone", "enum-assoc", @@ -1481,7 +1482,6 @@ dependencies = [ "nom 7.1.3", "nom-greedyerror", "nom-supreme", - "nom_locate", "num", "oxiri", "path-slash", diff --git a/nemo/Cargo.toml b/nemo/Cargo.toml index ac6fb62cf..6ac85cd79 100644 --- a/nemo/Cargo.toml +++ b/nemo/Cargo.toml @@ -29,19 +29,18 @@ csv = "1.1.6" thiserror = "1.0" flate2 = "1" sanitise-file-name = "1.0.0" -nom_locate = { version = "4.1.0", features = [ "runtime-dispatch-simd" ] } getrandom = { version = "0.2.9", default-features = false } path-slash = "0.2.1" rio_api = "0.8.4" rio_turtle = "0.8.4" rio_xml = "0.8.4" oxiri = "0.2.2" -tokio = { version = "1.29.1", features = [ "rt" ] } +tokio = { version = "1.29.1", features = ["rt"] } reqwest = { version = "0.12.2" } num = "0.4.0" ascii_tree = "0.1.1" serde_json = "1.0.108" -serde = {version = "1.0.138", features = ["derive"] } +serde = { version = "1.0.138", features = ["derive"] } tower-lsp = "0.20.0" dyn-clone = "1.0.16" unicode-ident = "1.0.12" @@ -52,6 +51,7 @@ ariadne = "0.4.1" strum = "0.26.3" strum_macros = "0.26.4" similar-string = "1.4.3" +bytecount = "0.6.8" [dev-dependencies] env_logger = "*" @@ -59,4 +59,3 @@ assert_fs = "1.0" test-log = "0.2" quickcheck = "1" quickcheck_macros = "1" - diff --git a/nemo/src/lib.rs b/nemo/src/lib.rs index 5900ded0f..9d325da62 100644 --- a/nemo/src/lib.rs +++ b/nemo/src/lib.rs @@ -17,6 +17,7 @@ #![feature(macro_metavar_expr)] #![feature(assert_matches)] #![feature(iter_intersperse)] +#![feature(str_from_raw_parts)] /// The crate for underlying physical operations. 
pub extern crate nemo_physical; diff --git a/nemo/src/parser/ast/expression/complex/arithmetic.rs b/nemo/src/parser/ast/expression/complex/arithmetic.rs index c51d00e7d..f1a9c682f 100644 --- a/nemo/src/parser/ast/expression/complex/arithmetic.rs +++ b/nemo/src/parser/ast/expression/complex/arithmetic.rs @@ -118,7 +118,7 @@ struct ArithmeticChain<'a> { } impl<'a> ArithmeticChain<'a> { - fn fold(mut self, input_span: &Span<'a>) -> Expression<'a> { + fn fold(mut self) -> Expression<'a> { if self.sequence.is_empty() { self.initial } else { @@ -126,7 +126,7 @@ impl<'a> ArithmeticChain<'a> { let sequence_first = self.sequence.remove(0); let start = Arithmetic { - span: input_span.enclose(&self.initial.span(), &sequence_first.1.span()), + span: Span::enclose(&self.initial.span(), &sequence_first.1.span()), kind: sequence_first.0, left: Box::new(self.initial), right: Box::new(sequence_first.1), @@ -135,7 +135,7 @@ impl<'a> ArithmeticChain<'a> { Expression::Arithmetic(sequence_rest.into_iter().fold( start, |acc, (kind, expression)| Arithmetic { - span: input_span.enclose(&acc.span, &expression.span()), + span: Span::enclose(&acc.span, &expression.span()), kind, left: Box::new(Expression::Arithmetic(acc)), right: Box::new(expression), @@ -186,8 +186,6 @@ impl<'a> Arithmetic<'a> { /// Parse sum. fn parse_sum(input: ParserInput<'a>) -> ParserResult<'a, Expression<'a>> { - let input_span = input.span; - pair( Self::parse_product, many0(preceded( @@ -203,13 +201,13 @@ impl<'a> Arithmetic<'a> { ( rest, ArithmeticChain { - initial: initial.fold(&input_span), + initial: initial.fold(), sequence: sequence .into_iter() - .map(|(operation, chain)| (operation, chain.fold(&input_span))) + .map(|(operation, chain)| (operation, chain.fold())) .collect(), } - .fold(&input_span), + .fold(), ) }) } diff --git a/nemo/src/parser/error.rs b/nemo/src/parser/error.rs index 146a4dafd..14aee9ef6 100644 --- a/nemo/src/parser/error.rs +++ b/nemo/src/parser/error.rs @@ -16,7 +16,7 @@ use super::{ token::Token, }, context::ParserContext, - span::CharacterPosition, + span::{CharacterPosition, Span}, ParserInput, ParserResult, }; @@ -47,7 +47,7 @@ pub(crate) fn skip_statement(input: ParserInput<'_>) -> ParserResult<'_, Token<' preceded(take_until("\r\n\r\n"), Token::double_newline), preceded(take_until("\r\r"), Token::double_newline), )), - move |token| Token::error(input_span.enclose(&input_span, &token.span())), + move |token| Token::error(Span::enclose(&input_span, &token.span())), ); // TODO: Should there additional whitespace be allowed in-between the dot and the newline? let until_dot_newline = map( @@ -56,7 +56,7 @@ pub(crate) fn skip_statement(input: ParserInput<'_>) -> ParserResult<'_, Token<' preceded(take_until(".\r\n"), terminated(Token::dot, line_ending)), preceded(take_until(".\r"), terminated(Token::dot, line_ending)), )), - move |token| Token::error(input_span.enclose(&input_span, &token.span())), + move |token| Token::error(Span::enclose(&input_span, &token.span())), ); let until_eof = map(take_while(|_| true), move |_| Token::error(input_span)); diff --git a/nemo/src/parser/span.rs b/nemo/src/parser/span.rs index 982ee3b02..cb1ef39e1 100644 --- a/nemo/src/parser/span.rs +++ b/nemo/src/parser/span.rs @@ -1,9 +1,9 @@ //! This module defines data structures that mark spans of text in an input file. 
-use std::ops::Range; +use core::str; +use std::ops::{Deref, Range}; -use nom::InputIter; -use nom_locate::LocatedSpan; +use nom::{Offset, Slice}; /// Locates a certain character within a file, /// giving its offset, line and column number @@ -60,7 +60,14 @@ impl CharacterRange { /// Maker for a region of text within a string slice #[derive(Debug, Clone, Copy)] -pub struct Span<'a>(LocatedSpan<&'a str>); +pub struct Span<'a> { + allocation_start: *const u8, + fragment: &'a str, + line: u32, +} + +// SAFETY: Conceptionally, a span is just a slice inside a slice. +unsafe impl Sync for Span<'_> {} impl<'a> Span<'a> { /// Create a span for a particular input with default offset and line values and empty extra data. @@ -72,27 +79,30 @@ impl<'a> Span<'a> { /// nom and nom_locate assume span offsets are relative to the beginning of the same input. /// In these cases, you probably want to use the nom::traits::Slice trait instead. pub fn new(inner: &'a str) -> Span<'a> { - Span(LocatedSpan::new(inner)) + Span { + allocation_start: inner.as_ptr(), + fragment: inner, + line: 1, + } } /// Compute the [CharacterRange] for this region of text. pub fn range(&self) -> CharacterRange { let start = CharacterPosition { - offset: self.0.location_offset(), - line: self.0.location_line(), - column: u32::try_from(self.0.get_utf8_column()) + offset: self.location_offset(), + line: self.line, + column: u32::try_from(self.get_utf8_column()) .expect("cannot convert column number to u32"), }; - let end_offset = start.offset + self.0.fragment().len(); + let end_offset = start.offset + self.fragment.len(); let end_line = start.line - + u32::try_from(self.0.fragment().lines().count() - 1) + + u32::try_from(self.fragment.lines().count() - 1) .expect("cannot convert line number to u32"); - let end_column = if self.0.fragment().lines().count() > 1 { + let end_column = if self.fragment.lines().count() > 1 { u32::try_from( 1 + self - .0 - .fragment() + .fragment .lines() .last() .expect("there is at least one line") @@ -101,7 +111,7 @@ impl<'a> Span<'a> { .expect("cannot convert column number to u32") } else { start.column - + u32::try_from(self.0.fragment().len()).expect("cannot convert text range to u32") + + u32::try_from(self.fragment.len()).expect("cannot convert text range to u32") }; let end = CharacterPosition { @@ -113,179 +123,211 @@ impl<'a> Span<'a> { CharacterRange { start, end } } - /// TODO: Description and Specify safety conditions + /// Extend this span up to (excluding) the first character of rest pub fn until_rest(&self, rest: &Self) -> Self { - unsafe { - Self(LocatedSpan::new_from_raw_offset( - self.0.location_offset(), - self.0.location_line(), - &self.0[..(rest.0.location_offset() - self.0.location_offset())], - (), - )) + assert_eq!(self.allocation_start, rest.allocation_start); + + let start = self.location_offset(); + let end = rest.location_offset(); + + // SAFETY: By the assertion above, self and rest are derived from the same allocation + // because there is no safe way to create those spans otherwise. + let fragment = + unsafe { std::str::from_raw_parts(self.allocation_start.add(start), end - start) }; + + Self { + allocation_start: self.allocation_start, + fragment, + line: self.line, } } /// Create a [Span] that encloses the given [Span]s. 
- /// TODO: Description and Specify safety conditions and verify that this is correct - pub fn enclose(&self, first: &Self, second: &Self) -> Self { - unsafe { - let slice_length = - second.0.location_offset() + second.0.len() - first.0.location_offset(); - let slice_beginning = first.0.location_offset() - self.0.location_offset(); - - Self(LocatedSpan::new_from_raw_offset( - first.0.location_offset(), - first.0.location_line(), - &self.0[slice_beginning..(slice_beginning + slice_length)], - (), - )) + pub fn enclose(first: &Self, second: &Self) -> Self { + assert_eq!(first.allocation_start, second.allocation_start); + + let start = first.location_offset(); + let end = second.location_offset() + second.fragment.len(); + + assert!(end >= start); + + // SAFETY: By the assertion above, self and rest are derived from the same allocation, + // because there is no safe way to create them otherwise + let fragment = + unsafe { std::str::from_raw_parts(first.allocation_start.add(start), end - start) }; + + Self { + allocation_start: first.allocation_start, + fragment, + line: first.line, } } /// Return a [Span] that points to the beginning. pub fn beginning(&self) -> Self { - unsafe { - if self.0.is_empty() { - *self - } else { - Self(LocatedSpan::new_from_raw_offset( - self.0.location_offset(), - self.0.location_line(), - &self.0[0..1], - (), - )) + if self.fragment.is_empty() { + self.clone() + } else { + Self { + allocation_start: self.allocation_start, + fragment: &self.fragment[0..1], + line: self.line, } } } /// Return an empty [Span] that points to the beginning. pub fn empty(&self) -> Self { - unsafe { - if self.0.is_empty() { - *self - } else { - Self(LocatedSpan::new_from_raw_offset( - self.0.location_offset(), - self.0.location_line(), - &self.0[0..0], - (), - )) - } + Self { + allocation_start: self.allocation_start, + fragment: &self.fragment[0..0], + line: self.line, } } + fn get_slice_before(&self) -> &str { + // SAFETY: since the outer slice starts at self.allocation_start + // everything from there to self.fragment is also a valid slice. + unsafe { str::from_raw_parts(self.allocation_start, self.location_offset()) } + } + /// The offset represents the position of the fragment relatively to the input of the parser. It starts at offset 0. pub fn location_offset(&self) -> usize { - self.0.location_offset() + // SAFETY: self.fragment.as_ptr() is greater then or equal to self.allocation start + // and they are both derived from the same initial slice. + unsafe { self.fragment.as_ptr().offset_from(self.allocation_start) as usize } } /// The line number of the fragment relatively to the input of the parser. It starts at line 1. pub fn location_line(&self) -> u32 { - self.0.location_line() + self.line } /// Return the column index for UTF8 text. Return value is unspecified for non-utf8 text. pub fn get_utf8_column(&self) -> usize { - self.0.get_utf8_column() + let slice_before = self.get_slice_before(); + let offset = slice_before.rfind('\n').map(|x| x + 1).unwrap_or(0); + bytecount::num_chars(slice_before[offset..].as_bytes()) } /// The fragment that is spanned. The fragment represents a part of the input of the parser. 
pub fn fragment(&self) -> &'_ str { - self.0.fragment() + &self.fragment } } impl<'a> std::fmt::Display for Span<'a> { fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { - self.0.fmt(formatter) + self.fragment.fmt(formatter) } } -impl<'a, R> nom::Slice for Span<'a> +impl Deref for Span<'_> { + type Target = str; + + fn deref(&self) -> &Self::Target { + self.fragment + } +} + +impl<'a, R> Slice for Span<'a> where - &'a str: nom::Slice, + &'a str: Slice, { fn slice(&self, range: R) -> Self { - Span(self.0.slice(range)) + let next_fragment = self.fragment.slice(range); + let consumed = &self.fragment[..self.fragment.offset(&next_fragment)]; + let line_offset: u32 = consumed + .bytes() + .filter(|b| *b == b'\n') + .count() + .try_into() + .expect("line count overflowed u32"); + + Span { + allocation_start: self.allocation_start, + fragment: &next_fragment, + line: self.line + line_offset, + } } } impl nom_greedyerror::Position for Span<'_> { fn position(&self) -> usize { - nom_greedyerror::Position::position(&self.0) + self.location_offset() } } -impl nom::Offset for Span<'_> { +impl Offset for Span<'_> { fn offset(&self, second: &Self) -> usize { - self.0.offset(&second.0) + assert_eq!(self.allocation_start, second.allocation_start); + self.fragment.offset(second.fragment) } } -impl<'a> InputIter for Span<'a> { +impl<'a> nom::InputIter for Span<'a> { type Item = char; - type Iter = <&'a str as InputIter>::Iter; - type IterElem = <&'a str as InputIter>::IterElem; + type Iter = <&'a str as nom::InputIter>::Iter; + type IterElem = <&'a str as nom::InputIter>::IterElem; fn iter_indices(&self) -> Self::Iter { - self.0.iter_indices() + self.fragment.iter_indices() } fn iter_elements(&self) -> Self::IterElem { - self.0.iter_elements() + self.fragment.iter_elements() } fn position
<P>
(&self, predicate: P) -> Option where P: Fn(Self::Item) -> bool, { - self.0.position(predicate) + self.fragment.position(predicate) } fn slice_index(&self, count: usize) -> Result { - self.0.slice_index(count) + self.fragment.slice_index(count) } } impl nom::InputTake for Span<'_> { fn take(&self, count: usize) -> Self { - Self(self.0.take(count)) + self.slice(..count) } fn take_split(&self, count: usize) -> (Self, Self) { - let (left, right) = self.0.take_split(count); - (Self(left), Self(right)) + (self.slice(count..), self.slice(..count)) } } impl nom::InputLength for Span<'_> { fn input_len(&self) -> usize { - self.0.input_len() + self.fragment.input_len() } } impl nom::FindSubstring<&'_ str> for Span<'_> { fn find_substring(&self, substr: &str) -> Option { - self.0.find_substring(substr) + self.fragment.find_substring(substr) } } impl<'a> nom::Compare> for Span<'a> { fn compare(&self, t: Span) -> nom::CompareResult { - self.0.compare(t.fragment().as_bytes()) + self.fragment.compare(t.fragment().as_bytes()) } fn compare_no_case(&self, t: Span) -> nom::CompareResult { - self.0.compare_no_case(t.fragment().as_bytes()) + self.fragment.compare_no_case(t.fragment().as_bytes()) } } impl<'a> nom::Compare<&str> for Span<'a> { fn compare(&self, t: &str) -> nom::CompareResult { - self.0.compare(t) + self.fragment.compare(t) } fn compare_no_case(&self, t: &str) -> nom::CompareResult { - self.0.compare_no_case(t) + self.fragment.compare_no_case(t) } } From 1dfbffa876613bb05d1681b07b30e37148f9f072 Mon Sep 17 00:00:00 2001 From: Matthias Meissner Date: Wed, 18 Sep 2024 18:39:53 +0200 Subject: [PATCH 186/214] code attribution --- nemo/src/parser/span.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/nemo/src/parser/span.rs b/nemo/src/parser/span.rs index cb1ef39e1..cb4866135 100644 --- a/nemo/src/parser/span.rs +++ b/nemo/src/parser/span.rs @@ -1,4 +1,5 @@ //! This module defines data structures that mark spans of text in an input file. +//! The `Span` implementation is inspired by nom_locate. 
(https://github.com/fflorent/nom_locate) use core::str; use std::ops::{Deref, Range}; From a6c3c69d661252db671f926927b40549daea656c Mon Sep 17 00:00:00 2001 From: Matthias Meissner Date: Wed, 18 Sep 2024 20:42:56 +0200 Subject: [PATCH 187/214] cargo fmt --- nemo-language-server/src/language_server.rs | 12 +++- .../src/language_server/lsp_component.rs | 58 ++++++++++++++++--- .../src/language_server/token_type.rs | 8 ++- 3 files changed, 65 insertions(+), 13 deletions(-) diff --git a/nemo-language-server/src/language_server.rs b/nemo-language-server/src/language_server.rs index aa737c538..1ad1ec0de 100644 --- a/nemo-language-server/src/language_server.rs +++ b/nemo-language-server/src/language_server.rs @@ -2,9 +2,9 @@ mod lsp_component; mod nemo_position; mod token_type; -use strum::IntoEnumIterator; use std::collections::{BTreeMap, BTreeSet, HashMap}; use std::vec; +use strum::IntoEnumIterator; use anyhow::anyhow; use futures::lock::Mutex; @@ -20,7 +20,15 @@ use nemo_position::{ }; use token_type::TokenType; use tower_lsp::lsp_types::{ - Diagnostic, DidChangeTextDocumentParams, DidOpenTextDocumentParams, DocumentChangeOperation, DocumentChanges, DocumentSymbol, DocumentSymbolOptions, DocumentSymbolParams, DocumentSymbolResponse, InitializeParams, InitializeResult, InitializedParams, Location, MessageType, OneOf, OptionalVersionedTextDocumentIdentifier, Position, PrepareRenameResponse, Range, ReferenceParams, RenameOptions, RenameParams, SemanticToken, SemanticTokens, SemanticTokensFullOptions, SemanticTokensLegend, SemanticTokensOptions, SemanticTokensParams, SemanticTokensResult, SemanticTokensServerCapabilities, ServerCapabilities, TextDocumentEdit, TextDocumentPositionParams, TextDocumentSyncCapability, TextDocumentSyncKind, TextEdit, Url, VersionedTextDocumentIdentifier, WorkDoneProgressOptions, WorkspaceEdit + Diagnostic, DidChangeTextDocumentParams, DidOpenTextDocumentParams, DocumentChangeOperation, + DocumentChanges, DocumentSymbol, DocumentSymbolOptions, DocumentSymbolParams, + DocumentSymbolResponse, InitializeParams, InitializeResult, InitializedParams, Location, + MessageType, OneOf, OptionalVersionedTextDocumentIdentifier, Position, PrepareRenameResponse, + Range, ReferenceParams, RenameOptions, RenameParams, SemanticToken, SemanticTokens, + SemanticTokensFullOptions, SemanticTokensLegend, SemanticTokensOptions, SemanticTokensParams, + SemanticTokensResult, SemanticTokensServerCapabilities, ServerCapabilities, TextDocumentEdit, + TextDocumentPositionParams, TextDocumentSyncCapability, TextDocumentSyncKind, TextEdit, Url, + VersionedTextDocumentIdentifier, WorkDoneProgressOptions, WorkspaceEdit, }; use tower_lsp::{Client, LanguageServer}; diff --git a/nemo-language-server/src/language_server/lsp_component.rs b/nemo-language-server/src/language_server/lsp_component.rs index f35946144..7cc89ce7f 100644 --- a/nemo-language-server/src/language_server/lsp_component.rs +++ b/nemo-language-server/src/language_server/lsp_component.rs @@ -86,15 +86,55 @@ where fn symbol_info(&self) -> Option { let kind = match self.context() { - ParserContext::Program => return Some(LSPSymbolInfo {kind: SymbolKind::FILE, name: "Program".to_string()}), - ParserContext::Rule => return Some(LSPSymbolInfo {kind: SymbolKind::CLASS, name: "Rule".to_string()}), - ParserContext::Base => return Some(LSPSymbolInfo {kind: SymbolKind::PROPERTY, name: "Base".to_string()}), - ParserContext::Declare => return Some(LSPSymbolInfo {kind: SymbolKind::PROPERTY, name: "Declare".to_string()}), - ParserContext::Import => 
return Some(LSPSymbolInfo {kind: SymbolKind::PROPERTY, name: "Import".to_string()}), - ParserContext::Export => return Some(LSPSymbolInfo {kind: SymbolKind::PROPERTY, name: "Export".to_string()}), - ParserContext::Prefix => return Some(LSPSymbolInfo {kind: SymbolKind::PROPERTY, name: "Prefix".to_string()}), - ParserContext::Output => return Some(LSPSymbolInfo {kind: SymbolKind::PROPERTY, name: "Output".to_string()}), - + ParserContext::Program => { + return Some(LSPSymbolInfo { + kind: SymbolKind::FILE, + name: "Program".to_string(), + }) + } + ParserContext::Rule => { + return Some(LSPSymbolInfo { + kind: SymbolKind::CLASS, + name: "Rule".to_string(), + }) + } + ParserContext::Base => { + return Some(LSPSymbolInfo { + kind: SymbolKind::PROPERTY, + name: "Base".to_string(), + }) + } + ParserContext::Declare => { + return Some(LSPSymbolInfo { + kind: SymbolKind::PROPERTY, + name: "Declare".to_string(), + }) + } + ParserContext::Import => { + return Some(LSPSymbolInfo { + kind: SymbolKind::PROPERTY, + name: "Import".to_string(), + }) + } + ParserContext::Export => { + return Some(LSPSymbolInfo { + kind: SymbolKind::PROPERTY, + name: "Export".to_string(), + }) + } + ParserContext::Prefix => { + return Some(LSPSymbolInfo { + kind: SymbolKind::PROPERTY, + name: "Prefix".to_string(), + }) + } + ParserContext::Output => { + return Some(LSPSymbolInfo { + kind: SymbolKind::PROPERTY, + name: "Output".to_string(), + }) + } + ParserContext::Atom => Some(SymbolKind::FIELD), ParserContext::DataType => Some(SymbolKind::TYPE_PARAMETER), ParserContext::Variable => Some(SymbolKind::VARIABLE), diff --git a/nemo-language-server/src/language_server/token_type.rs b/nemo-language-server/src/language_server/token_type.rs index fff15666d..2db30ff78 100644 --- a/nemo-language-server/src/language_server/token_type.rs +++ b/nemo-language-server/src/language_server/token_type.rs @@ -1,5 +1,5 @@ -use strum_macros::EnumIter; use nemo::parser::context::ParserContext; +use strum_macros::EnumIter; use tower_lsp::lsp_types::SemanticTokenType; /// All syntax highlighting types that are used in Nemo programs @@ -24,7 +24,11 @@ impl TokenType { match ctx { ParserContext::DataType => Some(TokenType::Type), ParserContext::Variable => Some(TokenType::Variable), - ParserContext::Iri | ParserContext::Constant | ParserContext::RdfLiteral | ParserContext::Blank | ParserContext::String => Some(TokenType::String), + ParserContext::Iri + | ParserContext::Constant + | ParserContext::RdfLiteral + | ParserContext::Blank + | ParserContext::String => Some(TokenType::String), ParserContext::StructureTag => Some(TokenType::Function), ParserContext::Number => Some(TokenType::Number), ParserContext::Boolean => Some(TokenType::Bool), From 869b86cfaf16368865d407b2a8254f32a27d64a6 Mon Sep 17 00:00:00 2001 From: Matthias Meissner Date: Wed, 18 Sep 2024 20:44:30 +0200 Subject: [PATCH 188/214] cargo clippy --- nemo/src/parser/span.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/nemo/src/parser/span.rs b/nemo/src/parser/span.rs index cb4866135..17788467e 100644 --- a/nemo/src/parser/span.rs +++ b/nemo/src/parser/span.rs @@ -167,7 +167,7 @@ impl<'a> Span<'a> { /// Return a [Span] that points to the beginning. pub fn beginning(&self) -> Self { if self.fragment.is_empty() { - self.clone() + self } else { Self { allocation_start: self.allocation_start, @@ -213,7 +213,7 @@ impl<'a> Span<'a> { /// The fragment that is spanned. The fragment represents a part of the input of the parser. 
pub fn fragment(&self) -> &'_ str { - &self.fragment + self.fragment } } @@ -237,7 +237,7 @@ where { fn slice(&self, range: R) -> Self { let next_fragment = self.fragment.slice(range); - let consumed = &self.fragment[..self.fragment.offset(&next_fragment)]; + let consumed = &self.fragment[..self.fragment.offset(next_fragment)]; let line_offset: u32 = consumed .bytes() .filter(|b| *b == b'\n') @@ -247,7 +247,7 @@ where Span { allocation_start: self.allocation_start, - fragment: &next_fragment, + fragment: next_fragment, line: self.line + line_offset, } } From 7327c5db4cb435c007645d069921c15656ff871b Mon Sep 17 00:00:00 2001 From: Matthias Meissner Date: Wed, 18 Sep 2024 20:47:31 +0200 Subject: [PATCH 189/214] doc comment --- nemo/src/parser/span.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nemo/src/parser/span.rs b/nemo/src/parser/span.rs index 17788467e..2c2f16591 100644 --- a/nemo/src/parser/span.rs +++ b/nemo/src/parser/span.rs @@ -1,5 +1,5 @@ //! This module defines data structures that mark spans of text in an input file. -//! The `Span` implementation is inspired by nom_locate. (https://github.com/fflorent/nom_locate) +//! The `Span` implementation is inspired by nom_locate. (See ) use core::str; use std::ops::{Deref, Range}; @@ -167,7 +167,7 @@ impl<'a> Span<'a> { /// Return a [Span] that points to the beginning. pub fn beginning(&self) -> Self { if self.fragment.is_empty() { - self + *self } else { Self { allocation_start: self.allocation_start, From d5dbd4bbb3ebb888a7b5165303185446886d4bf7 Mon Sep 17 00:00:00 2001 From: Matthias Meissner Date: Wed, 18 Sep 2024 20:50:55 +0200 Subject: [PATCH 190/214] fix wasm bindings --- nemo-language-server/src/language_server/lsp_component.rs | 4 ++-- nemo/src/parser/ast/program.rs | 1 - nemo/src/parser/span.rs | 1 + 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/nemo-language-server/src/language_server/lsp_component.rs b/nemo-language-server/src/language_server/lsp_component.rs index 7cc89ce7f..dcbfef344 100644 --- a/nemo-language-server/src/language_server/lsp_component.rs +++ b/nemo-language-server/src/language_server/lsp_component.rs @@ -80,7 +80,7 @@ where scope.map(|scope| LSPIdentifier { scope, - identifier: (self.context(), self.span().0.fragment().to_string()), + identifier: (self.context(), self.span().fragment().to_string()), }) } @@ -155,7 +155,7 @@ where kind.map(|kind| LSPSymbolInfo { kind, - name: format!("{}", self.span().0.fragment()), + name: format!("{}", self.span().fragment()), }) } diff --git a/nemo/src/parser/ast/program.rs b/nemo/src/parser/ast/program.rs index 34500cc25..0877793cd 100644 --- a/nemo/src/parser/ast/program.rs +++ b/nemo/src/parser/ast/program.rs @@ -153,7 +153,6 @@ mod test { // TODO: This test cases causes a warning in miri #[test] - #[cfg_attr(miri, ignore)] fn parser_recover() { let program = "%! Top-level comment\n\ % Declarations:\n\ diff --git a/nemo/src/parser/span.rs b/nemo/src/parser/span.rs index 2c2f16591..b0482842f 100644 --- a/nemo/src/parser/span.rs +++ b/nemo/src/parser/span.rs @@ -69,6 +69,7 @@ pub struct Span<'a> { // SAFETY: Conceptionally, a span is just a slice inside a slice. unsafe impl Sync for Span<'_> {} +unsafe impl Send for Span<'_> {} impl<'a> Span<'a> { /// Create a span for a particular input with default offset and line values and empty extra data. 
From b04582a54118ce32e030cfd03573e72b155e4bc3 Mon Sep 17 00:00:00 2001 From: Matthias Meissner Date: Thu, 19 Sep 2024 11:51:56 +0200 Subject: [PATCH 191/214] fix python test --- nemo-python/README.md | 1 + nemo-python/tests/test_example.py | 1 + 2 files changed, 2 insertions(+) diff --git a/nemo-python/README.md b/nemo-python/README.md index 6619bcab7..1c2f51d15 100644 --- a/nemo-python/README.md +++ b/nemo-python/README.md @@ -31,6 +31,7 @@ data(hi,42) . data(hello,world) . calculated(?x, !v) :- data(?y, ?x) . +@export calculated :- csv {}. """ engine = NemoEngine(load_string(rules)) diff --git a/nemo-python/tests/test_example.py b/nemo-python/tests/test_example.py index 350950ca9..b711b60f0 100644 --- a/nemo-python/tests/test_example.py +++ b/nemo-python/tests/test_example.py @@ -23,6 +23,7 @@ def setUp(self): data(3.14, circle). calculated(?x, !v) :- data(?y, ?x) . + @export calculated :- csv {}. interesting(py). interesting(msg). From e3517bd0113b2422dfdc4847ee212d2347538b06 Mon Sep 17 00:00:00 2001 From: monsterkrampe Date: Fri, 20 Sep 2024 08:44:07 +0200 Subject: [PATCH 192/214] Fix Span::get_utf8_column --- nemo/src/parser/span.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo/src/parser/span.rs b/nemo/src/parser/span.rs index b0482842f..8ad459e8a 100644 --- a/nemo/src/parser/span.rs +++ b/nemo/src/parser/span.rs @@ -209,7 +209,7 @@ impl<'a> Span<'a> { pub fn get_utf8_column(&self) -> usize { let slice_before = self.get_slice_before(); let offset = slice_before.rfind('\n').map(|x| x + 1).unwrap_or(0); - bytecount::num_chars(slice_before[offset..].as_bytes()) + bytecount::num_chars(slice_before[offset..].as_bytes()) + 1 } /// The fragment that is spanned. The fragment represents a part of the input of the parser. From c35f1d048a41cda146255349ce8bef71646c3098 Mon Sep 17 00:00:00 2001 From: monsterkrampe Date: Fri, 20 Sep 2024 09:06:56 +0200 Subject: [PATCH 193/214] Try to show program errors using language server --- nemo-language-server/src/language_server.rs | 45 ++++++++++++++----- .../src/language_server/lsp_component.rs | 15 +------ nemo/src/rule_model/error.rs | 5 +++ 3 files changed, 42 insertions(+), 23 deletions(-) diff --git a/nemo-language-server/src/language_server.rs b/nemo-language-server/src/language_server.rs index 1ad1ec0de..3b76c4b87 100644 --- a/nemo-language-server/src/language_server.rs +++ b/nemo-language-server/src/language_server.rs @@ -2,6 +2,8 @@ mod lsp_component; mod nemo_position; mod token_type; +use nemo::rule_model::error::ProgramError; +use nemo::rule_model::translation::ProgramErrorReport; use std::collections::{BTreeMap, BTreeSet, HashMap}; use std::vec; use strum::IntoEnumIterator; @@ -100,15 +102,17 @@ impl Backend { let line_index = LineIndex::new(text); - let (_program, errors): (Program, Option) = + let (program, parse_errors): (Program, Option) = Parser::initialize(text, text_document.uri.to_string()) .parse() .map(|prg| (prg, None)) .unwrap_or_else(|(prg, err)| (*prg, Some(err))); + let translation_result: Option> = parse_errors.is_none().then(|| nemo::rule_model::translation::ASTProgramTranslation::initialize(text, text_document.uri.to_string()).translate(&program)); + // Group errors by position and deduplicate error let mut errors_by_posision: BTreeMap> = BTreeMap::new(); - for error in errors.iter().flat_map(|report| report.errors()) { + for error in parse_errors.iter().flat_map(|report| report.errors()) { if let Some(set) = errors_by_posision.get_mut(&error.position) { set.insert(format!("expected `{}`", 
error.context[0].name())); } else { @@ -119,18 +123,39 @@ impl Backend { }; } + if let Some(Err(program_error_report)) = translation_result { + for error in program_error_report.errors() { + // TODO: get rid of if; but currently I don't see how to get the position of a + // validation error + if let ProgramError::TranslationError(translation_error) = error { + let position = translation_error.character_range().start; + let message = format!( + "{}{}", + error.message(), + error + .note() + .map(|n| format!("\n{n}")) + .unwrap_or("".to_string()) + ); + + if let Some(set) = errors_by_posision.get_mut(&position) { + set.insert(message); + } else { + errors_by_posision.insert(position, std::iter::once(message).collect()); + }; + } + } + } + let diagnostics = errors_by_posision .into_iter() .map(|(pos, error_set)| { Ok(Diagnostic { - message: format!( - "expected {}", - error_set - .iter() - .map(|s| format!("'{s}'")) - .collect::>() - .join(", ") - ), + message: error_set + .iter() + .map(|s| format!("'{s}'")) + .collect::>() + .join(", "), range: Range::new( line_col_to_lsp_position( &line_index, diff --git a/nemo-language-server/src/language_server/lsp_component.rs b/nemo-language-server/src/language_server/lsp_component.rs index dcbfef344..64126a7a6 100644 --- a/nemo-language-server/src/language_server/lsp_component.rs +++ b/nemo-language-server/src/language_server/lsp_component.rs @@ -86,18 +86,6 @@ where fn symbol_info(&self) -> Option { let kind = match self.context() { - ParserContext::Program => { - return Some(LSPSymbolInfo { - kind: SymbolKind::FILE, - name: "Program".to_string(), - }) - } - ParserContext::Rule => { - return Some(LSPSymbolInfo { - kind: SymbolKind::CLASS, - name: "Rule".to_string(), - }) - } ParserContext::Base => { return Some(LSPSymbolInfo { kind: SymbolKind::PROPERTY, @@ -135,6 +123,7 @@ where }) } + ParserContext::Rule => Some(SymbolKind::CLASS), ParserContext::Atom => Some(SymbolKind::FIELD), ParserContext::DataType => Some(SymbolKind::TYPE_PARAMETER), ParserContext::Variable => Some(SymbolKind::VARIABLE), @@ -155,7 +144,7 @@ where kind.map(|kind| LSPSymbolInfo { kind, - name: format!("{}", self.span().fragment()), + name: format!("{}", self.span().fragment().split_whitespace().collect::>().join(" ")), }) } diff --git a/nemo/src/rule_model/error.rs b/nemo/src/rule_model/error.rs index 2aed7cfe2..f5a82f602 100644 --- a/nemo/src/rule_model/error.rs +++ b/nemo/src/rule_model/error.rs @@ -266,6 +266,11 @@ impl TranslationError { self } + + /// Return the [`CharacterRange`] of the error + pub fn character_range(&self) -> CharacterRange { + self.info.reference + } } /// Error that may occur while translating or validating a nemo program From 2506682dc58e045ed10f8c33d81a981c849ae8cf Mon Sep 17 00:00:00 2001 From: monsterkrampe Date: Fri, 20 Sep 2024 09:40:19 +0200 Subject: [PATCH 194/214] Actually show (all) program errors with hints --- nemo-language-server/src/language_server.rs | 65 +++++++++++-------- .../src/language_server/lsp_component.rs | 9 ++- nemo/src/rule_model/error.rs | 24 +++++-- nemo/src/rule_model/translation.rs | 5 ++ 4 files changed, 71 insertions(+), 32 deletions(-) diff --git a/nemo-language-server/src/language_server.rs b/nemo-language-server/src/language_server.rs index 3b76c4b87..2b12d38bc 100644 --- a/nemo-language-server/src/language_server.rs +++ b/nemo-language-server/src/language_server.rs @@ -2,7 +2,6 @@ mod lsp_component; mod nemo_position; mod token_type; -use nemo::rule_model::error::ProgramError; use 
nemo::rule_model::translation::ProgramErrorReport; use std::collections::{BTreeMap, BTreeSet, HashMap}; use std::vec; @@ -108,7 +107,15 @@ impl Backend { .map(|prg| (prg, None)) .unwrap_or_else(|(prg, err)| (*prg, Some(err))); - let translation_result: Option> = parse_errors.is_none().then(|| nemo::rule_model::translation::ASTProgramTranslation::initialize(text, text_document.uri.to_string()).translate(&program)); + let translation_result: Option< + Result, + > = parse_errors.is_none().then(|| { + nemo::rule_model::translation::ASTProgramTranslation::initialize( + text, + text_document.uri.to_string(), + ) + .translate(&program) + }); // Group errors by position and deduplicate error let mut errors_by_posision: BTreeMap> = BTreeMap::new(); @@ -125,25 +132,35 @@ impl Backend { if let Some(Err(program_error_report)) = translation_result { for error in program_error_report.errors() { - // TODO: get rid of if; but currently I don't see how to get the position of a - // validation error - if let ProgramError::TranslationError(translation_error) = error { - let position = translation_error.character_range().start; - let message = format!( - "{}{}", - error.message(), - error - .note() - .map(|n| format!("\n{n}")) - .unwrap_or("".to_string()) - ); - - if let Some(set) = errors_by_posision.get_mut(&position) { - set.insert(message); - } else { - errors_by_posision.insert(position, std::iter::once(message).collect()); - }; - } + let range_opt = error.character_range(|origin| { + program_error_report + .origin_map() + .get(origin) + .map(|node| node.span().range()) + }); + let Some(position) = range_opt.map(|r| r.start) else { + continue; + }; + + let message = format!( + "{}{}{}", + error.message(), + error + .note() + .map(|n| format!("\nNote: {n}")) + .unwrap_or("".to_string()), + error + .hints() + .iter() + .map(|h| format!("\nHint: {h}")) + .collect::(), + ); + + if let Some(set) = errors_by_posision.get_mut(&position) { + set.insert(message); + } else { + errors_by_posision.insert(position, std::iter::once(message).collect()); + }; } } @@ -151,11 +168,7 @@ impl Backend { .into_iter() .map(|(pos, error_set)| { Ok(Diagnostic { - message: error_set - .iter() - .map(|s| format!("'{s}'")) - .collect::>() - .join(", "), + message: error_set.into_iter().collect::>().join("\n\n"), range: Range::new( line_col_to_lsp_position( &line_index, diff --git a/nemo-language-server/src/language_server/lsp_component.rs b/nemo-language-server/src/language_server/lsp_component.rs index 64126a7a6..c20b683d9 100644 --- a/nemo-language-server/src/language_server/lsp_component.rs +++ b/nemo-language-server/src/language_server/lsp_component.rs @@ -144,7 +144,14 @@ where kind.map(|kind| LSPSymbolInfo { kind, - name: format!("{}", self.span().fragment().split_whitespace().collect::>().join(" ")), + name: format!( + "{}", + self.span() + .fragment() + .split_whitespace() + .collect::>() + .join(" ") + ), }) } diff --git a/nemo/src/rule_model/error.rs b/nemo/src/rule_model/error.rs index f5a82f602..5e2a0f6f4 100644 --- a/nemo/src/rule_model/error.rs +++ b/nemo/src/rule_model/error.rs @@ -266,11 +266,6 @@ impl TranslationError { self } - - /// Return the [`CharacterRange`] of the error - pub fn character_range(&self) -> CharacterRange { - self.info.reference - } } /// Error that may occur while translating or validating a nemo program @@ -311,6 +306,17 @@ impl ProgramError { } } + /// Return the [`CharacterRange`] where the error occurred + pub fn character_range(&self, translation: Translation) -> Option + where + 
Translation: Fn(&Origin) -> Option, + { + match self { + ProgramError::TranslationError(error) => Some(error.info.reference), + ProgramError::ValidationError(error) => translation(&error.info.reference), + } + } + /// Return the note attached to this error, if it exists. pub fn note(&self) -> Option { match self { @@ -320,6 +326,14 @@ impl ProgramError { .map(|string| string.to_string()) } + /// Return the [`Hint`]s attached to this error. + pub fn hints(&self) -> &Vec { + match self { + ProgramError::TranslationError(error) => &error.info.hints, + ProgramError::ValidationError(error) => &error.info.hints, + } + } + /// Append the information of this error to a [ReportBuilder]. pub fn report<'a, Translation>( &'a self, diff --git a/nemo/src/rule_model/translation.rs b/nemo/src/rule_model/translation.rs index c62094b3b..ca194bd19 100644 --- a/nemo/src/rule_model/translation.rs +++ b/nemo/src/rule_model/translation.rs @@ -137,6 +137,11 @@ impl<'a> ProgramErrorReport<'a> { }) .collect() } + + /// Return the mapping from origins to AST nodes. + pub fn origin_map(&self) -> &HashMap> { + &self.origin_map + } /// Return raw [ProgramError]s. pub fn errors(&self) -> &Vec { From d847636c9e63c039c5d414abb866a2f38d0348e4 Mon Sep 17 00:00:00 2001 From: monsterkrampe Date: Fri, 20 Sep 2024 10:00:56 +0200 Subject: [PATCH 195/214] Run cargo fmt --- nemo/src/rule_model/translation.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo/src/rule_model/translation.rs b/nemo/src/rule_model/translation.rs index ca194bd19..5fbba8c1c 100644 --- a/nemo/src/rule_model/translation.rs +++ b/nemo/src/rule_model/translation.rs @@ -137,7 +137,7 @@ impl<'a> ProgramErrorReport<'a> { }) .collect() } - + /// Return the mapping from origins to AST nodes. pub fn origin_map(&self) -> &HashMap> { &self.origin_map From ed80458a107a212eff14fbac237a8c0e80893499 Mon Sep 17 00:00:00 2001 From: Alex Ivliev Date: Fri, 20 Sep 2024 10:15:43 +0200 Subject: [PATCH 196/214] Fix import of .gz files --- nemo/src/chase_model/translation/import_export.rs | 2 +- .../src/rule_model/translation/directive/import_export.rs | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/nemo/src/chase_model/translation/import_export.rs b/nemo/src/chase_model/translation/import_export.rs index 790a9ff3f..c25d3188d 100644 --- a/nemo/src/chase_model/translation/import_export.rs +++ b/nemo/src/chase_model/translation/import_export.rs @@ -120,7 +120,7 @@ impl ProgramChaseTranslation { attributes .get(&ImportExportAttribute::Resource) .and_then(ImportExportDirective::string_value) - .map(|resource| CompressionFormat::from_resource(&resource)) + .map(|resource| (CompressionFormat::from_resource(&resource).0, resource)) .map(|(format, resource)| (format, ImportExportResource::from_string(resource))) .expect("invalid program: missing resource in import/export") } diff --git a/nemo/src/rule_model/translation/directive/import_export.rs b/nemo/src/rule_model/translation/directive/import_export.rs index 5c24aa63e..0729555c9 100644 --- a/nemo/src/rule_model/translation/directive/import_export.rs +++ b/nemo/src/rule_model/translation/directive/import_export.rs @@ -9,6 +9,7 @@ use crate::{ rule_model::{ components::{ import_export::{ + compression::CompressionFormat, file_formats::{FileFormat, FILE_FORMATS_RDF}, ExportDirective, ImportDirective, }, @@ -24,15 +25,16 @@ use crate::{ impl<'a> ASTProgramTranslation<'a> { /// Find the extension given for this import/export statement. 
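    //
    // The compression suffix has to be stripped before the file extension is
    // inspected: for a resource like "data.nt.gz" (hypothetical example),
    // Path::extension() would yield "gz" and the RDF serialization could not
    // be inferred. For that reason, CompressionFormat::from_resource() below
    // also returns the remaining path with the compression suffix removed.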
fn import_export_extension( - &self, map: &'a ast::expression::complex::map::Map, ) -> Option<(String, &'a ast::expression::Expression<'a>)> { for (key, value) in map.key_value() { if let ast::expression::Expression::Constant(constant) = key { if &constant.tag().to_string() == "resource" { if let ast::expression::Expression::String(string) = value { + let (_, path) = CompressionFormat::from_resource(&string.content()); + return Some(( - Path::new(&string.content()) + Path::new(&path) .extension()? .to_owned() .into_string() @@ -56,7 +58,7 @@ impl<'a> ASTProgramTranslation<'a> { let format_tag = structure_tag.to_string(); if format_tag.to_ascii_lowercase() == RDF_UNSPECIFIED { - let extension = self.import_export_extension(map); + let extension = Self::import_export_extension(map); if let Some((extension, origin)) = extension { for &rdf_format in FILE_FORMATS_RDF { From 61814945cea399e663a5a82834733c1d423e945c Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Mon, 23 Sep 2024 13:09:00 +0200 Subject: [PATCH 197/214] Improve error reporting --- nemo/src/parser/ast/directive/base.rs | 2 +- nemo/src/parser/ast/directive/declare.rs | 17 +- nemo/src/parser/ast/directive/export.rs | 17 +- nemo/src/parser/ast/directive/import.rs | 17 +- nemo/src/parser/ast/directive/prefix.rs | 11 +- nemo/src/parser/ast/guard.rs | 13 +- nemo/src/parser/ast/statement.rs | 13 +- nemo/src/parser/context.rs | 18 ++ nemo/src/parser/error.rs | 201 +++++++++++------------ 9 files changed, 169 insertions(+), 140 deletions(-) diff --git a/nemo/src/parser/ast/directive/base.rs b/nemo/src/parser/ast/directive/base.rs index a7aea2f9f..fba9e8564 100644 --- a/nemo/src/parser/ast/directive/base.rs +++ b/nemo/src/parser/ast/directive/base.rs @@ -27,7 +27,7 @@ impl<'a> Base<'a> { } pub fn parse_body(input: ParserInput<'a>) -> ParserResult<'a, Iri<'a>> { - Iri::parse(input) + context(ParserContext::BaseBody, Iri::parse)(input) } } diff --git a/nemo/src/parser/ast/directive/declare.rs b/nemo/src/parser/ast/directive/declare.rs index 8f756f2c5..b70522ad8 100644 --- a/nemo/src/parser/ast/directive/declare.rs +++ b/nemo/src/parser/ast/directive/declare.rs @@ -42,13 +42,16 @@ impl<'a> Declare<'a> { pub fn parse_body( input: ParserInput<'a>, ) -> ParserResult<'a, (StructureTag<'a>, Sequence<'a, NameTypePair<'a>>)> { - separated_pair( - StructureTag::parse, - WSoC::parse, - delimited( - pair(Token::atom_open, WSoC::parse), - Sequence::::parse, - pair(WSoC::parse, Token::atom_close), + context( + ParserContext::DeclareBody, + separated_pair( + StructureTag::parse, + WSoC::parse, + delimited( + pair(Token::atom_open, WSoC::parse), + Sequence::::parse, + pair(WSoC::parse, Token::atom_close), + ), ), )(input) } diff --git a/nemo/src/parser/ast/directive/export.rs b/nemo/src/parser/ast/directive/export.rs index 2a28bf811..5c01fc687 100644 --- a/nemo/src/parser/ast/directive/export.rs +++ b/nemo/src/parser/ast/directive/export.rs @@ -37,13 +37,16 @@ impl<'a> Export<'a> { } pub fn parse_body(input: ParserInput<'a>) -> ParserResult<'a, (StructureTag<'a>, Map<'a>)> { - tuple(( - StructureTag::parse, - WSoC::parse, - Token::export_assignment, - WSoC::parse, - Map::parse, - ))(input) + context( + ParserContext::ExportBody, + tuple(( + StructureTag::parse, + WSoC::parse, + Token::export_assignment, + WSoC::parse, + Map::parse, + )), + )(input) .map(|(rest, (predicate, _, _, _, instructions))| (rest, (predicate, instructions))) } } diff --git a/nemo/src/parser/ast/directive/import.rs b/nemo/src/parser/ast/directive/import.rs index 
e180ea0cd..37b8dd583 100644 --- a/nemo/src/parser/ast/directive/import.rs +++ b/nemo/src/parser/ast/directive/import.rs @@ -37,13 +37,16 @@ impl<'a> Import<'a> { } pub fn parse_body(input: ParserInput<'a>) -> ParserResult<'a, (StructureTag<'a>, Map<'a>)> { - tuple(( - StructureTag::parse, - WSoC::parse, - Token::import_assignment, - WSoC::parse, - Map::parse, - ))(input) + context( + ParserContext::ImportBody, + tuple(( + StructureTag::parse, + WSoC::parse, + Token::import_assignment, + WSoC::parse, + Map::parse, + )), + )(input) .map(|(rest, (predicate, _, _, _, instructions))| (rest, (predicate, instructions))) } } diff --git a/nemo/src/parser/ast/directive/prefix.rs b/nemo/src/parser/ast/directive/prefix.rs index f6aea6ad3..ff44d42f4 100644 --- a/nemo/src/parser/ast/directive/prefix.rs +++ b/nemo/src/parser/ast/directive/prefix.rs @@ -42,10 +42,13 @@ impl<'a> Prefix<'a> { } pub fn parse_body(input: ParserInput<'a>) -> ParserResult<'a, (Token<'a>, Iri<'a>)> { - separated_pair( - alt((Token::name, Token::empty)), - tuple((WSoC::parse, Token::prefix_assignment, WSoC::parse)), - Iri::parse, + context( + ParserContext::PrefixBody, + separated_pair( + alt((Token::name, Token::empty)), + tuple((WSoC::parse, Token::prefix_assignment, WSoC::parse)), + Iri::parse, + ), )(input) } } diff --git a/nemo/src/parser/ast/guard.rs b/nemo/src/parser/ast/guard.rs index 675d94370..5deb0c91c 100644 --- a/nemo/src/parser/ast/guard.rs +++ b/nemo/src/parser/ast/guard.rs @@ -2,7 +2,7 @@ use nom::{branch::alt, combinator::map}; -use crate::parser::context::ParserContext; +use crate::parser::context::{context, ParserContext}; use super::{ expression::{complex::infix::InfixExpression, Expression}, @@ -49,10 +49,13 @@ impl<'a> ProgramAST<'a> for Guard<'a> { where Self: Sized + 'a, { - alt(( - map(InfixExpression::parse, Self::Infix), - map(Expression::parse, Self::Expression), - ))(input) + context( + CONTEXT, + alt(( + map(InfixExpression::parse, Self::Infix), + map(Expression::parse_complex, Self::Expression), + )), + )(input) } fn context(&self) -> ParserContext { diff --git a/nemo/src/parser/ast/statement.rs b/nemo/src/parser/ast/statement.rs index f1b47dcf9..ac0a438e9 100644 --- a/nemo/src/parser/ast/statement.rs +++ b/nemo/src/parser/ast/statement.rs @@ -48,11 +48,14 @@ impl<'a> StatementKind<'a> { /// Parse the [StatementKind]. 
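    //
    // `context(...)` pushes a named frame onto the error tree: if none of
    // the three alternatives below matches, the failure can be reported as
    // expected `statement kind` (the name registered for
    // ParserContext::StatementKind) rather than as three unrelated branch
    // errors at arbitrary depths.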
pub fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self> { - alt(( - map(Directive::parse, Self::Directive), - map(Rule::parse, Self::Rule), - map(Guard::parse, Self::Fact), - ))(input) + context( + ParserContext::StatementKind, + alt(( + map(Directive::parse, Self::Directive), + map(Rule::parse, Self::Rule), + map(Guard::parse, Self::Fact), + )), + )(input) } } diff --git a/nemo/src/parser/context.rs b/nemo/src/parser/context.rs index 8573ef989..9913f6ebf 100644 --- a/nemo/src/parser/context.rs +++ b/nemo/src/parser/context.rs @@ -46,21 +46,36 @@ pub enum ParserContext { /// Base directive #[assoc(name = "base directive")] Base, + /// body of base directive + #[assoc(name = "base body")] + BaseBody, /// Declare directive #[assoc(name = "declare directive")] Declare, + /// body of declare directive + #[assoc(name = "declare body")] + DeclareBody, /// Export directive #[assoc(name = "export directive")] Export, + /// body of export directive + #[assoc(name = "export body")] + ExportBody, /// Import directive #[assoc(name = "import directive")] Import, + /// body of import directive + #[assoc(name = "import body")] + ImportBody, /// Output directive #[assoc(name = "output directive")] Output, /// Prefix directive #[assoc(name = "prefix directive")] Prefix, + /// body of prefix directive + #[assoc(name = "prefix body")] + PrefixBody, /// Unknown directive #[assoc(name = "unknown directive")] UnknownDirective, @@ -133,6 +148,9 @@ pub enum ParserContext { /// Statement #[assoc(name = "statement")] Statement, + /// Statement kind + #[assoc(name = "statement kind")] + StatementKind, /// Program #[assoc(name = "program")] Program, diff --git a/nemo/src/parser/error.rs b/nemo/src/parser/error.rs index 14aee9ef6..d6b925aeb 100644 --- a/nemo/src/parser/error.rs +++ b/nemo/src/parser/error.rs @@ -96,36 +96,41 @@ pub(crate) fn report_error<'a>( match &e { nom::Err::Incomplete(_) => (), nom::Err::Error(err) | nom::Err::Failure(err) => { - let error = match err { - GenericErrorTree::Base { location, .. } => ParserError { - position: CharacterPosition { - offset: location.span.location_offset(), - line: location.span.location_line(), - column: location.span.get_utf8_column() as u32, - }, - context: vec![], - }, - GenericErrorTree::Stack { - base: _base, - contexts, - } => ParserError { - position: CharacterPosition { - offset: contexts[0].0.span.location_offset(), - line: contexts[0].0.span.location_line(), - column: contexts[0].0.span.get_utf8_column() as u32, - }, - context: match contexts[0].1 { - StackContext::Kind(_) => todo!(), - StackContext::Context(ctx) => { - vec![ctx] - } - }, - }, - GenericErrorTree::Alt(_vec) => { - todo!() - } - }; - input.state.report_error(error); + // dbg!(&err); + // let error = _get_deepest_error(&err); + // dbg!(&error); + // let error = match err { + // GenericErrorTree::Base { location, .. 
} => ParserError { + // position: CharacterPosition { + // offset: location.span.location_offset(), + // line: location.span.location_line(), + // column: location.span.get_utf8_column() as u32, + // }, + // context: vec![], + // }, + // GenericErrorTree::Stack { + // base: _base, + // contexts, + // } => ParserError { + // position: CharacterPosition { + // offset: contexts[0].0.span.location_offset(), + // line: contexts[0].0.span.location_line(), + // column: contexts[0].0.span.get_utf8_column() as u32, + // }, + // context: match contexts[0].1 { + // StackContext::Kind(_) => todo!(), + // StackContext::Context(ctx) => { + // vec![ctx] + // } + // }, + // }, + // GenericErrorTree::Alt(_vec) => { + // todo!() + // } + // }; + for error in _get_deepest_error(&err) { + input.state.report_error(error); + } // let (_deepest_pos, errors) = get_deepest_errors(err); // for error in errors { // input.state.report_error(error); @@ -151,10 +156,13 @@ pub(crate) fn _transform_error_tree<'a, Output>( match &e { nom::Err::Incomplete(_) => (), nom::Err::Error(err) | nom::Err::Failure(err) => { - let (_deepest_pos, errors) = _get_deepest_errors(err); - for error in errors { - input.state.report_error(error); - } + let error = _get_deepest_error(err); + dbg!(error); + todo!() + // let (_deepest_pos, errors) = _get_deepest_errors(err); + // for error in errors { + // input.state.report_error(error); + // } } }; Err(e) @@ -174,88 +182,73 @@ fn _context_strs( .collect() } -fn _get_deepest_errors<'a>(e: &'a ParserErrorTree<'a>) -> (CharacterPosition, Vec) { +fn _get_deepest_error<'a>(e: &'a ParserErrorTree<'a>) -> Vec { match e { ParserErrorTree::Base { location, .. } => { let span = location.span; - let err_pos = CharacterPosition { - offset: span.location_offset(), - line: span.location_line(), - column: span.get_utf8_column() as u32, - }; - ( - err_pos, - vec![ParserError { - position: err_pos, - context: Vec::new(), - }], - ) + vec![ParserError { + position: CharacterPosition { + offset: span.location_offset(), + line: span.location_line(), + column: span.get_utf8_column() as u32, + }, + context: vec![], + }] } ParserErrorTree::Stack { base, contexts } => { - // let mut err_pos = Position::default(); - match &**base { - ParserErrorTree::Base { location, .. 
} => { - let span = location.span; - let err_pos = CharacterPosition { - offset: span.location_offset(), - line: span.location_line(), - column: span.get_utf8_column() as u32, - }; - let mut msg = String::from(""); - for (_, context) in contexts { - match context { - StackContext::Kind(_) => todo!(), - StackContext::Context(c) => { - if let ParserContext::Token { kind: t } = c { - msg.push_str(t.name()); - } - } - } - } - ( - err_pos, - vec![ParserError { - position: err_pos, - context: _context_strs(contexts), - }], - ) - } - ParserErrorTree::Stack { base, contexts } => { - let (pos, mut deepest_errors) = _get_deepest_errors(base); - let contexts = _context_strs(contexts); - for error in &mut deepest_errors { - error.context.append(&mut contexts.clone()); - } - (pos, deepest_errors) - } - ParserErrorTree::Alt(_error_tree) => { - let (pos, mut deepest_errors) = _get_deepest_errors(base); - let contexts = _context_strs(contexts); - for error in &mut deepest_errors { - error.context.append(&mut contexts.clone()); - } - (pos, deepest_errors) - } + let mut errors = _get_deepest_error(base); + if errors.len() == 1 { + let error = errors.get_mut(0).expect( + "get deepest error called on base should return a vec with one element", + ); + let mut context = vec![]; + context.append(&mut error.context); + context.append( + &mut contexts + .into_iter() + .map(|(_, stack_context)| match stack_context { + StackContext::Kind(_) => todo!("unclear when NomErrorKind will occur"), + StackContext::Context(cxt) => *cxt, + }) + .collect(), + ); + vec![ParserError { + position: error.position, + context, + }] + } else { + vec![ParserError { + position: errors[0].position, + context: contexts + .into_iter() + .map(|(_, stack_context)| match stack_context { + StackContext::Kind(_) => todo!("unclear when NomErrorKind will occur"), + StackContext::Context(cxt) => *cxt, + }) + .collect(), + }] } } ParserErrorTree::Alt(vec) => { - let mut return_vec: Vec = Vec::new(); - let mut deepest_pos = CharacterPosition::default(); + let mut farthest_pos = CharacterPosition::default(); + let mut farthest_errors = Vec::new(); for error in vec { - let (pos, mut deepest_errors) = _get_deepest_errors(error); - match pos.cmp(&deepest_pos) { - std::cmp::Ordering::Equal => { - return_vec.append(&mut deepest_errors); - } - std::cmp::Ordering::Greater => { - deepest_pos = pos; - return_vec.clear(); - return_vec.append(&mut deepest_errors); + let errors = _get_deepest_error(error); + for inner_error in errors { + if inner_error.position == farthest_pos { + farthest_errors.push(inner_error); + } else if inner_error.position > farthest_pos { + farthest_pos = inner_error.position; + farthest_errors.clear(); + farthest_errors.push(inner_error); } - std::cmp::Ordering::Less => {} } } - (deepest_pos, return_vec) + farthest_errors + // ParserError { + // position: farthest_pos, + // context: vec![], + // } } } } From a7c1bf0065ccdb9d3cc229b60aefb97723b031ca Mon Sep 17 00:00:00 2001 From: Alex Ivliev Date: Mon, 23 Sep 2024 14:54:53 +0200 Subject: [PATCH 198/214] Parse parenthesized expressions --- nemo/src/parser/ast/expression.rs | 11 ++++- nemo/src/parser/ast/expression/complex.rs | 1 + .../ast/expression/complex/arithmetic.rs | 43 +++++++++++++++--- ...hesised_expression.rs => parenthesized.rs} | 44 +++++++++++-------- nemo/src/parser/context.rs | 6 +-- nemo/src/rule_model/translation/rule.rs | 3 ++ 6 files changed, 79 insertions(+), 29 deletions(-) rename nemo/src/parser/ast/expression/complex/{parenthesised_expression.rs => 
parenthesized.rs} (59%) diff --git a/nemo/src/parser/ast/expression.rs b/nemo/src/parser/ast/expression.rs index 12e0eb3b2..6ab521739 100644 --- a/nemo/src/parser/ast/expression.rs +++ b/nemo/src/parser/ast/expression.rs @@ -9,7 +9,7 @@ use basic::{ }; use complex::{ aggregation::Aggregation, arithmetic::Arithmetic, atom::Atom, map::Map, negation::Negation, - operation::Operation, tuple::Tuple, + operation::Operation, parenthesized::ParenthesizedExpression, tuple::Tuple, }; use nom::{branch::alt, combinator::map}; @@ -45,6 +45,8 @@ pub enum Expression<'a> { Number(Number<'a>), /// Operation Operation(Operation<'a>), + /// Parenthesized expression + Parenthesized(ParenthesizedExpression<'a>), /// Rdf literal RdfLiteral(RdfLiteral<'a>), /// String @@ -69,6 +71,7 @@ impl<'a> Expression<'a> { Expression::Number(expression) => expression.context(), Expression::Negation(expression) => expression.context(), Expression::Operation(expression) => expression.context(), + Expression::Parenthesized(expression) => expression.context(), Expression::RdfLiteral(expression) => expression.context(), Expression::String(expression) => expression.context(), Expression::Tuple(expression) => expression.context(), @@ -117,6 +120,7 @@ impl<'a> ProgramAST<'a> for Expression<'a> { Expression::Number(expression) => expression, Expression::Negation(expression) => expression, Expression::Operation(expression) => expression, + Expression::Parenthesized(expression) => expression, Expression::RdfLiteral(expression) => expression, Expression::String(expression) => expression, Expression::Tuple(expression) => expression, @@ -136,6 +140,7 @@ impl<'a> ProgramAST<'a> for Expression<'a> { Expression::Number(expression) => expression.span(), Expression::Negation(expression) => expression.span(), Expression::Operation(expression) => expression.span(), + Expression::Parenthesized(expression) => expression.span(), Expression::RdfLiteral(expression) => expression.span(), Expression::String(expression) => expression.span(), Expression::Tuple(expression) => expression.span(), @@ -151,6 +156,7 @@ impl<'a> ProgramAST<'a> for Expression<'a> { CONTEXT, alt(( map(Arithmetic::parse, Self::Arithmetic), + map(ParenthesizedExpression::parse, Self::Parenthesized), Self::parse_complex, Self::parse_basic, )), @@ -177,7 +183,7 @@ mod test { fn parse_expression() { let test = vec![ ("#sum(1 + POW(?x, 2), ?y, ?z)", ParserContext::Aggregation), - ("(1 + 2)", ParserContext::Arithmetic), + ("1 + 2", ParserContext::Arithmetic), ("test(?x, (1,), (1 + 2))", ParserContext::Atom), ("_:12", ParserContext::Blank), ("true", ParserContext::Boolean), @@ -186,6 +192,7 @@ mod test { ("12", ParserContext::Number), ("~test(1)", ParserContext::Negation), ("substr(\"string\", 1+?x)", ParserContext::Operation), + ("(int)", ParserContext::ParenthesizedExpression), ( "\"true\"^^", ParserContext::RdfLiteral, diff --git a/nemo/src/parser/ast/expression/complex.rs b/nemo/src/parser/ast/expression/complex.rs index 9ea488770..4bddd217e 100644 --- a/nemo/src/parser/ast/expression/complex.rs +++ b/nemo/src/parser/ast/expression/complex.rs @@ -7,4 +7,5 @@ pub mod infix; pub mod map; pub mod negation; pub mod operation; +pub mod parenthesized; pub mod tuple; diff --git a/nemo/src/parser/ast/expression/complex/arithmetic.rs b/nemo/src/parser/ast/expression/complex/arithmetic.rs index f1a9c682f..9284eb55d 100644 --- a/nemo/src/parser/ast/expression/complex/arithmetic.rs +++ b/nemo/src/parser/ast/expression/complex/arithmetic.rs @@ -25,6 +25,8 @@ use crate::parser::{ ParserResult, }; 
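// For intuition, the precedence scheme used here can be sketched with plain
// nom combinators as follows (simplified to evaluating i64 instead of
// building AST nodes; hypothetical helpers, not this crate's API):
//
//     use nom::{
//         branch::alt,
//         character::complete::{char, digit1, multispace0},
//         combinator::map_res,
//         multi::separated_list1,
//         sequence::delimited,
//         IResult,
//     };
//
//     // A factor is a number or a parenthesized expression.
//     fn factor(input: &str) -> IResult<&str, i64> {
//         alt((
//             map_res(digit1, str::parse),
//             delimited(char('('), expr, char(')')),
//         ))(input)
//     }
//
//     // A term is a product of factors, an expression a sum of terms.
//     fn term(input: &str) -> IResult<&str, i64> {
//         let (rest, fs) =
//             separated_list1(delimited(multispace0, char('*'), multispace0), factor)(input)?;
//         Ok((rest, fs.into_iter().product()))
//     }
//
//     fn expr(input: &str) -> IResult<&str, i64> {
//         let (rest, ts) =
//             separated_list1(delimited(multispace0, char('+'), multispace0), term)(input)?;
//         Ok((rest, ts.into_iter().sum()))
//     }
//
// Branch order in `alt` matters, as in `parse_factor` further down: the
// first alternative that succeeds wins, so "1 + (2 * 3)" nests the
// parenthesized product under the sum instead of mis-grouping it.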
+use super::parenthesized::ParenthesizedExpression; + /// Types of arithmetic operations #[derive(Assoc, Debug, Copy, Clone, PartialEq, Eq)] #[func(pub fn token(token: TokenKind) -> Option)] @@ -146,23 +148,53 @@ impl<'a> ArithmeticChain<'a> { } impl<'a> Arithmetic<'a> { + /// Parse expression (not including arithmetic expressions). fn parse_non_arithmetic(input: ParserInput<'a>) -> ParserResult<'a, Expression<'a>> { alt((Expression::parse_complex, Expression::parse_basic))(input) } + /// Parse parenthesized non-arithmetic expressions. + fn parse_parenthesized_non_arithmetic( + input: ParserInput<'a>, + ) -> ParserResult<'a, Expression<'a>> { + let input_span = input.span; + delimited( + pair(Token::open_parenthesis, WSoC::parse), + Self::parse_non_arithmetic, + pair(WSoC::parse, Token::closed_parenthesis), + )(input) + .map(|(rest, expression)| { + let rest_span = rest.span; + ( + rest, + Expression::Parenthesized(ParenthesizedExpression::new( + input_span.until_rest(&rest_span), + expression, + )), + ) + }) + } + /// Parse an expression enclosed in parenthesis. fn parse_parenthesized_expression(input: ParserInput<'a>) -> ParserResult<'a, Expression<'a>> { + let input_span = input.span; + delimited( pair(Token::open_parenthesis, WSoC::parse), Self::parse, pair(WSoC::parse, Token::closed_parenthesis), )(input) - .map(|(rest, arithmetic_expr)| (rest, Expression::Arithmetic(arithmetic_expr))) + .map(|(rest, mut arithmetic_expression)| { + arithmetic_expression.span = input_span.until_rest(&rest.span); + + (rest, Expression::Arithmetic(arithmetic_expression)) + }) } /// Parse factor. fn parse_factor(input: ParserInput<'a>) -> ParserResult<'a, Expression<'a>> { alt(( + Self::parse_parenthesized_non_arithmetic, Self::parse_non_arithmetic, Self::parse_parenthesized_expression, ))(input) @@ -275,15 +307,16 @@ mod test { let test = vec![ ("1 * 2", 2), ("1 * 2 * ?y", 3), - ("1 * (2 / ?y)", 3), // FIXME: Span has missing `)` + ("1 * (2 / ?y)", 3), ("(1 / 2) * ?y", 3), ("1 + 2", 2), ("1 + 2 + ?x", 3), - ("1 + 2 * (3 * ?y)", 4), // FIXME: This test produces weird spans - ("1 + (2 * 3) * ?y + 4", 5), // FIXME: Here the spans are also wrong + ("1 + 2 * (3 * ?y)", 4), + ("1 + (2 * 3) * ?y + 4", 5), ("1 + (2 * ((3 * ?y)))", 4), - ("1 + 2 * POW(3, 4)", 3), // FIXME: The same + ("1 + 2 * POW(3, 4)", 3), ("2 * (((18 + 3)))", 3), + ("1 + (2)", 2), ]; for (input, expected) in test { diff --git a/nemo/src/parser/ast/expression/complex/parenthesised_expression.rs b/nemo/src/parser/ast/expression/complex/parenthesized.rs similarity index 59% rename from nemo/src/parser/ast/expression/complex/parenthesised_expression.rs rename to nemo/src/parser/ast/expression/complex/parenthesized.rs index 85fe2ab43..1b8c94de8 100644 --- a/nemo/src/parser/ast/expression/complex/parenthesised_expression.rs +++ b/nemo/src/parser/ast/expression/complex/parenthesized.rs @@ -1,3 +1,5 @@ +//! This module defines [ParenthesizedExpression]. + use nom::sequence::{delimited, pair}; use crate::parser::{ @@ -8,34 +10,41 @@ use crate::parser::{ ParserResult, }; +/// An [Expression] enclosed in parenthesis #[derive(Debug)] -pub struct ParenthesisedExpression<'a> { +pub struct ParenthesizedExpression<'a> { + /// [Span] associated with this node span: Span<'a>, - expression: Expression<'a>, + /// The [Expression] + expression: Box>, } -impl<'a> ParenthesisedExpression<'a> { - pub fn expression(&self) -> &Expression { - &self.expression +impl<'a> ParenthesizedExpression<'a> { + /// Create a new [ParenthesizedExpression]. 
+ pub(crate) fn new(span: Span<'a>, expression: Expression<'a>) -> Self { + Self { + span, + expression: Box::new(expression), + } } - pub fn parse_expression(input: ParserInput<'a>) -> ParserResult<'a, Expression<'a>> { - Self::parse(input).map(|(rest, paren_expr)| (rest, paren_expr.expression)) + /// Return the underlying expression. + pub fn expression(&self) -> &Expression { + &self.expression } } -const CONTEXT: ParserContext = ParserContext::ParenthesisedExpression; +const CONTEXT: ParserContext = ParserContext::ParenthesizedExpression; -impl<'a> ProgramAST<'a> for ParenthesisedExpression<'a> { - fn children(&'a self) -> Vec<&'a dyn ProgramAST> { - vec![&self.expression] +impl<'a> ProgramAST<'a> for ParenthesizedExpression<'a> { + fn children(&'a self) -> Vec<&'a dyn ProgramAST<'a>> { + vec![&*self.expression] } fn span(&self) -> Span<'a> { self.span } - /// Parse an expression enclosed in parenthesis. fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self> where Self: Sized + 'a, @@ -50,9 +59,9 @@ impl<'a> ProgramAST<'a> for ParenthesisedExpression<'a> { let rest_span = rest.span; ( rest, - ParenthesisedExpression { + ParenthesizedExpression { span: input_span.until_rest(&rest_span), - expression, + expression: Box::new(expression), }, ) }) @@ -73,17 +82,14 @@ mod test { #[test] fn paren_expr() { - let test = ["(1 * 2)"]; + let test = ["(int)"]; for input in test { let parser_input = ParserInput::new(input, ParserState::default()); - let result = all_consuming(Expression::parse)(parser_input); + let result = all_consuming(ParenthesizedExpression::parse)(parser_input); dbg!(&result); assert!(result.is_ok()); - - // let result = result.unwrap(); - // assert_eq!(result.1.context_type(), expect); } } } diff --git a/nemo/src/parser/context.rs b/nemo/src/parser/context.rs index 9913f6ebf..c032b395a 100644 --- a/nemo/src/parser/context.rs +++ b/nemo/src/parser/context.rs @@ -88,9 +88,9 @@ pub enum ParserContext { /// Guard #[assoc(name = "expression")] // Guard seems like a technical name Guard, - /// Parenthesised expression - #[assoc(name = "parenthesised expression")] - ParenthesisedExpression, + /// Parenthesized expression + #[assoc(name = "expression")] + ParenthesizedExpression, /// Tuple #[assoc(name = "tuple")] Tuple, diff --git a/nemo/src/rule_model/translation/rule.rs b/nemo/src/rule_model/translation/rule.rs index ea894e4de..704820145 100644 --- a/nemo/src/rule_model/translation/rule.rs +++ b/nemo/src/rule_model/translation/rule.rs @@ -161,6 +161,9 @@ impl<'a> ASTProgramTranslation<'a> { negation.span(), TranslationErrorKind::InnerExpressionNegation, )), + ast::expression::Expression::Parenthesized(parenthesized) => { + self.build_inner_term(parenthesized.expression()) + } }? .set_origin(self.register_node(expression))) } From 26cf09212a60b481549cc50b361529ba0783fa7f Mon Sep 17 00:00:00 2001 From: Alex Ivliev Date: Mon, 23 Sep 2024 15:41:15 +0200 Subject: [PATCH 199/214] Remove empty token parsing from string expression parsing --- nemo/src/parser/ast/directive/prefix.rs | 1 + nemo/src/parser/ast/expression/basic/string.rs | 7 +------ 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/nemo/src/parser/ast/directive/prefix.rs b/nemo/src/parser/ast/directive/prefix.rs index ff44d42f4..9accc70a9 100644 --- a/nemo/src/parser/ast/directive/prefix.rs +++ b/nemo/src/parser/ast/directive/prefix.rs @@ -41,6 +41,7 @@ impl<'a> Prefix<'a> { &self.iri } + /// Parse the main part of this directive. 
pub fn parse_body(input: ParserInput<'a>) -> ParserResult<'a, (Token<'a>, Iri<'a>)> { context( ParserContext::PrefixBody, diff --git a/nemo/src/parser/ast/expression/basic/string.rs b/nemo/src/parser/ast/expression/basic/string.rs index d3478faa7..22c9db92d 100644 --- a/nemo/src/parser/ast/expression/basic/string.rs +++ b/nemo/src/parser/ast/expression/basic/string.rs @@ -2,7 +2,6 @@ #![allow(missing_docs)] use nom::{ - branch::alt, combinator::opt, sequence::{delimited, pair}, }; @@ -40,11 +39,7 @@ impl<'a> StringLiteral<'a> { /// Parse the main part of the string. pub fn parse_string(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>> { - delimited( - Token::quote, - alt((Token::string, Token::empty)), - Token::quote, - )(input) + delimited(Token::quote, Token::string, Token::quote)(input) } /// Parse the language tag of the string. From 9ec0b303c508f22f37b2f2402b09f228c0cbaa3d Mon Sep 17 00:00:00 2001 From: Alex Ivliev Date: Mon, 23 Sep 2024 16:02:48 +0200 Subject: [PATCH 200/214] Add test case for boolean builtin in rule body --- resources/testcases/arithmetic/filter.rls | 3 +++ resources/testcases/arithmetic/filter/r4.csv | 2 ++ 2 files changed, 5 insertions(+) create mode 100644 resources/testcases/arithmetic/filter/r4.csv diff --git a/resources/testcases/arithmetic/filter.rls b/resources/testcases/arithmetic/filter.rls index 64ab28dd7..f1c662191 100644 --- a/resources/testcases/arithmetic/filter.rls +++ b/resources/testcases/arithmetic/filter.rls @@ -11,7 +11,10 @@ r2(?x) :- a(?x, ?y), 2 / ?y != 1. r3(?x) :- a(?x, ?y), ?z = ?x + ?y, ?z = 4. +r4(?x, ?y) :- a(?x, ?y), OR(NUMGREATER(?x, 2), EQUALITY(?y, 0)). + @export r0 :- csv {}. @export r1 :- csv {}. @export r2 :- csv {}. @export r3 :- csv {}. +@export r4 :- csv {}. diff --git a/resources/testcases/arithmetic/filter/r4.csv b/resources/testcases/arithmetic/filter/r4.csv new file mode 100644 index 000000000..1e15c9994 --- /dev/null +++ b/resources/testcases/arithmetic/filter/r4.csv @@ -0,0 +1,2 @@ +1,0 +3,1 \ No newline at end of file From be6d41336f9b672da54b87a909410b6cc1bf3390 Mon Sep 17 00:00:00 2001 From: Alex Ivliev Date: Mon, 23 Sep 2024 17:38:49 +0200 Subject: [PATCH 201/214] Add validation errors relating to aggregates and rules without positive literals --- nemo/src/rule_model/components/rule.rs | 120 +++++++++++++----- .../rule_model/components/term/aggregate.rs | 34 +++++ nemo/src/rule_model/error/validation_error.rs | 41 ++++-- 3 files changed, 150 insertions(+), 45 deletions(-) diff --git a/nemo/src/rule_model/components/rule.rs b/nemo/src/rule_model/components/rule.rs index ca5ef67c9..30b4051ed 100644 --- a/nemo/src/rule_model/components/rule.rs +++ b/nemo/src/rule_model/components/rule.rs @@ -73,6 +73,14 @@ impl Rule { &self.body } + /// Return an iterator over the positive literals in the body of this rule. + pub fn body_positive(&self) -> impl Iterator { + self.body.iter().filter_map(|literal| match literal { + Literal::Positive(atom) => Some(atom), + Literal::Negative(_) | Literal::Operation(_) => None, + }) + } + /// Return a mutable reference to the body of the rule. 
pub fn body_mut(&mut self) -> &mut Vec { &mut self.body @@ -143,18 +151,41 @@ impl Rule { /// Check if /// * are no complex terms occurring in the head /// * an aggregate occurs at most once - fn validate_term_head(builder: &mut ValidationErrorBuilder, term: &Term) -> Result { + /// * there is no aggregation over a group-by variable + fn validate_term_head( + builder: &mut ValidationErrorBuilder, + term: &Term, + group_by_variable: &HashSet<&Variable>, + ) -> Option { if term.is_map() || term.is_tuple() || term.is_function() { builder .report_error(*term.origin(), ValidationErrorKind::UnsupportedComplexTerm) .add_hint_option(Self::hint_term_operation(term)); - return Err(()); + return None; } - let mut first_aggregate = term.is_aggregate(); + let mut first_aggregate = if let Term::Aggregate(aggregate) = term { + if let Term::Primitive(Primitive::Variable(aggregate_variable)) = + aggregate.aggregate_term() + { + if group_by_variable.contains(aggregate_variable) { + builder.report_error( + *aggregate.aggregate_term().origin(), + ValidationErrorKind::AggregateOverGroupByVariable { + variable: aggregate_variable.name().unwrap_or_default(), + }, + ); + return None; + } + } + + true + } else { + false + }; for subterm in term.arguments() { - let contains_aggregate = Self::validate_term_head(builder, subterm)?; + let contains_aggregate = Self::validate_term_head(builder, subterm, group_by_variable)?; if contains_aggregate && first_aggregate { builder.report_error( @@ -162,13 +193,13 @@ impl Rule { ValidationErrorKind::UnsupportedAggregateMultiple, ); - return Err(()); + return None; } first_aggregate |= contains_aggregate; } - Ok(first_aggregate) + Some(first_aggregate) } /// Check if @@ -181,18 +212,18 @@ impl Rule { builder: &mut ValidationErrorBuilder, term: &Term, safe_variables: &HashSet<&Variable>, - ) -> Result<(), ()> { + ) -> Option<()> { if let Term::Primitive(Primitive::Variable(Variable::Existential(existential))) = term { builder.report_error( *existential.origin(), ValidationErrorKind::BodyExistential(Variable::Existential(existential.clone())), ); - return Err(()); + return None; } if term.is_aggregate() { builder.report_error(*term.origin(), ValidationErrorKind::BodyAggregate); - return Err(()); + return None; } if term.is_operation() { @@ -202,7 +233,7 @@ impl Rule { *operation_variable.origin(), ValidationErrorKind::OperationAnonymous, ); - return Err(()); + return None; } if !safe_variables.contains(operation_variable) { @@ -210,7 +241,7 @@ impl Rule { *operation_variable.origin(), ValidationErrorKind::OperationUnsafe(operation_variable.clone()), ); - return Err(()); + return None; } } } @@ -219,14 +250,14 @@ impl Rule { builder .report_error(*term.origin(), ValidationErrorKind::UnsupportedComplexTerm) .add_hint_option(Self::hint_term_operation(term)); - return Err(()); + return None; } for subterm in term.arguments() { Self::validate_term_body(builder, subterm, safe_variables)?; } - Ok(()) + Some(()) } /// If the given [Term] is a function term, @@ -306,6 +337,14 @@ impl ProgramComponent for Rule { where Self: Sized, { + if self.body_positive().next().is_none() { + builder.report_error( + self.origin, + ValidationErrorKind::UnsupportedNoPositiveLiterals, + ); + return None; + } + let safe_variables = self.safe_variables(); let is_existential = self .head() @@ -316,27 +355,6 @@ impl ProgramComponent for Rule { for atom in self.head() { atom.validate(builder)?; - let mut contains_aggregate = false; - for term in atom.arguments() { - if let Ok(aggregate) = 
Self::validate_term_head(builder, term) { - if aggregate && contains_aggregate { - builder.report_error( - *term.origin(), - ValidationErrorKind::UnsupportedAggregateMultiple, - ); - } - - if aggregate && is_existential { - builder.report_error( - *term.origin(), - ValidationErrorKind::UnsupportedAggregatesAndExistentials, - ); - } - - contains_aggregate |= aggregate; - } - } - for variable in atom.variables() { if let Some(variable_name) = variable.name() { if !variable.is_existential() && !safe_variables.contains(variable) { @@ -358,6 +376,40 @@ impl ProgramComponent for Rule { return None; } } + + let group_by_variables = atom + .arguments() + .flat_map(|term| { + if let Term::Primitive(Primitive::Variable(variable)) = term { + Some(variable) + } else { + None + } + }) + .collect::>(); + + let mut contains_aggregate = false; + for term in atom.arguments() { + if let Some(aggregate) = + Self::validate_term_head(builder, term, &group_by_variables) + { + if aggregate && contains_aggregate { + builder.report_error( + *term.origin(), + ValidationErrorKind::UnsupportedAggregateMultiple, + ); + } + + if aggregate && is_existential { + builder.report_error( + *term.origin(), + ValidationErrorKind::UnsupportedAggregatesAndExistentials, + ); + } + + contains_aggregate |= aggregate; + } + } } let mut negative_variables = HashSet::<&Variable>::new(); diff --git a/nemo/src/rule_model/components/term/aggregate.rs b/nemo/src/rule_model/components/term/aggregate.rs index 4e5931aae..f225e47af 100644 --- a/nemo/src/rule_model/components/term/aggregate.rs +++ b/nemo/src/rule_model/components/term/aggregate.rs @@ -2,6 +2,7 @@ #![allow(missing_docs)] use std::{ + collections::HashSet, fmt::{Display, Write}, hash::Hash, }; @@ -254,6 +255,39 @@ impl ProgramComponent for Aggregate { } } + let mut distinct_set = HashSet::new(); + for variable in &self.distinct { + let name = if variable.is_universal() { + if let Some(name) = variable.name() { + name + } else { + builder.report_error( + *variable.origin(), + ValidationErrorKind::AggregateDistinctNonNamedUniversal { + variable_type: String::from("anonymous"), + }, + ); + return None; + } + } else { + builder.report_error( + *variable.origin(), + ValidationErrorKind::AggregateDistinctNonNamedUniversal { + variable_type: String::from("existential"), + }, + ); + return None; + }; + + if !distinct_set.insert(variable) { + builder.report_error( + *variable.origin(), + ValidationErrorKind::AggregateRepeatedDistinctVariable { variable: name }, + ); + return None; + } + } + Some(()) } diff --git a/nemo/src/rule_model/error/validation_error.rs b/nemo/src/rule_model/error/validation_error.rs index 37337d553..9d9b29c8e 100644 --- a/nemo/src/rule_model/error/validation_error.rs +++ b/nemo/src/rule_model/error/validation_error.rs @@ -68,37 +68,52 @@ pub enum ValidationErrorKind { #[error(r#"used aggregate term of type `{found}`, expected `{expected}`"#)] #[assoc(code = 212)] AggregateInvalidValueType { found: String, expected: String }, + /// Aggregate has repeated distinct variables + #[error(r#"found repeated variable: `{variable}`"#)] + #[assoc(code = 213)] + #[assoc(note = "variables marked as distinct must not be repeated")] + AggregateRepeatedDistinctVariable { variable: String }, + /// Aggregate variable cannot be group-by-variable + #[error(r#"aggregation over group-by variable: `{variable}`"#)] + #[assoc(code = 214)] + #[assoc(note = "cannot aggregate over a variable that is also a group-by variable")] + AggregateOverGroupByVariable { variable: String }, + /// 
Distinct variables in aggregate must be named universal variables + #[error(r#"aggregation marks {variable_type} as distinct."#)] + #[assoc(code = 215)] + #[assoc(note = "distinct variables must be named universal variables")] + AggregateDistinctNonNamedUniversal { variable_type: String }, /// Empty function term #[error(r#"function term without arguments"#)] - #[assoc(code = 213)] + #[assoc(code = 216)] FunctionTermEmpty, /// Wrong number of arguments for function #[error(r#"operation used with {used} arguments, expected {expected}"#)] - #[assoc(code = 214)] + #[assoc(code = 217)] OperationArgumentNumber { used: usize, expected: String }, /// Anonymous variable used in operation #[error(r#"anonymous variable used in operation"#)] - #[assoc(code = 215)] + #[assoc(code = 218)] OperationAnonymous, /// Inconsistent arities for predicates #[error(r#"predicate {predicate} used with multiple arities."#)] - #[assoc(code = 216)] + #[assoc(code = 219)] #[assoc(note = "each predicate is only allowed to have one arity")] InconsistentArities { predicate: String }, /// Import/Export: Missing required attribute #[error(r#"missing required parameter `{attribute}` in {direction} statement"#)] - #[assoc(code = 217)] + #[assoc(code = 220)] ImportExportMissingRequiredAttribute { attribute: String, direction: String, }, /// Import/Export: Unrecognized parameter #[error(r#"file format {format} does not recognize parameter `{attribute}`"#)] - #[assoc(code = 218)] + #[assoc(code = 221)] ImportExportUnrecognizedAttribute { format: String, attribute: String }, /// Import/Export: wrong input type for resource attribute #[error(r#"parameter `{parameter}` was given as a `{given}`, expected `{expected}`"#)] - #[assoc(code = 219)] + #[assoc(code = 222)] ImportExportAttributeValueType { parameter: String, given: String, @@ -106,19 +121,19 @@ pub enum ValidationErrorKind { }, /// Import/Export: dsv wrong value format #[error(r#"unknown {file_format} value format"#)] - #[assoc(code = 220)] + #[assoc(code = 223)] ImportExportValueFormat { file_format: String }, /// Import/Export: negative limit #[error(r#"limit was negative"#)] - #[assoc(code = 221)] + #[assoc(code = 224)] ImportExportLimitNegative, /// Import/Export: delimiter #[error(r#"delimiter must be a single character"#)] - #[assoc(code = 222)] + #[assoc(code = 225)] ImportExportDelimiter, /// Import/Export: unknown compression format #[error(r#"unknown compression format `{format}`"#)] - #[assoc(code = 223)] + #[assoc(code = 226)] ImportExportUnknownCompression { format: String }, /// Unsupported feature: Multiple aggregates in one rule @@ -141,4 +156,8 @@ pub enum ValidationErrorKind { #[error(r#"exporting in json is currently unsupported"#)] #[assoc(code = 995)] UnsupportedJsonExport, + /// Unsupported feature: Rules without positive literals + #[error(r#"rule without positive literals are currently unsupported"#)] + #[assoc(code = 994)] + UnsupportedNoPositiveLiterals, } From 8b1dcced9dd76ca4ee522342409682ec4ae583b4 Mon Sep 17 00:00:00 2001 From: Alex Ivliev Date: Mon, 23 Sep 2024 17:39:04 +0200 Subject: [PATCH 202/214] Adjust testcase for new validation errors --- .../regression/planning_engine/independent_head/run.rls | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/resources/testcases/regression/planning_engine/independent_head/run.rls b/resources/testcases/regression/planning_engine/independent_head/run.rls index 7b396c897..69da7cc0a 100644 --- a/resources/testcases/regression/planning_engine/independent_head/run.rls +++ 
b/resources/testcases/regression/planning_engine/independent_head/run.rls @@ -3,9 +3,10 @@ %%% %%% TODO: Handle missing rule +b(0). b(1). -a(1) :- ~b(1). +a(1) :- b(0), ~b(1). % a(2) :- ~b(2). @export a :- csv {}. From 8b21a7c0c7df5a3ca92b84cf75eff2fcecd789fb Mon Sep 17 00:00:00 2001 From: Alex Ivliev Date: Mon, 23 Sep 2024 17:54:59 +0200 Subject: [PATCH 203/214] Clippy --- nemo/src/parser/error.rs | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/nemo/src/parser/error.rs b/nemo/src/parser/error.rs index d6b925aeb..3fcbd4401 100644 --- a/nemo/src/parser/error.rs +++ b/nemo/src/parser/error.rs @@ -128,7 +128,7 @@ pub(crate) fn report_error<'a>( // todo!() // } // }; - for error in _get_deepest_error(&err) { + for error in _get_deepest_error(err) { input.state.report_error(error); } // let (_deepest_pos, errors) = get_deepest_errors(err); @@ -205,7 +205,7 @@ fn _get_deepest_error<'a>(e: &'a ParserErrorTree<'a>) -> Vec { context.append(&mut error.context); context.append( &mut contexts - .into_iter() + .iter() .map(|(_, stack_context)| match stack_context { StackContext::Kind(_) => todo!("unclear when NomErrorKind will occur"), StackContext::Context(cxt) => *cxt, @@ -220,7 +220,7 @@ fn _get_deepest_error<'a>(e: &'a ParserErrorTree<'a>) -> Vec { vec![ParserError { position: errors[0].position, context: contexts - .into_iter() + .iter() .map(|(_, stack_context)| match stack_context { StackContext::Kind(_) => todo!("unclear when NomErrorKind will occur"), StackContext::Context(cxt) => *cxt, @@ -235,12 +235,14 @@ fn _get_deepest_error<'a>(e: &'a ParserErrorTree<'a>) -> Vec { for error in vec { let errors = _get_deepest_error(error); for inner_error in errors { - if inner_error.position == farthest_pos { - farthest_errors.push(inner_error); - } else if inner_error.position > farthest_pos { - farthest_pos = inner_error.position; - farthest_errors.clear(); - farthest_errors.push(inner_error); + match inner_error.position.cmp(&farthest_pos) { + std::cmp::Ordering::Equal => farthest_errors.push(inner_error), + std::cmp::Ordering::Greater => { + farthest_pos = inner_error.position; + farthest_errors.clear(); + farthest_errors.push(inner_error); + } + std::cmp::Ordering::Less => {} } } } From bc7d957ffa34b06dae85fcfc72c11bcdac4425ff Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Tue, 24 Sep 2024 11:46:18 +0200 Subject: [PATCH 204/214] Fix wrong error position reporting bug (#522) --- nemo/src/rule_model/error.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/nemo/src/rule_model/error.rs b/nemo/src/rule_model/error.rs index 5e2a0f6f4..a83245334 100644 --- a/nemo/src/rule_model/error.rs +++ b/nemo/src/rule_model/error.rs @@ -11,7 +11,7 @@ use std::{ ops::Range, }; -use ariadne::{Color, Label, ReportBuilder}; +use ariadne::{Color, Config, Label, ReportBuilder}; use hint::Hint; use translation_error::TranslationErrorKind; use validation_error::ValidationErrorKind; @@ -344,9 +344,11 @@ impl ProgramError { where Translation: Fn(&Origin) -> Range, { + let config = Config::default().with_index_type(ariadne::IndexType::Byte); report = report .with_code(self.error_code()) - .with_message(self.message()); + .with_message(self.message()) + .with_config(config); if let Some(note) = self.note() { report = report.with_note(note); From 28c93d8664fde4aaa53a2df4db4503e14499c2d6 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Tue, 24 Sep 2024 13:43:18 +0200 Subject: [PATCH 205/214] Add missing ariadne config to ReportBuilder (#522) --- 
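Both this commit and the previous one force ariadne to resolve label ranges
as byte offsets. The parser's spans are byte-based (nom_locate's
location_offset()), while ariadne's default indexing is character-based, so
diagnostics drifted on inputs containing multi-byte UTF-8 characters (#522).
A minimal sketch of the intended usage, assuming ariadne 0.4 and a
hypothetical source file demo.rls:

    use ariadne::{Color, Config, IndexType, Label, Report, ReportKind, Source};

    fn report_at_byte_offset() {
        // 'ä' occupies two bytes in UTF-8, so byte offset 4 is the '(',
        // while character index 4 would already be the 'a'.
        let src = "päd(a, b)";
        Report::build(ReportKind::Error, "demo.rls", 4)
            .with_message("unexpected token")
            .with_config(Config::default().with_index_type(IndexType::Byte))
            .with_label(
                Label::new(("demo.rls", 4..5))
                    .with_message("error occurs here")
                    .with_color(Color::Red),
            )
            .finish()
            .print(("demo.rls", Source::from(src)))
            .unwrap();
    }

Without IndexType::Byte, the same 4..5 range would be resolved in characters
and the label would shift one column to the right for every multi-byte
character that precedes it.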
nemo/src/parser.rs | 3 ++- nemo/src/rule_model/error.rs | 12 +++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/nemo/src/parser.rs b/nemo/src/parser.rs index 922409d94..0e84988c3 100644 --- a/nemo/src/parser.rs +++ b/nemo/src/parser.rs @@ -8,7 +8,7 @@ pub mod span; use std::{cell::RefCell, ops::Range, rc::Rc}; -use ariadne::{Color, Label, Report, ReportKind, Source}; +use ariadne::{Color, Config, Label, Report, ReportKind, Source}; use ast::{program::Program, ProgramAST}; use error::{ParserError, ParserErrorTree}; use input::ParserInput; @@ -82,6 +82,7 @@ impl<'a> ParserErrorReport<'a> { .with_message(message) .with_color(Color::Red), ) + .with_config(Config::default().with_index_type(ariadne::IndexType::Byte)) .finish() }) } diff --git a/nemo/src/rule_model/error.rs b/nemo/src/rule_model/error.rs index a83245334..1fb2d00a2 100644 --- a/nemo/src/rule_model/error.rs +++ b/nemo/src/rule_model/error.rs @@ -149,11 +149,13 @@ where ComplexErrorLabelKind::Information => Color::BrightBlue, }; - report = report.with_label( - Label::new((source_label.clone(), translation(&label.reference))) - .with_message(label.message.clone()) - .with_color(color), - ); + report = report + .with_label( + Label::new((source_label.clone(), translation(&label.reference))) + .with_message(label.message.clone()) + .with_color(color), + ) + .with_config(Config::default().with_index_type(ariadne::IndexType::Byte)); } for hint in &self.hints { From 3777c47436a2047a389194a5413ad7a543d1d8c4 Mon Sep 17 00:00:00 2001 From: monsterkrampe Date: Tue, 24 Sep 2024 14:17:51 +0200 Subject: [PATCH 206/214] LSP: Mark whole error range Fixes #538 --- nemo-language-server/src/language_server.rs | 24 +++++++++--------- nemo/src/parser/span.rs | 27 ++++++++++++++++++++- 2 files changed, 38 insertions(+), 13 deletions(-) diff --git a/nemo-language-server/src/language_server.rs b/nemo-language-server/src/language_server.rs index 2b12d38bc..11f1790b1 100644 --- a/nemo-language-server/src/language_server.rs +++ b/nemo-language-server/src/language_server.rs @@ -14,7 +14,7 @@ use lsp_component::LSPComponent; use nemo::parser::ast::program::Program; use nemo::parser::ast::ProgramAST; use nemo::parser::context::ParserContext; -use nemo::parser::span::CharacterPosition; +use nemo::parser::span::{CharacterPosition, CharacterRange}; use nemo::parser::{Parser, ParserErrorReport}; use nemo_position::{ lsp_position_to_nemo_position, nemo_range_to_lsp_range, PositionConversionError, @@ -118,13 +118,13 @@ impl Backend { }); // Group errors by position and deduplicate error - let mut errors_by_posision: BTreeMap> = BTreeMap::new(); + let mut errors_by_posision: BTreeMap> = BTreeMap::new(); for error in parse_errors.iter().flat_map(|report| report.errors()) { - if let Some(set) = errors_by_posision.get_mut(&error.position) { + if let Some(set) = errors_by_posision.get_mut(&CharacterRange::from(error.position)) { set.insert(format!("expected `{}`", error.context[0].name())); } else { errors_by_posision.insert( - error.position, + CharacterRange::from(error.position), std::iter::once(format!("expected `{}`", error.context[0].name())).collect(), ); }; @@ -138,7 +138,7 @@ impl Backend { .get(origin) .map(|node| node.span().range()) }); - let Some(position) = range_opt.map(|r| r.start) else { + let Some(range) = range_opt else { continue; }; @@ -156,33 +156,33 @@ impl Backend { .collect::(), ); - if let Some(set) = errors_by_posision.get_mut(&position) { + if let Some(set) = errors_by_posision.get_mut(&range) { set.insert(message); } 
else { - errors_by_posision.insert(position, std::iter::once(message).collect()); + errors_by_posision.insert(range, std::iter::once(message).collect()); }; } } let diagnostics = errors_by_posision .into_iter() - .map(|(pos, error_set)| { + .map(|(range, error_set)| { Ok(Diagnostic { message: error_set.into_iter().collect::>().join("\n\n"), range: Range::new( line_col_to_lsp_position( &line_index, LineCol { - line: pos.line - 1, - col: pos.column - 1, + line: range.start.line - 1, + col: range.start.column - 1, }, ) .unwrap(), line_col_to_lsp_position( &line_index, LineCol { - line: pos.line - 1, - col: pos.column - 1 + 1, + line: range.end.line - 1, + col: range.end.column - 1, }, ) .unwrap(), diff --git a/nemo/src/parser/span.rs b/nemo/src/parser/span.rs index 8ad459e8a..818449557 100644 --- a/nemo/src/parser/span.rs +++ b/nemo/src/parser/span.rs @@ -44,7 +44,7 @@ impl Ord for CharacterPosition { } /// Describes a region of text with [CharacterPosition]s -#[derive(Debug, Clone, Copy)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct CharacterRange { /// Start position pub start: CharacterPosition, @@ -59,6 +59,31 @@ impl CharacterRange { } } +impl Ord for CharacterRange { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.start.cmp(&other.start).then(self.end.cmp(&other.end)) + } +} + +impl PartialOrd for CharacterRange { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl From for CharacterRange { + fn from(pos: CharacterPosition) -> Self { + CharacterRange { + start: pos, + end: CharacterPosition { + offset: pos.offset + 1, + line: pos.line, + column: pos.column + 1, + }, + } + } +} + /// Maker for a region of text within a string slice #[derive(Debug, Clone, Copy)] pub struct Span<'a> { From 9a036d1366593bfb5487caf9de1c49eb55a012d5 Mon Sep 17 00:00:00 2001 From: Alex Ivliev Date: Wed, 25 Sep 2024 09:31:37 +0200 Subject: [PATCH 207/214] Add testcase releated to 443 --- .../regression/planning_engine/arithmetic_chain/run.rls | 9 +++++++++ .../planning_engine/arithmetic_chain/run/C.csv | 1 + 2 files changed, 10 insertions(+) create mode 100644 resources/testcases/regression/planning_engine/arithmetic_chain/run.rls create mode 100644 resources/testcases/regression/planning_engine/arithmetic_chain/run/C.csv diff --git a/resources/testcases/regression/planning_engine/arithmetic_chain/run.rls b/resources/testcases/regression/planning_engine/arithmetic_chain/run.rls new file mode 100644 index 000000000..467ea16fb --- /dev/null +++ b/resources/testcases/regression/planning_engine/arithmetic_chain/run.rls @@ -0,0 +1,9 @@ +%! Test related to +%! https://github.com/knowsys/nemo/issues/443 + +A(1, 2). +B(1, 10). + +C(?x, ?y * 5) :- A(?x, ?z), B(?x, ?v), ?y = ?z + ?v, ?y > 10 . + +@export C :- csv {}. 
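With these facts the rule fires exactly once: ?x = 1 joins ?z = 2 from A and
?v = 10 from B, so ?y = 2 + 10 = 12; the guard 12 > 10 holds and the head
value is 12 * 5 = 60, giving the expected tuple 1,60 in C.csv below.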
\ No newline at end of file diff --git a/resources/testcases/regression/planning_engine/arithmetic_chain/run/C.csv b/resources/testcases/regression/planning_engine/arithmetic_chain/run/C.csv new file mode 100644 index 000000000..af9c9e975 --- /dev/null +++ b/resources/testcases/regression/planning_engine/arithmetic_chain/run/C.csv @@ -0,0 +1 @@ +1,60 \ No newline at end of file From b9641584d7716966f92f0c9d55ba96fc07e15ad1 Mon Sep 17 00:00:00 2001 From: Jakob Steinberg Date: Fri, 27 Sep 2024 12:12:29 +0200 Subject: [PATCH 208/214] Replace ANSI escape codes with methods from colored crate --- Cargo.lock | 1 + nemo/Cargo.toml | 1 + nemo/src/parser/ast.rs | 17 +++++++++-------- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 96aae21f4..861bebd8a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1470,6 +1470,7 @@ dependencies = [ "ascii_tree", "assert_fs", "bytecount", + "colored", "csv", "dyn-clone", "enum-assoc", diff --git a/nemo/Cargo.toml b/nemo/Cargo.toml index 6ac85cd79..d4b4e9c18 100644 --- a/nemo/Cargo.toml +++ b/nemo/Cargo.toml @@ -52,6 +52,7 @@ strum = "0.26.3" strum_macros = "0.26.4" similar-string = "1.4.3" bytecount = "0.6.8" +colored = "2" [dev-dependencies] env_logger = "*" diff --git a/nemo/src/parser/ast.rs b/nemo/src/parser/ast.rs index cc1f3eb06..5f332cd1d 100644 --- a/nemo/src/parser/ast.rs +++ b/nemo/src/parser/ast.rs @@ -16,6 +16,7 @@ use std::fmt::Debug; use super::{context::ParserContext, span::Span, ParserInput, ParserResult}; use ascii_tree::Tree; +use colored::Colorize; /// Trait implemented by nodes in the abstract syntax tree pub trait ProgramAST<'a>: Debug + Sync { @@ -39,11 +40,6 @@ pub(crate) fn ast_to_ascii_tree<'a>(node: &'a dyn ProgramAST<'a>) -> Tree { for child in node.children() { vec.push(ast_to_ascii_tree(child)); } - let colour = if node.children().is_empty() { - "\x1b[91m" - } else { - "\x1b[92m" - }; let span = node.span(); let str = if span.fragment().len() > 60 { format!("{:?}[…]", &span.fragment()[0..60]) @@ -52,10 +48,15 @@ pub(crate) fn ast_to_ascii_tree<'a>(node: &'a dyn ProgramAST<'a>) -> Tree { }; Tree::Node( format!( - "{} \x1b[34m@{}:{} {colour}{str}\x1b[0m", + "{} @{}:{} {}", node.context().name(), - node.span().location_line(), - node.span().get_utf8_column() + node.span().location_line().to_string().blue(), + node.span().get_utf8_column(), + if node.children().is_empty() { + str.bright_red() + } else { + str.bright_green() + } ), vec, ) From 2f84e04fe4c3fd7911b86706385c3d6619b510c3 Mon Sep 17 00:00:00 2001 From: Alex Ivliev Date: Mon, 30 Sep 2024 15:28:28 +0200 Subject: [PATCH 209/214] Fix parser crash when error was pointing to last character --- nemo/src/parser/span.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nemo/src/parser/span.rs b/nemo/src/parser/span.rs index 818449557..efbb51819 100644 --- a/nemo/src/parser/span.rs +++ b/nemo/src/parser/span.rs @@ -19,9 +19,9 @@ pub struct CharacterPosition { } impl CharacterPosition { - /// Return a one character range at this position + /// Return a zero character range at this position pub fn range(&self) -> Range { - self.offset..(self.offset + 1) + self.offset..self.offset } } From 9fa5af18d97733ed614625755b4f9b5657ad9beb Mon Sep 17 00:00:00 2001 From: Alex Ivliev Date: Mon, 30 Sep 2024 15:37:22 +0200 Subject: [PATCH 210/214] Clippy --- nemo-physical/src/columnar/columnscan.rs | 2 +- nemo-physical/src/datavalues/integer_datavalues.rs | 2 +- nemo-physical/src/function/definitions/string.rs | 2 +- 
nemo-physical/src/meta/timing.rs | 2 +-
 nemo-physical/src/util/mapping/ordered_choice.rs | 1 -
 nemo/src/rule_model/components.rs | 1 -
 6 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/nemo-physical/src/columnar/columnscan.rs b/nemo-physical/src/columnar/columnscan.rs
index dbea9bd8e..baa4fe3db 100644
--- a/nemo-physical/src/columnar/columnscan.rs
+++ b/nemo-physical/src/columnar/columnscan.rs
@@ -80,7 +80,7 @@ where
     Union(ColumnScanUnion<'a, T>),
 }
 
-/// The following impl statements allow converting from a specific [ColumnScan] into a gerneral [ColumnScanEnum]
+// The following impl statements allow converting from a specific [ColumnScan] into a general [ColumnScanEnum]
 
 impl<'a, T> From> for ColumnScanEnum<'a, T>
 where
diff --git a/nemo-physical/src/datavalues/integer_datavalues.rs b/nemo-physical/src/datavalues/integer_datavalues.rs
index e02e16b06..89ed6609d 100644
--- a/nemo-physical/src/datavalues/integer_datavalues.rs
+++ b/nemo-physical/src/datavalues/integer_datavalues.rs
@@ -20,7 +20,7 @@ const U32MAX_AS_I64: i64 = 0xFFFF_FFFF;
 const I32MAX_AS_I64: i64 = U32MAX_AS_I64 >> 1;
 /// Minimal value of an i32, i.e. -2^31.
 const I32MIN_AS_I64: i64 = -I32MAX_AS_I64 - 1;
-/// Minimal value of an i64, i.e. -2^63.
+// /// Minimal value of an i64, i.e. -2^63.
 //const I64MIN_AS_I64: i64 = i64::MIN;
 
 /// Physical representation of an integer as an u64.
diff --git a/nemo-physical/src/function/definitions/string.rs b/nemo-physical/src/function/definitions/string.rs
index 5851b1f6e..b4ffff62c 100644
--- a/nemo-physical/src/function/definitions/string.rs
+++ b/nemo-physical/src/function/definitions/string.rs
@@ -17,7 +17,7 @@ use super::{
 ///
 /// Returns the index of the first occurrence of the needle in the haystack
 /// or `None` if the needle is not found.
-fn unicode_find(haystack: &String, needle: &String) -> Option<usize> {
+fn unicode_find(haystack: &str, needle: &str) -> Option<usize> {
     let haystack_graphemes = haystack.graphemes(true).collect::<Vec<_>>();
     let needle_graphemes = needle.graphemes(true).collect::<Vec<_>>();
     if needle_graphemes.len() > haystack_graphemes.len() {
diff --git a/nemo-physical/src/meta/timing.rs b/nemo-physical/src/meta/timing.rs
index 37aac1d7a..f2b6f7209 100644
--- a/nemo-physical/src/meta/timing.rs
+++ b/nemo-physical/src/meta/timing.rs
@@ -255,7 +255,7 @@ impl TimedCode {
     }
 
     /// Turns e.g. (Test, 0.642355,1234,56) into "Test [64.2%, 1234ms, 56x]"
-    fn format_title(title: &String, percentage: f64, msecs: u128, runs: u64) -> String {
+    fn format_title(title: &str, percentage: f64, msecs: u128, runs: u64) -> String {
         let result = format!("{title} [{percentage:.1}%, {msecs}ms, {runs}x]");
         result
     }
diff --git a/nemo-physical/src/util/mapping/ordered_choice.rs b/nemo-physical/src/util/mapping/ordered_choice.rs
index 72bcf8446..bf791d7bc 100644
--- a/nemo-physical/src/util/mapping/ordered_choice.rs
+++ b/nemo-physical/src/util/mapping/ordered_choice.rs
@@ -153,7 +153,6 @@ impl Display for SortedChoice {
 #[cfg(test)]
 impl SortedChoice {
     /// Return an instance of the function from a vector representation where the input `vec[i]` is mapped to `i`.
-
     pub(crate) fn from_vector(vec: Vec<usize>, domain_size: usize) -> Self {
         let mut map = HashMap::<usize, usize>::new();
         for (value, input) in vec.into_iter().enumerate() {
diff --git a/nemo/src/rule_model/components.rs b/nemo/src/rule_model/components.rs
index fedebebd4..af2e333ac 100644
--- a/nemo/src/rule_model/components.rs
+++ b/nemo/src/rule_model/components.rs
@@ -3,7 +3,6 @@
 #![allow(missing_docs)]
 
 #[macro_use]
-
 pub mod atom;
 pub mod datatype;
 pub mod fact;

From 03f3a2afa0d6510511acb0118d639dda043ef526 Mon Sep 17 00:00:00 2001
From: Alex Ivliev
Date: Tue, 1 Oct 2024 11:08:51 +0200
Subject: [PATCH 211/214] Print pretty error messages in nemo-web output
 console

---
 Cargo.lock | 100 +++++++++++++++++++-
 nemo-cli/src/main.rs | 4 +-
 nemo-language-server/src/language_server.rs | 4 +-
 nemo-wasm/src/lib.rs | 80 ++++++++--------
 nemo/Cargo.toml | 2 +-
 nemo/src/parser.rs | 47 +++++++--
 nemo/src/parser/error.rs | 9 ++
 nemo/src/rule_model/error.rs | 6 --
 nemo/src/rule_model/translation.rs | 46 +++++++--
 9 files changed, 228 insertions(+), 70 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 861bebd8a..96e9b96bc 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1,6 +1,6 @@
 # This file is automatically @generated by Cargo.
 # It is not intended for manual editing.
-version = 3
+version = 4
 
 [[package]]
 name = "addr2line"
@@ -123,6 +123,7 @@
 version = "0.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "44055e597c674aef7cb903b2b9f6e4cba1277ed0d2d61dae7cd52d7ffa81f8e2"
 dependencies = [
+ "concolor",
 "unicode-width",
 "yansi",
 ]
@@ -423,6 +424,26 @@ dependencies = [
 "windows-sys 0.48.0",
 ]
 
+[[package]]
+name = "concolor"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b946244a988c390a94667ae0e3958411fa40cc46ea496a929b263d883f5f9c3"
+dependencies = [
+ "bitflags 1.3.2",
+ "concolor-query",
+ "is-terminal",
+]
+
+[[package]]
+name = "concolor-query"
+version = "0.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "88d11d52c3d7ca2e6d0040212be9e4dbbcd78b6447f535b6b561f449427944cf"
+dependencies = [
+ "windows-sys 0.45.0",
+]
+
 [[package]]
 name = "console_error_panic_hook"
 version = "0.1.7"
@@ -1259,6 +1280,17 @@ version = "2.9.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3"
 
+[[package]]
+name = "is-terminal"
+version = "0.4.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b"
+dependencies = [
+ "hermit-abi 0.3.9",
+ "libc",
+ "windows-sys 0.52.0",
+]
+
 [[package]]
 name = "is_terminal_polyfill"
 version = "1.70.0"
@@ -3121,6 +3153,15 @@ version = "0.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
 
+[[package]]
+name = "windows-sys"
+version = "0.45.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0"
+dependencies = [
+ "windows-targets 0.42.2",
+]
+
 [[package]]
 name = "windows-sys"
 version = "0.48.0"
@@ -3139,6 +3180,21 @@ dependencies = [
 "windows-targets 0.52.5",
 ]
 
+[[package]]
+name = "windows-targets"
+version = "0.42.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071"
+dependencies = [
+ "windows_aarch64_gnullvm 0.42.2",
+ 
"windows_aarch64_msvc 0.42.2", + "windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm 0.42.2", + "windows_x86_64_msvc 0.42.2", +] + [[package]] name = "windows-targets" version = "0.48.5" @@ -3170,6 +3226,12 @@ dependencies = [ "windows_x86_64_msvc 0.52.5", ] +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" + [[package]] name = "windows_aarch64_gnullvm" version = "0.48.5" @@ -3182,6 +3244,12 @@ version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" +[[package]] +name = "windows_aarch64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" + [[package]] name = "windows_aarch64_msvc" version = "0.48.5" @@ -3194,6 +3262,12 @@ version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" +[[package]] +name = "windows_i686_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" + [[package]] name = "windows_i686_gnu" version = "0.48.5" @@ -3212,6 +3286,12 @@ version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" +[[package]] +name = "windows_i686_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" + [[package]] name = "windows_i686_msvc" version = "0.48.5" @@ -3224,6 +3304,12 @@ version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" +[[package]] +name = "windows_x86_64_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" + [[package]] name = "windows_x86_64_gnu" version = "0.48.5" @@ -3236,6 +3322,12 @@ version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" + [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" @@ -3248,6 +3340,12 @@ version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" +[[package]] +name = "windows_x86_64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" + [[package]] name = "windows_x86_64_msvc" version = "0.48.5" diff --git a/nemo-cli/src/main.rs b/nemo-cli/src/main.rs index 6f4fe512e..7d2ce78e8 100644 --- a/nemo-cli/src/main.rs +++ b/nemo-cli/src/main.rs @@ -253,7 +253,7 @@ fn run(mut cli: CliApp) -> Result<(), CliError> { { Ok(program) 
=> program, Err((_program, report)) => { - report.eprint(report.build_reports())?; + report.eprint()?; return Err(CliError::ProgramParsing { filename: program_filename.clone(), }); @@ -268,7 +268,7 @@ fn run(mut cli: CliApp) -> Result<(), CliError> { { Ok(program) => program, Err(report) => { - report.eprint(report.build_reports().into_iter())?; + report.eprint()?; return Err(CliError::ProgramParsing { filename: program_filename, }); diff --git a/nemo-language-server/src/language_server.rs b/nemo-language-server/src/language_server.rs index 11f1790b1..57254a746 100644 --- a/nemo-language-server/src/language_server.rs +++ b/nemo-language-server/src/language_server.rs @@ -121,11 +121,11 @@ impl Backend { let mut errors_by_posision: BTreeMap> = BTreeMap::new(); for error in parse_errors.iter().flat_map(|report| report.errors()) { if let Some(set) = errors_by_posision.get_mut(&CharacterRange::from(error.position)) { - set.insert(format!("expected `{}`", error.context[0].name())); + set.insert(error.to_string()); } else { errors_by_posision.insert( CharacterRange::from(error.position), - std::iter::once(format!("expected `{}`", error.context[0].name())).collect(), + std::iter::once(error.to_string()).collect(), ); }; } diff --git a/nemo-wasm/src/lib.rs b/nemo-wasm/src/lib.rs index b4360d002..0b9738ebe 100644 --- a/nemo-wasm/src/lib.rs +++ b/nemo-wasm/src/lib.rs @@ -1,38 +1,34 @@ #![feature(alloc_error_hook)] -use std::alloc::Layout; -use std::collections::HashMap; -use std::fmt::Formatter; -use std::io::Cursor; - -use js_sys::Array; -use js_sys::Reflect; -use js_sys::Set; -use js_sys::Uint8Array; -use nemo::execution::tracing::trace::ExecutionTrace; -use nemo::execution::tracing::trace::ExecutionTraceTree; -use nemo::execution::tracing::trace::TraceFactHandle; -use nemo::execution::ExecutionEngine; - -use nemo::io::resource_providers::{ResourceProvider, ResourceProviders}; -use nemo::io::ImportManager; -use nemo::rule_model::components::import_export::attributes::ImportExportAttribute; -use nemo::rule_model::components::ProgramComponent; -use nemo::rule_model::components::{ - fact::Fact, import_export::compression::CompressionFormat, tag::Tag, - term::primitive::Primitive, term::Term, -}; -use nemo_physical::datavalues::AnyDataValue; -use nemo_physical::datavalues::DataValue; -use nemo_physical::error::ExternalReadingError; -use nemo_physical::error::ReadingError; -use nemo_physical::resource::Resource; +use std::{alloc::Layout, collections::HashMap, fmt::Formatter, io::Cursor}; + +use js_sys::{Array, Reflect, Set, Uint8Array}; use thiserror::Error; -use wasm_bindgen::prelude::wasm_bindgen; -use wasm_bindgen::JsCast; -use wasm_bindgen::JsValue; -use web_sys::Blob; -use web_sys::FileReaderSync; +use wasm_bindgen::{prelude::wasm_bindgen, JsCast, JsValue}; +use web_sys::{Blob, FileReaderSync}; + +use nemo::{ + datavalues::{AnyDataValue, DataValue}, + error::ReadingError, + execution::{ + tracing::trace::{ExecutionTrace, ExecutionTraceTree, TraceFactHandle}, + ExecutionEngine, + }, + io::{ + resource_providers::{ResourceProvider, ResourceProviders}, + ImportManager, + }, + rule_model::components::{ + fact::Fact, + import_export::{attributes::ImportExportAttribute, compression::CompressionFormat}, + parse::ComponentParseError, + tag::Tag, + term::{primitive::Primitive, Term}, + ProgramComponent, + }, +}; + +use nemo_physical::{error::ExternalReadingError, resource::Resource}; mod language_server; @@ -47,12 +43,12 @@ enum WasmOrInternalNemoError { /// Nemo-internal error #[error(transparent)] 
Nemo(#[from] nemo::error::Error),
-    #[error("ComponentParseError: {0:#?}")]
-    ComponentParse(nemo::rule_model::components::parse::ComponentParseError),
-    #[error("ParserError: {0:#?}")]
-    Parser(Vec<ParserError>),
-    #[error("ProgramError: {0:#?}")]
-    Program(Vec<ProgramError>),
+    #[error("Unable to parse component:\n {0}")]
+    ComponentParse(ComponentParseError),
+    #[error("Unable to parse program:\n {0}")]
+    Parser(String),
+    #[error("Invalid program:\n {0}")]
+    Program(String),
     #[error("Internal reflection error: {0:#?}")]
     Reflection(JsValue),
 }
@@ -67,7 +63,7 @@ impl NemoError {
     #[allow(clippy::inherent_to_string)]
     #[wasm_bindgen(js_name = "toString")]
     pub fn to_string(&self) -> String {
-        format!("NemoError: {}", self.0)
+        format!("{}", self.0)
     }
 }
 
@@ -77,7 +73,7 @@ impl NemoProgram {
     pub fn new(input: &str) -> Result<NemoProgram, NemoError> {
         nemo::parser::Parser::initialize(input, PROGRAM_LABEL.to_string())
             .parse()
-            .map_err(|(_, report)| WasmOrInternalNemoError::Parser(report.errors().clone()))
+            .map_err(|(_, report)| WasmOrInternalNemoError::Parser(format!("{}", report)))
             .map_err(NemoError)
             .and_then(|ast| {
                 nemo::rule_model::translation::ASTProgramTranslation::initialize(
                     input,
                     PROGRAM_LABEL.to_string(),
                 )
                 .translate(&ast)
-                .map_err(|report| WasmOrInternalNemoError::Program(report.errors().clone()))
+                .map_err(|report| WasmOrInternalNemoError::Program(format!("{}", report)))
                 .map_err(NemoError)
                 .map(NemoProgram)
             })
@@ -392,7 +388,7 @@ impl NemoEngine {
                     iter.into_iter().flatten().nth(row_index);
 
                 if let Some(terms_to_trace) = terms_to_trace_opt {
-                    let fact_to_trace: Fact = Fact::new(
+                    let fact_to_trace = Fact::new(
                         &predicate,
                         terms_to_trace
                             .into_iter()
diff --git a/nemo/Cargo.toml b/nemo/Cargo.toml
index d4b4e9c18..013974e13 100644
--- a/nemo/Cargo.toml
+++ b/nemo/Cargo.toml
@@ -47,7 +47,7 @@ unicode-ident = "1.0.12"
 nom-greedyerror = "0.5.0"
 nom-supreme = "0.8.0"
 enum-assoc = "1.1.0"
-ariadne = "0.4.1"
+ariadne = { version = "0.4.1", features = ["auto-color"] }
 strum = "0.26.3"
 strum_macros = "0.26.4"
 similar-string = "1.4.3"
diff --git a/nemo/src/parser.rs b/nemo/src/parser.rs
index 0e84988c3..d463bc5fb 100644
--- a/nemo/src/parser.rs
+++ b/nemo/src/parser.rs
@@ -44,7 +44,6 @@ pub struct Parser<'a> {
 }
 
 /// Contains all errors that occurred during parsing
-#[derive(Debug)]
 pub struct ParserErrorReport<'a> {
     /// Reference to the text that is going to be parsed
     input: &'a str,
@@ -56,13 +55,9 @@ pub struct ParserErrorReport<'a> {
 
 impl<'a> ParserErrorReport<'a> {
     /// Print the given reports.
-    pub fn eprint<'s, ReportIterator>(
-        &'s self,
-        reports: ReportIterator,
-    ) -> Result<(), std::io::Error>
-    where
-        ReportIterator: Iterator<Item = Report<'s, (String, Range<usize>)>>,
-    {
+    pub fn eprint(&self) -> Result<(), std::io::Error> {
+        let reports = self.build_reports();
+
         for report in reports {
             report.eprint((self.label.clone(), Source::from(self.input)))?;
         }
@@ -70,10 +65,21 @@ impl<'a> ParserErrorReport<'a> {
         Ok(())
     }
 
+    /// Write this report to a given writer.
+    pub fn write(&self, writer: &mut impl std::io::Write) -> Result<(), std::io::Error> {
+        let reports = self.build_reports();
+
+        for report in reports {
+            report.write((self.label.clone(), Source::from(self.input)), &mut *writer)?
+        }
+
+        Ok(())
+    }
+
     /// Build a [Report] for each error.
pub fn build_reports(&'a self) -> impl Iterator<Item = Report<'a, (String, Range<usize>)>> {
         self.errors.iter().map(move |error| {
-            let message = format!("expected `{}`", error.context[0].name());
+            let message = error.to_string();
 
             Report::build(ReportKind::Error, self.label.clone(), error.position.offset)
                 .with_message(message.clone())
@@ -93,6 +99,29 @@ impl<'a> ParserErrorReport<'a> {
     }
 }
 
+impl<'a> std::fmt::Debug for ParserErrorReport<'a> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let reports = self.build_reports();
+
+        for report in reports {
+            report.fmt(f)?;
+        }
+
+        Ok(())
+    }
+}
+
+impl<'a> std::fmt::Display for ParserErrorReport<'a> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let mut buffer = Vec::new();
+        if let Err(_) = self.write(&mut buffer) {
+            return Err(std::fmt::Error);
+        }
+
+        write!(f, "{}", String::from_utf8(buffer).expect("invalid string"))
+    }
+}
+
 impl<'a> Parser<'a> {
     /// Initialize the parser.
     pub fn initialize(input: &'a str, label: String) -> Self {
diff --git a/nemo/src/parser/error.rs b/nemo/src/parser/error.rs
index 3fcbd4401..d20fd5785 100644
--- a/nemo/src/parser/error.rs
+++ b/nemo/src/parser/error.rs
@@ -1,5 +1,7 @@
 //! This module defines the error type that is returned when the parser is unsuccessful.
 
+use std::fmt::Display;
+
 use nom::{
     branch::alt,
     bytes::complete::{take_until, take_while},
@@ -37,6 +39,13 @@ pub struct ParserError {
     pub context: Vec<ParserContext>,
 }
 
+impl Display for ParserError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        // TODO: We only use the first context to generate an error message
+        f.write_fmt(format_args!("expected `{}`", self.context[0].name()))
+    }
+}
+
 /// Skip a statement, returning an error token.
 pub(crate) fn skip_statement(input: ParserInput<'_>) -> ParserResult<'_, Token<'_>> {
     let input_span = input.span;
diff --git a/nemo/src/rule_model/error.rs b/nemo/src/rule_model/error.rs
index 1fb2d00a2..d705daf3f 100644
--- a/nemo/src/rule_model/error.rs
+++ b/nemo/src/rule_model/error.rs
@@ -175,12 +175,6 @@ pub struct ValidationError {
     info: ComplexError<Origin>,
 }
 
-impl Display for ValidationError {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "{}", self.kind)
-    }
-}
-
 /// Builder for [ValidationError]
 #[derive(Debug, Default)]
 pub struct ValidationErrorBuilder {
diff --git a/nemo/src/rule_model/translation.rs b/nemo/src/rule_model/translation.rs
index 5fbba8c1c..e7083c226 100644
--- a/nemo/src/rule_model/translation.rs
+++ b/nemo/src/rule_model/translation.rs
@@ -85,11 +85,10 @@ impl<'a> ASTProgramTranslation<'a> {
 
 /// Report of all [ProgramError]s occurred
 /// during the translation and validation of the AST
-#[derive(Debug)]
 pub struct ProgramErrorReport<'a> {
     /// Original input string
     input: &'a str,
     /// Label of the input file
     label: String,
     /// Mapping of [Origin] to [ProgramAST] nodes
     origin_map: HashMap<Origin, &'a dyn ProgramAST<'a>>,
@@ -99,11 +98,10 @@ pub struct ProgramErrorReport<'a> {
 }
 
 impl<'a> ProgramErrorReport<'a> {
-    /// Print the given reports.
-    pub fn eprint<'s, ReportIterator>(&self, reports: ReportIterator) -> Result<(), std::io::Error>
-    where
-        ReportIterator: Iterator<Item = Report<'s, (String, Range<usize>)>>,
-    {
+    /// Print this report to standard error.
+    pub fn eprint(&self) -> Result<(), std::io::Error> {
+        let reports = self.build_reports();
+
         for report in reports {
             report.eprint((self.label.clone(), Source::from(self.input)))?;
         }
@@ -111,6 +109,17 @@ impl<'a> ProgramErrorReport<'a> {
         Ok(())
     }
 
+    /// Write this report to a given writer.
+    pub fn write(&self, writer: &mut impl std::io::Write) -> Result<(), std::io::Error> {
+        let reports = self.build_reports();
+
+        for report in reports {
+            report.write((self.label.clone(), Source::from(self.input)), &mut *writer)?
+        }
+
+        Ok(())
+    }
+
     /// Build a [Report] for each error.
     pub fn build_reports(&self) -> Vec<Report<'_, (String, Range<usize>)>> {
         self.errors
@@ -149,6 +158,29 @@ impl<'a> ProgramErrorReport<'a> {
     }
 }
 
+impl<'a> std::fmt::Debug for ProgramErrorReport<'a> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let reports = self.build_reports();
+
+        for report in reports {
+            report.fmt(f)?
+        }
+
+        Ok(())
+    }
+}
+
+impl<'a> std::fmt::Display for ProgramErrorReport<'a> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let mut buffer = Vec::new();
+        if let Err(_) = self.write(&mut buffer) {
+            return Err(std::fmt::Error);
+        }
+
+        write!(f, "{}", String::from_utf8(buffer).expect("invalid string"))
+    }
+}
+
 impl<'a> ASTProgramTranslation<'a> {
     /// Translate the given [ProgramAST] into a [Program].
     pub fn translate(

From f3d4db1032ca5823ddbc2503bc5039822e1a8ddc Mon Sep 17 00:00:00 2001
From: Alex Ivliev
Date: Tue, 1 Oct 2024 11:27:06 +0200
Subject: [PATCH 212/214] Include more information in inconsistent arity error
 message

---
 nemo/src/rule_model/error/validation_error.rs | 4 ++--
 nemo/src/rule_model/program.rs | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/nemo/src/rule_model/error/validation_error.rs b/nemo/src/rule_model/error/validation_error.rs
index 9d9b29c8e..12f6e5fbe 100644
--- a/nemo/src/rule_model/error/validation_error.rs
+++ b/nemo/src/rule_model/error/validation_error.rs
@@ -96,10 +96,10 @@ pub enum ValidationErrorKind {
     #[assoc(code = 218)]
     OperationAnonymous,
     /// Inconsistent arities for predicates
-    #[error(r#"predicate {predicate} used with multiple arities."#)]
+    #[error(r#"predicate `{predicate}` used with arity {arity}."#)]
     #[assoc(code = 219)]
     #[assoc(note = "each predicate is only allowed to have one arity")]
-    InconsistentArities { predicate: String },
+    InconsistentArities { predicate: String, arity: usize },
     /// Import/Export: Missing required attribute
     #[error(r#"missing required parameter `{attribute}` in {direction} statement"#)]
     #[assoc(code = 220)]
diff --git a/nemo/src/rule_model/program.rs b/nemo/src/rule_model/program.rs
index 7b01cfe8b..029b4ee70 100644
--- a/nemo/src/rule_model/program.rs
+++ b/nemo/src/rule_model/program.rs
@@ -197,6 +197,7 @@ impl Program {
                         origin,
                         ValidationErrorKind::InconsistentArities {
                             predicate: predicate_string,
+                            arity,
                         },
                     )
                     .add_label(

From 19a134f6f2bc29fc0b2dd8ad931ebf307312efd3 Mon Sep 17 00:00:00 2001
From: Alex Ivliev
Date: Tue, 1 Oct 2024 11:33:26 +0200
Subject: [PATCH 213/214] Clippy

---
 nemo/src/parser.rs | 2 +-
 nemo/src/rule_model/translation.rs | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/nemo/src/parser.rs b/nemo/src/parser.rs
index d463bc5fb..bd0fed15e 100644
--- a/nemo/src/parser.rs
+++ b/nemo/src/parser.rs
@@ -114,7 +114,7 @@ impl<'a> std::fmt::Debug for ParserErrorReport<'a> {
 impl<'a> std::fmt::Display for ParserErrorReport<'a> {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         let mut buffer = Vec::new();
-        if let Err(_) = self.write(&mut buffer) {
+        if self.write(&mut buffer).is_err() {
             return Err(std::fmt::Error);
         }
 
diff --git a/nemo/src/rule_model/translation.rs b/nemo/src/rule_model/translation.rs
index e7083c226..8d9e2e001 100644
--- a/nemo/src/rule_model/translation.rs
+++ b/nemo/src/rule_model/translation.rs
@@ -173,7 +173,7 @@ impl<'a> std::fmt::Debug for ProgramErrorReport<'a> { impl<'a> std::fmt::Display for ProgramErrorReport<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let mut buffer = Vec::new(); - if let Err(_) = self.write(&mut buffer) { + if self.write(&mut buffer).is_err() { return Err(std::fmt::Error); } From ef216ae86dee491270afab66fb49f91ef5d363c6 Mon Sep 17 00:00:00 2001 From: Alex Ivliev Date: Tue, 1 Oct 2024 11:38:42 +0200 Subject: [PATCH 214/214] Run cargo update --- Cargo.lock | 1075 ++++++++++++++++++++++++---------------------------- 1 file changed, 499 insertions(+), 576 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 96e9b96bc..7c78f089a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,18 +4,18 @@ version = 4 [[package]] name = "addr2line" -version = "0.22.0" +version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e4503c46a5c0c7844e948c9a4d6acd9f50cccb4de1c48eb9e291ea17470c678" +checksum = "f5fb1d8e4442bd405fdfd1dacb42792696b0cf9cb15882e5d097b742a676d375" dependencies = [ "gimli", ] [[package]] -name = "adler" -version = "1.0.2" +name = "adler2" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" [[package]] name = "ahash" @@ -55,9 +55,9 @@ dependencies = [ [[package]] name = "anstream" -version = "0.6.14" +version = "0.6.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "418c75fa768af9c03be99d17643f93f79bbba589895012a80e3452a19ddda15b" +checksum = "64e15c1ab1f89faffbf04a634d5e1962e9074f2741eef6d97f3c4e322426d526" dependencies = [ "anstyle", "anstyle-parse", @@ -70,33 +70,33 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.7" +version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "038dfcf04a5feb68e9c60b21c9625a54c2c0616e79b72b0fd87075a056ae1d1b" +checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1" [[package]] name = "anstyle-parse" -version = "0.2.4" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c03a11a9034d92058ceb6ee011ce58af4a9bf61491aa7e1e59ecd24bd40d22d4" +checksum = "eb47de1e80c2b463c735db5b217a0ddc39d612e7ac9e2e96a5aed1f57616c1cb" dependencies = [ "utf8parse", ] [[package]] name = "anstyle-query" -version = "1.1.0" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad186efb764318d35165f1758e7dcef3b10628e26d41a44bc5550652e6804391" +checksum = "6d36fc52c7f6c869915e99412912f22093507da8d9e942ceaf66fe4b7c14422a" dependencies = [ "windows-sys 0.52.0", ] [[package]] name = "anstyle-wincon" -version = "3.0.3" +version = "3.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61a38449feb7068f52bb06c12759005cf459ee52bb4adc1d5a7c4322d716fb19" +checksum = "5bf74e1b6e971609db8ca7a9ce79fd5768ab6ae46441c572e46cf596f59e57f8" dependencies = [ "anstyle", "windows-sys 0.52.0", @@ -104,9 +104,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.86" +version = "1.0.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" +checksum = "86fdf8605db99b54d3cd748a44c6d04df638eb5dafb219b135d0149bd0db01f6" [[package]] name = "arbitrary" @@ -130,9 +130,9 @@ dependencies = [ [[package]] name = 
"arrayvec" -version = "0.7.4" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "ascii_tree" @@ -142,13 +142,14 @@ checksum = "ca6c635b3aa665c649ad1415f1573c85957dfa47690ec27aebe7ec17efe3c643" [[package]] name = "assert_cmd" -version = "2.0.14" +version = "2.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed72493ac66d5804837f480ab3766c72bdfab91a65e565fc54fa9e42db0073a8" +checksum = "dc1835b7f27878de8525dc71410b5a31cdcc5f230aed5ba5df968e09c201b23d" dependencies = [ "anstyle", "bstr", "doc-comment", + "libc", "predicates", "predicates-core", "predicates-tree", @@ -157,9 +158,9 @@ dependencies = [ [[package]] name = "assert_fs" -version = "1.1.1" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2cd762e110c8ed629b11b6cde59458cc1c71de78ebbcc30099fc8e0403a2a2ec" +checksum = "7efdb1fdb47602827a342857666feb372712cbc64b414172bd6b167a02927674" dependencies = [ "anstyle", "doc-comment", @@ -172,13 +173,13 @@ dependencies = [ [[package]] name = "async-trait" -version = "0.1.80" +version = "0.1.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6fa2087f2753a7da8cc1c0dbfcf89579dd57458e36769de5ac750b4671737ca" +checksum = "721cae7de5c34fbb2acd27e21e6d2cf7b886dce0c27388d46c4e6c47ea4318dd" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.79", ] [[package]] @@ -206,28 +207,28 @@ checksum = "3c87f3f15e7794432337fc718554eaa4dc8f04c9677a950ffe366f20a162ae42" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.79", ] [[package]] name = "autocfg" -version = "1.3.0" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" +checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" [[package]] name = "backtrace" -version = "0.3.73" +version = "0.3.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cc23269a4f8976d0a4d2e7109211a419fe30e8d88d677cd60b6bc79c5732e0a" +checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a" dependencies = [ "addr2line", - "cc", "cfg-if", "libc", "miniz_oxide", "object", "rustc-demangle", + "windows-targets 0.52.6", ] [[package]] @@ -267,9 +268,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.5.0" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" +checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" [[package]] name = "bitvec" @@ -294,12 +295,12 @@ dependencies = [ [[package]] name = "bstr" -version = "1.9.1" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05efc5cfd9110c8416e471df0e96702d58690178e206e61b7173706673c93706" +checksum = "40723b8fb387abc38f4f4a37c09073622e41dd12327033091ef8950659e6dc0c" dependencies = [ "memchr", - "regex-automata 0.4.7", + "regex-automata 0.4.8", "serde", ] @@ -315,17 +316,26 @@ version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5ce89b21cab1437276d2650d57e971f9d548a2d9037cc231abdc0562b97498ce" +[[package]] +name = 
"byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + [[package]] name = "bytes" -version = "1.6.0" +version = "1.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" +checksum = "428d9aa8fbc0670b7b8d6030a7fadd0f86151cae55e4dbbece15f3780a3dfaf3" [[package]] name = "cc" -version = "1.0.99" +version = "1.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96c51067fd44124faa7f870b4b1c969379ad32b2ba805aa959430ceaa384f695" +checksum = "3bbb537bb4a30b90362caddba8f360c0a56bc13d3a5570028e7197204cb54a17" +dependencies = [ + "shlex", +] [[package]] name = "cexpr" @@ -370,9 +380,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.7" +version = "4.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5db83dced34638ad474f39f250d7fea9598bdd239eaced1bdf45d597da0f433f" +checksum = "b0956a43b323ac1afaffc053ed5c4b7c1f1800bacd1683c353aabbb752515dd3" dependencies = [ "clap_builder", "clap_derive", @@ -380,9 +390,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.7" +version = "4.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7e204572485eb3fbf28f871612191521df159bc3e15a9f5064c66dba3a8c05f" +checksum = "4d72166dd41634086d5803a47eb71ae740e61d84709c36f3c34110173db3961b" dependencies = [ "anstream", "anstyle", @@ -392,27 +402,27 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.5" +version = "4.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c780290ccf4fb26629baa7a1081e68ced113f1d3ec302fa5948f1c381ebf06c6" +checksum = "4ac6a0c7b1a9e9a5186361f67dfa1b88213572f427fb9ab038efb2bd8c582dab" dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.79", ] [[package]] name = "clap_lex" -version = "0.7.1" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b82cf0babdbd58558212896d1a4272303a57bdb245c2bf1147185fb45640e70" +checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" [[package]] name = "colorchoice" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b6a852b24ab71dffc585bcb46eaf7959d175cb865a7152e35b348d1b2960422" +checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0" [[package]] name = "colored" @@ -466,9 +476,9 @@ dependencies = [ [[package]] name = "core-foundation-sys" -version = "0.8.6" +version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] name = "crc32fast" @@ -546,7 +556,7 @@ checksum = "4e018fccbeeb50ff26562ece792ed06659b9c2dae79ece77c4456bb10d9bf79b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.79", ] [[package]] @@ -557,7 +567,7 @@ checksum = "67e77553c4162a157adbf834ebae5b415acbecbeafc7a74b0e886657506a7611" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.79", ] [[package]] @@ -587,17 +597,6 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "displaydoc" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"487585f4d0c6655fe74905e2504d8ad6908e4db67f744eb140876906c2f3175d" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.66", -] - [[package]] name = "doc-comment" version = "0.3.3" @@ -639,14 +638,14 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.79", ] [[package]] name = "env_filter" -version = "0.1.0" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a009aa4810eb158359dda09d0c87378e4bbb89b5a801f016885a4707ba24f7ea" +checksum = "4f2c92ceda6ceec50f43169f9ee8424fe2db276791afde7b2cd8bc084cb376ab" dependencies = [ "log", "regex", @@ -667,9 +666,9 @@ dependencies = [ [[package]] name = "env_logger" -version = "0.11.3" +version = "0.11.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38b35839ba51819680ba087cd351788c9a3c476841207e0b8cee0b04722343b9" +checksum = "e13fa619b91fb2381732789fc5de83b45675e882f66623b7d8cb4f643017018d" dependencies = [ "anstream", "anstyle", @@ -717,9 +716,9 @@ dependencies = [ [[package]] name = "fastrand" -version = "2.1.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" +checksum = "e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6" [[package]] name = "fixedbitset" @@ -729,9 +728,9 @@ checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" [[package]] name = "flate2" -version = "1.0.30" +version = "1.0.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f54427cfd1c7829e2a139fcefea601bf088ebca651d2bf53ebc600eac295dae" +checksum = "a1b589b4dc103969ad3cf85c950899926ec64300a1a46d76c03a6072957036f0" dependencies = [ "crc32fast", "miniz_oxide", @@ -838,7 +837,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.79", ] [[package]] @@ -886,9 +885,9 @@ dependencies = [ [[package]] name = "gimli" -version = "0.29.0" +version = "0.31.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40ecd4077b5ae9fd2e9e169b102c6c330d0605168eb0e8bf79952b256dbefffd" +checksum = "32085ea23f3234fc7846555e85283ba4de91e21016dc0455a16286d87a292d64" [[package]] name = "glob" @@ -898,15 +897,15 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "globset" -version = "0.4.14" +version = "0.4.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57da3b9b5b85bd66f31093f8c408b90a74431672542466497dcbdfdc02034be1" +checksum = "15f1ce686646e7f1e19bf7d5533fe443a45dbfb990e00629110797578b42fb19" dependencies = [ "aho-corasick", "bstr", "log", - "regex-automata 0.4.7", - "regex-syntax 0.8.4", + "regex-automata 0.4.8", + "regex-syntax 0.8.5", ] [[package]] @@ -915,7 +914,7 @@ version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0bf760ebf69878d9fd8f110c89703d90ce35095324d1f1edcb595c63945ee757" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.6.0", "ignore", "walkdir", ] @@ -935,9 +934,9 @@ dependencies = [ [[package]] name = "h2" -version = "0.4.5" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa82e28a107a8cc405f0839610bdc9b15f1e25ec7d696aa5cf173edbcb1486ab" +checksum = "524e8ac6999421f49a846c2d4411f337e53497d8ec55d67753beffa43c5d9205" dependencies = [ "atomic-waker", "bytes", @@ -989,6 +988,12 @@ version = "0.3.9" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" +[[package]] +name = "hermit-abi" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbf6a919d6cf397374f7dfeeea91d974c7c0a7221d0d0f4f20d859d329e53fcc" + [[package]] name = "howlong" version = "0.1.7" @@ -1017,9 +1022,9 @@ dependencies = [ [[package]] name = "http-body" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cac85db508abc24a2e48553ba12a996e87244a0395ce011e62b37158745d643" +checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" dependencies = [ "bytes", "http", @@ -1040,9 +1045,9 @@ dependencies = [ [[package]] name = "httparse" -version = "1.9.3" +version = "1.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0e7a4dd27b9476dc40cb050d3632d3bba3a70ddbff012285f7f8559a1e7e545" +checksum = "7d71d3574edd2771538b901e6549113b4006ece66150fb69c0fb6d9a2adae946" [[package]] name = "humantime" @@ -1052,9 +1057,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "hyper" -version = "1.3.1" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe575dd17d0862a9a33781c8c4696a55c320909004a67a00fb286ba8b1bc496d" +checksum = "50dfd22e0e76d0f662d429a5f80fcaf3855009297eab6a0a9f8543834744ba05" dependencies = [ "bytes", "futures-channel", @@ -1070,6 +1075,23 @@ dependencies = [ "want", ] +[[package]] +name = "hyper-rustls" +version = "0.27.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08afdbb5c31130e3034af566421053ab03787c640246a446327f550d11bcb333" +dependencies = [ + "futures-util", + "http", + "hyper", + "hyper-util", + "rustls", + "rustls-pki-types", + "tokio", + "tokio-rustls", + "tower-service", +] + [[package]] name = "hyper-tls" version = "0.6.0" @@ -1088,9 +1110,9 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.5" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b875924a60b96e5d7b9ae7b066540b1dd1cbd90d1828f54c92e02a283351c56" +checksum = "41296eb09f183ac68eec06e03cdbea2e759633d4067b2f6552fc2e009bcad08b" dependencies = [ "bytes", "futures-channel", @@ -1101,152 +1123,31 @@ dependencies = [ "pin-project-lite", "socket2", "tokio", - "tower", "tower-service", "tracing", ] -[[package]] -name = "icu_collections" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" -dependencies = [ - "displaydoc", - "yoke", - "zerofrom", - "zerovec", -] - -[[package]] -name = "icu_locid" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" -dependencies = [ - "displaydoc", - "litemap", - "tinystr", - "writeable", - "zerovec", -] - -[[package]] -name = "icu_locid_transform" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" -dependencies = [ - "displaydoc", - "icu_locid", - "icu_locid_transform_data", - "icu_provider", - "tinystr", - "zerovec", -] - -[[package]] -name = "icu_locid_transform_data" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" - -[[package]] -name = "icu_normalizer" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" -dependencies = [ - "displaydoc", - "icu_collections", - "icu_normalizer_data", - "icu_properties", - "icu_provider", - "smallvec", - "utf16_iter", - "utf8_iter", - "write16", - "zerovec", -] - -[[package]] -name = "icu_normalizer_data" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" - -[[package]] -name = "icu_properties" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f8ac670d7422d7f76b32e17a5db556510825b29ec9154f235977c9caba61036" -dependencies = [ - "displaydoc", - "icu_collections", - "icu_locid_transform", - "icu_properties_data", - "icu_provider", - "tinystr", - "zerovec", -] - -[[package]] -name = "icu_properties_data" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" - -[[package]] -name = "icu_provider" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" -dependencies = [ - "displaydoc", - "icu_locid", - "icu_provider_macros", - "stable_deref_trait", - "tinystr", - "writeable", - "yoke", - "zerofrom", - "zerovec", -] - -[[package]] -name = "icu_provider_macros" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.66", -] - [[package]] name = "idna" -version = "1.0.0" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4716a3a0933a1d01c2f72450e89596eb51dd34ef3c211ccd875acdf1f8fe47ed" +checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" dependencies = [ - "icu_normalizer", - "icu_properties", - "smallvec", - "utf8_iter", + "unicode-bidi", + "unicode-normalization", ] [[package]] name = "ignore" -version = "0.4.22" +version = "0.4.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b46810df39e66e925525d6e38ce1e7f6e1d208f72dc39757880fcb66e2c58af1" +checksum = "6d89fd380afde86567dfba715db065673989d6253f42b88179abd3eae47bda4b" dependencies = [ "crossbeam-deque", "globset", "log", "memchr", - "regex-automata 0.4.7", + "regex-automata 0.4.8", "same-file", "walkdir", "winapi-util", @@ -1260,9 +1161,9 @@ checksum = "0cfe9645a18782869361d9c8732246be7b410ad4e919d3609ebabdac00ba12c3" [[package]] name = "indexmap" -version = "2.2.6" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" +checksum = "68b900aa2f7301e21c36462b170ee99994de34dff39a4a6a528e80e7376d07e5" dependencies = [ "equivalent", "hashbrown", @@ -1276,26 +1177,26 @@ checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" [[package]] name = "ipnet" -version = "2.9.0" +version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3" +checksum = 
"187674a687eed5fe42285b40c6291f9a01517d415fad1c3cbc6a9f778af7fcd4" [[package]] name = "is-terminal" -version = "0.4.12" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b" +checksum = "261f68e344040fbd0edea105bef17c66edf46f984ddb1115b775ce31be948f4b" dependencies = [ - "hermit-abi 0.3.9", + "hermit-abi 0.4.0", "libc", "windows-sys 0.52.0", ] [[package]] name = "is_terminal_polyfill" -version = "1.70.0" +version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" [[package]] name = "itoa" @@ -1311,18 +1212,18 @@ checksum = "72167d68f5fce3b8655487b8038691a3c9984ee769590f93f2a631f4ad64e4f5" [[package]] name = "js-sys" -version = "0.3.69" +version = "0.3.70" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" +checksum = "1868808506b929d7b0cfa8f75951347aa71bb21144b7791bae35d9bccfcfe37a" dependencies = [ "wasm-bindgen", ] [[package]] name = "lazy_static" -version = "1.4.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "lazycell" @@ -1332,18 +1233,18 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "libc" -version = "0.2.155" +version = "0.2.159" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" +checksum = "561d97a539a36e26a9a5fad1ea11a3039a67714694aaa379433e580854bc3dc5" [[package]] name = "libloading" -version = "0.8.3" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c2a198fb6b0eada2a8df47933734e6d35d350665a33a3593d7164fa52c75c19" +checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4" dependencies = [ "cfg-if", - "windows-targets 0.52.5", + "windows-targets 0.52.6", ] [[package]] @@ -1368,12 +1269,6 @@ version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" -[[package]] -name = "litemap" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "643cb0b8d4fcc284004d5fd0d67ccf61dfffadb7f75e1e71bc420f4688a3a704" - [[package]] name = "lock_api" version = "0.4.12" @@ -1386,15 +1281,15 @@ dependencies = [ [[package]] name = "log" -version = "0.4.21" +version = "0.4.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" [[package]] name = "lru" -version = "0.12.3" +version = "0.12.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3262e75e648fce39813cb56ac41f3c3e3f65217ebf3844d818d1f9398cfb0dc" +checksum = "37ee39891760e7d94734f6f63fedc29a2e4a152f836120753a72503f09fcf904" dependencies = [ "hashbrown", ] @@ -1418,7 +1313,7 @@ version = "0.0.1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.79", ] [[package]] @@ -1432,9 +1327,9 @@ 
dependencies = [ [[package]] name = "memchr" -version = "2.7.2" +version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" [[package]] name = "memoffset" @@ -1451,6 +1346,16 @@ version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" +[[package]] +name = "minicov" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c71e683cd655513b99affab7d317deb690528255a0d5f717f1024093c12b169" +dependencies = [ + "cc", + "walkdir", +] + [[package]] name = "minimal-lexical" version = "0.2.1" @@ -1459,22 +1364,23 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "miniz_oxide" -version = "0.7.3" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87dfd01fe195c66b572b37921ad8803d010623c0aca821bea2302239d155cdae" +checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" dependencies = [ - "adler", + "adler2", ] [[package]] name = "mio" -version = "0.8.11" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c" +checksum = "80e04d1dcff3aae0704555fe5fee3bcfaf3d1fdf8a7e521d5b9d2b42acb52cec" dependencies = [ + "hermit-abi 0.3.9", "libc", "wasi", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] @@ -1506,7 +1412,7 @@ dependencies = [ "csv", "dyn-clone", "enum-assoc", - "env_logger 0.11.3", + "env_logger 0.11.5", "flate2", "getrandom", "log", @@ -1547,10 +1453,10 @@ dependencies = [ "ariadne", "assert_cmd", "assert_fs", - "clap 4.5.7", + "clap 4.5.18", "colored", "dir-test", - "env_logger 0.11.3", + "env_logger 0.11.5", "log", "nemo", "predicates", @@ -1583,7 +1489,7 @@ dependencies = [ "bitvec", "delegate", "enum_dispatch", - "env_logger 0.11.3", + "env_logger 0.11.5", "flate2", "hashbrown", "howlong", @@ -1722,9 +1628,9 @@ dependencies = [ [[package]] name = "num-bigint" -version = "0.4.5" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c165a9ab64cf766f73521c0dd2cfdff64f488b8f0b3e621face3462d3db536d7" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" dependencies = [ "num-integer", "num-traits", @@ -1779,38 +1685,31 @@ dependencies = [ "autocfg", ] -[[package]] -name = "num_cpus" -version = "1.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" -dependencies = [ - "hermit-abi 0.3.9", - "libc", -] - [[package]] name = "object" -version = "0.36.0" +version = "0.36.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "576dfe1fc8f9df304abb159d767a29d0476f7750fbf8aa7ad07816004a207434" +checksum = "084f1a5821ac4c651660a94a7153d27ac9d8a53736203f58b31945ded098070a" dependencies = [ "memchr", ] [[package]] name = "once_cell" -version = "1.19.0" +version = "1.20.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" +checksum = "82881c4be219ab5faaf2ad5e5e5ecdff8c66bd7402ca3160975c93b24961afd1" +dependencies = [ + "portable-atomic", +] [[package]] name = "openssl" -version = 
"0.10.64" +version = "0.10.66" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95a0481286a310808298130d22dd1fef0fa571e05a8f44ec801801e84b216b1f" +checksum = "9529f4786b70a3e8c61e11179af17ab6188ad8d0ded78c5529441ed39d4bd9c1" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.6.0", "cfg-if", "foreign-types", "libc", @@ -1827,7 +1726,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.79", ] [[package]] @@ -1838,9 +1737,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" [[package]] name = "openssl-sys" -version = "0.9.102" +version = "0.9.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c597637d56fbc83893a35eb0dd04b2b8e7a50c91e64e9493e398b5df4fb45fa2" +checksum = "7f9e8deee91df40a943c71b917e5874b951d32a802526c85721ce3b776c929d6" dependencies = [ "cc", "libc", @@ -1865,9 +1764,9 @@ dependencies = [ [[package]] name = "oxiri" -version = "0.2.3" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d05417ee46e2eb40dd9d590b4d67fc2408208b3a48a6b7f71d2bc1d7ce12a3e0" +checksum = "85d9439ace287894b327bd5522d4f3d813311c719143a1af37826c6a12f808d0" [[package]] name = "parking_lot" @@ -1889,7 +1788,7 @@ dependencies = [ "libc", "redox_syscall", "smallvec", - "windows-targets 0.52.5", + "windows-targets 0.52.6", ] [[package]] @@ -1922,9 +1821,9 @@ dependencies = [ [[package]] name = "petgraph-graphml" -version = "3.0.0" +version = "3.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f99237d858a7675759c308324348d81742553ed1d65ddce0854a55688e8487b" +checksum = "246368e83d42c556dd08e321cfc2234c979e6ccf874f60bc4255eb1e09aa1f18" dependencies = [ "petgraph", "xml-rs", @@ -1947,7 +1846,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.79", ] [[package]] @@ -1964,27 +1863,30 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "pkg-config" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" +checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" [[package]] name = "portable-atomic" -version = "1.6.0" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0" +checksum = "cc9c68a3f6da06753e9335d63e27f6b9754dd1920d941135b7ea8224f141adb2" [[package]] name = "ppv-lite86" -version = "0.2.17" +version = "0.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" +checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" +dependencies = [ + "zerocopy", +] [[package]] name = "predicates" -version = "3.1.0" +version = "3.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68b87bfd4605926cdfefc1c3b5f8fe560e3feca9d5552cf68c466d3d8236c7e8" +checksum = "7e9086cc7640c29a356d1a29fd134380bee9d8f79a17410aa76e7ad295f42c97" dependencies = [ "anstyle", "difflib", @@ -1996,15 +1898,15 @@ dependencies = [ [[package]] name = "predicates-core" -version = "1.0.6" +version = "1.0.8" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "b794032607612e7abeb4db69adb4e33590fa6cf1149e95fd7cb00e634b92f174" +checksum = "ae8177bee8e75d6846599c6b9ff679ed51e882816914eec639944d7c9aa11931" [[package]] name = "predicates-tree" -version = "1.0.9" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "368ba315fb8c5052ab692e68a0eefec6ec57b23a36959c14496f0b0df2c0cecf" +checksum = "41b740d195ed3166cd147c8047ec98db0e22ec019eb8eeb76d343b795304fb13" dependencies = [ "predicates-core", "termtree", @@ -2012,9 +1914,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.85" +version = "1.0.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22244ce15aa966053a896d1accb3a6e68469b97c7f33f284b99f0d576879fc23" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" dependencies = [ "unicode-ident", ] @@ -2066,7 +1968,7 @@ dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn 2.0.66", + "syn 2.0.79", ] [[package]] @@ -2079,14 +1981,14 @@ dependencies = [ "proc-macro2", "pyo3-build-config", "quote", - "syn 2.0.66", + "syn 2.0.79", ] [[package]] name = "quick-xml" -version = "0.28.2" +version = "0.36.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ce5e73202a820a31f8a0ee32ada5e21029c81fd9e3ebf668a40832e4219d9d1" +checksum = "f7649a7b4df05aed9ea7ec6f628c67c9953a43869b8bc50929569b2999d443fe" dependencies = [ "memchr", ] @@ -2115,9 +2017,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.36" +version = "1.0.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" dependencies = [ "proc-macro2", ] @@ -2169,23 +2071,23 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.5.1" +version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "469052894dcb553421e483e4209ee581a45100d31b4018de03e5a7ad86374a7e" +checksum = "9b6dfecf2c74bce2466cabf93f6664d6998a69eb21e39f4207930065b27b771f" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.6.0", ] [[package]] name = "regex" -version = "1.10.5" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" +checksum = "38200e5ee88914975b69f657f0801b6f6dccafd44fd9326302a4aaeecfacb1d8" dependencies = [ "aho-corasick", "memchr", - "regex-automata 0.4.7", - "regex-syntax 0.8.4", + "regex-automata 0.4.8", + "regex-syntax 0.8.5", ] [[package]] @@ -2199,13 +2101,13 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.7" +version = "0.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" +checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3" dependencies = [ "aho-corasick", "memchr", - "regex-syntax 0.8.4", + "regex-syntax 0.8.5", ] [[package]] @@ -2216,15 +2118,15 @@ checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" [[package]] name = "regex-syntax" -version = "0.8.4" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" 
[[package]] name = "reqwest" -version = "0.12.4" +version = "0.12.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "566cafdd92868e0939d3fb961bd0dc25fcfaaed179291093b3d43e6b3150ea10" +checksum = "f713147fbe92361e52392c73b8c9e48c04c6625bce969ef54dc901e58e042a7b" dependencies = [ "base64", "bytes", @@ -2236,6 +2138,7 @@ dependencies = [ "http-body", "http-body-util", "hyper", + "hyper-rustls", "hyper-tls", "hyper-util", "ipnet", @@ -2259,20 +2162,35 @@ dependencies = [ "wasm-bindgen", "wasm-bindgen-futures", "web-sys", - "winreg", + "windows-registry", +] + +[[package]] +name = "ring" +version = "0.17.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c17fa4cb658e3583423e915b9f3acc01cceaee1860e33d59ebae66adc3a2dc0d" +dependencies = [ + "cc", + "cfg-if", + "getrandom", + "libc", + "spin", + "untrusted", + "windows-sys 0.52.0", ] [[package]] name = "rio_api" -version = "0.8.4" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1924fa1f0e6d851f9b73b3c569e607c368a0d92995d99d563ad7bf1414696603" +checksum = "61d0c76ddf8b00cbb4d2c5932d067d49245c2f1f651809bde3cf265033ddb1af" [[package]] name = "rio_turtle" -version = "0.8.4" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cec59971eafd99b9c7e3544bfcabafea81a7072ac51c9f46985ca0bd7ba6016" +checksum = "d6f351b77353c7c896f0cd5ced2a25a7e95b5360cb68d1d7c16682ee096d7f40" dependencies = [ "oxilangtag", "oxiri", @@ -2281,9 +2199,9 @@ dependencies = [ [[package]] name = "rio_xml" -version = "0.8.4" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2edda57b877119dc326c612ba822e3ca1ee22bfc86781a4e9dc0884756b58c3" +checksum = "abd3384ae785ed3b0159607adc08adef580a28e277fbfa375c42d162e9da93b1" dependencies = [ "oxilangtag", "oxiri", @@ -2305,32 +2223,55 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" [[package]] name = "rustix" -version = "0.38.34" +version = "0.38.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" +checksum = "8acb788b847c24f28525660c4d7758620a7210875711f79e7f663cc152726811" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.6.0", "errno 0.3.9", "libc", "linux-raw-sys", "windows-sys 0.52.0", ] +[[package]] +name = "rustls" +version = "0.23.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2dabaac7466917e566adb06783a81ca48944c6898a1b08b9374106dd671f4c8" +dependencies = [ + "once_cell", + "rustls-pki-types", + "rustls-webpki", + "subtle", + "zeroize", +] + [[package]] name = "rustls-pemfile" -version = "2.1.2" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29993a25686778eb88d4189742cd713c9bce943bc54251a33509dc63cbacf73d" +checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50" dependencies = [ - "base64", "rustls-pki-types", ] [[package]] name = "rustls-pki-types" -version = "1.7.0" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "976295e77ce332211c0d24d92c0e83e50f5c5f046d11082cea19f3df13a3562d" +checksum = "0e696e35370c65c9c541198af4543ccd580cf17fc25d8e05c5a242b202488c55" + +[[package]] +name = "rustls-webpki" +version = "0.102.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"64ca1bc8749bd4cf37b5ce386cc146580777b4e8572c7b97baf22c83f444bee9" +dependencies = [ + "ring", + "rustls-pki-types", + "untrusted", +] [[package]] name = "rustversion" @@ -2361,11 +2302,11 @@ checksum = "19d36299972b96b8ae7e8f04ecbf75fb41a27bf3781af00abcf57609774cb911" [[package]] name = "schannel" -version = "0.1.23" +version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbc91545643bcf3a0bbb6569265615222618bdf33ce4ffbbd13c4bbd4c093534" +checksum = "e9aaafd5a2b6e3d657ff009d82fbd630b6bd54dd4eb06f21693925cdf80f9b8b" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -2382,11 +2323,11 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "security-framework" -version = "2.11.0" +version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c627723fd09706bacdb5cf41499e95098555af3c3c29d014dc3c458ef6be11c0" +checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.6.0", "core-foundation", "core-foundation-sys", "libc", @@ -2395,9 +2336,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.11.0" +version = "2.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "317936bbbd05227752583946b9e66d7ce3b489f84e11a94a510b4437fef407d7" +checksum = "ea4a292869320c0272d7bc55a5a6aafaff59b4f63404a003887b679a2e05b4b6" dependencies = [ "core-foundation-sys", "libc", @@ -2405,31 +2346,32 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.203" +version = "1.0.210" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094" +checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.203" +version = "1.0.210" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba" +checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.79", ] [[package]] name = "serde_json" -version = "1.0.117" +version = "1.0.128" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "455182ea6142b14f93f4bc5320a2b31c1f266b66a4a5c858b013302a5d8cbfc3" +checksum = "6ff5456707a1de34e7e37f2a6fd3d3f808c318259cbd01ab6377795054b483d8" dependencies = [ "itoa", + "memchr", "ryu", "serde", ] @@ -2442,7 +2384,7 @@ checksum = "6c64451ba24fc7a6a2d60fc75dd9c83c90903b19028d4eff35e88fc1e86564e9" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.79", ] [[package]] @@ -2504,10 +2446,10 @@ dependencies = [ ] [[package]] -name = "stable_deref_trait" -version = "1.2.0" +name = "spin" +version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" [[package]] name = "streaming-iterator" @@ -2543,9 +2485,15 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.66", + "syn 2.0.79", ] +[[package]] +name = "subtle" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" + [[package]] name 
= "syn" version = "1.0.109" @@ -2559,9 +2507,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.66" +version = "2.0.79" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c42f3f41a2de00b01c0aaad383c5a45241efc8b2d1eda5661812fda5f3cdcff5" +checksum = "89132cd0bf050864e1d38dc3bbc07a0eb8e7530af26344d3d2bbbef83499f590" dependencies = [ "proc-macro2", "quote", @@ -2570,37 +2518,29 @@ dependencies = [ [[package]] name = "sync_wrapper" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" - -[[package]] -name = "synstructure" -version = "0.13.1" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" +checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394" dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.66", + "futures-core", ] [[package]] name = "system-configuration" -version = "0.5.1" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7" +checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.6.0", "core-foundation", "system-configuration-sys", ] [[package]] name = "system-configuration-sys" -version = "0.5.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9" +checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4" dependencies = [ "core-foundation-sys", "libc", @@ -2614,20 +2554,21 @@ checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" [[package]] name = "target-lexicon" -version = "0.12.14" +version = "0.12.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1fc403891a21bcfb7c37834ba66a547a8f402146eba7265b5a6d88059c9ff2f" +checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" [[package]] name = "tempfile" -version = "3.10.1" +version = "3.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" +checksum = "f0f2c9fc62d0beef6951ccffd757e241266a2c833136efbe35af6cd2567dca5b" dependencies = [ "cfg-if", "fastrand", + "once_cell", "rustix", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -2651,7 +2592,7 @@ version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3dffced63c2b5c7be278154d76b479f9f9920ed34e7574201407f0b14e2bbb93" dependencies = [ - "env_logger 0.11.3", + "env_logger 0.11.5", "test-log-macros", "tracing-subscriber", ] @@ -2664,7 +2605,7 @@ checksum = "5999e24eaa32083191ba4e425deb75cdf25efefabe5aaccb7446dd0d4122a3f5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.79", ] [[package]] @@ -2684,22 +2625,22 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.61" +version = "1.0.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709" +checksum = "d50af8abc119fb8bb6dbabcfa89656f46f84aa0ac7688088608076ad2b459a84" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.61" +version = "1.0.64" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" +checksum = "08904e7672f5eb876eaaf87e0ce17857500934f4981c4a0ab2b4aa98baac7fc3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.79", ] [[package]] @@ -2713,41 +2654,45 @@ dependencies = [ ] [[package]] -name = "tinystr" -version = "0.7.6" +name = "tinyvec" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" +checksum = "445e881f4f6d382d5f27c034e25eb92edd7c784ceab92a0937db7f2e9471b938" dependencies = [ - "displaydoc", - "zerovec", + "tinyvec_macros", ] +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + [[package]] name = "tokio" -version = "1.38.0" +version = "1.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba4f4a02a7a80d6f274636f0aa95c7e383b912d41fe721a31f29e29698585a4a" +checksum = "e2b070231665d27ad9ec9b8df639893f46727666c6767db40317fbe920a5d998" dependencies = [ "backtrace", "bytes", "libc", "mio", - "num_cpus", "pin-project-lite", "socket2", "tokio-macros", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] name = "tokio-macros" -version = "2.3.0" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a" +checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.79", ] [[package]] @@ -2760,11 +2705,22 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-rustls" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c7bc40d0e5a97695bb96e27995cd3a08538541b0a846f65bba7a359f36700d4" +dependencies = [ + "rustls", + "rustls-pki-types", + "tokio", +] + [[package]] name = "tokio-util" -version = "0.7.11" +version = "0.7.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cf6b47b3771c49ac75ad09a6162f53ad4b8088b76ac60e8ec1455b31a189fe1" +checksum = "61e7c3654c13bcd040d4a03abee2c75b1d14a37b423cf5a813ceae1cc903ec6a" dependencies = [ "bytes", "futures-core", @@ -2783,16 +2739,15 @@ dependencies = [ "futures-util", "pin-project", "pin-project-lite", - "tokio", "tower-layer", "tower-service", ] [[package]] name = "tower-layer" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c20c8dbed6283a09604c3e69b4b7eeb54e298b8a600d4d5ecb5ad39de609f1d0" +checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" [[package]] name = "tower-lsp" @@ -2825,14 +2780,14 @@ checksum = "84fd902d4e0b9a4b27f2f440108dc034e1758628a9b702f8ec61ad66355422fa" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.79", ] [[package]] name = "tower-service" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" +checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] name = "tracing" @@ -2853,7 +2808,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 
2.0.79", ] [[package]] @@ -2900,23 +2855,38 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" +[[package]] +name = "unicode-bidi" +version = "0.3.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" + [[package]] name = "unicode-ident" -version = "1.0.12" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" + +[[package]] +name = "unicode-normalization" +version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" +checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956" +dependencies = [ + "tinyvec", +] [[package]] name = "unicode-segmentation" -version = "1.11.0" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" [[package]] name = "unicode-width" -version = "0.1.13" +version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0336d538f7abc86d282a4189614dfaa90810dfc2c6f6427eaf88e16311dd225d" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" [[package]] name = "unindent" @@ -2924,11 +2894,17 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" +[[package]] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + [[package]] name = "url" -version = "2.5.1" +version = "2.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7c25da092f0a868cdf09e8674cd3b7ef3a7d92a24253e663a2fb85e2496de56" +checksum = "22784dbdf76fdde8af1aeda5622b546b422b6fc585325248a2bf9f5e41e94d6c" dependencies = [ "form_urlencoded", "idna", @@ -2936,18 +2912,6 @@ dependencies = [ "serde", ] -[[package]] -name = "utf16_iter" -version = "1.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" - -[[package]] -name = "utf8_iter" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" - [[package]] name = "utf8parse" version = "0.2.2" @@ -2974,9 +2938,9 @@ checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" [[package]] name = "version_check" -version = "0.9.4" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" [[package]] name = "wait-timeout" @@ -3014,34 +2978,35 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.92" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" 
+checksum = "a82edfc16a6c469f5f44dc7b571814045d60404b55a0ee849f9bcfa2e63dd9b5" dependencies = [ "cfg-if", + "once_cell", "wasm-bindgen-macro", ] [[package]] name = "wasm-bindgen-backend" -version = "0.2.92" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" +checksum = "9de396da306523044d3302746f1208fa71d7532227f15e347e2d93e4145dd77b" dependencies = [ "bumpalo", "log", "once_cell", "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.79", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-futures" -version = "0.4.42" +version = "0.4.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76bc14366121efc8dbb487ab05bcc9d346b3b5ec0eaa76e46594cabbe51762c0" +checksum = "61e9300f63a621e96ed275155c108eb6f843b6a26d053f122ab69724559dc8ed" dependencies = [ "cfg-if", "js-sys", @@ -3051,9 +3016,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.92" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" +checksum = "585c4c91a46b072c92e908d99cb1dcdf95c5218eeb6f3bf1efa991ee7a68cccf" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -3061,31 +3026,32 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.92" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" +checksum = "afc340c74d9005395cf9dd098506f7f44e38f2b4a21c6aaacf9a105ea5e1e836" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.79", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.92" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" +checksum = "c62a0a307cb4a311d3a07867860911ca130c3494e8c2719593806c08bc5d0484" [[package]] name = "wasm-bindgen-test" -version = "0.3.42" +version = "0.3.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9bf62a58e0780af3e852044583deee40983e5886da43a271dd772379987667b" +checksum = "68497a05fb21143a08a7d24fc81763384a3072ee43c44e86aad1744d6adef9d9" dependencies = [ "console_error_panic_hook", "js-sys", + "minicov", "scoped-tls", "wasm-bindgen", "wasm-bindgen-futures", @@ -3094,20 +3060,20 @@ dependencies = [ [[package]] name = "wasm-bindgen-test-macro" -version = "0.3.42" +version = "0.3.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7f89739351a2e03cb94beb799d47fb2cac01759b40ec441f7de39b00cbf7ef0" +checksum = "4b8220be1fa9e4c889b30fd207d4906657e7e90b12e0e6b0c8b8d8709f5de021" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.79", ] [[package]] name = "web-sys" -version = "0.3.69" +version = "0.3.70" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77afa9a11836342370f4817622a2f0f418b134426d91a82dfb48f532d2ec13ef" +checksum = "26fdeaafd9bd129f65e7c031593c24d62186301e0c72c8978fa1678be7d532c0" dependencies = [ "js-sys", "wasm-bindgen", @@ -3140,11 +3106,11 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-util" -version = "0.1.8" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b" +checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -3153,6 +3119,36 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows-registry" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e400001bb720a623c1c69032f8e3e4cf09984deec740f007dd2b03ec864804b0" +dependencies = [ + "windows-result", + "windows-strings", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-result" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-strings" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10" +dependencies = [ + "windows-result", + "windows-targets 0.52.6", +] + [[package]] name = "windows-sys" version = "0.45.0" @@ -3177,7 +3173,16 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows-targets 0.52.5", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 0.52.6", ] [[package]] @@ -3212,18 +3217,18 @@ dependencies = [ [[package]] name = "windows-targets" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ - "windows_aarch64_gnullvm 0.52.5", - "windows_aarch64_msvc 0.52.5", - "windows_i686_gnu 0.52.5", + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", "windows_i686_gnullvm", - "windows_i686_msvc 0.52.5", - "windows_x86_64_gnu 0.52.5", - "windows_x86_64_gnullvm 0.52.5", - "windows_x86_64_msvc 0.52.5", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", ] [[package]] @@ -3240,9 +3245,9 @@ checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" [[package]] name = "windows_aarch64_gnullvm" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" [[package]] name = "windows_aarch64_msvc" @@ -3258,9 +3263,9 @@ checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" [[package]] name = "windows_aarch64_msvc" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" [[package]] name = "windows_i686_gnu" @@ 
-3276,15 +3281,15 @@ checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" [[package]] name = "windows_i686_gnu" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" [[package]] name = "windows_i686_gnullvm" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" [[package]] name = "windows_i686_msvc" @@ -3300,9 +3305,9 @@ checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" [[package]] name = "windows_i686_msvc" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" [[package]] name = "windows_x86_64_gnu" @@ -3318,9 +3323,9 @@ checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" [[package]] name = "windows_x86_64_gnu" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" [[package]] name = "windows_x86_64_gnullvm" @@ -3336,9 +3341,9 @@ checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" [[package]] name = "windows_x86_64_gnullvm" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" [[package]] name = "windows_x86_64_msvc" @@ -3354,31 +3359,9 @@ checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" [[package]] name = "windows_x86_64_msvc" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" - -[[package]] -name = "winreg" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a277a57398d4bfa075df44f501a17cfdf8542d224f0d36095a2adc7aee4ef0a5" -dependencies = [ - "cfg-if", - "windows-sys 0.48.0", -] - -[[package]] -name = "write16" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" - -[[package]] -name = "writeable" -version = "0.5.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "wyz" @@ -3391,9 +3374,9 @@ dependencies = [ [[package]] name = "xml-rs" -version = "0.8.20" +version = "0.8.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "791978798f0597cfc70478424c2b4fdc2b7a8024aaff78497ef00f24ef674193" +checksum = "af4e2e2f7cba5a093896c1e150fbfe177d1883e7448200efb81d40b9d339ef26" [[package]] name = "yansi" @@ -3401,89 +3384,29 @@ version = 
"1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" -[[package]] -name = "yoke" -version = "0.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c5b1314b079b0930c31e3af543d8ee1757b1951ae1e1565ec704403a7240ca5" -dependencies = [ - "serde", - "stable_deref_trait", - "yoke-derive", - "zerofrom", -] - -[[package]] -name = "yoke-derive" -version = "0.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28cc31741b18cb6f1d5ff12f5b7523e3d6eb0852bbbad19d73905511d9849b95" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.66", - "synstructure", -] - [[package]] name = "zerocopy" -version = "0.7.34" +version = "0.7.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae87e3fcd617500e5d106f0380cf7b77f3c6092aae37191433159dda23cfb087" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" dependencies = [ + "byteorder", "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.7.34" +version = "0.7.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.79", ] [[package]] -name = "zerofrom" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91ec111ce797d0e0784a1116d0ddcdbea84322cd79e5d5ad173daeba4f93ab55" -dependencies = [ - "zerofrom-derive", -] - -[[package]] -name = "zerofrom-derive" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ea7b4a3637ea8669cedf0f1fd5c286a17f3de97b8dd5a70a6c167a1730e63a5" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.66", - "synstructure", -] - -[[package]] -name = "zerovec" -version = "0.10.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb2cc8827d6c0994478a15c53f374f46fbd41bea663d809b14744bc42e6b109c" -dependencies = [ - "yoke", - "zerofrom", - "zerovec-derive", -] - -[[package]] -name = "zerovec-derive" -version = "0.10.2" +name = "zeroize" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97cf56601ee5052b4417d90c8755c6683473c926039908196cf35d99f893ebe7" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.66", -] +checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde"