From 3b88dbe1014ca45cbbd55f84c3b0b6672f5388b9 Mon Sep 17 00:00:00 2001
From: Moxvallix
Date: Tue, 29 Oct 2024 16:18:38 +1030
Subject: [PATCH 1/3] feat(syntax): replace '/' command syntax with '`' command syntax

---
 examples/{config.json => config.json5} |  0
 examples/main.zog                      |  5 +++-
 src/compiler/mod.rs                    |  3 ++
 src/lexer/mod.rs                       | 38 ++++++++++++++++----------
 src/lexer/token.rs                     |  2 +-
 src/parser/mod.rs                      | 11 ++++++--
 6 files changed, 40 insertions(+), 19 deletions(-)
 rename examples/{config.json => config.json5} (100%)

diff --git a/examples/config.json b/examples/config.json5
similarity index 100%
rename from examples/config.json
rename to examples/config.json5
diff --git a/examples/main.zog b/examples/main.zog
index f9ce29d..a668d1f 100644
--- a/examples/main.zog
+++ b/examples/main.zog
@@ -3,6 +3,9 @@ namespace diamond
 
 module foo {
   fn bar() {
-    ~ = 10
+    `data modify storage foo:bar baz set value [
+      "a", "b", "c",
+      "d", "e", "f"
+    ]`
   }
 }
diff --git a/src/compiler/mod.rs b/src/compiler/mod.rs
index 7aaeaf7..a9748e7 100644
--- a/src/compiler/mod.rs
+++ b/src/compiler/mod.rs
@@ -643,6 +643,9 @@ impl Compiler {
       }
     }
 
+    let words: Vec<_> = result.split_ascii_whitespace().collect();
+    result = words.join(" ").to_eco_string();
+
     if is_macro && !has_macro_prefix {
       result = eco_format!("${result}")
     }
diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs
index 7694b5b..138d221 100644
--- a/src/lexer/mod.rs
+++ b/src/lexer/mod.rs
@@ -61,18 +61,22 @@ impl Lexer {
     self.dependent_files.insert(self.file.clone());
     loop {
       let next = self.next_token()?;
-      if next.kind == TokenKind::IncludeKeyword {
-        tokens.extend(self.parse_include()?);
-      } else if next.kind == TokenKind::CommandBegin {
-        tokens.push(next);
-        tokens.extend(self.parse_command()?);
-      } else {
-        tokens.push(next);
-        if tokens.last().expect("Tokens was just pushed to").kind == TokenKind::EndOfFile {
-          break;
+
+      match next.kind {
+        TokenKind::IncludeKeyword => {tokens.extend(self.parse_include()?);},
+        TokenKind::CommandBegin(backtick) => {
+          tokens.push(next);
+          tokens.extend(self.parse_command(backtick)?);
         }
-      }
+        _ => {
+          tokens.push(next);
+          if tokens.last().expect("Tokens was just pushed to").kind == TokenKind::EndOfFile {
+            break;
+          }
+        }
+      };
     }
+
     Ok(tokens)
   }
 
@@ -106,9 +110,9 @@ impl Lexer {
       if !self.tokenise_json() {
         value = Some(self.src[position + 1..self.position - 1].into());
       }
-    } else if self.current() == '/' && self.is_newline {
+    } else if self.current() == '`' && self.is_newline {
       self.consume();
-      kind = TokenKind::CommandBegin;
+      kind = TokenKind::CommandBegin(true);
     } else if self.current() == '#' {
       while !self.current_is_delim() {
         self.consume();
@@ -253,7 +257,7 @@ impl Lexer {
       self.position = position;
       self.line = line;
       self.column = column;
-      Ok((TokenKind::CommandBegin, EcoString::new()))
+      Ok((TokenKind::CommandBegin(false), EcoString::new()))
     } else {
       Ok((TokenKind::Identifier, identifier_value.into()))
     }
@@ -460,14 +464,14 @@ impl Lexer {
     Ok(tokens)
   }
 
-  fn parse_command(&mut self) -> Result<Vec<Token>> {
+  fn parse_command(&mut self, backtick: bool) -> Result<Vec<Token>> {
     let mut tokens = Vec::new();
    let mut current_part = EcoString::new();
    let mut line = self.line;
    let mut column = self.column;
 
-    while !self.current_is_delim() {
+    while if backtick {self.current() != '`'} else {!self.current_is_delim()} {
      if self.current() == '\\' && self.peek(1) == '&' {
        self.consume();
        current_part.push(self.current());
@@ -519,6 +523,10 @@ impl Lexer {
       location: self.location(self.line, self.column),
     });
 
+    if backtick {
+      self.consume();
+    }
+
     Ok(tokens)
   }
 
diff --git a/src/lexer/token.rs b/src/lexer/token.rs
index c6ffb07..e06bb84 100644
--- a/src/lexer/token.rs
+++ b/src/lexer/token.rs
@@ -43,7 +43,7 @@ pub enum TokenKind {
   ReturnKeyword,
 
   // Non-zoglin
-  CommandBegin,
+  CommandBegin(bool),
   CommandString,
   CommandEnd,
   Json,
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index f293775..c1f4e3f 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -422,7 +422,7 @@ impl Parser {
 
   fn parse_statement(&mut self) -> Result<Statement> {
     Ok(match self.current_including(&[TokenKind::Comment]).kind {
-      TokenKind::CommandBegin => Statement::Command(self.parse_command()?),
+      TokenKind::CommandBegin(_) => Statement::Command(self.parse_command()?),
       TokenKind::Comment => {
        let comment = self.consume_including(&[TokenKind::Comment]).get_value().clone();
        Statement::Comment(comment)
@@ -435,7 +435,14 @@ impl Parser {
   }
 
   fn parse_command(&mut self) -> Result<Command> {
-    self.expect(TokenKind::CommandBegin)?;
+    let next = self.consume();
+    if next.kind != TokenKind::CommandBegin(true) && next.kind != TokenKind::CommandBegin(false) {
+      return Err(raise_error(
+        next.location.clone(),
+        format!("Expected {:?}, got {:?}", TokenKind::CommandBegin(true), next.kind),
+      ));
+    }
+
     let mut parts = Vec::new();
 
     while self.current().kind != TokenKind::CommandEnd {

From fb96f23c551d227bf309a11a4c5b70b11e29c471 Mon Sep 17 00:00:00 2001
From: Moxvallix
Date: Fri, 15 Nov 2024 12:23:33 +1030
Subject: [PATCH 2/3] feat(lexer): command literals properly strip whitespace except from within strings

---
 src/compiler/mod.rs   |  3 +--
 src/lexer/mod.rs      | 39 ++++++++++++++++++++++++++++++++++++---
 test-project/main.zog | 21 ++++++++++++++++-----
 3 files changed, 53 insertions(+), 10 deletions(-)

diff --git a/src/compiler/mod.rs b/src/compiler/mod.rs
index 18aef22..88f0892 100644
--- a/src/compiler/mod.rs
+++ b/src/compiler/mod.rs
@@ -656,8 +656,7 @@ impl Compiler {
       }
     }
-    let words: Vec<_> = result.split_ascii_whitespace().collect();
-    result = words.join(" ").to_eco_string();
+    result = result.trim().into();
 
     if is_macro && !has_macro_prefix {
       result = eco_format!("${result}")
     }
diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs
index 9c34c39..e3b25dd 100644
--- a/src/lexer/mod.rs
+++ b/src/lexer/mod.rs
@@ -5,7 +5,7 @@ use crate::error::{raise_error, raise_floating_error, raise_warning, Location, R
 use ecow::EcoString;
 use glob::glob;
 use registries::{COMMANDS, KEYWORD_REGISTRY, OPERATOR_REGISTRY};
-use std::{collections::HashSet, fs, path::Path};
+use std::{collections::HashSet, fs, path::Path, str};
 use token::{Token, TokenKind};
 
 pub struct Lexer {
@@ -466,14 +466,24 @@ impl Lexer {
     let mut current_part = EcoString::new();
     let mut line = self.line;
     let mut column = self.column;
+    let mut string_char: Option<char> = None;
+    let mut last_was_whitespace: bool = false;
 
     while if backtick {self.current() != '`'} else {!self.current_is_delim()} {
-      match (self.current(), self.peek(1)) {
+      let current = self.current();
+
+      match (current, self.peek(1)) {
         ('\\', '\\' | '&' | '%' | '`') => {
           self.consume();
           current_part.push(self.current());
           self.consume();
         }
+        ('\\', '\'' | '"') => {
+          if string_char.is_some() {
+            current_part.push(self.consume());
+          }
+          current_part.push(self.consume());
+        }
         ('&', '{') => {
           tokens.push(Token {
             kind: TokenKind::CommandString,
@@ -544,10 +554,33 @@ impl Lexer {
           line = self.line;
           column = self.column;
         }
-        _ => {
+        ('\'' | '"', _) => {
+          if let Some(value) = string_char {
+            if value == self.current() {
+              string_char = None;
+            }
+          } else {
+            string_char = Some(self.current());
+          }
           current_part.push(self.consume());
         }
+        (current, _) => {
+          if current == '\n' {
+            current_part.push(' ');
+            self.consume();
+          } else if current.is_ascii_whitespace() {
+            if last_was_whitespace && !string_char.is_some() {
+              self.consume();
+            } else {
+              current_part.push(self.consume());
+            }
+          } else {
+            current_part.push(self.consume());
+          }
+        }
       }
+
+      last_was_whitespace = current.is_ascii_whitespace();
     }
 
     tokens.push(Token {
diff --git a/test-project/main.zog b/test-project/main.zog
index 5f5e34f..b1dc27e 100644
--- a/test-project/main.zog
+++ b/test-project/main.zog
@@ -1,8 +1,19 @@
 namespace example
 
-fn load() {
-  foo = 1
-  $bar = 2b
-  a = {a: 1, b: 2, c: foo}
-  b = [L; 1L, 2L, 3L, $bar]
+fn &foo(something) {
+  return &something
+}
+
+fn foo() {}
+
+fn load(%text) {
+  &value = 10
+  data modify storage example:load something set value &value
+  scoreboard players set @s example.load &foo(12)
+  tellraw @a "%text"
+  `execute
+    if data storage foo:bar {baz: true}
+    run &{foo()}
+  `
+  `tellraw @s "Foo Bar bbbb \" bax"`
 }

From e6fd06c0f6d8c27748f164c8c9ce417ae4020e9c Mon Sep 17 00:00:00 2001
From: Moxvallix Mox
Date: Thu, 28 Nov 2024 00:57:01 +1030
Subject: [PATCH 3/3] chore: implement suggestions, and clarify some code

---
 src/lexer/mod.rs | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs
index e3b25dd..6936260 100644
--- a/src/lexer/mod.rs
+++ b/src/lexer/mod.rs
@@ -110,7 +110,7 @@ impl Lexer {
       if !self.tokenise_json() {
         value = Some(self.src[position + 1..self.position - 1].into());
       }
-    } else if self.current() == '`' && self.is_newline {
+    } else if self.current() == '`' {
       self.consume();
       kind = TokenKind::CommandBegin(true);
     } else if self.current() == '#' {
@@ -466,7 +466,12 @@ impl Lexer {
     let mut current_part = EcoString::new();
     let mut line = self.line;
     let mut column = self.column;
+
+    // Which char the string within the command was opened with. One of either `'` or `"`.
+    // Used to determine when the string has closed.
     let mut string_char: Option<char> = None;
+
+    // Flag for when the last character was a whitespace character. Used to strip whitespace.
     let mut last_was_whitespace: bool = false;
 
     while if backtick {self.current() != '`'} else {!self.current_is_delim()} {
@@ -569,7 +574,7 @@ impl Lexer {
             current_part.push(' ');
             self.consume();
           } else if current.is_ascii_whitespace() {
-            if last_was_whitespace && !string_char.is_some() {
+            if last_was_whitespace && string_char.is_none() {
               self.consume();
             } else {
               current_part.push(self.consume());
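
Worked example (an illustrative sketch, not output taken from the compiler): with the backtick literals introduced in patch 1 and the whitespace handling from patch 2 -- newlines become spaces, runs of whitespace outside quoted strings collapse, and the final command is trimmed -- a literal such as the one in examples/main.zog

    `data modify storage foo:bar baz set value [
      "a", "b", "c",
      "d", "e", "f"
    ]`

would be expected to compile down to a single command line roughly like

    data modify storage foo:bar baz set value [ "a", "b", "c", "d", "e", "f" ]

while whitespace inside the quoted strings themselves is left untouched.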