Skip to content

Commit 4f9c6dc

Browse files
committed
refactor(pest)!: greatly simplified grammar, removed expensive look-aheads that offered no real benefit
expect some changes in templates that relied on smart escaping; escaping is now more intentional
1 parent 7f69371 commit 4f9c6dc

File tree

6 files changed

+37
-95
lines changed

6 files changed

+37
-95
lines changed

docs/template-system.md

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -940,9 +940,11 @@ The range system includes robust edge case handling:
940940

941941
### When is Escaping Required?
942942

943-
Different argument types have different escaping requirements:
943+
The template parser uses a unified argument parsing system: every operation that takes a free-form text argument follows the same escaping rules, for consistency and maintainability.
944944

945-
### Simple Arguments (append, prepend, join, etc.)
945+
### Operation Arguments
946+
947+
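All operations that take a free-form text argument use the same argument parsing rules (`replace` keeps its own sed-style `s/pattern/replacement/flags` syntax). The following characters require escaping: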
946948

947949
| Character | Escape | Reason |
948950
|-----------|--------|----------------------|
@@ -952,18 +954,6 @@ Different argument types have different escaping requirements:
952954
| `{` | `\{` | Starts template |
953955
| `\` | `\\` | Escape character |
954956

955-
### Regex Arguments (filter, regex_extract)
956-
957-
Regex patterns can contain most characters naturally.
958-
959-
### Split Arguments
960-
961-
Split separators can contain most characters. Only escape:
962-
963-
| Character | Escape | Reason |
964-
|-----------|--------|--------|
965-
| `:` | `\:` | Visual helper |
966-
967957
### Special Sequences
968958

969959
| Sequence | Result | Description |

src/pipeline/parser.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,7 @@
55
//! Pest parser generator for robust syntax handling with comprehensive error reporting.
66
//!
77
//! The parser supports the full template syntax including operations, ranges,
8-
//! escape sequences, and debug flags, with intelligent handling of special
9-
//! characters in different contexts.
8+
//! escape sequences, and debug flags.
109
//!
1110
1211
use once_cell::sync::OnceCell;

src/pipeline/template.pest

Lines changed: 29 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -40,33 +40,38 @@ shorthand_range = {
4040
| range_full
4141
}
4242

43+
// Argument parsing - requires escaping for special characters
44+
argument = { (escaped_char | normal_char)* }
45+
normal_char = { !("|" | "}" | "{" | ":" | "\\") ~ ANY }
46+
escaped_char = { "\\" ~ ANY }
47+
4348
// Main operations - using specific arg types where needed
44-
regex_extract = { "regex_extract" ~ ":" ~ regex_arg ~ (":" ~ number)? }
45-
filter_not = { "filter_not" ~ ":" ~ regex_arg }
46-
filter = { "filter" ~ ":" ~ regex_arg }
49+
regex_extract = { "regex_extract" ~ ":" ~ argument ~ (":" ~ number)? }
50+
filter_not = { "filter_not" ~ ":" ~ argument }
51+
filter = { "filter" ~ ":" ~ argument }
4752
strip_ansi = @{ "strip_ansi" }
48-
map = { "map" ~ ":" ~ map_operation }
49-
split = { "split" ~ ":" ~ split_arg ~ ":" ~ range_spec? }
50-
substring = { "substring" ~ ":" ~ range_spec }
51-
replace = { "replace" ~ ":" ~ sed_string }
52-
append = { "append" ~ ":" ~ simple_arg }
53-
prepend = { "prepend" ~ ":" ~ simple_arg }
54-
surround = { "surround" ~ ":" ~ simple_arg }
55-
quote = { "quote" ~ ":" ~ simple_arg }
53+
map = { "map" ~ ":" ~ map_operation }
54+
split = { "split" ~ ":" ~ argument ~ ":" ~ range_spec? }
55+
substring = { "substring" ~ ":" ~ range_spec }
56+
replace = { "replace" ~ ":" ~ sed_string }
57+
append = { "append" ~ ":" ~ argument }
58+
prepend = { "prepend" ~ ":" ~ argument }
59+
surround = { "surround" ~ ":" ~ argument }
60+
quote = { "quote" ~ ":" ~ argument }
5661
upper = @{ "upper" }
5762
lower = @{ "lower" }
58-
trim = { "trim" ~ (":" ~ simple_arg)? ~ (":" ~ direction)? }
59-
join = { "join" ~ ":" ~ simple_arg }
60-
slice = { "slice" ~ ":" ~ range_spec }
61-
sort = { "sort" ~ (":" ~ sort_direction)? }
63+
trim = { "trim" ~ (":" ~ argument)? ~ (":" ~ direction)? }
64+
join = { "join" ~ ":" ~ argument }
65+
slice = { "slice" ~ ":" ~ range_spec }
66+
sort = { "sort" ~ (":" ~ sort_direction)? }
6267
reverse = @{ "reverse" }
6368
unique = @{ "unique" }
64-
pad = { "pad" ~ ":" ~ number ~ (":" ~ pad_char)? ~ (":" ~ direction)? }
69+
pad = { "pad" ~ ":" ~ number ~ (":" ~ pad_char)? ~ (":" ~ direction)? }
6570

6671
// Direction specifiers
6772
direction = @{ "left" | "right" | "both" }
6873
sort_direction = @{ "asc" | "desc" }
69-
pad_char = @{ simple_arg_content+ }
74+
pad_char = @{ argument }
7075

7176
// Map operation
7277
map_operation = { "{" ~ map_operation_list ~ "}" }
@@ -95,68 +100,16 @@ map_inner_operation = {
95100
}
96101

97102
// Map-specific operations that need special handling
98-
map_split = { "split" ~ ":" ~ split_arg ~ (":" ~ range_spec)? }
99-
map_join = { "join" ~ ":" ~ simple_arg }
100-
map_slice = { "slice" ~ ":" ~ range_spec }
101-
map_sort = { "sort" ~ (":" ~ sort_direction)? }
103+
map_split = { "split" ~ ":" ~ argument ~ (":" ~ range_spec)? }
104+
map_join = { "join" ~ ":" ~ argument }
105+
map_slice = { "slice" ~ ":" ~ range_spec }
106+
map_sort = { "sort" ~ (":" ~ sort_direction)? }
102107
map_unique = @{ "unique" }
103-
map_filter = { "filter" ~ ":" ~ map_regex_arg }
104-
map_filter_not = { "filter_not" ~ ":" ~ map_regex_arg }
108+
map_filter = { "filter" ~ ":" ~ argument }
109+
map_filter_not = { "filter_not" ~ ":" ~ argument }
105110

106111
// Map-specific regex extract
107-
map_regex_extract = { "regex_extract" ~ ":" ~ map_regex_arg ~ (":" ~ number)? }
108-
109-
// Simplified argument handling - three types to handle specific cases
110-
simple_arg = @{ simple_arg_content* }
111-
simple_arg_content = { escaped_char | simple_normal_char }
112-
simple_normal_char = { !(":" | "|" | "}" | "{" | "\\") ~ ANY }
113-
114-
// Split args - need to handle pipes that aren't operations
115-
split_arg = @{ (split_escaped_char | split_content)* }
116-
split_content = { !(":" ~ (number | range_part)) ~ !("|" ~ operation_keyword) ~ !("}" ~ EOI) ~ ANY }
117-
split_escaped_char = { "\\" ~ ANY }
118-
119-
// Regex args - need to handle pipes and braces in regex patterns
120-
regex_arg = @{ (regex_escaped_char | regex_content)* }
121-
regex_content = { !(":" ~ (number | range_part)) ~ !("|" ~ operation_keyword) ~ !("}" ~ EOI) ~ ANY }
122-
regex_escaped_char = { "\\" ~ ANY }
123-
124-
// Map regex args - handle braces in regex patterns
125-
map_regex_arg = @{ (map_regex_escaped_char | map_regex_brace | map_regex_content)* }
126-
map_regex_brace = { "{" ~ (!"}" ~ ANY)* ~ "}" }
127-
map_regex_content = { !(":" ~ number) ~ !("|" ~ operation_keyword) ~ !("{" | ("}" ~ ("|" | "}" | EOI))) ~ ANY }
128-
map_regex_escaped_char = { "\\" ~ ANY }
129-
130-
// Common escaped character handling
131-
escaped_char = { "\\" ~ ANY }
132-
133-
// Operation keywords for lookahead (simplified list)
134-
operation_keyword = _{
135-
"split"
136-
| "upper"
137-
| "lower"
138-
| "trim"
139-
| "append"
140-
| "prepend"
141-
| "surround"
142-
| "quote"
143-
| "join"
144-
| "substring"
145-
| "replace"
146-
| "map"
147-
| "filter"
148-
| "filter_not"
149-
| "slice"
150-
| "sort"
151-
| "reverse"
152-
| "unique"
153-
| "regex_extract"
154-
| "strip_ansi"
155-
| "pad"
156-
}
157-
158-
// Range parts for lookahead
159-
range_part = _{ ".." | "..=" }
112+
map_regex_extract = { "regex_extract" ~ ":" ~ argument ~ (":" ~ number)? }
160113

161114
// Sed strings
162115
sed_string = { "s/" ~ sed_pattern ~ "/" ~ sed_replacement ~ "/" ~ sed_flags? }

tests/multi_template_tests.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ fn test_multi_template_caching_optimization() {
5555
fn test_multi_template_different_separators() {
5656
// Test multiple template sections with different separators
5757
let template =
58-
MultiTemplate::parse("Comma: {split:,:0} Space: {split: :1} Pipe: {split:|:0}").unwrap();
58+
MultiTemplate::parse("Comma: {split:,:0} Space: {split: :1} Pipe: {split:\\|:0}").unwrap();
5959
let result = template.format("a,b c|d").unwrap();
6060
assert_eq!(result, "Comma: a Space: c|d Pipe: a,b c");
6161
}

tests/template/complex_pipeline.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -395,7 +395,7 @@ fn test_special_chars_pipeline() {
395395

396396
#[test]
397397
fn test_escaped_pipes_pipeline() {
398-
let result = process("test", r"{replace:s/test/a|b/|split:|:..|join:-}");
398+
let result = process("test", r"{replace:s/test/a|b/|split:\|:..|join:-}");
399399
assert_eq!(result.unwrap(), "a-b");
400400
}
401401

tests/template/simple_pipeline.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1289,7 +1289,7 @@ pub mod regex_extract_operations {
12891289
assert_eq!(
12901290
process(
12911291
"Version: 1.2.3-beta",
1292-
r"{regex_extract:Version: (\d+\.\d+\.\d+):1}"
1292+
r"{regex_extract:Version\: (\d+\.\d+\.\d+):1}"
12931293
)
12941294
.unwrap(),
12951295
"1.2.3"

0 commit comments

Comments
 (0)