From 0e3cecf59e39ee84063b2c152dd7520357734a53 Mon Sep 17 00:00:00 2001 From: LM Date: Mon, 18 Aug 2025 16:05:38 +0200 Subject: [PATCH 1/3] feat(lib): introduce structured templates gives the option to pass different arguments to the different sections inside a MultiTemplate --- src/lib.rs | 35 +++- src/pipeline/mod.rs | 2 +- src/pipeline/template.rs | 318 +++++++++++++++++++++++++++++++++ tests/multi_template_tests.rs | 321 +++++++++++++++++++++++++++++++++- 4 files changed, 673 insertions(+), 3 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 3657cf4..0c68789 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -199,6 +199,33 @@ //! //! Use `map:{operation}` to apply string operations to each item in a list. //! +//! ## Structured Templates (Advanced) +//! +//! **NEW in v0.13.0**: Apply multiple inputs to different template sections with individual separators. +//! This enables powerful scenarios like batch processing, command construction, and data transformation. +//! +//! ```rust +//! use string_pipeline::Template; +//! +//! // Multiple inputs per template section with different separators +//! let template = Template::parse("Users: {upper} | Files: {lower}").unwrap(); +//! let result = template.format_with_inputs(&[ +//! &["john doe", "jane smith"], // Multiple users for first section +//! &["FILE1.TXT", "FILE2.TXT"] // Multiple files for second section +//! ], &[" ", ","]).unwrap(); // Space separator for users, comma for files +//! assert_eq!(result, "Users: JOHN DOE JANE SMITH | Files: file1.txt,file2.txt"); +//! +//! // Template introspection +//! let sections = template.get_template_sections(); // Get template section info +//! assert_eq!(sections.len(), 2); // Two template sections: {upper} and {lower} +//! ``` +//! +//! **Key Features:** +//! - **🎯 Flexible Input**: Each template section can receive multiple input values +//! - **⚙️ Custom Separators**: Individual separator for each template section +//! 
- **🔍 Introspection**: Examine template structure before processing +//! - **🏗️ Batch Processing**: Perfect for processing multiple items per section +//! //! ## Error Handling //! //! All operations return `Result` for comprehensive error handling: @@ -215,6 +242,12 @@ //! let result = template.format("not_a_list"); //! assert!(result.is_err()); //! // Error: "Sort operation can only be applied to lists" +//! +//! // Structured template input count validation +//! let template = Template::parse("A: {upper} B: {lower}").unwrap(); +//! let result = template.format_with_inputs(&[&["only_one"]], &[" ", " "]); +//! assert!(result.is_err()); +//! // Error: "Expected 2 input slices for 2 template sections, got 1" //! ``` //! //! ## Performance Notes @@ -245,4 +278,4 @@ mod pipeline; -pub use pipeline::{MultiTemplate, Template}; +pub use pipeline::{MultiTemplate, SectionInfo, SectionType, Template}; diff --git a/src/pipeline/mod.rs b/src/pipeline/mod.rs index 1868d64..387ff73 100644 --- a/src/pipeline/mod.rs +++ b/src/pipeline/mod.rs @@ -52,7 +52,7 @@ use std::collections::HashMap; use std::time::{Duration, Instant}; use strip_ansi_escapes::strip; -pub use crate::pipeline::template::{MultiTemplate, Template}; +pub use crate::pipeline::template::{MultiTemplate, SectionInfo, SectionType, Template}; pub use debug::DebugTracer; /* ------------------------------------------------------------------------ */ diff --git a/src/pipeline/template.rs b/src/pipeline/template.rs index c799d58..97763c4 100644 --- a/src/pipeline/template.rs +++ b/src/pipeline/template.rs @@ -132,6 +132,92 @@ pub enum TemplateSection { Template(Vec), } +/// Type of template section for introspection and analysis. +/// +/// Distinguishes between literal text sections and template operation sections +/// when examining template structure programmatically. Used by introspection +/// methods like [`MultiTemplate::get_section_info`] to provide detailed template analysis. 
+/// +/// # Examples +/// +/// ```rust +/// use string_pipeline::{Template, SectionType}; +/// +/// let template = Template::parse("Hello {upper} world!").unwrap(); +/// let sections = template.get_section_info(); +/// +/// assert_eq!(sections[0].section_type, SectionType::Literal); // "Hello " +/// assert_eq!(sections[1].section_type, SectionType::Template); // {upper} +/// assert_eq!(sections[2].section_type, SectionType::Literal); // " world!" +/// ``` +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum SectionType { + /// A literal text section that appears unchanged in template output. + /// + /// Literal sections contain static text that is copied directly to the + /// output without any transformation or processing. + Literal, + /// A template section containing string operations. + /// + /// Template sections contain operation sequences like `{upper|trim}` that + /// transform input data before including it in the output. + Template, +} + +/// Detailed information about a template section for introspection and debugging. +/// +/// Provides comprehensive metadata about each section in a template, including +/// its type, position, and content. Used by [`MultiTemplate::get_section_info`] +/// to enable programmatic template analysis and debugging. +/// +/// This struct contains all necessary information to understand both the structure +/// and content of template sections, making it useful for tools that need to +/// analyze or manipulate templates programmatically. 
+/// +/// # Field Details +/// +/// - **`section_type`**: Whether this is a literal text section or template operation section +/// - **`overall_position`**: Zero-based position among all sections in the template +/// - **`template_position`**: Zero-based position among template sections only (None for literals) +/// - **`content`**: The literal text content (populated only for literal sections) +/// - **`operations`**: The operation sequence (populated only for template sections) +/// +/// # Examples +/// +/// ```rust +/// use string_pipeline::{Template, SectionType}; +/// +/// let template = Template::parse("Name: {upper} | Age: {lower}").unwrap(); +/// let sections = template.get_section_info(); +/// +/// // First section: "Name: " +/// assert_eq!(sections[0].section_type, SectionType::Literal); +/// assert_eq!(sections[0].overall_position, 0); +/// assert_eq!(sections[0].template_position, None); +/// assert_eq!(sections[0].content, Some("Name: ".to_string())); +/// assert!(sections[0].operations.is_none()); +/// +/// // Second section: {upper} +/// assert_eq!(sections[1].section_type, SectionType::Template); +/// assert_eq!(sections[1].overall_position, 1); +/// assert_eq!(sections[1].template_position, Some(0)); +/// assert!(sections[1].content.is_none()); +/// assert_eq!(sections[1].operations.as_ref().unwrap().len(), 1); +/// ``` +#[derive(Debug, Clone)] +pub struct SectionInfo { + /// The type of this section (literal or template). + pub section_type: SectionType, + /// Position within all sections (both literal and template). + pub overall_position: usize, + /// Position among template sections only (None for literal sections). + pub template_position: Option<usize>, + /// Text content for literal sections (None for template sections). + pub content: Option<String>, + /// Operations for template sections (None for literal sections). 
+ pub operations: Option<Vec<StringOp>>, +} + /* ---------- per-format call cache (operation results only) -------------- */ /// Per-template-instance cache for operation results. @@ -504,6 +590,238 @@ impl MultiTemplate { self.debug = debug; } + /* -------- structured template processing ----------------------------- */ + + /// Format template with multiple inputs per template section. + /// + /// This method enables advanced template processing where each template section + /// can receive multiple input values that are joined with individual separators. + /// This is useful for complex formatting scenarios like batch processing or + /// command construction where different template sections need different data. + /// + /// # Arguments + /// + /// * `inputs` - Slice of input slices, where each inner slice contains the inputs for one template section + /// * `separators` - Slice of separators, one for each template section to join multiple inputs + /// + /// # Returns + /// + /// * `Ok(String)` - The formatted result with each template section processed with its joined inputs + /// * `Err(String)` - Error if inputs/separators length doesn't match template section count or processing fails + /// + /// # Template Section Ordering + /// + /// Template sections are numbered from left to right, starting at 0. Literal sections + /// are not counted. 
For example, in `"Hello {upper} world {lower}!"`: + /// - Template section 0: `{upper}` + /// - Template section 1: `{lower}` + /// - Total template sections: 2 + /// + /// # Input Processing + /// + /// For each template section: + /// - **Empty slice `[]`**: Uses empty string as input + /// - **Single item `["value"]`**: Uses "value" directly as input + /// - **Multiple items `["a", "b", "c"]`**: Joins with corresponding separator + /// + /// # Errors + /// + /// Returns an error if: + /// - The number of input slices doesn't match the number of template sections + /// - The number of separators doesn't match the number of template sections + /// - Any template section processing fails + /// + /// # Examples + /// + /// ```rust + /// use string_pipeline::Template; + /// + /// // Multiple inputs for first section, single input for second + /// let template = Template::parse("Users: {upper} | Email: {lower}").unwrap(); + /// let result = template.format_with_inputs(&[ + /// &["john doe", "peter parker"], + /// &["ADMIN@EXAMPLE.COM"], + /// ], &[" ", " "]).unwrap(); + /// assert_eq!(result, "Users: JOHN DOE PETER PARKER | Email: admin@example.com"); + /// + /// // File batch processing with different separators + /// let template = Template::parse("tar -czf {lower}.tar.gz {join: }").unwrap(); + /// let result = template.format_with_inputs(&[ + /// &["BACKUP"], + /// &["file1.txt", "file2.txt", "file3.txt"], + /// ], &[" ", " "]).unwrap(); + /// assert_eq!(result, "tar -czf backup.tar.gz file1.txt file2.txt file3.txt"); + /// + /// // Command construction with custom separators + /// let template = Template::parse("grep {join:\\|} {join:,}").unwrap(); + /// let result = template.format_with_inputs(&[ + /// &["error", "warning"], + /// &["log1.txt", "log2.txt"], + /// ], &["|", ","]).unwrap(); + /// assert_eq!(result, "grep error|warning log1.txt,log2.txt"); + /// ``` + pub fn format_with_inputs( + &self, + inputs: &[&[&str]], + separators: &[&str], + ) -> 
Result<String, String> { + 
let template_sections_count = self.template_section_count(); + + if inputs.len() != template_sections_count { + return Err(format!( + "Expected {} input slices for {} template sections, got {}", + template_sections_count, + template_sections_count, + inputs.len() + )); + } + + if separators.len() != template_sections_count { + return Err(format!( + "Expected {} separators for {} template sections, got {}", + template_sections_count, + template_sections_count, + separators.len() + )); + } + + let mut result = String::new(); + let mut template_index = 0; + let mut cache = TemplateCache::new(); + + for section in &self.sections { + match section { + TemplateSection::Literal(text) => { + result.push_str(text); + } + TemplateSection::Template(ops) => { + if template_index >= inputs.len() { + return Err("Internal error: template index out of bounds".to_string()); + } + + // Join multiple inputs with the corresponding separator + let section_inputs = inputs[template_index]; + let separator = separators[template_index]; + let input = match section_inputs.len() { + 0 => String::new(), + 1 => section_inputs[0].to_string(), + _ => section_inputs.join(separator), + }; + + let mut input_hasher = std::collections::hash_map::DefaultHasher::new(); + std::hash::Hash::hash(&input, &mut input_hasher); + let input_hash = input_hasher.finish(); + + let output = self.apply_template_section( + &input, ops, input_hash, &mut cache, + &None, // No debug tracing for structured processing + )?; + result.push_str(&output); + template_index += 1; + } + } + } + + Ok(result) + } + + /// Get information about template sections for introspection. + /// + /// Returns a vector of tuples containing the position and operations for each + /// template section in the template. This is useful for understanding the + /// structure of a template before processing it with `format_with_inputs`. 
+ /// + /// # Returns + /// + /// A vector where each element is a tuple of: + /// - `usize` - The position/index of the template section (0-based) + /// - `&Vec<StringOp>` - Reference to the operations in that section + /// + /// # Examples + /// + /// ```rust + /// use string_pipeline::Template; + /// + /// let template = Template::parse("Hello {upper} world {lower|trim}!").unwrap(); + /// let sections = template.get_template_sections(); + /// + /// assert_eq!(sections.len(), 2); + /// assert_eq!(sections[0].0, 0); // First template section at position 0 + /// assert_eq!(sections[1].0, 1); // Second template section at position 1 + /// assert_eq!(sections[0].1.len(), 1); // {upper} has 1 operation + /// assert_eq!(sections[1].1.len(), 2); // {lower|trim} has 2 operations + /// ``` + pub fn get_template_sections(&self) -> Vec<(usize, &Vec<StringOp>)> { + let mut result = Vec::new(); + let mut template_index = 0; + + for section in &self.sections { + if let TemplateSection::Template(ops) = section { + result.push((template_index, ops)); + template_index += 1; + } + } + + result + } + + /// Get detailed information about all sections in the template. + /// + /// Returns information about both literal and template sections, including + /// their types, positions, and content. This provides a complete view of + /// the template structure for debugging and introspection. 
+ /// + /// # Returns + /// + /// A vector of section information structs containing: + /// - Section type (literal or template) + /// - Position within all sections + /// - Content or operation details + /// + /// # Examples + /// + /// ```rust + /// use string_pipeline::Template; + /// + /// let template = Template::parse("Hello {upper} world!").unwrap(); + /// let info = template.get_section_info(); + /// + /// assert_eq!(info.len(), 3); + /// // info[0]: Literal("Hello ") + /// // info[1]: Template(position=0, operations=[Upper]) + /// // info[2]: Literal(" world!") + /// ``` + pub fn get_section_info(&self) -> Vec<SectionInfo> { + let mut result = Vec::new(); + let mut template_position = 0; + + for (overall_position, section) in self.sections.iter().enumerate() { + match section { + TemplateSection::Literal(text) => { + result.push(SectionInfo { + section_type: SectionType::Literal, + overall_position, + template_position: None, + content: Some(text.clone()), + operations: None, + }); + } + TemplateSection::Template(ops) => { + result.push(SectionInfo { + section_type: SectionType::Template, + overall_position, + template_position: Some(template_position), + content: None, + operations: Some(ops.clone()), + }); + template_position += 1; + } + } + } + + result + } + /* ------------------------------------------------------------------ */ /* internal helpers */ /* ------------------------------------------------------------------ */ diff --git a/tests/multi_template_tests.rs b/tests/multi_template_tests.rs index e902400..0d020b3 100644 --- a/tests/multi_template_tests.rs +++ b/tests/multi_template_tests.rs @@ -1,4 +1,4 @@ -use string_pipeline::MultiTemplate; +use string_pipeline::{MultiTemplate, SectionType}; #[test] fn test_multi_template_literal_text_only() { @@ -301,3 +301,322 @@ fn test_multi_template_mixed_operations() { let result = template.format("hello world test").unwrap(); assert_eq!(result, "First: hello Upper: HELLO WORLD TEST Last: test"); } + +// Tests 
for 
the structured template functionality + +#[test] +fn test_format_with_inputs_basic() { + // Test basic usage of format_with_inputs (single inputs) + let template = MultiTemplate::parse("User: {upper} | Email: {lower}").unwrap(); + let result = template + .format_with_inputs(&[&["john doe"], &["JOHN@EXAMPLE.COM"]], &[" ", " "]) + .unwrap(); + assert_eq!(result, "User: JOHN DOE | Email: john@example.com"); +} + +#[test] +fn test_format_with_inputs_redirect() { + // Test basic with multiple operations + let template = MultiTemplate::parse("bat {strip_ansi|lower} > {}.txt").unwrap(); + let result = template + .format_with_inputs(&[&["MyFile.log"], &["output"]], &[" ", " "]) + .unwrap(); + assert_eq!(result, "bat myfile.log > output.txt"); +} + +#[test] +fn test_format_with_inputs_multiple_values() { + // Test multiple inputs per template section + let template = MultiTemplate::parse("Users: {upper} | Files: {lower}").unwrap(); + let result = template + .format_with_inputs( + &[&["john doe", "peter parker"], &["FILE1.TXT", "FILE2.TXT"]], + &[" ", ","], + ) + .unwrap(); + assert_eq!( + result, + "Users: JOHN DOE PETER PARKER | Files: file1.txt,file2.txt" + ); +} + +#[test] +fn test_format_with_inputs_multiple_values_quoted() { + // Test multiple inputs per template section + let template = MultiTemplate::parse("Users: {upper} | Files: {lower}").unwrap(); + let result = template + .format_with_inputs( + &[ + &["john doe", "peter parker"], + &["'FILE1.TXT'", "'FILE2.TXT'"], + ], + &[" ", " "], + ) + .unwrap(); + assert_eq!( + result, + "Users: JOHN DOE PETER PARKER | Files: 'file1.txt' 'file2.txt'" + ); +} + +#[test] +fn test_format_with_inputs_complex_operations() { + // Test with complex operations in each section + let template = MultiTemplate::parse( + "Files: {split:,:..|filter:\\.txt$|join: \\| } Count: {split:,:..|map:{upper}|join:-}", + ) + .unwrap(); + let result = template + .format_with_inputs( + &[&["file1.txt,doc.pdf,file2.txt,readme.md"], &["a,b,c"]], + &[" 
", " "], + ) + .unwrap(); + assert_eq!(result, "Files: file1.txt | file2.txt Count: A-B-C"); +} + +#[test] +fn test_format_with_inputs_single_template() { + // Test with just one template section + let template = MultiTemplate::parse("Result: {upper}").unwrap(); + let result = template + .format_with_inputs(&[&["hello world"]], &[" "]) + .unwrap(); + assert_eq!(result, "Result: HELLO WORLD"); +} + +#[test] +fn test_format_with_inputs_no_templates() { + // Test with no template sections (only literals) + let template = MultiTemplate::parse("Just literal text").unwrap(); + let result = template.format_with_inputs(&[], &[]).unwrap(); + assert_eq!(result, "Just literal text"); +} + +#[test] +fn test_format_with_inputs_multiple_sections() { + // Test with multiple template sections + let template = MultiTemplate::parse("A: {upper} B: {lower} C: {trim} D: {append:!}").unwrap(); + let result = template + .format_with_inputs( + &[&["hello"], &["WORLD"], &[" test "], &["done"]], + &[" ", " ", " ", " "], + ) + .unwrap(); + assert_eq!(result, "A: HELLO B: world C: test D: done!"); +} + +#[test] +fn test_format_with_inputs_error_wrong_input_count() { + // Test error when input count doesn't match template section count + let template = MultiTemplate::parse("A: {upper} B: {lower}").unwrap(); + + // Too few inputs + let result = template.format_with_inputs(&[&["only_one"]], &[" ", " "]); + assert!(result.is_err()); + assert!( + result + .unwrap_err() + .contains("Expected 2 input slices for 2 template sections, got 1") + ); + + // Too many inputs + let result = template.format_with_inputs(&[&["one"], &["two"], &["three"]], &[" ", " "]); + assert!(result.is_err()); + assert!( + result + .unwrap_err() + .contains("Expected 2 input slices for 2 template sections, got 3") + ); +} + +#[test] +fn test_format_with_inputs_error_wrong_separator_count() { + // Test error when separator count doesn't match template section count + let template = MultiTemplate::parse("A: {upper} B: 
{lower}").unwrap(); + + // Too few separators + let result = template.format_with_inputs(&[&["one"], &["two"]], &[" "]); + assert!(result.is_err()); + assert!( + result + .unwrap_err() + .contains("Expected 2 separators for 2 template sections, got 1") + ); + + // Too many separators + let result = template.format_with_inputs(&[&["one"], &["two"]], &[" ", " ", " "]); + assert!(result.is_err()); + assert!( + result + .unwrap_err() + .contains("Expected 2 separators for 2 template sections, got 3") + ); +} + +#[test] +fn test_format_with_inputs_consecutive_templates() { + // Test consecutive template sections without literal text + let template = MultiTemplate::parse("{upper}{lower}{trim}").unwrap(); + let result = template + .format_with_inputs(&[&["Hello"], &["WORLD"], &[" test "]], &[" ", " ", " "]) + .unwrap(); + assert_eq!(result, "HELLOworldtest"); +} + +#[test] +fn test_format_with_inputs_empty_sections() { + // Test empty input sections + let template = MultiTemplate::parse("A: {upper} B: {lower}").unwrap(); + let result = template + .format_with_inputs(&[&[], &["test"]], &[" ", " "]) + .unwrap(); + assert_eq!(result, "A: B: test"); +} + +#[test] +fn test_format_with_inputs_custom_separators() { + // Test different separators for each section + let template = MultiTemplate::parse("List1: {join:;} | List2: {join:,}").unwrap(); + let result = template + .format_with_inputs(&[&["a", "b", "c"], &["x", "y", "z"]], &["-", "|"]) + .unwrap(); + assert_eq!(result, "List1: a-b-c | List2: x|y|z"); +} + +#[test] +fn test_format_with_inputs_processing_error() { + // Test that processing errors are properly propagated + let template = MultiTemplate::parse("Valid: {upper} Invalid: {regex_extract:[}").unwrap(); + let result = template.format_with_inputs(&[&["test"], &["input"]], &[" ", " "]); + assert!(result.is_err()); +} + +#[test] +fn test_get_template_sections() { + // Test introspection method for template sections + let template = MultiTemplate::parse("Hello {upper} 
world {lower|trim} end").unwrap(); + let sections = template.get_template_sections(); + + assert_eq!(sections.len(), 2); + assert_eq!(sections[0].0, 0); // First template section at position 0 + assert_eq!(sections[1].0, 1); // Second template section at position 1 + assert_eq!(sections[0].1.len(), 1); // {upper} has 1 operation + assert_eq!(sections[1].1.len(), 2); // {lower|trim} has 2 operations +} + +#[test] +fn test_get_template_sections_empty() { + // Test with no template sections + let template = MultiTemplate::parse("Just literal text with no templates").unwrap(); + let sections = template.get_template_sections(); + assert_eq!(sections.len(), 0); +} + +#[test] +fn test_get_section_info() { + // Test detailed section info method + let template = MultiTemplate::parse("Start {upper} middle {lower} end").unwrap(); + let info = template.get_section_info(); + + assert_eq!(info.len(), 5); + + // Check first section (literal) + assert_eq!(info[0].section_type, SectionType::Literal); + assert_eq!(info[0].overall_position, 0); + assert_eq!(info[0].template_position, None); + assert_eq!(info[0].content.as_ref().unwrap(), "Start "); + assert!(info[0].operations.is_none()); + + // Check second section (template) + assert_eq!(info[1].section_type, SectionType::Template); + assert_eq!(info[1].overall_position, 1); + assert_eq!(info[1].template_position, Some(0)); + assert!(info[1].content.is_none()); + assert_eq!(info[1].operations.as_ref().unwrap().len(), 1); + + // Check third section (literal) + assert_eq!(info[2].section_type, SectionType::Literal); + assert_eq!(info[2].content.as_ref().unwrap(), " middle "); + + // Check fourth section (template) + assert_eq!(info[3].section_type, SectionType::Template); + assert_eq!(info[3].template_position, Some(1)); + + // Check fifth section (literal) + assert_eq!(info[4].section_type, SectionType::Literal); + assert_eq!(info[4].content.as_ref().unwrap(), " end"); +} + +#[test] +fn test_get_section_info_only_templates() { + // 
Test section info with only template sections + let template = MultiTemplate::parse("{upper}{lower}").unwrap(); + let info = template.get_section_info(); + + assert_eq!(info.len(), 2); + assert_eq!(info[0].section_type, SectionType::Template); + assert_eq!(info[0].template_position, Some(0)); + assert_eq!(info[1].section_type, SectionType::Template); + assert_eq!(info[1].template_position, Some(1)); +} + +#[test] +fn test_backwards_compatibility_maintained() { + // Test that existing format() method still works exactly as before + let template = MultiTemplate::parse("Hello {upper} world {lower}!").unwrap(); + let result_old = template.format("test").unwrap(); + assert_eq!(result_old, "Hello TEST world test!"); + + // Verify section counting methods work + assert_eq!(template.template_section_count(), 2); + assert_eq!(template.section_count(), 5); +} + +#[test] +fn test_structured_template_complex_scenario() { + // Test a complex real-world scenario + let template = + MultiTemplate::parse("cp {split:/:-1} /backup/{split:/:-1|replace:s/\\.txt$/.bak/}") + .unwrap(); + let result = template + .format_with_inputs( + &[ + &["/home/user/documents/important.txt"], + &["/home/user/documents/important.txt"], + ], + &[" ", " "], + ) + .unwrap(); + assert_eq!(result, "cp important.txt /backup/important.bak"); +} + +#[test] +fn test_structured_template_data_processing() { + // Test structured processing for data transformation + let template = MultiTemplate::parse("Name: {split:,:..|slice:0..1|join:} Age: {split:,:..|slice:1..2|join:} Email: {split:,:..|slice:2..3|join:}").unwrap(); + let csv_data = "John Doe,30,john@example.com"; + let result = template + .format_with_inputs(&[&[csv_data], &[csv_data], &[csv_data]], &[" ", " ", " "]) + .unwrap(); + assert_eq!(result, "Name: John Doe Age: 30 Email: john@example.com"); +} + +#[test] +fn test_structured_template_file_operations() { + // Test file operation template + let template = MultiTemplate::parse("mkdir -p 
{split:/:..-1|join:/} && touch {}.tmp").unwrap(); + let result = template + .format_with_inputs( + &[ + &["/home/user/projects/new/file.txt"], + &["/home/user/projects/new/file.txt"], + ], + &[" ", " "], + ) + .unwrap(); + assert_eq!( + result, + "mkdir -p /home/user/projects/new && touch /home/user/projects/new/file.txt.tmp" + ); +} From e63bdd4cd4391b0bf46573358388a349733944c4 Mon Sep 17 00:00:00 2001 From: LM Date: Mon, 18 Aug 2025 22:24:47 +0200 Subject: [PATCH 2/3] fix(lib): remove interior mutability issue from OnceCell that prevented stable use in HashMaps/Sets --- src/pipeline/mod.rs | 35 +++++++++-------------------------- src/pipeline/parser.rs | 11 +---------- 2 files changed, 10 insertions(+), 36 deletions(-) diff --git a/src/pipeline/mod.rs b/src/pipeline/mod.rs index 387ff73..a073dbf 100644 --- a/src/pipeline/mod.rs +++ b/src/pipeline/mod.rs @@ -47,7 +47,7 @@ mod template; use dashmap::DashMap; use memchr::memchr_iter; -use once_cell::sync::{Lazy, OnceCell}; +use once_cell::sync::Lazy; use std::collections::HashMap; use std::time::{Duration, Instant}; use strip_ansi_escapes::strip; @@ -686,10 +686,7 @@ pub enum StringOp { /// let template = Template::parse("{split:,:..|filter:\\.txt$|join:\\n}").unwrap(); /// assert_eq!(template.format("file.txt,readme.md,data.txt").unwrap(), "file.txt\ndata.txt"); /// ``` - Filter { - pattern: String, - regex: OnceCell, - }, + Filter { pattern: String }, /// Remove list items matching a regex pattern. /// @@ -724,10 +721,7 @@ pub enum StringOp { /// let template = Template::parse("{split:\\n:..|filter_not:^$|join:\\n}").unwrap(); /// assert_eq!(template.format("line1\n\nline2\n\nline3").unwrap(), "line1\nline2\nline3"); /// ``` - FilterNot { - pattern: String, - regex: OnceCell, - }, + FilterNot { pattern: String }, /// Select a range of items from a list. 
/// @@ -899,7 +893,6 @@ pub enum StringOp { RegexExtract { pattern: String, group: Option, - regex: OnceCell, }, } @@ -1360,10 +1353,8 @@ fn apply_single_operation( StringOp::Slice { range } => { apply_list_operation(val, |list| apply_range(&list, range), "Slice") } - StringOp::Filter { pattern, regex } => { - let re = regex.get_or_try_init(|| { - Regex::new(pattern).map_err(|e| format!("Invalid regex: {e}")) - })?; + StringOp::Filter { pattern } => { + let re = get_cached_regex(pattern)?; match val { Value::List(list) => Ok(Value::List( list.into_iter().filter(|s| re.is_match(s)).collect(), @@ -1371,10 +1362,8 @@ fn apply_single_operation( Value::Str(s) => Ok(Value::Str(if re.is_match(&s) { s } else { String::new() })), } } - StringOp::FilterNot { pattern, regex } => { - let re = regex.get_or_try_init(|| { - Regex::new(pattern).map_err(|e| format!("Invalid regex: {e}")) - })?; + StringOp::FilterNot { pattern } => { + let re = get_cached_regex(pattern)?; match val { Value::List(list) => Ok(Value::List( list.into_iter().filter(|s| !re.is_match(s)).collect(), @@ -1576,15 +1565,9 @@ fn apply_single_operation( ) } } - StringOp::RegexExtract { - pattern, - group, - regex, - } => { + StringOp::RegexExtract { pattern, group } => { if let Value::Str(s) = val { - let re = regex.get_or_try_init(|| { - Regex::new(pattern).map_err(|e| format!("Invalid regex: {e}")) - })?; + let re = get_cached_regex(pattern)?; let result = if let Some(group_idx) = group { re.captures(&s) .and_then(|caps| caps.get(*group_idx)) diff --git a/src/pipeline/parser.rs b/src/pipeline/parser.rs index 43245b1..bfb87aa 100644 --- a/src/pipeline/parser.rs +++ b/src/pipeline/parser.rs @@ -8,7 +8,6 @@ //! escape sequences, and debug flags. //! 
-use once_cell::sync::OnceCell; use pest::Parser; use pest_derive::Parser; use smallvec::SmallVec; @@ -270,11 +269,9 @@ fn parse_operation(pair: pest::iterators::Pair) -> Result Ok(StringOp::StripAnsi), Rule::filter => Ok(StringOp::Filter { pattern: extract_single_arg_raw(pair)?, - regex: OnceCell::new(), }), Rule::filter_not => Ok(StringOp::FilterNot { pattern: extract_single_arg_raw(pair)?, - regex: OnceCell::new(), }), Rule::slice => Ok(StringOp::Slice { range: extract_range_arg(pair)?, @@ -500,11 +497,7 @@ fn parse_regex_extract_operation(pair: pest::iterators::Pair) -> Result) -> Result Ok(StringOp::Unique), Rule::map_filter => Ok(StringOp::Filter { pattern: extract_single_arg_raw(pair)?, - regex: OnceCell::new(), }), Rule::map_filter_not => Ok(StringOp::FilterNot { pattern: extract_single_arg_raw(pair)?, - regex: OnceCell::new(), }), _ => Err(format!("Unsupported map operation: {:?}", pair.as_rule())), From 6f916d64199de671ec26af979558411a313ea675 Mon Sep 17 00:00:00 2001 From: LM Date: Tue, 19 Aug 2025 10:11:42 +0200 Subject: [PATCH 3/3] fix(lib): fix logic error where we joined first then processed instead of processing then joining --- src/pipeline/template.rs | 44 ++++++++++++++++++++++++----------- tests/multi_template_tests.rs | 10 ++++++-- 2 files changed, 39 insertions(+), 15 deletions(-) diff --git a/src/pipeline/template.rs b/src/pipeline/template.rs index 97763c4..7dea36a 100644 --- a/src/pipeline/template.rs +++ b/src/pipeline/template.rs @@ -699,23 +699,41 @@ impl MultiTemplate { return Err("Internal error: template index out of bounds".to_string()); } - // Join multiple inputs with the corresponding separator + // Process each input individually, then join the results let section_inputs = inputs[template_index]; let separator = separators[template_index]; - let input = match section_inputs.len() { + let output = match section_inputs.len() { 0 => String::new(), - 1 => section_inputs[0].to_string(), - _ => section_inputs.join(separator), + 1 => { + 
let mut input_hasher = std::collections::hash_map::DefaultHasher::new(); + std::hash::Hash::hash(&section_inputs[0], &mut input_hasher); + let input_hash = input_hasher.finish(); + + self.apply_template_section( + section_inputs[0], + ops, + input_hash, + &mut cache, + &None, // No debug tracing for structured processing + )? + } + _ => { + let mut results = Vec::new(); + for input in section_inputs { + let mut input_hasher = + std::collections::hash_map::DefaultHasher::new(); + std::hash::Hash::hash(&input, &mut input_hasher); + let input_hash = input_hasher.finish(); + + let result = self.apply_template_section( + input, ops, input_hash, &mut cache, + &None, // No debug tracing for structured processing + )?; + results.push(result); + } + results.join(separator) + } }; - - let mut input_hasher = std::collections::hash_map::DefaultHasher::new(); - std::hash::Hash::hash(&input, &mut input_hasher); - let input_hash = input_hasher.finish(); - - let output = self.apply_template_section( - &input, ops, input_hash, &mut cache, - &None, // No debug tracing for structured processing - )?; result.push_str(&output); template_index += 1; } diff --git a/tests/multi_template_tests.rs b/tests/multi_template_tests.rs index 0d020b3..7a7b109 100644 --- a/tests/multi_template_tests.rs +++ b/tests/multi_template_tests.rs @@ -582,13 +582,19 @@ fn test_structured_template_complex_scenario() { let result = template .format_with_inputs( &[ - &["/home/user/documents/important.txt"], + &[ + "/home/user/documents/important1.txt", + "/home/user/documents/important2.txt", + ], &["/home/user/documents/important.txt"], ], &[" ", " "], ) .unwrap(); - assert_eq!(result, "cp important.txt /backup/important.bak"); + assert_eq!( + result, + "cp important1.txt important2.txt /backup/important.bak" + ); } #[test]