From b4f89a6d4ab4c72d18aa2b574901a9bdf04b55f4 Mon Sep 17 00:00:00 2001 From: megamaths Date: Fri, 1 Aug 2025 10:00:05 +0100 Subject: [PATCH 1/9] added basic markdown attrib and parsing --- src/elements/element.rs | 2 +- src/elements/text.rs | 569 +++++++++++++++++++++++++-- tests/integration_tests/text_attr.rs | 17 + 3 files changed, 550 insertions(+), 38 deletions(-) diff --git a/src/elements/element.rs b/src/elements/element.rs index afd6323..71800fa 100644 --- a/src/elements/element.rs +++ b/src/elements/element.rs @@ -436,7 +436,7 @@ impl SvgElement { // TODO: refactor this method to handle text event gen better let phantom = matches!(self.name(), "point" | "box"); - if self.has_attr("text") { + if self.has_attr("text") || self.has_attr("md") { let (orig_elem, text_elements) = process_text_attr(self)?; if orig_elem.name != "text" && !phantom { diff --git a/src/elements/text.rs b/src/elements/text.rs index ef8c4da..df741fc 100644 --- a/src/elements/text.rs +++ b/src/elements/text.rs @@ -11,6 +11,279 @@ fn get_text_value(element: &mut SvgElement) -> String { text_string(&text_value) } +fn get_md_value(element: &mut SvgElement) -> (Vec, Vec) { + let text_value = element + .pop_attr("md") + .expect("no md attr in process_text_attr"); + + let (parsed_string, sections) = md_parse(&text_value); + + let mut state_per_char = vec![0; parsed_string.len()]; + + for i in 0..sections.len() { + let bit = sections[i].code_bold_italic; + for j in sections[i].start_ind..sections[i].end_ind { + state_per_char[j] |= 1 << bit; + } + } + + let mut strings = vec![]; + let mut states = vec![]; + for i in 0..parsed_string.len() { + if i == 0 || states[states.len() - 1] != state_per_char[i] { + strings.push(String::new()); + states.push(state_per_char[i]) + } + strings + .last_mut() + .expect("filled from i == 0") + .push(parsed_string[i]); + } + + return (strings, states); +} + +#[derive(Debug)] +struct SectionData { + start_ind: usize, + end_ind: usize, + code_bold_italic: u32, +} + +// based on the commonmarkdown implementation +#[derive(Debug)] +struct DelimiterData { + ind: usize, // goes just before this char + char_type: char, + num_delimiters: u32, + is_active: bool, + could_open: bool, + could_close: bool, +} + +fn md_parse(text_value: &str) -> (Vec, Vec) { + let mut sections = vec![]; + let mut result = vec![]; + let mut delimiters = vec![DelimiterData { + ind: 0, + char_type: ' ', + num_delimiters: 0, + is_active: false, + could_open: false, + could_close: false, + }]; + let mut escaped = false; + + // first pass process \ and find delimiters + for c in text_value.chars() { + let mut add = true; + if c == '\\' { + if !escaped { + add = false; + escaped = true; + } else { + escaped = false; + } + } + // the delimiters + else if c == '`' || c == '_' || c == '*' { + if !escaped { + let last = delimiters.last_mut().expect("garenteed not to be empty"); + if c == last.char_type && last.ind == result.len() { + // is a continuation + last.num_delimiters += 1; + } else { + delimiters.push(DelimiterData { + ind: result.len(), + char_type: c, + num_delimiters: 1, + is_active: true, + could_open: true, + could_close: true, + }); + } + add = false; + } else { + escaped = true; + } + } else if escaped { + if c == 'n' { + add = false; + result.push('\n'); + } else { + // was not an escape + result.push('\\'); + } + escaped = false; + } + + if add { + result.push(c); + } + } + + // set could open/close + for i in 0..delimiters.len() { + let prev_char; + let next_char; + if i != 0 && delimiters[i - 1].ind == delimiters[i].ind { + prev_char = delimiters[i - 1].char_type; + } else if delimiters[i].ind == 0 { + prev_char = ' '; + } else { + prev_char = result[delimiters[i].ind - 1]; + } + + if i != delimiters.len() - 1 && delimiters[i + 1].ind == delimiters[i].ind { + next_char = delimiters[i + 1].char_type; + } else if delimiters[i].ind == result.len() { + next_char = ' '; + } else { + next_char = result[delimiters[i].ind]; + } + + if next_char.is_whitespace() { + delimiters[i].could_open = false; + } + if prev_char.is_whitespace() { + delimiters[i].could_close = false; + } + if !next_char.is_whitespace() + && !prev_char.is_whitespace() + && delimiters[i].char_type == '_' + { + delimiters[i].could_open = false; + delimiters[i].could_close = false; + } + + if next_char.is_ascii_punctuation() + && (!prev_char.is_whitespace() || !prev_char.is_ascii_punctuation()) + { + delimiters[i].could_open = false; + } + if prev_char.is_ascii_punctuation() + && (!next_char.is_whitespace() || !next_char.is_ascii_punctuation()) + { + delimiters[i].could_close = false; + } + } + + let stack_bottom = 0; // because I have a null element in it + let mut current_position = stack_bottom + 1; + let mut opener_a = [stack_bottom; 3]; + let mut opener_d = [stack_bottom; 3]; + let mut opener_t = [stack_bottom; 3]; + + loop { + while current_position != delimiters.len() + && !delimiters[current_position].could_close + && delimiters[current_position].is_active + { + current_position += 1; + } + if current_position == delimiters.len() { + break; + } + let opener_min = match delimiters[current_position].char_type { + '*' => &mut opener_a, + '_' => &mut opener_d, + '`' => &mut opener_t, + _ => panic!(), + }; + println!("{} {:?}", current_position, delimiters); + + let min = opener_min[(delimiters[current_position].num_delimiters % 3) as usize] + .max(stack_bottom); + let mut opener_ind = current_position - 1; + while opener_ind > min { + // found opener + if delimiters[opener_ind].is_active + && delimiters[opener_ind].could_open + && delimiters[opener_ind].char_type == delimiters[current_position].char_type + { + if (delimiters[opener_ind].could_close || delimiters[current_position].could_open) + && delimiters[opener_ind].num_delimiters % 3 + != delimiters[current_position].num_delimiters % 3 + { + } else { + break; + } + } + opener_ind -= 1; + } + + if opener_ind == min { + // not found a opener + opener_min[(delimiters[current_position].num_delimiters % 3) as usize] = + current_position - 1; + current_position += 1; + } else { + delimiters[current_position].could_open = false; + delimiters[opener_ind].could_close = false; + // did + let code = delimiters[current_position].char_type == '`'; + let strong = !code + && delimiters[opener_ind].num_delimiters >= 2 + && delimiters[current_position].num_delimiters >= 2; + sections.push(SectionData { + start_ind: delimiters[opener_ind].ind, + end_ind: delimiters[current_position].ind, + code_bold_italic: if code { + 0 + } else if strong { + 1 + } else { + 2 + }, + }); + + println!("{} {} {}", opener_ind, current_position, strong); + delimiters[opener_ind].num_delimiters -= 1 + (strong as u32); + delimiters[current_position].num_delimiters -= 1 + (strong as u32); + + if delimiters[opener_ind].num_delimiters == 0 { + delimiters[opener_ind].is_active = false; + } + if delimiters[current_position].num_delimiters == 0 { + delimiters[current_position].is_active = false; + current_position += 1; + } + + for i in (opener_ind + 1)..current_position { + delimiters[i].is_active = false; + } + } + } + println!(); + + let mut final_result = vec![]; + + // work from the back to avoid index invalidation + for i in (0..delimiters.len()).rev() { + while delimiters[i].ind < result.len() { + if let Some(thing) = result.pop() { + final_result.push(thing); + } + } + + for j in 0..sections.len() { + // if start needs to be after or equal + if sections[j].start_ind >= delimiters[i].ind { + sections[j].start_ind += delimiters[i].num_delimiters as usize; + } + if sections[j].end_ind > delimiters[i].ind { + // if end needs to be after + sections[j].end_ind += delimiters[i].num_delimiters as usize; + } + } + for _ in 0..delimiters[i].num_delimiters { + final_result.push(delimiters[i].char_type); + } + } + + return (final_result.into_iter().rev().collect(), sections); +} + /// Convert unescaped r"\n" into newline characters for multi-line text fn text_string(text_value: &str) -> String { let mut result = String::new(); @@ -150,6 +423,27 @@ fn get_text_position(element: &mut SvgElement) -> Result<(f32, f32, bool, LocSpe Ok((tdx, tdy, outside, text_anchor, text_classes)) } +fn get_text_len(mono: bool, text: String) -> f32 { + if mono { + return 0.6 * text.len() as f32; + } + let mut length = 0.0; + + let long = ['m', 'w']; + let short = ['f', 'i', 'j', 'l', 'r', 't']; + for i in text.chars() { + if long.contains(&i) { + length += 0.8; + } else if short.contains(&i) { + length += 0.33; + } else { + length += 0.6; + } + } + + return length; +} + pub fn process_text_attr(element: &SvgElement) -> Result<(SvgElement, Vec)> { // Different conversions from line count to first-line offset based on whether // top, center, or bottom justification. @@ -162,22 +456,80 @@ pub fn process_text_attr(element: &SvgElement) -> Result<(SvgElement, Vec = text_value.lines().collect(); + let mut lines = vec![vec![]]; + let mut line_types = vec![vec![]]; + for i in 0..text_values.len() { + let mut segments = text_values[i].lines(); + + if let Some(first) = segments.next() { + if first != "" { + lines + .last_mut() + .expect("added item not removed") + .push(first); + line_types + .last_mut() + .expect("added item not removed") + .push(state_values[i]); + } else if i != 0 { + lines.push(vec![]); + line_types.push(vec![]); + } + } + + for s in segments { + lines.push(vec![s]); + line_types.push(vec![state_values[i]]); + } + + if let Some(last_char) = text_values[i].chars().last() { + if last_char == '\n' && i != text_values.len() - 1 { + lines.push(vec![]); + line_types.push(vec![]); + } + } + } + + for i in 0..lines.len() { + if lines[i].len() == 0 { + lines[i].push(""); + line_types[i].push(0); + } + } let line_count = lines.len(); + println!("{:?}", text_values[0].lines().collect::>()); + println!("{:?}", text_values); + println!("{:?}", lines); - let multiline = line_count > 1; + let multielement = line_count > 1 || text_values.len() > 1; let vertical = orig_elem.has_class("d-text-vertical"); // Whether text is pre-formatted (i.e. spaces are not collapsed) let text_pre = orig_elem.has_class("d-text-pre"); - // There will always be a text element; if not multiline this is the only element. + // There will always be a text element; if not multielement this is the only element. let mut text_elem = if orig_elem.name() == "text" { orig_elem.clone() } else { @@ -276,9 +628,9 @@ pub fn process_text_attr(element: &SvgElement) -> Result<(SvgElement, Vec Result<(SvgElement, Vec String { + "SectionData { start_ind: ".to_owned() + + &s.to_string() + + ", end_ind: " + + &e.to_string() + + ", code_bold_italic: " + + &i.to_string() + + " }" + } + + // using the md + let text = r"He*ll*o, \nworld!"; + assert_eq!( + format!("{:?}", md_parse(text)), + "(['H', 'e', 'l', 'l', 'o', ',', ' ', '\\n', 'w', 'o', 'r', 'l', 'd', '!'], [" + .to_owned() + + &sd(2, 4, 2) + + "])" + ); + + // mismatched + let text = r"*Hello** , \nworld!"; + assert_eq!( + format!("{:?}", md_parse(text)), + "(['H', 'e', 'l', 'l', 'o', '*', ' ', ',', ' ', '\\n', 'w', 'o', 'r', 'l', 'd', '!'], [" + .to_owned() + + &sd(0, 5, 2) + "])" + ); + + // diff type + let text = r"He*llo_, \nworld!"; + assert_eq!(format!("{:?}",md_parse(text)), "(['H', 'e', '*', 'l', 'l', 'o', '_', ',', ' ', '\\n', 'w', 'o', 'r', 'l', 'd', '!'], [])"); + + // multiple diff type + let text = r"_hello*"; + assert_eq!( + format!("{:?}", md_parse(text)), + "(['_', 'h', 'e', 'l', 'l', 'o', '*'], [])" + ); + + // multiple same type + let text = r"He*ll*o, \nw*or*ld!"; + assert_eq!( + format!("{:?}", md_parse(text)), + "(['H', 'e', 'l', 'l', 'o', ',', ' ', '\\n', 'w', 'o', 'r', 'l', 'd', '!'], [" + .to_owned() + + &sd(2, 4, 2) + + ", " + + &sd(9, 11, 2) + + "])" + ); + + // space before + let text = r"**foo bar **"; + assert_eq!( + format!("{:?}", md_parse(text)), + "(['*', '*', 'f', 'o', 'o', ' ', 'b', 'a', 'r', ' ', '*', '*'], [])" + ); + + // punctuation before alphnum after + let text = r"**(**foo)"; + assert_eq!( + format!("{:?}", md_parse(text)), + "(['*', '*', '(', '*', '*', 'f', 'o', 'o', ')'], [])" + ); + } + + #[test] + fn test_get_md_value() { + let mut el = SvgElement::new("text", &[]); + let text = r"foo"; + el.set_attr("md", text); + assert_eq!(format!("{:?}", get_md_value(&mut el)), "([\"foo\"], [0])"); + + let text = r"**(**foo)"; + el.set_attr("md", text); + assert_eq!( + format!("{:?}", get_md_value(&mut el)), + "([\"**(**foo)\"], [0])" + ); + + let text = r"*foo *bar**"; + el.set_attr("md", text); + assert_eq!( + format!("{:?}", get_md_value(&mut el)), + "([\"foo bar\"], [4])" + ); + + let text = r"*foo**bar**baz*"; + el.set_attr("md", text); + assert_eq!( + format!("{:?}", get_md_value(&mut el)), + "([\"foo\", \"bar\", \"baz\"], [4, 6, 4])" + ); + } } diff --git a/tests/integration_tests/text_attr.rs b/tests/integration_tests/text_attr.rs index a50367c..77bc297 100644 --- a/tests/integration_tests/text_attr.rs +++ b/tests/integration_tests/text_attr.rs @@ -614,3 +614,20 @@ fn test_multiline_outside() { expected.trim() ); } + +#[test] +fn test_md() { + let input = r#" + +"#; + let expected = r#" + + +multiline + +"#; + assert_eq!( + transform_str_default(input).unwrap().trim(), + expected.trim() + ); +} From 5b0ebb69fafdea6a3e112254b471ead04fc6ce4d Mon Sep 17 00:00:00 2001 From: megamaths Date: Tue, 5 Aug 2025 14:50:29 +0100 Subject: [PATCH 2/9] clean the printlines and fixed problem with single element md not adding styles --- src/elements/text.rs | 40 +++++++++++++--------------------------- 1 file changed, 13 insertions(+), 27 deletions(-) diff --git a/src/elements/text.rs b/src/elements/text.rs index df741fc..ec2a75f 100644 --- a/src/elements/text.rs +++ b/src/elements/text.rs @@ -190,7 +190,6 @@ fn md_parse(text_value: &str) -> (Vec, Vec) { '`' => &mut opener_t, _ => panic!(), }; - println!("{} {:?}", current_position, delimiters); let min = opener_min[(delimiters[current_position].num_delimiters % 3) as usize] .max(stack_bottom); @@ -206,6 +205,7 @@ fn md_parse(text_value: &str) -> (Vec, Vec) { != delimiters[current_position].num_delimiters % 3 { } else { + // found valid opener break; } } @@ -237,7 +237,6 @@ fn md_parse(text_value: &str) -> (Vec, Vec) { }, }); - println!("{} {} {}", opener_ind, current_position, strong); delimiters[opener_ind].num_delimiters -= 1 + (strong as u32); delimiters[current_position].num_delimiters -= 1 + (strong as u32); @@ -254,7 +253,6 @@ fn md_parse(text_value: &str) -> (Vec, Vec) { } } } - println!(); let mut final_result = vec![]; @@ -423,27 +421,6 @@ fn get_text_position(element: &mut SvgElement) -> Result<(f32, f32, bool, LocSpe Ok((tdx, tdy, outside, text_anchor, text_classes)) } -fn get_text_len(mono: bool, text: String) -> f32 { - if mono { - return 0.6 * text.len() as f32; - } - let mut length = 0.0; - - let long = ['m', 'w']; - let short = ['f', 'i', 'j', 'l', 'r', 't']; - for i in text.chars() { - if long.contains(&i) { - length += 0.8; - } else if short.contains(&i) { - length += 0.33; - } else { - length += 0.6; - } - } - - return length; -} - pub fn process_text_attr(element: &SvgElement) -> Result<(SvgElement, Vec)> { // Different conversions from line count to first-line offset based on whether // top, center, or bottom justification. @@ -520,9 +497,6 @@ pub fn process_text_attr(element: &SvgElement) -> Result<(SvgElement, Vec>()); - println!("{:?}", text_values); - println!("{:?}", lines); let multielement = line_count > 1 || text_values.len() > 1; let vertical = orig_elem.has_class("d-text-vertical"); @@ -596,6 +570,18 @@ pub fn process_text_attr(element: &SvgElement) -> Result<(SvgElement, Vec Date: Tue, 5 Aug 2025 16:25:22 +0100 Subject: [PATCH 3/9] split markdown parser into multiple functions --- src/elements/text.rs | 134 ++++++++++++++++++++++++------------------- 1 file changed, 74 insertions(+), 60 deletions(-) diff --git a/src/elements/text.rs b/src/elements/text.rs index ec2a75f..1c11690 100644 --- a/src/elements/text.rs +++ b/src/elements/text.rs @@ -20,10 +20,10 @@ fn get_md_value(element: &mut SvgElement) -> (Vec, Vec) { let mut state_per_char = vec![0; parsed_string.len()]; - for i in 0..sections.len() { - let bit = sections[i].code_bold_italic; - for j in sections[i].start_ind..sections[i].end_ind { - state_per_char[j] |= 1 << bit; + for s in sections { + let bit = s.code_bold_italic; + for i in s.start_ind..s.end_ind { + state_per_char[i] |= 1 << bit; } } @@ -55,14 +55,13 @@ struct SectionData { struct DelimiterData { ind: usize, // goes just before this char char_type: char, - num_delimiters: u32, + num_delimiters: usize, is_active: bool, could_open: bool, could_close: bool, } -fn md_parse(text_value: &str) -> (Vec, Vec) { - let mut sections = vec![]; +fn md_parse_escapes_and_delimiters(text_value: &str) -> (Vec, Vec) { let mut result = vec![]; let mut delimiters = vec![DelimiterData { ind: 0, @@ -77,17 +76,16 @@ fn md_parse(text_value: &str) -> (Vec, Vec) { // first pass process \ and find delimiters for c in text_value.chars() { let mut add = true; - if c == '\\' { - if !escaped { + match (c, escaped) { + ('\\', false) => { add = false; escaped = true; - } else { - escaped = false; } - } - // the delimiters - else if c == '`' || c == '_' || c == '*' { - if !escaped { + ('\\', true) => { + escaped = true; + } + // the delimiters + ('`', false) | ('_', false) | ('*', false) => { let last = delimiters.last_mut().expect("garenteed not to be empty"); if c == last.char_type && last.ind == result.len() { // is a continuation @@ -103,25 +101,31 @@ fn md_parse(text_value: &str) -> (Vec, Vec) { }); } add = false; - } else { - escaped = true; } - } else if escaped { - if c == 'n' { + ('`', true) | ('_', true) | ('*', true) => { + escaped = false; + } + ('n', true) => { add = false; result.push('\n'); - } else { + escaped = false; + } + (_, true) => { // was not an escape result.push('\\'); + escaped = false; } - escaped = false; + (_, false) => {} } - if add { result.push(c); } } + return (result, delimiters); +} + +fn md_parse_set_delimiter_open_close(result: &Vec, delimiters: &mut Vec) { // set could open/close for i in 0..delimiters.len() { let prev_char; @@ -142,18 +146,23 @@ fn md_parse(text_value: &str) -> (Vec, Vec) { next_char = result[delimiters[i].ind]; } - if next_char.is_whitespace() { - delimiters[i].could_open = false; - } - if prev_char.is_whitespace() { - delimiters[i].could_close = false; - } - if !next_char.is_whitespace() - && !prev_char.is_whitespace() - && delimiters[i].char_type == '_' - { - delimiters[i].could_open = false; - delimiters[i].could_close = false; + match (prev_char.is_whitespace(), next_char.is_whitespace()) { + (false, false) => { + if delimiters[i].char_type == '_' { + delimiters[i].could_open = false; + delimiters[i].could_close = false; + } + } + (true, false) => { + delimiters[i].could_close = false; + } + (false, true) => { + delimiters[i].could_open = false; + } + (true, true) => { + delimiters[i].could_open = false; + delimiters[i].could_close = false; + } } if next_char.is_ascii_punctuation() @@ -167,7 +176,10 @@ fn md_parse(text_value: &str) -> (Vec, Vec) { delimiters[i].could_close = false; } } +} +fn md_parse_eval_sections(delimiters: &mut Vec) -> Vec { + let mut sections = vec![]; let stack_bottom = 0; // because I have a null element in it let mut current_position = stack_bottom + 1; let mut opener_a = [stack_bottom; 3]; @@ -188,11 +200,10 @@ fn md_parse(text_value: &str) -> (Vec, Vec) { '*' => &mut opener_a, '_' => &mut opener_d, '`' => &mut opener_t, - _ => panic!(), + _ => panic!("this cant happen as current_position starts at 0 and all other delimiters are of above types"), }; - let min = opener_min[(delimiters[current_position].num_delimiters % 3) as usize] - .max(stack_bottom); + let min = opener_min[delimiters[current_position].num_delimiters % 3].max(stack_bottom); let mut opener_ind = current_position - 1; while opener_ind > min { // found opener @@ -214,8 +225,7 @@ fn md_parse(text_value: &str) -> (Vec, Vec) { if opener_ind == min { // not found a opener - opener_min[(delimiters[current_position].num_delimiters % 3) as usize] = - current_position - 1; + opener_min[delimiters[current_position].num_delimiters % 3] = current_position - 1; current_position += 1; } else { delimiters[current_position].could_open = false; @@ -228,17 +238,15 @@ fn md_parse(text_value: &str) -> (Vec, Vec) { sections.push(SectionData { start_ind: delimiters[opener_ind].ind, end_ind: delimiters[current_position].ind, - code_bold_italic: if code { - 0 - } else if strong { - 1 - } else { - 2 + code_bold_italic: match (code, strong) { + (true, _) => 0, + (_, true) => 1, + (_, _) => 2, }, }); - delimiters[opener_ind].num_delimiters -= 1 + (strong as u32); - delimiters[current_position].num_delimiters -= 1 + (strong as u32); + delimiters[opener_ind].num_delimiters -= 1 + (strong as usize); + delimiters[current_position].num_delimiters -= 1 + (strong as usize); if delimiters[opener_ind].num_delimiters == 0 { delimiters[opener_ind].is_active = false; @@ -248,35 +256,41 @@ fn md_parse(text_value: &str) -> (Vec, Vec) { current_position += 1; } - for i in (opener_ind + 1)..current_position { - delimiters[i].is_active = false; + for d in &mut delimiters[(opener_ind + 1)..current_position] { + d.is_active = false; } } } + return sections; +} + +fn md_parse(text_value: &str) -> (Vec, Vec) { + let (mut result, mut delimiters) = md_parse_escapes_and_delimiters(text_value); + md_parse_set_delimiter_open_close(&result, &mut delimiters); + let mut sections = md_parse_eval_sections(&mut delimiters); let mut final_result = vec![]; // work from the back to avoid index invalidation - for i in (0..delimiters.len()).rev() { - while delimiters[i].ind < result.len() { + for d in delimiters.into_iter().rev() { + while d.ind < result.len() { if let Some(thing) = result.pop() { final_result.push(thing); } } - for j in 0..sections.len() { + for s in sections.iter_mut() { // if start needs to be after or equal - if sections[j].start_ind >= delimiters[i].ind { - sections[j].start_ind += delimiters[i].num_delimiters as usize; + if s.start_ind >= d.ind { + s.start_ind += d.num_delimiters as usize; } - if sections[j].end_ind > delimiters[i].ind { + if s.end_ind > d.ind { // if end needs to be after - sections[j].end_ind += delimiters[i].num_delimiters as usize; + s.end_ind += d.num_delimiters as usize; } } - for _ in 0..delimiters[i].num_delimiters { - final_result.push(delimiters[i].char_type); - } + let mut temp = vec![d.char_type; d.num_delimiters]; + final_result.append(&mut temp); } return (final_result.into_iter().rev().collect(), sections); @@ -571,7 +585,7 @@ pub fn process_text_attr(element: &SvgElement) -> Result<(SvgElement, Vec Date: Wed, 6 Aug 2025 09:56:44 +0100 Subject: [PATCH 4/9] fixed some tests --- src/elements/text.rs | 2 +- tests/integration_tests/text_attr.rs | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/elements/text.rs b/src/elements/text.rs index 1c11690..7dc5af8 100644 --- a/src/elements/text.rs +++ b/src/elements/text.rs @@ -82,7 +82,7 @@ fn md_parse_escapes_and_delimiters(text_value: &str) -> (Vec, Vec { - escaped = true; + escaped = false; } // the delimiters ('`', false) | ('_', false) | ('*', false) => { diff --git a/tests/integration_tests/text_attr.rs b/tests/integration_tests/text_attr.rs index 77bc297..e0ad734 100644 --- a/tests/integration_tests/text_attr.rs +++ b/tests/integration_tests/text_attr.rs @@ -621,9 +621,9 @@ fn test_md() { "#; let expected = r#" - - -multiline + + +multiline "#; assert_eq!( From ac0f2466b0d3230c042a1dd01011ce3ace6a6e9a Mon Sep 17 00:00:00 2001 From: megamaths Date: Wed, 6 Aug 2025 10:12:42 +0100 Subject: [PATCH 5/9] make check compliance --- src/elements/text.rs | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/elements/text.rs b/src/elements/text.rs index 7dc5af8..2662876 100644 --- a/src/elements/text.rs +++ b/src/elements/text.rs @@ -22,8 +22,8 @@ fn get_md_value(element: &mut SvgElement) -> (Vec, Vec) { for s in sections { let bit = s.code_bold_italic; - for i in s.start_ind..s.end_ind { - state_per_char[i] |= 1 << bit; + for i in state_per_char.iter_mut().take(s.end_ind).skip(s.start_ind) { + *i |= 1 << bit; } } @@ -40,7 +40,7 @@ fn get_md_value(element: &mut SvgElement) -> (Vec, Vec) { .push(parsed_string[i]); } - return (strings, states); + (strings, states) } #[derive(Debug)] @@ -122,10 +122,10 @@ fn md_parse_escapes_and_delimiters(text_value: &str) -> (Vec, Vec, delimiters: &mut Vec) { +fn md_parse_set_delimiter_open_close(result: &[char], delimiters: &mut [DelimiterData]) { // set could open/close for i in 0..delimiters.len() { let prev_char; @@ -178,7 +178,7 @@ fn md_parse_set_delimiter_open_close(result: &Vec, delimiters: &mut Vec) -> Vec { +fn md_parse_eval_sections(delimiters: &mut [DelimiterData]) -> Vec { let mut sections = vec![]; let stack_bottom = 0; // because I have a null element in it let mut current_position = stack_bottom + 1; @@ -261,7 +261,7 @@ fn md_parse_eval_sections(delimiters: &mut Vec) -> Vec (Vec, Vec) { @@ -282,18 +282,18 @@ fn md_parse(text_value: &str) -> (Vec, Vec) { for s in sections.iter_mut() { // if start needs to be after or equal if s.start_ind >= d.ind { - s.start_ind += d.num_delimiters as usize; + s.start_ind += d.num_delimiters; } if s.end_ind > d.ind { // if end needs to be after - s.end_ind += d.num_delimiters as usize; + s.end_ind += d.num_delimiters; } } let mut temp = vec![d.char_type; d.num_delimiters]; final_result.append(&mut temp); } - return (final_result.into_iter().rev().collect(), sections); + (final_result.into_iter().rev().collect(), sections) } /// Convert unescaped r"\n" into newline characters for multi-line text @@ -453,7 +453,7 @@ pub fn process_text_attr(element: &SvgElement) -> Result<(SvgElement, Vec Result<(SvgElement, Vec Result<(SvgElement, Vec Result<(SvgElement, Vec Date: Thu, 7 Aug 2025 16:27:42 +0100 Subject: [PATCH 6/9] reworked parsing to do code seperately and moved markdown functions to seperate file --- src/elements/markdown.rs | 619 +++++++++++++++++++++++++++++++++++++++ src/elements/mod.rs | 1 + src/elements/text.rs | 434 ++------------------------- 3 files changed, 643 insertions(+), 411 deletions(-) create mode 100644 src/elements/markdown.rs diff --git a/src/elements/markdown.rs b/src/elements/markdown.rs new file mode 100644 index 0000000..80086dc --- /dev/null +++ b/src/elements/markdown.rs @@ -0,0 +1,619 @@ +use super::SvgElement; + +pub fn get_md_value(element: &mut SvgElement) -> (Vec, Vec) { + let text_value = if let Some(tv) = element.pop_attr("md") { + tv + } else { + element + .pop_attr("text") + .expect("no text attr in process_text_attr") + }; + + let (parsed_string, sections) = md_parse(&text_value); + + let mut state_per_char = vec![ + TextClass { + code: false, + bold: false, + italic: false + }; + parsed_string.len() + ]; + + for s in sections { + let class = s.code_bold_italic; + for i in state_per_char.iter_mut().take(s.end_ind).skip(s.start_ind) { + match class { + TextClassEnum::Code => i.code = true, + TextClassEnum::Bold => i.bold = true, + TextClassEnum::Italic => i.italic = true, + } + } + } + + let mut strings = vec![]; + let mut states = vec![]; + for i in 0..parsed_string.len() { + if i == 0 || states[states.len() - 1] != state_per_char[i] { + strings.push(String::new()); + states.push(state_per_char[i]) + } + strings + .last_mut() + .expect("filled from i == 0") + .push(parsed_string[i]); + } + + (strings, states) +} + +#[derive(Debug, Clone, PartialEq, Copy)] +pub struct TextClass { + pub code: bool, + pub bold: bool, + pub italic: bool, +} + +#[derive(Debug)] +enum TextClassEnum { + Code, + Bold, + Italic, +} + +#[derive(Debug)] +struct SectionData { + start_ind: usize, + end_ind: usize, + code_bold_italic: TextClassEnum, +} + +// based on the commonmark implementation https://spec.commonmark.org/0.31.2/ +#[derive(Debug, Clone)] +struct DelimiterData { + ind: usize, // goes just before this char + char_type: char, + num_delimiters: usize, + is_active: bool, + could_open: bool, + could_close: bool, +} + +fn md_parse_delimiters(text_value: &str) -> (Vec, Vec) { + let mut result = vec![]; + let mut delimiters = vec![DelimiterData { + ind: 0, + char_type: ' ', + num_delimiters: 0, + is_active: false, + could_open: false, + could_close: false, + }]; + + // first pass find delimiters + for c in text_value.chars() { + let mut add = true; + match c { + // the delimiters and escape + '`' | '_' | '*' | '\\' => { + let last = delimiters.last_mut().expect("garenteed not to be empty"); + if c == last.char_type && last.ind == result.len() { + // is a continuation + last.num_delimiters += 1; + } else { + delimiters.push(DelimiterData { + ind: result.len(), + char_type: c, + num_delimiters: 1, + is_active: true, + could_open: true, + could_close: true, + }); + } + add = false; + } + _ => {} + } + if add { + result.push(c); + } + } + + (result, delimiters) +} + +// assumes delimiters are ordered +fn md_parse_code_blocks( + result: &[char], + delimiters: &mut Vec, +) -> (Vec, Vec) { + let mut new_result = vec![]; + let mut sections = vec![]; + let mut res_ind = 0; + let mut del_ind = 0; + let mut readded_letters = 0; + while res_ind <= result.len() { + while del_ind < delimiters.len() && delimiters[del_ind].ind <= res_ind { + if delimiters[del_ind].char_type == '`' { + // if previous delimiter is \ and is right before and is odd number of \ + // then reduce by 1 and readd it if it does make a pair + // need to acount for all previous delimiters have been moved by readded letters + let escaped = del_ind != 0 + && delimiters[del_ind - 1].ind - readded_letters == delimiters[del_ind].ind + && delimiters[del_ind - 1].char_type == '\\' + && delimiters[del_ind - 1].num_delimiters % 2 != 0; + let needed_len = match escaped { + false => delimiters[del_ind].num_delimiters, + true => delimiters[del_ind].num_delimiters - 1, + }; + + for closer_ind in (del_ind + 1)..delimiters.len() { + if delimiters[del_ind].char_type == delimiters[closer_ind].char_type + && delimiters[closer_ind].num_delimiters == needed_len + { + // it is a section + delimiters[del_ind].is_active = false; + delimiters[closer_ind].is_active = false; + delimiters[del_ind].num_delimiters = 0; + delimiters[closer_ind].num_delimiters = 0; + + if escaped { + delimiters[del_ind - 1].num_delimiters -= 1; + new_result.push('`'); + readded_letters += 1; + } + let start_ind = new_result.len(); + + del_ind += 1; + while res_ind <= delimiters[closer_ind].ind { + while del_ind < closer_ind && delimiters[del_ind].ind <= res_ind { + let mut temp = vec![ + delimiters[del_ind].char_type; + delimiters[del_ind].num_delimiters + ]; + new_result.append(&mut temp); + readded_letters += delimiters[del_ind].num_delimiters; + delimiters[del_ind].num_delimiters = 0; + del_ind += 1; + } + if res_ind != delimiters[closer_ind].ind { + new_result.push(result[res_ind]); + res_ind += 1; + } else { + break; + } + } + let end_ind = new_result.len(); + + sections.push(SectionData { + start_ind, + end_ind, + code_bold_italic: TextClassEnum::Code, + }); + + break; + } + } + } + delimiters[del_ind].ind += readded_letters; + + del_ind += 1; + } + + if res_ind != result.len() { + new_result.push(result[res_ind]); + } + + res_ind += 1; + } + + delimiters[0].num_delimiters = 1; // set the null delimiter to 1 + delimiters.retain(|d| d.num_delimiters != 0); + delimiters[0].num_delimiters = 0; // set the null delimiter to 0 + + (new_result, sections) +} + +fn md_parse_escapes( + result: &[char], + delimiters: &mut [DelimiterData], +) -> (Vec, Vec) { + let mut new_result = vec![]; + let mut new_delimiters = vec![]; + let mut readded_letters = 0; + let mut del_ind = 0; + let mut res_ind = 0; + while res_ind <= result.len() { + while del_ind < delimiters.len() && delimiters[del_ind].ind <= res_ind { + match delimiters[del_ind].char_type { + '\\' => { + let mut temp = + vec![delimiters[del_ind].char_type; delimiters[del_ind].num_delimiters / 2]; + new_result.append(&mut temp); + readded_letters += delimiters[del_ind].num_delimiters / 2; + + if delimiters[del_ind].num_delimiters % 2 != 0 { + if del_ind != delimiters.len() - 1 + && delimiters[del_ind + 1].ind == delimiters[del_ind].ind + { + match delimiters[del_ind + 1].char_type { + '`' | '*' | '_' => { + new_result.push(delimiters[del_ind + 1].char_type); + readded_letters += 1; + delimiters[del_ind + 1].num_delimiters -= 1; + } + _ => panic!("\\ => should merge"), + } + } else { + match result[delimiters[del_ind].ind] { + 'n' => { + res_ind += 1; + new_result.push('\n'); + } + _ => { + new_result.push(delimiters[del_ind].char_type); + readded_letters += 1; + delimiters[del_ind].num_delimiters -= 1; + } + } + } + } + } + '`' => { + let mut temp = + vec![delimiters[del_ind].char_type; delimiters[del_ind].num_delimiters]; + new_result.append(&mut temp); + readded_letters += delimiters[del_ind].num_delimiters; + } + ' ' | '*' | '_' => { + new_delimiters.push(delimiters[del_ind].clone()); + let last_ind = new_delimiters.len() - 1; + new_delimiters[last_ind].ind += readded_letters; + } + _ => panic!("no other type of delimiter char"), + } + + del_ind += 1; + } + + if res_ind != result.len() { + new_result.push(result[res_ind]); + } + res_ind += 1; + } + + (new_result, new_delimiters) +} + +fn md_parse_set_delimiter_open_close(result: &[char], delimiters: &mut [DelimiterData]) { + // set could open/close + for i in 0..delimiters.len() { + let prev_char; + let next_char; + if i != 0 && delimiters[i - 1].ind == delimiters[i].ind { + prev_char = delimiters[i - 1].char_type; + } else if delimiters[i].ind == 0 { + prev_char = ' '; + } else { + prev_char = result[delimiters[i].ind - 1]; + } + + if i != delimiters.len() - 1 && delimiters[i + 1].ind == delimiters[i].ind { + next_char = delimiters[i + 1].char_type; + } else if delimiters[i].ind == result.len() { + next_char = ' '; + } else { + next_char = result[delimiters[i].ind]; + } + + match (prev_char.is_whitespace(), next_char.is_whitespace()) { + (false, false) => { + if delimiters[i].char_type == '_' { + delimiters[i].could_open = false; + delimiters[i].could_close = false; + } + } + (true, false) => { + delimiters[i].could_close = false; + } + (false, true) => { + delimiters[i].could_open = false; + } + (true, true) => { + delimiters[i].could_open = false; + delimiters[i].could_close = false; + } + } + + if next_char.is_ascii_punctuation() + && (!prev_char.is_whitespace() || !prev_char.is_ascii_punctuation()) + { + delimiters[i].could_open = false; + } + if prev_char.is_ascii_punctuation() + && (!next_char.is_whitespace() || !next_char.is_ascii_punctuation()) + { + delimiters[i].could_close = false; + } + } +} + +fn md_parse_eval_sections(delimiters: &mut [DelimiterData]) -> Vec { + let mut sections = vec![]; + let stack_bottom = 0; // because I have a null element in it + let mut current_position = stack_bottom + 1; + let mut opener_a = [stack_bottom; 3]; + let mut opener_d = [stack_bottom; 3]; + + loop { + while current_position != delimiters.len() + && !delimiters[current_position].could_close + && delimiters[current_position].is_active + { + current_position += 1; + } + if current_position == delimiters.len() { + break; + } + let opener_min = match delimiters[current_position].char_type { + '*' => &mut opener_a, + '_' => &mut opener_d, + _ => panic!("this cant happen as current_position starts at 0 and all other delimiters are of above types"), + }; + + let min = opener_min[delimiters[current_position].num_delimiters % 3].max(stack_bottom); + let mut opener_ind = current_position - 1; + while opener_ind > min { + // found opener + if delimiters[opener_ind].is_active + && delimiters[opener_ind].could_open + && delimiters[opener_ind].char_type == delimiters[current_position].char_type + && !((delimiters[opener_ind].could_close + || delimiters[current_position].could_open) + && delimiters[opener_ind].num_delimiters % 3 + != delimiters[current_position].num_delimiters % 3) + { + // found valid opener + break; + } + opener_ind -= 1; + } + + if opener_ind == min { + // not found a opener + opener_min[delimiters[current_position].num_delimiters % 3] = current_position - 1; + current_position += 1; + } else { + delimiters[current_position].could_open = false; + delimiters[opener_ind].could_close = false; + // did + let code = delimiters[current_position].char_type == '`'; + let strong = !code + && delimiters[opener_ind].num_delimiters >= 2 + && delimiters[current_position].num_delimiters >= 2; + sections.push(SectionData { + start_ind: delimiters[opener_ind].ind, + end_ind: delimiters[current_position].ind, + code_bold_italic: match (code, strong) { + (true, _) => TextClassEnum::Code, + (_, true) => TextClassEnum::Bold, + (_, _) => TextClassEnum::Italic, + }, + }); + + delimiters[opener_ind].num_delimiters -= 1 + (strong as usize); + delimiters[current_position].num_delimiters -= 1 + (strong as usize); + + if delimiters[opener_ind].num_delimiters == 0 { + delimiters[opener_ind].is_active = false; + } + if delimiters[current_position].num_delimiters == 0 { + delimiters[current_position].is_active = false; + current_position += 1; + } + + for d in &mut delimiters[(opener_ind + 1)..current_position] { + d.is_active = false; + } + } + } + sections +} + +fn md_parse(text_value: &str) -> (Vec, Vec) { + let (result, mut delimiters) = md_parse_delimiters(text_value); + let (result, mut sections) = md_parse_code_blocks(&result, &mut delimiters); + let (mut result, mut delimiters) = md_parse_escapes(&result, &mut delimiters); + md_parse_set_delimiter_open_close(&result, &mut delimiters); + sections.append(&mut md_parse_eval_sections(&mut delimiters)); + + let mut final_result = vec![]; + + // work from the back to avoid index invalidation + for d in delimiters.into_iter().rev() { + while d.ind < result.len() { + if let Some(thing) = result.pop() { + final_result.push(thing); + } + } + + for s in sections.iter_mut() { + // if start needs to be after or equal + if s.start_ind >= d.ind { + s.start_ind += d.num_delimiters; + } + if s.end_ind > d.ind { + // if end needs to be after + s.end_ind += d.num_delimiters; + } + } + if d.char_type != ' ' { + let mut temp = vec![d.char_type; d.num_delimiters]; + final_result.append(&mut temp); + } + } + + (final_result.into_iter().rev().collect(), sections) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_md_parse() { + // the basic examples no actual md + + let text = r"Hello, \nworld!"; + assert_eq!( + format!("{:?}", md_parse(text)), + "(['H', 'e', 'l', 'l', 'o', ',', ' ', '\\n', 'w', 'o', 'r', 'l', 'd', '!'], [])" + ); + + // when not part of a '\n', '\' is not special + let text = r"Hello, world! \1"; + assert_eq!(format!("{:?}",md_parse(text)), "(['H', 'e', 'l', 'l', 'o', ',', ' ', 'w', 'o', 'r', 'l', 'd', '!', ' ', '\\\\', '1'], [])"); + + // when precedes '\n', '\' escapes it. + let text = r"Hello, \\nworld!"; + assert_eq!( + format!("{:?}", md_parse(text)), + "(['H', 'e', 'l', 'l', 'o', ',', ' ', '\\\\', 'n', 'w', 'o', 'r', 'l', 'd', '!'], [])" + ); + + fn sd(s: i32, e: i32, i: i32) -> String { + "SectionData { start_ind: ".to_owned() + + &s.to_string() + + ", end_ind: " + + &e.to_string() + + ", code_bold_italic: " + + match i { + 0 => "Code", + 1 => "Bold", + 2 => "Italic", + _ => "Err", + } + + " }" + } + + // using the md + let text = r"He*ll*o, \nworld!"; + assert_eq!( + format!("{:?}", md_parse(text)), + "(['H', 'e', 'l', 'l', 'o', ',', ' ', '\\n', 'w', 'o', 'r', 'l', 'd', '!'], [" + .to_owned() + + &sd(2, 4, 2) + + "])" + ); + + // mismatched + let text = r"*Hello** , \nworld!"; + assert_eq!( + format!("{:?}", md_parse(text)), + "(['H', 'e', 'l', 'l', 'o', '*', ' ', ',', ' ', '\\n', 'w', 'o', 'r', 'l', 'd', '!'], [" + .to_owned() + + &sd(0, 5, 2) + "])" + ); + + // diff type + let text = r"He*llo_, \nworld!"; + assert_eq!(format!("{:?}",md_parse(text)), "(['H', 'e', '*', 'l', 'l', 'o', '_', ',', ' ', '\\n', 'w', 'o', 'r', 'l', 'd', '!'], [])"); + + // multiple diff type + let text = r"_hello*"; + assert_eq!( + format!("{:?}", md_parse(text)), + "(['_', 'h', 'e', 'l', 'l', 'o', '*'], [])" + ); + + // multiple same type + let text = r"He*ll*o, \nw*or*ld!"; + assert_eq!( + format!("{:?}", md_parse(text)), + "(['H', 'e', 'l', 'l', 'o', ',', ' ', '\\n', 'w', 'o', 'r', 'l', 'd', '!'], [" + .to_owned() + + &sd(2, 4, 2) + + ", " + + &sd(9, 11, 2) + + "])" + ); + + // space before + let text = r"**foo bar **"; + assert_eq!( + format!("{:?}", md_parse(text)), + "(['*', '*', 'f', 'o', 'o', ' ', 'b', 'a', 'r', ' ', '*', '*'], [])" + ); + + // punctuation before alphnum after + let text = r"**(**foo)"; + assert_eq!( + format!("{:?}", md_parse(text)), + "(['*', '*', '(', '*', '*', 'f', 'o', 'o', ')'], [])" + ); + } + + #[test] + fn test_get_md_value() { + fn tc(i: u32) -> String { + "TextClass { code: ".to_owned() + + match i & (1 << 0) != 0 { + false => "false", + true => "true", + } + + ", bold: " + + match i & (1 << 1) != 0 { + false => "false", + true => "true", + } + + ", italic: " + + match i & (1 << 2) != 0 { + false => "false", + true => "true", + } + + " }" + } + + let mut el = SvgElement::new("text", &[]); + let text = r"foo"; + el.set_attr("md", text); + assert_eq!( + format!("{:?}", get_md_value(&mut el)), + "([\"foo\"], [".to_owned() + &tc(0) + "])" + ); + + let text = r"**(**foo)"; + el.set_attr("md", text); + assert_eq!( + format!("{:?}", get_md_value(&mut el)), + "([\"**(**foo)\"], [".to_owned() + &tc(0) + "])" + ); + + let text = r"*foo *bar**"; + el.set_attr("md", text); + assert_eq!( + format!("{:?}", get_md_value(&mut el)), + "([\"foo bar\"], [".to_owned() + &tc(4) + "])" + ); + + let text = r"*foo**bar**baz*"; + el.set_attr("md", text); + assert_eq!( + format!("{:?}", get_md_value(&mut el)), + "([\"foo\", \"bar\", \"baz\"], [".to_owned() + + &tc(4) + + ", " + + &tc(6) + + ", " + + &tc(4) + + "])" + ); + + let text = r"`foo*`"; + el.set_attr("md", text); + assert_eq!( + format!("{:?}", get_md_value(&mut el)), + "([\"foo*\"], [".to_owned() + &tc(1) + "])" + ); + } +} diff --git a/src/elements/mod.rs b/src/elements/mod.rs index 5dbe0cf..509a471 100644 --- a/src/elements/mod.rs +++ b/src/elements/mod.rs @@ -4,6 +4,7 @@ mod containers; mod element; mod layout; mod loops; +mod markdown; mod path; mod reuse; mod special; diff --git a/src/elements/text.rs b/src/elements/text.rs index 2662876..7993d42 100644 --- a/src/elements/text.rs +++ b/src/elements/text.rs @@ -2,6 +2,7 @@ use super::SvgElement; use crate::geometry::LocSpec; use crate::types::{attr_split_cycle, fstr, strp}; +use crate::elements::markdown::{get_md_value, TextClass}; use crate::errors::{Result, SvgdxError}; fn get_text_value(element: &mut SvgElement) -> String { @@ -11,291 +12,6 @@ fn get_text_value(element: &mut SvgElement) -> String { text_string(&text_value) } -fn get_md_value(element: &mut SvgElement) -> (Vec, Vec) { - let text_value = element - .pop_attr("md") - .expect("no md attr in process_text_attr"); - - let (parsed_string, sections) = md_parse(&text_value); - - let mut state_per_char = vec![0; parsed_string.len()]; - - for s in sections { - let bit = s.code_bold_italic; - for i in state_per_char.iter_mut().take(s.end_ind).skip(s.start_ind) { - *i |= 1 << bit; - } - } - - let mut strings = vec![]; - let mut states = vec![]; - for i in 0..parsed_string.len() { - if i == 0 || states[states.len() - 1] != state_per_char[i] { - strings.push(String::new()); - states.push(state_per_char[i]) - } - strings - .last_mut() - .expect("filled from i == 0") - .push(parsed_string[i]); - } - - (strings, states) -} - -#[derive(Debug)] -struct SectionData { - start_ind: usize, - end_ind: usize, - code_bold_italic: u32, -} - -// based on the commonmarkdown implementation -#[derive(Debug)] -struct DelimiterData { - ind: usize, // goes just before this char - char_type: char, - num_delimiters: usize, - is_active: bool, - could_open: bool, - could_close: bool, -} - -fn md_parse_escapes_and_delimiters(text_value: &str) -> (Vec, Vec) { - let mut result = vec![]; - let mut delimiters = vec![DelimiterData { - ind: 0, - char_type: ' ', - num_delimiters: 0, - is_active: false, - could_open: false, - could_close: false, - }]; - let mut escaped = false; - - // first pass process \ and find delimiters - for c in text_value.chars() { - let mut add = true; - match (c, escaped) { - ('\\', false) => { - add = false; - escaped = true; - } - ('\\', true) => { - escaped = false; - } - // the delimiters - ('`', false) | ('_', false) | ('*', false) => { - let last = delimiters.last_mut().expect("garenteed not to be empty"); - if c == last.char_type && last.ind == result.len() { - // is a continuation - last.num_delimiters += 1; - } else { - delimiters.push(DelimiterData { - ind: result.len(), - char_type: c, - num_delimiters: 1, - is_active: true, - could_open: true, - could_close: true, - }); - } - add = false; - } - ('`', true) | ('_', true) | ('*', true) => { - escaped = false; - } - ('n', true) => { - add = false; - result.push('\n'); - escaped = false; - } - (_, true) => { - // was not an escape - result.push('\\'); - escaped = false; - } - (_, false) => {} - } - if add { - result.push(c); - } - } - - (result, delimiters) -} - -fn md_parse_set_delimiter_open_close(result: &[char], delimiters: &mut [DelimiterData]) { - // set could open/close - for i in 0..delimiters.len() { - let prev_char; - let next_char; - if i != 0 && delimiters[i - 1].ind == delimiters[i].ind { - prev_char = delimiters[i - 1].char_type; - } else if delimiters[i].ind == 0 { - prev_char = ' '; - } else { - prev_char = result[delimiters[i].ind - 1]; - } - - if i != delimiters.len() - 1 && delimiters[i + 1].ind == delimiters[i].ind { - next_char = delimiters[i + 1].char_type; - } else if delimiters[i].ind == result.len() { - next_char = ' '; - } else { - next_char = result[delimiters[i].ind]; - } - - match (prev_char.is_whitespace(), next_char.is_whitespace()) { - (false, false) => { - if delimiters[i].char_type == '_' { - delimiters[i].could_open = false; - delimiters[i].could_close = false; - } - } - (true, false) => { - delimiters[i].could_close = false; - } - (false, true) => { - delimiters[i].could_open = false; - } - (true, true) => { - delimiters[i].could_open = false; - delimiters[i].could_close = false; - } - } - - if next_char.is_ascii_punctuation() - && (!prev_char.is_whitespace() || !prev_char.is_ascii_punctuation()) - { - delimiters[i].could_open = false; - } - if prev_char.is_ascii_punctuation() - && (!next_char.is_whitespace() || !next_char.is_ascii_punctuation()) - { - delimiters[i].could_close = false; - } - } -} - -fn md_parse_eval_sections(delimiters: &mut [DelimiterData]) -> Vec { - let mut sections = vec![]; - let stack_bottom = 0; // because I have a null element in it - let mut current_position = stack_bottom + 1; - let mut opener_a = [stack_bottom; 3]; - let mut opener_d = [stack_bottom; 3]; - let mut opener_t = [stack_bottom; 3]; - - loop { - while current_position != delimiters.len() - && !delimiters[current_position].could_close - && delimiters[current_position].is_active - { - current_position += 1; - } - if current_position == delimiters.len() { - break; - } - let opener_min = match delimiters[current_position].char_type { - '*' => &mut opener_a, - '_' => &mut opener_d, - '`' => &mut opener_t, - _ => panic!("this cant happen as current_position starts at 0 and all other delimiters are of above types"), - }; - - let min = opener_min[delimiters[current_position].num_delimiters % 3].max(stack_bottom); - let mut opener_ind = current_position - 1; - while opener_ind > min { - // found opener - if delimiters[opener_ind].is_active - && delimiters[opener_ind].could_open - && delimiters[opener_ind].char_type == delimiters[current_position].char_type - { - if (delimiters[opener_ind].could_close || delimiters[current_position].could_open) - && delimiters[opener_ind].num_delimiters % 3 - != delimiters[current_position].num_delimiters % 3 - { - } else { - // found valid opener - break; - } - } - opener_ind -= 1; - } - - if opener_ind == min { - // not found a opener - opener_min[delimiters[current_position].num_delimiters % 3] = current_position - 1; - current_position += 1; - } else { - delimiters[current_position].could_open = false; - delimiters[opener_ind].could_close = false; - // did - let code = delimiters[current_position].char_type == '`'; - let strong = !code - && delimiters[opener_ind].num_delimiters >= 2 - && delimiters[current_position].num_delimiters >= 2; - sections.push(SectionData { - start_ind: delimiters[opener_ind].ind, - end_ind: delimiters[current_position].ind, - code_bold_italic: match (code, strong) { - (true, _) => 0, - (_, true) => 1, - (_, _) => 2, - }, - }); - - delimiters[opener_ind].num_delimiters -= 1 + (strong as usize); - delimiters[current_position].num_delimiters -= 1 + (strong as usize); - - if delimiters[opener_ind].num_delimiters == 0 { - delimiters[opener_ind].is_active = false; - } - if delimiters[current_position].num_delimiters == 0 { - delimiters[current_position].is_active = false; - current_position += 1; - } - - for d in &mut delimiters[(opener_ind + 1)..current_position] { - d.is_active = false; - } - } - } - sections -} - -fn md_parse(text_value: &str) -> (Vec, Vec) { - let (mut result, mut delimiters) = md_parse_escapes_and_delimiters(text_value); - md_parse_set_delimiter_open_close(&result, &mut delimiters); - let mut sections = md_parse_eval_sections(&mut delimiters); - - let mut final_result = vec![]; - - // work from the back to avoid index invalidation - for d in delimiters.into_iter().rev() { - while d.ind < result.len() { - if let Some(thing) = result.pop() { - final_result.push(thing); - } - } - - for s in sections.iter_mut() { - // if start needs to be after or equal - if s.start_ind >= d.ind { - s.start_ind += d.num_delimiters; - } - if s.end_ind > d.ind { - // if end needs to be after - s.end_ind += d.num_delimiters; - } - } - let mut temp = vec![d.char_type; d.num_delimiters]; - final_result.append(&mut temp); - } - - (final_result.into_iter().rev().collect(), sections) -} - /// Convert unescaped r"\n" into newline characters for multi-line text fn text_string(text_value: &str) -> String { let mut result = String::new(); @@ -454,8 +170,17 @@ pub fn process_text_attr(element: &SvgElement) -> Result<(SvgElement, Vec Result<(SvgElement, Vec Result<(SvgElement, Vec Result<(SvgElement, Vec String { - "SectionData { start_ind: ".to_owned() - + &s.to_string() - + ", end_ind: " - + &e.to_string() - + ", code_bold_italic: " - + &i.to_string() - + " }" - } - - // using the md - let text = r"He*ll*o, \nworld!"; - assert_eq!( - format!("{:?}", md_parse(text)), - "(['H', 'e', 'l', 'l', 'o', ',', ' ', '\\n', 'w', 'o', 'r', 'l', 'd', '!'], [" - .to_owned() - + &sd(2, 4, 2) - + "])" - ); - - // mismatched - let text = r"*Hello** , \nworld!"; - assert_eq!( - format!("{:?}", md_parse(text)), - "(['H', 'e', 'l', 'l', 'o', '*', ' ', ',', ' ', '\\n', 'w', 'o', 'r', 'l', 'd', '!'], [" - .to_owned() - + &sd(0, 5, 2) + "])" - ); - - // diff type - let text = r"He*llo_, \nworld!"; - assert_eq!(format!("{:?}",md_parse(text)), "(['H', 'e', '*', 'l', 'l', 'o', '_', ',', ' ', '\\n', 'w', 'o', 'r', 'l', 'd', '!'], [])"); - - // multiple diff type - let text = r"_hello*"; - assert_eq!( - format!("{:?}", md_parse(text)), - "(['_', 'h', 'e', 'l', 'l', 'o', '*'], [])" - ); - - // multiple same type - let text = r"He*ll*o, \nw*or*ld!"; - assert_eq!( - format!("{:?}", md_parse(text)), - "(['H', 'e', 'l', 'l', 'o', ',', ' ', '\\n', 'w', 'o', 'r', 'l', 'd', '!'], [" - .to_owned() - + &sd(2, 4, 2) - + ", " - + &sd(9, 11, 2) - + "])" - ); - - // space before - let text = r"**foo bar **"; - assert_eq!( - format!("{:?}", md_parse(text)), - "(['*', '*', 'f', 'o', 'o', ' ', 'b', 'a', 'r', ' ', '*', '*'], [])" - ); - - // punctuation before alphnum after - let text = r"**(**foo)"; - assert_eq!( - format!("{:?}", md_parse(text)), - "(['*', '*', '(', '*', '*', 'f', 'o', 'o', ')'], [])" - ); - } - - #[test] - fn test_get_md_value() { - let mut el = SvgElement::new("text", &[]); - let text = r"foo"; - el.set_attr("md", text); - assert_eq!(format!("{:?}", get_md_value(&mut el)), "([\"foo\"], [0])"); - - let text = r"**(**foo)"; - el.set_attr("md", text); - assert_eq!( - format!("{:?}", get_md_value(&mut el)), - "([\"**(**foo)\"], [0])" - ); - - let text = r"*foo *bar**"; - el.set_attr("md", text); - assert_eq!( - format!("{:?}", get_md_value(&mut el)), - "([\"foo bar\"], [4])" - ); - - let text = r"*foo**bar**baz*"; - el.set_attr("md", text); - assert_eq!( - format!("{:?}", get_md_value(&mut el)), - "([\"foo\", \"bar\", \"baz\"], [4, 6, 4])" - ); - } } From 17b68c52964bdd5a7966c13f777247bcbafb63ee Mon Sep 17 00:00:00 2001 From: megamaths Date: Fri, 8 Aug 2025 11:04:51 +0100 Subject: [PATCH 7/9] fixed errors with vertical and empty lines and made code blocks remove 1 space from start and end and added example --- examples/markdown.svg | 23 ++ examples/markdown.xml | 4 + src/elements/markdown.rs | 319 ++++++++++++++------------- src/elements/text.rs | 28 +-- tests/integration_tests/text_attr.rs | 46 +++- 5 files changed, 251 insertions(+), 169 deletions(-) create mode 100644 examples/markdown.svg create mode 100644 examples/markdown.xml diff --git a/examples/markdown.svg b/examples/markdown.svg new file mode 100644 index 0000000..c294757 --- /dev/null +++ b/examples/markdown.svg @@ -0,0 +1,23 @@ + + + + + Hello Markdown world! + + + + down \ ***literal*** \!Hi vertical Mark + + \ No newline at end of file diff --git a/examples/markdown.xml b/examples/markdown.xml new file mode 100644 index 0000000..87a4b20 --- /dev/null +++ b/examples/markdown.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/src/elements/markdown.rs b/src/elements/markdown.rs index 80086dc..dd705c3 100644 --- a/src/elements/markdown.rs +++ b/src/elements/markdown.rs @@ -1,18 +1,18 @@ use super::SvgElement; -pub fn get_md_value(element: &mut SvgElement) -> (Vec, Vec) { +pub fn get_md_value(element: &mut SvgElement) -> (Vec, Vec) { let text_value = if let Some(tv) = element.pop_attr("md") { tv + } else if let Some(tv) = element.pop_attr("text") { + tv } else { - element - .pop_attr("text") - .expect("no text attr in process_text_attr") + return (vec![], vec![]); }; - let (parsed_string, sections) = md_parse(&text_value); + let (parsed_string, spans) = md_parse(&text_value); let mut state_per_char = vec![ - TextClass { + SpanStyle { code: false, bold: false, italic: false @@ -20,13 +20,13 @@ pub fn get_md_value(element: &mut SvgElement) -> (Vec, Vec) { parsed_string.len() ]; - for s in sections { + for s in spans { let class = s.code_bold_italic; - for i in state_per_char.iter_mut().take(s.end_ind).skip(s.start_ind) { + for i in state_per_char.iter_mut().take(s.end_idx).skip(s.start_idx) { match class { - TextClassEnum::Code => i.code = true, - TextClassEnum::Bold => i.bold = true, - TextClassEnum::Italic => i.italic = true, + SpanStyleEnum::Code => i.code = true, + SpanStyleEnum::Bold => i.bold = true, + SpanStyleEnum::Italic => i.italic = true, } } } @@ -48,24 +48,24 @@ pub fn get_md_value(element: &mut SvgElement) -> (Vec, Vec) { } #[derive(Debug, Clone, PartialEq, Copy)] -pub struct TextClass { +pub struct SpanStyle { pub code: bool, pub bold: bool, pub italic: bool, } -#[derive(Debug)] -enum TextClassEnum { +#[derive(Debug, PartialEq)] +enum SpanStyleEnum { Code, Bold, Italic, } -#[derive(Debug)] -struct SectionData { - start_ind: usize, - end_ind: usize, - code_bold_italic: TextClassEnum, +#[derive(Debug, PartialEq)] +struct SpanData { + start_idx: usize, + end_idx: usize, + code_bold_italic: SpanStyleEnum, } // based on the commonmark implementation https://spec.commonmark.org/0.31.2/ @@ -96,7 +96,7 @@ fn md_parse_delimiters(text_value: &str) -> (Vec, Vec) { match c { // the delimiters and escape '`' | '_' | '*' | '\\' => { - let last = delimiters.last_mut().expect("garenteed not to be empty"); + let last = delimiters.last_mut().expect("guarenteed not to be empty"); if c == last.char_type && last.ind == result.len() { // is a continuation last.num_delimiters += 1; @@ -126,20 +126,20 @@ fn md_parse_delimiters(text_value: &str) -> (Vec, Vec) { fn md_parse_code_blocks( result: &[char], delimiters: &mut Vec, -) -> (Vec, Vec) { +) -> (Vec, Vec) { let mut new_result = vec![]; - let mut sections = vec![]; + let mut spans = vec![]; let mut res_ind = 0; let mut del_ind = 0; - let mut readded_letters = 0; + let mut re_added_letters = 0; while res_ind <= result.len() { while del_ind < delimiters.len() && delimiters[del_ind].ind <= res_ind { if delimiters[del_ind].char_type == '`' { // if previous delimiter is \ and is right before and is odd number of \ - // then reduce by 1 and readd it if it does make a pair - // need to acount for all previous delimiters have been moved by readded letters + // then reduce by 1 and re_add it if it does make a pair + // need to acount for all previous delimiters have been moved by re_added letters let escaped = del_ind != 0 - && delimiters[del_ind - 1].ind - readded_letters == delimiters[del_ind].ind + && delimiters[del_ind - 1].ind - re_added_letters == delimiters[del_ind].ind && delimiters[del_ind - 1].char_type == '\\' && delimiters[del_ind - 1].num_delimiters % 2 != 0; let needed_len = match escaped { @@ -151,7 +151,7 @@ fn md_parse_code_blocks( if delimiters[del_ind].char_type == delimiters[closer_ind].char_type && delimiters[closer_ind].num_delimiters == needed_len { - // it is a section + // it is a span delimiters[del_ind].is_active = false; delimiters[closer_ind].is_active = false; delimiters[del_ind].num_delimiters = 0; @@ -160,10 +160,14 @@ fn md_parse_code_blocks( if escaped { delimiters[del_ind - 1].num_delimiters -= 1; new_result.push('`'); - readded_letters += 1; + re_added_letters += 1; } let start_ind = new_result.len(); + // to make easy to remove edge spaces if any + let mut has_none_space = false; + let mut span_str = vec![]; + del_ind += 1; while res_ind <= delimiters[closer_ind].ind { while del_ind < closer_ind && delimiters[del_ind].ind <= res_ind { @@ -171,31 +175,46 @@ fn md_parse_code_blocks( delimiters[del_ind].char_type; delimiters[del_ind].num_delimiters ]; - new_result.append(&mut temp); - readded_letters += delimiters[del_ind].num_delimiters; + has_none_space |= delimiters[del_ind].num_delimiters != 0; // char type will not be ' ' + span_str.append(&mut temp); + re_added_letters += delimiters[del_ind].num_delimiters; delimiters[del_ind].num_delimiters = 0; del_ind += 1; } if res_ind != delimiters[closer_ind].ind { - new_result.push(result[res_ind]); + has_none_space |= result[res_ind] != ' '; + span_str.push(result[res_ind]); res_ind += 1; } else { break; } } + if has_none_space + && span_str.len() > 1 + && span_str[0] == ' ' + && span_str[span_str.len() - 1] == ' ' + { + span_str.pop(); + let mut itr = span_str.iter(); + itr.next().expect("size bigger than 1"); // pop front + new_result.extend(itr); + } else { + new_result.append(&mut span_str); + } + let end_ind = new_result.len(); - sections.push(SectionData { - start_ind, - end_ind, - code_bold_italic: TextClassEnum::Code, + spans.push(SpanData { + start_idx: start_ind, + end_idx: end_ind, + code_bold_italic: SpanStyleEnum::Code, }); break; } } } - delimiters[del_ind].ind += readded_letters; + delimiters[del_ind].ind += re_added_letters; del_ind += 1; } @@ -211,16 +230,18 @@ fn md_parse_code_blocks( delimiters.retain(|d| d.num_delimiters != 0); delimiters[0].num_delimiters = 0; // set the null delimiter to 0 - (new_result, sections) + (new_result, spans) } +// assumes no zero length delimiters except for null delim +// assumes delimiters are ordered fn md_parse_escapes( result: &[char], delimiters: &mut [DelimiterData], ) -> (Vec, Vec) { let mut new_result = vec![]; let mut new_delimiters = vec![]; - let mut readded_letters = 0; + let mut re_added_letters = 0; let mut del_ind = 0; let mut res_ind = 0; while res_ind <= result.len() { @@ -230,7 +251,7 @@ fn md_parse_escapes( let mut temp = vec![delimiters[del_ind].char_type; delimiters[del_ind].num_delimiters / 2]; new_result.append(&mut temp); - readded_letters += delimiters[del_ind].num_delimiters / 2; + re_added_letters += delimiters[del_ind].num_delimiters / 2; if delimiters[del_ind].num_delimiters % 2 != 0 { if del_ind != delimiters.len() - 1 @@ -239,7 +260,7 @@ fn md_parse_escapes( match delimiters[del_ind + 1].char_type { '`' | '*' | '_' => { new_result.push(delimiters[del_ind + 1].char_type); - readded_letters += 1; + re_added_letters += 1; delimiters[del_ind + 1].num_delimiters -= 1; } _ => panic!("\\ => should merge"), @@ -252,7 +273,7 @@ fn md_parse_escapes( } _ => { new_result.push(delimiters[del_ind].char_type); - readded_letters += 1; + re_added_letters += 1; delimiters[del_ind].num_delimiters -= 1; } } @@ -263,12 +284,17 @@ fn md_parse_escapes( let mut temp = vec![delimiters[del_ind].char_type; delimiters[del_ind].num_delimiters]; new_result.append(&mut temp); - readded_letters += delimiters[del_ind].num_delimiters; + re_added_letters += delimiters[del_ind].num_delimiters; } ' ' | '*' | '_' => { - new_delimiters.push(delimiters[del_ind].clone()); - let last_ind = new_delimiters.len() - 1; - new_delimiters[last_ind].ind += readded_letters; + // future stages assume no 0 len delimiters + if delimiters[del_ind].char_type == ' ' + || delimiters[del_ind].num_delimiters != 0 + { + new_delimiters.push(delimiters[del_ind].clone()); + let last_ind = new_delimiters.len() - 1; + new_delimiters[last_ind].ind += re_added_letters; + } } _ => panic!("no other type of delimiter char"), } @@ -285,6 +311,7 @@ fn md_parse_escapes( (new_result, new_delimiters) } +// assumes delimiters are ordered and nonzero fn md_parse_set_delimiter_open_close(result: &[char], delimiters: &mut [DelimiterData]) { // set could open/close for i in 0..delimiters.len() { @@ -338,8 +365,8 @@ fn md_parse_set_delimiter_open_close(result: &[char], delimiters: &mut [Delimite } } -fn md_parse_eval_sections(delimiters: &mut [DelimiterData]) -> Vec { - let mut sections = vec![]; +fn md_parse_eval_spans(delimiters: &mut [DelimiterData]) -> Vec { + let mut spans = vec![]; let stack_bottom = 0; // because I have a null element in it let mut current_position = stack_bottom + 1; let mut opener_a = [stack_bottom; 3]; @@ -391,13 +418,13 @@ fn md_parse_eval_sections(delimiters: &mut [DelimiterData]) -> Vec let strong = !code && delimiters[opener_ind].num_delimiters >= 2 && delimiters[current_position].num_delimiters >= 2; - sections.push(SectionData { - start_ind: delimiters[opener_ind].ind, - end_ind: delimiters[current_position].ind, + spans.push(SpanData { + start_idx: delimiters[opener_ind].ind, + end_idx: delimiters[current_position].ind, code_bold_italic: match (code, strong) { - (true, _) => TextClassEnum::Code, - (_, true) => TextClassEnum::Bold, - (_, _) => TextClassEnum::Italic, + (true, _) => SpanStyleEnum::Code, + (_, true) => SpanStyleEnum::Bold, + (_, _) => SpanStyleEnum::Italic, }, }); @@ -417,15 +444,15 @@ fn md_parse_eval_sections(delimiters: &mut [DelimiterData]) -> Vec } } } - sections + spans } -fn md_parse(text_value: &str) -> (Vec, Vec) { +fn md_parse(text_value: &str) -> (Vec, Vec) { let (result, mut delimiters) = md_parse_delimiters(text_value); - let (result, mut sections) = md_parse_code_blocks(&result, &mut delimiters); + let (result, mut spans) = md_parse_code_blocks(&result, &mut delimiters); let (mut result, mut delimiters) = md_parse_escapes(&result, &mut delimiters); md_parse_set_delimiter_open_close(&result, &mut delimiters); - sections.append(&mut md_parse_eval_sections(&mut delimiters)); + spans.append(&mut md_parse_eval_spans(&mut delimiters)); let mut final_result = vec![]; @@ -437,14 +464,14 @@ fn md_parse(text_value: &str) -> (Vec, Vec) { } } - for s in sections.iter_mut() { + for s in spans.iter_mut() { // if start needs to be after or equal - if s.start_ind >= d.ind { - s.start_ind += d.num_delimiters; + if s.start_idx >= d.ind { + s.start_idx += d.num_delimiters; } - if s.end_ind > d.ind { + if s.end_idx > d.ind { // if end needs to be after - s.end_ind += d.num_delimiters; + s.end_idx += d.num_delimiters; } } if d.char_type != ' ' { @@ -453,7 +480,7 @@ fn md_parse(text_value: &str) -> (Vec, Vec) { } } - (final_result.into_iter().rev().collect(), sections) + (final_result.into_iter().rev().collect(), spans) } #[cfg(test)] @@ -466,154 +493,146 @@ mod tests { let text = r"Hello, \nworld!"; assert_eq!( - format!("{:?}", md_parse(text)), - "(['H', 'e', 'l', 'l', 'o', ',', ' ', '\\n', 'w', 'o', 'r', 'l', 'd', '!'], [])" + md_parse(text).0, + ['H', 'e', 'l', 'l', 'o', ',', ' ', '\n', 'w', 'o', 'r', 'l', 'd', '!'] ); + assert_eq!(md_parse(text).1, []); // when not part of a '\n', '\' is not special let text = r"Hello, world! \1"; - assert_eq!(format!("{:?}",md_parse(text)), "(['H', 'e', 'l', 'l', 'o', ',', ' ', 'w', 'o', 'r', 'l', 'd', '!', ' ', '\\\\', '1'], [])"); + assert_eq!( + md_parse(text).0, + ['H', 'e', 'l', 'l', 'o', ',', ' ', 'w', 'o', 'r', 'l', 'd', '!', ' ', '\\', '1'] + ); + assert_eq!(md_parse(text).1, []); // when precedes '\n', '\' escapes it. let text = r"Hello, \\nworld!"; assert_eq!( - format!("{:?}", md_parse(text)), - "(['H', 'e', 'l', 'l', 'o', ',', ' ', '\\\\', 'n', 'w', 'o', 'r', 'l', 'd', '!'], [])" + md_parse(text).0, + ['H', 'e', 'l', 'l', 'o', ',', ' ', '\\', 'n', 'w', 'o', 'r', 'l', 'd', '!'] ); - - fn sd(s: i32, e: i32, i: i32) -> String { - "SectionData { start_ind: ".to_owned() - + &s.to_string() - + ", end_ind: " - + &e.to_string() - + ", code_bold_italic: " - + match i { - 0 => "Code", - 1 => "Bold", - 2 => "Italic", - _ => "Err", - } - + " }" + assert_eq!(md_parse(text).1, []); + + fn sd(s: usize, e: usize, i: u8) -> SpanData { + SpanData { + start_idx: s, + end_idx: e, + code_bold_italic: match i { + 0 => SpanStyleEnum::Code, + 1 => SpanStyleEnum::Bold, + 2 => SpanStyleEnum::Italic, + _ => panic!(), + }, + } } // using the md let text = r"He*ll*o, \nworld!"; assert_eq!( - format!("{:?}", md_parse(text)), - "(['H', 'e', 'l', 'l', 'o', ',', ' ', '\\n', 'w', 'o', 'r', 'l', 'd', '!'], [" - .to_owned() - + &sd(2, 4, 2) - + "])" + md_parse(text).0, + ['H', 'e', 'l', 'l', 'o', ',', ' ', '\n', 'w', 'o', 'r', 'l', 'd', '!'] ); + assert_eq!(md_parse(text).1, [sd(2, 4, 2)]); // mismatched let text = r"*Hello** , \nworld!"; assert_eq!( - format!("{:?}", md_parse(text)), - "(['H', 'e', 'l', 'l', 'o', '*', ' ', ',', ' ', '\\n', 'w', 'o', 'r', 'l', 'd', '!'], [" - .to_owned() - + &sd(0, 5, 2) + "])" + md_parse(text).0, + ['H', 'e', 'l', 'l', 'o', '*', ' ', ',', ' ', '\n', 'w', 'o', 'r', 'l', 'd', '!'] ); + assert_eq!(md_parse(text).1, [sd(0, 5, 2)]); // diff type let text = r"He*llo_, \nworld!"; - assert_eq!(format!("{:?}",md_parse(text)), "(['H', 'e', '*', 'l', 'l', 'o', '_', ',', ' ', '\\n', 'w', 'o', 'r', 'l', 'd', '!'], [])"); + assert_eq!( + md_parse(text).0, + ['H', 'e', '*', 'l', 'l', 'o', '_', ',', ' ', '\n', 'w', 'o', 'r', 'l', 'd', '!'] + ); + assert_eq!(md_parse(text).1, []); // multiple diff type let text = r"_hello*"; - assert_eq!( - format!("{:?}", md_parse(text)), - "(['_', 'h', 'e', 'l', 'l', 'o', '*'], [])" - ); + assert_eq!(md_parse(text).0, ['_', 'h', 'e', 'l', 'l', 'o', '*']); + assert_eq!(md_parse(text).1, []); // multiple same type let text = r"He*ll*o, \nw*or*ld!"; assert_eq!( - format!("{:?}", md_parse(text)), - "(['H', 'e', 'l', 'l', 'o', ',', ' ', '\\n', 'w', 'o', 'r', 'l', 'd', '!'], [" - .to_owned() - + &sd(2, 4, 2) - + ", " - + &sd(9, 11, 2) - + "])" + md_parse(text).0, + ['H', 'e', 'l', 'l', 'o', ',', ' ', '\n', 'w', 'o', 'r', 'l', 'd', '!'] ); + assert_eq!(md_parse(text).1, [sd(2, 4, 2), sd(9, 11, 2)]); // space before let text = r"**foo bar **"; assert_eq!( - format!("{:?}", md_parse(text)), - "(['*', '*', 'f', 'o', 'o', ' ', 'b', 'a', 'r', ' ', '*', '*'], [])" + md_parse(text).0, + ['*', '*', 'f', 'o', 'o', ' ', 'b', 'a', 'r', ' ', '*', '*'] ); + assert_eq!(md_parse(text).1, []); // punctuation before alphnum after let text = r"**(**foo)"; assert_eq!( - format!("{:?}", md_parse(text)), - "(['*', '*', '(', '*', '*', 'f', 'o', 'o', ')'], [])" + md_parse(text).0, + ['*', '*', '(', '*', '*', 'f', 'o', 'o', ')'] ); + assert_eq!(md_parse(text).1, []); } #[test] fn test_get_md_value() { - fn tc(i: u32) -> String { - "TextClass { code: ".to_owned() - + match i & (1 << 0) != 0 { - false => "false", - true => "true", - } - + ", bold: " - + match i & (1 << 1) != 0 { - false => "false", - true => "true", - } - + ", italic: " - + match i & (1 << 2) != 0 { - false => "false", - true => "true", - } - + " }" + fn tc(i: u32) -> SpanStyle { + SpanStyle { + code: i & (1 << 0) != 0, + bold: i & (1 << 1) != 0, + italic: i & (1 << 2) != 0, + } } let mut el = SvgElement::new("text", &[]); let text = r"foo"; el.set_attr("md", text); - assert_eq!( - format!("{:?}", get_md_value(&mut el)), - "([\"foo\"], [".to_owned() + &tc(0) + "])" - ); + assert_eq!(get_md_value(&mut el).0, ["foo"]); + el.set_attr("md", text); + assert_eq!(get_md_value(&mut el).1, [tc(0)]); let text = r"**(**foo)"; el.set_attr("md", text); - assert_eq!( - format!("{:?}", get_md_value(&mut el)), - "([\"**(**foo)\"], [".to_owned() + &tc(0) + "])" - ); + assert_eq!(get_md_value(&mut el).0, ["**(**foo)"]); + el.set_attr("md", text); + assert_eq!(get_md_value(&mut el).1, [tc(0)]); let text = r"*foo *bar**"; el.set_attr("md", text); - assert_eq!( - format!("{:?}", get_md_value(&mut el)), - "([\"foo bar\"], [".to_owned() + &tc(4) + "])" - ); + assert_eq!(get_md_value(&mut el).0, ["foo bar"]); + el.set_attr("md", text); + assert_eq!(get_md_value(&mut el).1, [tc(4)]); let text = r"*foo**bar**baz*"; el.set_attr("md", text); - assert_eq!( - format!("{:?}", get_md_value(&mut el)), - "([\"foo\", \"bar\", \"baz\"], [".to_owned() - + &tc(4) - + ", " - + &tc(6) - + ", " - + &tc(4) - + "])" - ); + assert_eq!(get_md_value(&mut el).0, ["foo", "bar", "baz"]); + el.set_attr("md", text); + assert_eq!(get_md_value(&mut el).1, [tc(4), tc(6), tc(4)]); let text = r"`foo*`"; el.set_attr("md", text); - assert_eq!( - format!("{:?}", get_md_value(&mut el)), - "([\"foo*\"], [".to_owned() + &tc(1) + "])" - ); + assert_eq!(get_md_value(&mut el).0, ["foo*"]); + el.set_attr("md", text); + assert_eq!(get_md_value(&mut el).1, [tc(1)]); + + // if first and last chars in code block are space remove them unless all empty + let text = r"` `` `"; + el.set_attr("md", text); + assert_eq!(get_md_value(&mut el).0, ["``"]); + el.set_attr("md", text); + assert_eq!(get_md_value(&mut el).1, [tc(1)]); + + let text = r"` `"; + el.set_attr("md", text); + assert_eq!(get_md_value(&mut el).0, [" "]); + el.set_attr("md", text); + assert_eq!(get_md_value(&mut el).1, [tc(1)]); } } diff --git a/src/elements/text.rs b/src/elements/text.rs index 7993d42..1b402fa 100644 --- a/src/elements/text.rs +++ b/src/elements/text.rs @@ -2,7 +2,7 @@ use super::SvgElement; use crate::geometry::LocSpec; use crate::types::{attr_split_cycle, fstr, strp}; -use crate::elements::markdown::{get_md_value, TextClass}; +use crate::elements::markdown::{get_md_value, SpanStyle}; use crate::errors::{Result, SvgdxError}; fn get_text_value(element: &mut SvgElement) -> String { @@ -175,7 +175,7 @@ pub fn process_text_attr(element: &SvgElement) -> Result<(SvgElement, Vec Result<(SvgElement, Vec Result<(SvgElement, Vec Result<(SvgElement, Vec Result<(SvgElement, Vec Result<(SvgElement, Vec Result<(SvgElement, Vec -multiline +multiline "#; assert_eq!( transform_str_default(input).unwrap().trim(), expected.trim() ); + + let input = r#" + +"#; + let expected = r#" + +hellomark down + +"#; + + assert_eq!( + transform_str_default(input).unwrap().trim(), + expected.trim() + ); + + let input = r#" + +"#; + let expected = r#" + + +ark downmhello + +"#; + + assert_eq!( + transform_str_default(input).unwrap().trim(), + expected.trim() + ); + + let input = r#" + +"#; + let expected = r#" + + +ark* down*mhello + +"#; + + assert_eq!( + transform_str_default(input).unwrap().trim(), + expected.trim() + ); } From 085c14f832370979b9d9943e348a5a0a48e63973 Mon Sep 17 00:00:00 2001 From: megamaths Date: Mon, 18 Aug 2025 16:47:03 +0100 Subject: [PATCH 8/9] changed to use vecs of string instead of vec of char --- src/elements/markdown.rs | 250 ++++++++++++++++++++------------------- 1 file changed, 129 insertions(+), 121 deletions(-) diff --git a/src/elements/markdown.rs b/src/elements/markdown.rs index dd705c3..38e560d 100644 --- a/src/elements/markdown.rs +++ b/src/elements/markdown.rs @@ -9,20 +9,20 @@ pub fn get_md_value(element: &mut SvgElement) -> (Vec, Vec) { return (vec![], vec![]); }; - let (parsed_string, spans) = md_parse(&text_value); + let (spans, span_data) = md_parse(&text_value); - let mut state_per_char = vec![ + let mut state_per_span = vec![ SpanStyle { code: false, bold: false, italic: false }; - parsed_string.len() + spans.len() ]; - for s in spans { + for s in span_data { let class = s.code_bold_italic; - for i in state_per_char.iter_mut().take(s.end_idx).skip(s.start_idx) { + for i in state_per_span.iter_mut().take(s.end_idx).skip(s.start_idx) { match class { SpanStyleEnum::Code => i.code = true, SpanStyleEnum::Bold => i.bold = true, @@ -33,15 +33,13 @@ pub fn get_md_value(element: &mut SvgElement) -> (Vec, Vec) { let mut strings = vec![]; let mut states = vec![]; - for i in 0..parsed_string.len() { - if i == 0 || states[states.len() - 1] != state_per_char[i] { + for i in 0..spans.len() { + if i == 0 || states[states.len() - 1] != state_per_span[i] { strings.push(String::new()); - states.push(state_per_char[i]) + states.push(state_per_span[i]) } - strings - .last_mut() - .expect("filled from i == 0") - .push(parsed_string[i]); + let last_ind = strings.len() - 1; + strings[last_ind] += &spans[i]; } (strings, states) @@ -79,7 +77,7 @@ struct DelimiterData { could_close: bool, } -fn md_parse_delimiters(text_value: &str) -> (Vec, Vec) { +fn md_parse_delimiters(text_value: &str) -> (Vec, Vec) { let mut result = vec![]; let mut delimiters = vec![DelimiterData { ind: 0, @@ -90,12 +88,18 @@ fn md_parse_delimiters(text_value: &str) -> (Vec, Vec) { could_close: false, }]; + let mut current_span = String::new(); + // first pass find delimiters for c in text_value.chars() { let mut add = true; match c { // the delimiters and escape '`' | '_' | '*' | '\\' => { + if !current_span.is_empty() { + result.push(current_span); + current_span = String::new(); + } let last = delimiters.last_mut().expect("guarenteed not to be empty"); if c == last.char_type && last.ind == result.len() { // is a continuation @@ -115,23 +119,28 @@ fn md_parse_delimiters(text_value: &str) -> (Vec, Vec) { _ => {} } if add { - result.push(c); + current_span.push(c); } } + if !current_span.is_empty() { + result.push(current_span); + } (result, delimiters) } // assumes delimiters are ordered fn md_parse_code_blocks( - result: &[char], + result: Vec, delimiters: &mut Vec, -) -> (Vec, Vec) { +) -> (Vec, Vec) { let mut new_result = vec![]; let mut spans = vec![]; let mut res_ind = 0; let mut del_ind = 0; - let mut re_added_letters = 0; + let mut removed_spans = 0; + + let mut current_span = String::new(); while res_ind <= result.len() { while del_ind < delimiters.len() && delimiters[del_ind].ind <= res_ind { if delimiters[del_ind].char_type == '`' { @@ -139,7 +148,7 @@ fn md_parse_code_blocks( // then reduce by 1 and re_add it if it does make a pair // need to acount for all previous delimiters have been moved by re_added letters let escaped = del_ind != 0 - && delimiters[del_ind - 1].ind - re_added_letters == delimiters[del_ind].ind + && delimiters[del_ind - 1].ind + removed_spans == delimiters[del_ind].ind && delimiters[del_ind - 1].char_type == '\\' && delimiters[del_ind - 1].num_delimiters % 2 != 0; let needed_len = match escaped { @@ -159,47 +168,51 @@ fn md_parse_code_blocks( if escaped { delimiters[del_ind - 1].num_delimiters -= 1; - new_result.push('`'); - re_added_letters += 1; + current_span.push('`'); + } + + if !current_span.is_empty() { + new_result.push(current_span); + current_span = String::new(); } + let start_ind = new_result.len(); // to make easy to remove edge spaces if any let mut has_none_space = false; - let mut span_str = vec![]; del_ind += 1; while res_ind <= delimiters[closer_ind].ind { while del_ind < closer_ind && delimiters[del_ind].ind <= res_ind { - let mut temp = vec![ - delimiters[del_ind].char_type; - delimiters[del_ind].num_delimiters - ]; has_none_space |= delimiters[del_ind].num_delimiters != 0; // char type will not be ' ' - span_str.append(&mut temp); - re_added_letters += delimiters[del_ind].num_delimiters; + current_span += &delimiters[del_ind] + .char_type + .to_string() + .repeat(delimiters[del_ind].num_delimiters); delimiters[del_ind].num_delimiters = 0; del_ind += 1; } if res_ind != delimiters[closer_ind].ind { - has_none_space |= result[res_ind] != ' '; - span_str.push(result[res_ind]); + removed_spans += 1; + has_none_space |= result[res_ind].contains(|c| c != ' '); + current_span += &result[res_ind]; res_ind += 1; } else { break; } } if has_none_space - && span_str.len() > 1 - && span_str[0] == ' ' - && span_str[span_str.len() - 1] == ' ' + && current_span.len() > 1 + && current_span.starts_with(' ') + && current_span.ends_with(' ') { - span_str.pop(); - let mut itr = span_str.iter(); - itr.next().expect("size bigger than 1"); // pop front - new_result.extend(itr); - } else { - new_result.append(&mut span_str); + current_span = current_span[1..current_span.len() - 1].to_string(); + // chop off each end + } + if !current_span.is_empty() { + removed_spans -= 1; + new_result.push(current_span); + current_span = String::new(); } let end_ind = new_result.len(); @@ -214,13 +227,13 @@ fn md_parse_code_blocks( } } } - delimiters[del_ind].ind += re_added_letters; + delimiters[del_ind].ind -= removed_spans; del_ind += 1; } if res_ind != result.len() { - new_result.push(result[res_ind]); + new_result.push(result[res_ind].clone()); } res_ind += 1; @@ -236,22 +249,28 @@ fn md_parse_code_blocks( // assumes no zero length delimiters except for null delim // assumes delimiters are ordered fn md_parse_escapes( - result: &[char], + result: Vec, delimiters: &mut [DelimiterData], -) -> (Vec, Vec) { +) -> (Vec, Vec) { let mut new_result = vec![]; let mut new_delimiters = vec![]; - let mut re_added_letters = 0; + let mut added_spans = 0; let mut del_ind = 0; let mut res_ind = 0; + + let mut current_span = String::new(); while res_ind <= result.len() { while del_ind < delimiters.len() && delimiters[del_ind].ind <= res_ind { match delimiters[del_ind].char_type { '\\' => { - let mut temp = - vec![delimiters[del_ind].char_type; delimiters[del_ind].num_delimiters / 2]; - new_result.append(&mut temp); - re_added_letters += delimiters[del_ind].num_delimiters / 2; + if !current_span.is_empty() && delimiters[del_ind].num_delimiters != 0 { + new_result.push(current_span); + current_span = String::new(); + } + current_span += &delimiters[del_ind] + .char_type + .to_string() + .repeat(delimiters[del_ind].num_delimiters / 2); if delimiters[del_ind].num_delimiters % 2 != 0 { if del_ind != delimiters.len() - 1 @@ -259,21 +278,24 @@ fn md_parse_escapes( { match delimiters[del_ind + 1].char_type { '`' | '*' | '_' => { - new_result.push(delimiters[del_ind + 1].char_type); - re_added_letters += 1; + added_spans += 1; + current_span.push(delimiters[del_ind + 1].char_type); delimiters[del_ind + 1].num_delimiters -= 1; } _ => panic!("\\ => should merge"), } } else { - match result[delimiters[del_ind].ind] { - 'n' => { + match result[delimiters[del_ind].ind].chars().next() { + Some('n') => { + current_span.push('\n'); + current_span += &result[delimiters[del_ind].ind][1..]; res_ind += 1; - new_result.push('\n'); + delimiters[del_ind].num_delimiters -= 1; } _ => { - new_result.push(delimiters[del_ind].char_type); - re_added_letters += 1; + current_span.push(delimiters[del_ind].char_type); + current_span += &result[delimiters[del_ind].ind]; + res_ind += 1; delimiters[del_ind].num_delimiters -= 1; } } @@ -281,10 +303,10 @@ fn md_parse_escapes( } } '`' => { - let mut temp = - vec![delimiters[del_ind].char_type; delimiters[del_ind].num_delimiters]; - new_result.append(&mut temp); - re_added_letters += delimiters[del_ind].num_delimiters; + current_span += &delimiters[del_ind] + .char_type + .to_string() + .repeat(delimiters[del_ind].num_delimiters); } ' ' | '*' | '_' => { // future stages assume no 0 len delimiters @@ -293,7 +315,7 @@ fn md_parse_escapes( { new_delimiters.push(delimiters[del_ind].clone()); let last_ind = new_delimiters.len() - 1; - new_delimiters[last_ind].ind += re_added_letters; + new_delimiters[last_ind].ind += added_spans; } } _ => panic!("no other type of delimiter char"), @@ -301,18 +323,26 @@ fn md_parse_escapes( del_ind += 1; } + if !current_span.is_empty() { + new_result.push(current_span); + current_span = String::new(); + } if res_ind != result.len() { - new_result.push(result[res_ind]); + current_span += &result[res_ind].clone(); } res_ind += 1; } + if !current_span.is_empty() { + new_result.push(current_span); + } + (new_result, new_delimiters) } // assumes delimiters are ordered and nonzero -fn md_parse_set_delimiter_open_close(result: &[char], delimiters: &mut [DelimiterData]) { +fn md_parse_set_delimiter_open_close(result: &[String], delimiters: &mut [DelimiterData]) { // set could open/close for i in 0..delimiters.len() { let prev_char; @@ -322,7 +352,10 @@ fn md_parse_set_delimiter_open_close(result: &[char], delimiters: &mut [Delimite } else if delimiters[i].ind == 0 { prev_char = ' '; } else { - prev_char = result[delimiters[i].ind - 1]; + prev_char = result[delimiters[i].ind - 1] + .chars() + .last() + .expect("no 0 len spans"); } if i != delimiters.len() - 1 && delimiters[i + 1].ind == delimiters[i].ind { @@ -330,7 +363,10 @@ fn md_parse_set_delimiter_open_close(result: &[char], delimiters: &mut [Delimite } else if delimiters[i].ind == result.len() { next_char = ' '; } else { - next_char = result[delimiters[i].ind]; + next_char = result[delimiters[i].ind] + .chars() + .next() + .expect("no 0 len spans"); } match (prev_char.is_whitespace(), next_char.is_whitespace()) { @@ -447,16 +483,15 @@ fn md_parse_eval_spans(delimiters: &mut [DelimiterData]) -> Vec { spans } -fn md_parse(text_value: &str) -> (Vec, Vec) { +fn md_parse(text_value: &str) -> (Vec, Vec) { let (result, mut delimiters) = md_parse_delimiters(text_value); - let (result, mut spans) = md_parse_code_blocks(&result, &mut delimiters); - let (mut result, mut delimiters) = md_parse_escapes(&result, &mut delimiters); + let (result, mut span_data) = md_parse_code_blocks(result, &mut delimiters); + let (mut result, mut delimiters) = md_parse_escapes(result, &mut delimiters); md_parse_set_delimiter_open_close(&result, &mut delimiters); - spans.append(&mut md_parse_eval_spans(&mut delimiters)); + span_data.append(&mut md_parse_eval_spans(&mut delimiters)); let mut final_result = vec![]; - // work from the back to avoid index invalidation for d in delimiters.into_iter().rev() { while d.ind < result.len() { if let Some(thing) = result.pop() { @@ -464,23 +499,23 @@ fn md_parse(text_value: &str) -> (Vec, Vec) { } } - for s in spans.iter_mut() { - // if start needs to be after or equal - if s.start_idx >= d.ind { - s.start_idx += d.num_delimiters; - } - if s.end_idx > d.ind { - // if end needs to be after - s.end_idx += d.num_delimiters; + if d.char_type != ' ' && d.num_delimiters != 0 { + for s in span_data.iter_mut() { + // if start needs to be after or equal + if s.start_idx >= d.ind { + s.start_idx += 1; + } + if s.end_idx > d.ind { + // if end needs to be after + s.end_idx += 1; + } } - } - if d.char_type != ' ' { - let mut temp = vec![d.char_type; d.num_delimiters]; - final_result.append(&mut temp); + + final_result.push(d.char_type.to_string().repeat(d.num_delimiters)); } } - (final_result.into_iter().rev().collect(), spans) + (final_result.into_iter().rev().collect(), span_data) } #[cfg(test)] @@ -492,26 +527,17 @@ mod tests { // the basic examples no actual md let text = r"Hello, \nworld!"; - assert_eq!( - md_parse(text).0, - ['H', 'e', 'l', 'l', 'o', ',', ' ', '\n', 'w', 'o', 'r', 'l', 'd', '!'] - ); + assert_eq!(md_parse(text).0, ["Hello, ", "\nworld!"]); assert_eq!(md_parse(text).1, []); // when not part of a '\n', '\' is not special let text = r"Hello, world! \1"; - assert_eq!( - md_parse(text).0, - ['H', 'e', 'l', 'l', 'o', ',', ' ', 'w', 'o', 'r', 'l', 'd', '!', ' ', '\\', '1'] - ); + assert_eq!(md_parse(text).0, ["Hello, world! ", "\\1"]); assert_eq!(md_parse(text).1, []); // when precedes '\n', '\' escapes it. let text = r"Hello, \\nworld!"; - assert_eq!( - md_parse(text).0, - ['H', 'e', 'l', 'l', 'o', ',', ' ', '\\', 'n', 'w', 'o', 'r', 'l', 'd', '!'] - ); + assert_eq!(md_parse(text).0, ["Hello, ", "\\", "nworld!"]); assert_eq!(md_parse(text).1, []); fn sd(s: usize, e: usize, i: u8) -> SpanData { @@ -529,55 +555,37 @@ mod tests { // using the md let text = r"He*ll*o, \nworld!"; - assert_eq!( - md_parse(text).0, - ['H', 'e', 'l', 'l', 'o', ',', ' ', '\n', 'w', 'o', 'r', 'l', 'd', '!'] - ); - assert_eq!(md_parse(text).1, [sd(2, 4, 2)]); + assert_eq!(md_parse(text).0, ["He", "ll", "o, ", "\nworld!"]); + assert_eq!(md_parse(text).1, [sd(1, 2, 2)]); // mismatched let text = r"*Hello** , \nworld!"; - assert_eq!( - md_parse(text).0, - ['H', 'e', 'l', 'l', 'o', '*', ' ', ',', ' ', '\n', 'w', 'o', 'r', 'l', 'd', '!'] - ); - assert_eq!(md_parse(text).1, [sd(0, 5, 2)]); + assert_eq!(md_parse(text).0, ["Hello", "*", " , ", "\nworld!"]); + assert_eq!(md_parse(text).1, [sd(0, 1, 2)]); // diff type let text = r"He*llo_, \nworld!"; - assert_eq!( - md_parse(text).0, - ['H', 'e', '*', 'l', 'l', 'o', '_', ',', ' ', '\n', 'w', 'o', 'r', 'l', 'd', '!'] - ); + assert_eq!(md_parse(text).0, ["He", "*", "llo", "_", ", ", "\nworld!"]); assert_eq!(md_parse(text).1, []); // multiple diff type let text = r"_hello*"; - assert_eq!(md_parse(text).0, ['_', 'h', 'e', 'l', 'l', 'o', '*']); + assert_eq!(md_parse(text).0, ["_", "hello", "*"]); assert_eq!(md_parse(text).1, []); // multiple same type let text = r"He*ll*o, \nw*or*ld!"; - assert_eq!( - md_parse(text).0, - ['H', 'e', 'l', 'l', 'o', ',', ' ', '\n', 'w', 'o', 'r', 'l', 'd', '!'] - ); - assert_eq!(md_parse(text).1, [sd(2, 4, 2), sd(9, 11, 2)]); + assert_eq!(md_parse(text).0, ["He", "ll", "o, ", "\nw", "or", "ld!"]); + assert_eq!(md_parse(text).1, [sd(1, 2, 2), sd(4, 5, 2)]); // space before let text = r"**foo bar **"; - assert_eq!( - md_parse(text).0, - ['*', '*', 'f', 'o', 'o', ' ', 'b', 'a', 'r', ' ', '*', '*'] - ); + assert_eq!(md_parse(text).0, ["**", "foo bar ", "**"]); assert_eq!(md_parse(text).1, []); // punctuation before alphnum after let text = r"**(**foo)"; - assert_eq!( - md_parse(text).0, - ['*', '*', '(', '*', '*', 'f', 'o', 'o', ')'] - ); + assert_eq!(md_parse(text).0, ["**", "(", "**", "foo)"]); assert_eq!(md_parse(text).1, []); } From 024c71ac16854b244316ae78c07c95cb49da9925 Mon Sep 17 00:00:00 2001 From: megamaths Date: Tue, 19 Aug 2025 12:45:20 +0100 Subject: [PATCH 9/9] changed return type to only return 1 vec and commented much of the code --- examples/markdown.svg | 8 +- examples/markdown.xml | 2 +- src/elements/markdown.rs | 272 ++++++++++++++++++++++++++------------- src/elements/text.rs | 88 +++++++------ 4 files changed, 234 insertions(+), 136 deletions(-) diff --git a/examples/markdown.svg b/examples/markdown.svg index c294757..3059d9b 100644 --- a/examples/markdown.svg +++ b/examples/markdown.svg @@ -1,12 +1,10 @@