From 2cdcf33a02a1346b2719309a7e3b837ca001761a Mon Sep 17 00:00:00 2001
From: BitSyndicate1 <100071875+BitSyndicate1@users.noreply.github.com>
Date: Tue, 22 Apr 2025 16:23:15 +0200
Subject: [PATCH 1/2] feat: add a read text impl for async readers

---
Review note: the added method was corrected during review (undefined
`end_name`, `&mut buf` on an already-mutable reference, immutable `writer`,
`trim_text_start` not restored when a write fails). Hunk sizes and the
diffstat were recomputed by hand; the post-image blob id on the `index`
line is the one from the original submission and no longer matches.

 src/reader/async_tokio.rs | 71 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 70 insertions(+), 1 deletion(-)

diff --git a/src/reader/async_tokio.rs b/src/reader/async_tokio.rs
index c5e1eaaa..de95b77a 100644
--- a/src/reader/async_tokio.rs
+++ b/src/reader/async_tokio.rs
@@ -5,7 +5,7 @@
 use std::pin::Pin;
 use std::task::{Context, Poll};
 
-use tokio::io::{self, AsyncBufRead, AsyncBufReadExt, AsyncRead, ReadBuf};
+use tokio::io::{self, AsyncBufRead, AsyncBufReadExt, AsyncRead, AsyncWrite, ReadBuf};
 
 use crate::errors::{Error, Result, SyntaxError};
 use crate::events::Event;
@@ -199,6 +199,75 @@ impl<R: AsyncBufRead + Unpin> Reader<R> {
     async fn read_until_close_async<'b>(&mut self, buf: &'b mut Vec<u8>) -> Result<Event<'b>> {
         read_until_close!(self, buf, TokioAdapter(&mut self.reader), await)
     }
+
+    /// Reads the content between start and end tags, including any markup, and
+    /// writes it to `out`. This function is supposed to be called after you
+    /// already read a [`Start`] event.
+    ///
+    /// Manages nested cases where parent and child elements have the _literally_
+    /// same name.
+    ///
+    /// This method does not unescape read data, instead it writes the content
+    /// of the XML document "as is". This is because it has no idea what text it
+    /// reads, and if, for example, it contains CDATA section, attempt to unescape
+    /// its content will spoil data.
+    ///
+    /// `buf` is cleared before every event is read, so its previous contents
+    /// are lost. Trimming of leading text whitespace is switched off while the
+    /// content is copied; the previous setting is restored before returning,
+    /// whether the call succeeds or fails.
+    ///
+    /// # Errors
+    ///
+    /// Returns any error produced while reading from the underlying reader or
+    /// writing to `out`. If the input ends before the matching end tag is
+    /// found, the error built by `Error::missed_end` is returned; the current
+    /// [`decoder`] is used only for that error.
+    ///
+    /// [`Start`]: Event::Start
+    /// [`decoder`]: Self::decoder()
+    async fn read_text_into_async<'n, W>(
+        &mut self,
+        end: QName<'n>,
+        buf: &mut Vec<u8>,
+        out: &mut W,
+    ) -> Result<()>
+    where
+        W: AsyncWrite + Unpin,
+    {
+        let mut writer = crate::Writer::new(out);
+        // Leading whitespace belongs to the content, so trimming is disabled
+        // for the duration of the copy and put back on every exit path.
+        let trim = std::mem::replace(&mut self.config_mut().trim_text_start, false);
+        // Number of currently open nested elements named like `end`.
+        let mut depth = 0usize;
+        let result = loop {
+            buf.clear();
+            let event = match self.read_event_into_async(buf).await {
+                Ok(Event::Start(e)) if e.name() == end => {
+                    depth += 1;
+                    Event::Start(e)
+                }
+                Ok(Event::End(e)) if e.name() == end => {
+                    // The end tag that matches the already consumed start tag
+                    // terminates the copy and is not written itself.
+                    if depth == 0 {
+                        break Ok(());
+                    }
+                    depth -= 1;
+                    Event::End(e)
+                }
+                Ok(Event::Eof) => break Err(Error::missed_end(end, self.decoder())),
+                Ok(event) => event,
+                Err(e) => break Err(e),
+            };
+            if let Err(e) = writer.write_event_async(event).await {
+                break Err(e.into());
+            }
+        };
+        self.config_mut().trim_text_start = trim;
+        result
+    }
 }
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
From 4e1ce0070da3fc2bb14894cce96db44681f48b23 Mon Sep 17 00:00:00 2001
From: BitSyndicate1 <100071875+BitSyndicate1@users.noreply.github.com>
Date: Tue, 22 Apr 2025 16:25:40 +0200
Subject: [PATCH 2/2] feat: add read_text for nsreader

---
Review note: the `NsReader` wrapper is now `pub`, matching the `Reader`
method this commit exposes. Hunk offsets and the diffstat were recomputed
by hand for the corrected first patch; the blob ids on the `index` line are
the ones from the original submission and no longer match.

 src/reader/async_tokio.rs | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/src/reader/async_tokio.rs b/src/reader/async_tokio.rs
index de95b77a..652b8d65 100644
--- a/src/reader/async_tokio.rs
+++ b/src/reader/async_tokio.rs
@@ -226,7 +226,7 @@ impl<R: AsyncBufRead + Unpin> Reader<R> {
     ///
     /// [`Start`]: Event::Start
     /// [`decoder`]: Self::decoder()
-    async fn read_text_into_async<'n, W>(
+    pub async fn read_text_into_async<'n, W>(
         &mut self,
         end: QName<'n>,
         buf: &mut Vec<u8>,
@@ -476,6 +476,34 @@ impl<R: AsyncBufRead + Unpin> NsReader<R> {
         let event = self.read_event_into_async(buf).await;
         self.resolve_event(event)
     }
+
+    /// Reads the content between start and end tags, including any markup, and
+    /// writes it to `out`. This function is supposed to be called after you
+    /// already read a [`Start`] event.
+    ///
+    /// Manages nested cases where parent and child elements have the _literally_
+    /// same name.
+    ///
+    /// This method does not unescape read data, instead it writes the content
+    /// of the XML document "as is". This is because it has no idea what text it
+    /// reads, and if, for example, it contains CDATA section, attempt to unescape
+    /// its content will spoil data.
+    ///
+    /// This simply forwards to the method of the same name on the wrapped
+    /// [`Reader`].
+    ///
+    /// [`Start`]: Event::Start
+    pub async fn read_text_into_async<'n, W>(
+        &mut self,
+        end: QName<'n>,
+        buf: &mut Vec<u8>,
+        out: &mut W,
+    ) -> Result<()>
+    where
+        W: AsyncWrite + Unpin,
+    {
+        self.reader.read_text_into_async(end, buf, out).await
+    }
 }
 
 #[cfg(test)]