Skip to content

Commit 8790bc6

Browse files
committed
Convert events and attributes to str and remove decoding methods
1 parent c5068e7 commit 8790bc6

File tree

18 files changed

+488
-716
lines changed

18 files changed

+488
-716
lines changed

Changelog.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,6 @@
160160
- [#416]: `BytesStart::to_borrowed` renamed to `BytesStart::borrow`, the same method
161161
added to all events
162162

163-
- [#421]: `decode_and_unescape*` methods now does one less allocation if unescaping is not required
164163
- [#421]: Removed ability to deserialize byte arrays from serde deserializer.
165164
XML is not able to store binary data directly, you should always use some encoding
166165
scheme, for example, HEX or Base64

benches/macrobenches.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ fn parse_document_from_str(doc: &str) -> XmlResult<()> {
5050
match criterion::black_box(r.read_event()?) {
5151
Event::Start(e) | Event::Empty(e) => {
5252
for attr in e.attributes() {
53-
criterion::black_box(attr?.decode_and_unescape_value(&r)?);
53+
criterion::black_box(attr?.unescape_value()?);
5454
}
5555
}
5656
Event::Text(e) => {
@@ -75,7 +75,7 @@ fn parse_document_from_bytes(doc: &[u8]) -> XmlResult<()> {
7575
match criterion::black_box(r.read_event_into(&mut buf)?) {
7676
Event::Start(e) | Event::Empty(e) => {
7777
for attr in e.attributes() {
78-
criterion::black_box(attr?.decode_and_unescape_value(&r)?);
78+
criterion::black_box(attr?.unescape_value()?);
7979
}
8080
}
8181
Event::Text(e) => {
@@ -101,7 +101,7 @@ fn parse_document_from_str_with_namespaces(doc: &str) -> XmlResult<()> {
101101
(resolved_ns, Event::Start(e) | Event::Empty(e)) => {
102102
criterion::black_box(resolved_ns);
103103
for attr in e.attributes() {
104-
criterion::black_box(attr?.decode_and_unescape_value(&r)?);
104+
criterion::black_box(attr?.unescape_value()?);
105105
}
106106
}
107107
(resolved_ns, Event::Text(e)) => {
@@ -129,7 +129,7 @@ fn parse_document_from_bytes_with_namespaces(doc: &[u8]) -> XmlResult<()> {
129129
(resolved_ns, Event::Start(e) | Event::Empty(e)) => {
130130
criterion::black_box(resolved_ns);
131131
for attr in e.attributes() {
132-
criterion::black_box(attr?.decode_and_unescape_value(&r)?);
132+
criterion::black_box(attr?.unescape_value()?);
133133
}
134134
}
135135
(resolved_ns, Event::Text(e)) => {

examples/custom_entities.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
3333
loop {
3434
match reader.read_event() {
3535
Ok(Event::DocType(ref e)) => {
36-
for cap in entity_re.captures_iter(e) {
36+
for cap in entity_re.captures_iter(e.as_bytes()) {
3737
custom_entities.insert(
3838
reader.decoder().decode(&cap[1])?.into_owned(),
3939
reader.decoder().decode(&cap[2])?.into_owned(),
@@ -46,7 +46,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
4646
.attributes()
4747
.map(|a| {
4848
a.unwrap()
49-
.decode_and_unescape_value_with(&reader, |ent| {
49+
.unescape_value_with(|ent| {
5050
custom_entities.get(ent).map(|s| s.as_str())
5151
})
5252
.unwrap()

src/de/map.rs

Lines changed: 39 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ where
188188
/// ```
189189
has_value_field: bool,
190190
/// list of fields yet to unflatten (defined as starting with $unflatten=)
191-
unflatten_fields: Vec<&'static [u8]>,
191+
unflatten_fields: Vec<&'static str>,
192192
}
193193

194194
impl<'de, 'a, R> MapAccess<'de, 'a, R>
@@ -211,7 +211,7 @@ where
211211
unflatten_fields: fields
212212
.iter()
213213
.filter(|f| f.starts_with(UNFLATTEN_PREFIX))
214-
.map(|f| f.as_bytes())
214+
.map(|&f| f)
215215
.collect(),
216216
})
217217
}
@@ -232,15 +232,12 @@ where
232232
// FIXME: There error positions counted from the start of tag name - need global position
233233
let slice = &self.start.buf;
234234

235-
if let Some(a) = self.iter.next(slice).transpose()? {
235+
if let Some(a) = self.iter.next(slice.as_bytes()).transpose()? {
236236
// try getting map from attributes (key= "value")
237237
let (key, value) = a.into();
238238
self.source = ValueSource::Attribute(value.unwrap_or_default());
239-
seed.deserialize(EscapedDeserializer::new(
240-
Cow::Borrowed(std::str::from_utf8(&slice[key]).expect("fixme dalley")),
241-
false,
242-
))
243-
.map(Some)
239+
seed.deserialize(EscapedDeserializer::new(Cow::Borrowed(&slice[key]), false))
240+
.map(Some)
244241
} else {
245242
// try getting from events (<key>value</key>)
246243
match self.de.peek()? {
@@ -273,26 +270,27 @@ where
273270
}
274271
DeEvent::Start(e) => {
275272
self.source = ValueSource::Nested;
276-
let key =
277-
if let Some(p) = self.unflatten_fields.iter().position(|f| {
278-
e.name().as_ref().as_bytes() == &f[UNFLATTEN_PREFIX.len()..]
279-
}) {
280-
// Used to deserialize elements, like:
281-
// <root>
282-
// <xxx>test</xxx>
283-
// </root>
284-
//
285-
// into
286-
//
287-
// struct Root {
288-
// #[serde(rename = "$unflatten=xxx")]
289-
// xxx: String,
290-
// }
291-
seed.deserialize(self.unflatten_fields.remove(p).into_deserializer())
292-
} else {
293-
let name = Cow::Borrowed(e.local_name().into_inner());
294-
seed.deserialize(EscapedDeserializer::new(name, false))
295-
};
273+
let key = if let Some(p) = self
274+
.unflatten_fields
275+
.iter()
276+
.position(|f| e.name().as_ref() == &f[UNFLATTEN_PREFIX.len()..])
277+
{
278+
// Used to deserialize elements, like:
279+
// <root>
280+
// <xxx>test</xxx>
281+
// </root>
282+
//
283+
// into
284+
//
285+
// struct Root {
286+
// #[serde(rename = "$unflatten=xxx")]
287+
// xxx: String,
288+
// }
289+
seed.deserialize(self.unflatten_fields.remove(p).into_deserializer())
290+
} else {
291+
let name = Cow::Borrowed(e.local_name().into_inner());
292+
seed.deserialize(EscapedDeserializer::new(name, false))
293+
};
296294
key.map(Some)
297295
}
298296
// Stop iteration after reaching a closing tag
@@ -315,7 +313,6 @@ where
315313
&self.start.buf,
316314
value,
317315
true,
318-
self.de.reader.decoder(),
319316
)),
320317
// This arm processes the following XML shape:
321318
// <any-tag>
@@ -327,16 +324,12 @@ where
327324
// of that events)
328325
// This case are checked by "xml_schema_lists::element" tests in tests/serde-de.rs
329326
ValueSource::Text => match self.de.next()? {
330-
DeEvent::Text(e) => seed.deserialize(SimpleTypeDeserializer::from_cow(
331-
e.into_inner(),
332-
true,
333-
self.de.reader.decoder(),
334-
)),
335-
DeEvent::CData(e) => seed.deserialize(SimpleTypeDeserializer::from_cow(
336-
e.into_inner(),
337-
false,
338-
self.de.reader.decoder(),
339-
)),
327+
DeEvent::Text(e) => {
328+
seed.deserialize(SimpleTypeDeserializer::from_cow(e.into_inner(), true))
329+
}
330+
DeEvent::CData(e) => {
331+
seed.deserialize(SimpleTypeDeserializer::from_cow(e.into_inner(), false))
332+
}
340333
// SAFETY: We set `Text` only when we seen `Text` or `CData`
341334
_ => unreachable!(),
342335
},
@@ -722,32 +715,20 @@ where
722715
// Comment to prevent auto-formatting and keep Text and Cdata similar
723716
e.into_inner(),
724717
true,
725-
self.map.de.reader.decoder(),
726-
)
727-
.deserialize_seq(visitor),
728-
DeEvent::CData(e) => SimpleTypeDeserializer::from_cow(
729-
e.into_inner(),
730-
false,
731-
self.map.de.reader.decoder(),
732718
)
733719
.deserialize_seq(visitor),
720+
DeEvent::CData(e) => {
721+
SimpleTypeDeserializer::from_cow(e.into_inner(), false).deserialize_seq(visitor)
722+
}
734723
// This is a sequence element. We cannot treat it as another flatten
735724
// sequence if type will require `deserialize_seq` We instead forward
736725
// it to `xs:simpleType` implementation
737726
DeEvent::Start(e) => {
738727
let value = match self.map.de.next()? {
739-
DeEvent::Text(e) => SimpleTypeDeserializer::from_cow(
740-
e.into_inner(),
741-
true,
742-
self.map.de.reader.decoder(),
743-
)
744-
.deserialize_seq(visitor),
745-
DeEvent::CData(e) => SimpleTypeDeserializer::from_cow(
746-
e.into_inner(),
747-
false,
748-
self.map.de.reader.decoder(),
749-
)
750-
.deserialize_seq(visitor),
728+
DeEvent::Text(e) => SimpleTypeDeserializer::from_cow(e.into_inner(), true)
729+
.deserialize_seq(visitor),
730+
DeEvent::CData(e) => SimpleTypeDeserializer::from_cow(e.into_inner(), false)
731+
.deserialize_seq(visitor),
751732
e => Err(DeError::Custom(format!("Unsupported event {:?}", e))),
752733
};
753734
// TODO: May be assert that here we expect only matching closing tag?

src/de/mod.rs

Lines changed: 26 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ mod simple_type;
109109
mod var;
110110

111111
pub use crate::errors::serialize::DeError;
112+
use crate::escape::unescape;
112113
use crate::{
113114
encoding::{Decoder, Utf8BytesReader},
114115
errors::Error,
@@ -490,18 +491,36 @@ where
490491
/// |[`DeEvent::Eof`] | |Emits [`UnexpectedEof`](DeError::UnexpectedEof)
491492
fn next_text_impl(
492493
&mut self,
493-
unescape: bool,
494+
do_unescape: bool,
494495
allow_start: bool,
495496
) -> Result<Cow<'de, str>, DeError> {
496497
match self.next()? {
497-
DeEvent::Text(e) => Ok(e.decode(unescape)?),
498-
DeEvent::CData(e) => Ok(e.decode()?),
498+
DeEvent::Text(e) => {
499+
if do_unescape {
500+
Ok(match unescape(e.as_ref())? {
501+
Cow::Borrowed(_) => e.into_inner(),
502+
Cow::Owned(s) => Cow::Owned(s),
503+
})
504+
} else {
505+
Ok(e.into_inner())
506+
}
507+
}
508+
DeEvent::CData(e) => Ok(e.into_inner()),
499509
DeEvent::Start(e) if allow_start => {
500510
// allow one nested level
501511
let inner = self.next()?;
502512
let t = match inner {
503-
DeEvent::Text(t) => t.decode(unescape)?,
504-
DeEvent::CData(t) => t.decode()?,
513+
DeEvent::Text(t) => {
514+
if do_unescape {
515+
Ok(match unescape(t.as_ref())? {
516+
Cow::Borrowed(_) => t.into_inner(),
517+
Cow::Owned(s) => Cow::Owned(s),
518+
})
519+
} else {
520+
Ok(t.into_inner())
521+
}
522+
}
523+
DeEvent::CData(t) => Ok(t.into_inner()),
505524
DeEvent::Start(s) => {
506525
return Err(DeError::UnexpectedStart(s.name().as_ref().to_owned()))
507526
}
@@ -516,7 +535,7 @@ where
516535
DeEvent::Eof => return Err(DeError::UnexpectedEof),
517536
};
518537
self.read_to_end(e.name())?;
519-
Ok(t)
538+
t
520539
}
521540
DeEvent::Start(e) => Err(DeError::UnexpectedStart(e.name().as_ref().to_owned())),
522541
DeEvent::End(e) => Err(DeError::UnexpectedEnd(e.name().as_ref().to_owned())),
@@ -774,7 +793,7 @@ where
774793
}
775794

776795
/// Trait used by the deserializer for iterating over input. This is manually
777-
/// "specialized" for iterating over `&[u8]`.
796+
/// "specialized" for iterating over `&str`.
778797
///
779798
/// You do not need to implement this trait, it is needed to abstract from
780799
/// [borrowing](SliceReader) and [copying](IoReader) data sources and reuse code in
@@ -786,9 +805,6 @@ pub trait XmlRead<'i> {
786805
/// Skips until end element is found. Unlike `next()` it will not allocate
787806
/// when it cannot satisfy the lifetime.
788807
fn read_to_end(&mut self, name: QName) -> Result<(), DeError>;
789-
790-
/// A copy of the reader's decoder used to decode strings.
791-
fn decoder(&self) -> Decoder;
792808
}
793809

794810
/// XML input source that reads from a std::io input stream.
@@ -827,10 +843,6 @@ impl<'i, R: BufRead> XmlRead<'i> for IoReader<R> {
827843
Ok(_) => Ok(()),
828844
}
829845
}
830-
831-
fn decoder(&self) -> Decoder {
832-
self.reader.decoder()
833-
}
834846
}
835847

836848
/// XML input source that reads from a slice of bytes and can borrow from it.
@@ -864,10 +876,6 @@ impl<'de> XmlRead<'de> for SliceReader<'de> {
864876
Ok(_) => Ok(()),
865877
}
866878
}
867-
868-
fn decoder(&self) -> Decoder {
869-
self.reader.decoder()
870-
}
871879
}
872880

873881
#[cfg(test)]

0 commit comments

Comments
 (0)