|
1 | 1 | #[cfg(feature = "encoding")]
|
2 | 2 | use encoding_rs::UTF_8;
|
3 | 3 |
|
4 |
| -#[cfg(feature = "encoding")] |
5 |
| -use crate::encoding::detect_encoding; |
6 |
| -use crate::encoding::Decoder; |
| 4 | +use crate::encoding::{self, Decoder}; |
7 | 5 | use crate::errors::{Error, Result};
|
8 | 6 | use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesStart, BytesText, Event};
|
9 | 7 | #[cfg(feature = "encoding")]
|
@@ -68,23 +66,31 @@ impl Parser {
|
68 | 66 | ///
|
69 | 67 | /// [`Text`]: Event::Text
|
70 | 68 | pub fn read_text<'b>(&mut self, bytes: &'b [u8], first: bool) -> Result<Event<'b>> {
|
71 |
| - #[cfg(feature = "encoding")] |
72 |
| - if first && self.encoding.can_be_refined() { |
73 |
| - if let Some(encoding) = detect_encoding(bytes) { |
74 |
| - self.encoding = EncodingRef::BomDetected(encoding); |
75 |
| - } |
76 |
| - } |
| 69 | + let mut content = bytes; |
77 | 70 |
|
78 |
| - let content = if self.trim_text_end { |
| 71 | + if self.trim_text_end { |
79 | 72 | // Skip the ending '<'
|
80 | 73 | let len = bytes
|
81 | 74 | .iter()
|
82 | 75 | .rposition(|&b| !is_whitespace(b))
|
83 | 76 | .map_or_else(|| bytes.len(), |p| p + 1);
|
84 |
| - &bytes[..len] |
85 |
| - } else { |
86 |
| - bytes |
87 |
| - }; |
| 77 | + content = &bytes[..len]; |
| 78 | + } |
| 79 | + |
| 80 | + if first { |
| 81 | + #[cfg(feature = "encoding")] |
| 82 | + if self.encoding.can_be_refined() { |
| 83 | + if let Some(encoding) = encoding::detect_encoding(bytes) { |
| 84 | + self.encoding = EncodingRef::BomDetected(encoding); |
| 85 | + content = encoding::remove_bom(content, encoding); |
| 86 | + } |
| 87 | + } |
| 88 | + #[cfg(not(feature = "encoding"))] |
| 89 | + if bytes.starts_with(encoding::UTF8_BOM) { |
| 90 | + content = &bytes[encoding::UTF8_BOM.len()..]; |
| 91 | + } |
| 92 | + } |
| 93 | + |
88 | 94 | Ok(Event::Text(BytesText::wrap(content, self.decoder())))
|
89 | 95 | }
|
90 | 96 |
|
|
0 commit comments