Skip to content

Commit 6666237

Browse files
committed
Start writing some tests for decoding functionality
Fix up descriptions on some decoding functions
1 parent 380826e commit 6666237

File tree

4 files changed

+53
-24
lines changed

4 files changed

+53
-24
lines changed

src/encoding.rs

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -105,10 +105,9 @@ impl Decoder {
105 105
}
106 106
}
107 107

108-
/// Decodes the provided bytes using the specified encoding, ignoring the BOM
109-
/// if it is present in the `bytes`.
108+
/// Decodes the provided bytes using the specified encoding.
110 109
///
111-
/// Returns an error in case of malformed sequences in the `bytes`.
110+
/// Returns an error in case of malformed or non-representable sequences in the `bytes`.
112 111
#[cfg(feature = "encoding")]
113 112
pub fn decode<'b>(bytes: &'b [u8], encoding: &'static Encoding) -> Result<Cow<'b, str>> {
114 113
encoding
@@ -119,7 +118,7 @@ pub fn decode<'b>(bytes: &'b [u8], encoding: &'static Encoding) -> Result<Cow<'b
119 118
/// Decodes a slice with an unknown encoding, removing the BOM if it is present
120 119
/// in the bytes.
121 120
///
122-
/// Returns an error in case of malformed sequences in the `bytes`.
121+
/// Returns an error in case of malformed or non-representable sequences in the `bytes`.
123 122
#[cfg(feature = "encoding")]
124 123
pub fn decode_with_bom_removal<'b>(bytes: &'b [u8]) -> Result<Cow<'b, str>> {
125 124
if let Some(encoding) = detect_encoding(bytes) {
@@ -185,5 +184,3 @@ pub fn detect_encoding(bytes: &[u8]) -> Option<&'static Encoding> {
185 184
_ => None,
186 185
}
187 186
}
188-
189-
// TODO: add some tests for functions

tests/documents/utf8.xml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
<?xml version="1.0"?>
2+
<project name="project-name">
3+
</project>

tests/encodings.rs

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
#[cfg(feature = "encoding")]
2+
mod decode {
3+
use encoding_rs::{UTF_16BE, UTF_16LE, UTF_8};
4+
use quick_xml::encoding::*;
5+
use std::borrow::Cow;
6+
7+
static UTF16BE_TEXT_WITH_BOM: &[u8] = include_bytes!("./documents/utf16be.xml");
8+
static UTF16LE_TEXT_WITH_BOM: &[u8] = include_bytes!("./documents/utf16le.xml");
9+
static UTF8_TEXT_WITH_BOM: &[u8] = include_bytes!("./documents/utf8.xml");
10+
11+
static UTF8_TEXT: &str = r#"<?xml version="1.0"?>
12+
<project name="project-name">
13+
</project>
14+
"#;
15+
16+
#[test]
17+
fn test_removes_bom() {
18+
// No BOM
19+
assert_eq!(
20+
decode_with_bom_removal(UTF8_TEXT.as_bytes()).unwrap(),
21+
Cow::Borrowed(UTF8_TEXT)
22+
);
23+
// BOM
24+
assert_eq!(
25+
decode_with_bom_removal(UTF8_TEXT_WITH_BOM).unwrap(),
26+
Cow::Borrowed(UTF8_TEXT)
27+
);
28+
assert_eq!(
29+
decode_with_bom_removal(UTF16BE_TEXT_WITH_BOM).unwrap(),
30+
Cow::Borrowed(UTF8_TEXT).into_owned()
31+
);
32+
assert_eq!(
33+
decode_with_bom_removal(UTF16LE_TEXT_WITH_BOM).unwrap(),
34+
Cow::Borrowed(UTF8_TEXT).into_owned()
35+
);
36+
}
37+
38+
#[test]
39+
fn test_detect_encoding() {
40+
// No BOM
41+
assert_eq!(detect_encoding(UTF8_TEXT.as_bytes()), Some(UTF_8));
42+
// BOM
43+
assert_eq!(detect_encoding(UTF8_TEXT_WITH_BOM), Some(UTF_8));
44+
assert_eq!(detect_encoding(UTF16BE_TEXT_WITH_BOM), Some(UTF_16BE));
45+
assert_eq!(detect_encoding(UTF16LE_TEXT_WITH_BOM), Some(UTF_16LE));
46+
}
47+
}

tests/test.rs

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -89,24 +89,6 @@ fn test_comment_starting_with_gt() {
89 89
}
90 90
}
91 91

92-
#[test]
93-
#[cfg(feature = "encoding")]
94-
fn test_koi8_r_encoding() {
95-
let src = include_bytes!("documents/opennews_all.rss").as_ref();
96-
let mut buf = vec![];
97-
let mut r = Reader::from_reader(src);
98-
r.trim_text(true).expand_empty_elements(false);
99-
loop {
100-
match r.read_event_into(&mut buf) {
101-
Ok(Text(e)) => {
102-
e.unescape().unwrap();
103-
}
104-
Ok(Eof) => break,
105-
_ => (),
106-
}
107-
}
108-
}
109-
110 92
#[test]
111 93
fn fuzz_53() {
112 94
let data: &[u8] = b"\xe9\x00\x00\x00\x00\x00\x00\x00\x00\

0 commit comments

Comments
 (0)