1
1
//! Contains a high-level interface for an events-based XML emitter.
2
2
3
+ use crate :: encoding:: UTF8_BOM ;
3
4
use crate :: errors:: { Error , Result } ;
4
5
use crate :: events:: { attributes:: Attribute , BytesCData , BytesStart , BytesText , Event } ;
5
6
use std:: io:: Write ;
6
7
8
/// Writer-side encoding schemes supported by quick-xml.
///
/// Currently, `quick-xml` only supports UTF-8 as an output encoding as the `encoding_rs`
/// library does not provide encoders for any other encodings. If you need to write UTF-16
/// encoded XML, consider writing the XML with a UTF-8 encoding and then re-encoding the file.
///
/// The enum is a plain, fieldless tag, so it is `Copy` and can be compared with `==`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum EncodingScheme {
    /// UTF-8 text with no "BOM". This is the default, and recommended value.
    Utf8,
    /// UTF-8 with a "BOM" identifier. The standard recommends against this but some software
    /// requires it to be present.
    Utf8WithBom,
}
21
+
22
+ impl Default for EncodingScheme {
23
+ fn default ( ) -> Self {
24
+ Self :: Utf8
25
+ }
26
+ }
27
+
7
28
/// XML writer.
8
29
///
9
30
/// Writes XML `Event`s to a `Write` implementor.
@@ -57,6 +78,8 @@ pub struct Writer<W: Write> {
57
78
/// underlying writer
58
79
writer : W ,
59
80
indent : Option < Indentation > ,
81
+ encoding : EncodingScheme ,
82
+ first_write : bool ,
60
83
}
61
84
62
85
impl < W : Write > Writer < W > {
@@ -65,6 +88,8 @@ impl<W: Write> Writer<W> {
65
88
Writer {
66
89
writer : inner,
67
90
indent : None ,
91
+ encoding : EncodingScheme :: default ( ) ,
92
+ first_write : false ,
68
93
}
69
94
}
70
95
@@ -73,6 +98,23 @@ impl<W: Write> Writer<W> {
73
98
Writer {
74
99
writer : inner,
75
100
indent : Some ( Indentation :: new ( indent_char, indent_size) ) ,
101
+ encoding : EncodingScheme :: default ( ) ,
102
+ first_write : true ,
103
+ }
104
+ }
105
+
106
+ /// Creates a Writer with configured whitespace indents from a generic Write
107
+ pub fn new_with_indent_and_encoding (
108
+ inner : W ,
109
+ indent_char : u8 ,
110
+ indent_size : usize ,
111
+ encoding : EncodingScheme ,
112
+ ) -> Writer < W > {
113
+ Writer {
114
+ writer : inner,
115
+ indent : Some ( Indentation :: new ( indent_char, indent_size) ) ,
116
+ encoding : encoding,
117
+ first_write : true ,
76
118
}
77
119
}
78
120
@@ -129,7 +171,15 @@ impl<W: Write> Writer<W> {
129
171
130
172
/// Writes bytes
131
173
#[ inline]
132
- pub fn write ( & mut self , value : & [ u8 ] ) -> Result < ( ) > {
174
+ fn write ( & mut self , value : & [ u8 ] ) -> Result < ( ) > {
175
+ // The BOM should be the very first thing written to the file, but it should only be written once
176
+ if self . first_write {
177
+ match self . encoding {
178
+ EncodingScheme :: Utf8WithBom => self . writer . write_all ( UTF8_BOM ) ?,
179
+ _ => ( ) ,
180
+ }
181
+ self . first_write = false ;
182
+ }
133
183
self . writer . write_all ( value) . map_err ( Error :: Io )
134
184
}
135
185
@@ -579,4 +629,23 @@ mod indentation {
579
629
</outer>"#
580
630
) ;
581
631
}
632
+
633
/// A writer configured with `EncodingScheme::Utf8WithBom` must produce output that starts
/// with the UTF-8 BOM, followed directly by the serialized element.
#[test]
fn write_utf8_with_bom() {
    let mut buffer = Vec::new();
    let mut writer =
        Writer::new_with_indent_and_encoding(&mut buffer, b' ', 4, EncodingScheme::Utf8WithBom);

    writer
        .create_element("paired")
        .with_attribute(("attr1", "value1"))
        .with_attribute(("attr2", "value2"))
        .write_text_content(BytesText::new("text"))
        .expect("failure");

    // The attribute values are passed without surrounding whitespace, so the expected
    // markup must not contain any either. `\u{FEFF}` encodes to the BOM bytes in UTF-8.
    assert_eq!(
        &buffer,
        "\u{FEFF}<paired attr1=\"value1\" attr2=\"value2\">text</paired>".as_bytes()
    );
}
582
651
}
0 commit comments