1
1
//! Contains a high-level interface for an event-based XML emitter.
2
2
3
+ use crate :: encoding:: UTF8_BOM ;
3
4
use crate :: errors:: { Error , Result } ;
4
5
use crate :: events:: { attributes:: Attribute , BytesCData , BytesStart , BytesText , Event } ;
5
6
use std:: io:: Write ;
6
7
8
/// Writer-side encoding schemes supported by quick-xml.
///
/// Currently, `quick-xml` only supports UTF-8 as an output encoding, as the `encoding_rs`
/// library does not provide encoders for any other encodings. If you need to write UTF-16
/// encoded XML, consider writing the XML with a UTF-8 encoding and then re-encoding the file.
///
/// The enum is a plain, fieldless value, so it is cheap to copy and can be compared
/// and hashed directly.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum EncodingScheme {
    /// UTF-8 text with no "BOM". This is the default, and recommended value.
    Utf8,
    /// UTF-8 with a "BOM" identifier. The standard recommends against this but some software
    /// struggles to detect the encoding properly if it is not present.
    Utf8WithBom,
}
21
+
22
+ impl Default for EncodingScheme {
23
+ fn default ( ) -> Self {
24
+ Self :: Utf8
25
+ }
26
+ }
27
+
7
28
/// XML writer.
8
29
///
9
30
/// Writes XML `Event`s to a `Write` implementor.
@@ -57,6 +78,8 @@ pub struct Writer<W: Write> {
57
78
/// underlying writer
58
79
writer : W ,
59
80
indent : Option < Indentation > ,
81
+ encoding : EncodingScheme ,
82
+ first_write : bool ,
60
83
}
61
84
62
85
impl < W : Write > Writer < W > {
@@ -65,14 +88,34 @@ impl<W: Write> Writer<W> {
65
88
Writer {
66
89
writer : inner,
67
90
indent : None ,
91
+ encoding : EncodingScheme :: default ( ) ,
92
+ first_write : false ,
68
93
}
69
94
}
70
95
71
- /// Creates a Writer with configured whitespace indents from a generic Write
96
+ /// Creates a Writer from a generic Write implementor with configured whitespace indents
72
97
pub fn new_with_indent ( inner : W , indent_char : u8 , indent_size : usize ) -> Writer < W > {
73
98
Writer {
74
99
writer : inner,
75
100
indent : Some ( Indentation :: new ( indent_char, indent_size) ) ,
101
+ encoding : EncodingScheme :: default ( ) ,
102
+ first_write : true ,
103
+ }
104
+ }
105
+
106
+ /// Creates a Writer from a generic Write implementor with configured whitespace indents and a
107
+ /// specified encoding scheme.
108
+ pub fn new_with_indent_and_encoding (
109
+ inner : W ,
110
+ indent_char : u8 ,
111
+ indent_size : usize ,
112
+ encoding_scheme : EncodingScheme ,
113
+ ) -> Writer < W > {
114
+ Writer {
115
+ writer : inner,
116
+ indent : Some ( Indentation :: new ( indent_char, indent_size) ) ,
117
+ encoding : encoding_scheme,
118
+ first_write : true ,
76
119
}
77
120
}
78
121
@@ -129,7 +172,15 @@ impl<W: Write> Writer<W> {
129
172
130
173
/// Writes bytes
131
174
#[ inline]
132
- pub fn write ( & mut self , value : & [ u8 ] ) -> Result < ( ) > {
175
+ pub ( crate ) fn write ( & mut self , value : & [ u8 ] ) -> Result < ( ) > {
176
+ // The BOM should be the very first thing written to the file, but it should only be written once
177
+ if self . first_write {
178
+ match self . encoding {
179
+ EncodingScheme :: Utf8WithBom => self . writer . write_all ( UTF8_BOM ) ?,
180
+ _ => ( ) ,
181
+ }
182
+ self . first_write = false ;
183
+ }
133
184
self . writer . write_all ( value) . map_err ( Error :: Io )
134
185
}
135
186
@@ -579,4 +630,23 @@ mod indentation {
579
630
</outer>"#
580
631
) ;
581
632
}
633
+
634
/// A writer configured with `EncodingScheme::Utf8WithBom` must put the byte order mark
/// directly in front of the first element, with nothing in between.
#[test]
fn write_utf8_with_bom() {
    let mut buffer = Vec::new();
    let mut writer =
        Writer::new_with_indent_and_encoding(&mut buffer, b' ', 4, EncodingScheme::Utf8WithBom);

    writer
        .create_element("paired")
        .with_attribute(("attr1", "value1"))
        .with_attribute(("attr2", "value2"))
        .write_text_content(BytesText::new("text"))
        .expect("failure");

    // "\u{FEFF}" encodes to the three BOM bytes EF BB BF in UTF-8. The expected markup
    // contains no padding after the BOM, inside the attribute quotes, or before `>`.
    assert_eq!(
        &buffer,
        "\u{FEFF}<paired attr1=\"value1\" attr2=\"value2\">text</paired>".as_bytes()
    );
}
582
652
}
0 commit comments