Skip to content

Commit 06326f7

Browse files
authored
Output parse errors for the Rust part of the build step
This fixes #290 by outputting the parse errors encountered by the Rust build step's parser. Previously they were stored in the RcDom instance's errors vector and then ignored. Now they are threaded through to the final io::Result and printed by main(). The hardest part was adding line numbers to the errors. This required creating a wrapper for RcDom, called RcDomWithLineNumbers, which implements TreeSink: two methods, parse_error() and set_current_line(), are given custom behavior, while the many other methods simply delegate to RcDom's implementation. Additionally, this enables exact_errors as a parser option, which provides slightly more information in a couple of cases related to character references.
1 parent 6757955 commit 06326f7

File tree

10 files changed

+303
-42
lines changed

10 files changed

+303
-42
lines changed

Cargo.lock

Lines changed: 12 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ tokio = { version = "1", features = ["full"] }
1111
html5ever = "0.26.0"
1212
markup5ever_rcdom = "0.2.0"
1313
regex = "1"
14+
delegate = "0.12.0"
1415

1516
[dev-dependencies]
1617
tempfile = "3"

src/annotate_attributes.rs

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,7 @@ mod tests {
313313
// reordered in the HTML spec).
314314
let document = parse_document_async(
315315
r#"
316+
<!DOCTYPE html>
316317
<h3>The a element</h3>
317318
<dl class="element">
318319
<dt>Categories
@@ -338,7 +339,7 @@ mod tests {
338339
assert_eq!(
339340
serialize_for_test(&[document]),
340341
r#"
341-
<html><head></head><body><h3>The a element</h3>
342+
<!DOCTYPE html><html><head></head><body><h3>The a element</h3>
342343
<dl class="element">
343344
<dt>Categories
344345
</dt><dd>Flow content
@@ -369,6 +370,7 @@ mod tests {
369370
// i.e., the variant description is used where requested
370371
let document = parse_document_async(
371372
r#"
373+
<!DOCTYPE html>
372374
<h3>The a element</h3>
373375
<dl class="element">
374376
<dt><span data-x="concept-element-attributes">Content attributes</span>
@@ -390,7 +392,7 @@ mod tests {
390392
assert_eq!(
391393
serialize_for_test(&[document]),
392394
r#"
393-
<html><head></head><body><h3>The a element</h3>
395+
<!DOCTYPE html><html><head></head><body><h3>The a element</h3>
394396
<dl class="element">
395397
<dt><span data-x="concept-element-attributes">Content attributes</span>
396398
</dt><dd><code data-x="attr-a-href">href</code>
@@ -415,6 +417,7 @@ mod tests {
415417
// Checks that the special rules for using : instead of an em dash work.
416418
let document = parse_document_async(
417419
r#"
420+
<!DOCTYPE html>
418421
<h3>The a element</h3>
419422
<dl class="element">
420423
<dt><span data-x="concept-element-attributes">Content attributes</span>
@@ -431,7 +434,7 @@ mod tests {
431434
assert_eq!(
432435
serialize_for_test(&[document]),
433436
r#"
434-
<html><head></head><body><h3>The a element</h3>
437+
<!DOCTYPE html><html><head></head><body><h3>The a element</h3>
435438
<dl class="element">
436439
<dt><span data-x="concept-element-attributes">Content attributes</span>
437440
</dt><dd>Also, the <code data-x="attr-a-name">name</code> attribute <span data-x="attr-a-name">has special semantics</span> on this element: Anchor name
@@ -450,6 +453,7 @@ mod tests {
450453
// Checks that the special rules for joining any special semantics with a ; work.
451454
let document = parse_document_async(
452455
r#"
456+
<!DOCTYPE html>
453457
<h3>The a element</h3>
454458
<dl class="element">
455459
<dt><span data-x="concept-element-attributes">Content attributes</span>
@@ -467,7 +471,7 @@ mod tests {
467471
assert_eq!(
468472
serialize_for_test(&[document]),
469473
r#"
470-
<html><head></head><body><h3>The a element</h3>
474+
<!DOCTYPE html><html><head></head><body><h3>The a element</h3>
471475
<dl class="element">
472476
<dt><span data-x="concept-element-attributes">Content attributes</span>
473477
</dt><dd>Also, the <code data-x="attr-a-name">name</code> attribute <span data-x="attr-a-name">has special semantics</span> on this element: Anchor name; Name of the anchor
@@ -488,6 +492,7 @@ mod tests {
488492
// repeating the description.
489493
let document = parse_document_async(
490494
r#"
495+
<!DOCTYPE html>
491496
<h3>The img element</h3>
492497
<dl class="element">
493498
<dt><span data-x="concept-element-attributes">Content attributes</span>
@@ -509,7 +514,7 @@ mod tests {
509514
assert_eq!(
510515
serialize_for_test(&[document]),
511516
r#"
512-
<html><head></head><body><h3>The img element</h3>
517+
<!DOCTYPE html><html><head></head><body><h3>The img element</h3>
513518
<dl class="element">
514519
<dt><span data-x="concept-element-attributes">Content attributes</span>
515520
</dt><dd><code data-x="attr-dim-width">width</code>

src/boilerplate.rs

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -170,14 +170,16 @@ mod tests {
170170
"<tr><td>en<td>English",
171171
)
172172
.await?;
173-
let document =
174-
parse_document_async("<table><!--BOILERPLATE languages-->".as_bytes()).await?;
173+
let document = parse_document_async(
174+
"<!DOCTYPE html><table><!--BOILERPLATE languages--></table>".as_bytes(),
175+
)
176+
.await?;
175177
let mut proc = Processor::new(boilerplate_dir.path(), Path::new("."));
176178
dom_utils::scan_dom(&document, &mut |h| proc.visit(h));
177179
proc.apply().await?;
178180
assert_eq!(
179181
serialize_for_test(&[document]),
180-
"<html><head></head><body><table><tbody><tr><td>en</td><td>English</td></tr></tbody></table></body></html>");
182+
"<!DOCTYPE html><html><head></head><body><table><tbody><tr><td>en</td><td>English</td></tr></tbody></table></body></html>");
181183
Ok(())
182184
}
183185

@@ -189,15 +191,16 @@ mod tests {
189191
"data:text/html,Hello, world!",
190192
)
191193
.await?;
192-
let document =
193-
parse_document_async("<a href=\"<!--BOILERPLATE data.url-->\">hello</a>".as_bytes())
194-
.await?;
194+
let document = parse_document_async(
195+
"<!DOCTYPE html><a href=\"<!--BOILERPLATE data.url-->\">hello</a>".as_bytes(),
196+
)
197+
.await?;
195198
let mut proc = Processor::new(boilerplate_dir.path(), Path::new("."));
196199
dom_utils::scan_dom(&document, &mut |h| proc.visit(h));
197200
proc.apply().await?;
198201
assert_eq!(
199202
serialize_for_test(&[document]),
200-
"<html><head></head><body><a href=\"data:text/html,Hello, world!\">hello</a></body></html>");
203+
"<!DOCTYPE html><html><head></head><body><a href=\"data:text/html,Hello, world!\">hello</a></body></html>");
201204
Ok(())
202205
}
203206

@@ -208,23 +211,23 @@ mod tests {
208211
tokio::fs::write(example_dir.path().join("ex2"), "second").await?;
209212
tokio::fs::write(example_dir.path().join("ignored"), "bad").await?;
210213
let document =
211-
parse_document_async("<pre>EXAMPLE ex1</pre><pre><code class=html>\nEXAMPLE ex2 </code></pre><p>EXAMPLE ignored</p>".as_bytes())
214+
parse_document_async("<!DOCTYPE html><pre>EXAMPLE ex1</pre><pre><code class=html>\nEXAMPLE ex2 </code></pre><p>EXAMPLE ignored</p>".as_bytes())
212215
.await?;
213216
let mut proc = Processor::new(Path::new("."), example_dir.path());
214217
dom_utils::scan_dom(&document, &mut |h| proc.visit(h));
215218
proc.apply().await?;
216219
assert_eq!(
217220
serialize_for_test(&[document]),
218-
"<html><head></head><body><pre>first</pre><pre><code class=\"html\">second</code></pre><p>EXAMPLE ignored</p></body></html>" );
221+
"<!DOCTYPE html><html><head></head><body><pre>first</pre><pre><code class=\"html\">second</code></pre><p>EXAMPLE ignored</p></body></html>" );
219222
Ok(())
220223
}
221224

222225
#[tokio::test]
223226
async fn test_errors_unsafe_paths() -> io::Result<()> {
224227
let bad_path_examples = [
225-
"<body><!--BOILERPLATE /etc/passwd-->",
226-
"<body><pre data-x=\"<!--BOILERPLATE src/../../foo-->\"></pre>",
227-
"<body><pre>EXAMPLE ../foo</pre>",
228+
"<!DOCTYPE html><body><!--BOILERPLATE /etc/passwd-->",
229+
"<!DOCTYPE html><body><pre data-x=\"<!--BOILERPLATE src/../../foo-->\"></pre>",
230+
"<!DOCTYPE html><body><pre>EXAMPLE ../foo</pre>",
228231
];
229232
for example in bad_path_examples {
230233
let document = parse_document_async(example.as_bytes()).await?;

src/interface_index.rs

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,7 @@ mod tests {
188188
async fn test_two_interfaces_in_one_block() -> io::Result<()> {
189189
let document = parse_document_async(
190190
r#"
191+
<!DOCTYPE html>
191192
<pre><code class=idl>
192193
interface <dfn interface>HTMLMarqueeElement</dfn> { ... }
193194
interface <dfn interface>HTMLBlinkElement</dfn> { ... }
@@ -204,7 +205,7 @@ INSERT INTERFACES HERE
204205
assert_eq!(
205206
serialize_for_test(&[document]),
206207
r#"
207-
<html><head></head><body><pre><code class="idl">
208+
<!DOCTYPE html><html><head></head><body><pre><code class="idl">
208209
interface <dfn interface="">HTMLMarqueeElement</dfn> { ... }
209210
interface <dfn interface="">HTMLBlinkElement</dfn> { ... }
210211
</code></pre>
@@ -217,6 +218,7 @@ interface <dfn interface="">HTMLBlinkElement</dfn> { ... }
217218
async fn test_two_interfaces_in_separate_blocks() -> io::Result<()> {
218219
let document = parse_document_async(
219220
r#"
221+
<!DOCTYPE html>
220222
<pre><code class=idl>
221223
interface <dfn interface>HTMLMarqueeElement</dfn> { ... }
222224
</code></pre>
@@ -235,7 +237,7 @@ INSERT INTERFACES HERE
235237
assert_eq!(
236238
serialize_for_test(&[document]),
237239
r#"
238-
<html><head></head><body><pre><code class="idl">
240+
<!DOCTYPE html><html><head></head><body><pre><code class="idl">
239241
interface <dfn interface="">HTMLMarqueeElement</dfn> { ... }
240242
</code></pre>
241243
<pre><code class="idl">
@@ -250,6 +252,7 @@ interface <dfn interface="">HTMLBlinkElement</dfn> { ... }
250252
async fn interface_with_partial() -> io::Result<()> {
251253
let document = parse_document_async(
252254
r#"
255+
<!DOCTYPE html>
253256
<pre><code class=idl>
254257
interface <dfn interface>HTMLMarqueeElement</dfn> { ... }
255258
</code></pre>
@@ -268,7 +271,7 @@ INSERT INTERFACES HERE
268271
assert_eq!(
269272
serialize_for_test(&[document]),
270273
r##"
271-
<html><head></head><body><pre><code class="idl">
274+
<!DOCTYPE html><html><head></head><body><pre><code class="idl">
272275
interface <dfn interface="">HTMLMarqueeElement</dfn> { ... }
273276
</code></pre>
274277
<pre><code class="idl">
@@ -283,6 +286,7 @@ partial interface <span id="HTMLMarqueeElement-partial">HTMLMarqueeElement</span
283286
async fn interface_with_two_partials() -> io::Result<()> {
284287
let document = parse_document_async(
285288
r#"
289+
<!DOCTYPE html>
286290
<pre><code class=idl>
287291
interface <dfn interface>HTMLMarqueeElement</dfn> { ... }
288292
partial interface <span id=HTMLMarqueeElement-partial>HTMLMarqueeElement</span> { ... }
@@ -300,7 +304,7 @@ INSERT INTERFACES HERE
300304
assert_eq!(
301305
serialize_for_test(&[document]),
302306
r##"
303-
<html><head></head><body><pre><code class="idl">
307+
<!DOCTYPE html><html><head></head><body><pre><code class="idl">
304308
interface <dfn interface="">HTMLMarqueeElement</dfn> { ... }
305309
partial interface <span id="HTMLMarqueeElement-partial">HTMLMarqueeElement</span> { ... }
306310
partial interface <span id="HTMLMarqueeElement-partial-2">HTMLMarqueeElement</span> { ... }
@@ -314,6 +318,7 @@ partial interface <span id="HTMLMarqueeElement-partial-2">HTMLMarqueeElement</sp
314318
async fn only_partials() -> io::Result<()> {
315319
let document = parse_document_async(
316320
r#"
321+
<!DOCTYPE html>
317322
<pre><code class=idl>
318323
partial interface <span id=HTMLMarqueeElement-partial>HTMLMarqueeElement</span> { ... }
319324
partial interface <span id=HTMLMarqueeElement-partial-2>HTMLMarqueeElement</span> { ... }
@@ -330,7 +335,7 @@ INSERT INTERFACES HERE
330335
assert_eq!(
331336
serialize_for_test(&[document]),
332337
r##"
333-
<html><head></head><body><pre><code class="idl">
338+
<!DOCTYPE html><html><head></head><body><pre><code class="idl">
334339
partial interface <span id="HTMLMarqueeElement-partial">HTMLMarqueeElement</span> { ... }
335340
partial interface <span id="HTMLMarqueeElement-partial-2">HTMLMarqueeElement</span> { ... }
336341
</code></pre>
@@ -343,6 +348,7 @@ partial interface <span id="HTMLMarqueeElement-partial-2">HTMLMarqueeElement</sp
343348
async fn marker_before() -> io::Result<()> {
344349
let document = parse_document_async(
345350
r#"
351+
<!DOCTYPE html>
346352
INSERT INTERFACES HERE
347353
<pre><code class=idl>
348354
interface <dfn interface>HTMLMarqueeElement</dfn> { ... }
@@ -357,20 +363,20 @@ interface <dfn interface>HTMLMarqueeElement</dfn> { ... }
357363
proc.apply()?;
358364
assert_eq!(
359365
serialize_for_test(&[document]),
360-
r##"
361-
<html><head></head><body><ul class="brief"><li><code>HTMLMarqueeElement</code></li></ul>
366+
r#"
367+
<!DOCTYPE html><html><head></head><body><ul class="brief"><li><code>HTMLMarqueeElement</code></li></ul>
362368
<pre><code class="idl">
363369
interface <dfn interface="">HTMLMarqueeElement</dfn> { ... }
364370
</code></pre></body></html>
365-
"##
371+
"#
366372
.trim()
367373
);
368374
Ok(())
369375
}
370376

371377
#[tokio::test]
372378
async fn no_marker() -> io::Result<()> {
373-
let document = parse_document_async("".as_bytes()).await?;
379+
let document = parse_document_async("<!DOCTYPE html>".as_bytes()).await?;
374380
let mut proc = Processor::new();
375381
dom_utils::scan_dom(&document, &mut |h| proc.visit(h));
376382
let result = proc.apply();
@@ -381,7 +387,8 @@ interface <dfn interface="">HTMLMarqueeElement</dfn> { ... }
381387
#[tokio::test]
382388
async fn duplicate_marker() -> io::Result<()> {
383389
let document = parse_document_async(
384-
"<div>INSERT INTERFACES HERE</div><div>INSERT INTERFACES HERE</div>".as_bytes(),
390+
"<!DOCTYPE html><div>INSERT INTERFACES HERE</div><div>INSERT INTERFACES HERE</div>"
391+
.as_bytes(),
385392
)
386393
.await?;
387394
let mut proc = Processor::new();
@@ -395,6 +402,7 @@ interface <dfn interface="">HTMLMarqueeElement</dfn> { ... }
395402
async fn duplicate_dfn() -> io::Result<()> {
396403
let document = parse_document_async(
397404
r#"
405+
<!DOCTYPE html>
398406
<pre><code class=idl>
399407
interface <dfn interface>HTMLMarqueeElement</dfn> { ... }
400408
interface <dfn interface>HTMLMarqueeElement</dfn> { ... }

src/main.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,21 @@ mod dom_utils;
1414
mod interface_index;
1515
mod io_utils;
1616
mod parser;
17+
mod rcdom_with_line_numbers;
1718
mod represents;
1819
mod tag_omission;
1920

2021
#[tokio::main]
2122
async fn main() -> io::Result<()> {
23+
// This gives slightly prettier error-printing.
24+
if let Err(e) = run().await {
25+
eprintln!("{}", e);
26+
std::process::exit(1);
27+
}
28+
Ok(())
29+
}
30+
31+
async fn run() -> io::Result<()> {
2232
// Since we're using Rc in the DOM implementation, we must ensure that tasks
2333
// which act on it are confined to this thread.
2434

0 commit comments

Comments
 (0)