Skip to content

Commit 161f7f1

Browse files
committed
Defer resolution of link reference definitions
We must not remove link reference definitions until list tightness has been checked, so this commit defers the resolution of link reference definitions until the document is finalized. Link reference definitions in setext headings must still be removed eagerly, in order to determine whether the construct is a setext heading or a thematic break. As a result, this commit provides separate, slightly different functions for resolving link reference definitions and for checking whether a line is blank: one variant operating on `cmark_strbuf` and one on `cmark_chunk`.
1 parent ae9d580 commit 161f7f1

File tree

2 files changed

+114
-15
lines changed

2 files changed

+114
-15
lines changed

src/blocks.c

Lines changed: 111 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -135,9 +135,10 @@ void cmark_parser_free(cmark_parser *parser) {
135135
static cmark_node *finalize(cmark_parser *parser, cmark_node *b);
136136

137137
// Returns true if line has only space characters, else false.
138-
static bool is_blank(cmark_strbuf *s, bufsize_t offset) {
139-
while (offset < s->size) {
140-
switch (s->ptr[offset]) {
138+
static bool is_blank_raw(const unsigned char *ptr, const bufsize_t size,
139+
bufsize_t offset) {
140+
while (offset < size) {
141+
switch (ptr[offset]) {
141142
case '\r':
142143
case '\n':
143144
return true;
@@ -155,6 +156,17 @@ static bool is_blank(cmark_strbuf *s, bufsize_t offset) {
155156
return true;
156157
}
157158

159+
// Returns true if line has only space characters, else false.
160+
static CMARK_INLINE bool is_blank_strbuf(cmark_strbuf *s, bufsize_t offset) {
161+
return is_blank_raw(s->ptr, s->size, offset);
162+
}
163+
164+
// Returns true if line has only space characters, else false.
165+
static CMARK_INLINE bool is_blank_chunk(cmark_chunk *s, bufsize_t offset) {
166+
return is_blank_raw(s->data, s->len, offset);
167+
}
168+
169+
158170
static CMARK_INLINE bool can_contain(cmark_node_type parent_type,
159171
cmark_node_type child_type) {
160172
return (parent_type == CMARK_NODE_DOCUMENT ||
@@ -244,15 +256,103 @@ static bool resolve_reference_link_definitions(cmark_parser *parser) {
244256
chunk.len -= pos;
245257
}
246258
cmark_strbuf_drop(node_content, (node_content->size - chunk.len));
247-
return !is_blank(node_content, 0);
259+
return !is_blank_strbuf(node_content, 0);
260+
}
261+
262+
// Parse link reference definitions in the given finalized paragraph.
263+
// Results are added to `refmap` of the parser.
264+
// `data`, `len`, `start_line`, and `start_column` of the parser will be
265+
// updated.
266+
// If the paragraph contains only link reference definitions, it is removed
267+
// from the tree and freed.
268+
static void resolve_deferred_reference_link_definitions(cmark_parser *parser,
269+
cmark_node *paragraph) {
270+
bufsize_t pos;
271+
cmark_chunk chunk = {paragraph->data, paragraph->len};
272+
int new_start_line;
273+
int new_start_column;
274+
unsigned char *p;
275+
unsigned char *resized;
276+
277+
while (chunk.len && chunk.data[0] == '[' &&
278+
(pos = cmark_parse_reference_inline(parser->mem, &chunk,
279+
parser->refmap))) {
280+
281+
chunk.data += pos;
282+
chunk.len -= pos;
283+
}
284+
285+
if (paragraph->data == chunk.data) {
286+
// No definitions found.
287+
return;
288+
}
289+
290+
if (chunk.len == 0) {
291+
// The paragraph contained only definitions.
292+
cmark_node_free(paragraph);
293+
} else {
294+
// Adjust the start position and the data.
295+
296+
new_start_line = paragraph->start_line;
297+
new_start_column = paragraph->start_column;
298+
299+
for (p = paragraph->data; p < chunk.data; p++) {
300+
switch (*p) {
301+
case '\r':
302+
new_start_line++;
303+
new_start_column = 0;
304+
if (p + 1 < chunk.data && p[1] == '\n') {
305+
p++;
306+
}
307+
break;
308+
case '\n':
309+
new_start_line++;
310+
new_start_column = 0;
311+
break;
312+
default:
313+
new_start_column++;
314+
break;
315+
}
316+
}
317+
318+
paragraph->start_line = new_start_line;
319+
paragraph->start_column = new_start_column;
320+
321+
memmove(paragraph->data, chunk.data, chunk.len);
322+
resized = parser->mem->realloc(paragraph->data, chunk.len);
323+
chunk.data = resized;
324+
paragraph->data = resized;
325+
paragraph->len = chunk.len;
326+
327+
if (is_blank_chunk(&chunk, 0)) {
328+
cmark_node_free(paragraph);
329+
}
330+
}
331+
}
332+
333+
static void resolve_all_reference_link_definitions(cmark_parser *parser) {
334+
cmark_iter *iter = cmark_iter_new(parser->root);
335+
cmark_node *cur;
336+
cmark_event_type ev_type;
337+
338+
while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
339+
cur = cmark_iter_get_node(iter);
340+
// Process at exit so we can free the node if it contains only definitions.
341+
if (ev_type == CMARK_EVENT_EXIT) {
342+
if (S_type(cur) == CMARK_NODE_PARAGRAPH) {
343+
resolve_deferred_reference_link_definitions(parser, cur);
344+
}
345+
}
346+
}
347+
348+
cmark_iter_free(iter);
248349
}
249350

250351
static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
251352
bufsize_t pos;
252353
cmark_node *item;
253354
cmark_node *subitem;
254355
cmark_node *parent;
255-
bool has_content;
256356

257357
parent = b->parent;
258358
assert(b->flags &
@@ -281,17 +381,9 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
281381

282382
switch (S_type(b)) {
283383
case CMARK_NODE_PARAGRAPH:
284-
{
285-
has_content = resolve_reference_link_definitions(parser);
286-
if (!has_content) {
287-
// remove blank node (former reference def)
288-
cmark_node_free(b);
289-
} else {
290-
b->len = node_content->size;
291-
b->data = cmark_strbuf_detach(node_content);
292-
}
384+
b->len = node_content->size;
385+
b->data = cmark_strbuf_detach(node_content);
293386
break;
294-
}
295387

296388
case CMARK_NODE_CODE_BLOCK:
297389
if (!b->as.code.fenced) { // indented code
@@ -360,6 +452,10 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
360452

361453
break;
362454

455+
case CMARK_NODE_DOCUMENT:
456+
resolve_all_reference_link_definitions(parser);
457+
break;
458+
363459
default:
364460
break;
365461
}

src/node.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,9 @@ static void S_free_nodes(cmark_node *e) {
127127
mem->free(e->as.custom.on_enter);
128128
mem->free(e->as.custom.on_exit);
129129
break;
130+
case CMARK_NODE_PARAGRAPH:
131+
mem->free(e->data);
132+
break;
130133
default:
131134
break;
132135
}

0 commit comments

Comments
 (0)