@@ -18,6 +18,8 @@ type BaseOpType = 'delete' | 'create'
18
18
19
19
interface HtmlDiffConfig {
20
20
minMatchedSize : number
21
+ greedyMatch : boolean
22
+ greedyBoundary : number
21
23
classNames : {
22
24
createText : string
23
25
deleteText : string
@@ -29,7 +31,26 @@ interface HtmlDiffConfig {
29
31
}
30
32
31
33
export interface HtmlDiffOptions {
34
+ /**
35
+ * Determine the minimum threshold for calculating common subsequences.
36
+ * You may adjust it to a value larger than 2, but not lower, due to the potential inclusion of HTML tags in the count.
37
+ * @defaultValue 2
38
+ */
32
39
minMatchedSize ?: number
40
+ /**
41
+ * When greedyMatch is enabled, if the length of the sub-sequences exceeds greedyBoundary,
42
+ * we will use the matched sub-sequences that are sufficiently good, even if they are not optimal, to enhance performance.
43
+ * @defaultValue true
44
+ */
45
+ greedyMatch ?: boolean
46
+ /**
47
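+ * The sub-sequence length above which greedy matching takes effect (only used when greedyMatch is enabled).
+ * @defaultValue 1000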
48
+ */
49
+ greedyBoundary ?: number
50
+ /**
51
+ * The classNames for wrapper DOM.
52
+ * Use this to configure your own styles without importing the built-in CSS file
53
+ */
33
54
classNames ?: Partial < {
34
55
createText ?: string
35
56
deleteText ?: string
@@ -53,6 +74,7 @@ export default class HtmlDiff {
53
74
private readonly newWords : string [ ] = [ ]
54
75
private readonly matchedBlockList : MatchedBlock [ ] = [ ]
55
76
private readonly operationList : Operation [ ] = [ ]
77
+ private leastCommonLength : number = Infinity
56
78
private unifiedContent ?: string
57
79
private sideBySideContents ?: [ string , string ]
58
80
@@ -61,6 +83,8 @@ export default class HtmlDiff {
61
83
newHtml : string ,
62
84
{
63
85
minMatchedSize = 2 ,
86
+ greedyMatch = true ,
87
+ greedyBoundary = 1000 ,
64
88
classNames = {
65
89
createText : 'html-diff-create-text-wrapper' ,
66
90
deleteText : 'html-diff-delete-text-wrapper' ,
@@ -74,6 +98,8 @@ export default class HtmlDiff {
74
98
// init config
75
99
this . config = {
76
100
minMatchedSize,
101
+ greedyMatch,
102
+ greedyBoundary,
77
103
classNames : {
78
104
createText : 'html-diff-create-text-wrapper' ,
79
105
deleteText : 'html-diff-delete-text-wrapper' ,
@@ -93,8 +119,8 @@ export default class HtmlDiff {
93
119
}
94
120
95
121
// step1: split HTML to atomic words
96
- this . oldWords = this . convertHtml2Words ( oldHtml )
97
- this . newWords = this . convertHtml2Words ( newHtml )
122
+ this . oldWords = this . tokenize ( oldHtml )
123
+ this . newWords = this . tokenize ( newHtml )
98
124
// step2: find matched blocks
99
125
this . matchedBlockList = this . getMatchedBlockList ( )
100
126
// step3: generate operation list
@@ -277,11 +303,12 @@ export default class HtmlDiff {
277
303
}
278
304
279
305
/**
280
- * convert HTML to word list
281
- * "<a> Hello World </a>"
306
+ * convert HTML to tokens
307
+ * @example
308
+ * tokenize("<a> Hello World </a>")
282
309
* ["<a>", " ", "Hello", " ", "World", " ", "</a>"]
283
310
*/
284
- private convertHtml2Words ( html : string ) : string [ ] {
311
+ private tokenize ( html : string ) : string [ ] {
285
312
// atomic word: HTML tag, continuous numbers or letters, blank space, symbol, or other word such as Chinese
286
313
return (
287
314
html . match (
@@ -329,19 +356,24 @@ export default class HtmlDiff {
329
356
}
330
357
}
331
358
332
- const ret = this . computeMatchedBlockList (
333
- start ? i : 0 ,
334
- end ? e1 + 1 : n1 ,
335
- start ? i : 0 ,
336
- end ? e2 + 1 : n2 ,
337
- )
359
+ const oldStart = start ? i : 0
360
+ const oldEnd = end ? e1 + 1 : n1
361
+ const newStart = start ? i : 0
362
+ const newEnd = end ? e2 + 1 : n2
363
+ // optimization for matching large sequences
364
+ if ( this . config . greedyMatch ) {
365
+ const commonLength = Math . min ( oldEnd - oldStart , newEnd - newStart )
366
+ if ( commonLength > this . config . greedyBoundary ) {
367
+ this . leastCommonLength = Math . floor ( commonLength / 3 )
368
+ }
369
+ }
370
+ const ret = this . computeMatchedBlockList ( oldStart , oldEnd , newStart , newEnd )
338
371
if ( start ) ret . unshift ( start )
339
372
if ( end ) ret . push ( end )
340
373
341
374
return ret
342
375
}
343
376
344
- // todo difflib
345
377
private computeMatchedBlockList (
346
378
oldStart : number ,
347
379
oldEnd : number ,
@@ -390,13 +422,15 @@ export default class HtmlDiff {
390
422
const ret = this . slideBestMatchedBlock ( i , newStart , len )
391
423
if ( ret && ( ! bestMatchedBlock || ret . size > bestMatchedBlock . size ) ) {
392
424
bestMatchedBlock = ret
425
+ if ( ret . size > this . leastCommonLength ) return bestMatchedBlock
393
426
}
394
427
}
395
428
for ( let j = newStart ; j < newEnd ; j ++ ) {
396
429
const len = Math . min ( oldEnd - oldStart , newEnd - j )
397
430
const ret = this . slideBestMatchedBlock ( oldStart , j , len )
398
431
if ( ret && ( ! bestMatchedBlock || ret . size > bestMatchedBlock . size ) ) {
399
432
bestMatchedBlock = ret
433
+ if ( ret . size > this . leastCommonLength ) return bestMatchedBlock
400
434
}
401
435
}
402
436
return bestMatchedBlock
0 commit comments