Skip to content

Commit d73813e

Browse files
committed
refactor: greedyMatch
1 parent 3934786 commit d73813e

File tree

4 files changed

+86
-35
lines changed

4 files changed

+86
-35
lines changed

README.md

Lines changed: 34 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -24,24 +24,41 @@ const unifiedContent = diff.getUnifiedContent()
2424
const sideBySideContents = diff.getSideBySideContents()
2525
```
2626

27-
You can use your own styles without importing the CSS file
27+
## Options
2828

29-
```js
30-
import HtmlDiff from '@armantang/html-diff'
31-
32-
const oldHtml = `<div>hello</div>`
33-
const newHtml = `<div>hello world</div>`
29+
```ts
3430
const diff = new HtmlDiff(oldHtml, newHtml, {
35-
minMatchedSize: 3,
36-
classNames: {
37-
createText: 'cra-txt',
38-
deleteText: 'del-txt',
39-
createInline: 'cra-inl',
40-
deleteInline: 'del-inl',
41-
createBlock: 'cra-blo',
42-
deleteBlock: 'del-blo',
43-
},
31+
// options
4432
})
45-
const unifiedContent = diff.getUnifiedContent()
46-
const sideBySideContents = diff.getSideBySideContents()
33+
34+
interface HtmlDiffOptions {
35+
/**
36+
* Determine the minimum threshold for calculating common subsequences.
37+
* You may adjust it to a value larger than 2, but not lower, due to the potential inclusion of HTML tags in the count.
38+
* @defaultValue 2
39+
*/
40+
minMatchedSize?: number
41+
/**
42+
* When greedyMatch is enabled, if the length of the sub-sequences exceeds greedyBoundary,
43+
* we will use the matched sub-sequences that are sufficiently good, even if they are not optimal, to enhance performance.
44+
* @defaultValue true
45+
*/
46+
greedyMatch?: boolean
47+
/**
48+
* @defaultValue 1000
49+
*/
50+
greedyBoundary?: number
51+
/**
52+
* The classNames for wrapper DOM.
53+
* Use this to configure your own styles without importing the built-in CSS file
54+
*/
55+
classNames?: Partial<{
56+
createText?: string
57+
deleteText?: string
58+
createInline?: string
59+
deleteInline?: string
60+
createBlock?: string
61+
deleteBlock?: string
62+
}>
63+
}
4764
```

build/rollup.config.mjs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,10 @@ const mjsTask = {
2626
plugins: [
2727
typescript({
2828
tsconfig: resolve('tsconfig.json'),
29-
}),
30-
nodeResolve(),
29+
compilerOptions: {
30+
'removeComments': true,
31+
},
32+
}), nodeResolve(),
3133
],
3234
}
3335

@@ -41,8 +43,7 @@ const cssTask = {
4143
rollupPostcss({
4244
extract: true,
4345
plugins: [
44-
postcssNested,
45-
postcssPresetEnv({
46+
postcssNested, postcssPresetEnv({
4647
stage: 2,
4748
enableClientSidePolyfills: true,
4849
browsers: '> 0.5%, last 2 versions, not dead',

packages/html-diff/src/index.ts

Lines changed: 46 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ type BaseOpType = 'delete' | 'create'
1818

1919
interface HtmlDiffConfig {
2020
minMatchedSize: number
21+
greedyMatch: boolean
22+
greedyBoundary: number
2123
classNames: {
2224
createText: string
2325
deleteText: string
@@ -29,7 +31,26 @@ interface HtmlDiffConfig {
2931
}
3032

3133
export interface HtmlDiffOptions {
34+
/**
35+
* Determine the minimum threshold for calculating common subsequences.
36+
* You may adjust it to a value larger than 2, but not lower, due to the potential inclusion of HTML tags in the count.
37+
* @defaultValue 2
38+
*/
3239
minMatchedSize?: number
40+
/**
41+
* When greedyMatch is enabled, if the length of the sub-sequences exceeds greedyBoundary,
42+
* we will use the matched sub-sequences that are sufficiently good, even if they are not optimal, to enhance performance.
43+
* @defaultValue true
44+
*/
45+
greedyMatch?: boolean
46+
/**
47+
* @defaultValue 1000
48+
*/
49+
greedyBoundary?: number
50+
/**
51+
* The classNames for wrapper DOM.
52+
* Use this to configure your own styles without importing the built-in CSS file
53+
*/
3354
classNames?: Partial<{
3455
createText?: string
3556
deleteText?: string
@@ -53,6 +74,7 @@ export default class HtmlDiff {
5374
private readonly newWords: string[] = []
5475
private readonly matchedBlockList: MatchedBlock[] = []
5576
private readonly operationList: Operation[] = []
77+
private leastCommonLength: number = Infinity
5678
private unifiedContent?: string
5779
private sideBySideContents?: [string, string]
5880

@@ -61,6 +83,8 @@ export default class HtmlDiff {
6183
newHtml: string,
6284
{
6385
minMatchedSize = 2,
86+
greedyMatch = true,
87+
greedyBoundary = 1000,
6488
classNames = {
6589
createText: 'html-diff-create-text-wrapper',
6690
deleteText: 'html-diff-delete-text-wrapper',
@@ -74,6 +98,8 @@ export default class HtmlDiff {
7498
// init config
7599
this.config = {
76100
minMatchedSize,
101+
greedyMatch,
102+
greedyBoundary,
77103
classNames: {
78104
createText: 'html-diff-create-text-wrapper',
79105
deleteText: 'html-diff-delete-text-wrapper',
@@ -93,8 +119,8 @@ export default class HtmlDiff {
93119
}
94120

95121
// step1: split HTML to atomic words
96-
this.oldWords = this.convertHtml2Words(oldHtml)
97-
this.newWords = this.convertHtml2Words(newHtml)
122+
this.oldWords = this.tokenize(oldHtml)
123+
this.newWords = this.tokenize(newHtml)
98124
// step2: find matched blocks
99125
this.matchedBlockList = this.getMatchedBlockList()
100126
// step3: generate operation list
@@ -277,11 +303,12 @@ export default class HtmlDiff {
277303
}
278304

279305
/**
280-
* convert HTML to word list
281-
* "<a> Hello World </a>"
306+
* convert HTML to tokens
307+
* @example
308+
* tokenize("<a> Hello World </a>")
282309
* ["<a>"," ", "Hello", " ", "World", " ", "</a>"]
283310
*/
284-
private convertHtml2Words(html: string): string[] {
311+
private tokenize(html: string): string[] {
285312
// atomic word: HTML tag, continuous numbers or letters, blank space, symbol, or other character such as Chinese
286313
return (
287314
html.match(
@@ -329,19 +356,24 @@ export default class HtmlDiff {
329356
}
330357
}
331358

332-
const ret = this.computeMatchedBlockList(
333-
start ? i : 0,
334-
end ? e1 + 1 : n1,
335-
start ? i : 0,
336-
end ? e2 + 1 : n2,
337-
)
359+
const oldStart = start ? i : 0
360+
const oldEnd = end ? e1 + 1 : n1
361+
const newStart = start ? i : 0
362+
const newEnd = end ? e2 + 1 : n2
363+
// optimization for matching large sequences
364+
if (this.config.greedyMatch) {
365+
const commonLength = Math.min(oldEnd - oldStart, newEnd - newStart)
366+
if (commonLength > this.config.greedyBoundary) {
367+
this.leastCommonLength = Math.floor(commonLength / 3)
368+
}
369+
}
370+
const ret = this.computeMatchedBlockList(oldStart, oldEnd, newStart, newEnd)
338371
if (start) ret.unshift(start)
339372
if (end) ret.push(end)
340373

341374
return ret
342375
}
343376

344-
// todo difflib
345377
private computeMatchedBlockList(
346378
oldStart: number,
347379
oldEnd: number,
@@ -390,13 +422,15 @@ export default class HtmlDiff {
390422
const ret = this.slideBestMatchedBlock(i, newStart, len)
391423
if (ret && (!bestMatchedBlock || ret.size > bestMatchedBlock.size)) {
392424
bestMatchedBlock = ret
425+
if (ret.size > this.leastCommonLength) return bestMatchedBlock
393426
}
394427
}
395428
for (let j = newStart; j < newEnd; j++) {
396429
const len = Math.min(oldEnd - oldStart, newEnd - j)
397430
const ret = this.slideBestMatchedBlock(oldStart, j, len)
398431
if (ret && (!bestMatchedBlock || ret.size > bestMatchedBlock.size)) {
399432
bestMatchedBlock = ret
433+
if (ret.size > this.leastCommonLength) return bestMatchedBlock
400434
}
401435
}
402436
return bestMatchedBlock

packages/html-diff/tsconfig.json

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,7 @@
1313
"noUnusedLocals": true,
1414
"noUnusedParameters": true,
1515
"strictNullChecks": true,
16-
"allowUnreachableCode": false,
17-
"removeComments": true
16+
"allowUnreachableCode": false
1817
},
1918
"include": ["src/**/*.ts", "tests/**/*.test.ts"]
2019
}

0 commit comments

Comments
 (0)