Skip to content

Commit 2e04b7c

Browse files
committed
chore: add toMarkdown test case
1 parent 5b0cbe0 commit 2e04b7c

File tree

3 files changed

+62
-1
lines changed

packages/agent-infra/shared/src/browser/to-markdown.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,12 @@
33
* SPDX-License-Identifier: Apache-2.0
44
*/
55

6-
import Turndown from 'turndown';
6+
import Turndown, { TagName } from 'turndown';
77
import { gfm } from 'turndown-plugin-gfm';
88

99
/**
 * Options accepted by `toMarkdown`: every Turndown option plus the
 * converter-specific switches declared here.
 */
export interface ToMarkdownOptions extends Turndown.Options {
  /**
   * When truthy, the `turndown-plugin-gfm` plugin is registered on the
   * Turndown instance. `toMarkdown` defaults this to `true`.
   */
  gfmExtension?: boolean;

  /**
   * Tag names handed to `turndown.remove(...)` before conversion.
   * `toMarkdown` defaults this to `['script', 'style', 'link']`.
   */
  removeTags?: TagName[];
}
1213

1314
/**
@@ -29,6 +30,7 @@ export function toMarkdown(
2930
emDelimiter = '*',
3031
strongDelimiter = '**',
3132
gfmExtension = true,
33+
removeTags = ['script', 'style', 'link'],
3234
} = options;
3335

3436
const turndown = new Turndown({
@@ -38,6 +40,9 @@ export function toMarkdown(
3840
strongDelimiter,
3941
});
4042

43+
// issue: https://github.com/mixmark-io/turndown/issues/210#issuecomment-353666857
44+
turndown.remove(removeTags);
45+
4146
if (gfmExtension) {
4247
turndown.use(gfm);
4348
}
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
import { describe, it, expect } from 'vitest';
2+
import { toMarkdown } from '../../src/browser/to-markdown';
3+
4+
describe('toMarkdown', () => {
  // Smallest meaningful input: one heading element.
  it('should convert HTML to Markdown', () => {
    const headingHtml = '<h1>Hello, world!</h1>';

    expect(toMarkdown(headingHtml)).toBe('# Hello, world!');
  });

  // A page fragment with an inline <script>, nested wrappers carrying inline
  // styles, a linked image, a paragraph, an <input> and an <svg>. The
  // assertion pins the output to just the linked image and the paragraph.
  it('should remove tags', () => {
    // The fixture lines are flush-left on purpose: whitespace inside the
    // template literal is part of the string under test.
    const pageHtml = `<html>
<head>
<script formula-runtime >function e(e){for(var r=1;r<arguments.length;r++){}}</script>
<body>
<div id="app"><!--[--><!--[--><!--[--><!--[--><!--[--><!----><!---->
<div id="global" data-logged="0" class="layout limit" style="--40bdee49:1728px;" data-v-34b87540>
<div class="header-container" data-v-34b87540 style="--67f219a2:1728px;" data-v-5c1b2170>
<header class="mask-paper" data-v-5c1b2170>
<a aria-current="page" href="/explore" class="active router-link-exact-active" id="link-guide" style="display:flex;" data-v-5c1b2170>
<img crossorigin="anonymous" class="header-logo" style="pointer-events:none;" src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAM0AAABgCAYAAAC" data-v-5c1b2170>
</a><!--[-->
<div class="input-box" data-v-721de8bd>
<p>Hello World</p>
<input id="search-input" value="" type="text" spellcheck="false" class="search-input" placeholder="登录" autocomplete="off" data-v-721de8bd><!---->
<div class="input-button" data-v-721de8bd><!---->
<div class="search-icon" data-v-721de8bd>
<svg class="reds-icon" width="20" height="20" data-v-721de8bd data-v-55b36ac6><use xlink:href="#search" data-v-55b36ac6></use></svg>
</div>
</div>
</div>
</header>
</div>
</div>
</div>
</body>
</html>`;

    // Linked image, one blank line, then the paragraph text.
    const expectedMarkdown =
      '[![](data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAM0AAABgCAYAAAC)](/explore)\n\nHello World';

    expect(toMarkdown(pageHtml)).toEqual(expectedMarkdown);
  });
});
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
/**
2+
* Copyright (c) 2025 Bytedance, Inc. and its affiliates.
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
import { defineConfig } from 'vitest/config';
6+
7+
// Glob(s) selecting which files Vitest treats as test files:
// any `*.test.*` / `*.spec.*` file with a JS/TS-family extension.
const testFileGlobs = ['**/*.{test,spec}.{js,mjs,cjs,ts,mts,cts,jsx,tsx}'];

// Vitest configuration: run the suite in the plain Node.js environment.
export default defineConfig({
  test: {
    environment: 'node',
    include: testFileGlobs,
  },
});

0 commit comments

Comments
 (0)