Skip to content

Commit ca23c69

Browse files
authored
Merge pull request #516 from jerriep/allow-overriding-literal-text-element-encoding
Allows the user to override literal text element content encoding
2 parents 3443acc + db2bcd3 commit ca23c69

File tree

2 files changed

+30
-5
lines changed

2 files changed

+30
-5
lines changed

src/HtmlSanitizer/HtmlSanitizer.cs

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,11 @@ public HtmlSanitizer(HtmlSanitizerOptions options)
9999
AllowedAtRules = new HashSet<CssRuleType>(options.AllowedAtRules);
100100
}
101101

102+
/// <summary>
103+
/// Gets or sets the <see cref="Action{IElement}"/> used to encode the content of literal text elements. The default escapes &lt; and &gt; in the element's inner HTML.
104+
/// </summary>
105+
public Action<IElement> EncodeLiteralTextElementContent { get; set; } = DefaultEncodeLiteralTextElementContent;
106+
102107
/// <summary>
103108
/// Gets or sets the default value indicating whether to keep child nodes of elements that are removed. Default is false.
104109
/// </summary>
@@ -465,6 +470,15 @@ private void RemoveComments(INode context)
465470
}
466471
}
467472

473+
private static void DefaultEncodeLiteralTextElementContent(IElement tag)
474+
{
475+
var escapedHtml = tag.InnerHtml.Replace("<", "&lt;").Replace(">", "&gt;");
476+
if (escapedHtml != tag.InnerHtml)
477+
tag.InnerHtml = escapedHtml;
478+
if (tag.InnerHtml != escapedHtml) // setting InnerHtml does not work for noscript
479+
tag.SetInnerText(escapedHtml);
480+
}
481+
468482
private void DoSanitize(IHtmlDocument dom, IParentNode context, string baseUrl = "")
469483
{
470484
// remove disallowed tags
@@ -479,11 +493,7 @@ private void DoSanitize(IHtmlDocument dom, IParentNode context, string baseUrl =
479493
&& t.Flags.HasFlag(NodeFlags.LiteralText)
480494
&& !string.IsNullOrWhiteSpace(t.InnerHtml)))
481495
{
482-
var escapedHtml = tag.InnerHtml.Replace("<", "&lt;").Replace(">", "&gt;");
483-
if (escapedHtml != tag.InnerHtml)
484-
tag.InnerHtml = escapedHtml;
485-
if (tag.InnerHtml != escapedHtml) // setting InnerHtml does not work for noscript
486-
tag.SetInnerText(escapedHtml);
496+
EncodeLiteralTextElementContent(tag);
487497
}
488498

489499
SanitizeStyleSheets(dom, baseUrl);

test/HtmlSanitizer.Tests/Tests.cs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3553,6 +3553,21 @@ public void Bypass4Test()
35533553
Assert.Equal(expected, sanitized);
35543554
}
35553555

3556+
[Fact]
3557+
public void OverrideLiteralTextElementContentEncoderTest()
3558+
{
3559+
var sanitizer = new HtmlSanitizer();
3560+
sanitizer.AllowedTags.Add("script");
3561+
sanitizer.EncodeLiteralTextElementContent = (e) =>
3562+
{
3563+
// Do nothing - we do not want to encode the custom element inside the <script> element
3564+
};
3565+
var bypass = @"<script><custom-element>abc</custom-element></script>";
3566+
var sanitized = sanitizer.Sanitize(bypass);
3567+
var expected = @"<script><custom-element>abc</custom-element></script>";
3568+
Assert.Equal(expected, sanitized);
3569+
}
3570+
35563571
[Fact]
35573572
public void InlineCssTest()
35583573
{

0 commit comments

Comments
 (0)