Skip to content

Commit 6689f19

Browse files
Kdecherf authored and j0k3r committed
Strip script and style tags through ::clean() method instead of preg_replace
Huge tags can lead to a failure of preg_replace, thus erasing the whole fetched content.

Fixes wallabag/wallabag#5847

Signed-off-by: Kevin Decherf <kevin@kdecherf.com>
1 parent 0c0653d commit 6689f19

File tree

1 file changed

+3
-4
lines changed

1 file changed

+3
-4
lines changed

src/Readability.php

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -108,10 +108,6 @@ class Readability implements LoggerAwareInterface
108108
protected $useTidy;
109109
// raw HTML filters
110110
protected $pre_filters = [
111-
// remove obvious scripts
112-
'!<script[^>]*>(.*?)</script>!is' => '',
113-
// remove obvious styles
114-
'!<style[^>]*>(.*?)</style>!is' => '',
115111
// remove spans as we redefine styles and they're probably special-styled
116112
'!</?span[^>]*>!is' => '',
117113
// HACK: firewall-filtered content
@@ -366,6 +362,9 @@ public function prepArticle(\DOMNode $articleContent): void
366362

367363
$this->logger->debug($this->lightClean ? 'Light clean enabled.' : 'Standard clean enabled.');
368364

365+
$this->clean($articleContent, 'style');
366+
$this->clean($articleContent, 'script');
367+
369368
$this->cleanStyles($articleContent);
370369
$this->killBreaks($articleContent);
371370

0 commit comments

Comments
 (0)