@@ -142,7 +142,7 @@ class Readability implements LoggerAwareInterface
142
142
* @param string $parser Which parser to use for turning raw HTML into a DOMDocument
143
143
* @param bool $useTidy Use tidy
144
144
*/
145
- public function __construct (string $ html , string $ url = null , string $ parser = 'libxml ' , bool $ useTidy = true )
145
+ public function __construct (string $ html , ? string $ url = null , string $ parser = 'libxml ' , bool $ useTidy = true )
146
146
{
147
147
$ this ->url = $ url ;
148
148
$ this ->html = $ html ;
@@ -739,15 +739,15 @@ public function flagIsActive(int $flag): bool
739
739
*/
740
740
public function addFlag (int $ flag ): void
741
741
{
742
- $ this ->flags = $ this -> flags | $ flag ;
742
+ $ this ->flags |= $ flag ;
743
743
}
744
744
745
745
/**
746
746
* Remove a flag.
747
747
*/
748
748
public function removeFlag (int $ flag ): void
749
749
{
750
- $ this ->flags = $ this -> flags & ~$ flag ;
750
+ $ this ->flags &= ~$ flag ;
751
751
}
752
752
753
753
/**
@@ -893,11 +893,9 @@ protected function initializeNode(\DOMElement $node): void
893
893
* Using a variety of metrics (content score, classname, element types), find the content that is
894
894
* most likely to be the stuff a user wants to read. Then return it wrapped up in a div.
895
895
*
896
- * @param \DOMElement $page
897
- *
898
896
* @return \DOMElement|false
899
897
*/
900
- protected function grabArticle (\DOMElement $ page = null )
898
+ protected function grabArticle (? \DOMElement $ page = null )
901
899
{
902
900
if (!$ page ) {
903
901
$ page = $ this ->dom ;
@@ -933,9 +931,9 @@ protected function grabArticle(\DOMElement $page = null)
933
931
// Remove unlikely candidates
934
932
$ unlikelyMatchString = $ node ->getAttribute ('class ' ) . ' ' . $ node ->getAttribute ('id ' ) . ' ' . $ node ->getAttribute ('style ' );
935
933
936
- if (mb_strlen ($ unlikelyMatchString ) > 3 && // don't process "empty" strings
937
- preg_match ($ this ->regexps ['unlikelyCandidates ' ], $ unlikelyMatchString ) &&
938
- !preg_match ($ this ->regexps ['okMaybeItsACandidate ' ], $ unlikelyMatchString )
934
+ if (mb_strlen ($ unlikelyMatchString ) > 3 // don't process "empty" strings
935
+ && preg_match ($ this ->regexps ['unlikelyCandidates ' ], $ unlikelyMatchString )
936
+ && !preg_match ($ this ->regexps ['okMaybeItsACandidate ' ], $ unlikelyMatchString )
939
937
) {
940
938
$ this ->logger ->debug ('Removing unlikely candidate (using conf) ' . $ node ->getNodePath () . ' by " ' . $ unlikelyMatchString . '" ' );
941
939
$ node ->parentNode ->removeChild ($ node );
@@ -1120,9 +1118,13 @@ protected function grabArticle(\DOMElement $page = null)
1120
1118
}
1121
1119
}
1122
1120
1123
- $ topCandidates = array_filter ($ topCandidates , function ($ v , $ idx ) {
1124
- return 0 === $ idx || null !== $ v ;
1125
- }, \ARRAY_FILTER_USE_BOTH );
1121
+ $ topCandidates = array_filter (
1122
+ $ topCandidates ,
1123
+ function ($ v , $ idx ) {
1124
+ return 0 === $ idx || null !== $ v ;
1125
+ },
1126
+ \ARRAY_FILTER_USE_BOTH
1127
+ );
1126
1128
$ topCandidate = $ topCandidates [0 ];
1127
1129
1128
1130
/*
@@ -1442,7 +1444,7 @@ private function loadHtml(): void
1442
1444
libxml_use_internal_errors (false );
1443
1445
}
1444
1446
1445
- $ this ->dom ->registerNodeClass (\DOMElement::class, \ Readability \ JSLikeHTMLElement::class);
1447
+ $ this ->dom ->registerNodeClass (\DOMElement::class, JSLikeHTMLElement::class);
1446
1448
}
1447
1449
1448
1450
private function getAncestors (\DOMElement $ node , int $ maxDepth = 0 ): array
@@ -1464,9 +1466,18 @@ private function isPhrasingContent($node): bool
1464
1466
{
1465
1467
return \XML_TEXT_NODE === $ node ->nodeType
1466
1468
|| \in_array (strtoupper ($ node ->nodeName ), $ this ->phrasingElements , true )
1467
- || (\in_array (strtoupper ($ node ->nodeName ), ['A ' , 'DEL ' , 'INS ' ], true ) && !\in_array (false , array_map (function ($ c ) {
1468
- return $ this ->isPhrasingContent ($ c );
1469
- }, iterator_to_array ($ node ->childNodes )), true ));
1469
+ || (\in_array (strtoupper ($ node ->nodeName ), ['A ' , 'DEL ' , 'INS ' ], true )
1470
+ && !\in_array (
1471
+ false ,
1472
+ array_map (
1473
+ function ($ c ) {
1474
+ return $ this ->isPhrasingContent ($ c );
1475
+ },
1476
+ iterator_to_array ($ node ->childNodes )
1477
+ ),
1478
+ true
1479
+ )
1480
+ );
1470
1481
}
1471
1482
1472
1483
private function hasSingleTagInsideElement (\DOMElement $ node , string $ tag ): bool
@@ -1475,10 +1486,12 @@ private function hasSingleTagInsideElement(\DOMElement $node, string $tag): bool
1475
1486
return false ;
1476
1487
}
1477
1488
1478
- $ a = array_filter (iterator_to_array ($ node ->childNodes ), function ($ childNode ) {
1479
- return $ childNode instanceof \DOMText &&
1480
- preg_match ($ this ->regexps ['hasContent ' ], $ this ->getInnerText ($ childNode ));
1481
- });
1489
+ $ a = array_filter (
1490
+ iterator_to_array ($ node ->childNodes ),
1491
+ function ($ childNode ) {
1492
+ return $ childNode instanceof \DOMText && preg_match ($ this ->regexps ['hasContent ' ], $ this ->getInnerText ($ childNode ));
1493
+ }
1494
+ );
1482
1495
1483
1496
return 0 === \count ($ a );
1484
1497
}
@@ -1491,9 +1504,10 @@ private function hasSingleTagInsideElement(\DOMElement $node, string $tag): bool
1491
1504
*/
1492
1505
private function isNodeVisible (\DOMElement $ node ): bool
1493
1506
{
1494
- return !($ node ->hasAttribute ('style ' )
1495
- && preg_match ($ this ->regexps ['isNotVisible ' ], $ node ->getAttribute ('style ' ))
1507
+ return !(
1508
+ $ node ->hasAttribute ('style ' )
1509
+ && preg_match ($ this ->regexps ['isNotVisible ' ], $ node ->getAttribute ('style ' ))
1496
1510
)
1497
- && !$ node ->hasAttribute ('hidden ' );
1511
+ && !$ node ->hasAttribute ('hidden ' );
1498
1512
}
1499
1513
}
0 commit comments