@@ -423,6 +423,7 @@ public function prepArticle(\DOMNode $articleContent): void
423
423
}
424
424
425
425
// Remove service data-candidate attribute.
426
+ /** @var \DOMNodeList<\DOMElement> */
426
427
$ elems = $ xpath ->query ('.//*[@data-candidate] ' , $ articleContent );
427
428
for ($ i = $ elems ->length - 1 ; $ i >= 0 ; --$ i ) {
428
429
$ elems ->item ($ i )->removeAttribute ('data-candidate ' );
@@ -1102,12 +1103,13 @@ protected function grabArticle(?\DOMElement $page = null)
1102
1103
* This is faster to do before scoring but safer after.
1103
1104
*/
1104
1105
if ($ this ->flagIsActive (self ::FLAG_STRIP_UNLIKELYS ) && $ xpath ) {
1106
+ /** @var \DOMNodeList<\DOMElement> */
1105
1107
$ candidates = $ xpath ->query ('.//*[(self::footer and count(//footer)<2) or (self::aside and count(//aside)<2)] ' , $ page ->documentElement );
1106
1108
1107
1109
for ($ c = $ candidates ->length - 1 ; $ c >= 0 ; --$ c ) {
1108
1110
$ node = $ candidates ->item ($ c );
1109
1111
// The node should be readable but not inside an article; otherwise it is probably a non-readable block.
1110
- if ($ node ->hasAttribute ('readability ' ) && (int ) $ node ->getAttributeNode ('readability ' )->value < 40 && ($ node ->parentNode ? 0 !== strcasecmp ($ node ->parentNode ->tagName , 'article ' ) : true )) {
1112
+ if ($ node ->hasAttribute ('readability ' ) && (int ) $ node ->getAttributeNode ('readability ' )->value < 40 && ($ node ->parentNode instanceof \DOMElement ? 0 !== strcasecmp ($ node ->parentNode ->tagName , 'article ' ) : true )) {
1111
1113
$ this ->logger ->debug ('Removing unlikely candidate (using note) ' . $ node ->getNodePath () . ' by " ' . $ node ->tagName . '" with readability ' . self ::getContentScore ($ node ));
1112
1114
$ node ->parentNode ->removeChild ($ node );
1113
1115
}
@@ -1128,6 +1130,7 @@ protected function grabArticle(?\DOMElement $page = null)
1128
1130
$ topCandidates = array_fill (0 , 5 , null );
1129
1131
if ($ xpath ) {
1130
1132
// Using array of DOMElements after deletion is a path to DOOMElement.
1133
+ /** @var \DOMNodeList<\DOMElement> */
1131
1134
$ candidates = $ xpath ->query ('.//*[@data-candidate] ' , $ page ->documentElement );
1132
1135
$ this ->logger ->debug ('Candidates: ' . $ candidates ->length );
1133
1136
@@ -1154,6 +1157,7 @@ protected function grabArticle(?\DOMElement $page = null)
1154
1157
}
1155
1158
}
1156
1159
1160
+ /** @var array<int, \DOMElement|null> */
1157
1161
$ topCandidates = array_filter (
1158
1162
$ topCandidates ,
1159
1163
fn ($ v , $ idx ) => 0 === $ idx || null !== $ v ,
@@ -1276,19 +1280,19 @@ protected function grabArticle(?\DOMElement $page = null)
1276
1280
$ siblingNode = $ siblingNodes ->item ($ s );
1277
1281
$ siblingNodeName = $ siblingNode ->nodeName ;
1278
1282
$ append = false ;
1279
- $ this ->logger ->debug ('Looking at sibling node: ' . $ siblingNode ->getNodePath () . ((\ XML_ELEMENT_NODE === $ siblingNode-> nodeType && $ siblingNode ->hasAttribute ('readability ' )) ? (' with score ' . $ siblingNode ->getAttribute ('readability ' )) : '' ));
1283
+ $ this ->logger ->debug ('Looking at sibling node: ' . $ siblingNode ->getNodePath () . (($ siblingNode instanceof \DOMElement && $ siblingNode ->hasAttribute ('readability ' )) ? (' with score ' . $ siblingNode ->getAttribute ('readability ' )) : '' ));
1280
1284
1281
1285
if ($ siblingNode ->isSameNode ($ topCandidate )) {
1282
1286
$ append = true ;
1283
1287
} else {
1284
1288
$ contentBonus = 0 ;
1285
1289
1286
1290
// Give a bonus if sibling nodes and top candidates have the same classname.
1287
- if (\ XML_ELEMENT_NODE === $ siblingNode-> nodeType && $ siblingNode ->getAttribute ('class ' ) === $ topCandidate ->getAttribute ('class ' ) && '' !== $ topCandidate ->getAttribute ('class ' )) {
1291
+ if ($ siblingNode instanceof \DOMElement && $ siblingNode ->getAttribute ('class ' ) === $ topCandidate ->getAttribute ('class ' ) && '' !== $ topCandidate ->getAttribute ('class ' )) {
1288
1292
$ contentBonus += ((int ) $ topCandidate ->getAttribute ('readability ' )) * 0.2 ;
1289
1293
}
1290
1294
1291
- if (\ XML_ELEMENT_NODE === $ siblingNode-> nodeType && $ siblingNode ->hasAttribute ('readability ' ) && (((int ) $ siblingNode ->getAttribute ('readability ' )) + $ contentBonus ) >= $ siblingScoreThreshold ) {
1295
+ if ($ siblingNode instanceof \DOMElement && $ siblingNode ->hasAttribute ('readability ' ) && (((int ) $ siblingNode ->getAttribute ('readability ' )) + $ contentBonus ) >= $ siblingScoreThreshold ) {
1292
1296
$ append = true ;
1293
1297
} elseif (0 === strcasecmp ($ siblingNodeName , 'p ' )) {
1294
1298
$ linkDensity = (int ) $ this ->getLinkDensity ($ siblingNode );
@@ -1518,7 +1522,7 @@ private function getAncestors(\DOMElement $node, int $maxDepth = 0): array
1518
1522
1519
1523
private function isPhrasingContent ($ node ): bool
1520
1524
{
1521
- return \ XML_TEXT_NODE === $ node-> nodeType
1525
+ return $ node instanceof \DOMText
1522
1526
|| \in_array (strtoupper ($ node ->nodeName ), $ this ->phrasingElements , true )
1523
1527
|| (
1524
1528
\in_array (strtoupper ($ node ->nodeName ), ['A ' , 'DEL ' , 'INS ' ], true )
0 commit comments