@@ -302,8 +302,7 @@ public function addFootnotes(\DOMElement $articleContent): void
302
302
$ articleLinks = $ articleContent ->getElementsByTagName ('a ' );
303
303
$ linkCount = 0 ;
304
304
305
- for ($ i = 0 ; $ i < $ articleLinks ->length ; ++$ i ) {
306
- $ articleLink = $ articleLinks ->item ($ i );
305
+ foreach ($ articleLinks as $ articleLink ) {
307
306
$ footnoteLink = $ articleLink ->cloneNode (true );
308
307
$ refLink = $ this ->dom ->createElement ('a ' );
309
308
$ footnote = $ this ->dom ->createElement ('li ' );
@@ -383,8 +382,8 @@ public function prepArticle(\DOMNode $articleContent): void
383
382
384
383
// Remove service data-candidate attribute.
385
384
$ elems = $ xpath ->query ('.//*[@data-candidate] ' , $ articleContent );
386
- for ($ i = $ elems-> length - 1 ; $ i >= 0 ; -- $ i ) {
387
- $ elems -> item ( $ i ) ->removeAttribute ('data-candidate ' );
385
+ foreach ($ elems as $ elem ) {
386
+ $ elem ->removeAttribute ('data-candidate ' );
388
387
}
389
388
390
389
// Clean out junk from the article content.
@@ -520,11 +519,12 @@ public function getLinkDensity(\DOMElement $e, bool $excludeExternal = false): f
520
519
$ textLength = mb_strlen ($ this ->getInnerText ($ e , true , true ));
521
520
$ linkLength = 0 ;
522
521
523
- for ($ dRe = $ this ->domainRegExp , $ i = 0 , $ il = $ links ->length ; $ i < $ il ; ++$ i ) {
524
- if ($ excludeExternal && $ dRe && !preg_match ($ dRe , $ links ->item ($ i )->getAttribute ('href ' ))) {
522
+ $ dRe = $ this ->domainRegExp ;
523
+ foreach ($ links as $ link ) {
524
+ if ($ excludeExternal && $ dRe && !preg_match ($ dRe , $ link ->getAttribute ('href ' ))) {
525
525
continue ;
526
526
}
527
- $ linkLength += mb_strlen ($ this ->getInnerText ($ links -> item ( $ i ) ));
527
+ $ linkLength += mb_strlen ($ this ->getInnerText ($ link ));
528
528
}
529
529
530
530
if ($ textLength > 0 && $ linkLength > 0 ) {
@@ -586,7 +586,7 @@ public function clean(\DOMElement $e, string $tag): void
586
586
}
587
587
588
588
// Then check the elements inside this element for the same.
589
- if (preg_match ($ this ->regexps ['media ' ], $ targetList -> item ( $ y ) ->getInnerHTML ())) {
589
+ if (preg_match ($ this ->regexps ['media ' ], $ currentItem ->getInnerHTML ())) {
590
590
continue ;
591
591
}
592
592
}
@@ -640,15 +640,15 @@ public function cleanConditionally(\DOMElement $e, string $tag): void
640
640
$ embedCount = 0 ;
641
641
$ embeds = $ node ->getElementsByTagName ('embed ' );
642
642
643
- for ($ ei = 0 , $ il = $ embeds-> length ; $ ei < $ il ; ++ $ ei ) {
644
- if (preg_match ($ this ->regexps ['media ' ], $ embeds -> item ( $ ei ) ->getAttribute ('src ' ))) {
643
+ foreach ($ embeds as $ embed ) {
644
+ if (preg_match ($ this ->regexps ['media ' ], $ embed ->getAttribute ('src ' ))) {
645
645
++$ embedCount ;
646
646
}
647
647
}
648
648
649
649
$ embeds = $ node ->getElementsByTagName ('iframe ' );
650
- for ($ ei = 0 , $ il = $ embeds-> length ; $ ei < $ il ; ++ $ ei ) {
651
- if (preg_match ($ this ->regexps ['media ' ], $ embeds -> item ( $ ei ) ->getAttribute ('src ' ))) {
650
+ foreach ($ embeds as $ embed ) {
651
+ if (preg_match ($ this ->regexps ['media ' ], $ embed ->getAttribute ('src ' ))) {
652
652
++$ embedCount ;
653
653
}
654
654
}
@@ -719,8 +719,9 @@ public function cleanHeaders(\DOMElement $e): void
719
719
$ headers = $ e ->getElementsByTagName ('h ' . $ headerIndex );
720
720
721
721
for ($ i = $ headers ->length - 1 ; $ i >= 0 ; --$ i ) {
722
- if ($ this ->getWeight ($ headers ->item ($ i )) < 0 || $ this ->getLinkDensity ($ headers ->item ($ i )) > 0.33 ) {
723
- $ headers ->item ($ i )->parentNode ->removeChild ($ headers ->item ($ i ));
722
+ $ header = $ headers ->item ($ i );
723
+ if ($ this ->getWeight ($ header ) < 0 || $ this ->getLinkDensity ($ header ) > 0.33 ) {
724
+ $ header ->parentNode ->removeChild ($ header );
724
725
}
725
726
}
726
727
}
@@ -812,12 +813,14 @@ protected function prepDocument(): void
812
813
// Remove all style tags in head.
813
814
$ styleTags = $ this ->dom ->getElementsByTagName ('style ' );
814
815
for ($ i = $ styleTags ->length - 1 ; $ i >= 0 ; --$ i ) {
815
- $ styleTags ->item ($ i )->parentNode ->removeChild ($ styleTags ->item ($ i ));
816
+ $ styleTag = $ styleTags ->item ($ i );
817
+ $ styleTag ->parentNode ->removeChild ($ styleTag );
816
818
}
817
819
818
820
$ linkTags = $ this ->dom ->getElementsByTagName ('link ' );
819
821
for ($ i = $ linkTags ->length - 1 ; $ i >= 0 ; --$ i ) {
820
- $ linkTags ->item ($ i )->parentNode ->removeChild ($ linkTags ->item ($ i ));
822
+ $ linkTag = $ linkTags ->item ($ i );
823
+ $ linkTag ->parentNode ->removeChild ($ linkTag );
821
824
}
822
825
}
823
826
@@ -1015,15 +1018,15 @@ protected function grabArticle(?\DOMElement $page = null)
1015
1018
* A score is determined by things like number of commas, class names, etc.
1016
1019
* Maybe eventually link density.
1017
1020
*/
1018
- for ($ pt = 0 , $ scored = \count ( $ nodesToScore); $ pt < $ scored ; ++ $ pt ) {
1019
- $ ancestors = $ this ->getAncestors ($ nodesToScore [ $ pt ] , 5 );
1021
+ foreach ($ nodesToScore as $ nodeToScore ) {
1022
+ $ ancestors = $ this ->getAncestors ($ nodeToScore , 5 );
1020
1023
1021
1024
// No parent node? Move on...
1022
1025
if (0 === \count ($ ancestors )) {
1023
1026
continue ;
1024
1027
}
1025
1028
1026
- $ innerText = $ this ->getInnerText ($ nodesToScore [ $ pt ] );
1029
+ $ innerText = $ this ->getInnerText ($ nodeToScore );
1027
1030
1028
1031
// If this paragraph is less than MIN_PARAGRAPH_LENGTH (default:20) characters, don't even count it.
1029
1032
if (mb_strlen ($ innerText ) < self ::MIN_PARAGRAPH_LENGTH ) {
@@ -1076,11 +1079,6 @@ protected function grabArticle(?\DOMElement $page = null)
1076
1079
}
1077
1080
}
1078
1081
1079
- $ candidates = $ xpath ->query ('.//*[not(self::body) and (@class or @id or @style) and ((number(@readability) < 40) or not(@readability))] ' , $ page ->documentElement );
1080
-
1081
- for ($ c = $ candidates ->length - 1 ; $ c >= 0 ; --$ c ) {
1082
- $ node = $ candidates ->item ($ c );
1083
- }
1084
1082
unset($ candidates );
1085
1083
}
1086
1084
@@ -1231,11 +1229,6 @@ protected function grabArticle(?\DOMElement $page = null)
1231
1229
$ parentOfTopCandidate = $ topCandidate ->parentNode ;
1232
1230
$ siblingNodes = $ parentOfTopCandidate ->childNodes ;
1233
1231
1234
- if (0 === $ siblingNodes ->length ) {
1235
- $ siblingNodes = new \stdClass ();
1236
- $ siblingNodes ->length = 0 ;
1237
- }
1238
-
1239
1232
for ($ s = 0 , $ sl = $ siblingNodes ->length ; $ s < $ sl ; ++$ s ) {
1240
1233
$ siblingNode = $ siblingNodes ->item ($ s );
1241
1234
$ siblingNodeName = $ siblingNode ->nodeName ;
0 commit comments