Merge pull request #80 from jtojnar/stricter

j0k3r · web-flow · commit 38870cdff150 · 2023-04-03T14:47:32.000+02:00
Fix some CI issues
diff --git a/.github/workflows/continuous-integration.yml b/.github/workflows/continuous-integration.yml
@@ -24,6 +24,8 @@ jobs:
           - "7.4"
           - "8.0"
           - "8.1"
+          - "8.2"
+          - "8.3"
 
     steps:
       - name: "Checkout"
diff --git a/composer.json b/composer.json
@@ -35,7 +35,7 @@
         "symfony/phpunit-bridge": "^4.4|^5.3|^6.0",
         "phpstan/phpstan": "^1.3",
         "phpstan/phpstan-phpunit": "^1.0",
-        "rector/rector": "^0.12.15"
+        "rector/rector": "^0.15.0"
     },
     "suggest": {
         "ext-tidy": "Used to clean up given HTML and to avoid problems with bad HTML structure."
@@ -45,5 +45,11 @@
     },
     "autoload-dev": {
         "psr-4": { "Tests\\Readability\\": "tests/" }
+    },
+    "scripts": {
+        "fix": "php-cs-fixer fix --verbose --diff",
+        "phpstan": "phpstan analyze --memory-limit 512M",
+        "rector": "rector process",
+        "test": "simple-phpunit -v"
     }
 }
diff --git a/phpstan.neon b/phpstan.neon
@@ -6,7 +6,7 @@ parameters:
 
     # https://github.com/phpstan/phpstan/issues/694#issuecomment-350724288
     bootstrapFiles:
-        - vendor/bin/.phpunit/phpunit-8.5-0/vendor/autoload.php
+        - vendor/bin/.phpunit/phpunit/vendor/autoload.php
 
     checkMissingIterableValueType: false
 
diff --git a/rector.php b/rector.php
@@ -2,31 +2,30 @@
 
 declare(strict_types=1);
 
-use Rector\Core\Configuration\Option;
+use Rector\Config\RectorConfig;
 use Rector\Core\ValueObject\PhpVersion;
 use Rector\Set\ValueObject\LevelSetList;
-use Symfony\Component\DependencyInjection\Loader\Configurator\ContainerConfigurator;
-
-return static function (ContainerConfigurator $containerConfigurator): void {
-    $parameters = $containerConfigurator->parameters();
 
+return static function (RectorConfig $rectorConfig): void {
     // paths to refactor; solid alternative to CLI arguments
-    $parameters->set(Option::PATHS, [
+    $rectorConfig->paths([
         __DIR__ . '/src',
         __DIR__ . '/tests',
     ]);
 
     // Path to the PHPStan config (with extensions) that PHPStan in Rector uses to determine types
-    $parameters->set(Option::PHPSTAN_FOR_RECTOR_PATH, __DIR__ . '/phpstan.neon');
+    $rectorConfig->phpstanConfig(__DIR__ . '/phpstan.neon');
 
-    $parameters->set(Option::BOOTSTRAP_FILES, [
-        __DIR__ . '/vendor/bin/.phpunit/phpunit-8.5-0/vendor/autoload.php',
+    $rectorConfig->bootstrapFiles([
+        __DIR__ . '/vendor/bin/.phpunit/phpunit/vendor/autoload.php',
         __DIR__ . '/vendor/autoload.php',
     ]);
 
     // Define what rule sets will be applied
-    $containerConfigurator->import(LevelSetList::UP_TO_PHP_72);
+    $rectorConfig->sets([
+        LevelSetList::UP_TO_PHP_72,
+    ]);
 
     // is your PHP version different from the one you refactor to?
-    $parameters->set(Option::PHP_VERSION_FEATURES, PhpVersion::PHP_72);
+    $rectorConfig->phpVersion(PhpVersion::PHP_72);
 };
diff --git a/src/JSLikeHTMLElement.php b/src/JSLikeHTMLElement.php
@@ -39,9 +39,9 @@ class JSLikeHTMLElement extends \DOMElement
     /**
      * Used for setting innerHTML like it's done in JavaScript:.
      *
-     * @code
+     * ```php
      * $div->innerHTML = '<h2>Chapter 2</h2><p>The story begins...</p>';
-     * @endcode
+     * ```
      */
     public function __set($name, $value)
     {
@@ -79,14 +79,13 @@ public function __set($name, $value)
         } else {
             // $value is probably ill-formed
             $f = new \DOMDocument();
-            $value = mb_convert_encoding($value, 'HTML-ENTITIES', 'UTF-8');
 
             // Using <htmlfragment> will generate a warning, but so will bad HTML
             // (and by this point, bad HTML is what we've got).
             // We use it (and suppress the warning) because an HTML fragment will
             // be wrapped around <html><body> tags which we don't really want to keep.
             // Note: despite the warning, if loadHTML succeeds it will return true.
-            $result = $f->loadHTML('<htmlfragment>' . $value . '</htmlfragment>');
+            $result = $f->loadHTML('<meta charset="utf-8"><htmlfragment>' . $value . '</htmlfragment>');
 
             if ($result) {
                 $import = $f->getElementsByTagName('htmlfragment')->item(0);
@@ -105,9 +104,9 @@ public function __set($name, $value)
     /**
      * Used for getting innerHTML like it's done in JavaScript:.
      *
-     * @code
+     * ```php
      * $string = $div->innerHTML;
-     * @endcode
+     * ```
      */
     public function __get($name)
     {
diff --git a/src/Readability.php b/src/Readability.php
@@ -2,7 +2,6 @@
 
 namespace Readability;
 
-use DOMElement;
 use Masterminds\HTML5;
 use Psr\Log\LoggerAwareInterface;
 use Psr\Log\LoggerInterface;
@@ -115,7 +114,7 @@ class Readability implements LoggerAwareInterface
         // HACK: replace linebreaks plus br's with p's
         '!(<br[^>]*>[ \r\n\s]*){2,}!i' => '</p><p>',
         // replace noscripts
-        //'!</?noscript>!is' => '',
+        // '!</?noscript>!is' => '',
         // replace fonts to spans
         '!<(/?)font[^>]*>!is' => '<\\1span>',
     ];
@@ -126,8 +125,8 @@ class Readability implements LoggerAwareInterface
         // replace empty tags that break layouts
         '!<(?:a|div|p|figure)[^>]+/>!is' => '',
         // remove all attributes on text tags
-        //'!<(\s*/?\s*(?:blockquote|br|hr|code|div|article|span|footer|aside|p|pre|dl|li|ul|ol)) [^>]+>!is' => "<\\1>",
-        //single newlines cleanup
+        // '!<(\s*/?\s*(?:blockquote|br|hr|code|div|article|span|footer|aside|p|pre|dl|li|ul|ol)) [^>]+>!is' => "<\\1>",
+        // single newlines cleanup
         "/\n+/" => "\n",
         // modern web...
         '!<pre[^>]*>\s*<code!is' => '<pre',
@@ -161,7 +160,7 @@ public function setLogger(LoggerInterface $logger): void
     /**
      * Get article title element.
      *
-     * @return DOMElement
+     * @return \DOMElement
      */
     public function getTitle()
     {
@@ -171,7 +170,7 @@ public function getTitle()
     /**
      * Get article content element.
      *
-     * @return DOMElement
+     * @return \DOMElement
      */
     public function getContent()
     {
@@ -280,7 +279,7 @@ public function init(): bool
     /**
      * Run any post-process modifications to article content as necessary.
      */
-    public function postProcessContent(DOMElement $articleContent): void
+    public function postProcessContent(\DOMElement $articleContent): void
     {
         if ($this->convertLinksToFootnotes && !preg_match('/\bwiki/', $this->url)) {
             $this->addFootnotes($articleContent);
@@ -292,7 +291,7 @@ public function postProcessContent(DOMElement $articleContent): void
      *
      * @see http://www.roughtype.com/archives/2010/05/experiments_in.php
      */
-    public function addFootnotes(DOMElement $articleContent): void
+    public function addFootnotes(\DOMElement $articleContent): void
     {
         $footnotesWrapper = $this->dom->createElement('footer');
         $footnotesWrapper->setAttribute('class', 'readability-footnotes');
@@ -335,7 +334,7 @@ public function addFootnotes(DOMElement $articleContent): void
             $articleLink->setAttribute('style', 'color: inherit; text-decoration: none;');
             $articleLink->setAttribute('name', 'readabilityLink-' . $linkCount);
             $footnote->setInnerHtml('<small><sup><a href="#readabilityLink-' . $linkCount . '" title="Jump to Link in Article">^</a></sup></small> ');
-            $footnoteLink->setInnerHtml(('' !== $footnoteLink->getAttribute('title') ? $footnoteLink->getAttribute('title') : $linkText));
+            $footnoteLink->setInnerHtml('' !== $footnoteLink->getAttribute('title') ? $footnoteLink->getAttribute('title') : $linkText);
             $footnoteLink->setAttribute('name', 'readabilityFootnoteLink-' . $linkCount);
             $footnote->appendChild($footnoteLink);
 
@@ -356,7 +355,7 @@ public function addFootnotes(DOMElement $articleContent): void
      */
     public function prepArticle(\DOMNode $articleContent): void
     {
-        if (!$articleContent instanceof DOMElement) {
+        if (!$articleContent instanceof \DOMElement) {
             return;
         }
 
@@ -456,9 +455,9 @@ public function prepArticle(\DOMNode $articleContent): void
      * Get the inner text of a node.
      * This also strips out any excess whitespace to be found.
      *
-     * @param DOMElement $e
-     * @param bool       $normalizeSpaces (default: true)
-     * @param bool       $flattenLines    (default: false)
+     * @param \DOMElement $e
+     * @param bool        $normalizeSpaces (default: true)
+     * @param bool        $flattenLines    (default: false)
      */
     public function getInnerText($e, bool $normalizeSpaces = true, bool $flattenLines = false): string
     {
@@ -482,7 +481,7 @@ public function getInnerText($e, bool $normalizeSpaces = true, bool $flattenLine
     /**
      * Remove the style attribute on every $e and under.
      */
-    public function cleanStyles(DOMElement $e): void
+    public function cleanStyles(\DOMElement $e): void
     {
         if (\is_object($e)) {
             $elems = $e->getElementsByTagName('*');
@@ -515,7 +514,7 @@ public function getWordCount(string $text): int
      * This is the amount of text that is inside a link divided by the total text in the node.
      * Can exclude external references to differentiate between simple text and menus/infoblocks.
      */
-    public function getLinkDensity(DOMElement $e, bool $excludeExternal = false): float
+    public function getLinkDensity(\DOMElement $e, bool $excludeExternal = false): float
     {
         $links = $e->getElementsByTagName('a');
         $textLength = mb_strlen($this->getInnerText($e, true, true));
@@ -538,7 +537,7 @@ public function getLinkDensity(DOMElement $e, bool $excludeExternal = false): fl
     /**
      * Get an element relative weight.
      */
-    public function getWeight(DOMElement $e): int
+    public function getWeight(\DOMElement $e): int
     {
         if (!$this->flagIsActive(self::FLAG_WEIGHT_ATTRIBUTES)) {
             return 0;
@@ -556,7 +555,7 @@ public function getWeight(DOMElement $e): int
     /**
      * Remove extraneous break tags from a node.
      */
-    public function killBreaks(DOMElement $node): void
+    public function killBreaks(\DOMElement $node): void
     {
         $html = $node->getInnerHTML();
         $html = preg_replace($this->regexps['killBreaks'], '<br />', $html);
@@ -569,7 +568,7 @@ public function killBreaks(DOMElement $node): void
      *
      * Updated 2012-09-18 to preserve youtube/vimeo iframes
      */
-    public function clean(DOMElement $e, string $tag): void
+    public function clean(\DOMElement $e, string $tag): void
     {
         $targetList = $e->getElementsByTagName($tag);
         $isEmbed = ('audio' === $tag || 'video' === $tag || 'iframe' === $tag || 'object' === $tag || 'embed' === $tag);
@@ -601,7 +600,7 @@ public function clean(DOMElement $e, string $tag): void
      * "Fishy" is an algorithm based on content length, classnames,
      * link density, number of images & embeds, etc.
      */
-    public function cleanConditionally(DOMElement $e, string $tag): void
+    public function cleanConditionally(\DOMElement $e, string $tag): void
     {
         if (!$this->flagIsActive(self::FLAG_CLEAN_CONDITIONALLY)) {
             return;
@@ -714,7 +713,7 @@ public function cleanConditionally(DOMElement $e, string $tag): void
     /**
      * Clean out spurious headers from an Element. Checks things like classnames and link density.
      */
-    public function cleanHeaders(DOMElement $e): void
+    public function cleanHeaders(\DOMElement $e): void
     {
         for ($headerIndex = 1; $headerIndex < 3; ++$headerIndex) {
             $headers = $e->getElementsByTagName('h' . $headerIndex);
@@ -754,7 +753,7 @@ public function removeFlag(int $flag): void
     /**
      * Get the article title as an H1.
      *
-     * @return DOMElement
+     * @return \DOMElement
      */
     protected function getArticleTitle()
     {
@@ -826,7 +825,7 @@ protected function prepDocument(): void
      * Initialize a node with the readability object. Also checks the
      * className/id for special names to add to its score.
      */
-    protected function initializeNode(DOMElement $node): void
+    protected function initializeNode(\DOMElement $node): void
     {
         if (!isset($node->tagName)) {
             return;
@@ -894,11 +893,11 @@ protected function initializeNode(DOMElement $node): void
      * Using a variety of metrics (content score, classname, element types), find the content that is
      * most likely to be the stuff a user wants to read. Then return it wrapped up in a div.
      *
-     * @param DOMElement $page
+     * @param \DOMElement $page
      *
-     * @return DOMElement|false
+     * @return \DOMElement|false
      */
-    protected function grabArticle(DOMElement $page = null)
+    protected function grabArticle(\DOMElement $page = null)
     {
         if (!$page) {
             $page = $this->dom;
@@ -1040,7 +1039,7 @@ protected function grabArticle(DOMElement $page = null)
             // For every SCORE_CHARS_IN_PARAGRAPH (default:100) characters in this paragraph, add another point. Up to 3 points.
             $contentScore += min(floor(mb_strlen($innerText) / self::SCORE_CHARS_IN_PARAGRAPH), 3);
             // For every SCORE_WORDS_IN_PARAGRAPH (default:20) words in this paragraph, add another point. Up to 3 points.
-            //$contentScore += min(floor($this->getWordCount($innerText) / self::SCORE_WORDS_IN_PARAGRAPH), 3);
+            // $contentScore += min(floor($this->getWordCount($innerText) / self::SCORE_WORDS_IN_PARAGRAPH), 3);
 
             foreach ($ancestors as $level => $ancestor) {
                 if (!$ancestor->nodeName || !$ancestor->parentNode) {
@@ -1211,7 +1210,7 @@ protected function grabArticle(DOMElement $page = null)
         if (0 === strcasecmp($tagName, 'td') || 0 === strcasecmp($tagName, 'tr')) {
             $up = $topCandidate;
 
-            if ($up->parentNode instanceof DOMElement) {
+            if ($up->parentNode instanceof \DOMElement) {
                 $up = $up->parentNode;
 
                 if (0 === strcasecmp($up->tagName, 'table')) {
@@ -1292,8 +1291,8 @@ protected function grabArticle(DOMElement $page = null)
 
                 // To ensure a node does not interfere with readability styles, remove its classnames & ids.
                 // Now done via RegExp post_filter.
-                //$nodeToAppend->removeAttribute('class');
-                //$nodeToAppend->removeAttribute('id');
+                // $nodeToAppend->removeAttribute('class');
+                // $nodeToAppend->removeAttribute('id');
                 // Append sibling and subtract from our list as appending removes a node.
                 $articleContent->appendChild($nodeToAppend);
             }
@@ -1340,7 +1339,7 @@ protected function grabArticle(DOMElement $page = null)
      * Get an element weight by attribute.
      * Uses regular expressions to tell if this element looks good or bad.
      */
-    protected function weightAttribute(DOMElement $element, string $attribute): int
+    protected function weightAttribute(\DOMElement $element, string $attribute): int
     {
         if (!$element->hasAttribute($attribute)) {
             return 0;
@@ -1427,7 +1426,7 @@ private function loadHtml(): void
             unset($tidy);
         }
 
-        $this->html = mb_convert_encoding((string) $this->html, 'HTML-ENTITIES', 'UTF-8');
+        $this->html = '<meta charset="utf-8">' . (string) $this->html;
 
         if ('html5lib' === $this->parser || 'html5' === $this->parser) {
             $this->dom = (new HTML5())->loadHTML($this->html);
@@ -1443,14 +1442,14 @@ private function loadHtml(): void
             libxml_use_internal_errors(false);
         }
 
-        $this->dom->registerNodeClass(DOMElement::class, \Readability\JSLikeHTMLElement::class);
+        $this->dom->registerNodeClass(\DOMElement::class, \Readability\JSLikeHTMLElement::class);
     }
 
-    private function getAncestors(DOMElement $node, int $maxDepth = 0): array
+    private function getAncestors(\DOMElement $node, int $maxDepth = 0): array
     {
         $ancestors = [];
         $i = 0;
-        while ($node->parentNode instanceof DOMElement) {
+        while ($node->parentNode instanceof \DOMElement) {
             $ancestors[] = $node->parentNode;
             if (++$i === $maxDepth) {
                 break;
@@ -1470,7 +1469,7 @@ private function isPhrasingContent($node): bool
             }, iterator_to_array($node->childNodes)), true));
     }
 
-    private function hasSingleTagInsideElement(DOMElement $node, string $tag): bool
+    private function hasSingleTagInsideElement(\DOMElement $node, string $tag): bool
     {
         if (1 !== $node->childNodes->length || $node->childNodes->item(0)->nodeName !== $tag) {
             return false;
@@ -1490,11 +1489,11 @@ private function hasSingleTagInsideElement(DOMElement $node, string $tag): bool
      * Tidy must be configured to not clean the input for this function to
      * work as expected, see $this->tidy_config['clean']
      */
-    private function isNodeVisible(DOMElement $node): bool
+    private function isNodeVisible(\DOMElement $node): bool
     {
         return !($node->hasAttribute('style')
                     && preg_match($this->regexps['isNotVisible'], $node->getAttribute('style'))
-                )
+        )
                 && !$node->hasAttribute('hidden');
     }
 }
diff --git a/tests/ReadabilityTest.php b/tests/ReadabilityTest.php

-Original file line number
+Diff line change
           - "7.4"
           - "8.0"
           - "8.1"
 +          - "8.2"
 +          - "8.3"
     steps:
       - name: "Checkout"