Skip to content

Commit 38870cd

Browse files
authored
Merge pull request #80 from jtojnar/stricter
Fix some CI issues
2 parents 7cd8476 + 9bdd3b6 commit 38870cd

File tree

7 files changed

+104
-91
lines changed

7 files changed

+104
-91
lines changed

.github/workflows/continuous-integration.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ jobs:
2424
- "7.4"
2525
- "8.0"
2626
- "8.1"
27+
- "8.2"
28+
- "8.3"
2729

2830
steps:
2931
- name: "Checkout"

composer.json

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
"symfony/phpunit-bridge": "^4.4|^5.3|^6.0",
3636
"phpstan/phpstan": "^1.3",
3737
"phpstan/phpstan-phpunit": "^1.0",
38-
"rector/rector": "^0.12.15"
38+
"rector/rector": "^0.15.0"
3939
},
4040
"suggest": {
4141
"ext-tidy": "Used to clean up given HTML and to avoid problems with bad HTML structure."
@@ -45,5 +45,11 @@
4545
},
4646
"autoload-dev": {
4747
"psr-4": { "Tests\\Readability\\": "tests/" }
48+
},
49+
"scripts": {
50+
"fix": "php-cs-fixer fix --verbose --diff",
51+
"phpstan": "phpstan analyze --memory-limit 512M",
52+
"rector": "rector process",
53+
"test": "simple-phpunit -v"
4854
}
4955
}

phpstan.neon

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ parameters:
66

77
# https://github.com/phpstan/phpstan/issues/694#issuecomment-350724288
88
bootstrapFiles:
9-
- vendor/bin/.phpunit/phpunit-8.5-0/vendor/autoload.php
9+
- vendor/bin/.phpunit/phpunit/vendor/autoload.php
1010

1111
checkMissingIterableValueType: false
1212

rector.php

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,31 +2,30 @@
22

33
declare(strict_types=1);
44

5-
use Rector\Core\Configuration\Option;
5+
use Rector\Config\RectorConfig;
66
use Rector\Core\ValueObject\PhpVersion;
77
use Rector\Set\ValueObject\LevelSetList;
8-
use Symfony\Component\DependencyInjection\Loader\Configurator\ContainerConfigurator;
9-
10-
return static function (ContainerConfigurator $containerConfigurator): void {
11-
$parameters = $containerConfigurator->parameters();
128

9+
return static function (RectorConfig $rectorConfig): void {
1310
// paths to refactor; solid alternative to CLI arguments
14-
$parameters->set(Option::PATHS, [
11+
$rectorConfig->paths([
1512
__DIR__ . '/src',
1613
__DIR__ . '/tests',
1714
]);
1815

1916
// Path to the PHPStan config (with extensions) that Rector's bundled PHPStan uses to determine types
20-
$parameters->set(Option::PHPSTAN_FOR_RECTOR_PATH, __DIR__ . '/phpstan.neon');
17+
$rectorConfig->phpstanConfig(__DIR__ . '/phpstan.neon');
2118

22-
$parameters->set(Option::BOOTSTRAP_FILES, [
23-
__DIR__ . '/vendor/bin/.phpunit/phpunit-8.5-0/vendor/autoload.php',
19+
$rectorConfig->bootstrapFiles([
20+
__DIR__ . '/vendor/bin/.phpunit/phpunit/vendor/autoload.php',
2421
__DIR__ . '/vendor/autoload.php',
2522
]);
2623

2724
// Define what rule sets will be applied
28-
$containerConfigurator->import(LevelSetList::UP_TO_PHP_72);
25+
$rectorConfig->sets([
26+
LevelSetList::UP_TO_PHP_72,
27+
]);
2928

3029
// is your PHP version different from the one you refactor to?
31-
$parameters->set(Option::PHP_VERSION_FEATURES, PhpVersion::PHP_72);
30+
$rectorConfig->phpVersion(PhpVersion::PHP_72);
3231
};

src/JSLikeHTMLElement.php

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,9 @@ class JSLikeHTMLElement extends \DOMElement
3939
/**
4040
* Used for setting innerHTML like it's done in JavaScript.
4141
*
42-
* @code
42+
* ```php
4343
* $div->innerHTML = '<h2>Chapter 2</h2><p>The story begins...</p>';
44-
* @endcode
44+
* ```
4545
*/
4646
public function __set($name, $value)
4747
{
@@ -79,14 +79,13 @@ public function __set($name, $value)
7979
} else {
8080
// $value is probably ill-formed
8181
$f = new \DOMDocument();
82-
$value = mb_convert_encoding($value, 'HTML-ENTITIES', 'UTF-8');
8382

8483
// Using <htmlfragment> will generate a warning, but so will bad HTML
8584
// (and by this point, bad HTML is what we've got).
8685
// We use it (and suppress the warning) because an HTML fragment will
8786
// be wrapped around <html><body> tags which we don't really want to keep.
8887
// Note: despite the warning, if loadHTML succeeds it will return true.
89-
$result = $f->loadHTML('<htmlfragment>' . $value . '</htmlfragment>');
88+
$result = $f->loadHTML('<meta charset="utf-8"><htmlfragment>' . $value . '</htmlfragment>');
9089

9190
if ($result) {
9291
$import = $f->getElementsByTagName('htmlfragment')->item(0);
@@ -105,9 +104,9 @@ public function __set($name, $value)
105104
/**
106105
* Used for getting innerHTML like it's done in JavaScript.
107106
*
108-
* @code
107+
* ```php
109108
* $string = $div->innerHTML;
110-
* @endcode
109+
* ```
111110
*/
112111
public function __get($name)
113112
{

src/Readability.php

Lines changed: 36 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22

33
namespace Readability;
44

5-
use DOMElement;
65
use Masterminds\HTML5;
76
use Psr\Log\LoggerAwareInterface;
87
use Psr\Log\LoggerInterface;
@@ -115,7 +114,7 @@ class Readability implements LoggerAwareInterface
115114
// HACK: replace linebreaks plus br's with p's
116115
'!(<br[^>]*>[ \r\n\s]*){2,}!i' => '</p><p>',
117116
// replace noscripts
118-
//'!</?noscript>!is' => '',
117+
// '!</?noscript>!is' => '',
119118
// replace fonts to spans
120119
'!<(/?)font[^>]*>!is' => '<\\1span>',
121120
];
@@ -126,8 +125,8 @@ class Readability implements LoggerAwareInterface
126125
// replace empty tags that break layouts
127126
'!<(?:a|div|p|figure)[^>]+/>!is' => '',
128127
// remove all attributes on text tags
129-
//'!<(\s*/?\s*(?:blockquote|br|hr|code|div|article|span|footer|aside|p|pre|dl|li|ul|ol)) [^>]+>!is' => "<\\1>",
130-
//single newlines cleanup
128+
// '!<(\s*/?\s*(?:blockquote|br|hr|code|div|article|span|footer|aside|p|pre|dl|li|ul|ol)) [^>]+>!is' => "<\\1>",
129+
// single newlines cleanup
131130
"/\n+/" => "\n",
132131
// modern web...
133132
'!<pre[^>]*>\s*<code!is' => '<pre',
@@ -161,7 +160,7 @@ public function setLogger(LoggerInterface $logger): void
161160
/**
162161
* Get article title element.
163162
*
164-
* @return DOMElement
163+
* @return \DOMElement
165164
*/
166165
public function getTitle()
167166
{
@@ -171,7 +170,7 @@ public function getTitle()
171170
/**
172171
* Get article content element.
173172
*
174-
* @return DOMElement
173+
* @return \DOMElement
175174
*/
176175
public function getContent()
177176
{
@@ -280,7 +279,7 @@ public function init(): bool
280279
/**
281280
* Run any post-process modifications to article content as necessary.
282281
*/
283-
public function postProcessContent(DOMElement $articleContent): void
282+
public function postProcessContent(\DOMElement $articleContent): void
284283
{
285284
if ($this->convertLinksToFootnotes && !preg_match('/\bwiki/', $this->url)) {
286285
$this->addFootnotes($articleContent);
@@ -292,7 +291,7 @@ public function postProcessContent(DOMElement $articleContent): void
292291
*
293292
* @see http://www.roughtype.com/archives/2010/05/experiments_in.php
294293
*/
295-
public function addFootnotes(DOMElement $articleContent): void
294+
public function addFootnotes(\DOMElement $articleContent): void
296295
{
297296
$footnotesWrapper = $this->dom->createElement('footer');
298297
$footnotesWrapper->setAttribute('class', 'readability-footnotes');
@@ -335,7 +334,7 @@ public function addFootnotes(DOMElement $articleContent): void
335334
$articleLink->setAttribute('style', 'color: inherit; text-decoration: none;');
336335
$articleLink->setAttribute('name', 'readabilityLink-' . $linkCount);
337336
$footnote->setInnerHtml('<small><sup><a href="#readabilityLink-' . $linkCount . '" title="Jump to Link in Article">^</a></sup></small> ');
338-
$footnoteLink->setInnerHtml(('' !== $footnoteLink->getAttribute('title') ? $footnoteLink->getAttribute('title') : $linkText));
337+
$footnoteLink->setInnerHtml('' !== $footnoteLink->getAttribute('title') ? $footnoteLink->getAttribute('title') : $linkText);
339338
$footnoteLink->setAttribute('name', 'readabilityFootnoteLink-' . $linkCount);
340339
$footnote->appendChild($footnoteLink);
341340

@@ -356,7 +355,7 @@ public function addFootnotes(DOMElement $articleContent): void
356355
*/
357356
public function prepArticle(\DOMNode $articleContent): void
358357
{
359-
if (!$articleContent instanceof DOMElement) {
358+
if (!$articleContent instanceof \DOMElement) {
360359
return;
361360
}
362361

@@ -456,9 +455,9 @@ public function prepArticle(\DOMNode $articleContent): void
456455
* Get the inner text of a node.
457456
* This also strips out any excess whitespace to be found.
458457
*
459-
* @param DOMElement $e
460-
* @param bool $normalizeSpaces (default: true)
461-
* @param bool $flattenLines (default: false)
458+
* @param \DOMElement $e
459+
* @param bool $normalizeSpaces (default: true)
460+
* @param bool $flattenLines (default: false)
462461
*/
463462
public function getInnerText($e, bool $normalizeSpaces = true, bool $flattenLines = false): string
464463
{
@@ -482,7 +481,7 @@ public function getInnerText($e, bool $normalizeSpaces = true, bool $flattenLine
482481
/**
483482
* Remove the style attribute on every $e and under.
484483
*/
485-
public function cleanStyles(DOMElement $e): void
484+
public function cleanStyles(\DOMElement $e): void
486485
{
487486
if (\is_object($e)) {
488487
$elems = $e->getElementsByTagName('*');
@@ -515,7 +514,7 @@ public function getWordCount(string $text): int
515514
* This is the amount of text that is inside a link divided by the total text in the node.
516515
* Can exclude external references to differentiate between simple text and menus/infoblocks.
517516
*/
518-
public function getLinkDensity(DOMElement $e, bool $excludeExternal = false): float
517+
public function getLinkDensity(\DOMElement $e, bool $excludeExternal = false): float
519518
{
520519
$links = $e->getElementsByTagName('a');
521520
$textLength = mb_strlen($this->getInnerText($e, true, true));
@@ -538,7 +537,7 @@ public function getLinkDensity(DOMElement $e, bool $excludeExternal = false): fl
538537
/**
539538
* Get an element relative weight.
540539
*/
541-
public function getWeight(DOMElement $e): int
540+
public function getWeight(\DOMElement $e): int
542541
{
543542
if (!$this->flagIsActive(self::FLAG_WEIGHT_ATTRIBUTES)) {
544543
return 0;
@@ -556,7 +555,7 @@ public function getWeight(DOMElement $e): int
556555
/**
557556
* Remove extraneous break tags from a node.
558557
*/
559-
public function killBreaks(DOMElement $node): void
558+
public function killBreaks(\DOMElement $node): void
560559
{
561560
$html = $node->getInnerHTML();
562561
$html = preg_replace($this->regexps['killBreaks'], '<br />', $html);
@@ -569,7 +568,7 @@ public function killBreaks(DOMElement $node): void
569568
*
570569
* Updated 2012-09-18 to preserve youtube/vimeo iframes
571570
*/
572-
public function clean(DOMElement $e, string $tag): void
571+
public function clean(\DOMElement $e, string $tag): void
573572
{
574573
$targetList = $e->getElementsByTagName($tag);
575574
$isEmbed = ('audio' === $tag || 'video' === $tag || 'iframe' === $tag || 'object' === $tag || 'embed' === $tag);
@@ -601,7 +600,7 @@ public function clean(DOMElement $e, string $tag): void
601600
* "Fishy" is an algorithm based on content length, classnames,
602601
* link density, number of images & embeds, etc.
603602
*/
604-
public function cleanConditionally(DOMElement $e, string $tag): void
603+
public function cleanConditionally(\DOMElement $e, string $tag): void
605604
{
606605
if (!$this->flagIsActive(self::FLAG_CLEAN_CONDITIONALLY)) {
607606
return;
@@ -714,7 +713,7 @@ public function cleanConditionally(DOMElement $e, string $tag): void
714713
/**
715714
* Clean out spurious headers from an Element. Checks things like classnames and link density.
716715
*/
717-
public function cleanHeaders(DOMElement $e): void
716+
public function cleanHeaders(\DOMElement $e): void
718717
{
719718
for ($headerIndex = 1; $headerIndex < 3; ++$headerIndex) {
720719
$headers = $e->getElementsByTagName('h' . $headerIndex);
@@ -754,7 +753,7 @@ public function removeFlag(int $flag): void
754753
/**
755754
* Get the article title as an H1.
756755
*
757-
* @return DOMElement
756+
* @return \DOMElement
758757
*/
759758
protected function getArticleTitle()
760759
{
@@ -826,7 +825,7 @@ protected function prepDocument(): void
826825
* Initialize a node with the readability object. Also checks the
827826
* className/id for special names to add to its score.
828827
*/
829-
protected function initializeNode(DOMElement $node): void
828+
protected function initializeNode(\DOMElement $node): void
830829
{
831830
if (!isset($node->tagName)) {
832831
return;
@@ -894,11 +893,11 @@ protected function initializeNode(DOMElement $node): void
894893
* Using a variety of metrics (content score, classname, element types), find the content that is
895894
* most likely to be the stuff a user wants to read. Then return it wrapped up in a div.
896895
*
897-
* @param DOMElement $page
896+
* @param \DOMElement $page
898897
*
899-
* @return DOMElement|false
898+
* @return \DOMElement|false
900899
*/
901-
protected function grabArticle(DOMElement $page = null)
900+
protected function grabArticle(\DOMElement $page = null)
902901
{
903902
if (!$page) {
904903
$page = $this->dom;
@@ -1040,7 +1039,7 @@ protected function grabArticle(DOMElement $page = null)
10401039
// For every SCORE_CHARS_IN_PARAGRAPH (default:100) characters in this paragraph, add another point. Up to 3 points.
10411040
$contentScore += min(floor(mb_strlen($innerText) / self::SCORE_CHARS_IN_PARAGRAPH), 3);
10421041
// For every SCORE_WORDS_IN_PARAGRAPH (default:20) words in this paragraph, add another point. Up to 3 points.
1043-
//$contentScore += min(floor($this->getWordCount($innerText) / self::SCORE_WORDS_IN_PARAGRAPH), 3);
1042+
// $contentScore += min(floor($this->getWordCount($innerText) / self::SCORE_WORDS_IN_PARAGRAPH), 3);
10441043

10451044
foreach ($ancestors as $level => $ancestor) {
10461045
if (!$ancestor->nodeName || !$ancestor->parentNode) {
@@ -1211,7 +1210,7 @@ protected function grabArticle(DOMElement $page = null)
12111210
if (0 === strcasecmp($tagName, 'td') || 0 === strcasecmp($tagName, 'tr')) {
12121211
$up = $topCandidate;
12131212

1214-
if ($up->parentNode instanceof DOMElement) {
1213+
if ($up->parentNode instanceof \DOMElement) {
12151214
$up = $up->parentNode;
12161215

12171216
if (0 === strcasecmp($up->tagName, 'table')) {
@@ -1292,8 +1291,8 @@ protected function grabArticle(DOMElement $page = null)
12921291

12931292
// To ensure a node does not interfere with readability styles, remove its classnames & ids.
12941293
// Now done via RegExp post_filter.
1295-
//$nodeToAppend->removeAttribute('class');
1296-
//$nodeToAppend->removeAttribute('id');
1294+
// $nodeToAppend->removeAttribute('class');
1295+
// $nodeToAppend->removeAttribute('id');
12971296
// Append sibling and subtract from our list as appending removes a node.
12981297
$articleContent->appendChild($nodeToAppend);
12991298
}
@@ -1340,7 +1339,7 @@ protected function grabArticle(DOMElement $page = null)
13401339
* Get an element weight by attribute.
13411340
* Uses regular expressions to tell if this element looks good or bad.
13421341
*/
1343-
protected function weightAttribute(DOMElement $element, string $attribute): int
1342+
protected function weightAttribute(\DOMElement $element, string $attribute): int
13441343
{
13451344
if (!$element->hasAttribute($attribute)) {
13461345
return 0;
@@ -1427,7 +1426,7 @@ private function loadHtml(): void
14271426
unset($tidy);
14281427
}
14291428

1430-
$this->html = mb_convert_encoding((string) $this->html, 'HTML-ENTITIES', 'UTF-8');
1429+
$this->html = '<meta charset="utf-8">' . (string) $this->html;
14311430

14321431
if ('html5lib' === $this->parser || 'html5' === $this->parser) {
14331432
$this->dom = (new HTML5())->loadHTML($this->html);
@@ -1443,14 +1442,14 @@ private function loadHtml(): void
14431442
libxml_use_internal_errors(false);
14441443
}
14451444

1446-
$this->dom->registerNodeClass(DOMElement::class, \Readability\JSLikeHTMLElement::class);
1445+
$this->dom->registerNodeClass(\DOMElement::class, \Readability\JSLikeHTMLElement::class);
14471446
}
14481447

1449-
private function getAncestors(DOMElement $node, int $maxDepth = 0): array
1448+
private function getAncestors(\DOMElement $node, int $maxDepth = 0): array
14501449
{
14511450
$ancestors = [];
14521451
$i = 0;
1453-
while ($node->parentNode instanceof DOMElement) {
1452+
while ($node->parentNode instanceof \DOMElement) {
14541453
$ancestors[] = $node->parentNode;
14551454
if (++$i === $maxDepth) {
14561455
break;
@@ -1470,7 +1469,7 @@ private function isPhrasingContent($node): bool
14701469
}, iterator_to_array($node->childNodes)), true));
14711470
}
14721471

1473-
private function hasSingleTagInsideElement(DOMElement $node, string $tag): bool
1472+
private function hasSingleTagInsideElement(\DOMElement $node, string $tag): bool
14741473
{
14751474
if (1 !== $node->childNodes->length || $node->childNodes->item(0)->nodeName !== $tag) {
14761475
return false;
@@ -1490,11 +1489,11 @@ private function hasSingleTagInsideElement(DOMElement $node, string $tag): bool
14901489
* Tidy must be configured to not clean the input for this function to
14911490
* work as expected, see $this->tidy_config['clean']
14921491
*/
1493-
private function isNodeVisible(DOMElement $node): bool
1492+
private function isNodeVisible(\DOMElement $node): bool
14941493
{
14951494
return !($node->hasAttribute('style')
14961495
&& preg_match($this->regexps['isNotVisible'], $node->getAttribute('style'))
1497-
)
1496+
)
14981497
&& !$node->hasAttribute('hidden');
14991498
}
15001499
}

0 commit comments

Comments
 (0)