Skip to content

Commit 4491b3d

Browse files
committed
feat(scanner): extract plugin slugs from /wp-content/uploads
1 parent 338820f commit 4491b3d

File tree

1 file changed

+29
-26
lines changed

1 file changed

+29
-26
lines changed

internal/scanner/html.go

Lines changed: 29 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -20,8 +20,8 @@
2020
package scanner
2121

2222
import (
23-
"fmt"
2423
"io"
24+
"regexp"
2525
"strings"
2626
"time"
2727

@@ -30,17 +30,17 @@ import (
3030
)
3131

3232
func discoverPluginsFromHTML(target string, headers []string) ([]string, error) {
33-
normalized := utils.NormalizeURL(target) + "/"
34-
33+
normalized := utils.NormalizeURL(target)
3534
client := utils.NewHTTPClient(10*time.Second, headers)
36-
htmlContent, err := client.Get(normalized)
37-
if err != nil {
38-
return nil, fmt.Errorf("failed to fetch homepage %s: %w", normalized, err)
39-
}
4035

4136
slugsSet := make(map[string]struct{})
42-
if err := extractSlugsFromReader(strings.NewReader(htmlContent), slugsSet); err != nil {
43-
return nil, fmt.Errorf("failed to parse HTML %s: %w", normalized, err)
37+
38+
if body, err := client.Get(normalized + "/"); err == nil {
39+
_ = extractSlugsFromReader(strings.NewReader(body), slugsSet)
40+
}
41+
42+
if body, err := client.Get(normalized + "/wp-content/uploads/"); err == nil {
43+
_ = extractSlugsFromReader(strings.NewReader(body), slugsSet)
4444
}
4545

4646
var slugs []string
@@ -51,31 +51,34 @@ func discoverPluginsFromHTML(target string, headers []string) ([]string, error)
5151
}
5252

5353
func extractSlugsFromReader(r io.Reader, dest map[string]struct{}) error {
54-
z := html.NewTokenizer(r)
54+
var slugPattern = regexp.MustCompile(`(?i)^[a-z][a-z0-9_-]*$`)
5555

56+
z := html.NewTokenizer(r)
5657
for {
5758
tt := z.Next()
58-
switch tt {
59-
case html.ErrorToken:
59+
if tt == html.ErrorToken {
6060
if z.Err() == io.EOF {
6161
return nil
6262
}
6363
return z.Err()
64+
}
65+
if tt != html.StartTagToken && tt != html.SelfClosingTagToken {
66+
continue
67+
}
6468

65-
case html.StartTagToken, html.SelfClosingTagToken:
66-
t := z.Token()
67-
for _, attr := range t.Attr {
68-
val := strings.TrimSpace(attr.Val)
69-
if val == "" {
70-
continue
71-
}
72-
if attr.Key == "href" || attr.Key == "src" {
73-
if idx := strings.Index(val, "/wp-content/plugins/"); idx != -1 {
74-
rest := val[idx+len("/wp-content/plugins/"):]
75-
parts := strings.SplitN(rest, "/", 2)
76-
if len(parts) > 0 && parts[0] != "" {
77-
dest[parts[0]] = struct{}{}
78-
}
69+
tok := z.Token()
70+
for _, attr := range tok.Attr {
71+
val := strings.TrimSpace(attr.Val)
72+
if val == "" {
73+
continue
74+
}
75+
parts := strings.Split(val, "/")
76+
for i := 0; i < len(parts)-2; i++ {
77+
if parts[i] == "wp-content" &&
78+
(parts[i+1] == "plugins" || parts[i+1] == "uploads") {
79+
slug := parts[i+2]
80+
if slugPattern.MatchString(slug) {
81+
dest[slug] = struct{}{}
7982
}
8083
}
8184
}

0 commit comments

Comments
 (0)