	"path"
	"regexp"
	"runtime"
+	"slices"
	"strings"
	"sync"

@@ -22,7 +23,6 @@ import (
	publiccode "github.com/italia/publiccode-parser-go/v4"
	log "github.com/sirupsen/logrus"
	"github.com/spf13/viper"
-	"golang.org/x/exp/slices"
)

// Crawler is a helper class representing a crawler.
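Note: the import hunks above replace golang.org/x/exp/slices with the standard-library slices package (available since Go 1.21). Below is a minimal, standalone sketch of what the swap amounts to at a call site, assuming the crawler only relies on helpers such as slices.Contains that exist in both packages; the package, names and data here are illustrative, not taken from the crawler:

package main

import (
	"fmt"
	"slices" // standard library since Go 1.21; replaces golang.org/x/exp/slices for this use
)

func main() {
	hosts := []string{"github.com", "gitlab.com", "bitbucket.org"}
	// The call site is unchanged; only the import path differs.
	fmt.Println(slices.Contains(hosts, "github.com")) // true
}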
@@ -162,47 +162,6 @@ func (c *Crawler) CrawlPublishers(publishers []common.Publisher) error {
	return c.crawl()
}

-func (c *Crawler) crawl() error {
-	reposChan := make(chan common.Repository)
-
-	// Start the metrics server.
-	go metrics.StartPrometheusMetricsServer()
-
-	defer c.publishersWg.Wait()
-
-	// Get cpus number
-	numCPUs := runtime.NumCPU()
-	log.Debugf("CPUs #: %d", numCPUs)
-
-	// Process the repositories in order to retrieve the files.
-	for i := 0; i < numCPUs; i++ {
-		c.repositoriesWg.Add(1)
-		go func(id int) {
-			log.Debugf("Starting ProcessRepositories() goroutine (#%d)", id)
-			c.ProcessRepositories(reposChan)
-		}(i)
-	}
-
-	for repo := range c.repositories {
-		reposChan <- repo
-	}
-	close(reposChan)
-	c.repositoriesWg.Wait()
-
-	log.Infof(
-		"Summary: Total repos scanned: %v. With good publiccode.yml file: %v. With bad publiccode.yml file: %v\n" +
-			"Repos with good publiccode.yml file: New repos: %v, Known repos: %v, Failures saving to API: %v",
-		metrics.GetCounterValue("repository_processed", c.Index),
-		metrics.GetCounterValue("repository_good_publiccodeyml", c.Index),
-		metrics.GetCounterValue("repository_bad_publiccodeyml", c.Index),
-		metrics.GetCounterValue("repository_new", c.Index),
-		metrics.GetCounterValue("repository_known", c.Index),
-		metrics.GetCounterValue("repository_upsert_failures", c.Index),
-	)
-
-	return nil
-}
-
// ScanPublisher scans all the publisher' repositories and sends the ones
// with a valid publiccode.yml to the repositories channel.
func (c *Crawler) ScanPublisher(publisher common.Publisher) {
@@ -484,6 +443,47 @@ func (c *Crawler) ProcessRepo(repository common.Repository) { //nolint:maintidx
	}
}

+func (c *Crawler) crawl() error {
+	reposChan := make(chan common.Repository)
+
+	// Start the metrics server.
+	go metrics.StartPrometheusMetricsServer()
+
+	defer c.publishersWg.Wait()
+
+	// Get cpus number
+	numCPUs := runtime.NumCPU()
+	log.Debugf("CPUs #: %d", numCPUs)
+
+	// Process the repositories in order to retrieve the files.
+	for i := range numCPUs {
+		c.repositoriesWg.Add(1)
+		go func(id int) {
+			log.Debugf("Starting ProcessRepositories() goroutine (#%d)", id)
+			c.ProcessRepositories(reposChan)
+		}(i)
+	}
+
+	for repo := range c.repositories {
+		reposChan <- repo
+	}
+	close(reposChan)
+	c.repositoriesWg.Wait()
+
+	log.Infof(
+		"Summary: Total repos scanned: %v. With good publiccode.yml file: %v. With bad publiccode.yml file: %v\n" +
+			"Repos with good publiccode.yml file: New repos: %v, Known repos: %v, Failures saving to API: %v",
+		metrics.GetCounterValue("repository_processed", c.Index),
+		metrics.GetCounterValue("repository_good_publiccodeyml", c.Index),
+		metrics.GetCounterValue("repository_bad_publiccodeyml", c.Index),
+		metrics.GetCounterValue("repository_new", c.Index),
+		metrics.GetCounterValue("repository_known", c.Index),
+		metrics.GetCounterValue("repository_upsert_failures", c.Index),
+	)
+
+	return nil
+}
+
// validateFile performs additional validations that are not strictly mandated
// by the publiccode.yml Standard.
// Using `one` command this check will be skipped.
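Note: besides moving crawl() below ProcessRepo(), the hunk above switches the worker loop from the three-clause form (for i := 0; i < numCPUs; i++) to Go 1.22's range-over-int form (for i := range numCPUs), which performs the same iterations. A standalone sketch of that worker-pool shape, using hypothetical stand-ins (an int jobs channel and a local WaitGroup) rather than the crawler's own types:

package main

import (
	"fmt"
	"runtime"
	"sync"
)

func main() {
	numCPUs := runtime.NumCPU()
	jobs := make(chan int) // stand-in for the crawler's repositories channel

	var wg sync.WaitGroup
	// Go 1.22+: "for i := range numCPUs" yields i = 0 .. numCPUs-1,
	// exactly like "for i := 0; i < numCPUs; i++".
	for i := range numCPUs {
		wg.Add(1)
		go func(id int) {
			defer wg.Done()
			for j := range jobs {
				fmt.Printf("worker %d processed job %d\n", id, j)
			}
		}(i)
	}

	// Feed the workers, close the channel, then wait, mirroring crawl().
	for j := 0; j < 5; j++ {
		jobs <- j
	}
	close(jobs)
	wg.Wait()
}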