diff --git a/acceptance/go.mod b/acceptance/go.mod index 31ec417d..07aa6d34 100644 --- a/acceptance/go.mod +++ b/acceptance/go.mod @@ -37,6 +37,7 @@ require ( github.com/kylelemons/godebug v1.1.0 // indirect github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect + github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06 go.opencensus.io v0.24.0 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0 // indirect go.opentelemetry.io/otel v1.24.0 // indirect diff --git a/acceptance/go.sum b/acceptance/go.sum index f9ead4e4..db1d5742 100644 --- a/acceptance/go.sum +++ b/acceptance/go.sum @@ -105,11 +105,14 @@ github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8= github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= +github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06 h1:OkMGxebDjyw0ULyrTYWeN0UNCCkmCWfjPnIA2W6oviI= +github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06/go.mod h1:+ePHsJ1keEjQtpvf9HHw0f4ZeJ0TLRsxhunSI2hYJSs= github.com/sethvargo/go-githubactions v1.2.0 h1:Gbr36trCAj6uq7Rx1DolY1NTIg0wnzw3/N5WHdKIjME= github.com/sethvargo/go-githubactions v1.2.0/go.mod h1:7/4WeHgYfSz9U5vwuToCK9KPnELVHAhGtRwLREOQV80= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= diff --git a/acceptance/main.go b/acceptance/main.go index 54579f38..6cd4c9d1 100644 --- a/acceptance/main.go +++ b/acceptance/main.go @@ -15,6 +15,7 @@ import ( "github.com/databrickslabs/sandbox/acceptance/notify" "github.com/databrickslabs/sandbox/acceptance/redaction" "github.com/databrickslabs/sandbox/acceptance/testenv" + "github.com/databrickslabs/sandbox/acceptance/todos" "github.com/databrickslabs/sandbox/go-libs/env" "github.com/databrickslabs/sandbox/go-libs/github" "github.com/databrickslabs/sandbox/go-libs/slack" @@ -45,7 +46,35 @@ type acceptance struct { *boilerplate.Boilerplate } +func (a *acceptance) canCreateIssues() bool { + createIssues := strings.ToLower(a.Action.GetInput("create_issues")) + return createIssues == "true" || createIssues == "yes" +} + +func (a *acceptance) syncTodos(ctx context.Context) error { + if !a.canCreateIssues() { + return nil + } + directory, _, err := a.getProject() + if err != nil { + return fmt.Errorf("project: %w", err) + } + techDebt, err := todos.New(ctx, a.GitHub, directory) + if err != nil { + return fmt.Errorf("tech debt: %w", err) + } + err = techDebt.CreateIssues(ctx) + if err != nil { + return fmt.Errorf("create: %w", err) + } + return nil +} + func (a *acceptance) trigger(ctx context.Context) (*notify.Notification, error) { + err := a.syncTodos(ctx) + if err != nil { + return nil, fmt.Errorf("sync todos: %w", err) + } vaultURI := a.Action.GetInput("vault_uri") directory, project, err := a.getProject() if err != nil { @@ -129,9 +158,8 @@ func (a *acceptance) runWithTimeout( func (a *acceptance) notifyIfNeeded(ctx context.Context, alert *notify.Notification) error { slackWebhook := a.Action.GetInput("slack_webhook") - createIssues := strings.ToLower(a.Action.GetInput("create_issues")) needsSlack := slackWebhook != "" - needsIssues := createIssues == "true" || createIssues == "yes" + needsIssues := a.canCreateIssues() needsNotification := needsSlack || needsIssues if !alert.Report.Pass() && needsNotification { if needsSlack { diff --git a/acceptance/shim.js b/acceptance/shim.js index 77d8f297..72ed69b6 100644 --- a/acceptance/shim.js +++ b/acceptance/shim.js @@ -1,4 +1,4 @@ -const version = 'v0.3.1'; +const version = 'v0.4.1'; const action = 'acceptance'; const { createWriteStream, chmodSync } = require('fs'); diff --git a/acceptance/todos/finder.go b/acceptance/todos/finder.go new file mode 100644 index 00000000..635b1ecb --- /dev/null +++ b/acceptance/todos/finder.go @@ -0,0 +1,160 @@ +package todos + +import ( + "context" + "fmt" + "os" + "path/filepath" + "regexp" + "strings" + + "github.com/databrickslabs/sandbox/go-libs/fileset" + "github.com/databrickslabs/sandbox/go-libs/git" + "github.com/databrickslabs/sandbox/go-libs/github" +) + +func New(ctx context.Context, gh *github.GitHubClient, root string) (*TechnicalDebtFinder, error) { + fs, err := fileset.RecursiveChildren(root) + if err != nil { + return nil, fmt.Errorf("fileset: %w", err) + } + raw, err := os.ReadFile(filepath.Join(root, ".gitignore")) + if err != nil { + return nil, fmt.Errorf("read .gitignore: %w", err) + } + ignorer := newIncluder(append(strings.Split(string(raw), "\n"), ".git/", "*.gif", "*.png")) + var scope fileset.FileSet + for _, file := range fs { + include, _ := ignorer.IgnoreFile(file.Relative) + if !include { + continue + } + scope = append(scope, file) + } + checkout, err := git.NewCheckout(ctx, root) + if err != nil { + return nil, fmt.Errorf("git: %w", err) + } + return &TechnicalDebtFinder{ + fs: scope, + git: checkout, + gh: gh, + }, nil +} + +type TechnicalDebtFinder struct { + fs fileset.FileSet + git *git.Checkout + gh *github.GitHubClient +} + +type Todo struct { + message string + link string + blame string +} + +func (f *TechnicalDebtFinder) CreateIssues(ctx context.Context) error { + todos, err := f.allTodos(ctx) + if err != nil { + return fmt.Errorf("all todos: %w", err) + } + seen, err := f.seenIssues(ctx) + if err != nil { + return fmt.Errorf("seen issues: %w", err) + } + for _, todo := range todos { + if seen[todo.message] { + continue + } + if err := f.createIssue(ctx, todo); err != nil { + return fmt.Errorf("create issue: %w", err) + } + } + return nil +} + +func (f *TechnicalDebtFinder) createIssue(ctx context.Context, todo Todo) error { + org, repo, ok := f.git.OrgAndRepo() + if !ok { + return fmt.Errorf("git org and repo") + } + _, err := f.gh.CreateIssue(ctx, org, repo, github.NewIssue{ + Title: fmt.Sprintf("[TODO] %s", todo.message), + Body: fmt.Sprintf("See [more context](%s):\n%s", todo.blame, todo.link), + Labels: []string{"tech debt"}, + }) + if err != nil { + return fmt.Errorf("create issue: %w", err) + } + return nil +} + +func (f *TechnicalDebtFinder) seenIssues(ctx context.Context) (map[string]bool, error) { + org, repo, ok := f.git.OrgAndRepo() + if !ok { + return nil, fmt.Errorf("git org and repo") + } + seen := map[string]bool{} + it := f.gh.ListRepositoryIssues(ctx, org, repo, &github.ListIssues{ + State: "all", + Labels: []string{"tech debt"}, + }) + for it.HasNext(ctx) { + issue, err := it.Next(ctx) + if err != nil { + return nil, fmt.Errorf("next: %w", err) + } + if !strings.HasPrefix(issue.Title, "[TODO] ") { + continue + } + norm := strings.TrimPrefix(issue.Title, "[TODO] ") + seen[norm] = true + } + return seen, nil +} + +func (f *TechnicalDebtFinder) allTodos(ctx context.Context) ([]Todo, error) { + prefix, err := f.embedPrefix(ctx) + if err != nil { + return nil, fmt.Errorf("prefix: %w", err) + } + var todos []Todo + needle := regexp.MustCompile(`TODO:(.*)`) + for _, v := range f.fs { + if !strings.HasSuffix(v.Absolute, v.Relative) { + continue // avoid false positives like https://github.com/databrickslabs/ucx/issues/3193 + } + raw, err := v.Raw() + if err != nil { + return nil, fmt.Errorf("%s: %w", v.Relative, err) + } + lines := strings.Split(string(raw), "\n") + for i, line := range lines { + if !needle.MatchString(line) { + continue + } + link := fmt.Sprintf("%s/%s#L%d-L%d", prefix, v.Relative, i, i+5) + todos = append(todos, Todo{ + message: strings.TrimSpace(needle.FindStringSubmatch(line)[1]), + link: link, + blame: strings.ReplaceAll(link, "/blob/", "/blame/"), + }) + } + } + return todos, nil +} + +func (f *TechnicalDebtFinder) embedPrefix(ctx context.Context) (string, error) { + org, repo, ok := f.git.OrgAndRepo() + if !ok { + return "", fmt.Errorf("git org and repo") + } + commits, err := f.git.History(ctx) + if err != nil { + return "", fmt.Errorf("git history: %w", err) + } + // example: https://github.com/databrickslabs/ucx/blob/69a0cf8ce3450680dc222150f500d84a1eb523fc/src/databricks/labs/ucx/azure/access.py#L25-L35 + prefix := fmt.Sprintf("https://github.com/%s/%s/blob/%s", org, repo, commits[0].Sha) + return prefix, nil +} diff --git a/acceptance/todos/finder_test.go b/acceptance/todos/finder_test.go new file mode 100644 index 00000000..ec358227 --- /dev/null +++ b/acceptance/todos/finder_test.go @@ -0,0 +1,21 @@ +package todos + +import ( + "context" + "testing" + + "github.com/databrickslabs/sandbox/go-libs/github" + "github.com/stretchr/testify/assert" +) + +func TestXXX(t *testing.T) { + t.SkipNow() + ctx := context.Background() + + gh := github.NewClient(&github.GitHubConfig{}) + f, err := New(ctx, gh, "/Users/serge.smertin/git/labs/remorph") + assert.NoError(t, err) + + err = f.CreateIssues(ctx) + assert.NoError(t, err) +} diff --git a/acceptance/todos/ignorer.go b/acceptance/todos/ignorer.go new file mode 100644 index 00000000..6260342a --- /dev/null +++ b/acceptance/todos/ignorer.go @@ -0,0 +1,59 @@ +// copied from https://github.com/databricks/cli/blob/71cf426755260afc9152b41d231b9d0add495497/libs/fileset/ignorer.go + +package todos + +import ( + ignore "github.com/sabhiram/go-gitignore" +) + +// Ignorer is the interface for what determines if a path +// in the [FileSet] must be ignored or not. +type Ignorer interface { + IgnoreFile(path string) (bool, error) + IgnoreDirectory(path string) (bool, error) +} + +// nopIgnorer implements an [Ignorer] that doesn't ignore anything. +type nopIgnorer struct{} + +func (nopIgnorer) IgnoreFile(path string) (bool, error) { + return false, nil +} + +func (nopIgnorer) IgnoreDirectory(path string) (bool, error) { + return false, nil +} + +type includer struct { + matcher *ignore.GitIgnore +} + +func newIncluder(includes []string) *includer { + matcher := ignore.CompileIgnoreLines(includes...) + return &includer{ + matcher, + } +} + +func (i *includer) IgnoreFile(path string) (bool, error) { + return i.ignore(path), nil +} + +// In the context of 'include' functionality, the Ignorer logic appears to be reversed: +// For patterns like 'foo/bar/' which intends to match directories only, we still need to traverse into the directory for potential file matches. +// Ignoring the directory entirely isn't an option, especially when dealing with patterns like 'foo/bar/*.go'. +// While this pattern doesn't target directories, it does match all Go files within them and ignoring directories not matching the pattern +// Will result in missing file matches. +// During the tree traversal process, we call 'IgnoreDirectory' on ".", "./foo", and "./foo/bar", +// all while applying the 'foo/bar/*.go' pattern. To handle this situation effectively, it requires to make the code more complex. +// This could mean generating various prefix patterns to facilitate the exclusion of directories from traversal. +// It's worth noting that, in this particular case, opting for a simpler logic results in a performance trade-off. +func (i *includer) IgnoreDirectory(path string) (bool, error) { + return false, nil +} + +func (i *includer) ignore(path string) bool { + matched := i.matcher.MatchesPath(path) + // If matched, do not ignore the file because we want to include it + return !matched +}