diff --git a/.gitignore b/.gitignore index 40f7ab8..352e539 100644 --- a/.gitignore +++ b/.gitignore @@ -75,3 +75,6 @@ src/lang/testdata tools abcoder + +/*.txt +/*.json diff --git a/README.md b/README.md index 08119de..1e3ee01 100644 --- a/README.md +++ b/README.md @@ -52,6 +52,7 @@ ABCoder currently supports the following languages: | Go | ✅ | ✅ | | Rust | ✅ | Coming Soon | | C | Coming Soon | ❌ | +| Python | Coming Soon | ❌ | diff --git a/lang/collect/collect.go b/lang/collect/collect.go index c68f869..1cda834 100644 --- a/lang/collect/collect.go +++ b/lang/collect/collect.go @@ -22,8 +22,10 @@ import ( "strings" "unicode" + "github.com/cloudwego/abcoder/lang/cxx" "github.com/cloudwego/abcoder/lang/log" . "github.com/cloudwego/abcoder/lang/lsp" + "github.com/cloudwego/abcoder/lang/python" "github.com/cloudwego/abcoder/lang/rust" "github.com/cloudwego/abcoder/lang/uniast" ) @@ -35,8 +37,13 @@ type CollectOption struct { NoNeedComment bool NeedTest bool Excludes []string + CacheResults bool } +const ( + SUPRESS_COLLECT_ERRORS = true +) + type Collector struct { cli *LSPClient spec LanguageSpec @@ -79,6 +86,10 @@ func switchSpec(l uniast.Language) LanguageSpec { switch l { case uniast.Rust: return &rust.RustSpec{} + case uniast.Cxx: + return &cxx.CxxSpec{} + case uniast.Python: + return &python.PythonSpec{} default: panic(fmt.Sprintf("unsupported language %s", l)) } @@ -101,6 +112,24 @@ func NewCollector(repo string, cli *LSPClient) *Collector { } func (c *Collector) Collect(ctx context.Context) error { + // Example code to configure the LSP client + if !c.NeedStdSymbol { + if c.Language == uniast.Python { + conf := map[string]interface{}{ + "settings": map[string]interface{}{ + "pylsp": map[string]interface{}{ + "plugins": map[string]interface{}{ + "jedi_definition": map[string]interface{}{ + "follow_builtin_definitions": false, + }, + }, + }, + }, + } + c.cli.Notify(ctx, "workspace/didChangeConfiguration", conf) + } + } + excludes := make([]string, 
len(c.Excludes)) for i, e := range c.Excludes { if !filepath.IsAbs(e) { @@ -111,8 +140,8 @@ func (c *Collector) Collect(ctx context.Context) error { } // scan all files - roots := make([]*DocumentSymbol, 0, 1024) - scanner := func(path string, info os.FileInfo, err error) error { + collect_paths := make([]string, 0, 1024) + if err := filepath.Walk(c.repo, func(path string, info os.FileInfo, err error) error { if err != nil { return err } @@ -127,20 +156,35 @@ func (c *Collector) Collect(ctx context.Context) error { if c.spec.ShouldSkip(path) { return nil } + collect_paths = append(collect_paths, path) + return nil + }); err != nil { + return err + } - // collect symbols + // collect symbols + roots := make([]*DocumentSymbol, 0, 1024) + for i, path := range collect_paths { uri := NewURI(path) symbols, err := c.cli.DocumentSymbols(ctx, uri) if err != nil { return err } + log.Info("collecting %d/%d files %s, has %d symbols\n", i, len(collect_paths), path, len(symbols)) // file := filepath.Base(path) + n_sym := 0 for _, sym := range symbols { + log.Debug(" collecting symbol %d/%d %s\n", n_sym, len(symbols), sym.Name) + n_sym++ // collect content content, err := c.cli.Locate(sym.Location) if err != nil { return err } + // HACK: skip imported symbols + if c.Language == uniast.Python && (strings.HasPrefix(content, "from ") || strings.HasPrefix(content, "import ")) { + continue + } // collect tokens tokens, err := c.cli.SemanticTokens(ctx, sym.Location) if err != nil { @@ -151,12 +195,8 @@ func (c *Collector) Collect(ctx context.Context) error { c.syms[sym.Location] = sym roots = append(roots, sym) } - - return nil - } - if err := filepath.Walk(c.repo, scanner); err != nil { - return err } + log.Info("collected %d root symbols. 
going to collect more syms and dependencies...\n", len(roots)) // collect some extra metadata syms := make([]*DocumentSymbol, 0, len(roots)) @@ -167,6 +207,7 @@ func (c *Collector) Collect(ctx context.Context) error { } c.processSymbol(ctx, sym, 1) } + log.Info("collected %d symbols. going to collect dependencies...\n", len(c.syms)) // collect internal references // for _, sym := range syms { @@ -200,8 +241,11 @@ func (c *Collector) Collect(ctx context.Context) error { // } // } + num_edges := 0 // collect dependencies - for _, sym := range syms { + for i, sym := range syms { + log.Info("collecting dependencies %d/%d %s\n", i, len(syms), sym.Name) + next_token: for i, token := range sym.Tokens { @@ -247,7 +291,9 @@ func (c *Collector) Collect(ctx context.Context) error { // go to definition dep, err := c.getSymbolByToken(ctx, token) if err != nil || dep == nil { - log.Error("dep token %v not found: %v\n", token, err) + if !SUPRESS_COLLECT_ERRORS { + log.Error("dep token %v not found: %v\n", token, err) + } continue } @@ -268,6 +314,8 @@ func (c *Collector) Collect(ctx context.Context) error { c.syms[dep.Location] = dep } + log.Debug(" Collect: dep %s -> %s (file: %s -> %s)\n", sym.Name, dep.Name, sym.Location, token.Location) + num_edges++ c.deps[sym] = append(c.deps[sym], dependency{ Location: token.Location, Symbol: dep, @@ -276,6 +324,7 @@ func (c *Collector) Collect(ctx context.Context) error { } } + log.Info("collected %d symbols, %d edges.\n", len(c.syms), num_edges) return nil } @@ -297,18 +346,31 @@ func (c *Collector) getSymbolByTokenWithLimit(ctx context.Context, tok Token, de return nil, fmt.Errorf("definition of token %s not found", tok) } if len(defs) > 1 { - log.Error("definition of token %s not unique", tok) + if !SUPRESS_COLLECT_ERRORS { + log.Error("definition of token %s not unique", tok) + } } return c.getSymbolByLocation(ctx, defs[0], depth, tok) } func (c *Collector) filterEntitySymbols(syms []*DocumentSymbol) *DocumentSymbol { + // Choose the 
most specific symbol + var mostSpecific *DocumentSymbol + mostSpecific = nil for _, sym := range syms { - if c.spec.IsEntitySymbol(*sym) { - return sym + if !c.spec.IsEntitySymbol(*sym) { + continue + } + if mostSpecific == nil || mostSpecific.Location.Include(sym.Location) { + // replace most specific + mostSpecific = sym + } else if sym.Location.Include(mostSpecific.Location) { + // retain most specific + } else { + log.Error("multiple symbols %s and %s not include each other", mostSpecific, sym) } } - return nil + return mostSpecific } // return a language entity symbol @@ -488,7 +550,9 @@ func (c *Collector) getDepsWithLimit(ctx context.Context, sym *DocumentSymbol, t for _, tp := range tps { dep, err := c.getSymbolByTokenWithLimit(ctx, sym.Tokens[tp], depth) if err != nil || sym == nil { - log.Error_skip(1, "token %v not found its symbol: %v", tp, err) + if !SUPRESS_COLLECT_ERRORS { + log.Error_skip(1, "token %v not found its symbol: %v", tp, err) + } } else { d := dependency{sym.Tokens[tp].Location, dep} tsyms[tp] = d @@ -525,7 +589,7 @@ func (c *Collector) collectImpl(ctx context.Context, sym *DocumentSymbol, depth impl = ChunkHead(sym.Text, sym.Location.Range.Start, sym.Tokens[fn].Location.Range.Start) } if impl == "" || len(impl) < len(sym.Name) { - impl = sym.Name + impl = fmt.Sprintf("class %s {\n", sym.Name) } // search all methods for _, method := range c.syms { @@ -581,12 +645,16 @@ func (c *Collector) processSymbol(ctx context.Context, sym *DocumentSymbol, dept } } if i < 0 || i >= len(sym.Tokens) { - log.Error("get type token of variable symbol %s failed\n", sym) + if !SUPRESS_COLLECT_ERRORS { + log.Error("get type token of variable symbol %s failed\n", sym) + } return } tsym, err := c.getSymbolByTokenWithLimit(ctx, sym.Tokens[i], depth-1) if err != nil || tsym == nil { - log.Error("get type symbol for token %s failed:%v\n", sym.Tokens[i], err) + if !SUPRESS_COLLECT_ERRORS { + log.Error("get type symbol for token %s failed:%v\n", sym.Tokens[i], err) 
+ } return } c.vars[sym] = dependency{ @@ -613,9 +681,12 @@ func (c *Collector) updateFunctionInfo(sym *DocumentSymbol, tsyms, ipsyms, opsym } } else { f = functionInfo{ - TypeParams: tsyms, - Inputs: ipsyms, - Outputs: opsyms, + TypeParams: tsyms, + Inputs: ipsyms, + Outputs: opsyms, + InputsSorted: is, + OutputsSorted: os, + TypeParamsSorted: ts, } if rsym != nil { if f.Method == nil { diff --git a/lang/collect/export.go b/lang/collect/export.go index 3101d8a..593cd70 100644 --- a/lang/collect/export.go +++ b/lang/collect/export.go @@ -27,6 +27,10 @@ import ( "github.com/cloudwego/abcoder/lang/uniast" ) +const ( + SUPRESS_EXPORT_OUTPUT = true +) + type dependency struct { Location Location `json:"location"` Symbol *DocumentSymbol `json:"symbol"` @@ -40,16 +44,17 @@ func (c *Collector) fileLine(loc Location) uniast.FileLine { rel = filepath.Base(loc.URI.File()) } text := c.cli.GetFile(loc.URI).Text + uri_str := string(loc.URI) return uniast.FileLine{ File: rel, Line: loc.Range.Start.Line, - StartOffset: lsp.PositionOffset(text, loc.Range.Start), - EndOffset: lsp.PositionOffset(text, loc.Range.End), + StartOffset: lsp.PositionOffsetIdentified(uri_str, text, loc.Range.Start), + EndOffset: lsp.PositionOffsetIdentified(uri_str, text, loc.Range.End), } } -func newModule(name string, dir string) *uniast.Module { - ret := uniast.NewModule(name, dir, uniast.Rust) +func newModule(name string, dir string, lang uniast.Language) *uniast.Module { + ret := uniast.NewModule(name, dir, lang) return ret } @@ -67,14 +72,17 @@ func (c *Collector) Export(ctx context.Context) (*uniast.Repository, error) { if err != nil { return nil, err } - repo.Modules[name] = newModule(name, rel) + repo.Modules[name] = newModule(name, rel, c.Language) } // not allow local symbols inside another symbol c.filterLocalSymbols() // export symbols + i := 0 for _, symbol := range c.syms { + log.Info("export symbol %d/%d: %s\n", i, len(c.syms), symbol.Name) + i++ visited := 
make(map[*lsp.DocumentSymbol]*uniast.Identity) _, err := c.exportSymbol(&repo, symbol, "", visited) if err != nil { @@ -83,11 +91,13 @@ func (c *Collector) Export(ctx context.Context) (*uniast.Repository, error) { } // patch module - for p, m := range repo.Modules { - if p == "" || strings.Contains(p, "@") { - continue + if c.modPatcher != nil { + for p, m := range repo.Modules { + if p == "" || strings.Contains(p, "@") { + continue + } + c.modPatcher.Patch(m) } - c.modPatcher.Patch(m) } return &repo, nil @@ -134,13 +144,13 @@ func (c *Collector) exportSymbol(repo *uniast.Repository, symbol *DocumentSymbol id := uniast.NewIdentity(mod, path, name) visited[symbol] = &id - // Load eternal symbol on demands + // Load external symbol on demands if !c.LoadExternalSymbol && (!c.internal(symbol.Location) || symbol.Kind == SKUnknown) { return &id, nil } if repo.Modules[mod] == nil { - repo.Modules[mod] = newModule(mod, "") + repo.Modules[mod] = newModule(mod, "", c.Language) } module := repo.Modules[mod] if repo.Modules[mod].Packages[path] == nil { @@ -284,13 +294,15 @@ func (c *Collector) exportSymbol(repo *uniast.Repository, symbol *DocumentSymbol obj.GlobalVars = make([]uniast.Dependency, 0, len(deps)) } obj.GlobalVars = uniast.InsertDependency(obj.GlobalVars, pdep) - case lsp.SKStruct, lsp.SKTypeParameter, lsp.SKInterface, lsp.SKEnum: + case lsp.SKStruct, lsp.SKTypeParameter, lsp.SKInterface, lsp.SKEnum, lsp.SKClass: if obj.Types == nil { obj.Types = make([]uniast.Dependency, 0, len(deps)) } obj.Types = uniast.InsertDependency(obj.Types, pdep) default: - log.Error("dep symbol %s not collected for %v\n", dep.Symbol, id) + if !SUPRESS_EXPORT_OUTPUT { + log.Error("dep symbol %s not collected for %v\n", dep.Symbol, id) + } } } } @@ -298,7 +310,7 @@ func (c *Collector) exportSymbol(repo *uniast.Repository, symbol *DocumentSymbol pkg.Functions[id.Name] = obj // Type - case lsp.SKStruct, lsp.SKTypeParameter, lsp.SKInterface, lsp.SKEnum: + case lsp.SKStruct, 
lsp.SKTypeParameter, lsp.SKInterface, lsp.SKEnum, lsp.SKClass: obj := &uniast.Type{ FileLine: fileLine, Content: content, @@ -315,10 +327,12 @@ func (c *Collector) exportSymbol(repo *uniast.Repository, symbol *DocumentSymbol continue } switch dep.Symbol.Kind { - case lsp.SKStruct, lsp.SKTypeParameter, lsp.SKInterface, lsp.SKEnum: + case lsp.SKStruct, lsp.SKTypeParameter, lsp.SKInterface, lsp.SKEnum, lsp.SKClass: obj.SubStruct = append(obj.SubStruct, uniast.NewDependency(*depid, c.fileLine(dep.Location))) default: - log.Error("dep symbol %s not collected for \n", dep.Symbol, id) + if !SUPRESS_EXPORT_OUTPUT { + log.Error("dep symbol %s not collected for %v\n", dep.Symbol, id) + } } } } @@ -368,6 +382,9 @@ func mapKind(kind lsp.SymbolKind) uniast.TypeKind { switch kind { case lsp.SKStruct: return "struct" + // XXX: C++ should use class instead of struct + case lsp.SKClass: + return "struct" case lsp.SKTypeParameter: return "type-parameter" case lsp.SKInterface: diff --git a/lang/cxx/lib.go b/lang/cxx/lib.go new file mode 100644 index 0000000..70a82b7 --- /dev/null +++ b/lang/cxx/lib.go @@ -0,0 +1,41 @@ +// Copyright 2025 CloudWeGo Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package cxx + +import ( + "time" + + "github.com/cloudwego/abcoder/lang/uniast" + "github.com/cloudwego/abcoder/lang/utils" +) + +const MaxWaitDuration = 5 * time.Minute + +func GetDefaultLSP() (lang uniast.Language, name string) { + return uniast.Cxx, "clangd-18" +} + +func CheckRepo(repo string) (string, time.Duration) { + openfile := "" + // TODO: check if the project compiles. + + // NOTICE: wait for C projects based on code files (only ".c" files are counted) + _, size := utils.CountFiles(repo, ".c", "SKIPDIR") + wait := 2*time.Second + time.Second*time.Duration(size/1024) + if wait > MaxWaitDuration { + wait = MaxWaitDuration + } + return openfile, wait +} diff --git a/lang/cxx/spec.go b/lang/cxx/spec.go new file mode 100644 index 0000000..00fdc27 --- /dev/null +++ b/lang/cxx/spec.go @@ -0,0 +1,197 @@ +// Copyright 2025 CloudWeGo Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cxx + +import ( + "fmt" + "path/filepath" + "strings" + + lsp "github.com/cloudwego/abcoder/lang/lsp" + "github.com/cloudwego/abcoder/lang/utils" +) + +type CxxSpec struct { + repo string +} + +func NewCxxSpec() *CxxSpec { + return &CxxSpec{} +} + +// XXX: maybe multi module support for C++? 
+func (c *CxxSpec) WorkSpace(root string) (map[string]string, error) { + c.repo = root + rets := map[string]string{} + absPath, err := filepath.Abs(root) + if err != nil { + return nil, fmt.Errorf("failed to get absolute path: %w", err) + } + rets["current"] = absPath + return rets, nil +} + +// returns: mod, path, error +func (c *CxxSpec) NameSpace(path string) (string, string, error) { + // external lib: only standard library (system headers), in /usr/ + if !strings.HasPrefix(path, c.repo) { + if strings.HasPrefix(path, "/usr") { + // assume it is c system library + return "cstdlib", "cstdlib", nil + } + panic(fmt.Sprintf("external lib: %s\n", path)) + } + + return "current", "current", nil + +} + +func (c *CxxSpec) ShouldSkip(path string) bool { + if strings.HasSuffix(path, ".c") || strings.HasSuffix(path, ".h") { + return false + } + return true +} + +func (c *CxxSpec) IsDocToken(tok lsp.Token) bool { + return tok.Type == "comment" +} + +func (c *CxxSpec) DeclareTokenOfSymbol(sym lsp.DocumentSymbol) int { + for i, t := range sym.Tokens { + if c.IsDocToken(t) { + continue + } + for _, m := range t.Modifiers { + if m == "declaration" { + return i + } + } + } + return -1 +} + +func (c *CxxSpec) IsEntityToken(tok lsp.Token) bool { + return tok.Type == "class" || tok.Type == "function" || tok.Type == "variable" +} + +func (c *CxxSpec) IsStdToken(tok lsp.Token) bool { + panic("TODO") +} + +func (c *CxxSpec) TokenKind(tok lsp.Token) lsp.SymbolKind { + switch tok.Type { + case "class": + return lsp.SKStruct + case "enum": + return lsp.SKEnum + case "enumMember": + return lsp.SKEnumMember + case "function", "macro": + return lsp.SKFunction + // rust spec does not treat parameter as a variable + case "parameter": + return lsp.SKVariable + case "typeParameter": + return lsp.SKTypeParameter + // type: TODO + case "interface", "concept", "method", "modifier", "namespace", "type": + panic(fmt.Sprintf("Unsupported token type: %s at %+v\n", tok.Type, tok.Location)) + case 
"bracket", "comment", "label", "operator", "property", "unknown": + return lsp.SKUnknown + } + panic(fmt.Sprintf("Weird token type: %s at %+v\n", tok.Type, tok.Location)) +} + +func (c *CxxSpec) IsMainFunction(sym lsp.DocumentSymbol) bool { + return sym.Kind == lsp.SKFunction && sym.Name == "main" +} + +func (c *CxxSpec) IsEntitySymbol(sym lsp.DocumentSymbol) bool { + typ := sym.Kind + return typ == lsp.SKFunction || typ == lsp.SKVariable || typ == lsp.SKClass + +} + +func (c *CxxSpec) IsPublicSymbol(sym lsp.DocumentSymbol) bool { + id := c.DeclareTokenOfSymbol(sym) + if id == -1 { + return false + } + for _, m := range sym.Tokens[id].Modifiers { + if m == "globalScope" { + return true + } + } + return false +} + +// TODO(cpp): support C++ OOP +func (c *CxxSpec) HasImplSymbol() bool { + return false +} + +func (c *CxxSpec) ImplSymbol(sym lsp.DocumentSymbol) (int, int, int) { + panic("TODO") +} + +func (c *CxxSpec) FunctionSymbol(sym lsp.DocumentSymbol) (int, []int, []int, []int) { + // No receiver and no type params for C + if sym.Kind != lsp.SKFunction { + return -1, nil, nil, nil + } + receiver := -1 + typeParams := []int{} + inputParams := []int{} + outputs := []int{} + + // general format: RETURNVALUE NAME "(" PARAMS ")" BODY + // -------- + // fnNameText + // state machine phase 0 phase 1 phase 2: break + // TODO: attributes may contain parens. also inline structs. 
+ + endRelOffset := 0 + lines := utils.CountLinesPooled(sym.Text) + phase := 0 + for i, tok := range sym.Tokens { + switch phase { + case 0: + if tok.Type == "function" { + offset := lsp.RelativePostionWithLines(*lines, sym.Location.Range.Start, tok.Location.Range.Start) + endRelOffset = offset + strings.Index(sym.Text[offset:], ")") + phase = 1 + continue + } + if c.IsEntityToken(tok) { + outputs = append(outputs, i) + } + case 1: + offset := lsp.RelativePostionWithLines(*lines, sym.Location.Range.Start, tok.Location.Range.Start) + if offset > endRelOffset { + phase = 2 + continue + } + if c.IsEntityToken(tok) { + inputParams = append(inputParams, i) + } + } + } + return receiver, typeParams, inputParams, outputs +} + +func (c *CxxSpec) GetUnloadedSymbol(from lsp.Token, define lsp.Location) (string, error) { + panic("TODO") +} diff --git a/lang/log/logger.go b/lang/log/logger.go index d5aa598..f69021c 100644 --- a/lang/log/logger.go +++ b/lang/log/logger.go @@ -1,11 +1,11 @@ // Copyright 2025 CloudWeGo Authors -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at -// +// // https://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
diff --git a/lang/lsp/client.go b/lang/lsp/client.go index 58479fe..bf5571d 100644 --- a/lang/lsp/client.go +++ b/lang/lsp/client.go @@ -35,13 +35,16 @@ type LSPClient struct { tokenTypes []string tokenModifiers []string files map[DocumentURI]*TextDocumentItem + // TODO: now only cache semantic tokens + cachedResults map[string]SemanticTokens ClientOptions } type ClientOptions struct { Server string uniast.Language - Verbose bool + Verbose bool + CacheResults bool } func NewLSPClient(repo string, openfile string, wait time.Duration, opts ClientOptions) (*LSPClient, error) { @@ -58,6 +61,9 @@ func NewLSPClient(repo string, openfile string, wait time.Duration, opts ClientO cli.ClientOptions = opts cli.files = make(map[DocumentURI]*TextDocumentItem) + if opts.CacheResults { + cli.cachedResults = make(map[string]SemanticTokens) + } if openfile != "" { _, err := cli.DidOpen(context.Background(), NewURI(openfile)) @@ -156,10 +162,10 @@ func initLSPClient(ctx context.Context, svr io.ReadWriteCloser, dir DocumentURI, return nil, fmt.Errorf("server did not provide TypeDefinition") } - implementationProvider, ok := vs["implementationProvider"].(bool) - if !ok || !implementationProvider { - return nil, fmt.Errorf("server did not provide Implementation") - } + // implementationProvider, ok := vs["implementationProvider"].(bool) + // if !ok || !implementationProvider { + // return nil, fmt.Errorf("server did not provide Implementation") + // } documentSymbolProvider, ok := vs["documentSymbolProvider"].(bool) if !ok || !documentSymbolProvider { return nil, fmt.Errorf("server did not provide DocumentSymbol") @@ -219,7 +225,7 @@ func (rwc rwc) Close() error { // start a LSP process and return its io func startLSPSever(path string) (io.ReadWriteCloser, error) { - // Launch rust-analyzer + // Launch LSP server cmd := exec.Command(path) stdin, err := cmd.StdinPipe() diff --git a/lang/lsp/handler.go b/lang/lsp/handler.go index ab12a8a..cdc0cea 100644 --- a/lang/lsp/handler.go +++ 
b/lang/lsp/handler.go @@ -93,9 +93,9 @@ loop: func (h *lspHandler) Handle(ctx context.Context, conn *jsonrpc2.Conn, req *jsonrpc2.Request) { // This method will be called for both requests and notifications - log.Info("handle method: %s\n", req.Method) + log.Debug("handle method: %s\n", req.Method) if req.Params != nil { - log.Info("param: %s\n", string(*req.Params)) + log.Debug("param: %s\n", string(*req.Params)) } if req.Notif { // This is a notification diff --git a/lang/lsp/lsp.go b/lang/lsp/lsp.go index 21fb843..6531696 100644 --- a/lang/lsp/lsp.go +++ b/lang/lsp/lsp.go @@ -24,6 +24,7 @@ import ( "sort" "strings" + "github.com/cloudwego/abcoder/lang/uniast" "github.com/cloudwego/abcoder/lang/utils" "github.com/sourcegraph/go-lsp" ) @@ -284,6 +285,71 @@ func (cli *LSPClient) References(ctx context.Context, id Location) ([]Location, return resp, nil } +// TODO(perf): cache results especially for whole file queries. +// TODO(refactor): infer use_full_method from capabilities +func (cli *LSPClient) getSemanticTokensRange(ctx context.Context, req DocumentRange, resp *SemanticTokens, use_full_method bool) error { + // Note: resp should be `mutable SemanticTokens * const resp` + if use_full_method { + if cli.cachedResults == nil { + // no caching + req1 := struct { + TextDocument lsp.TextDocumentIdentifier `json:"textDocument"` + }{TextDocument: req.TextDocument} + if err := cli.Call(ctx, "textDocument/semanticTokens/full", req1, resp); err != nil { + return err + } + } else { + cacheRes, ok := cli.cachedResults[string(req.TextDocument.URI)] + if ok { + *resp = cacheRes + } else { + req1 := struct { + TextDocument lsp.TextDocumentIdentifier `json:"textDocument"` + }{TextDocument: req.TextDocument} + if err := cli.Call(ctx, "textDocument/semanticTokens/full", req1, resp); err != nil { + return err + } + cli.cachedResults[string(req.TextDocument.URI)] = *resp + } + } + filterSemanticTokensInRange(resp, req.Range) + } else { + if err := cli.Call(ctx, 
"textDocument/semanticTokens/range", req, resp); err != nil { + return err + } + } + return nil +} + +func filterSemanticTokensInRange(resp *SemanticTokens, r Range) { + curPos := Position{ + Line: 0, + Character: 0, + } + newData := []uint32{} + for i := 0; i < len(resp.Data); i += 5 { + deltaLine := int(resp.Data[i]) + deltaStart := int(resp.Data[i+1]) + if deltaLine != 0 { + curPos.Line += deltaLine + curPos.Character = deltaStart + } else { + curPos.Character += deltaStart + } + if isPositionInRange(curPos, r, true) { + if len(newData) == 0 { + // add range start to initial delta + newData = append(newData, resp.Data[i:i+5]...) + newData[0] = uint32(curPos.Line) + newData[1] = uint32(curPos.Character) + } else { + newData = append(newData, resp.Data[i:i+5]...) + } + } + } + resp.Data = newData +} + func (cli *LSPClient) SemanticTokens(ctx context.Context, id Location) ([]Token, error) { // open file first syms, err := cli.DocumentSymbols(ctx, id.URI) @@ -304,7 +370,7 @@ func (cli *LSPClient) SemanticTokens(ctx context.Context, id Location) ([]Token, } var resp SemanticTokens - if err := cli.Call(ctx, "textDocument/semanticTokens/range", req, &resp); err != nil { + if err := cli.getSemanticTokensRange(ctx, req, &resp, cli.Language == uniast.Cxx || cli.Language == uniast.Python); err != nil { return nil, err } @@ -321,6 +387,11 @@ func (cli *LSPClient) Definition(ctx context.Context, uri DocumentURI, pos Posit if err != nil { return nil, err } + if f.Definitions != nil { + if locations, ok := f.Definitions[pos]; ok { + return locations, nil + } + } // call req := lsp.TextDocumentPositionParams{ diff --git a/lang/lsp/utils.go b/lang/lsp/utils.go index f1178dc..03db817 100644 --- a/lang/lsp/utils.go +++ b/lang/lsp/utils.go @@ -36,7 +36,7 @@ import ( ) func GetDistance(text string, start Position, pos Position) int { - lines := utils.CountLinesCached(text) + lines := utils.CountLinesPooled(text) defer utils.PutCount(lines) // find the line of the position return 
(*lines)[pos.Line-start.Line] + pos.Character - start.Character @@ -59,12 +59,23 @@ func RelativePostionWithLines(lines []int, textPos Position, pos Position) int { return lines[l] + pos.Character - textPos.Character } +func PositionOffsetIdentified(uri string, text string, pos Position) int { + if pos.Line < 0 || pos.Character < 0 { + log.Error("invalid text position: %+v", pos) + return -1 + } + lines := utils.CountLinesCached(uri, text) + defer utils.PutCount(lines) + + return RelativePostionWithLines(*lines, Position{Line: 0, Character: 0}, pos) +} + func PositionOffset(text string, pos Position) int { if pos.Line < 0 || pos.Character < 0 { log.Error("invalid text position: %+v", pos) return -1 } - lines := utils.CountLinesCached(text) + lines := utils.CountLinesPooled(text) defer utils.PutCount(lines) return RelativePostionWithLines(*lines, Position{Line: 0, Character: 0}, pos) diff --git a/lang/parse.go b/lang/parse.go index 95f3d77..3d640df 100644 --- a/lang/parse.go +++ b/lang/parse.go @@ -26,9 +26,11 @@ import ( "time" "github.com/cloudwego/abcoder/lang/collect" + "github.com/cloudwego/abcoder/lang/cxx" "github.com/cloudwego/abcoder/lang/golang/parser" "github.com/cloudwego/abcoder/lang/log" "github.com/cloudwego/abcoder/lang/lsp" + "github.com/cloudwego/abcoder/lang/python" "github.com/cloudwego/abcoder/lang/rust" "github.com/cloudwego/abcoder/lang/uniast" ) @@ -39,6 +41,8 @@ type ParseOptions struct { LSP string // Language of the repo Verbose bool + // Whether to indent the output JSON + MarshalIndent bool collect.CollectOption } @@ -61,9 +65,10 @@ func Parse(ctx context.Context, uri string, args ParseOptions) ([]byte, error) { log.Info("start initialize LSP server %s...\n", lspPath) var err error client, err = lsp.NewLSPClient(uri, openfile, opentime, lsp.ClientOptions{ - Server: lspPath, - Language: l, - Verbose: args.Verbose, + Server: lspPath, + Language: l, + Verbose: args.Verbose, + CacheResults: args.CacheResults, }) if err != nil { 
log.Error("failed to initialize LSP server: %v\n", err) @@ -78,11 +83,17 @@ func Parse(ctx context.Context, uri string, args ParseOptions) ([]byte, error) { return nil, err } log.Info("all symbols collected, start writing to stdout...\n") - out, err := json.Marshal(repo) + var out []byte + if args.MarshalIndent { + out, err = json.MarshalIndent(repo, "", " ") + } else { + out, err = json.Marshal(repo) + } if err != nil { log.Error("Failed to marshal repository: %v\n", err) return nil, err } + log.Info("all symbols written to stdout.\n") return out, nil } @@ -94,6 +105,11 @@ func checkRepoPath(repoPath string, language uniast.Language) (openfile string, case uniast.Rust: // NOTICE: open the Cargo.toml file is required for Rust projects openfile, wait = rust.CheckRepo(repoPath) + case uniast.Cxx: + openfile, wait = cxx.CheckRepo(repoPath) + case uniast.Python: + // NOTICE: Python projects need no file opened up front; only the wait time is used + openfile, wait = python.CheckRepo(repoPath) default: openfile = "" wait = 0 @@ -107,6 +123,10 @@ func checkLSP(language uniast.Language, lspPath string) (l uniast.Language, s st switch language { case uniast.Rust: l, s = rust.GetDefaultLSP() + case uniast.Cxx: + l, s = cxx.GetDefaultLSP() + case uniast.Python: + l, s = python.GetDefaultLSP() case uniast.Golang: l = uniast.Golang s = "" @@ -154,11 +174,14 @@ func collectSymbol(ctx context.Context, cli *lsp.LSPClient, repoPath string, opt if err != nil { return nil, err } + log.Info("all symbols exported.\n") } + log.Info("start building graph...\n") if err := repo.BuildGraph(); err != nil { return nil, err } + log.Info("graph built.\n") return repo, nil } diff --git a/lang/python/lib.go b/lang/python/lib.go new file mode 100644 index 0000000..286bea4 --- /dev/null +++ b/lang/python/lib.go @@ -0,0 +1,42 @@ +// Copyright 2025 CloudWeGo Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package python + +import ( + "time" + + "github.com/cloudwego/abcoder/lang/uniast" + "github.com/cloudwego/abcoder/lang/utils" +) + +const MaxWaitDuration = 2 * time.Second + +func GetDefaultLSP() (lang uniast.Language, name string) { + // needs to use the custom pylsp (see commit message) + return uniast.Python, "pylsp" +} + +func CheckRepo(repo string) (string, time.Duration) { + openfile := "" + // TODO: check if the project compiles. + + // NOTICE: wait for Python projects based on code files (only ".py" files are counted) + _, size := utils.CountFiles(repo, ".py", "SKIPDIR") + wait := 2*time.Second + time.Second*time.Duration(size/1024) + if wait > MaxWaitDuration { + wait = MaxWaitDuration + } + return openfile, wait +} diff --git a/lang/python/spec.go b/lang/python/spec.go new file mode 100644 index 0000000..0572d7a --- /dev/null +++ b/lang/python/spec.go @@ -0,0 +1,380 @@ +// Copyright 2025 CloudWeGo Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package python + +import ( + "fmt" + "os" + "path/filepath" + "strings" + + lsp "github.com/cloudwego/abcoder/lang/lsp" +) + +type PythonSpec struct { + repo string + topModuleName string + topModulePath string +} + +func NewPythonSpec() *PythonSpec { + return &PythonSpec{} +} + +func (c *PythonSpec) WorkSpace(root string) (map[string]string, error) { + // In python, pyspeak:modules are included by pyspeak:packages. + // This is the opposite of ours. + c.repo = root + rets := map[string]string{} + absPath, err := filepath.Abs(root) + if err != nil { + return nil, err + } + + // TODO: maybe infer from pyproject.toml? + // should ignore {tests,examples}/**/pyproject.toml + + // XXX ad-hoc way + if strings.Contains(c.repo, "astropy") { + panic("TODO") + } else if strings.Contains(c.repo, "flask") { + c.topModulePath = absPath + "/src" + c.topModuleName = "flask" + } else { + c.topModulePath = absPath + c.topModuleName = "current" + } + rets[c.topModuleName] = c.topModulePath + return rets, nil +} + +// returns: modName, pkgPath, error +func (c *PythonSpec) NameSpace(path string) (string, string, error) { + if strings.HasPrefix(path, c.topModulePath) { + // internal module + modName := c.topModuleName + relPath, err := filepath.Rel(c.topModulePath, path) + if err != nil { + return "", "", err + } + // todo: handle __init__.py + relPath = strings.TrimSuffix(relPath, ".py") + pkgPath := strings.ReplaceAll(relPath, string(os.PathSeparator), ".") + return modName, pkgPath, nil + } + + if strings.HasSuffix(path, "stdlib/3/builtins.pyi") { + // builtin module + return "builtins", "builtins", nil + } + + // XXX: hardcoded python version + condaPrefix := "/home/zhenyang/anaconda3/envs/abcoder/lib/python3.11" + if strings.HasPrefix(path, condaPrefix) { + if strings.HasPrefix(path, condaPrefix+"/site-packages") { + // external module + relPath, err := filepath.Rel(condaPrefix+"/site-packages", path) + if err != nil { + return "", "", err + } + relPath = 
strings.TrimSuffix(relPath, ".py") + pkgPath := strings.ReplaceAll(relPath, string(os.PathSeparator), ".") + modPath := strings.Split(pkgPath, ".") + if len(modPath) >= 1 { + modName := modPath[0] + return modName, pkgPath, nil + } + panic(fmt.Sprintf("Malformed Namespace %s, pkgPath %s", path, pkgPath)) + } + // builtin module + modName := "builtins" + relPath, err := filepath.Rel(condaPrefix, path) + if err != nil { + return "", "", err + } + relPath = strings.TrimSuffix(relPath, ".py") + pkgPath := strings.ReplaceAll(relPath, string(os.PathSeparator), ".") + return modName, pkgPath, nil + } + + panic(fmt.Sprintf("Unhandled Namespace %s", path)) +} + +func (c *PythonSpec) ShouldSkip(path string) bool { + if !strings.HasSuffix(path, ".py") { + return true + } + return false +} + +func (c *PythonSpec) IsDocToken(tok lsp.Token) bool { + return tok.Type == "comment" +} + +func (c *PythonSpec) DeclareTokenOfSymbol(sym lsp.DocumentSymbol) int { + for i, t := range sym.Tokens { + if c.IsDocToken(t) { + continue + } + for _, m := range t.Modifiers { + if m == "declaration" { + return i + } + } + } + return -1 +} + +func (c *PythonSpec) IsEntityToken(tok lsp.Token) bool { + typ := tok.Type + if strings.HasPrefix(tok.Text, "from ") || strings.HasPrefix(tok.Text, "import ") { + // Python LSP highlights imported symbols as function/types + return false + } + return typ == "function" || typ == "variable" || typ == "property" || typ == "class" || typ == "type" +} + +func (c *PythonSpec) IsStdToken(tok lsp.Token) bool { + panic("TODO") +} + +func (c *PythonSpec) TokenKind(tok lsp.Token) lsp.SymbolKind { + switch tok.Type { + case "namespace": + return lsp.SKNamespace + case "type": + return lsp.SKObject // no direct match; mapped to Object conservatively + case "class": + return lsp.SKClass + case "enum": + return lsp.SKEnum + case "interface": + return lsp.SKInterface + case "struct": + return lsp.SKStruct + case "typeParameter": + return lsp.SKTypeParameter + case 
"parameter": + return lsp.SKVariable + case "variable": + return lsp.SKVariable + case "property": + return lsp.SKProperty + case "enumMember": + return lsp.SKEnumMember + case "event": + return lsp.SKEvent + case "function": + return lsp.SKFunction + case "method": + return lsp.SKMethod + case "macro": + return lsp.SKFunction + case "string": + return lsp.SKString + case "number": + return lsp.SKNumber + case "operator": + return lsp.SKOperator + default: + return lsp.SKUnknown + } +} + +func (c *PythonSpec) IsMainFunction(sym lsp.DocumentSymbol) bool { + return sym.Kind == lsp.SKFunction && sym.Name == "main" +} + +func (c *PythonSpec) IsEntitySymbol(sym lsp.DocumentSymbol) bool { + typ := sym.Kind + if strings.HasPrefix(sym.Text, "from ") || strings.HasPrefix(sym.Text, "import ") { + // Python LSP highlights imported symbols as function/types + return false + } + return typ == lsp.SKObject || typ == lsp.SKMethod || typ == lsp.SKFunction || typ == lsp.SKVariable || + typ == lsp.SKStruct || typ == lsp.SKEnum || typ == lsp.SKTypeParameter || typ == lsp.SKConstant || typ == lsp.SKClass +} + +func (c *PythonSpec) IsPublicSymbol(sym lsp.DocumentSymbol) bool { + // builtin methods are exported + if strings.HasPrefix(sym.Name, "__") && strings.HasSuffix(sym.Name, "__") { + return true + } + if strings.HasPrefix(sym.Name, "_") { + return false + } + return true +} + +func (c *PythonSpec) HasImplSymbol() bool { + return true +} + +func invalidPos() lsp.Position { + return lsp.Position{ + Line: -1, + Character: -1, + } +} + +// returns interface, receiver, first method +func (c *PythonSpec) ImplSymbol(sym lsp.DocumentSymbol) (int, int, int) { + // reference: https://docs.python.org/3/reference/grammar.html + if sym.Kind != lsp.SKClass { + return -1, -1, -1 + } + + implType := -1 + receiverType := -1 + firstMethod := -1 + + // state 0: goto state -1 when we see a 'class' + state := 0 + clsnamepos := invalidPos() + curpos := sym.Location.Range.Start + for i := range 
len(sym.Text) { + if state == -1 { + break + } + switch state { + case 0: + if i+6 >= len(sym.Text) { + // class text does not contain a 'class' + // should be an import + return -1, -1, -1 + } + next6chars := sym.Text[i : i+6] + // heuristics should work with reasonable python code + if next6chars == "class " { + clsnamepos = curpos + state = -1 + } + } + if sym.Text[i] == '\n' { + curpos.Line++ + curpos.Character = 0 + } else { + curpos.Character++ + } + } + + for i, t := range sym.Tokens { + if receiverType == -1 && clsnamepos.Less(t.Location.Range.Start) { + receiverType = i + } + } + + return implType, receiverType, firstMethod +} + +// returns: receiver, typeParams, inputParams, outputParams +func (c *PythonSpec) FunctionSymbol(sym lsp.DocumentSymbol) (int, []int, []int, []int) { + // FunctionSymbol does not return receivers. + // TODO type params in python (nobody uses them) + // reference: https://docs.python.org/3/reference/grammar.html + receiver := -1 + // python actually has these but TODO + typeParams := []int{} + + // Hell, manually parse function text to get locations of key tokens since LSP does not support this... + // + // state 0: goto state 1 when we see a def + // state 1: goto state 2 when we see a ( + // state 2: we're in the param list. + // collect input params by checking entity tokens. + // goto state 3 when we see a ) + // state 3: collect output params.
+ // finish when we see a : + state := 0 + paren_depth := 0 + // defpos := invalidPos() + lparenpos := invalidPos() + rparenpos := invalidPos() + bodypos := invalidPos() + curpos := sym.Location.Range.Start + for i := range len(sym.Text) { + if state == -1 { + break + } + switch state { + case 0: + if i+4 >= len(sym.Text) { + // function text does not contain a 'def' + // should be an import + return -1, []int{}, []int{}, []int{} + } + next4chars := sym.Text[i : i+4] + // heuristics should work with reasonable python code + if next4chars == "def " { + // defpos = curpos + state = 1 + } + case 1: + if sym.Text[i] == '(' { + lparenpos = curpos + paren_depth = 1 + state = 2 + } + case 2: + if sym.Text[i] == ')' { + rparenpos = curpos + paren_depth -= 1 + if paren_depth == 0 { + state = 3 + } + } + case 3: + if sym.Text[i] == ':' { + bodypos = curpos + state = -1 + } + } + if sym.Text[i] == '\n' { + curpos.Line++ + curpos.Character = 0 + } else { + curpos.Character++ + } + } + + paramsrange := lsp.Range{ + Start: lparenpos, + End: rparenpos, + } + returnrange := lsp.Range{ + Start: rparenpos, + End: bodypos, + } + inputParams := []int{} + outputParams := []int{} + for i, t := range sym.Tokens { + if paramsrange.Include(t.Location.Range) { + if c.IsEntityToken(t) { + inputParams = append(inputParams, i) + } + } + if returnrange.Include(t.Location.Range) { + if c.IsEntityToken(t) { + outputParams = append(outputParams, i) + } + } + } + + return receiver, typeParams, inputParams, outputParams +} + +func (c *PythonSpec) GetUnloadedSymbol(from lsp.Token, define lsp.Location) (string, error) { + panic("TODO") +} diff --git a/lang/rust/repo.go b/lang/rust/repo.go index 0bf2c04..ba81142 100644 --- a/lang/rust/repo.go +++ b/lang/rust/repo.go @@ -27,7 +27,7 @@ import ( "github.com/cloudwego/abcoder/lang/utils" ) -const MaxWaitDuration = 5 * time.Minute +const MaxWaitDuration = 5 * time.Second func CheckRepo(repo string) (string, time.Duration) { // NOTICE: open the Cargo.toml 
file is required for Rust projects diff --git a/lang/rust/spec.go b/lang/rust/spec.go index f68223f..e40a410 100644 --- a/lang/rust/spec.go +++ b/lang/rust/spec.go @@ -177,7 +177,7 @@ func hasKeyword(tokens []lsp.Token, keyword string) int { return -1 } -func findSpecifiToken(tokens []lsp.Token, typ string, text string) int { +func findSpecificToken(tokens []lsp.Token, typ string, text string) int { for i := 0; i < len(tokens); i++ { if tokens[i].Type == typ && tokens[i].Text == text { return i @@ -217,7 +217,7 @@ func (c *RustSpec) ImplSymbol(sym lsp.DocumentSymbol) (int, int, int) { // find the impl type token var implType, receiverType = -1, -1 - var fn = start + findSpecifiToken(tokens[start:], "keyword", "fn") + var fn = start + findSpecificToken(tokens[start:], "keyword", "fn") var forToken = findSpecifiTokenUntil(tokens, "keyword", "for", start, fn) for i := start; i < forToken; i++ { @@ -253,15 +253,15 @@ func (c *RustSpec) FunctionSymbol(sym lsp.DocumentSymbol) (int, []int, []int, [] } // exclude #[xxx] - fn := start + findSpecifiToken(tokens[start:], "keyword", "fn") + fn := start + findSpecificToken(tokens[start:], "keyword", "fn") if fn < 0 { return -1, nil, nil, nil } - where := start + findSpecifiToken(tokens[start:], "keyword", "where") + where := start + findSpecificToken(tokens[start:], "keyword", "where") if where == -1 { where = len(tokens) - 1 } - lines := utils.CountLinesCached(sym.Text) + lines := utils.CountLinesPooled(sym.Text) // find the typeParam's type token between "fn" and "(" var typeParams []int diff --git a/lang/rust/utils/lsp_test.go b/lang/rust/utils/lsp_test.go index e019b46..d19b6ae 100644 --- a/lang/rust/utils/lsp_test.go +++ b/lang/rust/utils/lsp_test.go @@ -1,11 +1,11 @@ // Copyright 2025 CloudWeGo Authors -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at -// +// // https://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/lang/uniast/ast.go b/lang/uniast/ast.go index 9f93694..844897b 100644 --- a/lang/uniast/ast.go +++ b/lang/uniast/ast.go @@ -28,6 +28,8 @@ type Language string const ( Golang Language = "go" Rust Language = "rust" + Cxx Language = "cxx" + Python Language = "python" Unknown Language = "" ) @@ -37,6 +39,10 @@ func (l Language) String() string { return "rust" case Golang: return "go" + case Cxx: + return "cxx" + case Python: + return "python" default: return string(l) } @@ -53,6 +59,10 @@ func NewLanguage(lang string) (l Language) { return Golang case "rust": return Rust + case "cxx": + return Cxx + case "python": + return Python default: return Unknown } diff --git a/lang/utils/strings.go b/lang/utils/strings.go index 2aeb042..75d77e2 100644 --- a/lang/utils/strings.go +++ b/lang/utils/strings.go @@ -28,7 +28,19 @@ func PutCount(count *[]int) { countPool.Put(count) } -func CountLinesCached(text string) *[]int { +var cachedLines = sync.Map{} + +func CountLinesCached(ident string, text string) *[]int { + if v, ok := cachedLines.Load(ident); ok { + res := v.([]int) + return &res + } + tmp := CountLines(text) + cachedLines.Store(ident, tmp) + return &tmp +} + +func CountLinesPooled(text string) *[]int { tmp := countPool.Get().(*[]int) *tmp = append(*tmp, 0) for i, c := range text { diff --git a/main.go b/main.go index 53e026f..2542bef 100644 --- a/main.go +++ b/main.go @@ -50,6 +50,8 @@ Action: write write the UniAST to the output directory Language: rust for rust codes + cxx for c codes (cpp support is on the way) + python for python codes go for golang codes URI: for action parse: the directory path of the repo @@ -79,6 +81,7 @@ func main() 
{ uri := os.Args[3] flagVerbose := flags.Bool("verbose", false, "Verbose mode.") + flagVeryVerbose := flags.Bool("veryverbose", false, "Very verbose mode.") flagOutput := flags.String("o", "", "Output path.") @@ -89,14 +92,20 @@ func main() { flags.BoolVar(&opts.LoadExternalSymbol, "load-external-symbol", false, "load external symbols into results") flags.BoolVar(&opts.NoNeedComment, "no-need-comment", false, "do not need comment (only works for Go now)") flags.BoolVar(&opts.NeedTest, "need-test", false, "need parse test files (only works for Go now)") + flags.BoolVar(&opts.CacheResults, "cache", false, "cache language server query results") + flags.BoolVar(&opts.MarshalIndent, "indent", false, "indent the marshaled output") flags.Var((*StringArray)(&opts.Excludes), "exclude", "exclude files or directories, support multiple values") flagLsp := flags.String("lsp", "", "Specify the language server path.") flags.Parse(os.Args[4:]) - if flagVerbose != nil && *flagVerbose { + if flagVeryVerbose != nil && *flagVeryVerbose { log.SetLogLevel(log.DebugLevel) opts.Verbose = true } + if flagVerbose != nil && *flagVerbose { + log.SetLogLevel(log.InfoLevel) + opts.Verbose = true + } opts.Language = language if flagLsp != nil { diff --git a/testdata/cduplicate/CMakeLists.txt b/testdata/cduplicate/CMakeLists.txt new file mode 100644 index 0000000..181fffe --- /dev/null +++ b/testdata/cduplicate/CMakeLists.txt @@ -0,0 +1,6 @@ +cmake_minimum_required(VERSION 3.0) +project(my_combined_project) + +add_subdirectory(d1) + +add_subdirectory(d2) diff --git a/testdata/cduplicate/d1/CMakeLists.txt b/testdata/cduplicate/d1/CMakeLists.txt new file mode 100644 index 0000000..f7f6732 --- /dev/null +++ b/testdata/cduplicate/d1/CMakeLists.txt @@ -0,0 +1 @@ +add_executable(prog1 ../main.c add.c) diff --git a/testdata/cduplicate/d1/add.c b/testdata/cduplicate/d1/add.c new file mode 100644 index 0000000..a60220b --- /dev/null +++ b/testdata/cduplicate/d1/add.c @@ -0,0 +1 @@ +int add(int a){return 
a+1;} diff --git a/testdata/cduplicate/d2/CMakeLists.txt b/testdata/cduplicate/d2/CMakeLists.txt new file mode 100644 index 0000000..3006db0 --- /dev/null +++ b/testdata/cduplicate/d2/CMakeLists.txt @@ -0,0 +1 @@ +add_executable(prog2 ../main.c add.c) diff --git a/testdata/cduplicate/d2/add.c b/testdata/cduplicate/d2/add.c new file mode 100644 index 0000000..a60220b --- /dev/null +++ b/testdata/cduplicate/d2/add.c @@ -0,0 +1 @@ +int add(int a){return a+1;} diff --git a/testdata/cduplicate/main.c b/testdata/cduplicate/main.c new file mode 100644 index 0000000..2a9264c --- /dev/null +++ b/testdata/cduplicate/main.c @@ -0,0 +1 @@ +extern int add(int); int main(int argc,char**argv){return add(argc);} diff --git a/testdata/cxxsimple/main.c b/testdata/cxxsimple/main.c new file mode 100644 index 0000000..1e7d129 --- /dev/null +++ b/testdata/cxxsimple/main.c @@ -0,0 +1,45 @@ +// Copyright 2025 CloudWeGo Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include + +#include "pair.h" + +union IntOrChar { + int i; + char c; +}; + +extern int add(int, int); + +#define MAXN 100 +int arr[MAXN]; + +int compare(const void *a, const void *b) { + int int_a = *((int *)a); + int int_b = *((int *)b); + if (int_a < int_b) return -1; + if (int_a > int_b) return 1; + return 0; +} + +int main() { + StructIntPair x; + x.a = 5; + x.b = 6; + swapPair(&x); + struct IntPair y = myself(&x); + return y.a+y.b; +} + diff --git a/testdata/cxxsimple/pair.c b/testdata/cxxsimple/pair.c new file mode 100644 index 0000000..55539e6 --- /dev/null +++ b/testdata/cxxsimple/pair.c @@ -0,0 +1,25 @@ +// Copyright 2025 CloudWeGo Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "pair.h" + +void swapPair(StructIntPair *a) { + int c = a->b; + a->b = a->a; + a->a = c; +} + +struct IntPair myself(StructIntPair *a) { + return *a; +} diff --git a/testdata/cxxsimple/pair.h b/testdata/cxxsimple/pair.h new file mode 100644 index 0000000..9da53c3 --- /dev/null +++ b/testdata/cxxsimple/pair.h @@ -0,0 +1,28 @@ +// Copyright 2025 CloudWeGo Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef PAIR_H +#define PAIR_H + +struct IntPair { + int a; + int b; +}; +typedef struct IntPair StructIntPair; + +void swapPair(StructIntPair *a); + +struct IntPair myself(StructIntPair *a); + +#endif // PAIR_H diff --git a/testdata/golang/pkg/entity/entity.go b/testdata/golang/pkg/entity/entity.go index 2398349..b0410c3 100644 --- a/testdata/golang/pkg/entity/entity.go +++ b/testdata/golang/pkg/entity/entity.go @@ -1,11 +1,11 @@ // Copyright 2025 CloudWeGo Authors -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at -// +// // https://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/testdata/golang/pkg/refer.go b/testdata/golang/pkg/refer.go index 75ade87..badfd41 100644 --- a/testdata/golang/pkg/refer.go +++ b/testdata/golang/pkg/refer.go @@ -1,11 +1,11 @@ // Copyright 2025 CloudWeGo Authors -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at -// +// // https://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/testdata/pyglobvar/main.py b/testdata/pyglobvar/main.py new file mode 100644 index 0000000..4152fd3 --- /dev/null +++ b/testdata/pyglobvar/main.py @@ -0,0 +1,22 @@ +# Copyright 2025 CloudWeGo Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def foo(): + return 2 + +def bar(): + return foo() + +v = foo() diff --git a/testdata/pyimport/main.py b/testdata/pyimport/main.py new file mode 100644 index 0000000..a4b8125 --- /dev/null +++ b/testdata/pyimport/main.py @@ -0,0 +1,24 @@ +# Copyright 2025 CloudWeGo Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Union + +# similar to rust's `pub use`. 
+ + +def main(): + pass + + +# main.py::types should not include Union diff --git a/testdata/pysimpleobj/main.py b/testdata/pysimpleobj/main.py new file mode 100644 index 0000000..5c672ce --- /dev/null +++ b/testdata/pysimpleobj/main.py @@ -0,0 +1,27 @@ +# Copyright 2025 CloudWeGo Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +class Foo: + def __init__(self): + self.x = 5 + + def bar(self, v: int) -> int: + self.x += v + return self.x + + +def main(): + f = Foo() + f.bar(6) diff --git a/testdata/pythonsimple/test.py b/testdata/pythonsimple/test.py new file mode 100644 index 0000000..7be5114 --- /dev/null +++ b/testdata/pythonsimple/test.py @@ -0,0 +1,85 @@ +# Copyright 2025 CloudWeGo Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Union +from test2 import IntPair +from test3 import * + + +def swap_pair(pair: IntPair) -> None: + """ + Swaps the values of a and b in an IntPair. 
+ Note: The original Rust code had a logical error if a swap was intended; + it would result in both pair.a and pair.b being set to the original value of pair.a. + This Python version implements a correct swap. + """ + pair.a, pair.b = pair.b, pair.a + + +from test3 import * + + +def add(a: int, b: int) -> int: + return a + b + + +def compare(a: int, b: int) -> int: + if a < b: + return -1 + elif a > b: + return 1 + else: + return 0 + + +IntOrChar = Union[IntVariant, CharVariant] +# TODO: global var not supported +globalvar = 5 + + +def main() -> None: + global globalvar + globalvar = 65 + + ls = list((1, 2)) + + x = add(2, 3) + print(x) + + my_pair = IntPair(a=10, b=20) + print(f"Original pair: {my_pair}") + swap_pair(my_pair) + print(f"Swapped pair: {my_pair}") + print(f"my_pair.sum = {my_pair.sum()}") + + val1: IntOrChar = IntVariant(123) + val2: IntOrChar = CharVariant(ord("A")) + + print(f"IntOrChar 1: {val1}") + print(f"IntOrChar 2: {val2}") + + if isinstance(val1, IntVariant): + print(f"val1 is an IntVariant with value: {val1.value}") + if isinstance(val2, CharVariant): + print( + f"val2 is a CharVariant with u8 value: {val2.value} (char: '{chr(val2.value)}')" + ) + + print(f"Comparing 5 and 10: {compare(5, 10)}") + print(f"Comparing 10 and 5: {compare(10, 5)}") + print(f"Comparing 7 and 7: {compare(7, 7)}") + + +if __name__ == "__main__": + main() diff --git a/testdata/pythonsimple/test2.py b/testdata/pythonsimple/test2.py new file mode 100644 index 0000000..7e5f2c2 --- /dev/null +++ b/testdata/pythonsimple/test2.py @@ -0,0 +1,28 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from dataclasses import dataclass + + +@dataclass +class IntPair: + a: int + b: int + + def sum(self): + return self.a + self.b + + +def main() -> None: + my_pair = IntPair(a=10, b=20) + print(f"Original pair: {my_pair}") diff --git a/testdata/pythonsimple/test3.py b/testdata/pythonsimple/test3.py new file mode 100644 index 0000000..5812e0d --- /dev/null +++ b/testdata/pythonsimple/test3.py @@ -0,0 +1,31 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +class IntVariant: + def __init__(self, value: int): + self.value: int = value + + def __repr__(self) -> str: + return f"IntVariant({self.value})" + + +class CharVariant: + def __init__(self, value: int): + if not (0 <= value <= 255): + raise ValueError( + "CharVariant value must be an integer between 0 and 255 (u8 equivalent)" + ) + self.value: int = value + + def __repr__(self) -> str: + return f"CharVariant(value={self.value}, char='{chr(self.value)}')" diff --git a/testdata/pythonsingle/main.py b/testdata/pythonsingle/main.py new file mode 100644 index 0000000..ea3a402 --- /dev/null +++ b/testdata/pythonsingle/main.py @@ -0,0 +1,98 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from dataclasses import dataclass +from typing import Union + + +@dataclass +class IntPair: + a: int + b: int + + +def swap_pair(pair: IntPair) -> None: + """ + Swaps the values of a and b in an IntPair. + Note: The original Rust code had a logical error if a swap was intended; + it would result in both pair.a and pair.b being set to the original value of pair.a. + This Python version implements a correct swap. 
+ """ + pair.a, pair.b = pair.b, pair.a + + +class IntVariant: + def __init__(self, value: int): + self.value: int = value + + def __repr__(self) -> str: + return f"IntVariant({self.value})" + + +class CharVariant: + def __init__(self, value: int): + if not (0 <= value <= 255): + raise ValueError( + "CharVariant value must be an integer between 0 and 255 (u8 equivalent)" + ) + self.value: int = value + + def __repr__(self) -> str: + return f"CharVariant(value={self.value}, char='{chr(self.value)}')" + + +IntOrChar = Union[IntVariant, CharVariant] + + +def add(a: int, b: int) -> int: + return a + b + + +def compare(a: int, b: int) -> int: + if a < b: + return -1 + elif a > b: + return 1 + else: + return 0 + + +def main() -> None: + x = add(2, 3) + print(x) + + my_pair = IntPair(a=10, b=20) + print(f"Original pair: {my_pair}") + swap_pair(my_pair) + print(f"Swapped pair: {my_pair}") + + val1: IntOrChar = IntVariant(123) + val2: IntOrChar = CharVariant(ord("A")) + + print(f"IntOrChar 1: {val1}") + print(f"IntOrChar 2: {val2}") + + if isinstance(val1, IntVariant): + print(f"val1 is an IntVariant with value: {val1.value}") + if isinstance(val2, CharVariant): + print( + f"val2 is a CharVariant with u8 value: {val2.value} (char: '{chr(val2.value)}')" + ) + + print(f"Comparing 5 and 10: {compare(5, 10)}") + print(f"Comparing 10 and 5: {compare(10, 5)}") + print(f"Comparing 7 and 7: {compare(7, 7)}") + + +if __name__ == "__main__": + main() diff --git a/testdata/rustsimpleobj/Cargo.toml b/testdata/rustsimpleobj/Cargo.toml new file mode 100644 index 0000000..0ba2a47 --- /dev/null +++ b/testdata/rustsimpleobj/Cargo.toml @@ -0,0 +1,6 @@ +[package] +name = "rustsimpleobj" +version = "0.1.0" +edition = "2024" + +[dependencies] diff --git a/testdata/rustsimpleobj/src/main.rs b/testdata/rustsimpleobj/src/main.rs new file mode 100644 index 0000000..2efed47 --- /dev/null +++ b/testdata/rustsimpleobj/src/main.rs @@ -0,0 +1,35 @@ +// Copyright 2025 CloudWeGo Authors +// +// Licensed 
under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +struct Foo(u32); + +impl Foo { + pub fn new(value: u32) -> Self { + Foo(value) + } + + pub fn bar(&mut self, increment: u32) { + self.0 += increment; + } + + pub fn faz(&mut self, decrement: u32) { + self.0 -= decrement; + } +} + +fn main() { + let mut my_foo = Foo::new(10); + my_foo.bar(5); + my_foo.faz(5); +}