From 7e80be509d71f31f6859a48184bf1cbf6d33f582 Mon Sep 17 00:00:00 2001 From: Hoblovski Date: Wed, 30 Apr 2025 17:20:16 +0800 Subject: [PATCH 01/32] fix: fix hardcode in export.go --- lang/collect/export.go | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/lang/collect/export.go b/lang/collect/export.go index 3101d8a..b57ea42 100644 --- a/lang/collect/export.go +++ b/lang/collect/export.go @@ -48,8 +48,8 @@ func (c *Collector) fileLine(loc Location) uniast.FileLine { } } -func newModule(name string, dir string) *uniast.Module { - ret := uniast.NewModule(name, dir, uniast.Rust) +func newModule(name string, dir string, lang uniast.Language) *uniast.Module { + ret := uniast.NewModule(name, dir, lang) return ret } @@ -67,7 +67,7 @@ func (c *Collector) Export(ctx context.Context) (*uniast.Repository, error) { if err != nil { return nil, err } - repo.Modules[name] = newModule(name, rel) + repo.Modules[name] = newModule(name, rel, c.Language) } // not allow local symbols inside another symbol @@ -83,11 +83,13 @@ func (c *Collector) Export(ctx context.Context) (*uniast.Repository, error) { } // patch module - for p, m := range repo.Modules { - if p == "" || strings.Contains(p, "@") { - continue + if c.modPatcher != nil { + for p, m := range repo.Modules { + if p == "" || strings.Contains(p, "@") { + continue + } + c.modPatcher.Patch(m) } - c.modPatcher.Patch(m) } return &repo, nil @@ -140,7 +142,7 @@ func (c *Collector) exportSymbol(repo *uniast.Repository, symbol *DocumentSymbol } if repo.Modules[mod] == nil { - repo.Modules[mod] = newModule(mod, "") + repo.Modules[mod] = newModule(mod, "", c.Language) } module := repo.Modules[mod] if repo.Modules[mod].Packages[path] == nil { From ddca707b1fbb6af8f0f438e92a4617c30ec0433a Mon Sep 17 00:00:00 2001 From: Hoblovski Date: Wed, 30 Apr 2025 17:24:19 +0800 Subject: [PATCH 02/32] feat: cxx/c parser skeleton with clangd-18 --- lang/collect/collect.go | 3 ++ lang/cxx/lib.go | 41 +++++++++++++++++++ lang/cxx/spec.go | 83 +++++++++++++++++++++++++++++++++++++++ lang/parse.go | 6 +++ lang/uniast/ast.go | 5 +++ main.go | 1 + testdata/cxxsimple/main.c | 45 +++++++++++++++++++++ testdata/cxxsimple/pair.c | 25 ++++++++++++ testdata/cxxsimple/pair.h | 28 +++++++++++++ 9 files changed, 237 insertions(+) create mode 100644 lang/cxx/lib.go create mode 100644 lang/cxx/spec.go create mode 100644 testdata/cxxsimple/main.c create mode 100644 testdata/cxxsimple/pair.c create mode 100644 testdata/cxxsimple/pair.h diff --git a/lang/collect/collect.go b/lang/collect/collect.go index c68f869..22f18a6 100644 --- a/lang/collect/collect.go +++ b/lang/collect/collect.go @@ -22,6 +22,7 @@ import ( "strings" "unicode" + "github.com/cloudwego/abcoder/lang/cxx" "github.com/cloudwego/abcoder/lang/log" . "github.com/cloudwego/abcoder/lang/lsp" "github.com/cloudwego/abcoder/lang/rust" @@ -79,6 +80,8 @@ func switchSpec(l uniast.Language) LanguageSpec { switch l { case uniast.Rust: return &rust.RustSpec{} + case uniast.Cxx: + return &cxx.CxxSpec{} default: panic(fmt.Sprintf("unsupported language %s", l)) } diff --git a/lang/cxx/lib.go b/lang/cxx/lib.go new file mode 100644 index 0000000..70a82b7 --- /dev/null +++ b/lang/cxx/lib.go @@ -0,0 +1,41 @@ +// Copyright 2025 CloudWeGo Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cxx + +import ( + "time" + + "github.com/cloudwego/abcoder/lang/uniast" + "github.com/cloudwego/abcoder/lang/utils" +) + +const MaxWaitDuration = 5 * time.Minute + +func GetDefaultLSP() (lang uniast.Language, name string) { + return uniast.Cxx, "clangd-18" +} + +func CheckRepo(repo string) (string, time.Duration) { + openfile := "" + // TODO: check if the project compiles. + + // NOTICE: wait for Rust projects based on code files + _, size := utils.CountFiles(repo, ".c", "SKIPDIR") + wait := 2*time.Second + time.Second*time.Duration(size/1024) + if wait > MaxWaitDuration { + wait = MaxWaitDuration + } + return openfile, wait +} diff --git a/lang/cxx/spec.go b/lang/cxx/spec.go new file mode 100644 index 0000000..243cbcc --- /dev/null +++ b/lang/cxx/spec.go @@ -0,0 +1,83 @@ +// Copyright 2025 CloudWeGo Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cxx + +import ( + lsp "github.com/cloudwego/abcoder/lang/lsp" +) + +type CxxSpec struct { + repo string +} + +func NewCxxSpec() *CxxSpec { + return &CxxSpec{} +} + +func (c *CxxSpec) WorkSpace(root string) (map[string]string, error) { + panic("TODO") +} + +func (c *CxxSpec) NameSpace(path string) (string, string, error) { + panic("TODO") +} + +func (c *CxxSpec) ShouldSkip(path string) bool { + panic("TODO") +} + +func (c *CxxSpec) DeclareTokenOfSymbol(sym lsp.DocumentSymbol) int { + panic("TODO") +} + +func (c *CxxSpec) IsEntityToken(tok lsp.Token) bool { + panic("TODO") +} + +func (c *CxxSpec) IsStdToken(tok lsp.Token) bool { + panic("TODO") +} + +func (c *CxxSpec) TokenKind(tok lsp.Token) lsp.SymbolKind { + panic("TODO") +} + +func (c *CxxSpec) IsMainFunction(sym lsp.DocumentSymbol) bool { + panic("TODO") +} + +func (c *CxxSpec) IsEntitySymbol(sym lsp.DocumentSymbol) bool { + panic("TODO") +} + +func (c *CxxSpec) IsPublicSymbol(sym lsp.DocumentSymbol) bool { + panic("TODO") +} + +func (c *CxxSpec) HasImplSymbol() bool { + panic("TODO") +} + +func (c *CxxSpec) ImplSymbol(sym lsp.DocumentSymbol) (int, int, int) { + panic("TODO") +} + +func (c *CxxSpec) FunctionSymbol(sym lsp.DocumentSymbol) (int, []int, []int, []int) { + panic("TODO") +} + +func (c *CxxSpec) GetUnloadedSymbol(from lsp.Token, define lsp.Location) (string, error) { + panic("TODO") +} diff --git a/lang/parse.go b/lang/parse.go index 95f3d77..e44bd34 100644 --- a/lang/parse.go +++ b/lang/parse.go @@ -26,6 +26,7 @@ import ( "time" "github.com/cloudwego/abcoder/lang/collect" + "github.com/cloudwego/abcoder/lang/cxx" "github.com/cloudwego/abcoder/lang/golang/parser" "github.com/cloudwego/abcoder/lang/log" "github.com/cloudwego/abcoder/lang/lsp" @@ -94,6 +95,9 @@ func checkRepoPath(repoPath string, language uniast.Language) (openfile string, case uniast.Rust: // NOTICE: open the Cargo.toml file is required for Rust projects openfile, wait = rust.CheckRepo(repoPath) + case uniast.Cxx: + // NOTICE: open the Cargo.toml file is required for Rust projects + openfile, wait = cxx.CheckRepo(repoPath) default: openfile = "" wait = 0 @@ -107,6 +111,8 @@ func checkLSP(language uniast.Language, lspPath string) (l uniast.Language, s st switch language { case uniast.Rust: l, s = rust.GetDefaultLSP() + case uniast.Cxx: + l, s = cxx.GetDefaultLSP() case uniast.Golang: l = uniast.Golang s = "" diff --git a/lang/uniast/ast.go b/lang/uniast/ast.go index 9f93694..f6e8416 100644 --- a/lang/uniast/ast.go +++ b/lang/uniast/ast.go @@ -28,6 +28,7 @@ type Language string const ( Golang Language = "go" Rust Language = "rust" + Cxx Language = "cxx" Unknown Language = "" ) @@ -37,6 +38,8 @@ func (l Language) String() string { return "rust" case Golang: return "go" + case Cxx: + return "cxx" default: return string(l) } @@ -53,6 +56,8 @@ func NewLanguage(lang string) (l Language) { return Golang case "rust": return Rust + case "cxx": + return Cxx default: return Unknown } diff --git a/main.go b/main.go index 53e026f..dcd1d33 100644 --- a/main.go +++ b/main.go @@ -50,6 +50,7 @@ Action: write write the UniAST to the output directory Language: rust for rust codes + cxx for c codes (cpp support is on the way) go for golang codes URI: for action parse: the directory path of the repo diff --git a/testdata/cxxsimple/main.c b/testdata/cxxsimple/main.c new file mode 100644 index 0000000..1e7d129 --- /dev/null +++ b/testdata/cxxsimple/main.c @@ -0,0 +1,45 @@ +// Copyright 2025 CloudWeGo Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "pair.h" + +union IntOrChar { + int i; + char c; +}; + +extern int add(int, int); + +#define MAXN 100 +int arr[MAXN]; + +int compare(const void *a, const void *b) { + int int_a = *((int *)a); + int int_b = *((int *)b); + if (int_a < int_b) return -1; + if (int_a > int_b) return 1; + return 0; +} + +int main() { + StructIntPair x; + x.a = 5; + x.b = 6; + swapPair(&x); + struct IntPair y = myself(&x); + return y.a+y.b; +} + diff --git a/testdata/cxxsimple/pair.c b/testdata/cxxsimple/pair.c new file mode 100644 index 0000000..55539e6 --- /dev/null +++ b/testdata/cxxsimple/pair.c @@ -0,0 +1,25 @@ +// Copyright 2025 CloudWeGo Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "pair.h" + +void swapPair(StructIntPair *a) { + int c = a->b; + a->b = a->a; + a->a = c; +} + +struct IntPair myself(StructIntPair *a) { + return *a; +} diff --git a/testdata/cxxsimple/pair.h b/testdata/cxxsimple/pair.h new file mode 100644 index 0000000..9da53c3 --- /dev/null +++ b/testdata/cxxsimple/pair.h @@ -0,0 +1,28 @@ +// Copyright 2025 CloudWeGo Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef PAIR_H +#define PAIR_H + +struct IntPair { + int a; + int b; +}; +typedef struct IntPair StructIntPair; + +void swapPair(StructIntPair *a); + +struct IntPair myself(StructIntPair *a); + +#endif // PAIR_H From 5ba1764f3cbdab9ca26b950a3dc1725ddfab690c Mon Sep 17 00:00:00 2001 From: Hoblovski Date: Wed, 30 Apr 2025 17:36:57 +0800 Subject: [PATCH 03/32] feat: initial support for cxx collect 1. Since clangd does not support semanticTokens/range method, use semanticTokens/full + filtering to emulate. 2. Since the concept of package and module does not apply to C/C++, treat the whole repo as a single package/module. --- lang/cxx/spec.go | 140 ++++++++++++++++++++++++++++++++++++++++++----- lang/lsp/lsp.go | 55 ++++++++++++++++++- 2 files changed, 181 insertions(+), 14 deletions(-) diff --git a/lang/cxx/spec.go b/lang/cxx/spec.go index 243cbcc..6e1f753 100644 --- a/lang/cxx/spec.go +++ b/lang/cxx/spec.go @@ -15,7 +15,12 @@ package cxx import ( + "fmt" + "path/filepath" + "strings" + lsp "github.com/cloudwego/abcoder/lang/lsp" + "github.com/cloudwego/abcoder/lang/utils" ) type CxxSpec struct { @@ -26,24 +31,60 @@ func NewCxxSpec() *CxxSpec { return &CxxSpec{} } +// XXX: maybe multi module support for C++? func (c *CxxSpec) WorkSpace(root string) (map[string]string, error) { - panic("TODO") + c.repo = root + rets := map[string]string{} + absPath, err := filepath.Abs(root) + if err != nil { + return nil, fmt.Errorf("failed to get absolute path: %w", err) + } + rets["current"] = absPath + return rets, nil } +// returns: mod, path, error func (c *CxxSpec) NameSpace(path string) (string, string, error) { - panic("TODO") + // external lib: only standard library (system headers), in /usr/ + if !strings.HasPrefix(path, c.repo) { + if strings.HasPrefix(path, "/usr") { + // assume it is c system library + return "cstdlib", "cstdlib", nil + } + panic(fmt.Sprintf("external lib: %s\n", path)) + } + + return "current", "current", nil + } func (c *CxxSpec) ShouldSkip(path string) bool { - panic("TODO") + if !strings.HasSuffix(path, ".c") { + return true + } + return false +} + +func (c *CxxSpec) IsDocToken(tok lsp.Token) bool { + return tok.Type == "comment" } func (c *CxxSpec) DeclareTokenOfSymbol(sym lsp.DocumentSymbol) int { - panic("TODO") + for i, t := range sym.Tokens { + if c.IsDocToken(t) { + continue + } + for _, m := range t.Modifiers { + if m == "declaration" { + return i + } + } + } + return -1 } func (c *CxxSpec) IsEntityToken(tok lsp.Token) bool { - panic("TODO") + return tok.Type == "class" || tok.Type == "function" || tok.Type == "variable" } func (c *CxxSpec) IsStdToken(tok lsp.Token) bool { @@ -51,23 +92,55 @@ func (c *CxxSpec) IsStdToken(tok lsp.Token) bool { } func (c *CxxSpec) TokenKind(tok lsp.Token) lsp.SymbolKind { - panic("TODO") + switch tok.Type { + case "class": + return lsp.SKStruct + case "enum": + return lsp.SKEnum + case "enumMember": + return lsp.SKEnumMember + case "function", "macro": + return lsp.SKFunction + // rust spec does not treat parameter as a variable + case "parameter": + return lsp.SKVariable + case "typeParameter": + return lsp.SKTypeParameter + // type: TODO + case "interface", "concept", "method", "modifier", "namespace", "type": + panic(fmt.Sprintf("Unsupported token type: %s at %+v\n", tok.Type, tok.Location)) + case "bracket", "comment", "label", "operator", "property", "unknown": + return lsp.SKUnknown + } + panic(fmt.Sprintf("Weird token type: %s at %+v\n", tok.Type, tok.Location)) } func (c *CxxSpec) IsMainFunction(sym lsp.DocumentSymbol) bool { - panic("TODO") + return sym.Kind == lsp.SKFunction && sym.Name == "main" } func (c *CxxSpec) IsEntitySymbol(sym lsp.DocumentSymbol) bool { - panic("TODO") -} + typ := sym.Kind + return typ == lsp.SKFunction || typ == lsp.SKVariable || typ == lsp.SKClass -func (c *CxxSpec) IsPublicSymbol(sym lsp.DocumentSymbol) bool { - panic("TODO") } +func (c *CxxSpec) IsPublicSymbol(sym lsp.DocumentSymbol) bool { + id := c.DeclareTokenOfSymbol(sym) + if id == -1 { + return false + } + for _, m := range sym.Tokens[id].Modifiers { + if m == "globalScope" { + return true + } + } + return false +} + +// TODO(cpp): support C++ OOP func (c *CxxSpec) HasImplSymbol() bool { - panic("TODO") + return false } func (c *CxxSpec) ImplSymbol(sym lsp.DocumentSymbol) (int, int, int) { @@ -75,7 +148,48 @@ func (c *CxxSpec) ImplSymbol(sym lsp.DocumentSymbol) (int, int, int) { } func (c *CxxSpec) FunctionSymbol(sym lsp.DocumentSymbol) (int, []int, []int, []int) { - panic("TODO") + // No receiver and no type params for C + if sym.Kind != lsp.SKFunction { + return -1, nil, nil, nil + } + receiver := -1 + typeParams := []int{} + inputParams := []int{} + outputs := []int{} + + // general format: RETURNVALUE NAME "(" PARAMS ")" BODY + // -------- + // fnNameText + // state machine phase 0 phase 1 phase 2: break + // TODO: attributes may contain parens. also inline structs. + + endRelOffset := 0 + lines := utils.CountLinesCached(sym.Text) + phase := 0 + for i, tok := range sym.Tokens { + switch phase { + case 0: + if tok.Type == "function" { + offset := lsp.RelativePostionWithLines(*lines, sym.Location.Range.Start, tok.Location.Range.Start) + endRelOffset = offset + strings.Index(sym.Text[offset:], ")") + phase = 1 + continue + } + if c.IsEntityToken(tok) { + outputs = append(outputs, i) + } + case 1: + offset := lsp.RelativePostionWithLines(*lines, sym.Location.Range.Start, tok.Location.Range.Start) + if offset > endRelOffset { + phase = 2 + continue + } + if c.IsEntityToken(tok) { + inputParams = append(inputParams, i) + } + } + } + return receiver, typeParams, inputParams, outputs } func (c *CxxSpec) GetUnloadedSymbol(from lsp.Token, define lsp.Location) (string, error) { diff --git a/lang/lsp/lsp.go b/lang/lsp/lsp.go index 21fb843..e226326 100644 --- a/lang/lsp/lsp.go +++ b/lang/lsp/lsp.go @@ -24,6 +24,7 @@ import ( "sort" "strings" + "github.com/cloudwego/abcoder/lang/uniast" "github.com/cloudwego/abcoder/lang/utils" "github.com/sourcegraph/go-lsp" ) @@ -284,6 +285,57 @@ func (cli *LSPClient) References(ctx context.Context, id Location) ([]Location, return resp, nil } +// TODO(perf): cache results especially for whole file queries. +// TODO(refactor): infer use_full_method from capabilities +func (cli *LSPClient) getSemanticTokensRange(ctx context.Context, req DocumentRange, resp *SemanticTokens, use_full_method bool) error { + if use_full_method { + req1 := struct { + TextDocument lsp.TextDocumentIdentifier `json:"textDocument"` + }{TextDocument: req.TextDocument} + if err := cli.Call(ctx, "textDocument/semanticTokens/full", req1, resp); err != nil { + return err + } + filterSemanticTokensInRange(resp, req.Range) + } else { + if err := cli.Call(ctx, "textDocument/semanticTokens/range", req, resp); err != nil { + return err + } + } + return nil +} + +func filterSemanticTokensInRange(resp *SemanticTokens, r Range) { + // LSP starts from 0:0 but the project seems to use 1:1 (see collect PositionOffset) + curPos := Position{ + Line: 0, + Character: 0, + } + newData := []uint32{} + includedIs := []int{} + for i := 0; i < len(resp.Data); i += 5 { + deltaLine := int(resp.Data[i]) + deltaStart := int(resp.Data[i+1]) + if deltaLine != 0 { + curPos.Line += deltaLine + curPos.Character = deltaStart + } else { + curPos.Character += deltaStart + } + if isPositionInRange(curPos, r, true) { + if len(newData) == 0 { + // add range start to initial delta + newData = append(newData, resp.Data[i:i+5]...) + newData[0] = uint32(curPos.Line) + newData[1] = uint32(curPos.Character) + } else { + newData = append(newData, resp.Data[i:i+5]...) + } + includedIs = append(includedIs, i) + } + } + resp.Data = newData +} + func (cli *LSPClient) SemanticTokens(ctx context.Context, id Location) ([]Token, error) { // open file first syms, err := cli.DocumentSymbols(ctx, id.URI) @@ -304,7 +356,8 @@ func (cli *LSPClient) SemanticTokens(ctx context.Context, id Location) ([]Token, } var resp SemanticTokens - if err := cli.Call(ctx, "textDocument/semanticTokens/range", req, &resp); err != nil { + if err := cli.getSemanticTokensRange(ctx, req, &resp, cli.Language == uniast.Cxx); err != nil { + return nil, err } From 83847e709c1a8cc77febcb12a9149f3036526573 Mon Sep 17 00:00:00 2001 From: Hoblovski Date: Wed, 30 Apr 2025 18:49:47 +0800 Subject: [PATCH 04/32] fix: get json.Types and build graph --- lang/collect/export.go | 9 ++++++--- lang/cxx/spec.go | 6 +++--- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/lang/collect/export.go b/lang/collect/export.go index b57ea42..b35bd27 100644 --- a/lang/collect/export.go +++ b/lang/collect/export.go @@ -286,7 +286,7 @@ func (c *Collector) exportSymbol(repo *uniast.Repository, symbol *DocumentSymbol obj.GlobalVars = make([]uniast.Dependency, 0, len(deps)) } obj.GlobalVars = uniast.InsertDependency(obj.GlobalVars, pdep) - case lsp.SKStruct, lsp.SKTypeParameter, lsp.SKInterface, lsp.SKEnum: + case lsp.SKStruct, lsp.SKTypeParameter, lsp.SKInterface, lsp.SKEnum, lsp.SKClass: if obj.Types == nil { obj.Types = make([]uniast.Dependency, 0, len(deps)) } @@ -300,7 +300,7 @@ func (c *Collector) exportSymbol(repo *uniast.Repository, symbol *DocumentSymbol pkg.Functions[id.Name] = obj // Type - case lsp.SKStruct, lsp.SKTypeParameter, lsp.SKInterface, lsp.SKEnum: + case lsp.SKStruct, lsp.SKTypeParameter, lsp.SKInterface, lsp.SKEnum, lsp.SKClass: obj := &uniast.Type{ FileLine: fileLine, Content: content, @@ -317,7 +317,7 @@ func (c *Collector) exportSymbol(repo *uniast.Repository, symbol *DocumentSymbol continue } switch dep.Symbol.Kind { - case lsp.SKStruct, lsp.SKTypeParameter, lsp.SKInterface, lsp.SKEnum: + case lsp.SKStruct, lsp.SKTypeParameter, lsp.SKInterface, lsp.SKEnum, lsp.SKClass: obj.SubStruct = append(obj.SubStruct, uniast.NewDependency(*depid, c.fileLine(dep.Location))) default: log.Error("dep symbol %s not collected for \n", dep.Symbol, id) @@ -370,6 +370,9 @@ func mapKind(kind lsp.SymbolKind) uniast.TypeKind { switch kind { case lsp.SKStruct: return "struct" + // XXX: C++ should use class instead of struct + case lsp.SKClass: + return "struct" case lsp.SKTypeParameter: return "type-parameter" case lsp.SKInterface: diff --git a/lang/cxx/spec.go b/lang/cxx/spec.go index 6e1f753..487da54 100644 --- a/lang/cxx/spec.go +++ b/lang/cxx/spec.go @@ -59,10 +59,10 @@ func (c *CxxSpec) NameSpace(path string) (string, string, error) { } func (c *CxxSpec) ShouldSkip(path string) bool { - if !strings.HasSuffix(path, ".c") { - return true + if strings.HasSuffix(path, ".c") || strings.HasSuffix(path, ".h") { + return false } - return false + return true } func (c *CxxSpec) IsDocToken(tok lsp.Token) bool { From 0ef54acd7071f16b9c23b3f2b1a2763f9da8ced2 Mon Sep 17 00:00:00 2001 From: Hoblovski Date: Fri, 9 May 2025 18:14:17 +0800 Subject: [PATCH 05/32] fix: fix comments --- lang/lsp/lsp.go | 1 - lang/parse.go | 1 - 2 files changed, 2 deletions(-) diff --git a/lang/lsp/lsp.go b/lang/lsp/lsp.go index e226326..d4a85b7 100644 --- a/lang/lsp/lsp.go +++ b/lang/lsp/lsp.go @@ -305,7 +305,6 @@ func (cli *LSPClient) getSemanticTokensRange(ctx context.Context, req DocumentRa } func filterSemanticTokensInRange(resp *SemanticTokens, r Range) { - // LSP starts from 0:0 but the project seems to use 1:1 (see collect PositionOffset) curPos := Position{ Line: 0, Character: 0, diff --git a/lang/parse.go b/lang/parse.go index e44bd34..b902f22 100644 --- a/lang/parse.go +++ b/lang/parse.go @@ -96,7 +96,6 @@ func checkRepoPath(repoPath string, language uniast.Language) (openfile string, // NOTICE: open the Cargo.toml file is required for Rust projects openfile, wait = rust.CheckRepo(repoPath) case uniast.Cxx: - // NOTICE: open the Cargo.toml file is required for Rust projects openfile, wait = cxx.CheckRepo(repoPath) default: openfile = "" From fe8fb67ff9326bb615f2cfabadfb49889a5c248c Mon Sep 17 00:00:00 2001 From: Hoblovski Date: Fri, 9 May 2025 18:22:59 +0800 Subject: [PATCH 06/32] test: duplicate c functions --- testdata/cduplicate/CMakeLists.txt | 6 ++++++ testdata/cduplicate/d1/CMakeLists.txt | 1 + testdata/cduplicate/d1/add.c | 1 + testdata/cduplicate/d2/CMakeLists.txt | 1 + testdata/cduplicate/d2/add.c | 1 + testdata/cduplicate/main.c | 1 + 6 files changed, 11 insertions(+) create mode 100644 testdata/cduplicate/CMakeLists.txt create mode 100644 testdata/cduplicate/d1/CMakeLists.txt create mode 100644 testdata/cduplicate/d1/add.c create mode 100644 testdata/cduplicate/d2/CMakeLists.txt create mode 100644 testdata/cduplicate/d2/add.c create mode 100644 testdata/cduplicate/main.c diff --git a/testdata/cduplicate/CMakeLists.txt b/testdata/cduplicate/CMakeLists.txt new file mode 100644 index 0000000..181fffe --- /dev/null +++ b/testdata/cduplicate/CMakeLists.txt @@ -0,0 +1,6 @@ +cmake_minimum_required(VERSION 3.0) +project(my_combined_project) + +add_subdirectory(d1) + +add_subdirectory(d2) diff --git a/testdata/cduplicate/d1/CMakeLists.txt b/testdata/cduplicate/d1/CMakeLists.txt new file mode 100644 index 0000000..f7f6732 --- /dev/null +++ b/testdata/cduplicate/d1/CMakeLists.txt @@ -0,0 +1 @@ +add_executable(prog1 ../main.c add.c) diff --git a/testdata/cduplicate/d1/add.c b/testdata/cduplicate/d1/add.c new file mode 100644 index 0000000..a60220b --- /dev/null +++ b/testdata/cduplicate/d1/add.c @@ -0,0 +1 @@ +int add(int a){return a+1;} diff --git a/testdata/cduplicate/d2/CMakeLists.txt b/testdata/cduplicate/d2/CMakeLists.txt new file mode 100644 index 0000000..3006db0 --- /dev/null +++ b/testdata/cduplicate/d2/CMakeLists.txt @@ -0,0 +1 @@ +add_executable(prog2 ../main.c add.c) diff --git a/testdata/cduplicate/d2/add.c b/testdata/cduplicate/d2/add.c new file mode 100644 index 0000000..a60220b --- /dev/null +++ b/testdata/cduplicate/d2/add.c @@ -0,0 +1 @@ +int add(int a){return a+1;} diff --git a/testdata/cduplicate/main.c b/testdata/cduplicate/main.c new file mode 100644 index 0000000..2a9264c --- /dev/null +++ b/testdata/cduplicate/main.c @@ -0,0 +1 @@ +extern int add(int); int main(int argc,char**argv){return add(argc);} From 8d868e80c16438fe79e32452eeba81a8ae624651 Mon Sep 17 00:00:00 2001 From: Hoblovski Date: Wed, 14 May 2025 16:03:50 +0800 Subject: [PATCH 07/32] feat: python parser skeleton with custom pylsp Custom pylsp is based on [python-lsp-server](https://github.com/python-lsp/python-lsp-server), and plus the following pull requests: 1. semanticTokens/full: https://github.com/python-lsp/python-lsp-server/pull/645 2. typeDefinition: https://github.com/python-lsp/python-lsp-server/pull/533 Maybe also 3. implementation: https://github.com/python-lsp/python-lsp-server/pull/644 --- README.md | 1 + lang/collect/collect.go | 3 ++ lang/parse.go | 6 +++ lang/python/lib.go | 42 +++++++++++++++++++++ lang/python/spec.go | 83 +++++++++++++++++++++++++++++++++++++++++ lang/uniast/ast.go | 5 +++ main.go | 1 + 7 files changed, 141 insertions(+) create mode 100644 lang/python/lib.go create mode 100644 lang/python/spec.go diff --git a/README.md b/README.md index 08119de..1e3ee01 100644 --- a/README.md +++ b/README.md @@ -52,6 +52,7 @@ ABCoder currently supports the following languages: | Go | ✅ | ✅ | | Rust | ✅ | Coming Soon | | C | Coming Soon | ❌ | +| Python | Coming Soon | ❌ | diff --git a/lang/collect/collect.go b/lang/collect/collect.go index 22f18a6..dbbc12f 100644 --- a/lang/collect/collect.go +++ b/lang/collect/collect.go @@ -23,6 +23,7 @@ import ( "unicode" "github.com/cloudwego/abcoder/lang/cxx" + "github.com/cloudwego/abcoder/lang/python" "github.com/cloudwego/abcoder/lang/log" . "github.com/cloudwego/abcoder/lang/lsp" "github.com/cloudwego/abcoder/lang/rust" @@ -82,6 +83,8 @@ func switchSpec(l uniast.Language) LanguageSpec { return &rust.RustSpec{} case uniast.Cxx: return &cxx.CxxSpec{} + case uniast.Python: + return &python.PythonSpec{} default: panic(fmt.Sprintf("unsupported language %s", l)) } diff --git a/lang/parse.go b/lang/parse.go index b902f22..a40044f 100644 --- a/lang/parse.go +++ b/lang/parse.go @@ -27,6 +27,7 @@ import ( "github.com/cloudwego/abcoder/lang/collect" "github.com/cloudwego/abcoder/lang/cxx" + "github.com/cloudwego/abcoder/lang/python" "github.com/cloudwego/abcoder/lang/golang/parser" "github.com/cloudwego/abcoder/lang/log" "github.com/cloudwego/abcoder/lang/lsp" @@ -97,6 +98,9 @@ func checkRepoPath(repoPath string, language uniast.Language) (openfile string, openfile, wait = rust.CheckRepo(repoPath) case uniast.Cxx: openfile, wait = cxx.CheckRepo(repoPath) + case uniast.Python: + // NOTICE: open the Cargo.toml file is required for Rust projects + openfile, wait = python.CheckRepo(repoPath) default: openfile = "" wait = 0 @@ -112,6 +116,8 @@ func checkLSP(language uniast.Language, lspPath string) (l uniast.Language, s st l, s = rust.GetDefaultLSP() case uniast.Cxx: l, s = cxx.GetDefaultLSP() + case uniast.Python: + l, s = python.GetDefaultLSP() case uniast.Golang: l = uniast.Golang s = "" diff --git a/lang/python/lib.go b/lang/python/lib.go new file mode 100644 index 0000000..2639787 --- /dev/null +++ b/lang/python/lib.go @@ -0,0 +1,42 @@ +// Copyright 2025 CloudWeGo Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package python + +import ( + "time" + + "github.com/cloudwego/abcoder/lang/uniast" + "github.com/cloudwego/abcoder/lang/utils" +) + +const MaxWaitDuration = 5 * time.Minute + +func GetDefaultLSP() (lang uniast.Language, name string) { + // needs to use the pylsp from https://github.com/python-lsp/python-lsp-server/pull/533 + return uniast.Python, "pylsp" +} + +func CheckRepo(repo string) (string, time.Duration) { + openfile := "" + // TODO: check if the project compiles. + + // NOTICE: wait for Rust projects based on code files + _, size := utils.CountFiles(repo, ".py", "SKIPDIR") + wait := 2*time.Second + time.Second*time.Duration(size/1024) + if wait > MaxWaitDuration { + wait = MaxWaitDuration + } + return openfile, wait +} diff --git a/lang/python/spec.go b/lang/python/spec.go new file mode 100644 index 0000000..a9232c6 --- /dev/null +++ b/lang/python/spec.go @@ -0,0 +1,83 @@ +// Copyright 2025 CloudWeGo Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package python + +import ( + lsp "github.com/cloudwego/abcoder/lang/lsp" +) + +type PythonSpec struct { + repo string +} + +func NewPythonSpec() *PythonSpec { + return &PythonSpec{} +} + +func (c *PythonSpec) WorkSpace(root string) (map[string]string, error) { + panic("TODO") +} + +func (c *PythonSpec) NameSpace(path string) (string, string, error) { + panic("TODO") +} + +func (c *PythonSpec) ShouldSkip(path string) bool { + panic("TODO") +} + +func (c *PythonSpec) DeclareTokenOfSymbol(sym lsp.DocumentSymbol) int { + panic("TODO") +} + +func (c *PythonSpec) IsEntityToken(tok lsp.Token) bool { + panic("TODO") +} + +func (c *PythonSpec) IsStdToken(tok lsp.Token) bool { + panic("TODO") +} + +func (c *PythonSpec) TokenKind(tok lsp.Token) lsp.SymbolKind { + panic("TODO") +} + +func (c *PythonSpec) IsMainFunction(sym lsp.DocumentSymbol) bool { + panic("TODO") +} + +func (c *PythonSpec) IsEntitySymbol(sym lsp.DocumentSymbol) bool { + panic("TODO") +} + +func (c *PythonSpec) IsPublicSymbol(sym lsp.DocumentSymbol) bool { + panic("TODO") +} + +func (c *PythonSpec) HasImplSymbol() bool { + panic("TODO") +} + +func (c *PythonSpec) ImplSymbol(sym lsp.DocumentSymbol) (int, int, int) { + panic("TODO") +} + +func (c *PythonSpec) FunctionSymbol(sym lsp.DocumentSymbol) (int, []int, []int, []int) { + panic("TODO") +} + +func (c *PythonSpec) GetUnloadedSymbol(from lsp.Token, define lsp.Location) (string, error) { + panic("TODO") +} diff --git a/lang/uniast/ast.go b/lang/uniast/ast.go index f6e8416..38ce58a 100644 --- a/lang/uniast/ast.go +++ b/lang/uniast/ast.go @@ -29,6 +29,7 @@ const ( Golang Language = "go" Rust Language = "rust" Cxx Language = "cxx" + Python Language = "python" Unknown Language = "" ) @@ -40,6 +41,8 @@ func (l Language) String() string { return "go" case Cxx: return "cxx" + case Python: + return "python" default: return string(l) } @@ -58,6 +61,8 @@ func NewLanguage(lang string) (l Language) { return Rust case "cxx": return Cxx + case "python": + return Python default: return Unknown } diff --git a/main.go b/main.go index dcd1d33..6ccc960 100644 --- a/main.go +++ b/main.go @@ -51,6 +51,7 @@ Action: Language: rust for rust codes cxx for c codes (cpp support is on the way) + python for python codes go for golang codes URI: for action parse: the directory path of the repo From 92dbe3b32d12f8eb15087e56e0a948640fd84c92 Mon Sep 17 00:00:00 2001 From: Hoblovski Date: Wed, 14 May 2025 16:08:01 +0800 Subject: [PATCH 08/32] feat: add tests for python parser --- testdata/pythonsimple/test.py | 65 ++++++++++++++++++++++++++ testdata/pythonsimple/test2.py | 12 +++++ testdata/pythonsimple/test3.py | 18 +++++++ testdata/pythonsingle/main.py | 85 ++++++++++++++++++++++++++++++++++ 4 files changed, 180 insertions(+) create mode 100644 testdata/pythonsimple/test.py create mode 100644 testdata/pythonsimple/test2.py create mode 100644 testdata/pythonsimple/test3.py create mode 100644 testdata/pythonsingle/main.py diff --git a/testdata/pythonsimple/test.py b/testdata/pythonsimple/test.py new file mode 100644 index 0000000..36a9022 --- /dev/null +++ b/testdata/pythonsimple/test.py @@ -0,0 +1,65 @@ +from typing import Union +from test2 import IntPair +from test3 import * + + +def swap_pair(pair: IntPair) -> None: + """ + Swaps the values of a and b in an IntPair. + Note: The original Rust code had a logical error if a swap was intended; + it would result in both pair.a and pair.b being set to the original value of pair.a. + This Python version implements a correct swap. + """ + pair.a, pair.b = pair.b, pair.a + + +from test3 import * + + +def add(a: int, b: int) -> int: + return a + b + + +def compare(a: int, b: int) -> int: + if a < b: + return -1 + elif a > b: + return 1 + else: + return 0 + + +IntOrChar = Union[IntVariant, CharVariant] + + +def main() -> None: + ls = list((1, 2)) + + x = add(2, 3) + print(x) + + my_pair = IntPair(a=10, b=20) + print(f"Original pair: {my_pair}") + swap_pair(my_pair) + print(f"Swapped pair: {my_pair}") + + val1: IntOrChar = IntVariant(123) + val2: IntOrChar = CharVariant(ord("A")) + + print(f"IntOrChar 1: {val1}") + print(f"IntOrChar 2: {val2}") + + if isinstance(val1, IntVariant): + print(f"val1 is an IntVariant with value: {val1.value}") + if isinstance(val2, CharVariant): + print( + f"val2 is a CharVariant with u8 value: {val2.value} (char: '{chr(val2.value)}')" + ) + + print(f"Comparing 5 and 10: {compare(5, 10)}") + print(f"Comparing 10 and 5: {compare(10, 5)}") + print(f"Comparing 7 and 7: {compare(7, 7)}") + + +if __name__ == "__main__": + main() diff --git a/testdata/pythonsimple/test2.py b/testdata/pythonsimple/test2.py new file mode 100644 index 0000000..38198c5 --- /dev/null +++ b/testdata/pythonsimple/test2.py @@ -0,0 +1,12 @@ +from dataclasses import dataclass + + +@dataclass +class IntPair: + a: int + b: int + + +def main() -> None: + my_pair = IntPair(a=10, b=20) + print(f"Original pair: {my_pair}") diff --git a/testdata/pythonsimple/test3.py b/testdata/pythonsimple/test3.py new file mode 100644 index 0000000..508b06d --- /dev/null +++ b/testdata/pythonsimple/test3.py @@ -0,0 +1,18 @@ +class IntVariant: + def __init__(self, value: int): + self.value: int = value + + def __repr__(self) -> str: + return f"IntVariant({self.value})" + + +class CharVariant: + def __init__(self, value: int): + if not (0 <= value <= 255): + raise ValueError( + "CharVariant value must be an integer between 0 and 255 (u8 equivalent)" + ) + self.value: int = value + + def __repr__(self) -> str: + return f"CharVariant(value={self.value}, char='{chr(self.value)}')" diff --git a/testdata/pythonsingle/main.py b/testdata/pythonsingle/main.py new file mode 100644 index 0000000..6600f35 --- /dev/null +++ b/testdata/pythonsingle/main.py @@ -0,0 +1,85 @@ +from dataclasses import dataclass +from typing import Union + + +@dataclass +class IntPair: + a: int + b: int + + +def swap_pair(pair: IntPair) -> None: + """ + Swaps the values of a and b in an IntPair. + Note: The original Rust code had a logical error if a swap was intended; + it would result in both pair.a and pair.b being set to the original value of pair.a. + This Python version implements a correct swap. + """ + pair.a, pair.b = pair.b, pair.a + + +class IntVariant: + def __init__(self, value: int): + self.value: int = value + + def __repr__(self) -> str: + return f"IntVariant({self.value})" + + +class CharVariant: + def __init__(self, value: int): + if not (0 <= value <= 255): + raise ValueError( + "CharVariant value must be an integer between 0 and 255 (u8 equivalent)" + ) + self.value: int = value + + def __repr__(self) -> str: + return f"CharVariant(value={self.value}, char='{chr(self.value)}')" + + +IntOrChar = Union[IntVariant, CharVariant] + + +def add(a: int, b: int) -> int: + return a + b + + +def compare(a: int, b: int) -> int: + if a < b: + return -1 + elif a > b: + return 1 + else: + return 0 + + +def main() -> None: + x = add(2, 3) + print(x) + + my_pair = IntPair(a=10, b=20) + print(f"Original pair: {my_pair}") + swap_pair(my_pair) + print(f"Swapped pair: {my_pair}") + + val1: IntOrChar = IntVariant(123) + val2: IntOrChar = CharVariant(ord("A")) + + print(f"IntOrChar 1: {val1}") + print(f"IntOrChar 2: {val2}") + + if isinstance(val1, IntVariant): + print(f"val1 is an IntVariant with value: {val1.value}") + if isinstance(val2, CharVariant): + print( + f"val2 is a CharVariant with u8 value: {val2.value} (char: '{chr(val2.value)}')" + ) + + print(f"Comparing 5 and 10: {compare(5, 10)}") + print(f"Comparing 10 and 5: {compare(10, 5)}") + print(f"Comparing 7 and 7: {compare(7, 7)}") + + +if __name__ == "__main__": + main() From 9ecefb5e8e5b43e4c085f781191f1754db9d1e67 Mon Sep 17 00:00:00 2001 From: Hoblovski Date: Wed, 14 May 2025 16:08:21 +0800 Subject: [PATCH 09/32] XXX: temporarily disable unused implementation check --- lang/lsp/client.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lang/lsp/client.go b/lang/lsp/client.go index 58479fe..4940f66 100644 --- a/lang/lsp/client.go +++ b/lang/lsp/client.go @@ -156,10 +156,10 @@ func initLSPClient(ctx context.Context, svr io.ReadWriteCloser, dir DocumentURI, return nil, fmt.Errorf("server did not provide TypeDefinition") } - implementationProvider, ok := vs["implementationProvider"].(bool) - if !ok || !implementationProvider { - return nil, fmt.Errorf("server did not provide Implementation") - } + // implementationProvider, ok := vs["implementationProvider"].(bool) + // if !ok || !implementationProvider { + // return nil, fmt.Errorf("server did not provide Implementation") + // } documentSymbolProvider, ok := vs["documentSymbolProvider"].(bool) if !ok || !documentSymbolProvider { return nil, fmt.Errorf("server did not provide DocumentSymbol") From a541e5a04c3861ed3f34a352a161d1352fc81f7a Mon Sep 17 00:00:00 2001 From: Hoblovski Date: Thu, 15 May 2025 14:12:49 +0800 Subject: [PATCH 10/32] refactor: go fmt --- lang/log/logger.go | 6 +++--- lang/parse.go | 2 +- lang/rust/utils/lsp_test.go | 6 +++--- lang/uniast/ast.go | 2 +- testdata/golang/pkg/entity/entity.go | 6 +++--- testdata/golang/pkg/refer.go | 6 +++--- 6 files changed, 14 insertions(+), 14 deletions(-) diff --git a/lang/log/logger.go b/lang/log/logger.go index d5aa598..f69021c 100644 --- a/lang/log/logger.go +++ b/lang/log/logger.go @@ -1,11 +1,11 @@ // Copyright 2025 CloudWeGo Authors -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at -// +// // https://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/lang/parse.go b/lang/parse.go index a40044f..446ac5a 100644 --- a/lang/parse.go +++ b/lang/parse.go @@ -27,10 +27,10 @@ import ( "github.com/cloudwego/abcoder/lang/collect" "github.com/cloudwego/abcoder/lang/cxx" - "github.com/cloudwego/abcoder/lang/python" "github.com/cloudwego/abcoder/lang/golang/parser" "github.com/cloudwego/abcoder/lang/log" "github.com/cloudwego/abcoder/lang/lsp" + "github.com/cloudwego/abcoder/lang/python" "github.com/cloudwego/abcoder/lang/rust" "github.com/cloudwego/abcoder/lang/uniast" ) diff --git a/lang/rust/utils/lsp_test.go b/lang/rust/utils/lsp_test.go index e019b46..d19b6ae 100644 --- a/lang/rust/utils/lsp_test.go +++ b/lang/rust/utils/lsp_test.go @@ -1,11 +1,11 @@ // Copyright 2025 CloudWeGo Authors -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at -// +// // https://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/lang/uniast/ast.go b/lang/uniast/ast.go index 38ce58a..844897b 100644 --- a/lang/uniast/ast.go +++ b/lang/uniast/ast.go @@ -29,7 +29,7 @@ const ( Golang Language = "go" Rust Language = "rust" Cxx Language = "cxx" - Python Language = "python" + Python Language = "python" Unknown Language = "" ) diff --git a/testdata/golang/pkg/entity/entity.go b/testdata/golang/pkg/entity/entity.go index 2398349..b0410c3 100644 --- a/testdata/golang/pkg/entity/entity.go +++ b/testdata/golang/pkg/entity/entity.go @@ -1,11 +1,11 @@ // Copyright 2025 CloudWeGo Authors -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at -// +// // https://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/testdata/golang/pkg/refer.go b/testdata/golang/pkg/refer.go index 75ade87..badfd41 100644 --- a/testdata/golang/pkg/refer.go +++ b/testdata/golang/pkg/refer.go @@ -1,11 +1,11 @@ // Copyright 2025 CloudWeGo Authors -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at -// +// // https://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. From 3e88127bf7c47487d02bf1e3b5c4be0f42b1dbd5 Mon Sep 17 00:00:00 2001 From: Hoblovski Date: Thu, 15 May 2025 14:13:44 +0800 Subject: [PATCH 11/32] feat: initial support for python parse Parses and generates json. Tons of ad hoc decisions. --- lang/collect/collect.go | 4 +- lang/lsp/lsp.go | 2 +- lang/python/lib.go | 2 +- lang/python/spec.go | 211 +++++++++++++++++++++++++++++++++++++--- lang/rust/repo.go | 2 +- 5 files changed, 205 insertions(+), 16 deletions(-) diff --git a/lang/collect/collect.go b/lang/collect/collect.go index dbbc12f..6602bde 100644 --- a/lang/collect/collect.go +++ b/lang/collect/collect.go @@ -23,9 +23,9 @@ import ( "unicode" "github.com/cloudwego/abcoder/lang/cxx" - "github.com/cloudwego/abcoder/lang/python" "github.com/cloudwego/abcoder/lang/log" . "github.com/cloudwego/abcoder/lang/lsp" + "github.com/cloudwego/abcoder/lang/python" "github.com/cloudwego/abcoder/lang/rust" "github.com/cloudwego/abcoder/lang/uniast" ) @@ -170,6 +170,8 @@ func (c *Collector) Collect(ctx context.Context) error { // only language entity symbols need to be collect on next if c.spec.IsEntitySymbol(*sym) { syms = append(syms, sym) + } else { + fmt.Printf("skip %s at %+v with %+v\n", sym.Name, sym.Location, sym.Kind) } c.processSymbol(ctx, sym, 1) } diff --git a/lang/lsp/lsp.go b/lang/lsp/lsp.go index d4a85b7..0cf9cdf 100644 --- a/lang/lsp/lsp.go +++ b/lang/lsp/lsp.go @@ -355,7 +355,7 @@ func (cli *LSPClient) SemanticTokens(ctx context.Context, id Location) ([]Token, } var resp SemanticTokens - if err := cli.getSemanticTokensRange(ctx, req, &resp, cli.Language == uniast.Cxx); err != nil { + if err := cli.getSemanticTokensRange(ctx, req, &resp, cli.Language == uniast.Cxx || cli.Language == uniast.Python); err != nil { return nil, err } diff --git a/lang/python/lib.go b/lang/python/lib.go index 2639787..f504045 100644 --- a/lang/python/lib.go +++ b/lang/python/lib.go @@ -24,7 +24,7 @@ import ( const MaxWaitDuration = 5 * time.Minute func GetDefaultLSP() (lang uniast.Language, name string) { - // needs to use the pylsp from https://github.com/python-lsp/python-lsp-server/pull/533 + // needs to use the custom pylsp (see commit message) return uniast.Python, "pylsp" } diff --git a/lang/python/spec.go b/lang/python/spec.go index a9232c6..28c2b38 100644 --- a/lang/python/spec.go +++ b/lang/python/spec.go @@ -15,11 +15,18 @@ package python import ( + "fmt" + "os" + "path/filepath" + "strings" + lsp "github.com/cloudwego/abcoder/lang/lsp" ) type PythonSpec struct { - repo string + repo string + topModuleName string + topModulePath string } func NewPythonSpec() *PythonSpec { @@ -27,23 +34,109 @@ func NewPythonSpec() *PythonSpec { } func (c *PythonSpec) WorkSpace(root string) (map[string]string, error) { - panic("TODO") + // In python, pyspeak:modules are included by pyspeak:packages. + // This is the opposite of ours. + c.repo = root + rets := map[string]string{} + absPath, err := filepath.Abs(root) + if err != nil { + return nil, err + } + + num_projfiles := 0 + scanner := func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + base := filepath.Base(path) + if base == "pyproject.toml" { + num_projfiles++ + if num_projfiles > 1 { + panic("multiple pyproject.toml files found") + } + // it's hard to infer the name or package from pyproject.toml + } + return nil + } + if err := filepath.Walk(root, scanner); err != nil { + return nil, err + } + + // XXX ad-hoc way + if strings.Contains(c.repo, "astropy") { + panic("TODO") + } else { + c.topModulePath = absPath + c.topModuleName = "current" + rets[c.topModuleName] = c.topModulePath + } + return rets, nil } +// returns: modName, pkgPath, error func (c *PythonSpec) NameSpace(path string) (string, string, error) { - panic("TODO") + if strings.HasPrefix(path, c.topModulePath) { + // internal module + modName := c.topModuleName + relPath, err := filepath.Rel(c.topModulePath, path) + if err != nil { + return "", "", err + } + // todo: handle __init__.py + relPath = strings.TrimSuffix(relPath, ".py") + pkgPath := strings.ReplaceAll(relPath, string(os.PathSeparator), ".") + return modName, pkgPath, nil + } + + if strings.HasSuffix(path, "stdlib/3/builtins.pyi") { + // builtin module + return "builtins", "builtins", nil + } + + // XXX: hardcoded python version + condaPrefix := "/home/zhenyang/anaconda3/envs/abcoder/lib/python3.11" + if strings.HasPrefix(path, condaPrefix) { + modName := "builtins" + relPath, err := filepath.Rel(condaPrefix, path) + if err != nil { + return "", "", err + } + relPath = strings.TrimSuffix(relPath, ".py") + pkgPath := strings.ReplaceAll(relPath, string(os.PathSeparator), ".") + return modName, pkgPath, nil + } + + panic(fmt.Sprintf("Namespace %s", path)) } func (c *PythonSpec) ShouldSkip(path string) bool { - panic("TODO") + if !strings.HasSuffix(path, ".py") { + return true + } + return false +} + +func (c *PythonSpec) IsDocToken(tok lsp.Token) bool { + return tok.Type == "comment" } func (c *PythonSpec) DeclareTokenOfSymbol(sym lsp.DocumentSymbol) int { - panic("TODO") + for i, t := range sym.Tokens { + if c.IsDocToken(t) { + continue + } + for _, m := range t.Modifiers { + if m == "declaration" { + return i + } + } + } + return -1 } func (c *PythonSpec) IsEntityToken(tok lsp.Token) bool { - panic("TODO") + typ := tok.Type + return typ == "function" || typ == "parameter" || typ == "variable" || typ == "property" } func (c *PythonSpec) IsStdToken(tok lsp.Token) bool { @@ -51,31 +144,125 @@ func (c *PythonSpec) IsStdToken(tok lsp.Token) bool { } func (c *PythonSpec) TokenKind(tok lsp.Token) lsp.SymbolKind { - panic("TODO") + switch tok.Type { + case "namespace": + return lsp.SKNamespace + case "type": + return lsp.SKObject // no direct match; mapped to Object conservatively + case "class": + return lsp.SKClass + case "enum": + return lsp.SKEnum + case "interface": + return lsp.SKInterface + case "struct": + return lsp.SKStruct + case "typeParameter": + return lsp.SKTypeParameter + case "parameter": + return lsp.SKVariable + case "variable": + return lsp.SKVariable + case "property": + return lsp.SKProperty + case "enumMember": + return lsp.SKEnumMember + case "event": + return lsp.SKEvent + case "function": + return lsp.SKFunction + case "method": + return lsp.SKMethod + case "macro": + return lsp.SKFunction + case "string": + return lsp.SKString + case "number": + return lsp.SKNumber + case "operator": + return lsp.SKOperator + default: + return lsp.SKUnknown + } } func (c *PythonSpec) IsMainFunction(sym lsp.DocumentSymbol) bool { - panic("TODO") + return sym.Kind == lsp.SKFunction && sym.Name == "main" } func (c *PythonSpec) IsEntitySymbol(sym lsp.DocumentSymbol) bool { - panic("TODO") + typ := sym.Kind + return typ == lsp.SKObject || typ == lsp.SKMethod || typ == lsp.SKFunction || typ == lsp.SKVariable || typ == lsp.SKStruct || typ == lsp.SKEnum || typ == lsp.SKTypeParameter || typ == lsp.SKInterface || typ == lsp.SKConstant } func (c *PythonSpec) IsPublicSymbol(sym lsp.DocumentSymbol) bool { - panic("TODO") + if strings.HasPrefix(sym.Name, "_") { + return false + } + return true } func (c *PythonSpec) HasImplSymbol() bool { - panic("TODO") + // Python does not have direct impl symbols + return false } func (c *PythonSpec) ImplSymbol(sym lsp.DocumentSymbol) (int, int, int) { panic("TODO") } +// returns: receiver, typeParams, inputParams, outputParams func (c *PythonSpec) FunctionSymbol(sym lsp.DocumentSymbol) (int, []int, []int, []int) { - panic("TODO") + // no receiver. no type params in python + // reference: https://docs.python.org/3/reference/grammar.html + receiver := -1 + typeParams := []int{} + + // state 0: goto state 1 when we see a def + // state 1: goto state 2 when we see a ( + // state 2: we're in the param list. + // collect input params by checking entity tokens. + // goto state 3 when we see a ) + // state 3: collect output params. + // finish when we see a : + state := 0 + paren_depth := 0 + inputParams := []int{} + outputParams := []int{} + for i, t := range sym.Tokens { + if state == -1 { + break + } + switch state { + case 0: + if t.Text == "def" { + state = 1 + } + case 1: + if t.Text == "(" { + state = 2 + paren_depth = 1 + } + case 2: + if t.Text == ")" { + paren_depth -= 1 + if paren_depth == 0 { + state = 3 + } + } else if c.IsEntityToken(t) { + inputParams = append(inputParams, i) + } + case 3: + // no-op + if t.Text == ":" { + state = -1 + } else if c.IsEntityToken(t) { + outputParams = append(outputParams, i) + } + } + } + + return receiver, typeParams, inputParams, outputParams } func (c *PythonSpec) GetUnloadedSymbol(from lsp.Token, define lsp.Location) (string, error) { diff --git a/lang/rust/repo.go b/lang/rust/repo.go index 0bf2c04..ba81142 100644 --- a/lang/rust/repo.go +++ b/lang/rust/repo.go @@ -27,7 +27,7 @@ import ( "github.com/cloudwego/abcoder/lang/utils" ) -const MaxWaitDuration = 5 * time.Minute +const MaxWaitDuration = 5 * time.Second func CheckRepo(repo string) (string, time.Duration) { // NOTICE: open the Cargo.toml file is required for Rust projects From 7a7cd45fad57edb5de92320c523f072bf331d92e Mon Sep 17 00:00:00 2001 From: Hoblovski Date: Thu, 15 May 2025 19:06:56 +0800 Subject: [PATCH 12/32] fix: entity types. fix calls/types for funcs --- lang/python/lib.go | 2 +- lang/python/spec.go | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/lang/python/lib.go b/lang/python/lib.go index f504045..1b44135 100644 --- a/lang/python/lib.go +++ b/lang/python/lib.go @@ -21,7 +21,7 @@ import ( "github.com/cloudwego/abcoder/lang/utils" ) -const MaxWaitDuration = 5 * time.Minute +const MaxWaitDuration = 5 * time.Second func GetDefaultLSP() (lang uniast.Language, name string) { // needs to use the custom pylsp (see commit message) diff --git a/lang/python/spec.go b/lang/python/spec.go index 28c2b38..c95528b 100644 --- a/lang/python/spec.go +++ b/lang/python/spec.go @@ -136,7 +136,7 @@ func (c *PythonSpec) DeclareTokenOfSymbol(sym lsp.DocumentSymbol) int { func (c *PythonSpec) IsEntityToken(tok lsp.Token) bool { typ := tok.Type - return typ == "function" || typ == "parameter" || typ == "variable" || typ == "property" + return typ == "function" || typ == "parameter" || typ == "variable" || typ == "property" || typ == "class" || typ == "type" } func (c *PythonSpec) IsStdToken(tok lsp.Token) bool { @@ -192,7 +192,8 @@ func (c *PythonSpec) IsMainFunction(sym lsp.DocumentSymbol) bool { func (c *PythonSpec) IsEntitySymbol(sym lsp.DocumentSymbol) bool { typ := sym.Kind - return typ == lsp.SKObject || typ == lsp.SKMethod || typ == lsp.SKFunction || typ == lsp.SKVariable || typ == lsp.SKStruct || typ == lsp.SKEnum || typ == lsp.SKTypeParameter || typ == lsp.SKInterface || typ == lsp.SKConstant + return typ == lsp.SKObject || typ == lsp.SKMethod || typ == lsp.SKFunction || typ == lsp.SKVariable || + typ == lsp.SKStruct || typ == lsp.SKEnum || typ == lsp.SKTypeParameter || typ == lsp.SKConstant || typ == lsp.SKClass } func (c *PythonSpec) IsPublicSymbol(sym lsp.DocumentSymbol) bool { From 5c371adb3319ddbbbcdede5a41b598c4206856ae Mon Sep 17 00:00:00 2001 From: Hoblovski Date: Thu, 15 May 2025 20:17:27 +0800 Subject: [PATCH 13/32] fix: manualy parse to get correct Params Python LSP does not print `def` or `(` as semtokens, but we need their position to determine which semantic tokens are in parameters or return values. So add a manual parsing based on `sym.Text`. --- lang/collect/collect.go | 9 +++-- lang/python/spec.go | 74 ++++++++++++++++++++++++++++++++--------- 2 files changed, 64 insertions(+), 19 deletions(-) diff --git a/lang/collect/collect.go b/lang/collect/collect.go index 6602bde..b18a8a4 100644 --- a/lang/collect/collect.go +++ b/lang/collect/collect.go @@ -621,9 +621,12 @@ func (c *Collector) updateFunctionInfo(sym *DocumentSymbol, tsyms, ipsyms, opsym } } else { f = functionInfo{ - TypeParams: tsyms, - Inputs: ipsyms, - Outputs: opsyms, + TypeParams: tsyms, + Inputs: ipsyms, + Outputs: opsyms, + InputsSorted: is, + OutputsSorted: os, + TypeParamsSorted: ts, } if rsym != nil { if f.Method == nil { diff --git a/lang/python/spec.go b/lang/python/spec.go index c95528b..561c903 100644 --- a/lang/python/spec.go +++ b/lang/python/spec.go @@ -136,7 +136,7 @@ func (c *PythonSpec) DeclareTokenOfSymbol(sym lsp.DocumentSymbol) int { func (c *PythonSpec) IsEntityToken(tok lsp.Token) bool { typ := tok.Type - return typ == "function" || typ == "parameter" || typ == "variable" || typ == "property" || typ == "class" || typ == "type" + return typ == "function" || typ == "variable" || typ == "property" || typ == "class" || typ == "type" } func (c *PythonSpec) IsStdToken(tok lsp.Token) bool { @@ -217,8 +217,11 @@ func (c *PythonSpec) FunctionSymbol(sym lsp.DocumentSymbol) (int, []int, []int, // no receiver. no type params in python // reference: https://docs.python.org/3/reference/grammar.html receiver := -1 + // python actually has these but TODO typeParams := []int{} + // Hell, manually parse function text to get locations of key tokens since LSP does not support this... + // // state 0: goto state 1 when we see a def // state 1: goto state 2 when we see a ( // state 2: we're in the param list. @@ -228,36 +231,75 @@ func (c *PythonSpec) FunctionSymbol(sym lsp.DocumentSymbol) (int, []int, []int, // finish when we see a : state := 0 paren_depth := 0 - inputParams := []int{} - outputParams := []int{} - for i, t := range sym.Tokens { - if state == -1 { - break - } + invalidpos := lsp.Position{ + Line: -1, + Character: -1, + } + // defpos := invalidpos + lparenpos := invalidpos + rparenpos := invalidpos + bodypos := invalidpos + curpos := sym.Location.Range.Start + for i := range len(sym.Text) { switch state { case 0: - if t.Text == "def" { + if i+4 >= len(sym.Text) { + // function text does not contain a def + // should be an import + return -1, []int{}, []int{}, []int{} + } + next4chars := sym.Text[i : i+4] + // heuristics should work with reasonable python code + if next4chars == "def " { + // defpos = curpos state = 1 } case 1: - if t.Text == "(" { - state = 2 + if sym.Text[i] == '(' { + lparenpos = curpos paren_depth = 1 + state = 2 } case 2: - if t.Text == ")" { + if sym.Text[i] == ')' { + rparenpos = curpos paren_depth -= 1 if paren_depth == 0 { state = 3 } - } else if c.IsEntityToken(t) { - inputParams = append(inputParams, i) } case 3: - // no-op - if t.Text == ":" { + if sym.Text[i] == ':' { + bodypos = curpos state = -1 - } else if c.IsEntityToken(t) { + } + } + if sym.Text[i] == '\n' { + curpos.Line++ + curpos.Character = 0 + } else { + curpos.Character++ + } + } + + paramsrange := lsp.Range{ + Start: lparenpos, + End: rparenpos, + } + returnrange := lsp.Range{ + Start: rparenpos, + End: bodypos, + } + inputParams := []int{} + outputParams := []int{} + for i, t := range sym.Tokens { + if paramsrange.Include(t.Location.Range) { + if c.IsEntityToken(t) { + inputParams = append(inputParams, i) + } + } + if returnrange.Include(t.Location.Range) { + if c.IsEntityToken(t) { outputParams = append(outputParams, i) } } From 2c3f3176db42d4d4a93d91dbd9adda9ee5ed0fd9 Mon Sep 17 00:00:00 2001 From: Hoblovski Date: Thu, 15 May 2025 20:52:46 +0800 Subject: [PATCH 14/32] fix: uniast.file.Types now exclude imported types (and funcs too) --- lang/collect/collect.go | 4 ++++ lang/python/spec.go | 8 ++++++++ 2 files changed, 12 insertions(+) diff --git a/lang/collect/collect.go b/lang/collect/collect.go index b18a8a4..9dd5b10 100644 --- a/lang/collect/collect.go +++ b/lang/collect/collect.go @@ -147,6 +147,10 @@ func (c *Collector) Collect(ctx context.Context) error { if err != nil { return err } + // HACK: skip imported symbols + if c.Language == uniast.Python && (strings.HasPrefix(content, "from ") || strings.HasPrefix(content, "import ")) { + continue + } // collect tokens tokens, err := c.cli.SemanticTokens(ctx, sym.Location) if err != nil { diff --git a/lang/python/spec.go b/lang/python/spec.go index 561c903..e1ff33d 100644 --- a/lang/python/spec.go +++ b/lang/python/spec.go @@ -136,6 +136,10 @@ func (c *PythonSpec) DeclareTokenOfSymbol(sym lsp.DocumentSymbol) int { func (c *PythonSpec) IsEntityToken(tok lsp.Token) bool { typ := tok.Type + if strings.HasPrefix(tok.Text, "from ") || strings.HasPrefix(tok.Text, "import ") { + // Python LSP highlights imported symbols as function/types + return false + } return typ == "function" || typ == "variable" || typ == "property" || typ == "class" || typ == "type" } @@ -192,6 +196,10 @@ func (c *PythonSpec) IsMainFunction(sym lsp.DocumentSymbol) bool { func (c *PythonSpec) IsEntitySymbol(sym lsp.DocumentSymbol) bool { typ := sym.Kind + if strings.HasPrefix(sym.Text, "from ") || strings.HasPrefix(sym.Text, "import ") { + // Python LSP highlights imported symbols as function/types + return false + } return typ == lsp.SKObject || typ == lsp.SKMethod || typ == lsp.SKFunction || typ == lsp.SKVariable || typ == lsp.SKStruct || typ == lsp.SKEnum || typ == lsp.SKTypeParameter || typ == lsp.SKConstant || typ == lsp.SKClass } From ab3fab49871a6bf9eb3ec8b2daec424ddf1fcfd7 Mon Sep 17 00:00:00 2001 From: Hoblovski Date: Thu, 15 May 2025 21:03:32 +0800 Subject: [PATCH 15/32] feat: test for python parser method dependency --- testdata/pythonsimple/test.py | 6 ++++++ testdata/pythonsimple/test2.py | 3 +++ 2 files changed, 9 insertions(+) diff --git a/testdata/pythonsimple/test.py b/testdata/pythonsimple/test.py index 36a9022..42660b6 100644 --- a/testdata/pythonsimple/test.py +++ b/testdata/pythonsimple/test.py @@ -30,9 +30,14 @@ def compare(a: int, b: int) -> int: IntOrChar = Union[IntVariant, CharVariant] +# TODO: global var not suppported +globalvar = 5 def main() -> None: + global globalvar + globalvar = 65 + ls = list((1, 2)) x = add(2, 3) @@ -42,6 +47,7 @@ def main() -> None: print(f"Original pair: {my_pair}") swap_pair(my_pair) print(f"Swapped pair: {my_pair}") + print(f"my_pair.sum = {my_pair.sum()}") val1: IntOrChar = IntVariant(123) val2: IntOrChar = CharVariant(ord("A")) diff --git a/testdata/pythonsimple/test2.py b/testdata/pythonsimple/test2.py index 38198c5..01370e1 100644 --- a/testdata/pythonsimple/test2.py +++ b/testdata/pythonsimple/test2.py @@ -6,6 +6,9 @@ class IntPair: a: int b: int + def sum(self): + return self.a + self.b + def main() -> None: my_pair = IntPair(a=10, b=20) From 2a52a5e648d7956b2179205da5904a421e568de8 Mon Sep 17 00:00:00 2001 From: Hoblovski Date: Thu, 15 May 2025 21:53:59 +0800 Subject: [PATCH 16/32] feat: more tests for python parser --- testdata/pyimport/main.py | 10 ++++++++++ testdata/pysimpleobj/main.py | 12 ++++++++++++ 2 files changed, 22 insertions(+) create mode 100644 testdata/pyimport/main.py create mode 100644 testdata/pysimpleobj/main.py diff --git a/testdata/pyimport/main.py b/testdata/pyimport/main.py new file mode 100644 index 0000000..c59e5ff --- /dev/null +++ b/testdata/pyimport/main.py @@ -0,0 +1,10 @@ +from typing import Union + +# similar to rust's `pub use`. + + +def main(): + pass + + +# main.py::types should not include Union diff --git a/testdata/pysimpleobj/main.py b/testdata/pysimpleobj/main.py new file mode 100644 index 0000000..6f5cb4d --- /dev/null +++ b/testdata/pysimpleobj/main.py @@ -0,0 +1,12 @@ +class Foo: + def __init__(self): + self.x = 5 + + def bar(self, v: int) -> int: + self.x += v + return self.x + + +def main(): + f = Foo() + f.bar(6) From bcc7c36c62be5c83202c0f17bf294ca121ee5366 Mon Sep 17 00:00:00 2001 From: Hoblovski Date: Thu, 15 May 2025 21:56:38 +0800 Subject: [PATCH 17/32] fix: use most specific sym if multiple are possible in filterEntitySymbols For example, a class `Foo` has a method `bar`. When trying to infer the symbol from a location within `bar`, we follow getSymbolByToken -> getSymbolByLocation -> filterEntitySymbols. The `getSymbolByLocation` presents two candidates `Foo` and `bar`. We should accept the `bar` because it is most specific. Existing Rust implementation avoids the problem because `Foo` will be an impl symbol, which is not an entity symbol. However in Python, `Foo` is a class and thus has to be an entity symbol. --- lang/collect/collect.go | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/lang/collect/collect.go b/lang/collect/collect.go index 9dd5b10..1562290 100644 --- a/lang/collect/collect.go +++ b/lang/collect/collect.go @@ -315,12 +315,23 @@ func (c *Collector) getSymbolByTokenWithLimit(ctx context.Context, tok Token, de } func (c *Collector) filterEntitySymbols(syms []*DocumentSymbol) *DocumentSymbol { + // Choose the most specific symbol + var mostSpecific *DocumentSymbol + mostSpecific = nil for _, sym := range syms { - if c.spec.IsEntitySymbol(*sym) { - return sym + if !c.spec.IsEntitySymbol(*sym) { + continue + } + if mostSpecific == nil || mostSpecific.Location.Include(sym.Location) { + // replace most specific + mostSpecific = sym + } else if sym.Location.Include(mostSpecific.Location) { + // retain most specific + } else { + log.Error("multiple symbols %s and %s not include each other", mostSpecific, sym) } } - return nil + return mostSpecific } // return a language entity symbol From 4647d89ee49eada0eddfa5d0de309b511612955c Mon Sep 17 00:00:00 2001 From: Hoblovski Date: Thu, 15 May 2025 22:18:35 +0800 Subject: [PATCH 18/32] fix: typo --- lang/rust/spec.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lang/rust/spec.go b/lang/rust/spec.go index f68223f..8dc213f 100644 --- a/lang/rust/spec.go +++ b/lang/rust/spec.go @@ -177,7 +177,7 @@ func hasKeyword(tokens []lsp.Token, keyword string) int { return -1 } -func findSpecifiToken(tokens []lsp.Token, typ string, text string) int { +func findSpecificToken(tokens []lsp.Token, typ string, text string) int { for i := 0; i < len(tokens); i++ { if tokens[i].Type == typ && tokens[i].Text == text { return i @@ -217,7 +217,7 @@ func (c *RustSpec) ImplSymbol(sym lsp.DocumentSymbol) (int, int, int) { // find the impl type token var implType, receiverType = -1, -1 - var fn = start + findSpecifiToken(tokens[start:], "keyword", "fn") + var fn = start + findSpecificToken(tokens[start:], "keyword", "fn") var forToken = findSpecifiTokenUntil(tokens, "keyword", "for", start, fn) for i := start; i < forToken; i++ { @@ -253,11 +253,11 @@ func (c *RustSpec) FunctionSymbol(sym lsp.DocumentSymbol) (int, []int, []int, [] } // exclude #[xxx] - fn := start + findSpecifiToken(tokens[start:], "keyword", "fn") + fn := start + findSpecificToken(tokens[start:], "keyword", "fn") if fn < 0 { return -1, nil, nil, nil } - where := start + findSpecifiToken(tokens[start:], "keyword", "where") + where := start + findSpecificToken(tokens[start:], "keyword", "where") if where == -1 { where = len(tokens) - 1 } From ca8675760ece5a9a9c1604f7fd07c37b76b29018 Mon Sep 17 00:00:00 2001 From: Hoblovski Date: Thu, 15 May 2025 22:52:01 +0800 Subject: [PATCH 19/32] feat: simple object test for rust --- testdata/rustsimpleobj/Cargo.toml | 6 ++++++ testdata/rustsimpleobj/src/main.rs | 21 +++++++++++++++++++++ 2 files changed, 27 insertions(+) create mode 100644 testdata/rustsimpleobj/Cargo.toml create mode 100644 testdata/rustsimpleobj/src/main.rs diff --git a/testdata/rustsimpleobj/Cargo.toml b/testdata/rustsimpleobj/Cargo.toml new file mode 100644 index 0000000..0ba2a47 --- /dev/null +++ b/testdata/rustsimpleobj/Cargo.toml @@ -0,0 +1,6 @@ +[package] +name = "rustsimpleobj" +version = "0.1.0" +edition = "2024" + +[dependencies] diff --git a/testdata/rustsimpleobj/src/main.rs b/testdata/rustsimpleobj/src/main.rs new file mode 100644 index 0000000..efa2b44 --- /dev/null +++ b/testdata/rustsimpleobj/src/main.rs @@ -0,0 +1,21 @@ +struct Foo(u32); + +impl Foo { + pub fn new(value: u32) -> Self { + Foo(value) + } + + pub fn bar(&mut self, increment: u32) { + self.0 += increment; + } + + pub fn faz(&mut self, decrement: u32) { + self.0 -= decrement; + } +} + +fn main() { + let mut my_foo = Foo::new(10); + my_foo.bar(5); + my_foo.faz(5); +} From 94b3445cf1c2f952a74496da733d8398ab81a704 Mon Sep 17 00:00:00 2001 From: Hoblovski Date: Thu, 15 May 2025 22:53:01 +0800 Subject: [PATCH 20/32] feat: impl support and export __xx__ for python --- lang/collect/collect.go | 4 +- lang/python/spec.go | 82 ++++++++++++++++++++++++++++++++++------- 2 files changed, 70 insertions(+), 16 deletions(-) diff --git a/lang/collect/collect.go b/lang/collect/collect.go index 1562290..bdaf06a 100644 --- a/lang/collect/collect.go +++ b/lang/collect/collect.go @@ -174,8 +174,6 @@ func (c *Collector) Collect(ctx context.Context) error { // only language entity symbols need to be collect on next if c.spec.IsEntitySymbol(*sym) { syms = append(syms, sym) - } else { - fmt.Printf("skip %s at %+v with %+v\n", sym.Name, sym.Location, sym.Kind) } c.processSymbol(ctx, sym, 1) } @@ -548,7 +546,7 @@ func (c *Collector) collectImpl(ctx context.Context, sym *DocumentSymbol, depth impl = ChunkHead(sym.Text, sym.Location.Range.Start, sym.Tokens[fn].Location.Range.Start) } if impl == "" || len(impl) < len(sym.Name) { - impl = sym.Name + impl = fmt.Sprintf("class %s {\n", sym.Name) } // search all methods for _, method := range c.syms { diff --git a/lang/python/spec.go b/lang/python/spec.go index e1ff33d..9dc6bcf 100644 --- a/lang/python/spec.go +++ b/lang/python/spec.go @@ -205,6 +205,10 @@ func (c *PythonSpec) IsEntitySymbol(sym lsp.DocumentSymbol) bool { } func (c *PythonSpec) IsPublicSymbol(sym lsp.DocumentSymbol) bool { + // builtin methods are exported + if strings.HasPrefix(sym.Name, "__") && strings.HasSuffix(sym.Name, "__") { + return true + } if strings.HasPrefix(sym.Name, "_") { return false } @@ -212,17 +216,70 @@ func (c *PythonSpec) IsPublicSymbol(sym lsp.DocumentSymbol) bool { } func (c *PythonSpec) HasImplSymbol() bool { - // Python does not have direct impl symbols - return false + return true } +func invalidPos() lsp.Position { + return lsp.Position{ + Line: -1, + Character: -1, + } +} + +// returns interface, receiver, first method func (c *PythonSpec) ImplSymbol(sym lsp.DocumentSymbol) (int, int, int) { - panic("TODO") + // reference: https://docs.python.org/3/reference/grammar.html + if sym.Kind != lsp.SKClass { + return -1, -1, -1 + } + + implType := -1 + receiverType := -1 + firstMethod := -1 + + // state 0: goto state -1 when we see a 'class' + state := 0 + clsnamepos := invalidPos() + curpos := sym.Location.Range.Start + for i := range len(sym.Text) { + if state == -1 { + break + } + switch state { + case 0: + if i+6 >= len(sym.Text) { + // class text does not contain a 'class' + // should be an import + return -1, -1, -1 + } + next6chars := sym.Text[i : i+6] + // heuristics should work with reasonable python code + if next6chars == "class " { + clsnamepos = curpos + state = -1 + } + } + if sym.Text[i] == '\n' { + curpos.Line++ + curpos.Character = 0 + } else { + curpos.Character++ + } + } + + for i, t := range sym.Tokens { + if receiverType == -1 && clsnamepos.Less(t.Location.Range.Start) { + receiverType = i + } + } + + return implType, receiverType, firstMethod } // returns: receiver, typeParams, inputParams, outputParams func (c *PythonSpec) FunctionSymbol(sym lsp.DocumentSymbol) (int, []int, []int, []int) { - // no receiver. no type params in python + // FunctionSymbol do not return receivers. + // TODO type params in python (nobody uses them) // reference: https://docs.python.org/3/reference/grammar.html receiver := -1 // python actually has these but TODO @@ -239,20 +296,19 @@ func (c *PythonSpec) FunctionSymbol(sym lsp.DocumentSymbol) (int, []int, []int, // finish when we see a : state := 0 paren_depth := 0 - invalidpos := lsp.Position{ - Line: -1, - Character: -1, - } - // defpos := invalidpos - lparenpos := invalidpos - rparenpos := invalidpos - bodypos := invalidpos + // defpos := invalidPos() + lparenpos := invalidPos() + rparenpos := invalidPos() + bodypos := invalidPos() curpos := sym.Location.Range.Start for i := range len(sym.Text) { + if state == -1 { + break + } switch state { case 0: if i+4 >= len(sym.Text) { - // function text does not contain a def + // function text does not contain a 'def' // should be an import return -1, []int{}, []int{}, []int{} } From 537a2eda9759066ad06d2a8a781daa3fe4687061 Mon Sep 17 00:00:00 2001 From: Hoblovski Date: Fri, 16 May 2025 12:30:13 +0800 Subject: [PATCH 21/32] fix: copyright headers --- testdata/pyimport/main.py | 14 ++++++++++++++ testdata/pysimpleobj/main.py | 15 +++++++++++++++ testdata/pythonsimple/test.py | 14 ++++++++++++++ testdata/pythonsimple/test2.py | 13 +++++++++++++ testdata/pythonsimple/test3.py | 13 +++++++++++++ testdata/pythonsingle/main.py | 13 +++++++++++++ testdata/rustsimpleobj/src/main.rs | 14 ++++++++++++++ 7 files changed, 96 insertions(+) diff --git a/testdata/pyimport/main.py b/testdata/pyimport/main.py index c59e5ff..a4b8125 100644 --- a/testdata/pyimport/main.py +++ b/testdata/pyimport/main.py @@ -1,3 +1,17 @@ +# Copyright 2025 CloudWeGo Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import Union # similar to rust's `pub use`. diff --git a/testdata/pysimpleobj/main.py b/testdata/pysimpleobj/main.py index 6f5cb4d..5c672ce 100644 --- a/testdata/pysimpleobj/main.py +++ b/testdata/pysimpleobj/main.py @@ -1,3 +1,18 @@ +# Copyright 2025 CloudWeGo Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + class Foo: def __init__(self): self.x = 5 diff --git a/testdata/pythonsimple/test.py b/testdata/pythonsimple/test.py index 42660b6..7be5114 100644 --- a/testdata/pythonsimple/test.py +++ b/testdata/pythonsimple/test.py @@ -1,3 +1,17 @@ +# Copyright 2025 CloudWeGo Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import Union from test2 import IntPair from test3 import * diff --git a/testdata/pythonsimple/test2.py b/testdata/pythonsimple/test2.py index 01370e1..7e5f2c2 100644 --- a/testdata/pythonsimple/test2.py +++ b/testdata/pythonsimple/test2.py @@ -1,3 +1,16 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + from dataclasses import dataclass diff --git a/testdata/pythonsimple/test3.py b/testdata/pythonsimple/test3.py index 508b06d..5812e0d 100644 --- a/testdata/pythonsimple/test3.py +++ b/testdata/pythonsimple/test3.py @@ -1,3 +1,16 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + class IntVariant: def __init__(self, value: int): self.value: int = value diff --git a/testdata/pythonsingle/main.py b/testdata/pythonsingle/main.py index 6600f35..ea3a402 100644 --- a/testdata/pythonsingle/main.py +++ b/testdata/pythonsingle/main.py @@ -1,3 +1,16 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + from dataclasses import dataclass from typing import Union diff --git a/testdata/rustsimpleobj/src/main.rs b/testdata/rustsimpleobj/src/main.rs index efa2b44..2efed47 100644 --- a/testdata/rustsimpleobj/src/main.rs +++ b/testdata/rustsimpleobj/src/main.rs @@ -1,3 +1,17 @@ +// Copyright 2025 CloudWeGo Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + struct Foo(u32); impl Foo { From 15c667ddf8b48938265a1fc0bd0b4d6362e8fc0c Mon Sep 17 00:00:00 2001 From: Hoblovski Date: Fri, 16 May 2025 13:05:42 +0800 Subject: [PATCH 22/32] feat: progress tracking for collect --- lang/collect/collect.go | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/lang/collect/collect.go b/lang/collect/collect.go index bdaf06a..ab23735 100644 --- a/lang/collect/collect.go +++ b/lang/collect/collect.go @@ -117,8 +117,8 @@ func (c *Collector) Collect(ctx context.Context) error { } // scan all files - roots := make([]*DocumentSymbol, 0, 1024) - scanner := func(path string, info os.FileInfo, err error) error { + collect_paths := make([]string, 0, 1024) + if err := filepath.Walk(c.repo, func(path string, info os.FileInfo, err error) error { if err != nil { return err } @@ -133,15 +133,26 @@ func (c *Collector) Collect(ctx context.Context) error { if c.spec.ShouldSkip(path) { return nil } - + collect_paths = append(collect_paths, path) + return nil + }); err != nil { + return err + } + // collect root symbols + roots := make([]*DocumentSymbol, 0, 1024) + for i, path := range collect_paths { // collect symbols uri := NewURI(path) symbols, err := c.cli.DocumentSymbols(ctx, uri) if err != nil { return err } + log.Info("collecting %d/%d files %s, has %d symbols\n", i, len(collect_paths), path, len(symbols)) // file := filepath.Base(path) + n_sym := 0 for _, sym := range symbols { + log.Debug(" symbol %d/%d %s\n", n_sym, len(symbols), sym.Name) + n_sym++ // collect content content, err := c.cli.Locate(sym.Location) if err != nil { @@ -161,11 +172,6 @@ func (c *Collector) Collect(ctx context.Context) error { c.syms[sym.Location] = sym roots = append(roots, sym) } - - return nil - } - if err := filepath.Walk(c.repo, scanner); err != nil { - return err } // collect some extra metadata From 709eb4818badc5a9889a664cefff0a1669aa8472 Mon Sep 17 00:00:00 2001 From: Hoblovski Date: Fri, 16 May 2025 16:20:33 +0800 Subject: [PATCH 23/32] feat: cache semantic tokens with `-cache` --- lang/collect/collect.go | 9 ++++++--- lang/collect/export.go | 2 +- lang/lsp/client.go | 8 +++++++- lang/lsp/lsp.go | 30 ++++++++++++++++++++++-------- lang/parse.go | 7 ++++--- main.go | 1 + 6 files changed, 41 insertions(+), 16 deletions(-) diff --git a/lang/collect/collect.go b/lang/collect/collect.go index ab23735..2d1df16 100644 --- a/lang/collect/collect.go +++ b/lang/collect/collect.go @@ -37,6 +37,7 @@ type CollectOption struct { NoNeedComment bool NeedTest bool Excludes []string + CacheResults bool } type Collector struct { @@ -138,10 +139,10 @@ func (c *Collector) Collect(ctx context.Context) error { }); err != nil { return err } - // collect root symbols + + // collect symbols roots := make([]*DocumentSymbol, 0, 1024) for i, path := range collect_paths { - // collect symbols uri := NewURI(path) symbols, err := c.cli.DocumentSymbols(ctx, uri) if err != nil { @@ -151,7 +152,7 @@ func (c *Collector) Collect(ctx context.Context) error { // file := filepath.Base(path) n_sym := 0 for _, sym := range symbols { - log.Debug(" symbol %d/%d %s\n", n_sym, len(symbols), sym.Name) + log.Debug(" collecting symbol %d/%d %s\n", n_sym, len(symbols), sym.Name) n_sym++ // collect content content, err := c.cli.Locate(sym.Location) @@ -173,6 +174,7 @@ func (c *Collector) Collect(ctx context.Context) error { roots = append(roots, sym) } } + log.Info("collected symbols.") // collect some extra metadata syms := make([]*DocumentSymbol, 0, len(roots)) @@ -284,6 +286,7 @@ func (c *Collector) Collect(ctx context.Context) error { c.syms[dep.Location] = dep } + log.Debug(" Collect: dep %s -> %s (file: %s -> %s)\n", sym.Name, dep.Name, sym.Location, token.Location) c.deps[sym] = append(c.deps[sym], dependency{ Location: token.Location, Symbol: dep, diff --git a/lang/collect/export.go b/lang/collect/export.go index b35bd27..abe24f5 100644 --- a/lang/collect/export.go +++ b/lang/collect/export.go @@ -136,7 +136,7 @@ func (c *Collector) exportSymbol(repo *uniast.Repository, symbol *DocumentSymbol id := uniast.NewIdentity(mod, path, name) visited[symbol] = &id - // Load eternal symbol on demands + // Load external symbol on demands if !c.LoadExternalSymbol && (!c.internal(symbol.Location) || symbol.Kind == SKUnknown) { return &id, nil } diff --git a/lang/lsp/client.go b/lang/lsp/client.go index 4940f66..1ff0acb 100644 --- a/lang/lsp/client.go +++ b/lang/lsp/client.go @@ -35,13 +35,16 @@ type LSPClient struct { tokenTypes []string tokenModifiers []string files map[DocumentURI]*TextDocumentItem + // TODO: now only cache semantic tokens + cachedResults map[string]SemanticTokens ClientOptions } type ClientOptions struct { Server string uniast.Language - Verbose bool + Verbose bool + CacheResults bool } func NewLSPClient(repo string, openfile string, wait time.Duration, opts ClientOptions) (*LSPClient, error) { @@ -58,6 +61,9 @@ func NewLSPClient(repo string, openfile string, wait time.Duration, opts ClientO cli.ClientOptions = opts cli.files = make(map[DocumentURI]*TextDocumentItem) + if opts.CacheResults { + cli.cachedResults = make(map[string]SemanticTokens) + } if openfile != "" { _, err := cli.DidOpen(context.Background(), NewURI(openfile)) diff --git a/lang/lsp/lsp.go b/lang/lsp/lsp.go index 0cf9cdf..71a2214 100644 --- a/lang/lsp/lsp.go +++ b/lang/lsp/lsp.go @@ -288,12 +288,29 @@ func (cli *LSPClient) References(ctx context.Context, id Location) ([]Location, // TODO(perf): cache results especially for whole file queries. // TODO(refactor): infer use_full_method from capabilities func (cli *LSPClient) getSemanticTokensRange(ctx context.Context, req DocumentRange, resp *SemanticTokens, use_full_method bool) error { + // Note: resp should be `mutable SemanticTokens * const resp` if use_full_method { - req1 := struct { - TextDocument lsp.TextDocumentIdentifier `json:"textDocument"` - }{TextDocument: req.TextDocument} - if err := cli.Call(ctx, "textDocument/semanticTokens/full", req1, resp); err != nil { - return err + if cli.cachedResults == nil { + // no caching + req1 := struct { + TextDocument lsp.TextDocumentIdentifier `json:"textDocument"` + }{TextDocument: req.TextDocument} + if err := cli.Call(ctx, "textDocument/semanticTokens/full", req1, resp); err != nil { + return err + } + } else { + cacheRes, ok := cli.cachedResults[string(req.TextDocument.URI)] + if ok { + *resp = cacheRes + } else { + req1 := struct { + TextDocument lsp.TextDocumentIdentifier `json:"textDocument"` + }{TextDocument: req.TextDocument} + if err := cli.Call(ctx, "textDocument/semanticTokens/full", req1, resp); err != nil { + return err + } + cli.cachedResults[string(req.TextDocument.URI)] = *resp + } } filterSemanticTokensInRange(resp, req.Range) } else { @@ -310,7 +327,6 @@ func filterSemanticTokensInRange(resp *SemanticTokens, r Range) { Character: 0, } newData := []uint32{} - includedIs := []int{} for i := 0; i < len(resp.Data); i += 5 { deltaLine := int(resp.Data[i]) deltaStart := int(resp.Data[i+1]) @@ -329,7 +345,6 @@ func filterSemanticTokensInRange(resp *SemanticTokens, r Range) { } else { newData = append(newData, resp.Data[i:i+5]...) } - includedIs = append(includedIs, i) } } resp.Data = newData @@ -356,7 +371,6 @@ func (cli *LSPClient) SemanticTokens(ctx context.Context, id Location) ([]Token, var resp SemanticTokens if err := cli.getSemanticTokensRange(ctx, req, &resp, cli.Language == uniast.Cxx || cli.Language == uniast.Python); err != nil { - return nil, err } diff --git a/lang/parse.go b/lang/parse.go index 446ac5a..e1e5787 100644 --- a/lang/parse.go +++ b/lang/parse.go @@ -63,9 +63,10 @@ func Parse(ctx context.Context, uri string, args ParseOptions) ([]byte, error) { log.Info("start initialize LSP server %s...\n", lspPath) var err error client, err = lsp.NewLSPClient(uri, openfile, opentime, lsp.ClientOptions{ - Server: lspPath, - Language: l, - Verbose: args.Verbose, + Server: lspPath, + Language: l, + Verbose: args.Verbose, + CacheResults: args.CacheResults, }) if err != nil { log.Error("failed to initialize LSP server: %v\n", err) diff --git a/main.go b/main.go index 6ccc960..3810d8f 100644 --- a/main.go +++ b/main.go @@ -91,6 +91,7 @@ func main() { flags.BoolVar(&opts.LoadExternalSymbol, "load-external-symbol", false, "load external symbols into results") flags.BoolVar(&opts.NoNeedComment, "no-need-comment", false, "do not need comment (only works for Go now)") flags.BoolVar(&opts.NeedTest, "need-test", false, "need parse test files (only works for Go now)") + flags.BoolVar(&opts.CacheResults, "cache", false, "cache language server query results") flags.Var((*StringArray)(&opts.Excludes), "exclude", "exclude files or directories, support multiple values") flagLsp := flags.String("lsp", "", "Specify the language server path.") From a471a24006a20fbd1d62e553ebd3d7cebab3bea4 Mon Sep 17 00:00:00 2001 From: Hoblovski Date: Fri, 16 May 2025 16:43:09 +0800 Subject: [PATCH 24/32] feat: -veryverbose and -verbose for debug/info logs --- main.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/main.go b/main.go index 3810d8f..dcb92ba 100644 --- a/main.go +++ b/main.go @@ -81,6 +81,7 @@ func main() { uri := os.Args[3] flagVerbose := flags.Bool("verbose", false, "Verbose mode.") + flagVeryVerbose := flags.Bool("veryverbose", false, "Very verbose mode.") flagOutput := flags.String("o", "", "Output path.") @@ -96,10 +97,14 @@ func main() { flagLsp := flags.String("lsp", "", "Specify the language server path.") flags.Parse(os.Args[4:]) - if flagVerbose != nil && *flagVerbose { + if flagVeryVerbose != nil && *flagVeryVerbose { log.SetLogLevel(log.DebugLevel) opts.Verbose = true } + if flagVerbose != nil && *flagVerbose { + log.SetLogLevel(log.InfoLevel) + opts.Verbose = true + } opts.Language = language if flagLsp != nil { From 24b21885d361f286f94de3a78d5a1d27cbef8b97 Mon Sep 17 00:00:00 2001 From: Hoblovski Date: Fri, 16 May 2025 18:02:44 +0800 Subject: [PATCH 25/32] feat: parses flask --- lang/collect/collect.go | 16 ++++++++++++++++ lang/collect/export.go | 2 +- lang/lsp/client.go | 2 +- lang/lsp/handler.go | 4 ++-- lang/python/lib.go | 2 +- lang/python/spec.go | 25 ++++++------------------- 6 files changed, 27 insertions(+), 24 deletions(-) diff --git a/lang/collect/collect.go b/lang/collect/collect.go index 2d1df16..0b675ac 100644 --- a/lang/collect/collect.go +++ b/lang/collect/collect.go @@ -108,6 +108,22 @@ func NewCollector(repo string, cli *LSPClient) *Collector { } func (c *Collector) Collect(ctx context.Context) error { + // Example code to configure the LSP client + // if c.Language == uniast.Python { + // conf := map[string]interface{}{ + // "settings": map[string]interface{}{ + // "pylsp": map[string]interface{}{ + // "plugins": map[string]interface{}{ + // "jedi_definition": map[string]interface{}{ + // "follow_builtin_definitions": false, + // }, + // }, + // }, + // }, + // } + // c.cli.Notify(ctx, "workspace/didChangeConfiguration", conf) + // } + excludes := make([]string, len(c.Excludes)) for i, e := range c.Excludes { if !filepath.IsAbs(e) { diff --git a/lang/collect/export.go b/lang/collect/export.go index abe24f5..f31f04d 100644 --- a/lang/collect/export.go +++ b/lang/collect/export.go @@ -320,7 +320,7 @@ func (c *Collector) exportSymbol(repo *uniast.Repository, symbol *DocumentSymbol case lsp.SKStruct, lsp.SKTypeParameter, lsp.SKInterface, lsp.SKEnum, lsp.SKClass: obj.SubStruct = append(obj.SubStruct, uniast.NewDependency(*depid, c.fileLine(dep.Location))) default: - log.Error("dep symbol %s not collected for \n", dep.Symbol, id) + log.Error("dep symbol %s not collected for %v\n", dep.Symbol, id) } } } diff --git a/lang/lsp/client.go b/lang/lsp/client.go index 1ff0acb..bf5571d 100644 --- a/lang/lsp/client.go +++ b/lang/lsp/client.go @@ -225,7 +225,7 @@ func (rwc rwc) Close() error { // start a LSP process and return its io func startLSPSever(path string) (io.ReadWriteCloser, error) { - // Launch rust-analyzer + // Launch LSP server cmd := exec.Command(path) stdin, err := cmd.StdinPipe() diff --git a/lang/lsp/handler.go b/lang/lsp/handler.go index ab12a8a..cdc0cea 100644 --- a/lang/lsp/handler.go +++ b/lang/lsp/handler.go @@ -93,9 +93,9 @@ loop: func (h *lspHandler) Handle(ctx context.Context, conn *jsonrpc2.Conn, req *jsonrpc2.Request) { // This method will be called for both requests and notifications - log.Info("handle method: %s\n", req.Method) + log.Debug("handle method: %s\n", req.Method) if req.Params != nil { - log.Info("param: %s\n", string(*req.Params)) + log.Debug("param: %s\n", string(*req.Params)) } if req.Notif { // This is a notification diff --git a/lang/python/lib.go b/lang/python/lib.go index 1b44135..286bea4 100644 --- a/lang/python/lib.go +++ b/lang/python/lib.go @@ -21,7 +21,7 @@ import ( "github.com/cloudwego/abcoder/lang/utils" ) -const MaxWaitDuration = 5 * time.Second +const MaxWaitDuration = 2 * time.Second func GetDefaultLSP() (lang uniast.Language, name string) { // needs to use the custom pylsp (see commit message) diff --git a/lang/python/spec.go b/lang/python/spec.go index 9dc6bcf..cb1513b 100644 --- a/lang/python/spec.go +++ b/lang/python/spec.go @@ -43,33 +43,20 @@ func (c *PythonSpec) WorkSpace(root string) (map[string]string, error) { return nil, err } - num_projfiles := 0 - scanner := func(path string, info os.FileInfo, err error) error { - if err != nil { - return err - } - base := filepath.Base(path) - if base == "pyproject.toml" { - num_projfiles++ - if num_projfiles > 1 { - panic("multiple pyproject.toml files found") - } - // it's hard to infer the name or package from pyproject.toml - } - return nil - } - if err := filepath.Walk(root, scanner); err != nil { - return nil, err - } + // TODO: maybe infer from pyproject.toml? + // should ignore {tests,examples}/**/pyproject.toml // XXX ad-hoc way if strings.Contains(c.repo, "astropy") { panic("TODO") + } else if strings.Contains(c.repo, "flask") { + c.topModulePath = absPath + "/src" + c.topModuleName = "flask" } else { c.topModulePath = absPath c.topModuleName = "current" - rets[c.topModuleName] = c.topModulePath } + rets[c.topModuleName] = c.topModulePath return rets, nil } From b3d6331d8c0fdc45d11c34867a3042aed2fe2d50 Mon Sep 17 00:00:00 2001 From: Hoblovski Date: Tue, 20 May 2025 20:17:24 +0800 Subject: [PATCH 26/32] feat: ignore pystd if !NeedStdSymbols --- lang/collect/collect.go | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/lang/collect/collect.go b/lang/collect/collect.go index 0b675ac..79002e1 100644 --- a/lang/collect/collect.go +++ b/lang/collect/collect.go @@ -109,20 +109,22 @@ func NewCollector(repo string, cli *LSPClient) *Collector { func (c *Collector) Collect(ctx context.Context) error { // Example code to configure the LSP client - // if c.Language == uniast.Python { - // conf := map[string]interface{}{ - // "settings": map[string]interface{}{ - // "pylsp": map[string]interface{}{ - // "plugins": map[string]interface{}{ - // "jedi_definition": map[string]interface{}{ - // "follow_builtin_definitions": false, - // }, - // }, - // }, - // }, - // } - // c.cli.Notify(ctx, "workspace/didChangeConfiguration", conf) - // } + if !c.NeedStdSymbol { + if c.Language == uniast.Python { + conf := map[string]interface{}{ + "settings": map[string]interface{}{ + "pylsp": map[string]interface{}{ + "plugins": map[string]interface{}{ + "jedi_definition": map[string]interface{}{ + "follow_builtin_definitions": false, + }, + }, + }, + }, + } + c.cli.Notify(ctx, "workspace/didChangeConfiguration", conf) + } + } excludes := make([]string, len(c.Excludes)) for i, e := range c.Excludes { From bb35fec1decacba67b7c02f207a687fe3c841482 Mon Sep 17 00:00:00 2001 From: Hoblovski Date: Tue, 20 May 2025 20:17:53 +0800 Subject: [PATCH 27/32] feat: arg -indent to indent output json --- lang/parse.go | 9 ++++++++- main.go | 1 + 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/lang/parse.go b/lang/parse.go index e1e5787..f9050fd 100644 --- a/lang/parse.go +++ b/lang/parse.go @@ -41,6 +41,8 @@ type ParseOptions struct { LSP string // Language of the repo Verbose bool + // Whether to indent the output JSON + MarshalIndent bool collect.CollectOption } @@ -81,7 +83,12 @@ func Parse(ctx context.Context, uri string, args ParseOptions) ([]byte, error) { return nil, err } log.Info("all symbols collected, start writing to stdout...\n") - out, err := json.Marshal(repo) + var out []byte + if args.MarshalIndent { + out, err = json.MarshalIndent(repo, "", " ") + } else { + out, err = json.Marshal(repo) + } if err != nil { log.Error("Failed to marshal repository: %v\n", err) return nil, err diff --git a/main.go b/main.go index dcb92ba..2542bef 100644 --- a/main.go +++ b/main.go @@ -93,6 +93,7 @@ func main() { flags.BoolVar(&opts.NoNeedComment, "no-need-comment", false, "do not need comment (only works for Go now)") flags.BoolVar(&opts.NeedTest, "need-test", false, "need parse test files (only works for Go now)") flags.BoolVar(&opts.CacheResults, "cache", false, "cache language server query results") + flags.BoolVar(&opts.MarshalIndent, "indent", false, "indent the marshaled output") flags.Var((*StringArray)(&opts.Excludes), "exclude", "exclude files or directories, support multiple values") flagLsp := flags.String("lsp", "", "Specify the language server path.") From ed6da83f0bfb35b0dda469f4397a886f4f2d7f4a Mon Sep 17 00:00:00 2001 From: Hoblovski Date: Tue, 20 May 2025 20:23:25 +0800 Subject: [PATCH 28/32] feat: separate builtins from site-packages --- .gitignore | 3 +++ lang/python/spec.go | 18 +++++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 40f7ab8..352e539 100644 --- a/.gitignore +++ b/.gitignore @@ -75,3 +75,6 @@ src/lang/testdata tools abcoder + +/*.txt +/*.json diff --git a/lang/python/spec.go b/lang/python/spec.go index cb1513b..0572d7a 100644 --- a/lang/python/spec.go +++ b/lang/python/spec.go @@ -83,6 +83,22 @@ func (c *PythonSpec) NameSpace(path string) (string, string, error) { // XXX: hardcoded python version condaPrefix := "/home/zhenyang/anaconda3/envs/abcoder/lib/python3.11" if strings.HasPrefix(path, condaPrefix) { + if strings.HasPrefix(path, condaPrefix+"/site-packages") { + // external module + relPath, err := filepath.Rel(condaPrefix+"/site-packages", path) + if err != nil { + return "", "", err + } + relPath = strings.TrimSuffix(relPath, ".py") + pkgPath := strings.ReplaceAll(relPath, string(os.PathSeparator), ".") + modPath := strings.Split(pkgPath, ".") + if len(modPath) >= 1 { + modName := modPath[0] + return modName, pkgPath, nil + } + panic(fmt.Sprintf("Malformed Namespace %s, pkgPath %s", path, pkgPath)) + } + // builtin module modName := "builtins" relPath, err := filepath.Rel(condaPrefix, path) if err != nil { @@ -93,7 +109,7 @@ func (c *PythonSpec) NameSpace(path string) (string, string, error) { return modName, pkgPath, nil } - panic(fmt.Sprintf("Namespace %s", path)) + panic(fmt.Sprintf("Unhandled Namespace %s", path)) } func (c *PythonSpec) ShouldSkip(path string) bool { From fcb9b3acb794ef7a4bbf8b626dfa263bb0a86645 Mon Sep 17 00:00:00 2001 From: Hoblovski Date: Tue, 10 Jun 2025 15:17:36 +0800 Subject: [PATCH 29/32] test: global var in python --- testdata/pyglobvar/main.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 testdata/pyglobvar/main.py diff --git a/testdata/pyglobvar/main.py b/testdata/pyglobvar/main.py new file mode 100644 index 0000000..4152fd3 --- /dev/null +++ b/testdata/pyglobvar/main.py @@ -0,0 +1,22 @@ +# Copyright 2025 CloudWeGo Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def foo(): + return 2 + +def bar(): + return foo() + +v = foo() From eb5d94e602eab545ec2d0d0e191dc94d8e221783 Mon Sep 17 00:00:00 2001 From: Hoblovski Date: Tue, 10 Jun 2025 18:56:57 +0800 Subject: [PATCH 30/32] fix(lsp): cache definition --- lang/lsp/lsp.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lang/lsp/lsp.go b/lang/lsp/lsp.go index 71a2214..6531696 100644 --- a/lang/lsp/lsp.go +++ b/lang/lsp/lsp.go @@ -387,6 +387,11 @@ func (cli *LSPClient) Definition(ctx context.Context, uri DocumentURI, pos Posit if err != nil { return nil, err } + if f.Definitions != nil { + if locations, ok := f.Definitions[pos]; ok { + return locations, nil + } + } // call req := lsp.TextDocumentPositionParams{ From 795b36b5c7e26035fcbd6256a69c3025c053d4c5 Mon Sep 17 00:00:00 2001 From: Hoblovski Date: Tue, 10 Jun 2025 19:43:46 +0800 Subject: [PATCH 31/32] feat: optimized progress output --- lang/collect/collect.go | 34 +++++++++++++++++++++++++++------- lang/collect/export.go | 15 +++++++++++++-- lang/parse.go | 4 ++++ 3 files changed, 44 insertions(+), 9 deletions(-) diff --git a/lang/collect/collect.go b/lang/collect/collect.go index 79002e1..1cda834 100644 --- a/lang/collect/collect.go +++ b/lang/collect/collect.go @@ -40,6 +40,10 @@ type CollectOption struct { CacheResults bool } +const ( + SUPRESS_COLLECT_ERRORS = true +) + type Collector struct { cli *LSPClient spec LanguageSpec @@ -192,7 +196,7 @@ func (c *Collector) Collect(ctx context.Context) error { roots = append(roots, sym) } } - log.Info("collected symbols.") + log.Info("collected %d root symbols. going to collect more syms and dependencies...\n", len(roots)) // collect some extra metadata syms := make([]*DocumentSymbol, 0, len(roots)) @@ -203,6 +207,7 @@ func (c *Collector) Collect(ctx context.Context) error { } c.processSymbol(ctx, sym, 1) } + log.Info("collected %d symbols. going to collect dependencies...\n", len(c.syms)) // collect internal references // for _, sym := range syms { @@ -236,8 +241,11 @@ func (c *Collector) Collect(ctx context.Context) error { // } // } + num_edges := 0 // collect dependencies - for _, sym := range syms { + for i, sym := range syms { + log.Info("collecting dependencies %d/%d %s\n", i, len(syms), sym.Name) + next_token: for i, token := range sym.Tokens { @@ -283,7 +291,9 @@ func (c *Collector) Collect(ctx context.Context) error { // go to definition dep, err := c.getSymbolByToken(ctx, token) if err != nil || dep == nil { - log.Error("dep token %v not found: %v\n", token, err) + if !SUPRESS_COLLECT_ERRORS { + log.Error("dep token %v not found: %v\n", token, err) + } continue } @@ -305,6 +315,7 @@ func (c *Collector) Collect(ctx context.Context) error { } log.Debug(" Collect: dep %s -> %s (file: %s -> %s)\n", sym.Name, dep.Name, sym.Location, token.Location) + num_edges++ c.deps[sym] = append(c.deps[sym], dependency{ Location: token.Location, Symbol: dep, @@ -313,6 +324,7 @@ func (c *Collector) Collect(ctx context.Context) error { } } + log.Info("collected %d symbols, %d edges.\n", len(c.syms), num_edges) return nil } @@ -334,7 +346,9 @@ func (c *Collector) getSymbolByTokenWithLimit(ctx context.Context, tok Token, de return nil, fmt.Errorf("definition of token %s not found", tok) } if len(defs) > 1 { - log.Error("definition of token %s not unique", tok) + if !SUPRESS_COLLECT_ERRORS { + log.Error("definition of token %s not unique", tok) + } } return c.getSymbolByLocation(ctx, defs[0], depth, tok) } @@ -536,7 +550,9 @@ func (c *Collector) getDepsWithLimit(ctx context.Context, sym *DocumentSymbol, t for _, tp := range tps { dep, err := c.getSymbolByTokenWithLimit(ctx, sym.Tokens[tp], depth) if err != nil || sym == nil { - log.Error_skip(1, "token %v not found its symbol: %v", tp, err) + if !SUPRESS_COLLECT_ERRORS { + log.Error_skip(1, "token %v not found its symbol: %v", tp, err) + } } else { d := dependency{sym.Tokens[tp].Location, dep} tsyms[tp] = d @@ -629,12 +645,16 @@ func (c *Collector) processSymbol(ctx context.Context, sym *DocumentSymbol, dept } } if i < 0 || i >= len(sym.Tokens) { - log.Error("get type token of variable symbol %s failed\n", sym) + if !SUPRESS_COLLECT_ERRORS { + log.Error("get type token of variable symbol %s failed\n", sym) + } return } tsym, err := c.getSymbolByTokenWithLimit(ctx, sym.Tokens[i], depth-1) if err != nil || tsym == nil { - log.Error("get type symbol for token %s failed:%v\n", sym.Tokens[i], err) + if !SUPRESS_COLLECT_ERRORS { + log.Error("get type symbol for token %s failed:%v\n", sym.Tokens[i], err) + } return } c.vars[sym] = dependency{ diff --git a/lang/collect/export.go b/lang/collect/export.go index f31f04d..b74ff2d 100644 --- a/lang/collect/export.go +++ b/lang/collect/export.go @@ -27,6 +27,10 @@ import ( "github.com/cloudwego/abcoder/lang/uniast" ) +const ( + SUPRESS_EXPORT_OUTPUT = true +) + type dependency struct { Location Location `json:"location"` Symbol *DocumentSymbol `json:"symbol"` @@ -74,7 +78,10 @@ func (c *Collector) Export(ctx context.Context) (*uniast.Repository, error) { c.filterLocalSymbols() // export symbols + i := 0 for _, symbol := range c.syms { + log.Info("export symbol %d/%d: %s\n", i, len(c.syms), symbol.Name) + i++ visited := make(map[*lsp.DocumentSymbol]*uniast.Identity) _, err := c.exportSymbol(&repo, symbol, "", visited) if err != nil { @@ -292,7 +299,9 @@ func (c *Collector) exportSymbol(repo *uniast.Repository, symbol *DocumentSymbol } obj.Types = uniast.InsertDependency(obj.Types, pdep) default: - log.Error("dep symbol %s not collected for %v\n", dep.Symbol, id) + if !SUPRESS_EXPORT_OUTPUT { + log.Error("dep symbol %s not collected for %v\n", dep.Symbol, id) + } } } } @@ -320,7 +329,9 @@ func (c *Collector) exportSymbol(repo *uniast.Repository, symbol *DocumentSymbol case lsp.SKStruct, lsp.SKTypeParameter, lsp.SKInterface, lsp.SKEnum, lsp.SKClass: obj.SubStruct = append(obj.SubStruct, uniast.NewDependency(*depid, c.fileLine(dep.Location))) default: - log.Error("dep symbol %s not collected for %v\n", dep.Symbol, id) + if !SUPRESS_EXPORT_OUTPUT { + log.Error("dep symbol %s not collected for %v\n", dep.Symbol, id) + } } } } diff --git a/lang/parse.go b/lang/parse.go index f9050fd..3d640df 100644 --- a/lang/parse.go +++ b/lang/parse.go @@ -93,6 +93,7 @@ func Parse(ctx context.Context, uri string, args ParseOptions) ([]byte, error) { log.Error("Failed to marshal repository: %v\n", err) return nil, err } + log.Info("all symbols written to stdout.\n") return out, nil } @@ -173,11 +174,14 @@ func collectSymbol(ctx context.Context, cli *lsp.LSPClient, repoPath string, opt if err != nil { return nil, err } + log.Info("all symbols exported.\n") } + log.Info("start building graph...\n") if err := repo.BuildGraph(); err != nil { return nil, err } + log.Info("graph built.\n") return repo, nil } From 2d56a144f9e35683d84de3cd3df654ec282f0ecf Mon Sep 17 00:00:00 2001 From: Hoblovski Date: Tue, 10 Jun 2025 20:10:36 +0800 Subject: [PATCH 32/32] fix: cache countLines during export --- lang/collect/export.go | 5 +++-- lang/cxx/spec.go | 2 +- lang/lsp/utils.go | 15 +++++++++++++-- lang/rust/spec.go | 2 +- lang/utils/strings.go | 14 +++++++++++++- 5 files changed, 31 insertions(+), 7 deletions(-) diff --git a/lang/collect/export.go b/lang/collect/export.go index b74ff2d..593cd70 100644 --- a/lang/collect/export.go +++ b/lang/collect/export.go @@ -44,11 +44,12 @@ func (c *Collector) fileLine(loc Location) uniast.FileLine { rel = filepath.Base(loc.URI.File()) } text := c.cli.GetFile(loc.URI).Text + uri_str := string(loc.URI) return uniast.FileLine{ File: rel, Line: loc.Range.Start.Line, - StartOffset: lsp.PositionOffset(text, loc.Range.Start), - EndOffset: lsp.PositionOffset(text, loc.Range.End), + StartOffset: lsp.PositionOffsetIdentified(uri_str, text, loc.Range.Start), + EndOffset: lsp.PositionOffsetIdentified(uri_str, text, loc.Range.End), } } diff --git a/lang/cxx/spec.go b/lang/cxx/spec.go index 487da54..00fdc27 100644 --- a/lang/cxx/spec.go +++ b/lang/cxx/spec.go @@ -164,7 +164,7 @@ func (c *CxxSpec) FunctionSymbol(sym lsp.DocumentSymbol) (int, []int, []int, []i // TODO: attributes may contain parens. also inline structs. endRelOffset := 0 - lines := utils.CountLinesCached(sym.Text) + lines := utils.CountLinesPooled(sym.Text) phase := 0 for i, tok := range sym.Tokens { switch phase { diff --git a/lang/lsp/utils.go b/lang/lsp/utils.go index f1178dc..03db817 100644 --- a/lang/lsp/utils.go +++ b/lang/lsp/utils.go @@ -36,7 +36,7 @@ import ( ) func GetDistance(text string, start Position, pos Position) int { - lines := utils.CountLinesCached(text) + lines := utils.CountLinesPooled(text) defer utils.PutCount(lines) // find the line of the position return (*lines)[pos.Line-start.Line] + pos.Character - start.Character @@ -59,12 +59,23 @@ func RelativePostionWithLines(lines []int, textPos Position, pos Position) int { return lines[l] + pos.Character - textPos.Character } +func PositionOffsetIdentified(uri string, text string, pos Position) int { + if pos.Line < 0 || pos.Character < 0 { + log.Error("invalid text position: %+v", pos) + return -1 + } + lines := utils.CountLinesCached(uri, text) + defer utils.PutCount(lines) + + return RelativePostionWithLines(*lines, Position{Line: 0, Character: 0}, pos) +} + func PositionOffset(text string, pos Position) int { if pos.Line < 0 || pos.Character < 0 { log.Error("invalid text position: %+v", pos) return -1 } - lines := utils.CountLinesCached(text) + lines := utils.CountLinesPooled(text) defer utils.PutCount(lines) return RelativePostionWithLines(*lines, Position{Line: 0, Character: 0}, pos) diff --git a/lang/rust/spec.go b/lang/rust/spec.go index 8dc213f..e40a410 100644 --- a/lang/rust/spec.go +++ b/lang/rust/spec.go @@ -261,7 +261,7 @@ func (c *RustSpec) FunctionSymbol(sym lsp.DocumentSymbol) (int, []int, []int, [] if where == -1 { where = len(tokens) - 1 } - lines := utils.CountLinesCached(sym.Text) + lines := utils.CountLinesPooled(sym.Text) // find the typeParam's type token between "fn" and "(" var typeParams []int diff --git a/lang/utils/strings.go b/lang/utils/strings.go index 2aeb042..75d77e2 100644 --- a/lang/utils/strings.go +++ b/lang/utils/strings.go @@ -28,7 +28,19 @@ func PutCount(count *[]int) { countPool.Put(count) } -func CountLinesCached(text string) *[]int { +var cachedLines = sync.Map{} + +func CountLinesCached(ident string, text string) *[]int { + if v, ok := cachedLines.Load(ident); ok { + res := v.([]int) + return &res + } + tmp := CountLines(text) + cachedLines.Store(ident, tmp) + return &tmp +} + +func CountLinesPooled(text string) *[]int { tmp := countPool.Get().(*[]int) *tmp = append(*tmp, 0) for i, c := range text {