Skip to content

Commit 176962c

Browse files
Add support for 3D/CAD file formats preview (#34794)
Fix #34775

---------

Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
1 parent f74a136 commit 176962c

File tree

29 files changed

+628
-410
lines changed

29 files changed

+628
-410
lines changed

modules/git/blob.go

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -22,17 +22,22 @@ func (b *Blob) Name() string {
2222
return b.name
2323
}
2424

25-
// GetBlobContent Gets the limited content of the blob as raw text
26-
func (b *Blob) GetBlobContent(limit int64) (string, error) {
25+
// GetBlobBytes Gets the limited content of the blob
26+
func (b *Blob) GetBlobBytes(limit int64) ([]byte, error) {
2727
if limit <= 0 {
28-
return "", nil
28+
return nil, nil
2929
}
3030
dataRc, err := b.DataAsync()
3131
if err != nil {
32-
return "", err
32+
return nil, err
3333
}
3434
defer dataRc.Close()
35-
buf, err := util.ReadWithLimit(dataRc, int(limit))
35+
return util.ReadWithLimit(dataRc, int(limit))
36+
}
37+
38+
// GetBlobContent Gets the limited content of the blob as raw text
39+
func (b *Blob) GetBlobContent(limit int64) (string, error) {
40+
buf, err := b.GetBlobBytes(limit)
3641
return string(buf), err
3742
}
3843

@@ -99,11 +104,9 @@ loop:
99104

100105
// GuessContentType guesses the content type of the blob.
101106
func (b *Blob) GuessContentType() (typesniffer.SniffedType, error) {
102-
r, err := b.DataAsync()
107+
buf, err := b.GetBlobBytes(typesniffer.SniffContentSize)
103108
if err != nil {
104109
return typesniffer.SniffedType{}, err
105110
}
106-
defer r.Close()
107-
108-
return typesniffer.DetectContentTypeFromReader(r)
111+
return typesniffer.DetectContentType(buf), nil
109112
}

modules/markup/console/console.go

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,14 @@ package console
66
import (
77
"bytes"
88
"io"
9-
"path"
9+
"unicode/utf8"
1010

1111
"code.gitea.io/gitea/modules/markup"
1212
"code.gitea.io/gitea/modules/setting"
13+
"code.gitea.io/gitea/modules/typesniffer"
14+
"code.gitea.io/gitea/modules/util"
1315

1416
trend "github.com/buildkite/terminal-to-html/v3"
15-
"github.com/go-enry/go-enry/v2"
1617
)
1718

1819
func init() {
@@ -22,6 +23,8 @@ func init() {
2223
// Renderer implements markup.Renderer
2324
type Renderer struct{}
2425

26+
var _ markup.RendererContentDetector = (*Renderer)(nil)
27+
2528
// Name implements markup.Renderer
2629
func (Renderer) Name() string {
2730
return "console"
@@ -40,15 +43,36 @@ func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule {
4043
}
4144

4245
// CanRender implements markup.RendererContentDetector
43-
func (Renderer) CanRender(filename string, input io.Reader) bool {
44-
buf, err := io.ReadAll(input)
45-
if err != nil {
46+
func (Renderer) CanRender(filename string, sniffedType typesniffer.SniffedType, prefetchBuf []byte) bool {
47+
if !sniffedType.IsTextPlain() {
4648
return false
4749
}
48-
if enry.GetLanguage(path.Base(filename), buf) != enry.OtherLanguage {
50+
51+
s := util.UnsafeBytesToString(prefetchBuf)
52+
rs := []rune(s)
53+
cnt := 0
54+
firstErrPos := -1
55+
isCtrlSep := func(p int) bool {
56+
return p < len(rs) && (rs[p] == ';' || rs[p] == 'm')
57+
}
58+
for i, c := range rs {
59+
if c == 0 {
60+
return false
61+
}
62+
if c == '\x1b' {
63+
match := i+1 < len(rs) && rs[i+1] == '['
64+
if match && (isCtrlSep(i+2) || isCtrlSep(i+3) || isCtrlSep(i+4) || isCtrlSep(i+5)) {
65+
cnt++
66+
}
67+
}
68+
if c == utf8.RuneError && firstErrPos == -1 {
69+
firstErrPos = i
70+
}
71+
}
72+
if firstErrPos != -1 && firstErrPos != len(rs)-1 {
4973
return false
5074
}
51-
return bytes.ContainsRune(buf, '\x1b')
75+
return cnt >= 2 // only render it as console output if there are at least two escape sequences
5276
}
5377

5478
// Render renders terminal colors to HTML with all specific handling stuff.

modules/markup/console/console_test.go

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,23 +8,39 @@ import (
88
"testing"
99

1010
"code.gitea.io/gitea/modules/markup"
11+
"code.gitea.io/gitea/modules/typesniffer"
1112

1213
"github.com/stretchr/testify/assert"
1314
)
1415

1516
func TestRenderConsole(t *testing.T) {
16-
var render Renderer
17-
kases := map[string]string{
18-
"\x1b[37m\x1b[40mnpm\x1b[0m \x1b[0m\x1b[32minfo\x1b[0m \x1b[0m\x1b[35mit worked if it ends with\x1b[0m ok": "<span class=\"term-fg37 term-bg40\">npm</span> <span class=\"term-fg32\">info</span> <span class=\"term-fg35\">it worked if it ends with</span> ok",
17+
cases := []struct {
18+
input string
19+
expected string
20+
}{
21+
{"\x1b[37m\x1b[40mnpm\x1b[0m \x1b[0m\x1b[32minfo\x1b[0m \x1b[0m\x1b[35mit worked if it ends with\x1b[0m ok", `<span class="term-fg37 term-bg40">npm</span> <span class="term-fg32">info</span> <span class="term-fg35">it worked if it ends with</span> ok`},
22+
{"\x1b[1;2m \x1b[123m 啊", `<span class="term-fg2"> 啊</span>`},
23+
{"\x1b[1;2m \x1b[123m \xef", `<span class="term-fg2"> �</span>`},
24+
{"\x1b[1;2m \x1b[123m \xef \xef", ``},
25+
{"\x1b[12", ``},
26+
{"\x1b[1", ``},
27+
{"\x1b[FOO\x1b[", ``},
28+
{"\x1b[mFOO\x1b[m", `FOO`},
1929
}
2030

21-
for k, v := range kases {
31+
var render Renderer
32+
for i, c := range cases {
2233
var buf strings.Builder
23-
canRender := render.CanRender("test", strings.NewReader(k))
24-
assert.True(t, canRender)
34+
st := typesniffer.DetectContentType([]byte(c.input))
35+
canRender := render.CanRender("test", st, []byte(c.input))
36+
if c.expected == "" {
37+
assert.False(t, canRender, "case %d: expected not to render", i)
38+
continue
39+
}
2540

26-
err := render.Render(markup.NewRenderContext(t.Context()), strings.NewReader(k), &buf)
41+
assert.True(t, canRender)
42+
err := render.Render(markup.NewRenderContext(t.Context()), strings.NewReader(c.input), &buf)
2743
assert.NoError(t, err)
28-
assert.Equal(t, v, buf.String())
44+
assert.Equal(t, c.expected, buf.String())
2945
}
3046
}

modules/markup/renderer.go

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,12 @@
44
package markup
55

66
import (
7-
"bytes"
87
"io"
98
"path"
109
"strings"
1110

1211
"code.gitea.io/gitea/modules/setting"
12+
"code.gitea.io/gitea/modules/typesniffer"
1313
)
1414

1515
// Renderer defines an interface for rendering markup file to HTML
@@ -37,7 +37,7 @@ type ExternalRenderer interface {
3737
// RendererContentDetector detects if the content can be rendered
3838
// by specified renderer
3939
type RendererContentDetector interface {
40-
CanRender(filename string, input io.Reader) bool
40+
CanRender(filename string, sniffedType typesniffer.SniffedType, prefetchBuf []byte) bool
4141
}
4242

4343
var (
@@ -60,13 +60,9 @@ func GetRendererByFileName(filename string) Renderer {
6060
}
6161

6262
// DetectRendererType detects the markup type of the content
63-
func DetectRendererType(filename string, input io.Reader) string {
64-
buf, err := io.ReadAll(input)
65-
if err != nil {
66-
return ""
67-
}
63+
func DetectRendererType(filename string, sniffedType typesniffer.SniffedType, prefetchBuf []byte) string {
6864
for _, renderer := range renderers {
69-
if detector, ok := renderer.(RendererContentDetector); ok && detector.CanRender(filename, bytes.NewReader(buf)) {
65+
if detector, ok := renderer.(RendererContentDetector); ok && detector.CanRender(filename, sniffedType, prefetchBuf) {
7066
return renderer.Name()
7167
}
7268
}

modules/typesniffer/typesniffer.go

Lines changed: 29 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -6,18 +6,14 @@ package typesniffer
66
import (
77
"bytes"
88
"encoding/binary"
9-
"fmt"
10-
"io"
119
"net/http"
1210
"regexp"
1311
"slices"
1412
"strings"
15-
16-
"code.gitea.io/gitea/modules/util"
13+
"sync"
1714
)
1815

19-
// Use at most this many bytes to determine Content Type.
20-
const sniffLen = 1024
16+
const SniffContentSize = 1024
2117

2218
const (
2319
MimeTypeImageSvg = "image/svg+xml"
@@ -26,22 +22,30 @@ const (
2622
MimeTypeApplicationOctetStream = "application/octet-stream"
2723
)
2824

29-
var (
30-
svgComment = regexp.MustCompile(`(?s)<!--.*?-->`)
31-
svgTagRegex = regexp.MustCompile(`(?si)\A\s*(?:(<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg\b`)
32-
svgTagInXMLRegex = regexp.MustCompile(`(?si)\A<\?xml\b.*?\?>\s*(?:(<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg\b`)
33-
)
34-
35-
// SniffedType contains information about a blobs type.
25+
var globalVars = sync.OnceValue(func() (ret struct {
26+
svgComment, svgTagRegex, svgTagInXMLRegex *regexp.Regexp
27+
},
28+
) {
29+
ret.svgComment = regexp.MustCompile(`(?s)<!--.*?-->`)
30+
ret.svgTagRegex = regexp.MustCompile(`(?si)\A\s*(?:(<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg\b`)
31+
ret.svgTagInXMLRegex = regexp.MustCompile(`(?si)\A<\?xml\b.*?\?>\s*(?:(<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg\b`)
32+
return ret
33+
})
34+
35+
// SniffedType contains information about a blob's type.
3636
type SniffedType struct {
3737
contentType string
3838
}
3939

40-
// IsText etects if content format is plain text.
40+
// IsText detects if the content format is text family, including text/plain, text/html, text/css, etc.
4141
func (ct SniffedType) IsText() bool {
4242
return strings.Contains(ct.contentType, "text/")
4343
}
4444

45+
func (ct SniffedType) IsTextPlain() bool {
46+
return strings.Contains(ct.contentType, "text/plain")
47+
}
48+
4549
// IsImage detects if data is an image format
4650
func (ct SniffedType) IsImage() bool {
4751
return strings.Contains(ct.contentType, "image/")
@@ -57,12 +61,12 @@ func (ct SniffedType) IsPDF() bool {
5761
return strings.Contains(ct.contentType, "application/pdf")
5862
}
5963

60-
// IsVideo detects if data is an video format
64+
// IsVideo detects if data is a video format
6165
func (ct SniffedType) IsVideo() bool {
6266
return strings.Contains(ct.contentType, "video/")
6367
}
6468

65-
// IsAudio detects if data is an video format
69+
// IsAudio detects if data is an audio format
6670
func (ct SniffedType) IsAudio() bool {
6771
return strings.Contains(ct.contentType, "audio/")
6872
}
@@ -103,33 +107,34 @@ func detectFileTypeBox(data []byte) (brands []string, found bool) {
103107
return brands, true
104108
}
105109

106-
// DetectContentType extends http.DetectContentType with more content types. Defaults to text/unknown if input is empty.
110+
// DetectContentType extends http.DetectContentType with more content types. Defaults to text/plain if input is empty.
107111
func DetectContentType(data []byte) SniffedType {
108112
if len(data) == 0 {
109-
return SniffedType{"text/unknown"}
113+
return SniffedType{"text/plain"}
110114
}
111115

112116
ct := http.DetectContentType(data)
113117

114-
if len(data) > sniffLen {
115-
data = data[:sniffLen]
118+
if len(data) > SniffContentSize {
119+
data = data[:SniffContentSize]
116120
}
117121

122+
vars := globalVars()
118123
// SVG is unsupported by http.DetectContentType, https://github.com/golang/go/issues/15888
119124
detectByHTML := strings.Contains(ct, "text/plain") || strings.Contains(ct, "text/html")
120125
detectByXML := strings.Contains(ct, "text/xml")
121126
if detectByHTML || detectByXML {
122-
dataProcessed := svgComment.ReplaceAll(data, nil)
127+
dataProcessed := vars.svgComment.ReplaceAll(data, nil)
123128
dataProcessed = bytes.TrimSpace(dataProcessed)
124-
if detectByHTML && svgTagRegex.Match(dataProcessed) ||
125-
detectByXML && svgTagInXMLRegex.Match(dataProcessed) {
129+
if detectByHTML && vars.svgTagRegex.Match(dataProcessed) ||
130+
detectByXML && vars.svgTagInXMLRegex.Match(dataProcessed) {
126131
ct = MimeTypeImageSvg
127132
}
128133
}
129134

130135
if strings.HasPrefix(ct, "audio/") && bytes.HasPrefix(data, []byte("ID3")) {
131136
// The MP3 detection is quite inaccurate, any content with "ID3" prefix will result in "audio/mpeg".
132-
// So remove the "ID3" prefix and detect again, if result is text, then it must be text content.
137+
// So remove the "ID3" prefix and detect again, then if the result is "text", it must be text content.
133138
// This works especially because audio files contain many unprintable/invalid characters like `0x00`
134139
ct2 := http.DetectContentType(data[3:])
135140
if strings.HasPrefix(ct2, "text/") {
@@ -155,15 +160,3 @@ func DetectContentType(data []byte) SniffedType {
155160
}
156161
return SniffedType{ct}
157162
}
158-
159-
// DetectContentTypeFromReader guesses the content type contained in the reader.
160-
func DetectContentTypeFromReader(r io.Reader) (SniffedType, error) {
161-
buf := make([]byte, sniffLen)
162-
n, err := util.ReadAtMost(r, buf)
163-
if err != nil {
164-
return SniffedType{}, fmt.Errorf("DetectContentTypeFromReader io error: %w", err)
165-
}
166-
buf = buf[:n]
167-
168-
return DetectContentType(buf), nil
169-
}

modules/typesniffer/typesniffer_test.go

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
package typesniffer
55

66
import (
7-
"bytes"
87
"encoding/base64"
98
"encoding/hex"
109
"strings"
@@ -17,7 +16,7 @@ func TestDetectContentTypeLongerThanSniffLen(t *testing.T) {
1716
// Pre-condition: content shorter than SniffContentSize is detected as SVG.
1817
assert.Equal(t, "image/svg+xml", DetectContentType([]byte(`<!-- Comment --><svg></svg>`)).contentType)
1918
// Content longer than SniffContentSize is detected as something else.
20-
assert.NotEqual(t, "image/svg+xml", DetectContentType([]byte(`<!-- `+strings.Repeat("x", sniffLen)+` --><svg></svg>`)).contentType)
19+
assert.NotEqual(t, "image/svg+xml", DetectContentType([]byte(`<!-- `+strings.Repeat("x", SniffContentSize)+` --><svg></svg>`)).contentType)
2120
}
2221

2322
func TestIsTextFile(t *testing.T) {
@@ -116,22 +115,13 @@ func TestIsAudio(t *testing.T) {
116115
assert.True(t, DetectContentType([]byte("ID3Toy\n====\t* hi 🌞, ..."+"🌛"[0:2])).IsText()) // test ID3 tag with incomplete UTF8 char
117116
}
118117

119-
func TestDetectContentTypeFromReader(t *testing.T) {
120-
mp3, _ := base64.StdEncoding.DecodeString("SUQzBAAAAAABAFRYWFgAAAASAAADbWFqb3JfYnJhbmQAbXA0MgBUWFhYAAAAEQAAA21pbm9yX3Zl")
121-
st, err := DetectContentTypeFromReader(bytes.NewReader(mp3))
122-
assert.NoError(t, err)
123-
assert.True(t, st.IsAudio())
124-
}
125-
126118
func TestDetectContentTypeOgg(t *testing.T) {
127119
oggAudio, _ := hex.DecodeString("4f67675300020000000000000000352f0000000000007dc39163011e01766f72626973000000000244ac0000000000000071020000000000b8014f6767530000")
128-
st, err := DetectContentTypeFromReader(bytes.NewReader(oggAudio))
129-
assert.NoError(t, err)
120+
st := DetectContentType(oggAudio)
130121
assert.True(t, st.IsAudio())
131122

132123
oggVideo, _ := hex.DecodeString("4f676753000200000000000000007d9747ef000000009b59daf3012a807468656f7261030201001e00110001e000010e00020000001e00000001000001000001")
133-
st, err = DetectContentTypeFromReader(bytes.NewReader(oggVideo))
134-
assert.NoError(t, err)
124+
st = DetectContentType(oggVideo)
135125
assert.True(t, st.IsVideo())
136126
}
137127

0 commit comments

Comments
 (0)