Skip to content

Add support for 3D/CAD file formats preview #34794

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 24 commits into from
Jun 30, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 12 additions & 9 deletions modules/git/blob.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,17 +22,22 @@ func (b *Blob) Name() string {
return b.name
}

// GetBlobContent Gets the limited content of the blob as raw text
func (b *Blob) GetBlobContent(limit int64) (string, error) {
// GetBlobBytes Gets the limited content of the blob
func (b *Blob) GetBlobBytes(limit int64) ([]byte, error) {
if limit <= 0 {
return "", nil
return nil, nil
}
dataRc, err := b.DataAsync()
if err != nil {
return "", err
return nil, err
}
defer dataRc.Close()
buf, err := util.ReadWithLimit(dataRc, int(limit))
return util.ReadWithLimit(dataRc, int(limit))
}

// GetBlobContent Gets the limited content of the blob as raw text
func (b *Blob) GetBlobContent(limit int64) (string, error) {
buf, err := b.GetBlobBytes(limit)
return string(buf), err
}

Expand Down Expand Up @@ -99,11 +104,9 @@ loop:

// GuessContentType guesses the content type of the blob.
func (b *Blob) GuessContentType() (typesniffer.SniffedType, error) {
r, err := b.DataAsync()
buf, err := b.GetBlobBytes(typesniffer.SniffContentSize)
if err != nil {
return typesniffer.SniffedType{}, err
}
defer r.Close()

return typesniffer.DetectContentTypeFromReader(r)
return typesniffer.DetectContentType(buf), nil
}
38 changes: 31 additions & 7 deletions modules/markup/console/console.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,14 @@ package console
import (
"bytes"
"io"
"path"
"unicode/utf8"

"code.gitea.io/gitea/modules/markup"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/typesniffer"
"code.gitea.io/gitea/modules/util"

trend "github.com/buildkite/terminal-to-html/v3"
"github.com/go-enry/go-enry/v2"
)

func init() {
Expand All @@ -22,6 +23,8 @@ func init() {
// Renderer implements markup.Renderer
type Renderer struct{}

var _ markup.RendererContentDetector = (*Renderer)(nil)

// Name implements markup.Renderer
func (Renderer) Name() string {
return "console"
Expand All @@ -40,15 +43,36 @@ func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule {
}

// CanRender implements markup.RendererContentDetector
func (Renderer) CanRender(filename string, input io.Reader) bool {
buf, err := io.ReadAll(input)
if err != nil {
func (Renderer) CanRender(filename string, sniffedType typesniffer.SniffedType, prefetchBuf []byte) bool {
if !sniffedType.IsTextPlain() {
return false
}
if enry.GetLanguage(path.Base(filename), buf) != enry.OtherLanguage {

s := util.UnsafeBytesToString(prefetchBuf)
rs := []rune(s)
cnt := 0
firstErrPos := -1
isCtrlSep := func(p int) bool {
return p < len(rs) && (rs[p] == ';' || rs[p] == 'm')
}
for i, c := range rs {
if c == 0 {
return false
}
if c == '\x1b' {
match := i+1 < len(rs) && rs[i+1] == '['
if match && (isCtrlSep(i+2) || isCtrlSep(i+3) || isCtrlSep(i+4) || isCtrlSep(i+5)) {
cnt++
}
}
if c == utf8.RuneError && firstErrPos == -1 {
firstErrPos = i
}
}
if firstErrPos != -1 && firstErrPos != len(rs)-1 {
return false
}
return bytes.ContainsRune(buf, '\x1b')
return cnt >= 2 // only render it as console output if there are at least two escape sequences
}

// Render renders terminal colors to HTML with all specific handling stuff.
Expand Down
32 changes: 24 additions & 8 deletions modules/markup/console/console_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,23 +8,39 @@ import (
"testing"

"code.gitea.io/gitea/modules/markup"
"code.gitea.io/gitea/modules/typesniffer"

"github.com/stretchr/testify/assert"
)

func TestRenderConsole(t *testing.T) {
var render Renderer
kases := map[string]string{
"\x1b[37m\x1b[40mnpm\x1b[0m \x1b[0m\x1b[32minfo\x1b[0m \x1b[0m\x1b[35mit worked if it ends with\x1b[0m ok": "<span class=\"term-fg37 term-bg40\">npm</span> <span class=\"term-fg32\">info</span> <span class=\"term-fg35\">it worked if it ends with</span> ok",
cases := []struct {
input string
expected string
}{
{"\x1b[37m\x1b[40mnpm\x1b[0m \x1b[0m\x1b[32minfo\x1b[0m \x1b[0m\x1b[35mit worked if it ends with\x1b[0m ok", `<span class="term-fg37 term-bg40">npm</span> <span class="term-fg32">info</span> <span class="term-fg35">it worked if it ends with</span> ok`},
{"\x1b[1;2m \x1b[123m 啊", `<span class="term-fg2"> 啊</span>`},
{"\x1b[1;2m \x1b[123m \xef", `<span class="term-fg2"> �</span>`},
{"\x1b[1;2m \x1b[123m \xef \xef", ``},
{"\x1b[12", ``},
{"\x1b[1", ``},
{"\x1b[FOO\x1b[", ``},
{"\x1b[mFOO\x1b[m", `FOO`},
}

for k, v := range kases {
var render Renderer
for i, c := range cases {
var buf strings.Builder
canRender := render.CanRender("test", strings.NewReader(k))
assert.True(t, canRender)
st := typesniffer.DetectContentType([]byte(c.input))
canRender := render.CanRender("test", st, []byte(c.input))
if c.expected == "" {
assert.False(t, canRender, "case %d: expected not to render", i)
continue
}

err := render.Render(markup.NewRenderContext(t.Context()), strings.NewReader(k), &buf)
assert.True(t, canRender)
err := render.Render(markup.NewRenderContext(t.Context()), strings.NewReader(c.input), &buf)
assert.NoError(t, err)
assert.Equal(t, v, buf.String())
assert.Equal(t, c.expected, buf.String())
}
}
12 changes: 4 additions & 8 deletions modules/markup/renderer.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@
package markup

import (
"bytes"
"io"
"path"
"strings"

"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/typesniffer"
)

// Renderer defines an interface for rendering markup file to HTML
Expand Down Expand Up @@ -37,7 +37,7 @@ type ExternalRenderer interface {
// RendererContentDetector detects if the content can be rendered
// by the specified renderer
type RendererContentDetector interface {
CanRender(filename string, input io.Reader) bool
CanRender(filename string, sniffedType typesniffer.SniffedType, prefetchBuf []byte) bool
}

var (
Expand All @@ -60,13 +60,9 @@ func GetRendererByFileName(filename string) Renderer {
}

// DetectRendererType detects the markup type of the content
func DetectRendererType(filename string, input io.Reader) string {
buf, err := io.ReadAll(input)
if err != nil {
return ""
}
func DetectRendererType(filename string, sniffedType typesniffer.SniffedType, prefetchBuf []byte) string {
for _, renderer := range renderers {
if detector, ok := renderer.(RendererContentDetector); ok && detector.CanRender(filename, bytes.NewReader(buf)) {
if detector, ok := renderer.(RendererContentDetector); ok && detector.CanRender(filename, sniffedType, prefetchBuf) {
return renderer.Name()
}
}
Expand Down
65 changes: 29 additions & 36 deletions modules/typesniffer/typesniffer.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,14 @@ package typesniffer
import (
"bytes"
"encoding/binary"
"fmt"
"io"
"net/http"
"regexp"
"slices"
"strings"

"code.gitea.io/gitea/modules/util"
"sync"
)

// Use at most this many bytes to determine Content Type.
const sniffLen = 1024
const SniffContentSize = 1024

const (
MimeTypeImageSvg = "image/svg+xml"
Expand All @@ -26,22 +22,30 @@ const (
MimeTypeApplicationOctetStream = "application/octet-stream"
)

var (
svgComment = regexp.MustCompile(`(?s)<!--.*?-->`)
svgTagRegex = regexp.MustCompile(`(?si)\A\s*(?:(<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg\b`)
svgTagInXMLRegex = regexp.MustCompile(`(?si)\A<\?xml\b.*?\?>\s*(?:(<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg\b`)
)

// SniffedType contains information about a blobs type.
// globalVars returns the package's compiled SVG-detection regular expressions.
// It is wrapped in sync.OnceValue, so the patterns are compiled on the first call
// and the same result is returned by every later call.
var globalVars = sync.OnceValue(func() (ret struct {
svgComment, svgTagRegex, svgTagInXMLRegex *regexp.Regexp
},
) {
// svgComment matches a single "<!-- ... -->" comment (non-greedy, may span lines).
ret.svgComment = regexp.MustCompile(`(?s)<!--.*?-->`)
// svgTagRegex matches input that starts with optional whitespace and optional
// "<!DOCTYPE svg ...>" declarations, followed by an "<svg" tag (case-insensitive).
ret.svgTagRegex = regexp.MustCompile(`(?si)\A\s*(?:(<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg\b`)
// svgTagInXMLRegex is the same, but the input must begin with an "<?xml ... ?>" declaration.
ret.svgTagInXMLRegex = regexp.MustCompile(`(?si)\A<\?xml\b.*?\?>\s*(?:(<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg\b`)
return ret
})

// SniffedType contains information about a blob's type.
type SniffedType struct {
contentType string
}

// IsText etects if content format is plain text.
// IsText detects if the content format is text family, including text/plain, text/html, text/css, etc.
func (ct SniffedType) IsText() bool {
return strings.Contains(ct.contentType, "text/")
}

// IsTextPlain detects if the content type is text/plain.
func (ct SniffedType) IsTextPlain() bool {
return strings.Contains(ct.contentType, "text/plain")
}

// IsImage detects if data is an image format
func (ct SniffedType) IsImage() bool {
return strings.Contains(ct.contentType, "image/")
Expand All @@ -57,12 +61,12 @@ func (ct SniffedType) IsPDF() bool {
return strings.Contains(ct.contentType, "application/pdf")
}

// IsVideo detects if data is an video format
// IsVideo detects if data is a video format
func (ct SniffedType) IsVideo() bool {
return strings.Contains(ct.contentType, "video/")
}

// IsAudio detects if data is an video format
// IsAudio detects if data is an audio format
func (ct SniffedType) IsAudio() bool {
return strings.Contains(ct.contentType, "audio/")
}
Expand Down Expand Up @@ -103,33 +107,34 @@ func detectFileTypeBox(data []byte) (brands []string, found bool) {
return brands, true
}

// DetectContentType extends http.DetectContentType with more content types. Defaults to text/unknown if input is empty.
// DetectContentType extends http.DetectContentType with more content types. Defaults to text/plain if input is empty.
func DetectContentType(data []byte) SniffedType {
if len(data) == 0 {
return SniffedType{"text/unknown"}
return SniffedType{"text/plain"}
}

ct := http.DetectContentType(data)

if len(data) > sniffLen {
data = data[:sniffLen]
if len(data) > SniffContentSize {
data = data[:SniffContentSize]
}

vars := globalVars()
// SVG is unsupported by http.DetectContentType, https://github.com/golang/go/issues/15888
detectByHTML := strings.Contains(ct, "text/plain") || strings.Contains(ct, "text/html")
detectByXML := strings.Contains(ct, "text/xml")
if detectByHTML || detectByXML {
dataProcessed := svgComment.ReplaceAll(data, nil)
dataProcessed := vars.svgComment.ReplaceAll(data, nil)
dataProcessed = bytes.TrimSpace(dataProcessed)
if detectByHTML && svgTagRegex.Match(dataProcessed) ||
detectByXML && svgTagInXMLRegex.Match(dataProcessed) {
if detectByHTML && vars.svgTagRegex.Match(dataProcessed) ||
detectByXML && vars.svgTagInXMLRegex.Match(dataProcessed) {
ct = MimeTypeImageSvg
}
}

if strings.HasPrefix(ct, "audio/") && bytes.HasPrefix(data, []byte("ID3")) {
// The MP3 detection is quite inaccurate, any content with "ID3" prefix will result in "audio/mpeg".
// So remove the "ID3" prefix and detect again, if result is text, then it must be text content.
// So remove the "ID3" prefix and detect again, then if the result is "text", it must be text content.
// This works especially because audio files contain many unprintable/invalid characters like `0x00`
ct2 := http.DetectContentType(data[3:])
if strings.HasPrefix(ct2, "text/") {
Expand All @@ -155,15 +160,3 @@ func DetectContentType(data []byte) SniffedType {
}
return SniffedType{ct}
}

// DetectContentTypeFromReader guesses the content type contained in the reader.
func DetectContentTypeFromReader(r io.Reader) (SniffedType, error) {
buf := make([]byte, sniffLen)
n, err := util.ReadAtMost(r, buf)
if err != nil {
return SniffedType{}, fmt.Errorf("DetectContentTypeFromReader io error: %w", err)
}
buf = buf[:n]

return DetectContentType(buf), nil
}
16 changes: 3 additions & 13 deletions modules/typesniffer/typesniffer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
package typesniffer

import (
"bytes"
"encoding/base64"
"encoding/hex"
"strings"
Expand All @@ -17,7 +16,7 @@ func TestDetectContentTypeLongerThanSniffLen(t *testing.T) {
// Pre-condition: Shorter than SniffContentSize detects SVG.
assert.Equal(t, "image/svg+xml", DetectContentType([]byte(`<!-- Comment --><svg></svg>`)).contentType)
// Longer than SniffContentSize detects something else.
assert.NotEqual(t, "image/svg+xml", DetectContentType([]byte(`<!-- `+strings.Repeat("x", sniffLen)+` --><svg></svg>`)).contentType)
assert.NotEqual(t, "image/svg+xml", DetectContentType([]byte(`<!-- `+strings.Repeat("x", SniffContentSize)+` --><svg></svg>`)).contentType)
}

func TestIsTextFile(t *testing.T) {
Expand Down Expand Up @@ -116,22 +115,13 @@ func TestIsAudio(t *testing.T) {
assert.True(t, DetectContentType([]byte("ID3Toy\n====\t* hi 🌞, ..."+"🌛"[0:2])).IsText()) // test ID3 tag with incomplete UTF8 char
}

func TestDetectContentTypeFromReader(t *testing.T) {
mp3, _ := base64.StdEncoding.DecodeString("SUQzBAAAAAABAFRYWFgAAAASAAADbWFqb3JfYnJhbmQAbXA0MgBUWFhYAAAAEQAAA21pbm9yX3Zl")
st, err := DetectContentTypeFromReader(bytes.NewReader(mp3))
assert.NoError(t, err)
assert.True(t, st.IsAudio())
}

func TestDetectContentTypeOgg(t *testing.T) {
oggAudio, _ := hex.DecodeString("4f67675300020000000000000000352f0000000000007dc39163011e01766f72626973000000000244ac0000000000000071020000000000b8014f6767530000")
st, err := DetectContentTypeFromReader(bytes.NewReader(oggAudio))
assert.NoError(t, err)
st := DetectContentType(oggAudio)
assert.True(t, st.IsAudio())

oggVideo, _ := hex.DecodeString("4f676753000200000000000000007d9747ef000000009b59daf3012a807468656f7261030201001e00110001e000010e00020000001e00000001000001000001")
st, err = DetectContentTypeFromReader(bytes.NewReader(oggVideo))
assert.NoError(t, err)
st = DetectContentType(oggVideo)
assert.True(t, st.IsVideo())
}

Expand Down
Loading