-
-
Notifications
You must be signed in to change notification settings - Fork 2.7k
feat: WIP: Adjust GPU Layers #3737
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
Closed
Changes from 1 commit
Commits
Show all changes
27 commits
Select commit
Hold shift + click to select a range
790700b
Add GGUF Parser
siddimore a234da5
chore: :arrow_up: Update ggerganov/llama.cpp to `a39ab216aa624308fda7…
localai-bot a3a03a4
chore: :arrow_up: Update ggerganov/whisper.cpp to `ede1718f6d45aa3f7a…
localai-bot 0dc66a6
chore(federated): display a message when nodes are not available (#3721)
mudler 717978e
Update CONTRIBUTING.md (#3723)
jjasghar 78e29f3
models(gallery): add salamandra-7b-instruct (#3726)
mudler af1eb1d
chore: :arrow_up: Update ggerganov/llama.cpp to `d5ed2b929d85bbd7dbee…
localai-bot 16dfee9
chore: :arrow_up: Update ggerganov/whisper.cpp to `ccc2547210e09e3a17…
localai-bot 0ec4dc6
feat(multimodal): allow to template placeholders (#3728)
mudler bb130ff
Update README.md
mudler 5b19cee
feat(vllm): add support for image-to-text and video-to-text (#3729)
mudler 2d11bfc
chore: :arrow_up: Update ggerganov/llama.cpp to `71967c2a6d30da9f6158…
localai-bot fc74bf1
chore: :arrow_up: Update ggerganov/whisper.cpp to `2944cb72d952823780…
localai-bot 4dffc45
feat(shutdown): allow force shutdown of backends (#3733)
mudler 1c0300b
fix(base-grpc): close channel in base grpc server (#3734)
mudler 64ade06
chore: :arrow_up: Update ggerganov/whisper.cpp to `6a94163b913d8e974e…
localai-bot a8f095a
chore: :arrow_up: Update ggerganov/llama.cpp to `8c475b97b8ba7d678d4c…
localai-bot 1ad80c9
fix pr comment
siddimore 6dee2a6
Add tests
siddimore 6d1199d
Save Model Memory Usage
siddimore eaee726
fix merge conflict
siddimore 0a306c8
Merge branch 'master' into adjust_default_gpu_layers
mudler e2fb38f
Merge branch 'master' into adjust_default_gpu_layers
mudler 18bcce2
Add code to query NVIDIA device
siddimore 37f2d65
Merge branch 'master' into adjust_default_gpu_layers
siddimore 7380f80
Add AdjustGPULayers flag
siddimore cd1dc5d
rename file
siddimore File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,233 @@ | ||
package model | ||
|
||
import ( | ||
"context" | ||
"fmt" | ||
"net/url" | ||
"os" | ||
"strings" | ||
|
||
ggufparser "github.com/gpustack/gguf-parser-go" | ||
) | ||
|
||
// Structs for parsing GGUF data from Parser

// ModelEstimate is the top-level payload assembled from a parsed GGUF file:
// the memory-usage estimate plus the architecture, metadata and tokenizer
// sections reported by the parser.
type ModelEstimate struct {
	Estimate     ModelEstimateItems `json:"estimate"`
	Architecture Architecture       `json:"architecture"`
	Metadata     Metadata           `json:"metadata"`
	Tokenizer    Tokenizer          `json:"tokenizer"`
}
|
||
// ModelEstimateItems mirrors the llama.cpp run estimate produced by the
// gguf-parser library: per-configuration memory items plus run-level settings
// such as context size and batch sizes.
type ModelEstimateItems struct {
	Items             []ModelMemory `json:"items"`
	Type              string        `json:"type"`
	Architecture      string        `json:"architecture"`
	ContextSize       int           `json:"contextSize"`
	FlashAttention    bool          `json:"flashAttention"`
	NoMMap            bool          `json:"noMMap"`
	EmbeddingOnly     bool          `json:"embeddingOnly"`
	Distributable     bool          `json:"distributable"`
	LogicalBatchSize  int32         `json:"logicalBatchSize"`
	PhysicalBatchSize int32         `json:"physicalBatchSize"`
}
|
||
// ModelMemory is one memory-usage item: how many layers are offloaded to the
// GPU, whether the model is fully offloaded, and the resulting RAM/VRAM needs.
type ModelMemory struct {
	OffloadLayers uint64         `json:"offloadLayers"`
	FullOffloaded bool           `json:"fullOffloaded"`
	RAM           EstimateRAM    `json:"ram"`
	VRAMs         []EstimateVRAM `json:"vrams"` // one entry per VRAM device reported by the estimate
}
|
||
// EstimateRAM holds estimated system-RAM usage in bytes for unified (UMA) and
// non-unified memory architectures.
type EstimateRAM struct {
	UMA    uint64 `json:"uma"`
	NonUMA uint64 `json:"nonuma"`
}
|
||
// EstimateVRAM holds estimated VRAM usage in bytes for unified (UMA) and
// non-unified memory architectures.
type EstimateVRAM struct {
	UMA    uint64 `json:"uma"`
	NonUMA uint64 `json:"nonuma"`
}
|
||
// Architecture describes the model architecture section of the GGUF file
// (e.g. maximum context length, embedding and vocabulary sizes).
type Architecture struct {
	Type                 string `json:"type"`
	Architecture         string `json:"architecture"`
	MaximumContextLength int    `json:"maximumContextLength"`
	EmbeddingLength      int    `json:"embeddingLength"`
	VocabularyLength     int    `json:"vocabularyLength"`
}
|
||
// Metadata describes the general metadata section of the GGUF file: name,
// license, quantization details and on-disk/parameter sizes.
type Metadata struct {
	Type                string `json:"type"`
	Architecture        string `json:"architecture"`
	QuantizationVersion int    `json:"quantizationVersion"`
	Alignment           int    `json:"alignment"`
	Name                string `json:"name"`
	License             string `json:"license"`
	FileType            int    `json:"fileType"`
	LittleEndian        bool   `json:"littleEndian"`
	FileSize            int64  `json:"fileSize"` // size of the GGUF file on disk, in bytes
	Size                int64  `json:"size"`
	Parameters          int64  `json:"parameters"` // total parameter count
}
|
||
// Tokenizer describes the tokenizer section of the GGUF file.
type Tokenizer struct {
	Model        string `json:"model"` // tokenizer model identifier (as stored in the GGUF file)
	TokensLength int    `json:"tokensLength"`
	TokensSize   int    `json:"tokensSize"`
}
|
||
// Default platform footprint from ggufparser. These are the baseline
// allocations assumed for a non-UMA platform, passed to SummarizeItem when
// building the memory estimate.
const nonUMARamFootprint = uint64(150 * 1024 * 1024)  // 150 MiB
const nonUMAVramFootprint = uint64(250 * 1024 * 1024) // 250 MiB
|
||
func GetModelGGufData(modelPath string) (*ModelEstimate, error) { | ||
ctx := context.Background() | ||
|
||
// Check if the input is a valid URL | ||
if isURL(modelPath) { | ||
fmt.Println("Input is a URL.") | ||
ggufRemoteData, err := ggufparser.ParseGGUFFileRemote(ctx, modelPath) | ||
if err != nil { | ||
return nil, fmt.Errorf("error parsing GGUF file from remote URL: %v", err) | ||
} | ||
return estimateModelMemoryUsage(ggufRemoteData) | ||
|
||
// Check if the input is an Ollama model | ||
} else if strings.HasSuffix(modelPath, "ollama") { | ||
fmt.Println("Input is an Ollama model.") | ||
ggufOllamaData, err := ggufparser.ParseGGUFFileFromOllama(ctx, modelPath) | ||
if err != nil { | ||
return nil, fmt.Errorf("error parsing GGUF file from Ollama model: %v", err) | ||
} | ||
return estimateModelMemoryUsage(ggufOllamaData) | ||
|
||
// Check if the input is a Hugging Face model reference (format: huggingface.co/<repo>/<file>) | ||
} else if strings.Contains(modelPath, "huggingface.co") { | ||
fmt.Println("Input is a Hugging Face model.") | ||
|
||
// Parse the URL to extract the repository and filename | ||
u, err := url.Parse(modelPath) | ||
if err != nil { | ||
return nil, fmt.Errorf("invalid Hugging Face URL: %v", err) | ||
} | ||
|
||
// Example URL: https://huggingface.co/<repo>/<file>.gguf | ||
parts := strings.Split(u.Path, "/") | ||
if len(parts) < 3 { | ||
return nil, fmt.Errorf("invalid Hugging Face model format. Expected format: huggingface.co/<repo>/<file>") | ||
} | ||
|
||
repo := parts[1] // Repository name | ||
file := parts[2] // File name | ||
|
||
ggufHuggingFaceData, err := ggufparser.ParseGGUFFileFromHuggingFace(ctx, repo, file) | ||
if err != nil { | ||
return nil, fmt.Errorf("error parsing GGUF file from Hugging Face: %v", err) | ||
} | ||
return estimateModelMemoryUsage(ggufHuggingFaceData) | ||
|
||
// Otherwise, assume the input is a file path | ||
} else if fileExists(modelPath) { | ||
fmt.Println("Input is a file path.") | ||
ggufData, err := ggufparser.ParseGGUFFile(modelPath) | ||
if err != nil { | ||
return nil, fmt.Errorf("error parsing GGUF file from file path: %v", err) | ||
} | ||
return estimateModelMemoryUsage(ggufData) | ||
} | ||
|
||
return nil, fmt.Errorf("unsupported input type") | ||
} | ||
|
||
// isURL reports whether input looks like a remote http/https URL.
//
// url.ParseRequestURI alone is not a sufficient check: it also accepts bare
// absolute paths such as "/models/foo.gguf", which would misclassify local
// files as remote URLs. Require an http/https scheme and a non-empty host.
func isURL(input string) bool {
	u, err := url.ParseRequestURI(input)
	if err != nil {
		return false
	}
	return (u.Scheme == "http" || u.Scheme == "https") && u.Host != ""
}
|
||
// fileExists reports whether filename refers to an existing regular entry
// that is not a directory.
func fileExists(filename string) bool {
	info, err := os.Stat(filename)
	if err != nil {
		// Treat any stat failure as "not usable". The original only checked
		// os.IsNotExist, so other errors (e.g. permission denied) left info
		// nil and panicked on info.IsDir().
		return false
	}
	return !info.IsDir()
}
|
||
func estimateModelMemoryUsage(ggufFile *ggufparser.GGUFFile) (*ModelEstimate, error) { | ||
|
||
if ggufFile == nil { | ||
fmt.Printf("Error Invalid GGUF File \n") | ||
|
||
// Invalid ModelPath return nil and use default values | ||
return nil, nil | ||
} | ||
|
||
// | ||
llamacppRunEstimateOpts := []ggufparser.LLaMACppRunEstimateOption{} | ||
// | ||
llamacppRunEstimate := ggufFile.EstimateLLaMACppRun(llamacppRunEstimateOpts...) | ||
|
||
// Summarize the item with mmap and footprint values | ||
summary := llamacppRunEstimate.SummarizeItem(true, nonUMARamFootprint, nonUMAVramFootprint) | ||
// Fetch architecture, metadata, and tokenizer from GGUF file | ||
architecture := ggufFile.Architecture() | ||
metadata := ggufFile.Metadata() | ||
tokenizer := ggufFile.Tokenizer() | ||
|
||
// Construct the JSON payload | ||
payload := ModelEstimate{ | ||
Estimate: ModelEstimateItems{ | ||
Items: []ModelMemory{ | ||
{ | ||
OffloadLayers: summary.OffloadLayers, | ||
FullOffloaded: summary.FullOffloaded, | ||
RAM: EstimateRAM{ | ||
UMA: uint64(summary.RAM.UMA), | ||
NonUMA: uint64(summary.RAM.NonUMA), | ||
}, | ||
VRAMs: []EstimateVRAM{ | ||
{ | ||
UMA: uint64(summary.VRAMs[0].UMA), | ||
NonUMA: uint64(summary.VRAMs[0].NonUMA), | ||
}, | ||
}, | ||
}, | ||
}, | ||
Type: architecture.Type, | ||
Architecture: architecture.Architecture, | ||
ContextSize: int(llamacppRunEstimate.ContextSize), | ||
FlashAttention: llamacppRunEstimate.FlashAttention, | ||
NoMMap: llamacppRunEstimate.NoMMap, | ||
EmbeddingOnly: llamacppRunEstimate.EmbeddingOnly, | ||
Distributable: llamacppRunEstimate.Distributable, | ||
LogicalBatchSize: llamacppRunEstimate.LogicalBatchSize, | ||
PhysicalBatchSize: llamacppRunEstimate.PhysicalBatchSize, | ||
}, | ||
Architecture: Architecture{ | ||
Type: metadata.Type, | ||
Architecture: architecture.Architecture, | ||
MaximumContextLength: int(architecture.MaximumContextLength), | ||
EmbeddingLength: int(architecture.EmbeddingLength), | ||
VocabularyLength: int(architecture.VocabularyLength), | ||
}, | ||
Metadata: Metadata{ | ||
Type: metadata.Type, | ||
Architecture: metadata.Architecture, | ||
QuantizationVersion: int(metadata.QuantizationVersion), | ||
Name: metadata.Name, | ||
License: metadata.License, | ||
FileType: int(metadata.FileType), | ||
LittleEndian: metadata.LittleEndian, | ||
FileSize: int64(metadata.FileSize), | ||
Parameters: int64(metadata.Parameters), | ||
}, | ||
Tokenizer: Tokenizer{ | ||
Model: tokenizer.Model, | ||
TokensLength: int(tokenizer.TokensLength), | ||
TokensSize: int(tokenizer.TokensSize), | ||
}, | ||
} | ||
|
||
return &payload, nil | ||
} |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.