Skip to content

Commit 3fc0bf1

Browse files
authored
Merge branch 'main' into gemini-usage
2 parents dd885e2 + a0e4c0e commit 3fc0bf1

38 files changed

+543
-740
lines changed

examples/inference-pool/README.md

Lines changed: 63 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,19 @@
22

33
This example demonstrates how to use AI Gateway with the InferencePool feature, which enables intelligent request routing across multiple inference endpoints with load balancing and health checking capabilities.
44

5+
The setup includes **three distinct backends**:
6+
7+
- Two `InferencePool` resources for LLMs (`Llama-3.1-8B-Instruct` and `Mistral`)
8+
- One standard `Backend` for non-InferencePool traffic
9+
10+
Routing is controlled by the `x-ai-eg-model` HTTP header.
11+
512
## Files in This Directory
613

714
- **`envoy-gateway-values-addon.yaml`**: Envoy Gateway values addon for InferencePool support. Combine with `../../manifests/envoy-gateway-values.yaml`.
8-
- **`base.yaml`**: Complete example that includes Gateway, AIServiceBackend, InferencePool CRDs, and a sample application deployment.
15+
- **`base.yaml`**: Deploys all inference backends and supporting resources using the **standard approach documented in the official guide**. This includes:
16+
- A `mistral` backend with custom Endpoint Picker configuration
17+
- A standard fallback backend (`envoy-ai-gateway-basic-testupstream`) for non-InferencePool routing
918
- **`aigwroute.yaml`**: Example AIGatewayRoute that uses InferencePool as a backend.
1019
- **`httproute.yaml`**: Example HTTPRoute for traditional HTTP routing to InferencePool endpoints.
1120
- **`with-annotations.yaml`**: Advanced example showing InferencePool with Kubernetes annotations for fine-grained control.
@@ -27,16 +36,63 @@ This example demonstrates how to use AI Gateway with the InferencePool feature,
2736

2837
```bash
2938
kubectl apply -f base.yaml
39+
kubectl apply -f aigwroute.yaml
3040
```
3141

42+
> Note: The `aigwroute.yaml` file defines the InferencePool and routing logic, but does not deploy the actual inference backend (e.g., the vLLM server for Llama-3.1-8B-Instruct).
43+
> You must deploy the backend separately by following [Step 3: Deploy Inference Backends](https://aigateway.envoyproxy.io/docs/capabilities/inference/aigatewayroute-inferencepool#step-3-deploy-inference-backends).
44+
3245
3. Test the setup:
3346

34-
```bash
35-
GATEWAY_HOST=$(kubectl get gateway/ai-gateway -o jsonpath='{.status.addresses[0].value}')
36-
curl -X POST "http://${GATEWAY_HOST}/v1/chat/completions" \
37-
-H "Content-Type: application/json" \
38-
-d '{"model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": "Hello!"}]}'
39-
```
47+
You can access the gateway in two ways, depending on your environment.
48+
49+
✅ Option A: Using External IP (e.g., cloud LoadBalancer, MetalLB)
50+
If your cluster assigns an external address to the Gateway:
51+
52+
```bash
53+
GATEWAY_HOST=$(kubectl get gateway/inference-pool-with-aigwroute -n default -o jsonpath='{.status.addresses[0].value}')
54+
echo "Gateway available at: http://${GATEWAY_HOST}"
55+
```
56+
57+
Then send a request:
58+
59+
```bash
60+
curl -X POST "http://${GATEWAY_HOST}/v1/chat/completions" \
61+
-H "x-ai-eg-model: meta-llama/Llama-3.1-8B-Instruct" \
62+
-H "Authorization: sk-abcdefghijklmnopqrstuvwxyz" \
63+
-H "Content-Type: application/json" \
64+
-d '{"model": "meta-llama/Llama-3.1-8B-Instruct", "messages": [{"role": "user", "content": "Hello!"}]}'
65+
```
66+
67+
✅ Option B: Using kubectl port-forward (ideal for local clusters like Minikube/Kind)
68+
In one terminal, forward the gateway service:
69+
70+
```bash
71+
kubectl port-forward svc/envoy-default-inference-pool-with-aigwroute-d416582c 8080:80 -n envoy-gateway-system
72+
```
73+
74+
In another terminal, send requests to localhost:8080:
75+
76+
```bash
77+
# Route to Llama (InferencePool)
78+
curl -X POST "http://localhost:8080/v1/chat/completions" \
79+
-H "x-ai-eg-model: meta-llama/Llama-3.1-8B-Instruct" \
80+
-H "Authorization: sk-abcdefghijklmnopqrstuvwxyz" \
81+
-H "Content-Type: application/json" \
82+
-d '{"model": "meta-llama/Llama-3.1-8B-Instruct", "messages": [{"role": "user", "content": "Hello!"}]}'
83+
84+
# Route to Mistral (InferencePool)
85+
curl -X POST "http://localhost:8080/v1/chat/completions" \
86+
-H "x-ai-eg-model: mistral:latest" \
87+
-H "Content-Type: application/json" \
88+
-d '{"model": "mistral:latest", "messages": [{"role": "user", "content": "Hello!"}]}'
89+
90+
# Route to fallback backend (Standard Backend)
91+
curl -X POST "http://localhost:8080/v1/chat/completions" \
92+
-H "x-ai-eg-model: some-cool-self-hosted-model" \
93+
-H "Content-Type: application/json" \
94+
-d '{"model": "some-cool-self-hosted-model", "messages": [{"role": "user", "content": "Hello!"}]}'
95+
```
4096

4197
### Combining with Other Features
4298

internal/extproc/backendauth/anthropicapikey.go renamed to internal/backendauth/anthropicapikey.go

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,8 @@ import (
99
"context"
1010
"strings"
1111

12-
corev3 "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
13-
extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
14-
1512
"github.com/envoyproxy/ai-gateway/internal/filterapi"
13+
"github.com/envoyproxy/ai-gateway/internal/internalapi"
1614
)
1715

1816
type anthropicAPIKeyHandler struct {
@@ -27,13 +25,7 @@ func newAnthropicAPIKeyHandler(auth *filterapi.AnthropicAPIKeyAuth) (Handler, er
2725
// Anthropic uses "x-api-key" header instead of "Authorization: Bearer".
2826
//
2927
// https://docs.claude.com/en/api/overview#authentication
30-
func (a *anthropicAPIKeyHandler) Do(_ context.Context, requestHeaders map[string]string, headerMut *extprocv3.HeaderMutation, _ *extprocv3.BodyMutation) error {
28+
func (a *anthropicAPIKeyHandler) Do(_ context.Context, requestHeaders map[string]string, _ []byte) ([]internalapi.Header, error) {
3129
requestHeaders["x-api-key"] = a.apiKey
32-
headerMut.SetHeaders = append(headerMut.SetHeaders, &corev3.HeaderValueOption{
33-
Header: &corev3.HeaderValue{
34-
Key: "x-api-key",
35-
RawValue: []byte(a.apiKey),
36-
},
37-
})
38-
return nil
30+
return []internalapi.Header{{"x-api-key", a.apiKey}}, nil
3931
}

internal/extproc/backendauth/anthropicapikey_test.go renamed to internal/backendauth/anthropicapikey_test.go

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ import (
99
"context"
1010
"testing"
1111

12-
extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
1312
"github.com/stretchr/testify/require"
1413

1514
"github.com/envoyproxy/ai-gateway/internal/filterapi"
@@ -21,30 +20,31 @@ func TestAnthropicAPIKeyHandler(t *testing.T) {
2120
require.NoError(t, err)
2221

2322
headers := make(map[string]string)
24-
headerMut := &extprocv3.HeaderMutation{}
2523

26-
err = handler.Do(context.Background(), headers, headerMut, nil)
24+
hders, err := handler.Do(context.Background(), headers, nil)
2725
require.NoError(t, err)
2826

2927
// Verify header in map
3028
require.Equal(t, "test-azure-key", headers["x-api-key"])
3129

3230
// Verify header in the returned header list
33-
require.Len(t, headerMut.SetHeaders, 1)
34-
require.Equal(t, "x-api-key", headerMut.SetHeaders[0].Header.Key)
35-
require.Equal(t, "test-azure-key", string(headerMut.SetHeaders[0].Header.RawValue))
31+
require.Len(t, hders, 1)
32+
require.Equal(t, "x-api-key", hders[0][0])
33+
require.Equal(t, "test-azure-key", hders[0][1])
3634
})
3735

3836
t.Run("trims whitespace", func(t *testing.T) {
3937
handler, err := newAnthropicAPIKeyHandler(&filterapi.AnthropicAPIKeyAuth{Key: " key-with-spaces "})
4038
require.NoError(t, err)
4139

4240
headers := make(map[string]string)
43-
headerMut := &extprocv3.HeaderMutation{}
4441

45-
err = handler.Do(context.Background(), headers, headerMut, nil)
42+
hdrs, err := handler.Do(context.Background(), headers, nil)
4643
require.NoError(t, err)
4744

4845
require.Equal(t, "key-with-spaces", headers["x-api-key"])
46+
require.Len(t, hdrs, 1)
47+
require.Equal(t, "x-api-key", hdrs[0][0])
48+
require.Equal(t, "key-with-spaces", hdrs[0][1])
4949
})
5050
}

internal/extproc/backendauth/api_key.go renamed to internal/backendauth/api_key.go

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,8 @@ import (
1010
"fmt"
1111
"strings"
1212

13-
corev3 "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
14-
extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
15-
1613
"github.com/envoyproxy/ai-gateway/internal/filterapi"
14+
"github.com/envoyproxy/ai-gateway/internal/internalapi"
1715
)
1816

1917
// apiKeyHandler implements [Handler] for api key authz.
@@ -28,10 +26,7 @@ func newAPIKeyHandler(auth *filterapi.APIKeyAuth) (Handler, error) {
2826
// Do implements [Handler.Do].
2927
//
3028
// Extracts the API key from the local file and sets it as an authorization header.
31-
func (a *apiKeyHandler) Do(_ context.Context, requestHeaders map[string]string, headerMut *extprocv3.HeaderMutation, _ *extprocv3.BodyMutation) error {
29+
func (a *apiKeyHandler) Do(_ context.Context, requestHeaders map[string]string, _ []byte) ([]internalapi.Header, error) {
3230
requestHeaders["Authorization"] = fmt.Sprintf("Bearer %s", a.apiKey)
33-
headerMut.SetHeaders = append(headerMut.SetHeaders, &corev3.HeaderValueOption{
34-
Header: &corev3.HeaderValue{Key: "Authorization", RawValue: []byte(requestHeaders["Authorization"])},
35-
})
36-
return nil
31+
return []internalapi.Header{{"Authorization", fmt.Sprintf("Bearer %s", a.apiKey)}}, nil
3732
}

internal/extproc/backendauth/api_key_test.go renamed to internal/backendauth/api_key_test.go

Lines changed: 5 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,6 @@ package backendauth
88
import (
99
"testing"
1010

11-
corev3 "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
12-
extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
1311
"github.com/stretchr/testify/require"
1412

1513
"github.com/envoyproxy/ai-gateway/internal/filterapi"
@@ -30,28 +28,15 @@ func TestApiKeyHandler_Do(t *testing.T) {
3028
require.NoError(t, err)
3129
require.NotNil(t, handler)
3230

33-
requestHeaders := map[string]string{":method": "POST"}
34-
headerMut := &extprocv3.HeaderMutation{
35-
SetHeaders: []*corev3.HeaderValueOption{
36-
{Header: &corev3.HeaderValue{
37-
Key: ":path",
38-
Value: "/model/some-random-model/converse",
39-
}},
40-
},
41-
}
42-
bodyMut := &extprocv3.BodyMutation{
43-
Mutation: &extprocv3.BodyMutation_Body{
44-
Body: []byte(`{"messages": [{"role": "user", "content": [{"text": "Say this is a test!"}]}]}`),
45-
},
46-
}
47-
err = handler.Do(t.Context(), requestHeaders, headerMut, bodyMut)
31+
requestHeaders := map[string]string{":method": "POST", ":path": "/model/some-random-model/converse"}
32+
hdrs, err := handler.Do(t.Context(), requestHeaders, nil)
4833
require.NoError(t, err)
4934

5035
bearerToken, ok := requestHeaders["Authorization"]
5136
require.True(t, ok)
5237
require.Equal(t, "Bearer test", bearerToken)
5338

54-
require.Len(t, headerMut.SetHeaders, 2)
55-
require.Equal(t, "Authorization", headerMut.SetHeaders[1].Header.Key)
56-
require.Equal(t, []byte("Bearer test"), headerMut.SetHeaders[1].Header.GetRawValue())
39+
require.Len(t, hdrs, 1)
40+
require.Equal(t, "Authorization", hdrs[0][0])
41+
require.Equal(t, "Bearer test", hdrs[0][1])
5742
}

internal/extproc/backendauth/auth.go renamed to internal/backendauth/auth.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,17 +9,17 @@ import (
99
"context"
1010
"errors"
1111

12-
extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
13-
1412
"github.com/envoyproxy/ai-gateway/internal/filterapi"
13+
"github.com/envoyproxy/ai-gateway/internal/internalapi"
1514
)
1615

1716
// Handler is the interface that deals with the backend auth for a specific backend.
1817
//
1918
// TODO: maybe this can be just a "post-transformation" handler, as it is not really only about auth.
2019
type Handler interface {
21-
// Do performs the backend auth, and make changes to the request headers and body mutations.
22-
Do(ctx context.Context, requestHeaders map[string]string, headerMut *extprocv3.HeaderMutation, bodyMut *extprocv3.BodyMutation) error
20+
// Do performs the backend auth and makes changes to the request headers passed in as `requestHeaders`.
21+
// It also returns a list of headers that were added or modified as a slice of key-value pairs.
22+
Do(ctx context.Context, requestHeaders map[string]string, mutatedBody []byte) ([]internalapi.Header, error)
2323
}
2424

2525
// NewHandler returns a new implementation of [Handler] based on the configuration.
File renamed without changes.

internal/extproc/backendauth/aws.go renamed to internal/backendauth/aws.go

Lines changed: 12 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,13 @@ import (
1515
"os"
1616
"strings"
1717
"time"
18-
"unsafe"
1918

2019
"github.com/aws/aws-sdk-go-v2/aws"
2120
v4 "github.com/aws/aws-sdk-go-v2/aws/signer/v4"
2221
"github.com/aws/aws-sdk-go-v2/config"
23-
corev3 "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
24-
extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
2522

2623
"github.com/envoyproxy/ai-gateway/internal/filterapi"
24+
"github.com/envoyproxy/ai-gateway/internal/internalapi"
2725
)
2826

2927
// awsHandler implements [Handler] for AWS Bedrock authz.
@@ -84,34 +82,21 @@ func newAWSHandler(ctx context.Context, awsAuth *filterapi.AWSAuth) (Handler, er
8482
//
8583
// This assumes that during the transformation, the path is set in `requestHeaders` under ":path" and
8684
// the transformed body is passed in as `mutatedBody`.
87-
func (a *awsHandler) Do(ctx context.Context, requestHeaders map[string]string, headerMut *extprocv3.HeaderMutation, bodyMut *extprocv3.BodyMutation) error {
85+
func (a *awsHandler) Do(ctx context.Context, requestHeaders map[string]string, mutatedBody []byte) ([]internalapi.Header, error) {
8886
method := requestHeaders[":method"]
89-
path := ""
90-
if headerMut.SetHeaders != nil {
91-
for _, h := range headerMut.SetHeaders {
92-
if h.Header.Key == ":path" {
93-
if len(h.Header.Value) > 0 {
94-
path = h.Header.Value
95-
} else {
96-
rv := h.Header.RawValue
97-
path = unsafe.String(&rv[0], len(rv))
98-
}
99-
break
100-
}
101-
}
102-
}
87+
path := requestHeaders[":path"]
10388

10489
var body []byte
105-
if _body := bodyMut.GetBody(); len(_body) > 0 {
106-
body = _body
90+
if len(mutatedBody) > 0 {
91+
body = mutatedBody
10792
}
10893

10994
payloadHash := sha256.Sum256(body)
11095
req, err := http.NewRequest(method,
11196
fmt.Sprintf("https://bedrock-runtime.%s.amazonaws.com%s", a.region, path),
11297
bytes.NewReader(body))
11398
if err != nil {
114-
return fmt.Errorf("cannot create request: %w", err)
99+
return nil, fmt.Errorf("cannot create request: %w", err)
115100
}
116101
// By setting the content length to -1, we can avoid the inclusion of the `Content-Length` header in the signature.
117102
// https://github.com/aws/aws-sdk-go-v2/blob/755839b2eebb246c7eec79b65404aee105196d5b/aws/signer/v4/v4.go#L427-L431
@@ -124,21 +109,21 @@ func (a *awsHandler) Do(ctx context.Context, requestHeaders map[string]string, h
124109

125110
credentials, err := a.credentialsProvider.Retrieve(ctx)
126111
if err != nil {
127-
return fmt.Errorf("cannot retrieve AWS credentials: %w", err)
112+
return nil, fmt.Errorf("cannot retrieve AWS credentials: %w", err)
128113
}
129114

130115
err = a.signer.SignHTTP(ctx, credentials, req,
131116
hex.EncodeToString(payloadHash[:]), "bedrock", a.region, time.Now())
132117
if err != nil {
133-
return fmt.Errorf("cannot sign request: %w", err)
118+
return nil, fmt.Errorf("cannot sign request: %w", err)
134119
}
135120

121+
var headers []internalapi.Header
136122
for key, hdr := range req.Header {
137123
if key == "Authorization" || strings.HasPrefix(key, "X-Amz-") {
138-
headerMut.SetHeaders = append(headerMut.SetHeaders, &corev3.HeaderValueOption{
139-
Header: &corev3.HeaderValue{Key: key, RawValue: []byte(hdr[0])}, // Assume aws-go-sdk always returns a single value.
140-
})
124+
headers = append(headers, internalapi.Header{key, hdr[0]})
125+
requestHeaders[key] = hdr[0]
141126
}
142127
}
143-
return nil
128+
return headers, nil
144129
}

0 commit comments

Comments
 (0)