Skip to content

Commit c7696fc

Browse files
committed
Move all reusable cluster config into a JSON document that can be saved in a secret
Signed-off-by: Avi Deitcher <avi@deitcher.net>
1 parent 4bc4d24 commit c7696fc

File tree

12 files changed

+362
-218
lines changed

12 files changed

+362
-218
lines changed

cmd/root.go

Lines changed: 49 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import (
1414
"github.com/spf13/cobra"
1515

1616
"github.com/aifoundry-org/oxide-controller/pkg/cluster"
17+
"github.com/aifoundry-org/oxide-controller/pkg/config"
1718
logpkg "github.com/aifoundry-org/oxide-controller/pkg/log"
1819
oxidepkg "github.com/aifoundry-org/oxide-controller/pkg/oxide"
1920
"github.com/aifoundry-org/oxide-controller/pkg/server"
@@ -163,11 +164,6 @@ func rootCmd() (*cobra.Command, error) {
163164
oxideToken = profileToken
164165
}
165166

166-
oxideConfig := &oxide.Config{
167-
Host: oxideAPIURL,
168-
Token: string(oxideToken),
169-
}
170-
171167
if strings.HasPrefix(oxideToken, "file:") {
172168
tokenFilePath := strings.TrimPrefix(oxideToken, "file:")
173169
oxideToken = ""
@@ -216,18 +212,56 @@ func rootCmd() (*cobra.Command, error) {
216212
cmd.SilenceUsage = true
217213

218214
ctx := context.Background()
219-
220-
c := cluster.New(logentry, oxideConfig, clusterProject,
221-
controlPlanePrefix, workerPrefix, int(controlPlaneCount), int(workerCount),
222-
cluster.NodeSpec{Image: cluster.Image{Name: controlPlaneImageName, Source: controlPlaneImageSource, Blocksize: controlPlaneImageBlocksize}, MemoryGB: int(controlPlaneMemory), CPUCount: int(controlPlaneCPU), ExternalIP: controlPlaneExternalIP, RootDiskSize: int(controlPlaneRootDiskSizeGB * cluster.GB), ExtraDiskSize: int(controlPlaneExtraDiskSizeGB * cluster.GB), TailscaleAuthKey: tailscaleAuthKey, TailscaleTag: tailscaleTag},
223-
cluster.NodeSpec{Image: cluster.Image{Name: workerImageName, Source: workerImageSource, Blocksize: workerImageBlocksize}, MemoryGB: int(workerMemory), CPUCount: int(workerCPU), ExternalIP: workerExternalIP, RootDiskSize: int(workerRootDiskSizeGB * cluster.GB), ExtraDiskSize: int(workerExtraDiskSizeGB * cluster.GB), TailscaleAuthKey: tailscaleAuthKey, TailscaleTag: tailscaleTag},
224-
imageParallelism,
225-
controlPlaneNamespace, controlPlaneSecret, pubkey,
226-
time.Duration(clusterInitWait)*time.Minute,
215+
controllerConfig := &config.ControllerConfig{
216+
UserSSHPublicKey: string(pubkey),
217+
OxideToken: oxideToken,
218+
OxideURL: oxideAPIURL,
219+
ClusterProject: clusterProject,
220+
ControlPlaneCount: controlPlaneCount,
221+
ControlPlaneSpec: config.NodeSpec{
222+
Image: config.Image{Name: controlPlaneImageName, Source: controlPlaneImageSource, Blocksize: controlPlaneImageBlocksize},
223+
Prefix: controlPlanePrefix,
224+
MemoryGB: int(controlPlaneMemory),
225+
CPUCount: int(controlPlaneCPU),
226+
ExternalIP: controlPlaneExternalIP,
227+
RootDiskSize: int(controlPlaneRootDiskSizeGB * cluster.GB),
228+
ExtraDiskSize: int(controlPlaneExtraDiskSizeGB * cluster.GB),
229+
TailscaleAuthKey: tailscaleAuthKey,
230+
TailscaleTag: tailscaleTag,
231+
},
232+
WorkerCount: workerCount,
233+
WorkerSpec: config.NodeSpec{
234+
Image: config.Image{Name: workerImageName, Source: workerImageSource, Blocksize: workerImageBlocksize},
235+
Prefix: workerPrefix,
236+
MemoryGB: int(workerMemory),
237+
CPUCount: int(workerCPU),
238+
ExternalIP: workerExternalIP,
239+
RootDiskSize: int(workerRootDiskSizeGB * cluster.GB),
240+
ExtraDiskSize: int(workerExtraDiskSizeGB * cluster.GB),
241+
TailscaleAuthKey: tailscaleAuthKey,
242+
TailscaleTag: tailscaleTag,
243+
},
244+
245+
ControlPlaneNamespace: controlPlaneNamespace,
246+
SecretName: controlPlaneSecret,
247+
Address: address,
248+
ControlLoopMins: controlLoopMins,
249+
ImageParallelism: imageParallelism,
250+
TailscaleAuthKey: tailscaleAuthKey,
251+
TailscaleAPIKey: tailscaleAPIKey,
252+
TailscaleTag: tailscaleTag,
253+
TailscaleTailnet: tailscaleTailnet,
254+
}
255+
c := cluster.New(
256+
logentry,
257+
controllerConfig,
227258
kubeconfigOverwrite,
228-
tailscaleAPIKey,
229-
tailscaleTailnet,
230259
controllerOCIImage,
260+
time.Duration(clusterInitWait)*time.Minute,
261+
262+
/*
263+
pubkey,
264+
*/
231265
)
232266
// we perform 2 execution loops of the cluster execute function:
233267
// - the first one is to create the cluster and get the kubeconfig

pkg/cluster/cluster.go

Lines changed: 47 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"strings"
1010
"time"
1111

12+
"github.com/aifoundry-org/oxide-controller/pkg/config"
1213
"github.com/aifoundry-org/oxide-controller/pkg/util"
1314
"k8s.io/client-go/kubernetes"
1415
"tailscale.com/client/tailscale/v2"
@@ -18,51 +19,36 @@ import (
1819
)
1920

2021
type Cluster struct {
21-
logger *log.Entry
22-
oxideConfig *oxide.Config
23-
projectID string
24-
controlPlanePrefix string
25-
workerPrefix string
26-
controlPlaneCount int
27-
clusterInitWait time.Duration
22+
// logger
23+
logger *log.Entry
24+
25+
// reusable config that should be loaded into the secret and shared, whether running locally or in-cluster
26+
config *config.ControllerConfig
27+
28+
// config that is derived locally
2829
kubeconfigOverwrite bool
29-
// workerCount per the CLI flags; once cluster is up and running, relies solely on amount stored in secret
30-
workerCount int
31-
controlPlaneSpec, workerSpec NodeSpec
32-
secretName string
33-
namespace string
34-
userPubkey []byte
35-
controlPlaneIP string
36-
imageParallelism int
37-
tailscaleAPIKey string
38-
tailscaleTailnet string
39-
clientset *kubernetes.Clientset
40-
apiConfig *Config
41-
ociImage string
30+
ociImage string // OCI image to use for the controller
31+
oxideConfig *oxide.Config
32+
clientset *kubernetes.Clientset
33+
apiConfig *Config
34+
projectID string // ID of the Oxide project
35+
initWait time.Duration // time to wait for the cluster to initialize
4236
}
4337

4438
// New creates a new Cluster instance
45-
func New(logger *log.Entry, oxideConfig *oxide.Config, projectID string, controlPlanePrefix, workerPrefix string, controlPlaneCount, workerCount int, controlPlaneSpec, workerSpec NodeSpec, imageParallelism int, namespace, secretName string, pubkey []byte, clusterInitWait time.Duration, kubeconfigOverwrite bool, tailscaleAPIKey, tailscaleTailnet, OCIimage string) *Cluster {
39+
func New(logger *log.Entry, ctrlrConfig *config.ControllerConfig, kubeconfigOverwrite bool, ociImage string, initWait time.Duration) *Cluster {
40+
//oxideConfig *oxide.Config, projectID string, controlPlanePrefix, workerPrefix string, controlPlaneCount, workerCount int, controlPlaneSpec, workerSpec NodeSpec, imageParallelism int, namespace, secretName string, pubkey []byte, clusterInitWait time.Duration, kubeconfigOverwrite bool, tailscaleAPIKey, tailscaleTailnet, OCIimage string)
4641
c := &Cluster{
47-
logger: logger.WithField("component", "cluster"),
48-
oxideConfig: oxideConfig,
49-
projectID: projectID,
50-
controlPlanePrefix: controlPlanePrefix,
51-
workerPrefix: workerPrefix,
52-
controlPlaneSpec: controlPlaneSpec,
53-
workerSpec: workerSpec,
54-
secretName: secretName,
55-
namespace: namespace,
56-
userPubkey: pubkey,
57-
clusterInitWait: clusterInitWait,
42+
logger: logger.WithField("component", "cluster"),
43+
config: ctrlrConfig,
44+
oxideConfig: &oxide.Config{
45+
Token: ctrlrConfig.OxideToken,
46+
Host: ctrlrConfig.OxideURL,
47+
},
5848
kubeconfigOverwrite: kubeconfigOverwrite,
59-
imageParallelism: imageParallelism,
60-
tailscaleAPIKey: tailscaleAPIKey,
61-
tailscaleTailnet: tailscaleTailnet,
62-
ociImage: OCIimage,
49+
ociImage: ociImage,
50+
initWait: initWait,
6351
}
64-
c.workerCount = workerCount
65-
c.controlPlaneCount = controlPlaneCount
6652
return c
6753
}
6854

@@ -74,18 +60,18 @@ func (c *Cluster) ensureClusterExists(ctx context.Context) (newKubeconfig []byte
7460
return nil, fmt.Errorf("failed to create Oxide API client: %v", err)
7561
}
7662
projectID := c.projectID
77-
controlPlanePrefix := c.controlPlanePrefix
78-
controlPlaneCount := c.controlPlaneCount
79-
secretName := c.secretName
63+
controlPlanePrefix := c.config.ControlPlaneSpec.Prefix
64+
controlPlaneCount := c.config.ControlPlaneCount
65+
secretName := c.config.SecretName
8066

8167
c.logger.Debugf("Checking if control plane IP %s exists", controlPlanePrefix)
8268
controlPlaneIP, err := c.ensureControlPlaneIP(ctx, controlPlanePrefix)
8369
if err != nil {
8470
return nil, fmt.Errorf("failed to get control plane IP: %w", err)
8571
}
8672

87-
if c.controlPlaneIP == "" {
88-
c.controlPlaneIP = controlPlaneIP.Ip
73+
if c.config.ControlPlaneIP == "" {
74+
c.config.ControlPlaneIP = controlPlaneIP.Ip
8975
}
9076

9177
c.logger.Debugf("Checking if %d control plane nodes exist with prefix %s", controlPlaneCount, controlPlanePrefix)
@@ -146,8 +132,8 @@ func (c *Cluster) ensureClusterExists(ctx context.Context) (newKubeconfig []byte
146132
return nil, fmt.Errorf("failed to generate SSH key pair: %w", err)
147133
}
148134
var pubkeyList []string
149-
if c.userPubkey != nil {
150-
pubkeyList = append(pubkeyList, string(c.userPubkey))
135+
if c.config.UserSSHPublicKey != "" {
136+
pubkeyList = append(pubkeyList, c.config.UserSSHPublicKey)
151137
}
152138
pubkeyList = append(pubkeyList, string(pub))
153139
// add the public key to the node in addition to the user one
@@ -166,7 +152,7 @@ func (c *Cluster) ensureClusterExists(ctx context.Context) (newKubeconfig []byte
166152
externalIP string
167153
fipAttached bool
168154
)
169-
if c.controlPlaneSpec.ExternalIP {
155+
if c.config.ControlPlaneSpec.ExternalIP {
170156
c.logger.Debugf("Control plane node %s has external IP, using that", hostid)
171157
ipList, err := client.InstanceExternalIpList(ctx, oxide.InstanceExternalIpListParams{
172158
Instance: oxide.NameOrId(hostid),
@@ -215,18 +201,18 @@ func (c *Cluster) ensureClusterExists(ctx context.Context) (newKubeconfig []byte
215201
clusterAccessIP := externalIP
216202

217203
// wait for the control plane node to be up and running
218-
timeLeft := c.clusterInitWait
204+
timeLeft := c.initWait
219205
for {
220206
c.logger.Infof("Waiting %s for control plane node to be up and running...", timeLeft)
221207
sleepTime := 30 * time.Second
222208
time.Sleep(sleepTime)
223209
timeLeft -= sleepTime
224210

225-
if c.tailscaleAPIKey != "" {
211+
if c.config.TailscaleAPIKey != "" {
226212
c.logger.Infof("Checking if control plane node has joined tailnet")
227213
client := &tailscale.Client{
228-
Tailnet: c.tailscaleTailnet,
229-
APIKey: c.tailscaleAPIKey,
214+
Tailnet: c.config.TailscaleTailnet,
215+
APIKey: c.config.TailscaleAPIKey,
230216
}
231217
ctx := context.Background()
232218
devices, err := client.Devices().List(ctx)
@@ -286,16 +272,9 @@ func (c *Cluster) ensureClusterExists(ctx context.Context) (newKubeconfig []byte
286272
return nil, fmt.Errorf("failed to run command to retrieve join token on control plane node: %w", err)
287273
}
288274
// record the join token and the system SSH key pair in the config; the config is serialized to JSON and saved to the secret further below
289-
secrets[secretKeySystemSSHPublic] = pub
290-
secrets[secretKeySystemSSHPrivate] = priv
291-
secrets[secretKeyJoinToken] = joinToken
292-
secrets[secretKeyOxideToken] = []byte(c.oxideConfig.Token)
293-
secrets[secretKeyOxideURL] = []byte(c.oxideConfig.Host)
294-
295-
// save the user ssh public key to the secrets map
296-
if c.userPubkey != nil {
297-
secrets[secretKeyUserSSH] = c.userPubkey
298-
}
275+
c.config.K3sJoinToken = string(joinToken)
276+
c.config.SystemSSHPublicKey = string(pub)
277+
c.config.SystemSSHPrivateKey = string(priv)
299278

300279
// get the kubeconfig
301280
kubeconfig, err := util.RunSSHCommand("root", fmt.Sprintf("%s:22", clusterAccessIP), priv, "cat /etc/rancher/k3s/k3s.yaml")
@@ -310,10 +289,6 @@ func (c *Cluster) ensureClusterExists(ctx context.Context) (newKubeconfig []byte
310289
re := regexp.MustCompile(`(server:\s*\w+://)(\d+\.\d+\.\d+\.\d+)(:\d+)`)
311290
kubeconfigString = re.ReplaceAllString(kubeconfigString, fmt.Sprintf("${1}%s${3}", clusterAccessIP))
312291

313-
// if we have worker node count explicitly defined, save it
314-
if c.workerCount > 0 {
315-
secrets[secretKeyWorkerCount] = []byte(fmt.Sprintf("%d", c.workerCount))
316-
}
317292
newKubeconfig = []byte(kubeconfigString)
318293

319294
// get a Kubernetes client
@@ -329,12 +304,18 @@ func (c *Cluster) ensureClusterExists(ctx context.Context) (newKubeconfig []byte
329304
c.clientset = clientset
330305

331306
// ensure we have the namespace we need
332-
namespace := c.namespace
307+
namespace := c.config.ControlPlaneNamespace
333308
if err := createNamespace(ctx, clientset, namespace); err != nil {
334309
return nil, fmt.Errorf("failed to create namespace: %w", err)
335310
}
336311

337-
// save the join token, system ssh key pair, user ssh key to the Kubernetes secret
312+
configJson, err := c.config.ToJSON()
313+
if err != nil {
314+
return nil, fmt.Errorf("failed to convert config to JSON: %w", err)
315+
}
316+
secrets[secretKeyConfig] = configJson
317+
318+
// save the config to the Kubernetes secret
338319
c.logger.Debugf("Saving secret %s/%s to Kubernetes", namespace, secretName)
339320
if err := saveSecret(ctx, clientset, c.logger, namespace, secretName, secrets); err != nil {
340321
return nil, fmt.Errorf("failed to save secret: %w", err)

pkg/cluster/const.go

Lines changed: 40 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,46 @@ const (
77

88
blockSize = 4096
99

10-
secretKeyUserSSH = "user-ssh-public-key"
11-
secretKeyJoinToken = "k3s-join-token"
12-
secretKeySystemSSHPublic = "system-ssh-public-key"
13-
secretKeySystemSSHPrivate = "system-ssh-private-key"
14-
secretKeyWorkerCount = "worker-count"
15-
secretKeyOxideToken = "oxide-token"
16-
secretKeyOxideURL = "oxide-url"
17-
maximumChunkSize = 512 * KB
10+
secretKeyConfig = "config"
11+
/*
12+
secretKeyUserSSH = "user-ssh-public-key"
13+
secretKeyJoinToken = "k3s-join-token"
14+
secretKeySystemSSHPublic = "system-ssh-public-key"
15+
secretKeySystemSSHPrivate = "system-ssh-private-key"
16+
secretKeyWorkerCount = "worker-count"
17+
secretKeyOxideToken = "oxide-token"
18+
secretKeyOxideURL = "oxide-url"
19+
secretKeyClusterProject = "cluster-project"
20+
secretKeyControlPlanePrefix = "control-plane-prefix"
21+
secretKeyWorkerPrefix = "worker-prefix"
22+
secretKeyControlPlaneCount = "control-plane-count"
23+
secretKeyControlPlaneImageName = "control-plane-image-name"
24+
secretKeyControlPlaneImageSource = "control-plane-image-source"
25+
secretKeyControlPlaneImageBlocksize = "control-plane-image-blocksize"
26+
secretKeyWorkerImageName = "worker-image-name"
27+
secretKeyWorkerImageSource = "worker-image-source"
28+
secretKeyWorkerImageBlocksize = "worker-image-blocksize"
29+
secretKeyWorkerRootDiskSizeGB = "worker-root-disk-size-gb"
30+
secretKeyWorkerExtraDiskSizeGB = "worker-extra-disk-size-gb"
31+
secretKeyControlPlaneRootDiskSizeGB = "control-plane-root-disk-size-gb"
32+
secretKeyControlPlaneExtraDiskSizeGB = "control-plane-extra-disk-size-gb"
33+
secretKeyControlPlaneMemory = "control-plane-memory"
34+
secretKeyWorkerMemory = "worker-memory"
35+
secretKeyControlPlaneCPU = "control-plane-cpu"
36+
secretKeyWorkerCPU = "worker-cpu"
37+
secretKeyControlPlaneNamespace = "control-plane-namespace"
38+
secretKeyWorkerExternalIP = "worker-external-ip"
39+
secretKeyControlPlaneExternalIP = "control-plane-external-ip"
40+
secretKeyAddress = "address"
41+
secretKeyControlLoopMins = "control-loop-mins"
42+
secretKeyImageParallelism = "image-parallelism"
43+
secretKeyTailscaleAuthKey = "tailscale-auth-key"
44+
secretKeyTailscaleAPIKey = "tailscale-api-key"
45+
secretKeyTailscaleTag = "tailscale-tag"
46+
secretKeyTailscaleTailnet = "tailscale-tailnet"
47+
*/
48+
49+
maximumChunkSize = 512 * KB
1850

1951
devModeOCIImage = "dev"
2052
utilityImageName = "alpine:3.21"

pkg/cluster/copy.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,13 +33,13 @@ func (c *Cluster) LoadControllerToClusterNodes(ctx context.Context, infile io.Re
3333
labelKey := "app"
3434
labelValue := "preload-binary"
3535
labels := map[string]string{labelKey: labelValue}
36-
if err := deployPreloadBinaryDaemonSet(c.clientset, c.namespace, preloadBinaryName, labels); err != nil {
36+
if err := deployPreloadBinaryDaemonSet(c.clientset, c.config.ControlPlaneNamespace, preloadBinaryName, labels); err != nil {
3737
return fmt.Errorf("deploying preload-binary DaemonSet: %w", err)
3838
}
39-
if err := copyToAllDaemonSetPods(c.clientset, c.apiConfig.Config, c.namespace, fmt.Sprintf("%s=%s", labelKey, labelValue), "writer", filepath.Join(containerDir, binaryName), infile); err != nil {
39+
if err := copyToAllDaemonSetPods(c.clientset, c.apiConfig.Config, c.config.ControlPlaneNamespace, fmt.Sprintf("%s=%s", labelKey, labelValue), "writer", filepath.Join(containerDir, binaryName), infile); err != nil {
4040
return fmt.Errorf("copying to all DaemonSet pods: %w", err)
4141
}
42-
if err := removePreloadBinaryDaemonSet(c.clientset, c.namespace, preloadBinaryName); err != nil {
42+
if err := removePreloadBinaryDaemonSet(c.clientset, c.config.ControlPlaneNamespace, preloadBinaryName); err != nil {
4343
return fmt.Errorf("removing preload-binary DaemonSet: %w", err)
4444
}
4545
// update our OCI image to point to the new image

0 commit comments

Comments
 (0)