Skip to content

Commit 0b5f667

Browse files
tollsimyMarcoRikiandreabuon
authored andcommitted
Inactive workloads optimization
Co-authored-by: Riccardo Marco Miracapillo s324163 <s324163@studenti.polito.it> Co-authored-by: Andrea Buonaurio <65712954+andreabuon@users.noreply.github.com>
1 parent 4c33ac7 commit 0b5f667

File tree

64 files changed

+5810
-569
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

64 files changed

+5810
-569
lines changed

.github/workflows/build-matrix.json

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,13 @@
66
"build-args": "COMPONENT=instance-operator",
77
"harbor-project": "crownlabs-core"
88
},
9+
{
10+
"component": "instance-automation",
11+
"context": "./operators",
12+
"dockerfile": "./operators/build/golang-common/Dockerfile",
13+
"build-args": "COMPONENT=instance-automation",
14+
"harbor-project": "crownlabs-core"
15+
},
916
{
1017
"component": "operator",
1118
"context": "./operators",
@@ -54,12 +61,6 @@
5461
"dockerfile": "./operators/build/crownlabs-image-list/Dockerfile",
5562
"harbor-project": "crownlabs-core"
5663
},
57-
{
58-
"component": "delete-stale-instances",
59-
"context": "./operators",
60-
"dockerfile": "./operators/build/delete-stale-instances/Dockerfile",
61-
"harbor-project": "crownlabs-core"
62-
},
6364
{
6465
"component": "frontend-app",
6566
"context": "./frontend",

.github/workflows/test.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,6 @@ jobs:
9191
working-directory: operators/
9292
run: |
9393
make test
94-
make test-python
9594
9695
- name: Send coverage
9796
if: steps.pathFilter.outputs.operators == 'true'

deploy/crownlabs/Chart.lock

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,12 @@ dependencies:
55
- name: qlkube
66
repository: file://../../qlkube/deploy/qlkube
77
version: 0.1.0
8+
- name: crownmail
9+
repository: file://../../operators/deploy/crownmail
10+
version: 0.1.0
811
- name: instance-operator
912
repository: file://../../operators/deploy/instance-operator
10-
version: 0.1.1
13+
version: 0.1.2
1114
- name: tenant-operator
1215
repository: file://../../operators/deploy/tenant-operator
1316
version: 0.1.0
@@ -17,9 +20,6 @@ dependencies:
1720
- name: image-list
1821
repository: file://../../operators/deploy/image-list
1922
version: 0.1.0
20-
- name: delete-stale-instances
21-
repository: file://../../operators/deploy/delete-stale-instances
22-
version: 0.1.0
2323
- name: exam-agent
2424
repository: file://../../operators/deploy/exam-agent
2525
version: 0.1.0
@@ -29,5 +29,5 @@ dependencies:
2929
- name: policies
3030
repository: file://../../policies
3131
version: 0.1.0
32-
digest: sha256:94f282fd3eb152d3693b5d98117d3cd48ca6570be13a48552de2b3b036696f46
33-
generated: "2022-07-08T12:10:33.942195264+02:00"
32+
digest: sha256:3ef8ee147fe7efb16c335870531e1a71deecbbe87d85671734d71c101560e235
33+
generated: "2025-08-22T12:30:28.50327485+02:00"

deploy/crownlabs/Chart.yaml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,13 @@ dependencies:
3535
repository: file://../../operators/deploy/operator
3636
condition: operator.enabled
3737

38+
- name: crownmail
39+
version: "0.1.0"
40+
repository: file://../../operators/deploy/crownmail
41+
condition: crownmail.enabled
42+
3843
- name: instance-operator
39-
version: "0.1.1"
44+
version: "0.1.2"
4045
repository: file://../../operators/deploy/instance-operator
4146
condition: instance-operator.enabled
4247

@@ -50,11 +55,6 @@ dependencies:
5055
repository: file://../../operators/deploy/image-list
5156
condition: image-list.enabled
5257

53-
- name: delete-stale-instances
54-
version: "0.1.0"
55-
repository: file://../../operators/deploy/delete-stale-instances
56-
condition: delete-stale-instances.enabled
57-
5858
- name: exam-agent
5959
version: "0.1.0"
6060
repository: file://../../operators/deploy/exam-agent

deploy/crownlabs/values.yaml

Lines changed: 47 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,21 @@ operator:
7777
baseWorkspaces: utilities
7878
enableMutating: true
7979

80+
crownmail:
81+
templates:
82+
name: crownmail-templates
83+
sourceFolder: mail-templates
84+
configs:
85+
name: crownmail-configs
86+
sourceFolder: mail-configs
87+
smtp:
88+
server: override.smtp.example.com
89+
port: "123"
90+
identity: override-debug
91+
username: override-debug@example.com
92+
password: override-debugpassword
93+
from: override-crownlabs@example.com
94+
8095
instance-operator:
8196
replicaCount: 1
8297
image:
@@ -89,8 +104,10 @@ instance-operator:
89104
instancesAuthUrl: https://crownlabs.example.com/auth
90105
containerEnvironmentOptions:
91106
tag: ""
92-
vncImage: crownlabs/tigervnc
93107
websockifyImage: crownlabs/websockify
108+
vncImage: crownlabs/tigervnc
109+
contentDownloaderImage: crownlabs/content-downloader
110+
contentUploaderImage: crownlabs/content-uploader
94111
instmetricsServerEndpoint: crownlabs-instmetrics.crownlabs-production:9090
95112
containerVmSnapshots:
96113
kanikoImage: gcr.io/kaniko-project/executor
@@ -100,6 +117,35 @@ instance-operator:
100117
url: registry.crownlabs.example.com
101118
secretName: registry-credentials
102119
maxConcurrentReconciles: 1
120+
maxAutomationConcurrentReconciles: 1
121+
monitoring:
122+
prometheusURL: http://kube-prometheus-stack-prometheus.monitoring:9090
123+
queryNginxAvailable: count(up{service="ingress-nginx-external-controller-metrics"})
124+
queryBastionSSHAvailable: count(up{container="bastion-operator-tracker-sidecar"})
125+
queryWebSSHAvailable: count(up{container="webssh"})
126+
queryNginxData: nginx_ingress_controller_requests{exported_namespace="%s", exported_service="%s"}
127+
queryBastionSSHData: bastion_ssh_connections{destination_ip="%s"}
128+
queryWebSSHData: bastion_web_ssh_connections{destination_ip="%s"}
129+
queryStep: 5m
130+
mailTemplateDir: /etc/crownmail/templates
131+
mailConfigDir: /etc/crownmail/configs
132+
automation:
133+
enableInstanceSubmission: true
134+
enableInstanceTermination: true
135+
enableInstanceInactiveTermination: true
136+
enableInstanceExpiration: true
137+
maxConcurrentTerminationReconciles: 1
138+
maxConcurrentInactiveTerminationReconciles: 1
139+
maxConcurrentExpirationReconciles: 1
140+
maxConcurrentSubmissionReconciles: 1
141+
terminationStatusCheckTimeout: "3s"
142+
terminationStatusCheckInterval: "2m"
143+
inactiveTerminationStatusCheckTimeout: "3s"
144+
inactiveTerminationMaxNumberOfAlerts: 3
145+
enableInactivityNotifications: true
146+
enableExpirationNotifications: true
147+
inactiveTerminationNotificationInterval: "24h"
148+
expirationNotificationInterval: "24h"
103149

104150
bastion-operator:
105151
replicaCount: 1
@@ -139,14 +185,6 @@ image-list:
139185
imageListName: crownlabs-virtual-machine-images
140186
updateInterval: 60
141187

142-
delete-stale-instances:
143-
image:
144-
repository: crownlabs/delete-stale-instances
145-
rbacResourcesName: crownlabs-delete-stale-instances
146-
configurations:
147-
dryRun: true
148-
schedule: "*/15 * * * *"
149-
150188
exam-agent:
151189
replicaCount: 1
152190
configurations:

operators/Makefile

Lines changed: 35 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,6 @@ gen: generate fmt vet manifests
2020
test:
2121
go test ./... -coverprofile coverage.out -covermode=count
2222

23-
test-python: python-dependencies
24-
python3 ./cmd/delete-stale-instances/test_delete_stale_instances.py
25-
2623
# Install CRDs into a cluster
2724
install: manifests
2825
kubectl apply -f deploy/crds
@@ -79,10 +76,25 @@ run-instance: generate
7976
--instances-auth-url=crownlabs.polito.it/app/instances/auth\
8077
--container-env-sidecars-tag=v0.14.5
8178

79+
run-instance-automation: generate
80+
go run cmd/instance-automation/main.go\
81+
--namespace-whitelist=crownlabs.polito.it/operator-selector=local\
82+
--container-env-sidecars-tag=v0.14.5\
83+
--enable-instance-submission=false\
84+
--enable-instance-termination=false\
85+
--enable-instance-inactive-termination=true\
86+
--enable-instance-expiration=false\
87+
--mail-template-dir=deploy/crownmail/mail-templates\
88+
--mail-config-dir=deploy/crownmail/mail-configs\
89+
--enable-inactivity-notifications=false\
90+
--enable-expiration-notifications=false
91+
8292
#the double target below is used to set DOMAIN for local targets
8393
#reference: https://www.gnu.org/software/make/manual/html_node/Target_002dspecific.html
8494
run-instance-local: DOMAIN="crownlabsfake.polito.it"
85-
run-instance-local: samples-local install-local run-instance
95+
run-instance-local: install-local samples-res-local run-instance
96+
97+
run-instance-automation-local: install-local samples-res-local run-instance-automation
8698

8799
run-operator: generate
88100
go run cmd/operator/main.go\
@@ -98,14 +110,27 @@ run-operator: generate
98110
run-tenant: run-operator
99111

100112
install-local: manifests
101-
kubectl apply -f deploy/crds
102-
kubectl apply -f tests/crds
113+
kubectl apply -f deploy/crds --wait
114+
kubectl apply -f tests/crds --wait
103115

104-
python-dependencies:
105-
pip3 install -r ./build/delete-stale-instances/requirements.txt
116+
uninstall-local: manifests
117+
kubectl delete -f deploy/crds
118+
kubectl delete -f tests/crds
106119

107-
samples-local:
120+
samples-res-local:
108121
kubectl apply -f ./samples/
109122

110-
clean-local:
123+
clean-res-local:
111124
kubectl delete -f ./samples/
125+
126+
clean-local: clean-res-local uninstall-local
127+
128+
force-clean-local:
129+
-for resource in $$(kubectl get tenants.crownlabs.polito.it -o name 2>/dev/null); do kubectl patch $$resource --type='merge' -p='{"metadata":{"finalizers":[]}}' 2>/dev/null || true; done
130+
-for resource in $$(kubectl get workspaces.crownlabs.polito.it -o name 2>/dev/null); do kubectl patch $$resource --type='merge' -p='{"metadata":{"finalizers":[]}}' 2>/dev/null || true; done
131+
-for resource in $$(kubectl get instances.crownlabs.polito.it -o name 2>/dev/null); do kubectl patch $$resource --type='merge' -p='{"metadata":{"finalizers":[]}}' 2>/dev/null || true; done
132+
-for resource in $$(kubectl get templates.crownlabs.polito.it -o name 2>/dev/null); do kubectl patch $$resource --type='merge' -p='{"metadata":{"finalizers":[]}}' 2>/dev/null || true; done
133+
-kubectl delete tenants.crownlabs.polito.it,workspaces.crownlabs.polito.it,instances.crownlabs.polito.it,templates.crownlabs.polito.it --all --force --grace-period=0 2>/dev/null || true
134+
-kubectl delete configmap crownlabs-mail-config --namespace=default --ignore-not-found
135+
-kubectl delete -f deploy/crds --ignore-not-found
136+
-kubectl delete -f tests/crds --ignore-not-found

operators/README.md

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,46 @@ The Instance Operator requires Golang 1.16 and `make`. To build the operator:
134134
go build ./cmd/instance-operator/main.go
135135
```
136136

137+
### Instance Automation Operator
138+
139+
The **Instance Automation Controller** (`instautoctrl` package) is responsible for all the automation tasks with regard to Instances, focusing on four main actions:
140+
141+
- instance inactivity
142+
- instance expiration
143+
- instance termination
144+
- instance submission
145+
146+
The first two actions ensure that unused or expired resources are efficiently managed, improving resource utilization and reducing unnecessary costs, while still providing tenants with proper notifications as well as flexibility to the administrator through configuration options and annotations.
147+
148+
The last two actions are instead related to exam automation: they allow Instances to be terminated automatically and exam files to be submitted.
149+
150+
You can find the full documentation [here](/operators/pkg/instautoctrl/README.md).
151+
152+
#### Instance Inactive Termination controller
153+
154+
This controller periodically checks running Instances to determine if they are still in use or can be terminated because of inactivity.
155+
Each **Template** resource associated with an Instance defines an `InactivityTimeout` field, which represents the period of inactivity after which the Instance is considered unused.
156+
If omitted, this field is automatically added to the Template resource with a default value of `never`, meaning that Instances created from that template will be ignored by this controller.
157+
158+
To evaluate whether an Instance is active, the controller relies on **Prometheus** metrics.
159+
It verifies whether the tenant has accessed the Instance recently, either through the frontend (by analyzing Ingress metrics) or via SSH (using a specific SSH bastion tracker metric).
160+
If activity is detected, the controller postpones the check.
161+
If no activity is recorded for a time longer than the `InactivityTimeout`, the process of inactivity handling begins.
162+
When an Instance has been marked as inactive, the controller starts sending email notifications to tenants, warning them that the Instance will be paused or deleted if they do not access it.
163+
The number of notifications sent is defined by the `inactiveTerminationMaxNumberOfAlerts` parameter in the Helm chart.
164+
Once this limit is reached, the controller takes action: **persistent Instances are paused**, while **non-persistent Instances are deleted**.
165+
After the final action, an additional email is sent to inform the tenant.
166+
Both the controller and the email notifications can be enabled or disabled through the Helm chart using the `enableInstanceInactiveTermination` and `enableInactivityNotifications` parameters.
167+
In addition, the behavior can be customized using annotations. For example, the `CustomNumberOfAlertsAnnotation` on a Template allows overriding the default number of notifications for a specific Instance type, while the `InstanceInactivityIgnoreNamespace` annotation, when set to `True` on a Namespace, completely excludes its Instances from the inactivity termination logic.
168+
169+
#### Instance Expiration controller
170+
While the Instance Inactive Termination Controller pauses or deletes Instances that have not been used for an extended period of time, this controller (_Instance Expiration Controller_) introduces an orthogonal feature: the ability to delete an Instance once its maximum lifespan has expired, regardless of whether the Instance has been used.
171+
Each Template defines a `DeleteAfter` field that specifies how long an Instance can exist before it must be removed. When an Instance reaches this limit, the controller automatically deletes it.
172+
As with the Instance Inactive Termination Controller, if the `DeleteAfter` field is omitted it defaults to `never`, meaning that Instances created from that template will be ignored by this controller.
173+
As with inactivity termination, this feature can be managed through Helm chart parameters: `enableInstanceExpiration` controls whether the controller is active, while `enableExpirationNotifications` enables or disables email alerts to inform tenants before deletion.
174+
This feature is useful when it is known in advance that an Instance will not be needed after a given period; for example, an Instance used to carry out an exam can be safely deleted once the exam has finished.
175+
The `ExpirationIgnoreNamespace` annotation, when set to `True`, causes all Instances in a Namespace to be ignored, preventing them from being deleted due to expiration.
176+
137177
## SSH bastion
138178

139179
The SSH bastion is composed of three basic blocks:

operators/api/v1alpha2/template_types.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,13 @@ type TemplateSpec struct {
7171
// or stopped to save resources. If set to "never", the instance will not be
7272
// automatically terminated.
7373
DeleteAfter string `json:"deleteAfter,omitempty"`
74+
75+
// +kubebuilder:validation:Pattern="^(never|[0-9]+[mhd])$"
76+
// +kubebuilder:default="never"
77+
// The maximum period of inactivity after which an Instance referencing
78+
// the current Template will be automatically stopped or deleted to
79+
// save resources.
80+
InactivityTimeout string `json:"inactivityTimeout,omitempty"`
7481
}
7582

7683
// TemplateStatus reflects the most recently observed status of the Template.

operators/build/delete-stale-instances/Dockerfile

Lines changed: 0 additions & 12 deletions
This file was deleted.

operators/build/delete-stale-instances/requirements.txt

Lines changed: 0 additions & 1 deletion
This file was deleted.

0 commit comments

Comments
 (0)