CommonWorkflowScheduler
diff --git a/‎README.md
Lines changed: 86 additions & 13 deletions b/‎README.md
Lines changed: 86 additions & 13 deletions
diff --git a/‎pom.xml
Lines changed: 11 additions & 8 deletions b/‎pom.xml
Lines changed: 11 additions & 8 deletions
diff --git a/‎src/main/java/cws/k8s/scheduler/Main.java
Lines changed: 1 addition & 1 deletion b/‎src/main/java/cws/k8s/scheduler/Main.java
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/main/java/cws/k8s/scheduler/client/CWSKubernetesClient.java
Lines changed: 90 additions & 6 deletions b/‎src/main/java/cws/k8s/scheduler/client/CWSKubernetesClient.java
Lines changed: 90 additions & 6 deletions
diff --git a/‎src/main/java/cws/k8s/scheduler/client/CannotPatchException.java
Lines changed: 9 additions & 0 deletions b/‎src/main/java/cws/k8s/scheduler/client/CannotPatchException.java
Lines changed: 9 additions & 0 deletions
diff --git a/‎src/main/java/cws/k8s/scheduler/dag/Process.java
Lines changed: 20 additions & 0 deletions b/‎src/main/java/cws/k8s/scheduler/dag/Process.java
Lines changed: 20 additions & 0 deletions
@@ -13,19 +13,20 @@ docker push <your docker account>/cws:<version>
 
 #### API Endpoints
 
-| #  | Resource                            | Method |
-| -- | :---------------------------------- | :----: |
-| 1  | /{version}/{execution}              | POST   |
-| 2  | /{version}/{execution}              | DELETE |
-| 3  | /{version}/{execution}/DAG/vertices | POST   |
-| 4  | /{version}/{execution}/DAG/vertices | DELETE |
-| 5  | /{version}/{execution}/DAG/edges    | POST   |
-| 6  | /{version}/{execution}/DAG/edges    | DELETE |
-| 7  | /{version}/{execution}/startBatch   | PUT    |
-| 8  | /{version}/{execution}/endBatch     | PUT    |
-| 9  | /{version}/{execution}/task/{id}    | POST   |
-| 10 | /{version}/{execution}/task/{id}    | GET    |
-| 11 | /{version}/{execution}/task/{id}    | DELETE |
+| #  | Resource                                           | Method |
+|----|:---------------------------------------------------|:------:|
+| 1  | /{version}/scheduler/{execution}                   |  POST  |
+| 2  | /{version}/scheduler/{execution}                   | DELETE |
+| 3  | /{version}/scheduler/{execution}/DAG/vertices      |  POST  |
+| 4  | /{version}/scheduler/{execution}/DAG/vertices      | DELETE |
+| 5  | /{version}/scheduler/{execution}/DAG/edges         |  POST  |
+| 6  | /{version}/scheduler/{execution}/DAG/edges         | DELETE |
+| 7  | /{version}/scheduler/{execution}/startBatch        |  PUT   |
+| 8  | /{version}/scheduler/{execution}/endBatch          |  PUT   |
+| 9  | /{version}/scheduler/{execution}/task/{id}         |  POST  |
+| 10 | /{version}/scheduler/{execution}/task/{id}         |  GET   |
+| 11 | /{version}/scheduler/{execution}/task/{id}         | DELETE |
+| 12 | /{version}/scheduler/{execution}/metrics/task/{id} |  POST  |
 
 SWAGGER:  /swagger-ui.html <br>
 API-DOCS: /v3/api-docs
@@ -122,6 +123,78 @@ spec:
       claimName: api-exp-data
 ```
 
+#### Profiles
+This is a Spring Boot application that can be run with profiles. The "default" profile is used if no profile is configured. The "dev" profile can be enabled by setting the JVM system property
+
+        -Dspring.profiles.active=dev
+or Environment Variable
+
+        export spring_profiles_active=dev
+or via the corresponding setting in your development environment or within the pod definition.
+
+Example:
+
+        $ SCHEDULER_NAME=workflow-scheduler java -Dspring.profiles.active=dev -jar cws-k8s-scheduler-1.2.jar
+
+The "dev" profile is useful for debugging and reporting problems because it increases the log-level.
+
+---
+#### Memory Prediction and Task Scaling
+- Kubernetes Feature InPlacePodVerticalScaling must be enabled. This is available starting from Kubernetes v1.27. See [KEP 1287](https://github.com/kubernetes/enhancements/issues/1287) for the current status.
+- Supported if used together with [nf-cws](https://github.com/CommonWorkflowScheduler/nf-cws) version 1.0.5 or newer.
+
+The memory predictor that shall be used for task scaling is set via the configuration. If not set, task scaling is disabled.
+The memory predictor is provided as a string following the pattern "&lt;memory predictor&gt;[-&lt;additional&gt;=&lt;parameter&gt;]", e.g., "linear-offset=std".
+The following strategies are available:
+
+| Memory Predictor | Behaviour                                                                                                                                                                               |
+|------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| linear/lr        | The Linear predictor tries to predict a memory usage that is linear in the task input size.                                                                                             |
+| linear2/lr2      | The Linear predictor with an unequal loss function. The loss penalizes underprediction more than overprediction.                                                                        |
+| mean             | The Mean predictor predicts the mean memory seen so far. Prediction is independent of the input size.                                                                                   |
+| ponder           | The Ponder predictor is an advanced memory prediction strategy that ponders between linear regression with unequal loss and historic values. Details are provided in our paper [tbd](). |
+| constX           | Predicts a constant value (X), if no X is given, it predicts 0.                                                                                                                         |
+| polyX            | Prediction will be based on the Xth polynomial function based on a task's input size. If no X is provided, it uses X=2.                                                                 |
+
+
+To determine the offset, the current prediction model is used to predict the memory for all finished tasks.
+Then, the difference between the observed memory and the predicted memory is calculated for each of these tasks.
+
+| Offset      | Behaviour                                                                                                     |
+|-------------|---------------------------------------------------------------------------------------------------------------|
+| none        | No additional offset will be applied.                                                                         |
+| ""          | If no offset is defined, the max offset will be used.                                                         |
+| max         | The max offset returns the largest underprediction.                                                           |
+| Xpercentile | X is an integer between 1 and 100; the Xth percentile over all prediction differences is used as the offset.  |
+| var         | This offset applies the variance as an offset.                                                                |
+| Xstd        | This offset applies X times the standard deviation as an offset. If no X is provided, it uses X=1.            |
+
+#### Scheduling strategies
+
+The scheduling strategy can be set via the configuration.
+The scheduling strategy is provided as a string following the pattern "&lt;scheduling strategy&gt;[-&lt;node assignment strategy&gt;]".
+The following strategies are available:
+
+| Scheduling Strategy | Behaviour                                                                                                                                                |
+|---------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------|
+| fifo                | Tasks that have been submitted earlier, will be scheduled earlier.                                                                                       |
+| rank                | Tasks will be prioritized based on their rank in the DAG.                                                                                                |
+| rank_min            | Rank (min) Same as rank but breaks ties such that tasks with smaller input size are preferred.                                                           |
+| rank_max            | Rank (max) Same as rank but breaks ties such that tasks with larger input size are preferred.                                                            |
+| lff_min             | Least finished first (min): prioritizes abstract tasks where fewer instances have finished, breaks ties with rank_min                                    |
+| lff_max             | Least finished first (max): prioritizes abstract tasks where fewer instances have finished, breaks ties with rank_max                                    |
+| gs_min              | Generate Samples (min) Hybrid of LFF (min) and Rank (max), prioritize abstract tasks with less than five finished instances. Afterwards, use Rank (max). |
+| gs_max              | Generate Samples (max) Hybrid of LFF (max) and Rank (max), prioritize abstract tasks with less than five finished instances. Afterwards, use Rank (max). |
+| random              | Randomly prioritize tasks.                                                                                                                               |
+| max                 | Prioritize tasks with larger input size.                                                                                                                 |
+| min                 | Prioritize tasks with smaller input size.                                                                                                                |
+
+| Node Assignment Strategy | Behaviour                                                                               |
+|--------------------------|-----------------------------------------------------------------------------------------|
+| random                   | Randomly distributes the tasks to nodes.                                                |
+| roundrobin               | (default) Assigns tasks in a round robin fashion to the nodes.                          |
+| fair                     | Distributes the tasks fairly to the nodes trying to achieve equal load on all machines. |
+
 ---
 
 If you use this software or artifacts in a publication, please cite it as:
 
@@ -45,13 +45,7 @@
         <dependency>
             <groupId>io.fabric8</groupId>
             <artifactId>kubernetes-client</artifactId>
-            <version>6.9.0</version>
-        </dependency>
-
-        <dependency>
-            <groupId>org.javatuples</groupId>
-            <artifactId>javatuples</artifactId>
-            <version>1.2</version>
+            <version>6.13.1</version>
         </dependency>
 
         <dependency>
@@ -75,12 +69,14 @@
         <dependency>
             <groupId>org.springframework.boot</groupId>
             <artifactId>spring-boot-starter-web</artifactId>
+            <version>3.3.1</version>
         </dependency>
 
         <dependency>
             <groupId>org.springframework.boot</groupId>
             <artifactId>spring-boot-starter-test</artifactId>
             <scope>test</scope>
+            <version>3.3.1</version>
         </dependency>
 
         <dependency>
@@ -106,7 +102,7 @@
         <dependency>
             <groupId>commons-net</groupId>
             <artifactId>commons-net</artifactId>
-            <version>3.8.0</version>
+            <version>3.10.0</version>
         </dependency>
 
        <dependency>
@@ -118,6 +114,7 @@
         <dependency>
             <groupId>ch.qos.logback</groupId>
             <artifactId>logback-core</artifactId>
+            <version>1.5.3</version>
         </dependency>
         <dependency>
             <groupId>com.fasterxml.jackson.core</groupId>
@@ -134,6 +131,12 @@
             <artifactId>jackson-annotations</artifactId>
         </dependency>
 
+        <dependency>
+            <groupId>org.apache.commons</groupId>
+            <artifactId>commons-math3</artifactId>
+            <version>3.6.1</version>
+        </dependency>
+
     </dependencies>
 
     <build>
 
@@ -1,12 +1,12 @@
 package cws.k8s.scheduler;
 
+import jakarta.annotation.PostConstruct;
 import lombok.extern.slf4j.Slf4j;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.boot.SpringApplication;
 import org.springframework.boot.autoconfigure.SpringBootApplication;
 import org.springframework.boot.info.BuildProperties;
 
-import jakarta.annotation.PostConstruct;
 import java.text.DateFormat;
 import java.text.SimpleDateFormat;
 import java.util.Date;
 
@@ -2,7 +2,11 @@
 
 import cws.k8s.scheduler.model.NodeWithAlloc;
 import cws.k8s.scheduler.model.PodWithAge;
+import cws.k8s.scheduler.model.Task;
 import io.fabric8.kubernetes.api.model.*;
+import io.fabric8.kubernetes.client.KubernetesClientException;
+import io.fabric8.kubernetes.client.Watcher;
+import io.fabric8.kubernetes.client.WatcherException;
 import io.fabric8.kubernetes.client.*;
 import io.fabric8.kubernetes.client.Config;
 import io.fabric8.kubernetes.client.dsl.MixedOperation;
@@ -12,6 +16,7 @@
 import lombok.extern.slf4j.Slf4j;
 
 import java.math.BigDecimal;
+import java.math.RoundingMode;
 import java.util.*;
 
 @Slf4j
@@ -22,7 +27,6 @@ public class CWSKubernetesClient {
     private final Map<String, NodeWithAlloc> nodeHolder = new HashMap<>();
     private final List<Informable> informables = new LinkedList<>();
 
-
     public CWSKubernetesClient() {
         KubernetesClientBuilder builder = new KubernetesClientBuilder();
         this.client = builder.build();
@@ -146,6 +150,13 @@ public void eventReceived(Action action, Node node) {
             boolean change = false;
             NodeWithAlloc processedNode = null;
             switch (action) {
+                case MODIFIED:
+                    final NodeWithAlloc nodeWithAlloc = kubernetesClient.nodeHolder.get( node.getMetadata().getName() );
+                    if ( nodeWithAlloc != null ){
+                        nodeWithAlloc.update( node );
+                        break;
+                    }
+                    // If the node is not in the nodeHolder, it is a new node
                 case ADDED:
                     log.info("New Node {} was added", node.getMetadata().getName());
                     synchronized ( kubernetesClient.nodeHolder ){
@@ -175,10 +186,6 @@ public void eventReceived(Action action, Node node) {
                     log.info("Node {} has an error", node.getMetadata().getName());
                     //todo deal with error
                     break;
-                case MODIFIED:
-                    log.info("Node {} was modified", node.getMetadata().getName());
-                    //todo deal with changed state
-                    break;
                 default: log.warn("No implementation for {}", action);
             }
         }
@@ -205,7 +212,7 @@ public void eventReceived(Action action, Pod pod) {
                 switch ( action ){
                     case ADDED:
                         if ( !PodWithAge.hasFinishedOrFailed( pod ) ) {
-                            node.addPod(new PodWithAge(pod), false);
+                            node.addPod(new PodWithAge(pod));
                         }
                         break;
                     case MODIFIED:
@@ -236,4 +243,81 @@ public void onClose(WatcherException cause) {
 
     }
 
+    /**
+     * Convenience check for the feature gate this scheduler needs for task scaling.
+     *
+     * @return the result of {@link #featureGateActive(String)} for the gate
+     *         {@code InPlacePodVerticalScaling}
+     */
+    public boolean inPlacePodVerticalScalingActive() {
+        final String gate = "InPlacePodVerticalScaling";
+        return featureGateActive( gate );
+    }
+
+    /**
+     * Checks whether a feature gate is explicitly enabled on the cluster's API server.
+     * <p>
+     * Lists the pods in the {@code kube-system} namespace, keeps those whose name starts with
+     * {@code kube-apiserver}, and inspects the command and the args of each of their containers
+     * for a {@code --feature-gates=} flag. The flag value is read as a comma-separated list of
+     * {@code <gate>=<bool>} pairs, so the gate is also found when several gates are configured
+     * in the same flag.
+     * <p>
+     * Only the command line is inspected: a gate that is not listed there is reported as
+     * inactive, regardless of its default in the running Kubernetes version.
+     *
+     * @param featureGate name of the feature gate, e.g. {@code InPlacePodVerticalScaling}
+     * @return true if at least one kube-apiserver container sets the gate to true
+     */
+    public boolean featureGateActive( String featureGate ){
+        return pods()
+                .inNamespace( "kube-system" )
+                .list()
+                .getItems()
+                .stream()
+                .filter( p -> p.getMetadata().getName().startsWith( "kube-apiserver" ) )
+                .flatMap( p -> p.getSpec().getContainers().stream() )
+                .anyMatch( c -> enablesFeatureGate( c.getCommand(), featureGate )
+                        || enablesFeatureGate( c.getArgs(), featureGate ) );
+    }
+
+    /** Returns true if one of the arguments is a --feature-gates flag that sets featureGate to true. */
+    private static boolean enablesFeatureGate( List<String> arguments, String featureGate ) {
+        if ( arguments == null ) {
+            return false;
+        }
+        final String flag = "--feature-gates=";
+        for ( String argument : arguments ) {
+            if ( argument == null || !argument.startsWith( flag ) ) {
+                continue;
+            }
+            // The flag value is a comma-separated list such as "A=true,B=false".
+            for ( String entry : argument.substring( flag.length() ).split( "," ) ) {
+                final String[] keyValue = entry.trim().split( "=", 2 );
+                if ( keyValue.length == 2
+                        && keyValue[0].equals( featureGate )
+                        && Boolean.parseBoolean( keyValue[1] ) ) {
+                    return true;
+                }
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Creates a patch that sets the memory request and the memory limit of the task's pod to
+     * the task's planned RAM and submits it to the cluster.
+     * The planned RAM is divided by 1024 * 1024 and rounded up, and the result is sent with the
+     * unit "Mi" (assumes the planned RAM is given in bytes - TODO confirm).
+     * After the patch was accepted, the task is updated with the modified pod.
+     *
+     * @param t the task to be patched
+     * @return always true; a rejected patch is reported via {@link CannotPatchException}
+     * @throws CannotPatchException if the Kubernetes client reports an error, which typically
+     *                              happens when the feature gate InPlacePodVerticalScaling is
+     *                              not enabled; the original exception is attached as cause
+     */
+    public boolean patchTaskMemory( Task t ) {
+        try {
+            final String valueAsString = t.getPlanedRequirements().getRam()
+                    .divide( BigDecimal.valueOf( 1024L * 1024L ) )
+                    .setScale( 0, RoundingMode.CEILING ).toPlainString() + "Mi";
+            final PodWithAge pod = t.getPod();
+            String namespace = pod.getMetadata().getNamespace();
+            String podname = pod.getName();
+            Resource<Pod> podResource = pods()
+                    .inNamespace( namespace )
+                    .withName( podname );
+            // Fetch the pod once so that the container and the pod stem from the same snapshot.
+            final Pod currentPod = podResource.get();
+            Container container = currentPod.getSpec().getContainers().get(0); // Assuming only one container
+            Container modifiedContainer = new ContainerBuilder(container)
+                    .editOrNewResources()
+                    .removeFromLimits( "memory" )
+                    .removeFromRequests( "memory" )
+                    .addToLimits("memory", new Quantity(valueAsString))
+                    .addToRequests("memory", new Quantity(valueAsString))
+                    .endResources()
+                    .build();
+
+            Pod modifiedPod = new PodBuilder( currentPod )
+                    .editOrNewSpec()
+                    .removeFromContainers( container )
+                    .addToContainers(modifiedContainer)
+                    .endSpec()
+                    .editOrNewMetadata()
+                    .addToLabels( "commonworkflowscheduler/memoryscaled", "true" )
+                    .endMetadata()
+                    .build();
+
+            podResource.patch(modifiedPod);
+
+            // Only remember the modified pod once the cluster has accepted the patch.
+            t.setPod( new PodWithAge( modifiedPod ) );
+
+        } catch ( KubernetesClientException e ) {
+            // this typically happens when the feature gate InPlacePodVerticalScaling was not enabled
+            if (e.toString().contains("Forbidden: pod updates may not change fields other than")) {
+                log.error("Could not patch task. Please make sure that the feature gate 'InPlacePodVerticalScaling' is enabled in Kubernetes. See https://github.com/kubernetes/enhancements/issues/1287 for details. Task scaling will now be disabled for the rest of this workflow execution.");
+            } else {
+                log.error("Could not patch task: {}", t.getConfig().getName(), e);
+            }
+            // Keep the original exception as cause so that the stack trace is not lost.
+            final CannotPatchException failure = new CannotPatchException( e.getMessage() );
+            failure.initCause( e );
+            throw failure;
+        }
+        return true;
+    }
+
 }
@@ -0,0 +1,9 @@
+package cws.k8s.scheduler.client;
+
+/**
+ * Unchecked exception signalling that a patch could not be applied.
+ */
+public class CannotPatchException extends RuntimeException {
+
+    /**
+     * @param message description of why patching failed
+     */
+    public CannotPatchException(String message) {
+        super(message);
+    }
+
+    /**
+     * @param message description of why patching failed
+     * @param cause   the underlying exception, kept so that its stack trace is not lost
+     */
+    public CannotPatchException(String message, Throwable cause) {
+        super(message, cause);
+    }
+
+}
@@ -4,13 +4,33 @@
 import java.util.HashSet;
 import java.util.LinkedList;
 import java.util.Set;
+import java.util.concurrent.atomic.AtomicInteger;
 
 public class Process extends Vertex {
 
 
     private final Set<Process> descendants;
     private final Set<Process> ancestors;
 
+    /** Counter read by {@link #getSuccessfullyFinished()}; presumably counts successfully finished task instances of this process. */
+    private final AtomicInteger successfullyFinished = new AtomicInteger(0);
+    /** Counter read by {@link #getFailed()}; presumably counts failed task instances of this process. */
+    private final AtomicInteger failed = new AtomicInteger(0);
+
+    /**
+     * @return the current value of the successfully-finished counter
+     */
+    public int getSuccessfullyFinished() {
+        return successfullyFinished.get();
+    }
+
+    /**
+     * @return the current value of the failed counter
+     */
+    public int getFailed() {
+        return failed.get();
+    }
+
+    /**
+     * Atomically increases the successfully-finished counter by one.
+     */
+    public void incrementSuccessfullyFinished() {
+        successfullyFinished.incrementAndGet();
+    }
+
+    /**
+     * Atomically increases the failed counter by one.
+     */
+    public void incrementFailed() {
+        failed.incrementAndGet();
+    }
+
     void addDescendant( Process p ) {
         synchronized (descendants) {
             descendants.add( p );