From 384e34e55574489217c75dd0fc47e83b658926d7 Mon Sep 17 00:00:00 2001
From: atrigila <18577080+atrigila@users.noreply.github.com>
Date: Mon, 5 May 2025 21:37:37 +0000
Subject: [PATCH 1/3] feat: add formula and contrasts to limma

---
 modules/nf-core/limma/differential/main.nf    |   2 +-
 .../limma/differential/templates/limma_de.R   |  26 ++++
 .../limma/differential/tests/main.nf.test     | 130 +++++++++++++++++-
 3 files changed, 154 insertions(+), 4 deletions(-)

diff --git a/modules/nf-core/limma/differential/main.nf b/modules/nf-core/limma/differential/main.nf
index b7106c397b6..e753ad41ad6 100644
--- a/modules/nf-core/limma/differential/main.nf
+++ b/modules/nf-core/limma/differential/main.nf
@@ -8,7 +8,7 @@ process LIMMA_DIFFERENTIAL {
         'community.wave.seqera.io/library/bioconductor-edger_bioconductor-limma:176c202c82450990' }"
 
     input:
-    tuple val(meta), val(contrast_variable), val(reference), val(target)
+    tuple val(meta), val(contrast_variable), val(reference), val(target), val(formula), val(comparison)
     tuple val(meta2), path(samplesheet), path(intensities)
 
     output:
diff --git a/modules/nf-core/limma/differential/templates/limma_de.R b/modules/nf-core/limma/differential/templates/limma_de.R
index 7db3967677e..9885a48797b 100644
--- a/modules/nf-core/limma/differential/templates/limma_de.R
+++ b/modules/nf-core/limma/differential/templates/limma_de.R
@@ -52,6 +52,17 @@ read_delim_flexible <- function(file, header = TRUE, row.names = NULL, check.nam
     )
 }
 
+#
+#' Convert the string "null" (any letter case) or "" into a real NULL
+#'
+#' @param x A single option value; non-character input is returned unchanged
+#'
+#' @return NULL if x is the character "null" (case-insensitive) or "", else x
+#'
+nullify <- function(x) {
+  if (is.character(x) && (tolower(x) == "null" || x == "")) NULL else x
+}
+
 ################################################
 ################################################
 ## PARSE PARAMETERS FROM NEXTFLOW             ##
@@ -71,6 +82,8 @@ opt <- list(
     contrast_variable = '$contrast_variable',
     reference_level = '$reference',
     target_level = '$target',
+    formula                 = '$formula',
+    contrast_string         = '$comparison',
     blocking_variables = NULL,
     probe_id_col = "probe_id",
     sample_id_col = "experiment_accession",
@@ -116,6 +129,10 @@ if ( ! is.null(opt\$round_digits)){
     opt\$round_digits <- as.numeric(opt\$round_digits)
 }
 
+# Nextflow renders an unset formula/comparison as the literal string "null" (or ""); turn those into a real NULL
+keys <- c("formula", "contrast_string")
+opt[keys] <- lapply(opt[keys], nullify)
+
 # Check if required parameters have been provided
 
 required_opts <- c('contrast_variable', 'reference_level', 'target_level', 'output_prefix')
@@ -282,6 +299,10 @@ if ((! is.null(opt\$exclude_samples_col)) && (! is.null(opt\$exclude_samples_val
 ################################################
 ################################################
 
+if (!is.null(opt\$formula)) {
+    model <- opt\$formula
+} else {
+
 # Build the model formula with blocking variables first
 model_vars <- c()
 
@@ -302,6 +323,7 @@ for (v in vars_to_factor) {
     sample.sheet[[v]] <- as.factor(sample.sheet[[v]])
 }
 
+}
 ################################################
 ################################################
 ## Run Limma processes                        ##
@@ -393,6 +415,9 @@ fit <- do.call(lmFit, lmfit_args)
 # Contrasts bit
 
 # Create the contrast string for the specified comparison
+if (!is.null(opt\$contrast_string)) {
+    contrast_string <- opt\$contrast_string
+} else {
 
 # Construct the expected column names for the target and reference levels in the design matrix
 treatment_target <- paste0(contrast_variable, ".", opt\$target_level)
@@ -418,6 +443,7 @@ if ((treatment_target %in% colnames(design)) && (treatment_reference %in% colnam
     # This indicates an error; the specified levels are not found
     stop(paste0(treatment_target, " and ", treatment_reference, " not found in design matrix"))
 }
+}
 
 # Create the contrast matrix
 contrast.matrix <- makeContrasts(contrasts=contrast_string, levels=design)
diff --git a/modules/nf-core/limma/differential/tests/main.nf.test b/modules/nf-core/limma/differential/tests/main.nf.test
index 551a53f12d7..a8667daf371 100644
--- a/modules/nf-core/limma/differential/tests/main.nf.test
+++ b/modules/nf-core/limma/differential/tests/main.nf.test
@@ -54,7 +54,7 @@ nextflow_process {
                 )
                 input[0] = Channel.of(['id': 'diagnosis_normal_uremia', 'variable': 'diagnosis', 'reference': 'normal', 'target': 'uremia'])
                     .map{
-                        tuple(it, it.variable, it.reference, it.target)
+                        tuple(it, it.variable, it.reference, it.target, it.formula, it.comparison)
                     }
                 input[1] =  ch_samplesheet
                     .join(AFFY_JUSTRMA.out.expression)
@@ -72,6 +72,130 @@ nextflow_process {
             )
         }
 
+    }
+
+        test("test_limma_differential - null formula and null complex contrasts") {
+
+        config "./nextflow.config"
+
+        setup {
+            run("UNTAR") {
+                script "../../../untar/main.nf"
+                process {
+                    """
+                    input[0] = [
+                        [id: 'test'],
+                        file(params.modules_testdata_base_path + "genomics/homo_sapiens/array_expression/GSE38751_RAW.tar", checkIfExists: true)
+                    ]
+                    """
+                }
+            }
+            run("AFFY_JUSTRMA") {
+                script "../../../affy/justrma/main.nf"
+                process {
+                    """
+                    ch_samplesheet = Channel.of([
+                        [ id:'test' ],
+                        file(params.modules_testdata_base_path + "genomics/homo_sapiens/array_expression/GSE38751.csv", checkIfExists: true)
+                        ]
+                    )
+                    input[0] = ch_samplesheet.join(UNTAR.out.untar)
+                    input[1] = [[],[]]
+                    """
+                }
+            }
+        }
+
+        when {
+            process {
+                """
+                ch_samplesheet = Channel.of([
+                    [ id:'test' ],
+                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/array_expression/GSE38751.csv", checkIfExists: true)
+                    ]
+                )
+                input[0] = Channel.of(['id': 'diagnosis_normal_uremia', 'variable': 'diagnosis', 'reference': 'normal', 'target': 'uremia'])
+                    .map{
+                        tuple(it, it.variable, it.reference, it.target, it.formula, it.comparison)
+                    }
+                input[1] =  ch_samplesheet
+                    .join(AFFY_JUSTRMA.out.expression)
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out.model, process.out.versions).match() },
+                { assert path(process.out.session_info[0][1]).getText().contains("limma_3.58.1") },
+                { assert path(process.out.results[0][1]).getText().contains("1007_s_at\t-0.2775254") },
+                { assert path(process.out.results[0][1]).getText().contains("1053_at\t-0.071547786") }
+            )
+        }
+
+    }
+
+    test("test_limma_differential - with formula and with complex contrasts") {
+
+        config "./nextflow.config"
+
+        setup {
+            run("UNTAR") {
+                script "../../../untar/main.nf"
+                process {
+                    """
+                    input[0] = [
+                        [id: 'test'],
+                        file(params.modules_testdata_base_path + "genomics/homo_sapiens/array_expression/GSE38751_RAW.tar", checkIfExists: true)
+                    ]
+                    """
+                }
+            }
+            run("AFFY_JUSTRMA") {
+                script "../../../affy/justrma/main.nf"
+                process {
+                    """
+                    ch_samplesheet = Channel.of([
+                        [ id:'test' ],
+                        file(params.modules_testdata_base_path + "genomics/homo_sapiens/array_expression/GSE38751.csv", checkIfExists: true)
+                        ]
+                    )
+                    input[0] = ch_samplesheet.join(UNTAR.out.untar)
+                    input[1] = [[],[]]
+                    """
+                }
+            }
+        }
+
+        when {
+            process {
+                """
+                ch_samplesheet = Channel.of([
+                    [ id:'test' ],
+                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/array_expression/GSE38751.csv", checkIfExists: true)
+                    ]
+                )
+                input[0] = Channel.of(['id': 'diagnosis_normal_uremia', 'variable': 'diagnosis', 'reference': 'normal', 'target': 'uremia', 'formula': '~ diagnosis', 'comparison': 'diagnosis.uremia'])
+                    .map{
+                        tuple(it, it.variable, it.reference, it.target, it.formula, it.comparison)
+                    }
+                input[1] =  ch_samplesheet
+                    .join(AFFY_JUSTRMA.out.expression)
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out.model, process.out.versions).match() },
+                { assert path(process.out.session_info[0][1]).getText().contains("limma_3.58.1") },
+                { assert path(process.out.results[0][1]).getText().contains("1007_s_at\t-0.27752") },
+                { assert path(process.out.results[0][1]).getText().contains("1053_at\t-0.0715477") }
+            )
+        }
+
     }
 
     test("test_limma_differential - exclude_samples") {
@@ -116,7 +240,7 @@ nextflow_process {
                 )
                 input[0] = Channel.of(['id': 'diagnosis_normal_uremia', 'variable': 'diagnosis', 'reference': 'normal', 'target': 'uremia'])
                     .map{
-                        tuple(it, it.variable, it.reference, it.target)
+                        tuple(it, it.variable, it.reference, it.target, it.formula, it.comparison)
                     }
                 input[1] =  ch_samplesheet
                     .join(AFFY_JUSTRMA.out.expression)
@@ -179,7 +303,7 @@ nextflow_process {
                 )
                 input[0] = Channel.of(['id': 'diagnosis_normal_uremia', 'variable': 'diagnosis', 'reference': 'normal', 'target': 'uremia'])
                     .map{
-                        tuple(it, it.variable, it.reference, it.target)
+                        tuple(it, it.variable, it.reference, it.target, it.formula, it.comparison)
                     }
                 input[1] =  ch_samplesheet
                     .join(AFFY_JUSTRMA.out.expression)

From 37f5e2c5e81fed81e0f53a263a5080362df70ed5 Mon Sep 17 00:00:00 2001
From: atrigila <18577080+atrigila@users.noreply.github.com>
Date: Fri, 9 May 2025 15:09:04 +0000
Subject: [PATCH 2/3] test: update snapshots

---
 .../differential/tests/main.nf.test.snap      | 48 +++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/modules/nf-core/limma/differential/tests/main.nf.test.snap b/modules/nf-core/limma/differential/tests/main.nf.test.snap
index 8678f660a9f..02626988cc2 100644
--- a/modules/nf-core/limma/differential/tests/main.nf.test.snap
+++ b/modules/nf-core/limma/differential/tests/main.nf.test.snap
@@ -91,6 +91,31 @@
         },
         "timestamp": "2024-10-31T12:34:25.24499"
     },
+    "test_limma_differential - with formula and with complex contrasts": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "diagnosis_normal_uremia",
+                        "variable": "diagnosis",
+                        "reference": "normal",
+                        "target": "uremia",
+                        "formula": "~ diagnosis",
+                        "comparison": "diagnosis.uremia"
+                    },
+                    "diagnosis_normal_uremia.limma.model.txt:md5,660fe42c7c13e47524344eaf5b7d2e7c"
+                ]
+            ],
+            [
+                "versions.yml:md5,88a6e42d753077edab8daf829cd4d943"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.10.6"
+        },
+        "timestamp": "2025-05-09T13:31:02.129502295"
+    },
     "test_limma_differential - stub": {
         "content": [
             [
@@ -138,6 +163,29 @@
         },
         "timestamp": "2024-10-31T12:36:56.462834"
     },
+    "test_limma_differential - null formula and null complex contrasts": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "diagnosis_normal_uremia",
+                        "variable": "diagnosis",
+                        "reference": "normal",
+                        "target": "uremia"
+                    },
+                    "diagnosis_normal_uremia.limma.model.txt:md5,70b000f632b8bdba4917046362dd876b"
+                ]
+            ],
+            [
+                "versions.yml:md5,88a6e42d753077edab8daf829cd4d943"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.10.6"
+        },
+        "timestamp": "2025-05-09T13:30:02.217053547"
+    },
     "test_limma_differential - voom_mixed": {
         "content": [
             [

From 4fb55dae7bb6a79919e30140161cc02d774c29c4 Mon Sep 17 00:00:00 2001
From: atrigila <18577080+atrigila@users.noreply.github.com>
Date: Fri, 9 May 2025 15:25:08 +0000
Subject: [PATCH 3/3] docs: update meta.yml

---
 modules/nf-core/limma/differential/meta.yml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/modules/nf-core/limma/differential/meta.yml b/modules/nf-core/limma/differential/meta.yml
index dbf904305a1..3f652710e01 100644
--- a/modules/nf-core/limma/differential/meta.yml
+++ b/modules/nf-core/limma/differential/meta.yml
@@ -37,6 +37,12 @@ input:
         description: |
           The value within the contrast_variable column of the sample sheet that
           should be used to derive the target samples
+    - formula:
+        type: string
+        description: (Optional) R formula string used for modeling, e.g. '~ treatment + batch'. When omitted, the model is built from contrast_variable and any blocking variables.
+    - comparison:
+        type: string
+        description: (Optional) Literal string passed to `limma::makeContrasts`, e.g. 'treatmenthND6 - treatmentmCherry'.
   - - meta2:
         type: map
         description: |