From 384e34e55574489217c75dd0fc47e83b658926d7 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Mon, 5 May 2025 21:37:37 +0000 Subject: [PATCH 1/3] feat: add formula and contrasts to limma --- modules/nf-core/limma/differential/main.nf | 2 +- .../limma/differential/templates/limma_de.R | 26 ++++ .../limma/differential/tests/main.nf.test | 130 +++++++++++++++++- 3 files changed, 154 insertions(+), 4 deletions(-) diff --git a/modules/nf-core/limma/differential/main.nf b/modules/nf-core/limma/differential/main.nf index b7106c397b6..e753ad41ad6 100644 --- a/modules/nf-core/limma/differential/main.nf +++ b/modules/nf-core/limma/differential/main.nf @@ -8,7 +8,7 @@ process LIMMA_DIFFERENTIAL { 'community.wave.seqera.io/library/bioconductor-edger_bioconductor-limma:176c202c82450990' }" input: - tuple val(meta), val(contrast_variable), val(reference), val(target) + tuple val(meta), val(contrast_variable), val(reference), val(target), val(formula), val(comparison) tuple val(meta2), path(samplesheet), path(intensities) output: diff --git a/modules/nf-core/limma/differential/templates/limma_de.R b/modules/nf-core/limma/differential/templates/limma_de.R index 7db3967677e..9885a48797b 100644 --- a/modules/nf-core/limma/differential/templates/limma_de.R +++ b/modules/nf-core/limma/differential/templates/limma_de.R @@ -52,6 +52,17 @@ read_delim_flexible <- function(file, header = TRUE, row.names = NULL, check.nam ) } +# +#' Turn “null” or empty strings into actual NULL +#' +#' @param x Input option +#' +#' @return NULL or x +#' +nullify <- function(x) { + if (is.character(x) && (tolower(x) == "null" || x == "")) NULL else x +} + ################################################ ################################################ ## PARSE PARAMETERS FROM NEXTFLOW ## @@ -71,6 +82,8 @@ opt <- list( contrast_variable = '$contrast_variable', reference_level = '$reference', target_level = '$target', + formula = '$formula', + contrast_string = 
'$comparison', blocking_variables = NULL, probe_id_col = "probe_id", sample_id_col = "experiment_accession", @@ -116,6 +129,10 @@ if ( ! is.null(opt\$round_digits)){ opt\$round_digits <- as.numeric(opt\$round_digits) } +# If there is no option supplied, convert string "null" to NULL +keys <- c("formula", "contrast_string") +opt[keys] <- lapply(opt[keys], nullify) + # Check if required parameters have been provided required_opts <- c('contrast_variable', 'reference_level', 'target_level', 'output_prefix') @@ -282,6 +299,10 @@ if ((! is.null(opt\$exclude_samples_col)) && (! is.null(opt\$exclude_samples_val ################################################ ################################################ +if (!is.null(opt\$formula)) { + model <- opt\$formula +} else { + # Build the model formula with blocking variables first model_vars <- c() @@ -302,6 +323,7 @@ for (v in vars_to_factor) { sample.sheet[[v]] <- as.factor(sample.sheet[[v]]) } +} ################################################ ################################################ ## Run Limma processes ## @@ -393,6 +415,9 @@ fit <- do.call(lmFit, lmfit_args) # Contrasts bit # Create the contrast string for the specified comparison +if (!is.null(opt\$contrast_string)) { + contrast_string <- opt\$contrast_string +} else { # Construct the expected column names for the target and reference levels in the design matrix treatment_target <- paste0(contrast_variable, ".", opt\$target_level) @@ -418,6 +443,7 @@ if ((treatment_target %in% colnames(design)) && (treatment_reference %in% colnam # This indicates an error; the specified levels are not found stop(paste0(treatment_target, " and ", treatment_reference, " not found in design matrix")) } +} # Create the contrast matrix contrast.matrix <- makeContrasts(contrasts=contrast_string, levels=design) diff --git a/modules/nf-core/limma/differential/tests/main.nf.test b/modules/nf-core/limma/differential/tests/main.nf.test index 551a53f12d7..a8667daf371 100644 --- 
a/modules/nf-core/limma/differential/tests/main.nf.test +++ b/modules/nf-core/limma/differential/tests/main.nf.test @@ -54,7 +54,7 @@ nextflow_process { ) input[0] = Channel.of(['id': 'diagnosis_normal_uremia', 'variable': 'diagnosis', 'reference': 'normal', 'target': 'uremia']) .map{ - tuple(it, it.variable, it.reference, it.target) + tuple(it, it.variable, it.reference, it.target, it.formula, it.comparison) } input[1] = ch_samplesheet .join(AFFY_JUSTRMA.out.expression) @@ -72,6 +72,130 @@ nextflow_process { ) } + } + + test("test_limma_differential - null formula and null complex contrasts") { + + config "./nextflow.config" + + setup { + run("UNTAR") { + script "../../../untar/main.nf" + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/array_expression/GSE38751_RAW.tar", checkIfExists: true) + ] + """ + } + } + run("AFFY_JUSTRMA") { + script "../../../affy/justrma/main.nf" + process { + """ + ch_samplesheet = Channel.of([ + [ id:'test' ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/array_expression/GSE38751.csv", checkIfExists: true) + ] + ) + input[0] = ch_samplesheet.join(UNTAR.out.untar) + input[1] = [[],[]] + """ + } + } + } + + when { + process { + """ + ch_samplesheet = Channel.of([ + [ id:'test' ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/array_expression/GSE38751.csv", checkIfExists: true) + ] + ) + input[0] = Channel.of(['id': 'diagnosis_normal_uremia', 'variable': 'diagnosis', 'reference': 'normal', 'target': 'uremia']) + .map{ + tuple(it, it.variable, it.reference, it.target, it.formula, it.comparison) + } + input[1] = ch_samplesheet + .join(AFFY_JUSTRMA.out.expression) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.model, process.out.versions).match() }, + { assert path(process.out.session_info[0][1]).getText().contains("limma_3.58.1") }, + { assert 
path(process.out.results[0][1]).getText().contains("1007_s_at\t-0.2775254") }, + { assert path(process.out.results[0][1]).getText().contains("1053_at\t-0.071547786") } + ) + } + + } + + test("test_limma_differential - with formula and with complex contrasts") { + + config "./nextflow.config" + + setup { + run("UNTAR") { + script "../../../untar/main.nf" + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/array_expression/GSE38751_RAW.tar", checkIfExists: true) + ] + """ + } + } + run("AFFY_JUSTRMA") { + script "../../../affy/justrma/main.nf" + process { + """ + ch_samplesheet = Channel.of([ + [ id:'test' ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/array_expression/GSE38751.csv", checkIfExists: true) + ] + ) + input[0] = ch_samplesheet.join(UNTAR.out.untar) + input[1] = [[],[]] + """ + } + } + } + + when { + process { + """ + ch_samplesheet = Channel.of([ + [ id:'test' ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/array_expression/GSE38751.csv", checkIfExists: true) + ] + ) + input[0] = Channel.of(['id': 'diagnosis_normal_uremia', 'variable': 'diagnosis', 'reference': 'normal', 'target': 'uremia', 'formula': '~ diagnosis', 'comparison': 'diagnosis.uremia']) + .map{ + tuple(it, it.variable, it.reference, it.target, it.formula, it.comparison) + } + input[1] = ch_samplesheet + .join(AFFY_JUSTRMA.out.expression) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.model, process.out.versions).match() }, + { assert path(process.out.session_info[0][1]).getText().contains("limma_3.58.1") }, + { assert path(process.out.results[0][1]).getText().contains("1007_s_at\t-0.27752") }, + { assert path(process.out.results[0][1]).getText().contains("1053_at\t-0.0715477") } + ) + } + } test("test_limma_differential - exclude_samples") { @@ -116,7 +240,7 @@ nextflow_process { ) input[0] = Channel.of(['id': 
'diagnosis_normal_uremia', 'variable': 'diagnosis', 'reference': 'normal', 'target': 'uremia']) .map{ - tuple(it, it.variable, it.reference, it.target) + tuple(it, it.variable, it.reference, it.target, it.formula, it.comparison) } input[1] = ch_samplesheet .join(AFFY_JUSTRMA.out.expression) @@ -179,7 +303,7 @@ nextflow_process { ) input[0] = Channel.of(['id': 'diagnosis_normal_uremia', 'variable': 'diagnosis', 'reference': 'normal', 'target': 'uremia']) .map{ - tuple(it, it.variable, it.reference, it.target) + tuple(it, it.variable, it.reference, it.target, it.formula, it.comparison) } input[1] = ch_samplesheet .join(AFFY_JUSTRMA.out.expression) From 37f5e2c5e81fed81e0f53a263a5080362df70ed5 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Fri, 9 May 2025 15:09:04 +0000 Subject: [PATCH 2/3] test: update snapshots --- .../differential/tests/main.nf.test.snap | 48 +++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/modules/nf-core/limma/differential/tests/main.nf.test.snap b/modules/nf-core/limma/differential/tests/main.nf.test.snap index 8678f660a9f..02626988cc2 100644 --- a/modules/nf-core/limma/differential/tests/main.nf.test.snap +++ b/modules/nf-core/limma/differential/tests/main.nf.test.snap @@ -91,6 +91,31 @@ }, "timestamp": "2024-10-31T12:34:25.24499" }, + "test_limma_differential - with formula and with complex contrasts": { + "content": [ + [ + [ + { + "id": "diagnosis_normal_uremia", + "variable": "diagnosis", + "reference": "normal", + "target": "uremia", + "formula": "~ diagnosis", + "comparison": "diagnosis.uremia" + }, + "diagnosis_normal_uremia.limma.model.txt:md5,660fe42c7c13e47524344eaf5b7d2e7c" + ] + ], + [ + "versions.yml:md5,88a6e42d753077edab8daf829cd4d943" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.6" + }, + "timestamp": "2025-05-09T13:31:02.129502295" + }, "test_limma_differential - stub": { "content": [ [ @@ -138,6 +163,29 @@ }, "timestamp": 
"2024-10-31T12:36:56.462834" }, + "test_limma_differential - null formula and null complex contrasts": { + "content": [ + [ + [ + { + "id": "diagnosis_normal_uremia", + "variable": "diagnosis", + "reference": "normal", + "target": "uremia" + }, + "diagnosis_normal_uremia.limma.model.txt:md5,70b000f632b8bdba4917046362dd876b" + ] + ], + [ + "versions.yml:md5,88a6e42d753077edab8daf829cd4d943" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.6" + }, + "timestamp": "2025-05-09T13:30:02.217053547" + }, "test_limma_differential - voom_mixed": { "content": [ [ From 4fb55dae7bb6a79919e30140161cc02d774c29c4 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Fri, 9 May 2025 15:25:08 +0000 Subject: [PATCH 3/3] docs: update meta.yml --- modules/nf-core/limma/differential/meta.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/modules/nf-core/limma/differential/meta.yml b/modules/nf-core/limma/differential/meta.yml index dbf904305a1..3f652710e01 100644 --- a/modules/nf-core/limma/differential/meta.yml +++ b/modules/nf-core/limma/differential/meta.yml @@ -37,6 +37,12 @@ input: description: | The value within the contrast_variable column of the sample sheet that should be used to derive the target samples + - formula: + type: string + description: (Optional) R formula string used for modeling, e.g. '~ treatment + (1 | sample_number)'. When null or empty, the module builds the model formula itself. + - comparison: + type: string + description: (Optional) Literal string passed to `limma::makeContrasts`, e.g. 'treatmenthND6 - treatmentmCherry'. - - meta2: type: map description: |