-
Notifications
You must be signed in to change notification settings - Fork 858
feat: add formula and contrasts to limma #8429
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -52,6 +52,17 @@ read_delim_flexible <- function(file, header = TRUE, row.names = NULL, check.nam | |
) | ||
} | ||
|
||
# | ||
#' Turn "null" or empty strings into actual NULL
#'
#' A value counts as "unset" only when it is a single, non-missing character
#' string that is either empty or spells "null" in any letter case. Every
#' other input (NULL, numbers, NA, zero-length or multi-element vectors) is
#' handed back untouched.
#'
#' @param x Input option
#'
#' @return NULL when x is "" or a case-insensitive "null", otherwise x
#'
nullify <- function(x) {
    # Check length and missingness before comparing: `||` and `if` need one
    # non-missing logical, so NA, character(0) or a longer vector would
    # otherwise raise an error instead of being passed through.
    is_single_string <- is.character(x) && length(x) == 1L && !is.na(x)
    if (is_single_string && (tolower(x) == "null" || x == "")) NULL else x
}
|
||
################################################ | ||
################################################ | ||
## PARSE PARAMETERS FROM NEXTFLOW ## | ||
|
@@ -71,6 +82,8 @@ opt <- list( | |
contrast_variable = '$contrast_variable', | ||
reference_level = '$reference', | ||
target_level = '$target', | ||
formula = '$formula', | ||
contrast_string = '$comparison', | ||
blocking_variables = NULL, | ||
probe_id_col = "probe_id", | ||
sample_id_col = "experiment_accession", | ||
|
@@ -116,6 +129,10 @@ if ( ! is.null(opt\$round_digits)){ | |
opt\$round_digits <- as.numeric(opt\$round_digits) | ||
} | ||
|
||
# Optional inputs that were not supplied reach this point as the string
# "null" (or as an empty string). Run them through nullify() so that they
# become a real NULL and the is.null() checks on formula and contrast_string
# further down behave as intended.
keys <- c("formula", "contrast_string") | ||
opt[keys] <- lapply(opt[keys], nullify) | ||
|
||
# Check if required parameters have been provided | ||
|
||
required_opts <- c('contrast_variable', 'reference_level', 'target_level', 'output_prefix') | ||
|
@@ -282,6 +299,10 @@ if ((! is.null(opt\$exclude_samples_col)) && (! is.null(opt\$exclude_samples_val | |
################################################ | ||
################################################ | ||
|
||
if (!is.null(opt\$formula)) { | ||
model <- opt\$formula | ||
} else { | ||
|
||
# Build the model formula with blocking variables first | ||
model_vars <- c() | ||
|
||
|
@@ -302,6 +323,7 @@ for (v in vars_to_factor) { | |
sample.sheet[[v]] <- as.factor(sample.sheet[[v]]) | ||
} | ||
|
||
} | ||
################################################ | ||
################################################ | ||
## Run Limma processes ## | ||
|
@@ -393,6 +415,9 @@ fit <- do.call(lmFit, lmfit_args) | |
# Contrasts bit | ||
|
||
# Create the contrast string for the specified comparison | ||
if (!is.null(opt\$contrast_string)) { | ||
contrast_string <- opt\$contrast_string | ||
} else { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Again, indents for conditional |
||
|
||
# Construct the expected column names for the target and reference levels in the design matrix | ||
treatment_target <- paste0(contrast_variable, ".", opt\$target_level) | ||
|
@@ -418,6 +443,7 @@ if ((treatment_target %in% colnames(design)) && (treatment_reference %in% colnam | |
# This indicates an error; the specified levels are not found | ||
stop(paste0(treatment_target, " and ", treatment_reference, " not found in design matrix")) | ||
} | ||
} | ||
|
||
# Create the contrast matrix | ||
contrast.matrix <- makeContrasts(contrasts=contrast_string, levels=design) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -54,7 +54,7 @@ nextflow_process { | |
) | ||
input[0] = Channel.of(['id': 'diagnosis_normal_uremia', 'variable': 'diagnosis', 'reference': 'normal', 'target': 'uremia']) | ||
.map{ | ||
tuple(it, it.variable, it.reference, it.target) | ||
tuple(it, it.variable, it.reference, it.target, it.formula, it.comparison) | ||
} | ||
input[1] = ch_samplesheet | ||
.join(AFFY_JUSTRMA.out.expression) | ||
|
@@ -72,6 +72,130 @@ nextflow_process { | |
) | ||
} | ||
|
||
} | ||
|
||
test("test_limma_differential - null formula and null complex contrasts") { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Needs an indent fix. Sorry if I'm not seeing it, but isn't this the same as one of the tests above? |
||
|
||
config "./nextflow.config" | ||
|
||
setup { | ||
run("UNTAR") { | ||
script "../../../untar/main.nf" | ||
process { | ||
""" | ||
input[0] = [ | ||
[id: 'test'], | ||
file(params.modules_testdata_base_path + "genomics/homo_sapiens/array_expression/GSE38751_RAW.tar", checkIfExists: true) | ||
] | ||
""" | ||
} | ||
} | ||
run("AFFY_JUSTRMA") { | ||
script "../../../affy/justrma/main.nf" | ||
process { | ||
""" | ||
ch_samplesheet = Channel.of([ | ||
[ id:'test' ], | ||
file(params.modules_testdata_base_path + "genomics/homo_sapiens/array_expression/GSE38751.csv", checkIfExists: true) | ||
] | ||
) | ||
input[0] = ch_samplesheet.join(UNTAR.out.untar) | ||
input[1] = [[],[]] | ||
""" | ||
} | ||
} | ||
} | ||
|
||
when { | ||
process { | ||
""" | ||
ch_samplesheet = Channel.of([ | ||
[ id:'test' ], | ||
file(params.modules_testdata_base_path + "genomics/homo_sapiens/array_expression/GSE38751.csv", checkIfExists: true) | ||
] | ||
) | ||
input[0] = Channel.of(['id': 'diagnosis_normal_uremia', 'variable': 'diagnosis', 'reference': 'normal', 'target': 'uremia']) | ||
.map{ | ||
tuple(it, it.variable, it.reference, it.target, it.formula, it.comparison) | ||
} | ||
input[1] = ch_samplesheet | ||
.join(AFFY_JUSTRMA.out.expression) | ||
""" | ||
} | ||
} | ||
|
||
then { | ||
assertAll( | ||
{ assert process.success }, | ||
{ assert snapshot(process.out.model, process.out.versions).match() }, | ||
{ assert path(process.out.session_info[0][1]).getText().contains("limma_3.58.1") }, | ||
{ assert path(process.out.results[0][1]).getText().contains("1007_s_at\t-0.2775254") }, | ||
{ assert path(process.out.results[0][1]).getText().contains("1053_at\t-0.071547786") } | ||
) | ||
} | ||
|
||
} | ||
|
||
test("test_limma_differential - with formula and with complex contrasts") { | ||
|
||
config "./nextflow.config" | ||
|
||
setup { | ||
run("UNTAR") { | ||
script "../../../untar/main.nf" | ||
process { | ||
""" | ||
input[0] = [ | ||
[id: 'test'], | ||
file(params.modules_testdata_base_path + "genomics/homo_sapiens/array_expression/GSE38751_RAW.tar", checkIfExists: true) | ||
] | ||
""" | ||
} | ||
} | ||
run("AFFY_JUSTRMA") { | ||
script "../../../affy/justrma/main.nf" | ||
process { | ||
""" | ||
ch_samplesheet = Channel.of([ | ||
[ id:'test' ], | ||
file(params.modules_testdata_base_path + "genomics/homo_sapiens/array_expression/GSE38751.csv", checkIfExists: true) | ||
] | ||
) | ||
input[0] = ch_samplesheet.join(UNTAR.out.untar) | ||
input[1] = [[],[]] | ||
""" | ||
} | ||
} | ||
} | ||
|
||
when { | ||
process { | ||
""" | ||
ch_samplesheet = Channel.of([ | ||
[ id:'test' ], | ||
file(params.modules_testdata_base_path + "genomics/homo_sapiens/array_expression/GSE38751.csv", checkIfExists: true) | ||
] | ||
) | ||
input[0] = Channel.of(['id': 'diagnosis_normal_uremia', 'variable': 'diagnosis', 'reference': 'normal', 'target': 'uremia', 'formula': '~ diagnosis', 'comparison': 'diagnosis.uremia']) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should this test maybe have null target etc to ensure they're not being used? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Indeed, this is a very good observation that led me to realise a few things about the current design. |
||
.map{ | ||
tuple(it, it.variable, it.reference, it.target, it.formula, it.comparison) | ||
} | ||
input[1] = ch_samplesheet | ||
.join(AFFY_JUSTRMA.out.expression) | ||
""" | ||
} | ||
} | ||
|
||
then { | ||
assertAll( | ||
{ assert process.success }, | ||
{ assert snapshot(process.out.model, process.out.versions).match() }, | ||
{ assert path(process.out.session_info[0][1]).getText().contains("limma_3.58.1") }, | ||
{ assert path(process.out.results[0][1]).getText().contains("1007_s_at\t-0.27752") }, | ||
{ assert path(process.out.results[0][1]).getText().contains("1053_at\t-0.0715477") } | ||
) | ||
} | ||
|
||
} | ||
|
||
test("test_limma_differential - exclude_samples") { | ||
|
@@ -116,7 +240,7 @@ nextflow_process { | |
) | ||
input[0] = Channel.of(['id': 'diagnosis_normal_uremia', 'variable': 'diagnosis', 'reference': 'normal', 'target': 'uremia']) | ||
.map{ | ||
tuple(it, it.variable, it.reference, it.target) | ||
tuple(it, it.variable, it.reference, it.target, it.formula, it.comparison) | ||
} | ||
input[1] = ch_samplesheet | ||
.join(AFFY_JUSTRMA.out.expression) | ||
|
@@ -179,7 +303,7 @@ nextflow_process { | |
) | ||
input[0] = Channel.of(['id': 'diagnosis_normal_uremia', 'variable': 'diagnosis', 'reference': 'normal', 'target': 'uremia']) | ||
.map{ | ||
tuple(it, it.variable, it.reference, it.target) | ||
tuple(it, it.variable, it.reference, it.target, it.formula, it.comparison) | ||
} | ||
input[1] = ch_samplesheet | ||
.join(AFFY_JUSTRMA.out.expression) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Indents below please?