From 2e5af03ea7b91002b65670d01674dbf3a23b04bb Mon Sep 17 00:00:00 2001 From: Tiago Jesus Date: Tue, 30 Jul 2019 16:42:07 +0100 Subject: [PATCH 1/8] Create CODE_OF_CONDUCT.md (#214) --- CODE_OF_CONDUCT.md | 76 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 CODE_OF_CONDUCT.md diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 00000000..45cb1077 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,76 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +In the interest of fostering an open and welcoming environment, we as +contributors and maintainers pledge to making participation in our project and +our community a harassment-free experience for everyone, regardless of age, body +size, disability, ethnicity, sex characteristics, gender identity and expression, +level of experience, education, socio-economic status, nationality, personal +appearance, race, religion, or sexual identity and orientation. + +## Our Standards + +Examples of behavior that contributes to creating a positive environment +include: + +* Using welcoming and inclusive language +* Being respectful of differing viewpoints and experiences +* Gracefully accepting constructive criticism +* Focusing on what is best for the community +* Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +* The use of sexualized language or imagery and unwelcome sexual attention or + advances +* Trolling, insulting/derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or electronic + address, without explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable +behavior and are expected to take appropriate and fair 
corrective action in +response to any instances of unacceptable behavior. + +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. + +## Scope + +This Code of Conduct applies both within project spaces and in public spaces +when an individual is representing the project or its community. Examples of +representing a project or community include using an official project e-mail +address, posting via an official social media account, or acting as an appointed +representative at an online or offline event. Representation of a project may be +further defined and clarified by project maintainers. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting the project team at odiogosilva@gmail.com. All +complaints will be reviewed and investigated and will result in a response that +is deemed necessary and appropriate to the circumstances. The project team is +obligated to maintain confidentiality with regard to the reporter of an incident. +Further details of specific enforcement policies may be posted separately. + +Project maintainers who do not follow or enforce the Code of Conduct in good +faith may face temporary or permanent repercussions as determined by other +members of the project's leadership. 
+ +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, +available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html + +[homepage]: https://www.contributor-covenant.org + +For answers to common questions about this code of conduct, see +https://www.contributor-covenant.org/faq From d3a670affae7af33eea593dbe04192b11db05451 Mon Sep 17 00:00:00 2001 From: Tiago Jesus Date: Wed, 31 Jul 2019 12:02:50 +0100 Subject: [PATCH 2/8] Added gitter badge --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 5273a198..9ddffcce 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,7 @@ [![Documentation Status](https://readthedocs.org/projects/flowcraft/badge/?version=latest)](http://flowcraft.readthedocs.io/en/latest/?badge=latest) [![PyPI version](https://badge.fury.io/py/flowcraft.svg)](https://badge.fury.io/py/flowcraft) [![Anaconda-Server Badge](https://anaconda.org/bioconda/flowcraft/badges/version.svg)](https://anaconda.org/bioconda/flowcraft) +[![Gitter](https://badges.gitter.im/flowcraft-community/community.svg)](https://gitter.im/flowcraft-community/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge)

nextflow_logo From c0b997a618cb5940d22fbbe781d7f7de36d46a8d Mon Sep 17 00:00:00 2001 From: cimendes Date: Mon, 9 Sep 2019 18:20:04 +0100 Subject: [PATCH 3/8] move renamePE_samtoolsFASTQ.py from bin. adjust retrieved_mapped and remove_host processes. --- flowcraft/bin/renamePE_samtoolsFASTQ.py | 140 ------------- flowcraft/generator/components/mapping.py | 3 +- flowcraft/generator/templates/remove_host.nf | 26 ++- .../generator/templates/retrieve_mapped.nf | 24 ++- flowcraft/templates/renamePE_samtoolsFASTQ.py | 190 ++++++++++++++++++ 5 files changed, 231 insertions(+), 152 deletions(-) delete mode 100755 flowcraft/bin/renamePE_samtoolsFASTQ.py create mode 100755 flowcraft/templates/renamePE_samtoolsFASTQ.py diff --git a/flowcraft/bin/renamePE_samtoolsFASTQ.py b/flowcraft/bin/renamePE_samtoolsFASTQ.py deleted file mode 100755 index 052046d1..00000000 --- a/flowcraft/bin/renamePE_samtoolsFASTQ.py +++ /dev/null @@ -1,140 +0,0 @@ -#!/usr/bin/env python2 - -#TODO - change to py3 -# -*- coding: utf-8 -*- - -""" -renamePE_samtoolsFASTQ.py - Rename the fastq headers with PE terminations -that were not include in samtools fastq command - -Copyright (C) 2017 Miguel Machado -Last modified: January 10, 2017 -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. -You should have received a copy of the GNU General Public License -along with this program. If not, see . 
-""" - -import os -import sys -import time -import argparse -import itertools - - -version = '0.1' - - -def formartFastqHeaders(in_fastq_1, in_fastq_2, outdir): - out_fastq_1 = os.path.join(outdir, os.path.splitext(os.path.basename(in_fastq_1))[0] + '.headersRenamed_1.fq') - out_fastq_2 = os.path.join(outdir, os.path.splitext(os.path.basename(in_fastq_2))[0] + '.headersRenamed_2.fq') - writer_in_fastq_1 = open(out_fastq_1, 'wt') - writer_in_fastq_2 = open(out_fastq_2, 'wt') - outfiles = [out_fastq_1, out_fastq_2] - with open(in_fastq_1, 'rtU') as reader_in_fastq_1, open(in_fastq_2, 'rtU') as reader_in_fastq_2: - plus_line = True - quality_line = True - number_reads = 0 - for in_1, in_2 in itertools.izip(reader_in_fastq_1, reader_in_fastq_2): - if len(in_1) > 0: - in_1 = in_1.splitlines()[0] - in_2 = in_2.splitlines()[0] - if in_1.startswith('@') and plus_line and quality_line: - if in_1 != in_2: - sys.exit('The PE fastq files are not aligned properly!') - in_1 += '/1' + '\n' - in_2 += '/2' + '\n' - writer_in_fastq_1.write(in_1) - writer_in_fastq_2.write(in_2) - plus_line = False - quality_line = False - elif in_1.startswith('+') and not plus_line: - in_1 += '\n' - writer_in_fastq_1.write(in_1) - writer_in_fastq_2.write(in_1) - plus_line = True - elif plus_line and not quality_line: - in_1 += '\n' - in_2 += '\n' - writer_in_fastq_1.write(in_1) - writer_in_fastq_2.write(in_2) - writer_in_fastq_1.flush() - writer_in_fastq_2.flush() - number_reads += 1 - quality_line = True - else: - in_1 += '\n' - in_2 += '\n' - writer_in_fastq_1.write(in_1) - writer_in_fastq_2.write(in_2) - return number_reads, outfiles - - -def compressionType(file_to_test): - magic_dict = {'\x1f\x8b\x08': ['gzip', 'gunzip'], '\x42\x5a\x68': ['bzip2', 'bunzip2']} - - max_len = max(len(x) for x in magic_dict) - - with open(file_to_test, 'r') as reader: - file_start = reader.read(max_len) - - for magic, filetype in magic_dict.items(): - if file_start.startswith(magic): - return filetype - return None 
- - -def runTime(start_time): - end_time = time.time() - time_taken = end_time - start_time - hours, rest = divmod(time_taken, 3600) - minutes, seconds = divmod(rest, 60) - print 'Runtime :' + str(hours) + 'h:' + str(minutes) + 'm:' + str(round(seconds, 2)) + 's' - return time_taken - - -def main(): - parser = argparse.ArgumentParser(prog='renamePE_samtoolsFASTQ.py', description='Rename the fastq headers with PE terminations that were not include in samtools fastq command', formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--version', help='Version information', action='version', version=str('%(prog)s v' + version)) - - parser_required = parser.add_argument_group('Required options') - parser_required.add_argument('-1', '--fastq_1', type=argparse.FileType('r'), metavar='/path/to/input/file_1.fq', help='Uncompressed fastq file containing mate 1 reads', required=True) - parser_required.add_argument('-2', '--fastq_2', type=argparse.FileType('r'), metavar='/path/to/input/file_2.fq', help='Uncompressed fastq file containing mate 2 reads', required=True) - - parser_optional_general = parser.add_argument_group('General facultative options') - parser_optional_general.add_argument('-o', '--outdir', type=str, metavar='/output/directory/', help='Path for output directory', required=False, default='.') - - args = parser.parse_args() - - print '\n' + 'STARTING renamePE_samtoolsFASTQ.py' + '\n' - start_time = time.time() - - fastq_files = [os.path.abspath(args.fastq_1.name), os.path.abspath(args.fastq_2.name)] - - print 'Check if files are compressed' + '\n' - for fastq in fastq_files: - if compressionType(fastq) is not None: - sys.exit('Compressed fastq files found') - - outdir = os.path.abspath(args.outdir) - if not os.path.isdir(outdir): - os.makedirs(outdir) - - print 'Renaming fastq headers' + '\n' - number_reads, outfiles = formartFastqHeaders(fastq_files[0], fastq_files[1], outdir) - - print 'It was written ' + str(number_reads) + ' read pairs 
in ' + str(outfiles) + ' files' + '\n' - - print '\n' + 'END renamePE_samtoolsFASTQ.py' - time_taken = runTime(start_time) - del time_taken - - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/flowcraft/generator/components/mapping.py b/flowcraft/generator/components/mapping.py index a310b198..f29ab6b0 100644 --- a/flowcraft/generator/components/mapping.py +++ b/flowcraft/generator/components/mapping.py @@ -106,7 +106,8 @@ def __init__(self, **kwargs): } self.status_channels = [ - "retrieve_mapped" + "retrieve_mapped", + "renamePE" ] diff --git a/flowcraft/generator/templates/remove_host.nf b/flowcraft/generator/templates/remove_host.nf index f070c546..825f8f64 100644 --- a/flowcraft/generator/templates/remove_host.nf +++ b/flowcraft/generator/templates/remove_host.nf @@ -17,7 +17,7 @@ process remove_host_{{ pid }} { val clear from checkpointClear_{{ pid }} output: - set sample_id , file("${sample_id}*.headersRenamed_*.fq.gz") into {{ output_channel }} + set sample_id , file("${sample_id}*.headersRenamed_*.fq.gz") into OUT_remove_host_{{ pid }} set sample_id, file("*_bowtie2.log") into into_json_{{ pid }} {% with task_name="remove_host" %} {%- include "compiler_channels.txt" ignore missing -%} @@ -36,11 +36,6 @@ process remove_host_{{ pid }} { rm ${sample_id}_samtools.bam - renamePE_samtoolsFASTQ.py -1 ${sample_id}_unmapped_1.fq -2 ${sample_id}_unmapped_2.fq - - gzip *.headersRenamed_*.fq - rm *.fq - if [ "$clear" = "true" ]; then work_regex=".*/work/.{2}/.{30}/.*" @@ -58,6 +53,25 @@ process remove_host_{{ pid }} { } +process renamePE_{{ pid }} { + + tag { sample_id } + publishDir '' + + input: + set sample_if, file(fastq_pair} from OUT_remove_host_{{ pid }} + + output: + set sample_id , file("*.headersRenamed_*.fq.gz") into {{ output_channel }} + {% with task_name="renamePE" %} + {%- include "compiler_channels.txt" ignore missing -%} + {% endwith %} + + script: + template "renamePE_samtoolsFASTQ.py" + +} + process report_remove_host_{{ 
pid }} { diff --git a/flowcraft/generator/templates/retrieve_mapped.nf b/flowcraft/generator/templates/retrieve_mapped.nf index 4a906c11..ee4356e1 100644 --- a/flowcraft/generator/templates/retrieve_mapped.nf +++ b/flowcraft/generator/templates/retrieve_mapped.nf @@ -10,7 +10,7 @@ process retrieve_mapped_{{ pid }} { set sample_id, file(bam) from {{ input_channel }} output: - set sample_id , file("*.headersRenamed_*.fq.gz") into {{ output_channel }} + set sample_id , file("*_mapped_*.fq") into OUT_retrieve_mapped_{{ pid }} {% with task_name="retrieve_mapped" %} {%- include "compiler_channels.txt" ignore missing -%} {% endwith %} @@ -25,12 +25,26 @@ process retrieve_mapped_{{ pid }} { rm ${sample_id}_samtools.bam - renamePE_samtoolsFASTQ.py -1 ${sample_id}_mapped_1.fq -2 ${sample_id}_mapped_2.fq + """ +} - gzip *.headersRenamed_*.fq +process renamePE_{{ pid }} { + + tag { sample_id } + publishDir '' + + input: + set sample_if, file(fastq_pair} from OUT_retrieve_mapped_{{ pid }} + + output: + set sample_id , file("*.headersRenamed_*.fq.gz") into {{ output_channel }} + {% with task_name="renamePE" %} + {%- include "compiler_channels.txt" ignore missing -%} + {% endwith %} + + script: + template "renamePE_samtoolsFASTQ.py" - rm *.fq - """ } {{ forks }} \ No newline at end of file diff --git a/flowcraft/templates/renamePE_samtoolsFASTQ.py b/flowcraft/templates/renamePE_samtoolsFASTQ.py new file mode 100755 index 00000000..c3fa582f --- /dev/null +++ b/flowcraft/templates/renamePE_samtoolsFASTQ.py @@ -0,0 +1,190 @@ +#!/usr/bin/env python3 + +# -*- coding: utf-8 -*- + +import os +import sys +import bz2 +import gzip +import zipfile +from flowcraft_utils.flowcraft_base import get_logger, MainWrapper + +""" +Purpose +------- + +This module renames the fastq headers with PE terminations +that were not include in samtools fastq command + + +Expected input +-------------- + +The following variables are expected whether using NextFlow or the +:py:func:`main` executor. 
+ +- ``sample_id``: Sample Identification string. + - e.g.: ``'SampleA'`` +- ``fastq_pair`` : Pair of FastQ file paths. + - e.g.: ``'SampleA_1.fastq.gz SampleA_2.fastq.gz'` + +Generated output +---------------- +- ``fastq_pair`` : Pair of FastQ file paths with rename headers. + - e.g.: ``'SampleA_1.fastq.gz SampleA_2.fastq.gz'` + +Code documentation +------------------ + +""" + +__version__ = "1.0.1" +__build__ = "09.09.2019" +__template__ = "retrieved_mapped-nf" + +if __file__.endswith(".command.sh"): + SAMPLE_ID = '$sample_id' + FASTQ_PAIR = '$fastq_pair'.split() + logger.debug("Running {} with parameters:".format( + os.path.basename(__file__))) + logger.debug("SAMPLE_ID: {}".format(SAMPLE_ID)) + logger.debug("FASTQ_PAIR: {}".format(FASTQ_PAIR)) + + +COPEN = { + "gz": gzip.open, + "bz2": bz2.open, + "zip": zipfile.ZipFile +} + +MAGIC_DICT = { + b"\\x1f\\x8b\\x08": "gz", + b"\\x42\\x5a\\x68": "bz2", + b"\\x50\\x4b\\x03\\x04": "zip" +} + + +def guess_file_compression(file_path, magic_dict=None): + """Guesses the compression of an input file. + + This function guesses the compression of a given file by checking for + a binary signature at the beginning of the file. These signatures are + stored in the :py:data:`MAGIC_DICT` dictionary. The supported compression + formats are gzip, bzip2 and zip. If none of the signatures in this + dictionary are found at the beginning of the file, it returns ``None``. + + Parameters + ---------- + file_path : str + Path to input file. + magic_dict : dict, optional + Dictionary containing the signatures of the compression types. The + key should be the binary signature and the value should be the + compression format. If left ``None``, it falls back to + :py:data:`MAGIC_DICT`. + + Returns + ------- + file_type : str or None + If a compression type is detected, returns a string with the format. + If not, returns ``None``. 
+ """ + + if not magic_dict: + magic_dict = MAGIC_DICT + + max_len = max(len(x) for x in magic_dict) + + with open(file_path, "rb") as f: + file_start = f.read(max_len) + + logger.debug("Binary signature start: {}".format(file_start)) + + for magic, file_type in magic_dict.items(): + if file_start.startswith(magic): + return file_type + + return None + + +def formartFastqHeaders(sample_name, in_fastq_1, in_fastq_2): + out_fastq_1 = os.path.join(os.getcwd(), sample_name + '.headersRenamed_1.fq') + out_fastq_2 = os.path.join(os.getcwd(), sample_name + '.headersRenamed_2.fq') + + writer_in_fastq_1 = open(out_fastq_1, 'wt') + writer_in_fastq_2 = open(out_fastq_2, 'wt') + + outfiles = [out_fastq_1, out_fastq_2] + + with open(in_fastq_1, 'rtU') as reader_in_fastq_1, open(in_fastq_2, 'rtU') as reader_in_fastq_2: + plus_line = True + quality_line = True + number_reads = 0 + for in_1, in_2 in zip(reader_in_fastq_1, reader_in_fastq_2): + if len(in_1) > 0: + in_1 = in_1.splitlines()[0] + in_2 = in_2.splitlines()[0] + + if in_1.startswith('@') and plus_line and quality_line: + if in_1 != in_2: + sys.exit('The PE fastq files are not aligned properly!') + in_1 += '/1' + '\n' + in_2 += '/2' + '\n' + writer_in_fastq_1.write(in_1) + writer_in_fastq_2.write(in_2) + plus_line = False + quality_line = False + elif in_1.startswith('+') and not plus_line: + in_1 += '\n' + writer_in_fastq_1.write(in_1) + writer_in_fastq_2.write(in_1) + plus_line = True + elif plus_line and not quality_line: + in_1 += '\n' + in_2 += '\n' + writer_in_fastq_1.write(in_1) + writer_in_fastq_2.write(in_2) + writer_in_fastq_1.flush() + writer_in_fastq_2.flush() + number_reads += 1 + quality_line = True + else: + in_1 += '\n' + in_2 += '\n' + writer_in_fastq_1.write(in_1) + writer_in_fastq_2.write(in_2) + return number_reads, outfiles + + +def main(sample_id, fastq_files): + + logger.info("STARTING renamePE_samtoolsFASTQ.py") + + file_objects = [] + + for fastq in fastq_files: + + logger.info("Processing file 
{}".format(fastq)) + + logger.info("[{}] Guessing file compression".format(fastq)) + ftype = guess_file_compression(fastq) + + # This can guess the compression of gz, bz2 and zip. If it cannot + # find the compression type, it tries to open a regular file. + if ftype: + logger.info("[{}] Found file compression: {}".format(fastq, ftype)) + file_objects.append(COPEN[ftype](fastq, "rt")) + else: + logger.info("[{}] File compression not found. Assuming an uncompressed file".format(fastq)) + file_objects.append(open(fastq)) + + logger.info('Renaming fastq headers') + number_reads, outfiles = formartFastqHeaders(sample_id, file_objects[0], file_objects[1]) + + logger.info('{} read pairs were written in {} outfiles'.format(number_reads, outfiles)) + + os.remove(fastq_files) + + +if __name__ == "__main__": + main(SAMPLE_ID, FASTQ_PAIR) From b03b42770b6da465368a3ff1c41524bc527cdd94 Mon Sep 17 00:00:00 2001 From: cimendes Date: Mon, 9 Sep 2019 18:20:51 +0100 Subject: [PATCH 4/8] add missing status channel to remove_host --- flowcraft/generator/components/metagenomics.py | 1 + 1 file changed, 1 insertion(+) diff --git a/flowcraft/generator/components/metagenomics.py b/flowcraft/generator/components/metagenomics.py index 30eaa60b..880ea700 100644 --- a/flowcraft/generator/components/metagenomics.py +++ b/flowcraft/generator/components/metagenomics.py @@ -475,6 +475,7 @@ def __init__(self, **kwargs): self.status_channels = [ "remove_host", + "renamePE", "report_remove_host" ] From 22985ae69df86de2e88dc81e0aa59182a5960598 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?In=C3=AAs=20Mendes?= Date: Mon, 16 Sep 2019 14:41:48 +0100 Subject: [PATCH 5/8] update with changes in dev (#223) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Dag files (#209) * move DAG JSON files to the resources directory * added manifest information to the `nextflow.config` file to allow for remote execution (#204) - Partial solve to #194 issue - Deprecation of the 
`manifest.config` file
+ ## 1.4.2 ### New components diff --git a/flowcraft/flowcraft.py b/flowcraft/flowcraft.py index 69a5d5ff..cde04c26 100755 --- a/flowcraft/flowcraft.py +++ b/flowcraft/flowcraft.py @@ -358,6 +358,11 @@ def build(args): logger.info(colored_print("Building your awesome pipeline...")) + # copy template to cwd, to allow for immediate execution + # this is executed before the build() method to avoid issues with the resources/ folder + if not args.pipeline_only: + copy_project(parsed_output_nf) + if args.export_params: nfg.export_params() sys.exit(0) @@ -371,10 +376,6 @@ def build(args): # building the actual pipeline nf file nfg.build() - # copy template to cwd, to allow for immediate execution - if not args.pipeline_only: - copy_project(parsed_output_nf) - logger.info(colored_print("DONE!", "green_bold")) diff --git a/flowcraft/generator/components/mapping.py b/flowcraft/generator/components/mapping.py index f29ab6b0..f178ab11 100644 --- a/flowcraft/generator/components/mapping.py +++ b/flowcraft/generator/components/mapping.py @@ -246,4 +246,4 @@ def __init__(self, **kwargs): self.status_channels = [ "base_recalibrator", "apply_bqsr" - ] \ No newline at end of file + ] diff --git a/flowcraft/generator/components/metagenomics.py b/flowcraft/generator/components/metagenomics.py index 880ea700..5cd6b377 100644 --- a/flowcraft/generator/components/metagenomics.py +++ b/flowcraft/generator/components/metagenomics.py @@ -28,7 +28,7 @@ def __init__(self, **kwargs): self.input_type = "fasta" self.output_type = "fasta" - self.link_end.append({"link": "__fastq", "alias": "_LAST_fastq"}) + self.link_end.append({"link": "SIDE_max_len", "alias": "SIDE_max_len"}) self.params = { "clusters": { @@ -562,3 +562,62 @@ def __init__(self, **kwargs): self.status_channels = [ "split_assembly" ] + + +class Vamb(Process): + """ + Vamb process template interface for the + taxonomic independent binning of metagenomic + assemblies. 
+ + This process is set with: + - ``input_type``: assembly + - ``output_type``: assembly + - ``ptype``: post_assembly + + It contains one **dependency process**: + + - ``assembly_mapping``: Requires the BAM file generated by the + assembly mapping process + + """ + def __init__(self, **kwargs): + + super().__init__(**kwargs) + + self.input_type = "fasta" + self.output_type = "fasta" + + self.dependencies = ["assembly_mapping"] + + self.params = { + "minContig": { + "default": 2000, + "description": "Ignore contigs shorter than this. Default: 2000" + }, + "minAlignScore":{ + "default": 50, + "description": "Ignore reads with alignment score below this. Default: 50" + }, + "clearInput": { + "default": "false", + "description": + "Permanently removes temporary input files. This option " + "is only useful to remove temporary files in large " + "workflows and prevents nextflow's resume functionality. " + "Use with caution." + } + } + + self.directives = { + "vamb": { + "container": "flowcraft/vamb", + "version": "1.0.1-1", + "cpus": 4, + "memory": "{ 5.GB * task.attempt }" + } + } + + self.status_channels = [ + "vamb" + ] diff --git a/flowcraft/generator/components/variant_calling.py b/flowcraft/generator/components/variant_calling.py index dd42c1bb..85f31d53 100644 --- a/flowcraft/generator/components/variant_calling.py +++ b/flowcraft/generator/components/variant_calling.py @@ -49,4 +49,4 @@ def __init__(self, **kwargs): self.status_channels = [ "haplotypecaller", "merge_vcfs" - ] \ No newline at end of file + ] diff --git a/flowcraft/generator/engine.py b/flowcraft/generator/engine.py index f2ff2e32..f844cc26 100644 --- a/flowcraft/generator/engine.py +++ b/flowcraft/generator/engine.py @@ -1300,21 +1300,23 @@ def _set_configurations(self): }) self.user_config = self._render_config("user.config", {}) - def dag_to_file(self, dict_viz, output_file=".treeDag.json"): - """Writes dag to output file + def dag_info_to_file(self, dict_viz, output_folder, output_file): + 
"""Writes dag or fork information to output file Parameters ---------- dict_viz: dict Tree like dictionary that is used to export tree data of processes - to html file and here for the dotfile .treeDag.json + to html file and here for the treeDag.json, stored in the resources directory + output_folder: str + Path folder to save the output file + output_file: str + Output file name """ - outfile_dag = open(os.path.join(dirname(self.nf_file), output_file) - , "w") - outfile_dag.write(json.dumps(dict_viz)) - outfile_dag.close() + with open(os.path.join(output_folder, output_file), "w") as outfile: + outfile.write(json.dumps(dict_viz)) def render_pipeline(self): """Write pipeline attributes to json @@ -1379,13 +1381,16 @@ def render_pipeline(self): last_of_us[p.lane] = lst[-1]["children"] + # check if resources dir exists - necessary for dag files + resources_dir = os.path.join(dirname(self.nf_file), "resources") + if not os.path.exists(resources_dir): + os.mkdir(resources_dir) + # write to file dict_viz - self.dag_to_file(dict_viz) + self.dag_info_to_file(dict_viz, resources_dir, "treeDag.json") - # Write tree forking information for dotfile - with open(os.path.join(dirname(self.nf_file), - ".forkTree.json"), "w") as fh: - fh.write(json.dumps(self._fork_tree)) + # Write tree forking information + self.dag_info_to_file(self._fork_tree, resources_dir, "forkTree.json") # send with jinja to html resource return self._render_config("pipeline_graph.html", {"data": dict_viz}) diff --git a/flowcraft/generator/error_handling.py b/flowcraft/generator/error_handling.py index 8cb3a515..a6e10c6a 100644 --- a/flowcraft/generator/error_handling.py +++ b/flowcraft/generator/error_handling.py @@ -34,6 +34,7 @@ def __init__(self, value): # def __str__(self): # return repr(self.value) + class LogError(Exception): def __init__(self, value): self.value = "Log ERROR: {}".format(value) diff --git a/flowcraft/generator/inspect.py b/flowcraft/generator/inspect.py index c3a4e8b1..3838c5ec 
100644 --- a/flowcraft/generator/inspect.py +++ b/flowcraft/generator/inspect.py @@ -1465,8 +1465,8 @@ def _prepare_static_info(self): return pipeline_files def _dag_file_to_dict(self): - """Function that opens the dotfile named .treeDag.json in the current - working directory + """Function that opens the accessory file treeDag.json in the + resources directory and loads it's contents to a dictionary Returns ------- @@ -1475,11 +1475,11 @@ def _dag_file_to_dict(self): """ try: - dag_file = open(os.path.join(self.workdir, ".treeDag.json")) + dag_file = open(os.path.join(self.workdir, "resources", "treeDag.json")) dag_json = json.load(dag_file) except (FileNotFoundError, json.decoder.JSONDecodeError): logger.warning(colored_print( - "WARNING: dotfile named .treeDag.json not found or corrupted", + "WARNING: JSON file named treeDag.json not found or corrupted", "red_bold")) dag_json = {} diff --git a/flowcraft/generator/templates/Helper.groovy b/flowcraft/generator/templates/Helper.groovy index 58a2ef28..6c7f3b1e 100644 --- a/flowcraft/generator/templates/Helper.groovy +++ b/flowcraft/generator/templates/Helper.groovy @@ -65,8 +65,8 @@ class CollectInitialMetadata { public static void print_metadata(nextflow.script.WorkflowMetadata workflow){ - def treeDag = new File("${workflow.projectDir}/.treeDag.json").text - def forkTree = new File("${workflow.projectDir}/.forkTree.json").text + def treeDag = new File("${workflow.projectDir}/resources/treeDag.json").text + def forkTree = new File("${workflow.projectDir}/resources/forkTree.json").text def metadataJson = "{'nfMetadata':{'scriptId':'${workflow.scriptId}',\ 'scriptName':'${workflow.scriptName}',\ diff --git a/flowcraft/generator/templates/downsample_fastq.nf b/flowcraft/generator/templates/downsample_fastq.nf index db7fcc3b..fea53010 100644 --- a/flowcraft/generator/templates/downsample_fastq.nf +++ b/flowcraft/generator/templates/downsample_fastq.nf @@ -17,7 +17,8 @@ process downsample_fastq_{{ pid }} { {% include 
"post.txt" ignore missing %} tag { "${sample_id}" } - publishDir "results/downsample_fastq_{{ pid }}/", pattern: "_ss.*" + + publishDir "results/downsample_fastq_{{ pid }}/", pattern: "*_ss_*.*" input: set sample_id, file(fastq_pair) from {{ input_channel }} @@ -27,7 +28,7 @@ process downsample_fastq_{{ pid }} { val clear from checkpointClear_{{ pid }} output: - set sample_id, file('*_ss.*') into {{ output_channel }} + set sample_id, file('*_ss_*.*') into {{ output_channel }} {% with task_name="downsample_fastq" %} {%- include "compiler_channels.txt" ignore missing -%} {% endwith %} @@ -37,4 +38,4 @@ process downsample_fastq_{{ pid }} { } -{{ forks }} \ No newline at end of file +{{ forks }} diff --git a/flowcraft/generator/templates/vamb.nf b/flowcraft/generator/templates/vamb.nf new file mode 100644 index 00000000..78c34375 --- /dev/null +++ b/flowcraft/generator/templates/vamb.nf @@ -0,0 +1,50 @@ +IN_min_contig_{{ pid }} = Channel.value(params.minContig{{ param_id }}) +IN_min_align_score_{{ pid }} = Channel.value(params.minAlignScore{{ param_id }}) + +clear = params.clearInput{{ param_id }} ? 
"true" : "false" +checkpointClear_{{ pid }} = Channel.value(clear) + +process vamb_{{ pid }} { + + // Send POST request to platform + {% include "post.txt" ignore missing %} + + tag { sample_id } + + //publishDir "results/assembly/binning/vamb_{{ pid }}/${sample_id}/" + + input: + set sample_id, file(assembly), file(bam_file), file(bam_index) from {{ input_channel }} + val length_threshold from IN_min_contig_{{ pid }} + val min_score from IN_min_align_score_{{ pid }} + val clear from checkpointClear_{{ pid }} + + output: + + {% with task_name="vamb"%} + {%- include "compiler_channels.txt" ignore missing -%} + {% endwith %} + + script: + """ + { + # run METABAT2 + run.py results/ ${assembly} ${bam_file} -m ${length_threshold} -s ${min_score} + + # In case no sequences are binned + if [ -z "\$(ls -A *metabat-bins*/)" ]; then + echo "false" > false_bin.fa + mv false_bin.fa *metabat-bins*/ + echo "false" > bin_status.txt; + else + echo "true" > bin_status.txt + fi + + } || { + echo fail > .status + } + """ +} + + +{{ forks }} \ No newline at end of file diff --git a/flowcraft/templates/downsample_fastq.py b/flowcraft/templates/downsample_fastq.py index 3f7bf746..d8e35c3c 100755 --- a/flowcraft/templates/downsample_fastq.py +++ b/flowcraft/templates/downsample_fastq.py @@ -36,14 +36,15 @@ """ -__version__ = "1.0.0" -__build__ = "30072018" +__version__ = "1.0.1" +__build__ = "21062019" __template__ = "sample_fastq-nf" import os import re import json import subprocess +import shutil from os.path import basename @@ -133,7 +134,8 @@ def main(sample_id, fastq_pair, genome_size, depth, clear, seed): # print ("Writing R1.fq.gz") ps = subprocess.Popen(('seqtk', 'sample', parsed_seed, p1, str(ratio)), stdout=subprocess.PIPE) - with open('{}_ss.fq.gz'.format(bn1), 'w') as outfile: + with open('{}_ss_{}.fq.gz'.format(bn1, str(target_depth)), 'w') as \ + outfile: subprocess.Popen(('gzip', '--fast', '-c'), stdin=ps.stdout, stdout=outfile ) ps.wait() @@ -141,7 +143,8 @@ def 
main(sample_id, fastq_pair, genome_size, depth, clear, seed): # print ("Writing R2.fq.gz") ps = subprocess.Popen(('seqtk', 'sample', parsed_seed, p2, str(ratio)), stdout=subprocess.PIPE) - with open('{}_ss.fq.gz'.format(bn2), 'w') as outfile: + with open('{}_ss_{}.fq.gz'.format(bn2, str(target_depth)), 'w') as \ + outfile: subprocess.Popen(('gzip', '--fast', '-c'), stdin=ps.stdout, stdout=outfile) ps.wait() @@ -156,8 +159,8 @@ def main(sample_id, fastq_pair, genome_size, depth, clear, seed): os.remove(rp) else: - os.symlink(p1, "{}._ss.fq.gz".format(bn1)) - os.symlink(p2, "{}._ss.fq.gz".format(bn2)) + os.symlink(p1, "{}_ss_{}.fq.gz".format(bn1, str(target_depth))) + os.symlink(p2, "{}_ss_{}.fq.gz".format(bn2, str(target_depth))) # Record the original estimated coverage with open(".report.json", "w") as fh: diff --git a/flowcraft/templates/trimmomatic.py b/flowcraft/templates/trimmomatic.py index eefb3cce..0c905fc0 100644 --- a/flowcraft/templates/trimmomatic.py +++ b/flowcraft/templates/trimmomatic.py @@ -46,7 +46,7 @@ # TODO: More control over read trimming # TODO: Add option to remove adapters -# TODO: What to do when there is encoding failure +# TODO: What to do when there is encoding failure - forcing phred33 at the moment __version__ = "1.0.3" __build__ = "29062018" @@ -283,6 +283,43 @@ def merge_default_adapters(): return filepath +def run_trimmomatic(cli, logfile, sample_id): + """ + Runs trimmomatic command + Parameters + ---------- + cli : lst + list containing trimmomatic command + logfile : str + Path to file for trimmomatic to write log + sample_id: str + Sample Identification string. + """ + + logger.debug("Running trimmomatic subprocess with command: {}".format(cli)) + + p = subprocess.Popen(cli, stdout=PIPE, stderr=PIPE) + stdout, stderr = p.communicate() + + # Attempt to decode STDERR output from bytes. 
If unsuccessful, coerce to + # string + try: + stderr = stderr.decode("utf8") + except (UnicodeDecodeError, AttributeError): + stderr = str(stderr) + + logger.info("Finished trimmomatic subprocess with STDOUT:\\n" + "======================================\\n{}".format(stdout)) + logger.info("Finished trimmomatic subprocesswith STDERR:\\n" + "======================================\\n{}".format(stderr)) + logger.info("Finished trimmomatic with return code: {}".format( + p.returncode)) + + trimmomatic_log(logfile, sample_id) + + return p.returncode + + @MainWrapper def main(sample_id, fastq_pair, trim_range, trim_opts, phred, adapters_file, clear): @@ -329,10 +366,12 @@ def main(sample_id, fastq_pair, trim_range, trim_opts, phred, adapters_file, phred = int(phred) phred_flag = "-phred{}".format(str(phred)) cli += [phred_flag] - # Could not detect phred encoding. Do not add explicit encoding to - # trimmomatic and let it guess + # Could not detect phred encoding. + # Forcing as phred33 to avoid encoding errors except ValueError: - pass + logger.info("Could not detect quality encoding. Setting it to phred33") + phred_flag = "-phred33" + cli += [phred_flag] # Add input samples to CLI cli += fastq_pair @@ -378,37 +417,32 @@ def main(sample_id, fastq_pair, trim_range, trim_opts, phred, adapters_file, logfile ] - logger.debug("Running trimmomatic subprocess with command: {}".format(cli)) - - p = subprocess.Popen(cli, stdout=PIPE, stderr=PIPE) - stdout, stderr = p.communicate() - - # Attempt to decode STDERR output from bytes. 
If unsuccessful, coerce to - # string - try: - stderr = stderr.decode("utf8") - except (UnicodeDecodeError, AttributeError): - stderr = str(stderr) - - logger.info("Finished trimmomatic subprocess with STDOUT:\\n" - "======================================\\n{}".format(stdout)) - logger.info("Finished trimmomatic subprocesswith STDERR:\\n" - "======================================\\n{}".format(stderr)) - logger.info("Finished trimmomatic with return code: {}".format( - p.returncode)) - - trimmomatic_log(logfile, sample_id) + returncode = run_trimmomatic(cli, logfile, sample_id) - if p.returncode == 0 and os.path.exists("{}_1_trim.fastq.gz".format( + if returncode == 0 and os.path.exists("{}_1_trim.fastq.gz".format( SAMPLE_ID)): clean_up(fastq_pair, clear) # Check if trimmomatic ran successfully. If not, write the error message # to the status channel and exit. with open(".status", "w") as status_fh: - if p.returncode != 0: - status_fh.write("fail") - return + if returncode != 0: + # retry to run trimmomatic by changing the encoding from phred33 to phred64 + if "-phred33" in cli: + + logger.info("Trimmomatic failed while running with phred33. 
Setting it to phred64 and trying again...") + cli[7] = "-phred64" + + returncode = run_trimmomatic(cli, logfile, sample_id) + + if returncode != 0: + status_fh.write("fail") + return + else: + status_fh.write("pass") + else: + status_fh.write("fail") + return else: status_fh.write("pass") diff --git a/flowcraft/tests/test_assemblerflow.py b/flowcraft/tests/test_flowcraft.py similarity index 87% rename from flowcraft/tests/test_assemblerflow.py rename to flowcraft/tests/test_flowcraft.py index 684b6ed6..5810978d 100644 --- a/flowcraft/tests/test_assemblerflow.py +++ b/flowcraft/tests/test_flowcraft.py @@ -51,12 +51,13 @@ def test_build_file_2(tmp): "{}".format(p), "--pipeline-only"]) af.build(args) - assert sorted(os.listdir(tmp)) == [".forkTree.json", ".treeDag.json", - "containers.config", + assert sorted(os.listdir(tmp)) == ["containers.config", "lib", "nextflow.config", "params.config", - "resources.config", "teste.html", + "resources", "resources.config", "teste.html", "teste.nf", "user.config"] + assert sorted(os.listdir(os.path.join(tmp, "resources"))) == ["forkTree.json", "treeDag.json"] + def test_build_recipe(tmp): From 0e534cc07778cca33c5b9f7b0e5c863bd0fbc24c Mon Sep 17 00:00:00 2001 From: cimendes Date: Mon, 16 Sep 2019 14:53:48 +0100 Subject: [PATCH 6/8] update changelog and fix bug on remove_host component --- changelog.md | 2 ++ flowcraft/generator/templates/remove_host.nf | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/changelog.md b/changelog.md index 9e897e58..9dadd041 100644 --- a/changelog.md +++ b/changelog.md @@ -7,6 +7,8 @@ - Fix bug in `downsample_fastq` where the resulting output files was not being saved in the `results` directory - Fix bug in `downsample_fastq` where the output files were being saved as broken symlinks when there was no down-sampling occurring +- Moved `renamePE_samtoolsFASTQ.py` from `flowcraft/bin/` to the `flowcraft/templates` folder and updated it to +python3 - fix issue #219 ### Minor/Other 
changes diff --git a/flowcraft/generator/templates/remove_host.nf b/flowcraft/generator/templates/remove_host.nf index 825f8f64..2ef44a0b 100644 --- a/flowcraft/generator/templates/remove_host.nf +++ b/flowcraft/generator/templates/remove_host.nf @@ -17,7 +17,7 @@ process remove_host_{{ pid }} { val clear from checkpointClear_{{ pid }} output: - set sample_id , file("${sample_id}*.headersRenamed_*.fq.gz") into OUT_remove_host_{{ pid }} + set sample_id , file("${sample_id}_unmapped_*.fq") into OUT_remove_host_{{ pid }} set sample_id, file("*_bowtie2.log") into into_json_{{ pid }} {% with task_name="remove_host" %} {%- include "compiler_channels.txt" ignore missing -%} @@ -59,7 +59,7 @@ process renamePE_{{ pid }} { publishDir '' input: - set sample_if, file(fastq_pair} from OUT_remove_host_{{ pid }} + set sample_if, file{fastq_pair} from OUT_remove_host_{{ pid }} output: set sample_id , file("*.headersRenamed_*.fq.gz") into {{ output_channel }} From cc6ac866f28330890be4b464f61042371abf9683 Mon Sep 17 00:00:00 2001 From: cimendes Date: Mon, 16 Sep 2019 18:47:20 +0100 Subject: [PATCH 7/8] fixed a bunch of bugs in "renamePE" processes and template --- flowcraft/generator/templates/remove_host.nf | 7 ++- .../generator/templates/retrieve_mapped.nf | 3 +- flowcraft/templates/renamePE_samtoolsFASTQ.py | 46 +++++++++++++------ 3 files changed, 37 insertions(+), 19 deletions(-) diff --git a/flowcraft/generator/templates/remove_host.nf b/flowcraft/generator/templates/remove_host.nf index 2ef44a0b..b7312454 100644 --- a/flowcraft/generator/templates/remove_host.nf +++ b/flowcraft/generator/templates/remove_host.nf @@ -55,11 +55,14 @@ process remove_host_{{ pid }} { process renamePE_{{ pid }} { + // Send POST request to platform + {% include "post.txt" ignore missing %} + tag { sample_id } - publishDir '' + publishDir 'results/mapping/remove_host_{{ pid }}/' input: - set sample_if, file{fastq_pair} from OUT_remove_host_{{ pid }} + set sample_id, file(fastq_pair) from 
OUT_remove_host_{{ pid }} output: set sample_id , file("*.headersRenamed_*.fq.gz") into {{ output_channel }} diff --git a/flowcraft/generator/templates/retrieve_mapped.nf b/flowcraft/generator/templates/retrieve_mapped.nf index ee4356e1..19424f38 100644 --- a/flowcraft/generator/templates/retrieve_mapped.nf +++ b/flowcraft/generator/templates/retrieve_mapped.nf @@ -4,7 +4,6 @@ process retrieve_mapped_{{ pid }} { {% include "post.txt" ignore missing %} tag { sample_id } - publishDir 'results/mapping/retrieve_mapped_{{ pid }}/' input: set sample_id, file(bam) from {{ input_channel }} @@ -31,7 +30,7 @@ process retrieve_mapped_{{ pid }} { process renamePE_{{ pid }} { tag { sample_id } - publishDir '' + publishDir 'results/mapping/retrieve_mapped_{{ pid }}/' input: set sample_if, file(fastq_pair} from OUT_retrieve_mapped_{{ pid }} diff --git a/flowcraft/templates/renamePE_samtoolsFASTQ.py b/flowcraft/templates/renamePE_samtoolsFASTQ.py index c3fa582f..a9da48d5 100755 --- a/flowcraft/templates/renamePE_samtoolsFASTQ.py +++ b/flowcraft/templates/renamePE_samtoolsFASTQ.py @@ -9,6 +9,8 @@ import zipfile from flowcraft_utils.flowcraft_base import get_logger, MainWrapper +logger = get_logger(__file__) + """ Purpose ------- @@ -116,7 +118,7 @@ def formartFastqHeaders(sample_name, in_fastq_1, in_fastq_2): outfiles = [out_fastq_1, out_fastq_2] - with open(in_fastq_1, 'rtU') as reader_in_fastq_1, open(in_fastq_2, 'rtU') as reader_in_fastq_2: + with open(in_fastq_1, 'r') as reader_in_fastq_1, open(in_fastq_2, 'r') as reader_in_fastq_2: plus_line = True quality_line = True number_reads = 0 @@ -127,21 +129,21 @@ def formartFastqHeaders(sample_name, in_fastq_1, in_fastq_2): if in_1.startswith('@') and plus_line and quality_line: if in_1 != in_2: - sys.exit('The PE fastq files are not aligned properly!') - in_1 += '/1' + '\n' - in_2 += '/2' + '\n' + sys.exit('The PE fastq files are not aligned properly!') + in_1 += '/1' + '\\n' + in_2 += '/2' + '\\n' writer_in_fastq_1.write(in_1) 
writer_in_fastq_2.write(in_2) plus_line = False quality_line = False elif in_1.startswith('+') and not plus_line: - in_1 += '\n' + in_1 += '\\n' writer_in_fastq_1.write(in_1) writer_in_fastq_2.write(in_1) plus_line = True elif plus_line and not quality_line: - in_1 += '\n' - in_2 += '\n' + in_1 += '\\n' + in_2 += '\\n' writer_in_fastq_1.write(in_1) writer_in_fastq_2.write(in_2) writer_in_fastq_1.flush() @@ -149,10 +151,14 @@ def formartFastqHeaders(sample_name, in_fastq_1, in_fastq_2): number_reads += 1 quality_line = True else: - in_1 += '\n' - in_2 += '\n' + in_1 += '\\n' + in_2 += '\\n' writer_in_fastq_1.write(in_1) writer_in_fastq_2.write(in_2) + + writer_in_fastq_1.close() + writer_in_fastq_2.close() + return number_reads, outfiles @@ -164,26 +170,36 @@ def main(sample_id, fastq_files): for fastq in fastq_files: - logger.info("Processing file {}".format(fastq)) + logger.info("Processing file {}".format(fastq)) - logger.info("[{}] Guessing file compression".format(fastq)) - ftype = guess_file_compression(fastq) + logger.info("[{}] Guessing file compression".format(fastq)) + ftype = guess_file_compression(fastq) # This can guess the compression of gz, bz2 and zip. If it cannot # find the compression type, it tries to open a regular file. if ftype: logger.info("[{}] Found file compression: {}".format(fastq, ftype)) file_objects.append(COPEN[ftype](fastq, "rt")) + else: logger.info("[{}] File compression not found. Assuming an uncompressed file".format(fastq)) - file_objects.append(open(fastq)) + file_objects.append(fastq) logger.info('Renaming fastq headers') number_reads, outfiles = formartFastqHeaders(sample_id, file_objects[0], file_objects[1]) - logger.info('{} read pairs were written in {} outfiles'.format(number_reads, outfiles)) + logger.info('{} read pairs were written in {} and {}. 
Compressing...'.format(number_reads, outfiles[0], outfiles[1])) + + # compress outfiles + for file in outfiles: + with open(file, 'rb') as f_in: + f_out = gzip.open(file + '.gz', 'wb') + f_out.writelines(f_in) + f_out.close() + logger.info('DONE') - os.remove(fastq_files) + os.remove(outfiles[0]) + os.remove((outfiles[1])) if __name__ == "__main__": From 05b3dcaba4f6d49b3b0df47b73b4555cc70c62fd Mon Sep 17 00:00:00 2001 From: cimendes Date: Mon, 16 Sep 2019 21:31:15 +0100 Subject: [PATCH 8/8] fix issues with bugs in "renamePE" process in the "retrieve_mapped" component --- flowcraft/generator/templates/retrieve_mapped.nf | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/flowcraft/generator/templates/retrieve_mapped.nf b/flowcraft/generator/templates/retrieve_mapped.nf index 19424f38..be371b17 100644 --- a/flowcraft/generator/templates/retrieve_mapped.nf +++ b/flowcraft/generator/templates/retrieve_mapped.nf @@ -32,8 +32,10 @@ process renamePE_{{ pid }} { tag { sample_id } publishDir 'results/mapping/retrieve_mapped_{{ pid }}/' + {% include "post.txt" ignore missing %} + input: - set sample_if, file(fastq_pair} from OUT_retrieve_mapped_{{ pid }} + set sample_id, file(fastq_pair) from OUT_retrieve_mapped_{{ pid }} output: set sample_id , file("*.headersRenamed_*.fq.gz") into {{ output_channel }}