From 2e5af03ea7b91002b65670d01674dbf3a23b04bb Mon Sep 17 00:00:00 2001 From: Tiago Jesus Date: Tue, 30 Jul 2019 16:42:07 +0100 Subject: [PATCH 1/8] Create CODE_OF_CONDUCT.md (#214) --- CODE_OF_CONDUCT.md | 76 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 CODE_OF_CONDUCT.md diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 00000000..45cb1077 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,76 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +In the interest of fostering an open and welcoming environment, we as +contributors and maintainers pledge to making participation in our project and +our community a harassment-free experience for everyone, regardless of age, body +size, disability, ethnicity, sex characteristics, gender identity and expression, +level of experience, education, socio-economic status, nationality, personal +appearance, race, religion, or sexual identity and orientation. + +## Our Standards + +Examples of behavior that contributes to creating a positive environment +include: + +* Using welcoming and inclusive language +* Being respectful of differing viewpoints and experiences +* Gracefully accepting constructive criticism +* Focusing on what is best for the community +* Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +* The use of sexualized language or imagery and unwelcome sexual attention or + advances +* Trolling, insulting/derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or electronic + address, without explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable +behavior and are expected to take appropriate and fair 
corrective action in +response to any instances of unacceptable behavior. + +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. + +## Scope + +This Code of Conduct applies both within project spaces and in public spaces +when an individual is representing the project or its community. Examples of +representing a project or community include using an official project e-mail +address, posting via an official social media account, or acting as an appointed +representative at an online or offline event. Representation of a project may be +further defined and clarified by project maintainers. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting the project team at odiogosilva@gmail.com. All +complaints will be reviewed and investigated and will result in a response that +is deemed necessary and appropriate to the circumstances. The project team is +obligated to maintain confidentiality with regard to the reporter of an incident. +Further details of specific enforcement policies may be posted separately. + +Project maintainers who do not follow or enforce the Code of Conduct in good +faith may face temporary or permanent repercussions as determined by other +members of the project's leadership. 
+ +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, +available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html + +[homepage]: https://www.contributor-covenant.org + +For answers to common questions about this code of conduct, see +https://www.contributor-covenant.org/faq From d3a670affae7af33eea593dbe04192b11db05451 Mon Sep 17 00:00:00 2001 From: Tiago Jesus Date: Wed, 31 Jul 2019 12:02:50 +0100 Subject: [PATCH 2/8] Added gitter badge --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 5273a198..9ddffcce 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,7 @@ [![Documentation Status](https://readthedocs.org/projects/flowcraft/badge/?version=latest)](http://flowcraft.readthedocs.io/en/latest/?badge=latest) [![PyPI version](https://badge.fury.io/py/flowcraft.svg)](https://badge.fury.io/py/flowcraft) [![Anaconda-Server Badge](https://anaconda.org/bioconda/flowcraft/badges/version.svg)](https://anaconda.org/bioconda/flowcraft) +[![Gitter](https://badges.gitter.im/flowcraft-community/community.svg)](https://gitter.im/flowcraft-community/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge)

nextflow_logo From c0b997a618cb5940d22fbbe781d7f7de36d46a8d Mon Sep 17 00:00:00 2001 From: cimendes Date: Mon, 9 Sep 2019 18:20:04 +0100 Subject: [PATCH 3/8] move renamePE_samtoolsFASTQ.py from bin. adjust retrieved_mapped and remove_host processes. --- flowcraft/bin/renamePE_samtoolsFASTQ.py | 140 ------------- flowcraft/generator/components/mapping.py | 3 +- flowcraft/generator/templates/remove_host.nf | 26 ++- .../generator/templates/retrieve_mapped.nf | 24 ++- flowcraft/templates/renamePE_samtoolsFASTQ.py | 190 ++++++++++++++++++ 5 files changed, 231 insertions(+), 152 deletions(-) delete mode 100755 flowcraft/bin/renamePE_samtoolsFASTQ.py create mode 100755 flowcraft/templates/renamePE_samtoolsFASTQ.py diff --git a/flowcraft/bin/renamePE_samtoolsFASTQ.py b/flowcraft/bin/renamePE_samtoolsFASTQ.py deleted file mode 100755 index 052046d1..00000000 --- a/flowcraft/bin/renamePE_samtoolsFASTQ.py +++ /dev/null @@ -1,140 +0,0 @@ -#!/usr/bin/env python2 - -#TODO - change to py3 -# -*- coding: utf-8 -*- - -""" -renamePE_samtoolsFASTQ.py - Rename the fastq headers with PE terminations -that were not include in samtools fastq command - -Copyright (C) 2017 Miguel Machado -Last modified: January 10, 2017 -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. -You should have received a copy of the GNU General Public License -along with this program. If not, see . 
-""" - -import os -import sys -import time -import argparse -import itertools - - -version = '0.1' - - -def formartFastqHeaders(in_fastq_1, in_fastq_2, outdir): - out_fastq_1 = os.path.join(outdir, os.path.splitext(os.path.basename(in_fastq_1))[0] + '.headersRenamed_1.fq') - out_fastq_2 = os.path.join(outdir, os.path.splitext(os.path.basename(in_fastq_2))[0] + '.headersRenamed_2.fq') - writer_in_fastq_1 = open(out_fastq_1, 'wt') - writer_in_fastq_2 = open(out_fastq_2, 'wt') - outfiles = [out_fastq_1, out_fastq_2] - with open(in_fastq_1, 'rtU') as reader_in_fastq_1, open(in_fastq_2, 'rtU') as reader_in_fastq_2: - plus_line = True - quality_line = True - number_reads = 0 - for in_1, in_2 in itertools.izip(reader_in_fastq_1, reader_in_fastq_2): - if len(in_1) > 0: - in_1 = in_1.splitlines()[0] - in_2 = in_2.splitlines()[0] - if in_1.startswith('@') and plus_line and quality_line: - if in_1 != in_2: - sys.exit('The PE fastq files are not aligned properly!') - in_1 += '/1' + '\n' - in_2 += '/2' + '\n' - writer_in_fastq_1.write(in_1) - writer_in_fastq_2.write(in_2) - plus_line = False - quality_line = False - elif in_1.startswith('+') and not plus_line: - in_1 += '\n' - writer_in_fastq_1.write(in_1) - writer_in_fastq_2.write(in_1) - plus_line = True - elif plus_line and not quality_line: - in_1 += '\n' - in_2 += '\n' - writer_in_fastq_1.write(in_1) - writer_in_fastq_2.write(in_2) - writer_in_fastq_1.flush() - writer_in_fastq_2.flush() - number_reads += 1 - quality_line = True - else: - in_1 += '\n' - in_2 += '\n' - writer_in_fastq_1.write(in_1) - writer_in_fastq_2.write(in_2) - return number_reads, outfiles - - -def compressionType(file_to_test): - magic_dict = {'\x1f\x8b\x08': ['gzip', 'gunzip'], '\x42\x5a\x68': ['bzip2', 'bunzip2']} - - max_len = max(len(x) for x in magic_dict) - - with open(file_to_test, 'r') as reader: - file_start = reader.read(max_len) - - for magic, filetype in magic_dict.items(): - if file_start.startswith(magic): - return filetype - return None 
- - -def runTime(start_time): - end_time = time.time() - time_taken = end_time - start_time - hours, rest = divmod(time_taken, 3600) - minutes, seconds = divmod(rest, 60) - print 'Runtime :' + str(hours) + 'h:' + str(minutes) + 'm:' + str(round(seconds, 2)) + 's' - return time_taken - - -def main(): - parser = argparse.ArgumentParser(prog='renamePE_samtoolsFASTQ.py', description='Rename the fastq headers with PE terminations that were not include in samtools fastq command', formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--version', help='Version information', action='version', version=str('%(prog)s v' + version)) - - parser_required = parser.add_argument_group('Required options') - parser_required.add_argument('-1', '--fastq_1', type=argparse.FileType('r'), metavar='/path/to/input/file_1.fq', help='Uncompressed fastq file containing mate 1 reads', required=True) - parser_required.add_argument('-2', '--fastq_2', type=argparse.FileType('r'), metavar='/path/to/input/file_2.fq', help='Uncompressed fastq file containing mate 2 reads', required=True) - - parser_optional_general = parser.add_argument_group('General facultative options') - parser_optional_general.add_argument('-o', '--outdir', type=str, metavar='/output/directory/', help='Path for output directory', required=False, default='.') - - args = parser.parse_args() - - print '\n' + 'STARTING renamePE_samtoolsFASTQ.py' + '\n' - start_time = time.time() - - fastq_files = [os.path.abspath(args.fastq_1.name), os.path.abspath(args.fastq_2.name)] - - print 'Check if files are compressed' + '\n' - for fastq in fastq_files: - if compressionType(fastq) is not None: - sys.exit('Compressed fastq files found') - - outdir = os.path.abspath(args.outdir) - if not os.path.isdir(outdir): - os.makedirs(outdir) - - print 'Renaming fastq headers' + '\n' - number_reads, outfiles = formartFastqHeaders(fastq_files[0], fastq_files[1], outdir) - - print 'It was written ' + str(number_reads) + ' read pairs 
in ' + str(outfiles) + ' files' + '\n' - - print '\n' + 'END renamePE_samtoolsFASTQ.py' - time_taken = runTime(start_time) - del time_taken - - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/flowcraft/generator/components/mapping.py b/flowcraft/generator/components/mapping.py index a310b198..f29ab6b0 100644 --- a/flowcraft/generator/components/mapping.py +++ b/flowcraft/generator/components/mapping.py @@ -106,7 +106,8 @@ def __init__(self, **kwargs): } self.status_channels = [ - "retrieve_mapped" + "retrieve_mapped", + "renamePE" ] diff --git a/flowcraft/generator/templates/remove_host.nf b/flowcraft/generator/templates/remove_host.nf index f070c546..825f8f64 100644 --- a/flowcraft/generator/templates/remove_host.nf +++ b/flowcraft/generator/templates/remove_host.nf @@ -17,7 +17,7 @@ process remove_host_{{ pid }} { val clear from checkpointClear_{{ pid }} output: - set sample_id , file("${sample_id}*.headersRenamed_*.fq.gz") into {{ output_channel }} + set sample_id , file("${sample_id}*.headersRenamed_*.fq.gz") into OUT_remove_host_{{ pid }} set sample_id, file("*_bowtie2.log") into into_json_{{ pid }} {% with task_name="remove_host" %} {%- include "compiler_channels.txt" ignore missing -%} @@ -36,11 +36,6 @@ process remove_host_{{ pid }} { rm ${sample_id}_samtools.bam - renamePE_samtoolsFASTQ.py -1 ${sample_id}_unmapped_1.fq -2 ${sample_id}_unmapped_2.fq - - gzip *.headersRenamed_*.fq - rm *.fq - if [ "$clear" = "true" ]; then work_regex=".*/work/.{2}/.{30}/.*" @@ -58,6 +53,25 @@ process remove_host_{{ pid }} { } +process renamePE_{{ pid }} { + + tag { sample_id } + publishDir '' + + input: + set sample_if, file(fastq_pair} from OUT_remove_host_{{ pid }} + + output: + set sample_id , file("*.headersRenamed_*.fq.gz") into {{ output_channel }} + {% with task_name="renamePE" %} + {%- include "compiler_channels.txt" ignore missing -%} + {% endwith %} + + script: + template "renamePE_samtoolsFASTQ.py" + +} + process report_remove_host_{{ 
pid }} { diff --git a/flowcraft/generator/templates/retrieve_mapped.nf b/flowcraft/generator/templates/retrieve_mapped.nf index 4a906c11..ee4356e1 100644 --- a/flowcraft/generator/templates/retrieve_mapped.nf +++ b/flowcraft/generator/templates/retrieve_mapped.nf @@ -10,7 +10,7 @@ process retrieve_mapped_{{ pid }} { set sample_id, file(bam) from {{ input_channel }} output: - set sample_id , file("*.headersRenamed_*.fq.gz") into {{ output_channel }} + set sample_id , file("*_mapped_*.fq") into OUT_retrieve_mapped_{{ pid }} {% with task_name="retrieve_mapped" %} {%- include "compiler_channels.txt" ignore missing -%} {% endwith %} @@ -25,12 +25,26 @@ process retrieve_mapped_{{ pid }} { rm ${sample_id}_samtools.bam - renamePE_samtoolsFASTQ.py -1 ${sample_id}_mapped_1.fq -2 ${sample_id}_mapped_2.fq + """ +} - gzip *.headersRenamed_*.fq +process renamePE_{{ pid }} { + + tag { sample_id } + publishDir '' + + input: + set sample_if, file(fastq_pair} from OUT_retrieve_mapped_{{ pid }} + + output: + set sample_id , file("*.headersRenamed_*.fq.gz") into {{ output_channel }} + {% with task_name="renamePE" %} + {%- include "compiler_channels.txt" ignore missing -%} + {% endwith %} + + script: + template "renamePE_samtoolsFASTQ.py" - rm *.fq - """ } {{ forks }} \ No newline at end of file diff --git a/flowcraft/templates/renamePE_samtoolsFASTQ.py b/flowcraft/templates/renamePE_samtoolsFASTQ.py new file mode 100755 index 00000000..c3fa582f --- /dev/null +++ b/flowcraft/templates/renamePE_samtoolsFASTQ.py @@ -0,0 +1,190 @@ +#!/usr/bin/env python3 + +# -*- coding: utf-8 -*- + +import os +import sys +import bz2 +import gzip +import zipfile +from flowcraft_utils.flowcraft_base import get_logger, MainWrapper + +""" +Purpose +------- + +This module renames the fastq headers with PE terminations +that were not include in samtools fastq command + + +Expected input +-------------- + +The following variables are expected whether using NextFlow or the +:py:func:`main` executor. 
+ +- ``sample_id``: Sample Identification string. + - e.g.: ``'SampleA'`` +- ``fastq_pair`` : Pair of FastQ file paths. + - e.g.: ``'SampleA_1.fastq.gz SampleA_2.fastq.gz'` + +Generated output +---------------- +- ``fastq_pair`` : Pair of FastQ file paths with rename headers. + - e.g.: ``'SampleA_1.fastq.gz SampleA_2.fastq.gz'` + +Code documentation +------------------ + +""" + +__version__ = "1.0.1" +__build__ = "09.09.2019" +__template__ = "retrieved_mapped-nf" + +if __file__.endswith(".command.sh"): + SAMPLE_ID = '$sample_id' + FASTQ_PAIR = '$fastq_pair'.split() + logger.debug("Running {} with parameters:".format( + os.path.basename(__file__))) + logger.debug("SAMPLE_ID: {}".format(SAMPLE_ID)) + logger.debug("FASTQ_PAIR: {}".format(FASTQ_PAIR)) + + +COPEN = { + "gz": gzip.open, + "bz2": bz2.open, + "zip": zipfile.ZipFile +} + +MAGIC_DICT = { + b"\\x1f\\x8b\\x08": "gz", + b"\\x42\\x5a\\x68": "bz2", + b"\\x50\\x4b\\x03\\x04": "zip" +} + + +def guess_file_compression(file_path, magic_dict=None): + """Guesses the compression of an input file. + + This function guesses the compression of a given file by checking for + a binary signature at the beginning of the file. These signatures are + stored in the :py:data:`MAGIC_DICT` dictionary. The supported compression + formats are gzip, bzip2 and zip. If none of the signatures in this + dictionary are found at the beginning of the file, it returns ``None``. + + Parameters + ---------- + file_path : str + Path to input file. + magic_dict : dict, optional + Dictionary containing the signatures of the compression types. The + key should be the binary signature and the value should be the + compression format. If left ``None``, it falls back to + :py:data:`MAGIC_DICT`. + + Returns + ------- + file_type : str or None + If a compression type is detected, returns a string with the format. + If not, returns ``None``. 
+ """ + + if not magic_dict: + magic_dict = MAGIC_DICT + + max_len = max(len(x) for x in magic_dict) + + with open(file_path, "rb") as f: + file_start = f.read(max_len) + + logger.debug("Binary signature start: {}".format(file_start)) + + for magic, file_type in magic_dict.items(): + if file_start.startswith(magic): + return file_type + + return None + + +def formartFastqHeaders(sample_name, in_fastq_1, in_fastq_2): + out_fastq_1 = os.path.join(os.getcwd(), sample_name + '.headersRenamed_1.fq') + out_fastq_2 = os.path.join(os.getcwd(), sample_name + '.headersRenamed_2.fq') + + writer_in_fastq_1 = open(out_fastq_1, 'wt') + writer_in_fastq_2 = open(out_fastq_2, 'wt') + + outfiles = [out_fastq_1, out_fastq_2] + + with open(in_fastq_1, 'rtU') as reader_in_fastq_1, open(in_fastq_2, 'rtU') as reader_in_fastq_2: + plus_line = True + quality_line = True + number_reads = 0 + for in_1, in_2 in zip(reader_in_fastq_1, reader_in_fastq_2): + if len(in_1) > 0: + in_1 = in_1.splitlines()[0] + in_2 = in_2.splitlines()[0] + + if in_1.startswith('@') and plus_line and quality_line: + if in_1 != in_2: + sys.exit('The PE fastq files are not aligned properly!') + in_1 += '/1' + '\n' + in_2 += '/2' + '\n' + writer_in_fastq_1.write(in_1) + writer_in_fastq_2.write(in_2) + plus_line = False + quality_line = False + elif in_1.startswith('+') and not plus_line: + in_1 += '\n' + writer_in_fastq_1.write(in_1) + writer_in_fastq_2.write(in_1) + plus_line = True + elif plus_line and not quality_line: + in_1 += '\n' + in_2 += '\n' + writer_in_fastq_1.write(in_1) + writer_in_fastq_2.write(in_2) + writer_in_fastq_1.flush() + writer_in_fastq_2.flush() + number_reads += 1 + quality_line = True + else: + in_1 += '\n' + in_2 += '\n' + writer_in_fastq_1.write(in_1) + writer_in_fastq_2.write(in_2) + return number_reads, outfiles + + +def main(sample_id, fastq_files): + + logger.info("STARTING renamePE_samtoolsFASTQ.py") + + file_objects = [] + + for fastq in fastq_files: + + logger.info("Processing file 
{}".format(fastq)) + + logger.info("[{}] Guessing file compression".format(fastq)) + ftype = guess_file_compression(fastq) + + # This can guess the compression of gz, bz2 and zip. If it cannot + # find the compression type, it tries to open a regular file. + if ftype: + logger.info("[{}] Found file compression: {}".format(fastq, ftype)) + file_objects.append(COPEN[ftype](fastq, "rt")) + else: + logger.info("[{}] File compression not found. Assuming an uncompressed file".format(fastq)) + file_objects.append(open(fastq)) + + logger.info('Renaming fastq headers') + number_reads, outfiles = formartFastqHeaders(sample_id, file_objects[0], file_objects[1]) + + logger.info('{} read pairs were written in {} outfiles'.format(number_reads, outfiles)) + + os.remove(fastq_files) + + +if __name__ == "__main__": + main(SAMPLE_ID, FASTQ_PAIR) From b03b42770b6da465368a3ff1c41524bc527cdd94 Mon Sep 17 00:00:00 2001 From: cimendes Date: Mon, 9 Sep 2019 18:20:51 +0100 Subject: [PATCH 4/8] add missing status channel to remove_host --- flowcraft/generator/components/metagenomics.py | 1 + 1 file changed, 1 insertion(+) diff --git a/flowcraft/generator/components/metagenomics.py b/flowcraft/generator/components/metagenomics.py index 30eaa60b..880ea700 100644 --- a/flowcraft/generator/components/metagenomics.py +++ b/flowcraft/generator/components/metagenomics.py @@ -475,6 +475,7 @@ def __init__(self, **kwargs): self.status_channels = [ "remove_host", + "renamePE", "report_remove_host" ] From 22985ae69df86de2e88dc81e0aa59182a5960598 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?In=C3=AAs=20Mendes?= Date: Mon, 16 Sep 2019 14:41:48 +0100 Subject: [PATCH 5/8] update with changes in dev (#223) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Dag files (#209) * move DAG JSON files to the resources directory * added manifest information to the `nextflow.config` file to allow for remote execution (#204) - Partial solve to #194 issue - Deprecation of the 
`manifest.config` file
+ ## 1.4.2 ### New components diff --git a/flowcraft/flowcraft.py b/flowcraft/flowcraft.py index 69a5d5ff..cde04c26 100755 --- a/flowcraft/flowcraft.py +++ b/flowcraft/flowcraft.py @@ -358,6 +358,11 @@ def build(args): logger.info(colored_print("Building your awesome pipeline...")) + # copy template to cwd, to allow for immediate execution + # this is executed before the build() method to avoid issues with the resources/ folder + if not args.pipeline_only: + copy_project(parsed_output_nf) + if args.export_params: nfg.export_params() sys.exit(0) @@ -371,10 +376,6 @@ def build(args): # building the actual pipeline nf file nfg.build() - # copy template to cwd, to allow for immediate execution - if not args.pipeline_only: - copy_project(parsed_output_nf) - logger.info(colored_print("DONE!", "green_bold")) diff --git a/flowcraft/generator/components/mapping.py b/flowcraft/generator/components/mapping.py index f29ab6b0..f178ab11 100644 --- a/flowcraft/generator/components/mapping.py +++ b/flowcraft/generator/components/mapping.py @@ -246,4 +246,4 @@ def __init__(self, **kwargs): self.status_channels = [ "base_recalibrator", "apply_bqsr" - ] \ No newline at end of file + ] diff --git a/flowcraft/generator/components/metagenomics.py b/flowcraft/generator/components/metagenomics.py index 880ea700..5cd6b377 100644 --- a/flowcraft/generator/components/metagenomics.py +++ b/flowcraft/generator/components/metagenomics.py @@ -28,7 +28,7 @@ def __init__(self, **kwargs): self.input_type = "fasta" self.output_type = "fasta" - self.link_end.append({"link": "__fastq", "alias": "_LAST_fastq"}) + self.link_end.append({"link": "SIDE_max_len", "alias": "SIDE_max_len"}) self.params = { "clusters": { @@ -562,3 +562,62 @@ def __init__(self, **kwargs): self.status_channels = [ "split_assembly" ] + + +class Vamb(Process): + """ + Vamb process template interface for the + taxonomic independent binning of metagenomic + assemblies. 
+ + This process is set with: + - ``input_type``: assembly + - ``output_type``: assembly + - ``ptype``: post_assembly + + It contains one **dependency process**: + + - ``assembly_mapping``: Requires the BAM file generated by the + assembly mapping process + + """ + def __init__(self, **kwargs): + + super().__init__(**kwargs) + + self.input_type = "fasta" + self.output_type = "fasta" + + self.dependencies = ["assembly_mapping"] + + self.params = { + "minContig": { + "default": 2000, + "description": "Ignore contigs shorter than this. Default: 2000" + }, + "minAlignScore":{ + "default": 50, + "description": "Ignore reads with alignment score below this. Default: 50" + }, + "clearInput": { + "default": "false", + "description": + "Permanently removes temporary input files. This option " + "is only useful to remove temporary files in large " + "workflows and prevents nextflow's resume functionality. " + "Use with caution." + } + } + + self.directives = { + "vamb": { + "container": "flowcraft/vamb", + "version": "1.0.1-1", + "cpus": 4, + "memory": "{ 5.GB * task.attempt }" + } + } + + self.status_channels = [ + "vamb" + ] diff --git a/flowcraft/generator/components/variant_calling.py b/flowcraft/generator/components/variant_calling.py index dd42c1bb..85f31d53 100644 --- a/flowcraft/generator/components/variant_calling.py +++ b/flowcraft/generator/components/variant_calling.py @@ -49,4 +49,4 @@ def __init__(self, **kwargs): self.status_channels = [ "haplotypecaller", "merge_vcfs" - ] \ No newline at end of file + ] diff --git a/flowcraft/generator/engine.py b/flowcraft/generator/engine.py index f2ff2e32..f844cc26 100644 --- a/flowcraft/generator/engine.py +++ b/flowcraft/generator/engine.py @@ -1300,21 +1300,23 @@ def _set_configurations(self): }) self.user_config = self._render_config("user.config", {}) - def dag_to_file(self, dict_viz, output_file=".treeDag.json"): - """Writes dag to output file + def dag_info_to_file(self, dict_viz, output_folder, output_file): + 
"""Writes dag or fork information to output file Parameters ---------- dict_viz: dict Tree like dictionary that is used to export tree data of processes - to html file and here for the dotfile .treeDag.json + to html file and here for the treeDag.json, stored in the resources directory + output_folder: str + Path folder to save the output file + output_file: str + Output file name """ - outfile_dag = open(os.path.join(dirname(self.nf_file), output_file) - , "w") - outfile_dag.write(json.dumps(dict_viz)) - outfile_dag.close() + with open(os.path.join(output_folder, output_file), "w") as outfile: + outfile.write(json.dumps(dict_viz)) def render_pipeline(self): """Write pipeline attributes to json @@ -1379,13 +1381,16 @@ def render_pipeline(self): last_of_us[p.lane] = lst[-1]["children"] + # check if resources dir exists - necessary for dag files + resources_dir = os.path.join(dirname(self.nf_file), "resources") + if not os.path.exists(resources_dir): + os.mkdir(resources_dir) + # write to file dict_viz - self.dag_to_file(dict_viz) + self.dag_info_to_file(dict_viz, resources_dir, "treeDag.json") - # Write tree forking information for dotfile - with open(os.path.join(dirname(self.nf_file), - ".forkTree.json"), "w") as fh: - fh.write(json.dumps(self._fork_tree)) + # Write tree forking information + self.dag_info_to_file(self._fork_tree, resources_dir, "forkTree.json") # send with jinja to html resource return self._render_config("pipeline_graph.html", {"data": dict_viz}) diff --git a/flowcraft/generator/error_handling.py b/flowcraft/generator/error_handling.py index 8cb3a515..a6e10c6a 100644 --- a/flowcraft/generator/error_handling.py +++ b/flowcraft/generator/error_handling.py @@ -34,6 +34,7 @@ def __init__(self, value): # def __str__(self): # return repr(self.value) + class LogError(Exception): def __init__(self, value): self.value = "Log ERROR: {}".format(value) diff --git a/flowcraft/generator/inspect.py b/flowcraft/generator/inspect.py index c3a4e8b1..3838c5ec 
100644 --- a/flowcraft/generator/inspect.py +++ b/flowcraft/generator/inspect.py @@ -1465,8 +1465,8 @@ def _prepare_static_info(self): return pipeline_files def _dag_file_to_dict(self): - """Function that opens the dotfile named .treeDag.json in the current - working directory + """Function that opens the accessory file treeDag.json in the + resources directory and loads it's contents to a dictionary Returns ------- @@ -1475,11 +1475,11 @@ def _dag_file_to_dict(self): """ try: - dag_file = open(os.path.join(self.workdir, ".treeDag.json")) + dag_file = open(os.path.join(self.workdir, "resources", "treeDag.json")) dag_json = json.load(dag_file) except (FileNotFoundError, json.decoder.JSONDecodeError): logger.warning(colored_print( - "WARNING: dotfile named .treeDag.json not found or corrupted", + "WARNING: JSON file named treeDag.json not found or corrupted", "red_bold")) dag_json = {} diff --git a/flowcraft/generator/templates/Helper.groovy b/flowcraft/generator/templates/Helper.groovy index 58a2ef28..6c7f3b1e 100644 --- a/flowcraft/generator/templates/Helper.groovy +++ b/flowcraft/generator/templates/Helper.groovy @@ -65,8 +65,8 @@ class CollectInitialMetadata { public static void print_metadata(nextflow.script.WorkflowMetadata workflow){ - def treeDag = new File("${workflow.projectDir}/.treeDag.json").text - def forkTree = new File("${workflow.projectDir}/.forkTree.json").text + def treeDag = new File("${workflow.projectDir}/resources/treeDag.json").text + def forkTree = new File("${workflow.projectDir}/resources/forkTree.json").text def metadataJson = "{'nfMetadata':{'scriptId':'${workflow.scriptId}',\ 'scriptName':'${workflow.scriptName}',\ diff --git a/flowcraft/generator/templates/downsample_fastq.nf b/flowcraft/generator/templates/downsample_fastq.nf index db7fcc3b..fea53010 100644 --- a/flowcraft/generator/templates/downsample_fastq.nf +++ b/flowcraft/generator/templates/downsample_fastq.nf @@ -17,7 +17,8 @@ process downsample_fastq_{{ pid }} { {% include 
"post.txt" ignore missing %} tag { "${sample_id}" } - publishDir "results/downsample_fastq_{{ pid }}/", pattern: "_ss.*" + + publishDir "results/downsample_fastq_{{ pid }}/", pattern: "*_ss_*.*" input: set sample_id, file(fastq_pair) from {{ input_channel }} @@ -27,7 +28,7 @@ process downsample_fastq_{{ pid }} { val clear from checkpointClear_{{ pid }} output: - set sample_id, file('*_ss.*') into {{ output_channel }} + set sample_id, file('*_ss_*.*') into {{ output_channel }} {% with task_name="downsample_fastq" %} {%- include "compiler_channels.txt" ignore missing -%} {% endwith %} @@ -37,4 +38,4 @@ process downsample_fastq_{{ pid }} { } -{{ forks }} \ No newline at end of file +{{ forks }} diff --git a/flowcraft/generator/templates/vamb.nf b/flowcraft/generator/templates/vamb.nf new file mode 100644 index 00000000..78c34375 --- /dev/null +++ b/flowcraft/generator/templates/vamb.nf @@ -0,0 +1,50 @@ +IN_min_contig_{{ pid }} = Channel.value(params.minContig{{ param_id }}) +IN_min_align_score_{{ pid }} = Channel.value(params.minAlignScore{{ param_id }}) + +clear = params.clearInput{{ param_id }} ? 
"true" : "false" +checkpointClear_{{ pid }} = Channel.value(clear) + +process vamb_{{ pid }} { + + // Send POST request to platform + {% include "post.txt" ignore missing %} + + tag { sample_id } + + //publishDir "results/assembly/binning/vamb_{{ pid }}/${sample_id}/" + + input: + set sample_id, file(assembly), file(bam_file), file(bam_index) from {{ input_channel }} + val length_threshold from IN_min_contig_{{ pid }} + val min_score from IN_min_align_score_{{ pid }} + val clear from checkpointClear_{{ pid }} + + output: + + {% with task_name="vamb"%} + {%- include "compiler_channels.txt" ignore missing -%} + {% endwith %} + + script: + """ + { + # run METABAT2 + run.py results/ ${assembly} ${bam_file} -m ${length_threshold} -s ${min_score} + + # In case no sequences are binned + if [ -z "\$(ls -A *metabat-bins*/)" ]; then + echo "false" > false_bin.fa + mv false_bin.fa *metabat-bins*/ + echo "false" > bin_status.txt; + else + echo "true" > bin_status.txt + fi + + } || { + echo fail > .status + } + """ +} + + +{{ forks }} \ No newline at end of file diff --git a/flowcraft/templates/downsample_fastq.py b/flowcraft/templates/downsample_fastq.py index 3f7bf746..d8e35c3c 100755 --- a/flowcraft/templates/downsample_fastq.py +++ b/flowcraft/templates/downsample_fastq.py @@ -36,14 +36,15 @@ """ -__version__ = "1.0.0" -__build__ = "30072018" +__version__ = "1.0.1" +__build__ = "21062019" __template__ = "sample_fastq-nf" import os import re import json import subprocess +import shutil from os.path import basename @@ -133,7 +134,8 @@ def main(sample_id, fastq_pair, genome_size, depth, clear, seed): # print ("Writing R1.fq.gz") ps = subprocess.Popen(('seqtk', 'sample', parsed_seed, p1, str(ratio)), stdout=subprocess.PIPE) - with open('{}_ss.fq.gz'.format(bn1), 'w') as outfile: + with open('{}_ss_{}.fq.gz'.format(bn1, str(target_depth)), 'w') as \ + outfile: subprocess.Popen(('gzip', '--fast', '-c'), stdin=ps.stdout, stdout=outfile ) ps.wait() @@ -141,7 +143,8 @@ def 
main(sample_id, fastq_pair, genome_size, depth, clear, seed): # print ("Writing R2.fq.gz") ps = subprocess.Popen(('seqtk', 'sample', parsed_seed, p2, str(ratio)), stdout=subprocess.PIPE) - with open('{}_ss.fq.gz'.format(bn2), 'w') as outfile: + with open('{}_ss_{}.fq.gz'.format(bn2, str(target_depth)), 'w') as \ + outfile: subprocess.Popen(('gzip', '--fast', '-c'), stdin=ps.stdout, stdout=outfile) ps.wait() @@ -156,8 +159,8 @@ def main(sample_id, fastq_pair, genome_size, depth, clear, seed): os.remove(rp) else: - os.symlink(p1, "{}._ss.fq.gz".format(bn1)) - os.symlink(p2, "{}._ss.fq.gz".format(bn2)) + os.symlink(p1, "{}_ss_{}.fq.gz".format(bn1, str(target_depth))) + os.symlink(p2, "{}_ss_{}.fq.gz".format(bn2, str(target_depth))) # Record the original estimated coverage with open(".report.json", "w") as fh: diff --git a/flowcraft/templates/trimmomatic.py b/flowcraft/templates/trimmomatic.py index eefb3cce..0c905fc0 100644 --- a/flowcraft/templates/trimmomatic.py +++ b/flowcraft/templates/trimmomatic.py @@ -46,7 +46,7 @@ # TODO: More control over read trimming # TODO: Add option to remove adapters -# TODO: What to do when there is encoding failure +# TODO: What to do when there is encoding failure - forcing phred33 at the moment __version__ = "1.0.3" __build__ = "29062018" @@ -283,6 +283,43 @@ def merge_default_adapters(): return filepath +def run_trimmomatic(cli, logfile, sample_id): + """ + Runs trimmomatic command + Parameters + ---------- + cli : lst + list containing trimmomatic command + logfile : str + Path to file for trimmomatic to write log + sample_id: str + Sample Identification string. + """ + + logger.debug("Running trimmomatic subprocess with command: {}".format(cli)) + + p = subprocess.Popen(cli, stdout=PIPE, stderr=PIPE) + stdout, stderr = p.communicate() + + # Attempt to decode STDERR output from bytes. 
If unsuccessful, coerce to + # string + try: + stderr = stderr.decode("utf8") + except (UnicodeDecodeError, AttributeError): + stderr = str(stderr) + + logger.info("Finished trimmomatic subprocess with STDOUT:\\n" + "======================================\\n{}".format(stdout)) + logger.info("Finished trimmomatic subprocesswith STDERR:\\n" + "======================================\\n{}".format(stderr)) + logger.info("Finished trimmomatic with return code: {}".format( + p.returncode)) + + trimmomatic_log(logfile, sample_id) + + return p.returncode + + @MainWrapper def main(sample_id, fastq_pair, trim_range, trim_opts, phred, adapters_file, clear): @@ -329,10 +366,12 @@ def main(sample_id, fastq_pair, trim_range, trim_opts, phred, adapters_file, phred = int(phred) phred_flag = "-phred{}".format(str(phred)) cli += [phred_flag] - # Could not detect phred encoding. Do not add explicit encoding to - # trimmomatic and let it guess + # Could not detect phred encoding. + # Forcing as phred33 to avoid encoding errors except ValueError: - pass + logger.info("Could not detect quality encoding. Setting it to phred33") + phred_flag = "-phred33" + cli += [phred_flag] # Add input samples to CLI cli += fastq_pair @@ -378,37 +417,32 @@ def main(sample_id, fastq_pair, trim_range, trim_opts, phred, adapters_file, logfile ] - logger.debug("Running trimmomatic subprocess with command: {}".format(cli)) - - p = subprocess.Popen(cli, stdout=PIPE, stderr=PIPE) - stdout, stderr = p.communicate() - - # Attempt to decode STDERR output from bytes. 
If unsuccessful, coerce to - # string - try: - stderr = stderr.decode("utf8") - except (UnicodeDecodeError, AttributeError): - stderr = str(stderr) - - logger.info("Finished trimmomatic subprocess with STDOUT:\\n" - "======================================\\n{}".format(stdout)) - logger.info("Finished trimmomatic subprocesswith STDERR:\\n" - "======================================\\n{}".format(stderr)) - logger.info("Finished trimmomatic with return code: {}".format( - p.returncode)) - - trimmomatic_log(logfile, sample_id) + returncode = run_trimmomatic(cli, logfile, sample_id) - if p.returncode == 0 and os.path.exists("{}_1_trim.fastq.gz".format( + if returncode == 0 and os.path.exists("{}_1_trim.fastq.gz".format( SAMPLE_ID)): clean_up(fastq_pair, clear) # Check if trimmomatic ran successfully. If not, write the error message # to the status channel and exit. with open(".status", "w") as status_fh: - if p.returncode != 0: - status_fh.write("fail") - return + if returncode != 0: + # retry to run trimmomatic by changing the encoding from phred33 to phred64 + if "-phred33" in cli: + + logger.info("Trimmomatic failed while running with phred33. 
Setting it to phred64 and trying again...") + cli[7] = "-phred64" + + returncode = run_trimmomatic(cli, logfile, sample_id) + + if returncode != 0: + status_fh.write("fail") + return + else: + status_fh.write("pass") + else: + status_fh.write("fail") + return else: status_fh.write("pass") diff --git a/flowcraft/tests/test_assemblerflow.py b/flowcraft/tests/test_flowcraft.py similarity index 87% rename from flowcraft/tests/test_assemblerflow.py rename to flowcraft/tests/test_flowcraft.py index 684b6ed6..5810978d 100644 --- a/flowcraft/tests/test_assemblerflow.py +++ b/flowcraft/tests/test_flowcraft.py @@ -51,12 +51,13 @@ def test_build_file_2(tmp): "{}".format(p), "--pipeline-only"]) af.build(args) - assert sorted(os.listdir(tmp)) == [".forkTree.json", ".treeDag.json", - "containers.config", + assert sorted(os.listdir(tmp)) == ["containers.config", "lib", "nextflow.config", "params.config", - "resources.config", "teste.html", + "resources", "resources.config", "teste.html", "teste.nf", "user.config"] + assert sorted(os.listdir(os.path.join(tmp, "resources"))) == ["forkTree.json", "treeDag.json"] + def test_build_recipe(tmp): From 0e534cc07778cca33c5b9f7b0e5c863bd0fbc24c Mon Sep 17 00:00:00 2001 From: cimendes Date: Mon, 16 Sep 2019 14:53:48 +0100 Subject: [PATCH 6/8] update changelog and fix bug on remove_host component --- changelog.md | 2 ++ flowcraft/generator/templates/remove_host.nf | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/changelog.md b/changelog.md index 9e897e58..9dadd041 100644 --- a/changelog.md +++ b/changelog.md @@ -7,6 +7,8 @@ - Fix bug in `downsample_fastq` where the resulting output files was not being saved in the `results` directory - Fix bug in `downsample_fastq` where the output files were being saved as broken symlinks when there was no down-sampling occurring +- Moved `renamePE_samtoolsFASTQ.py` from `flowcraft/bin/` to the `flowcraft/templates` folder and updated it to +python3 - fix issue #219 ### Minor/Other 
changes diff --git a/flowcraft/generator/templates/remove_host.nf b/flowcraft/generator/templates/remove_host.nf index 825f8f64..2ef44a0b 100644 --- a/flowcraft/generator/templates/remove_host.nf +++ b/flowcraft/generator/templates/remove_host.nf @@ -17,7 +17,7 @@ process remove_host_{{ pid }} { val clear from checkpointClear_{{ pid }} output: - set sample_id , file("${sample_id}*.headersRenamed_*.fq.gz") into OUT_remove_host_{{ pid }} + set sample_id , file("${sample_id}_unmapped_*.fq") into OUT_remove_host_{{ pid }} set sample_id, file("*_bowtie2.log") into into_json_{{ pid }} {% with task_name="remove_host" %} {%- include "compiler_channels.txt" ignore missing -%} @@ -59,7 +59,7 @@ process renamePE_{{ pid }} { publishDir '' input: - set sample_if, file(fastq_pair} from OUT_remove_host_{{ pid }} + set sample_if, file{fastq_pair} from OUT_remove_host_{{ pid }} output: set sample_id , file("*.headersRenamed_*.fq.gz") into {{ output_channel }} From cc6ac866f28330890be4b464f61042371abf9683 Mon Sep 17 00:00:00 2001 From: cimendes Date: Mon, 16 Sep 2019 18:47:20 +0100 Subject: [PATCH 7/8] fixed a bunch of bugs in "renamePE" processes and template --- flowcraft/generator/templates/remove_host.nf | 7 ++- .../generator/templates/retrieve_mapped.nf | 3 +- flowcraft/templates/renamePE_samtoolsFASTQ.py | 46 +++++++++++++------ 3 files changed, 37 insertions(+), 19 deletions(-) diff --git a/flowcraft/generator/templates/remove_host.nf b/flowcraft/generator/templates/remove_host.nf index 2ef44a0b..b7312454 100644 --- a/flowcraft/generator/templates/remove_host.nf +++ b/flowcraft/generator/templates/remove_host.nf @@ -55,11 +55,14 @@ process remove_host_{{ pid }} { process renamePE_{{ pid }} { + // Send POST request to platform + {% include "post.txt" ignore missing %} + tag { sample_id } - publishDir '' + publishDir 'results/mapping/remove_host_{{ pid }}/' input: - set sample_if, file{fastq_pair} from OUT_remove_host_{{ pid }} + set sample_id, file(fastq_pair) from 
OUT_remove_host_{{ pid }} output: set sample_id , file("*.headersRenamed_*.fq.gz") into {{ output_channel }} diff --git a/flowcraft/generator/templates/retrieve_mapped.nf b/flowcraft/generator/templates/retrieve_mapped.nf index ee4356e1..19424f38 100644 --- a/flowcraft/generator/templates/retrieve_mapped.nf +++ b/flowcraft/generator/templates/retrieve_mapped.nf @@ -4,7 +4,6 @@ process retrieve_mapped_{{ pid }} { {% include "post.txt" ignore missing %} tag { sample_id } - publishDir 'results/mapping/retrieve_mapped_{{ pid }}/' input: set sample_id, file(bam) from {{ input_channel }} @@ -31,7 +30,7 @@ process retrieve_mapped_{{ pid }} { process renamePE_{{ pid }} { tag { sample_id } - publishDir '' + publishDir 'results/mapping/retrieve_mapped_{{ pid }}/' input: set sample_if, file(fastq_pair} from OUT_retrieve_mapped_{{ pid }} diff --git a/flowcraft/templates/renamePE_samtoolsFASTQ.py b/flowcraft/templates/renamePE_samtoolsFASTQ.py index c3fa582f..a9da48d5 100755 --- a/flowcraft/templates/renamePE_samtoolsFASTQ.py +++ b/flowcraft/templates/renamePE_samtoolsFASTQ.py @@ -9,6 +9,8 @@ import zipfile from flowcraft_utils.flowcraft_base import get_logger, MainWrapper +logger = get_logger(__file__) + """ Purpose ------- @@ -116,7 +118,7 @@ def formartFastqHeaders(sample_name, in_fastq_1, in_fastq_2): outfiles = [out_fastq_1, out_fastq_2] - with open(in_fastq_1, 'rtU') as reader_in_fastq_1, open(in_fastq_2, 'rtU') as reader_in_fastq_2: + with open(in_fastq_1, 'r') as reader_in_fastq_1, open(in_fastq_2, 'r') as reader_in_fastq_2: plus_line = True quality_line = True number_reads = 0 @@ -127,21 +129,21 @@ def formartFastqHeaders(sample_name, in_fastq_1, in_fastq_2): if in_1.startswith('@') and plus_line and quality_line: if in_1 != in_2: - sys.exit('The PE fastq files are not aligned properly!') - in_1 += '/1' + '\n' - in_2 += '/2' + '\n' + sys.exit('The PE fastq files are not aligned properly!') + in_1 += '/1' + '\\n' + in_2 += '/2' + '\\n' writer_in_fastq_1.write(in_1) 
writer_in_fastq_2.write(in_2) plus_line = False quality_line = False elif in_1.startswith('+') and not plus_line: - in_1 += '\n' + in_1 += '\\n' writer_in_fastq_1.write(in_1) writer_in_fastq_2.write(in_1) plus_line = True elif plus_line and not quality_line: - in_1 += '\n' - in_2 += '\n' + in_1 += '\\n' + in_2 += '\\n' writer_in_fastq_1.write(in_1) writer_in_fastq_2.write(in_2) writer_in_fastq_1.flush() @@ -149,10 +151,14 @@ def formartFastqHeaders(sample_name, in_fastq_1, in_fastq_2): number_reads += 1 quality_line = True else: - in_1 += '\n' - in_2 += '\n' + in_1 += '\\n' + in_2 += '\\n' writer_in_fastq_1.write(in_1) writer_in_fastq_2.write(in_2) + + writer_in_fastq_1.close() + writer_in_fastq_2.close() + return number_reads, outfiles @@ -164,26 +170,36 @@ def main(sample_id, fastq_files): for fastq in fastq_files: - logger.info("Processing file {}".format(fastq)) + logger.info("Processing file {}".format(fastq)) - logger.info("[{}] Guessing file compression".format(fastq)) - ftype = guess_file_compression(fastq) + logger.info("[{}] Guessing file compression".format(fastq)) + ftype = guess_file_compression(fastq) # This can guess the compression of gz, bz2 and zip. If it cannot # find the compression type, it tries to open a regular file. if ftype: logger.info("[{}] Found file compression: {}".format(fastq, ftype)) file_objects.append(COPEN[ftype](fastq, "rt")) + else: logger.info("[{}] File compression not found. Assuming an uncompressed file".format(fastq)) - file_objects.append(open(fastq)) + file_objects.append(fastq) logger.info('Renaming fastq headers') number_reads, outfiles = formartFastqHeaders(sample_id, file_objects[0], file_objects[1]) - logger.info('{} read pairs were written in {} outfiles'.format(number_reads, outfiles)) + logger.info('{} read pairs were written in {} and {}. 
Compressing...'.format(number_reads, outfiles[0], outfiles[1])) + + # compress outfiles + for file in outfiles: + with open(file, 'rb') as f_in: + f_out = gzip.open(file + '.gz', 'wb') + f_out.writelines(f_in) + f_out.close() + logger.info('DONE') - os.remove(fastq_files) + os.remove(outfiles[0]) + os.remove((outfiles[1])) if __name__ == "__main__": From 05b3dcaba4f6d49b3b0df47b73b4555cc70c62fd Mon Sep 17 00:00:00 2001 From: cimendes Date: Mon, 16 Sep 2019 21:31:15 +0100 Subject: [PATCH 8/8] fix issues with bugs in "renamePE" process in the "retrieve_mapped" component --- flowcraft/generator/templates/retrieve_mapped.nf | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/flowcraft/generator/templates/retrieve_mapped.nf b/flowcraft/generator/templates/retrieve_mapped.nf index 19424f38..be371b17 100644 --- a/flowcraft/generator/templates/retrieve_mapped.nf +++ b/flowcraft/generator/templates/retrieve_mapped.nf @@ -32,8 +32,10 @@ process renamePE_{{ pid }} { tag { sample_id } publishDir 'results/mapping/retrieve_mapped_{{ pid }}/' + {% include "post.txt" ignore missing %} + input: - set sample_if, file(fastq_pair} from OUT_retrieve_mapped_{{ pid }} + set sample_id, file(fastq_pair) from OUT_retrieve_mapped_{{ pid }} output: set sample_id , file("*.headersRenamed_*.fq.gz") into {{ output_channel }}