Skip to content

enhance operations #158

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,4 @@ testdemo.py
/test_data/multi_fast5_demo.tar.gz
/test_data/na12878_chr22_p3_100.tar.gz
/test_data/NA19240_RRBS_ENCFF000LZS_chr22.txt.gz
/test.nf
18 changes: 16 additions & 2 deletions modules/UNTAR.nf
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,27 @@ process UNTAR {
### deal with tar.gz
tar -xzf !{fast5Input} -C untarTempDir
elif [[ -d !{fast5Input} ]]; then
## Copy files, do not change original files such as old analyses data
find !{fast5Input}/ -name '*.fast5' | \
## For dir, should copy files, we do not want to change original files such as old analyses data in fast5
find !{fast5Input}/ \\( -name "*.fast5" -o -name "*.pod5" \\) | \
parallel -j!{cores} cp -L -f {} untarTempDir/
else
echo "### Untar error for input=!{fast5Input}"
fi

# convert pod5 to fast5
# Only runs when the pipeline is invoked with --pod5 true.
if [[ !{params.pod5} == true ]] ; then
# Set the already-collected inputs aside so untarTempDir can be rebuilt
# to hold the converted fast5 output.
mv untarTempDir untarTempDir_v2
mkdir -p untarTempDir_v3
# Move only the pod5 files into a staging dir for conversion;
# NOTE(review): -j0 lets GNU parallel spawn as many jobs as possible,
# unlike the -j!{cores} used for the copy step above — confirm intended.
find untarTempDir_v2/ -name '*.pod5' -type f |
parallel -j0 mv {} untarTempDir_v3/ -f

# Convert all staged pod5 files back into untarTempDir as fast5
# (-f forces overwrite of any existing output).
# NOTE(review): if the input contained no pod5 files, untarTempDir_v3 is
# empty and any fast5 files left in untarTempDir_v2 are deleted below —
# verify that is the intended behavior for mixed/fast5-only inputs.
mkdir -p untarTempDir
pod5 convert to_fast5 untarTempDir_v3/ \
--out untarTempDir/ \
-t !{cores} -f
rm -rf untarTempDir_v2 untarTempDir_v3
fi

if [[ !{params.multi_to_single_fast5} == true ]] ; then
echo "### Do multi_to_single_fast5"
untarTempDir=untarTempDir2
Expand Down
58 changes: 58 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ params {
clair3_docker_name = "hkubal/clair3:latest" // used for variant call
deepsignal2_docker_name = "liuyangzzu/deepsignal2:v1.0" // used for deepsignal v2
guppy_stable_name = "liuyangzzu/guppy_stable:v6.3.8" // solve issues for guppy meth call in v6.4.6
preprocessing_docker_name = "liuyangzzu/preprocessing" // for untar preprocessing

// process and executor configurations
executor = null
Expand All @@ -47,6 +48,7 @@ params {
dsname = null
input = null
outdir = "results"
pod5 = false

// Data type, can be human, ecoli, etc.
genome = "hg38"
Expand Down Expand Up @@ -267,6 +269,9 @@ profiles {
container = params.docker_name
containerOptions = params.containerOptions // or "--gpus all" Note: this is not compatible with GitHub citest/naive docker users

// UNTAR runs in the dedicated preprocessing image (declared as
// params.preprocessing_docker_name, used for untar preprocessing).
withName: 'UNTAR' {
container = params.preprocessing_docker_name
}
withName: 'Tombo|DeepMod|METEORE' {
container = params.tombo_docker_name
}
Expand Down Expand Up @@ -303,6 +308,10 @@ profiles {
container = params.singularity_name
containerOptions = params.containerOptions // "--nv"

// UNTAR runs in the dedicated preprocessing image. An absolute path is
// treated as a local Singularity image; otherwise pull from Docker Hub.
// BUG FIX: the docker:// fallback previously referenced
// params.tombo_docker_name (copy-paste error), which would pull the
// wrong image for non-local container names.
withName: 'UNTAR' {
container = params.preprocessing_docker_name.startsWith("/") ?
params.preprocessing_docker_name : "docker://${params.preprocessing_docker_name}"
}
withName: 'Tombo|DeepMod|METEORE' {
// container = "docker://${params.tombo_docker_name}"
// reserve local image for singularity, or append docker:// header for online image
Expand Down Expand Up @@ -432,6 +441,55 @@ profiles {
}
}

// HPC profile for the sumner2 cluster (SLURM + Singularity).
sumner2 {
    params {
        max_cpus = 72
        max_memory = 768.GB

        gpu_queue = 'gpus' // winter has only one partition
        gpu_qos = 'gpu_training' // or use training; time can be up to 14 days
        gpu_processors = 8
        gpu_memory = '128GB'
        gpu_time = '14.d'
        gpu_gresOptions = 'gpu:1' // null/false if no gpu needed

        cpu_queue = 'compute' // winter has only one partition
        cpu_qos = 'long' // or use training; time can be up to 14 days
        cpu_processors = 8
        cpu_memory = '128GB'
        cpu_time = '14.d'

        queueSize = 24
    }

    process {
        executor = "slurm"
        module = "slurm:singularity"

        // CPU-only processes.
        // FIX: removed the duplicated 'Guppy6Comb' entry that appeared twice
        // in this selector.
        // NOTE(review): 'METEORE' is matched by both this CPU selector and
        // the GPU selector below — confirm which queue it should use.
        withName: 'UNTAR|ALIGNMENT|QCEXPORT|RESQUIGGLE|NANOPOLISH|Tombo|Guppy6Comb|METEORE|CLAIR3|PHASING|CONSENSUS|EVAL|REPORT|NPLSHCOMB|MGLDNCOMB|DPSIGCOMB|DEEPSIGNAL2COMB|GuppyComb|TomboComb|DpmodComb' {
            queue = params.cpu_queue
            cpus = params.cpu_processors
            memory = params.cpu_memory
            time = params.cpu_time
            clusterOptions = "-q ${params.cpu_qos}"
        }

        // GPU processes; --gres is only requested when gpu_gresOptions is set.
        withName: 'ENVCHECK|BASECALL|MEGALODON|Guppy6|Guppy|DEEPSIGNAL|DEEPSIGNAL2|DeepMod|METEORE' {
            queue = params.gpu_queue
            cpus = params.gpu_processors
            memory = params.gpu_memory
            time = params.gpu_time
            clusterOptions = "-q ${params.gpu_qos} ${params.gpu_gresOptions ? '--gres=' + params.gpu_gresOptions : ' '}"
        }
    }

    executor {
        queueSize = params.queueSize
    }
}

// Google cloud computing platform
// ref doc: https://www.nextflow.io/docs/latest/google.html
// ref doc: https://www.nextflow.io/docs/latest/google.html#configuration
Expand Down
46 changes: 46 additions & 0 deletions other_envs/preprocessing/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# @Author : Yang Liu
# @FileName : Dockerfile
# @Software : NANOME project
# @Organization : JAX Li Lab
# @Website : https://github.com/LabShengLi/nanome

# NOTE(review): "ubuntu" floats to the latest tag; consider pinning a
# specific LTS tag (e.g. ubuntu:20.04) for reproducible builds — confirm
# that libnvidia-compute-460-server is available in the pinned release.
FROM ubuntu

# Author and maintainer.
# FIX: MAINTAINER is deprecated — use the maintainer LABEL instead.
LABEL maintainer="Yang Liu <yang.liu@jax.org>"
LABEL description="Nanome project in Li Lab at The Jackson Laboratory" \
      author="yang.liu@jax.org"

ARG BUILD_PACKAGES="wget apt-transport-https procps git curl libnvidia-compute-460-server"
ARG DEBIAN_FRONTEND="noninteractive"

# Install OS packages and trim apt caches to keep the image small.
# The inline DEBIAN_FRONTEND= re-assignment was removed: the ARG above
# already applies to every RUN in this stage.
RUN apt-get -q update && \
    apt-get -q install --yes ${BUILD_PACKAGES} && \
    apt-get autoremove --purge --yes && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Install Miniconda into /opt/conda (-b: batch/no prompts).
# NOTE(review): repo.continuum.io is a legacy host that redirects to
# repo.anaconda.com — consider updating the URL.
RUN wget -q https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O Miniconda.sh && \
    /bin/bash Miniconda.sh -b -p /opt/conda && \
    rm Miniconda.sh

# Adding conda to PATH (key=value ENV form is the current recommended syntax)
ENV PATH=/opt/conda/bin:$PATH

# Create the environment and drop package caches:
COPY preprocessing_env.yml /
RUN conda env create --name preprocessing --file=preprocessing_env.yml && conda clean -a

# Make RUN commands use the new environment:
SHELL ["conda", "run", "-n", "preprocessing", "/bin/bash", "-c"]

# Expose the environment's binaries, libraries and HDF5 plugins to all
# processes started in the container.
ENV PATH=/opt/conda/envs/preprocessing/bin:$PATH
ENV LD_LIBRARY_PATH=/opt/conda/envs/preprocessing/lib:$LD_LIBRARY_PATH
ENV HDF5_PLUGIN_PATH=/opt/conda/envs/preprocessing/hdf5/lib/plugin:$HDF5_PLUGIN_PATH

USER root
WORKDIR /data/

CMD ["bash"]
21 changes: 21 additions & 0 deletions other_envs/preprocessing/preprocessing_env.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# @Author : Yang Liu
# @FileName : preprocessing_env.yml
# @Software : NANOME project
# @Organization : JAX Li Lab
# @Website : https://github.com/LabShengLi/nanome
#
# Conda environment for the UNTAR preprocessing container: provides GNU
# parallel plus pod5/ont-fast5-api for pod5 <-> fast5 handling.
name: preprocessing
# NOTE(review): bioconda's documented setup puts conda-forge at higher
# priority than bioconda — confirm this channel order is intentional.
channels:
- bioconda
- conda-forge
dependencies:
- python=3.9
- pip
- parallel
- h5py
- numpy
# pip-only packages (not needed from conda channels):
- pip:
- ont-fast5-api
- pod5

# how to use:
# conda env create --name preprocessing --file=preprocessing_env.yml