Skip to content

enhance operations #158

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,4 @@ testdemo.py
/test_data/multi_fast5_demo.tar.gz
/test_data/na12878_chr22_p3_100.tar.gz
/test_data/NA19240_RRBS_ENCFF000LZS_chr22.txt.gz
/test.nf
18 changes: 16 additions & 2 deletions modules/UNTAR.nf
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,27 @@ process UNTAR {
### deal with tar.gz
tar -xzf !{fast5Input} -C untarTempDir
elif [[ -d !{fast5Input} ]]; then
## Copy files, do not change original files such as old analyses data
find !{fast5Input}/ -name '*.fast5' | \
## For dir, should copy files, we do not want to change original files such as old analyses data in fast5
find !{fast5Input}/ \\( -name "*.fast5" -o -name "*.pod5" \\) | \
parallel -j!{cores} cp -L -f {} untarTempDir/
else
echo "### Untar error for input=!{fast5Input}"
fi

# convert pod5 to fast5
# Only runs when the pipeline is invoked with --pod5 true.
if [[ !{params.pod5} == true ]] ; then
# Set the already-collected inputs aside so untarTempDir can be rebuilt
# to hold the converted fast5 output.
mv untarTempDir untarTempDir_v2
mkdir -p untarTempDir_v3
# Move only the pod5 files into a staging dir for conversion;
# NOTE(review): -j0 lets GNU parallel spawn as many jobs as possible,
# unlike the -j!{cores} used for the copy step above — confirm intended.
find untarTempDir_v2/ -name '*.pod5' -type f |
parallel -j0 mv {} untarTempDir_v3/ -f

# Convert all staged pod5 files back into untarTempDir as fast5
# (-f forces overwrite of any existing output).
# NOTE(review): if the input contained no pod5 files, untarTempDir_v3 is
# empty and any fast5 files left in untarTempDir_v2 are deleted below —
# verify that is the intended behavior for mixed/fast5-only inputs.
mkdir -p untarTempDir
pod5 convert to_fast5 untarTempDir_v3/ \
--out untarTempDir/ \
-t !{cores} -f
rm -rf untarTempDir_v2 untarTempDir_v3
fi

if [[ !{params.multi_to_single_fast5} == true ]] ; then
echo "### Do multi_to_single_fast5"
untarTempDir=untarTempDir2
Expand Down
58 changes: 58 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ params {
clair3_docker_name = "hkubal/clair3:latest" // used for variant call
deepsignal2_docker_name = "liuyangzzu/deepsignal2:v1.0" // used for deepsignal v2
guppy_stable_name = "liuyangzzu/guppy_stable:v6.3.8" // solve issues for guppy meth call in v6.4.6
preprocessing_docker_name = "liuyangzzu/preprocessing" // for untar preprocessing

// process and executor configurations
executor = null
Expand All @@ -47,6 +48,7 @@ params {
dsname = null
input = null
outdir = "results"
pod5 = false

// Data type, can be human, ecoli, etc.
genome = "hg38"
Expand Down Expand Up @@ -267,6 +269,9 @@ profiles {
container = params.docker_name
containerOptions = params.containerOptions // or "--gpus all" Note: this is not compatible with GitHub citest/naive docker users

// UNTAR runs in the dedicated preprocessing image (declared as
// params.preprocessing_docker_name, used for untar preprocessing).
withName: 'UNTAR' {
container = params.preprocessing_docker_name
}
withName: 'Tombo|DeepMod|METEORE' {
container = params.tombo_docker_name
}
Expand Down Expand Up @@ -303,6 +308,10 @@ profiles {
container = params.singularity_name
containerOptions = params.containerOptions // "--nv"

// UNTAR runs in the dedicated preprocessing image. An absolute path is
// treated as a local Singularity image; otherwise pull from Docker Hub.
// BUG FIX: the docker:// fallback previously referenced
// params.tombo_docker_name (copy-paste error), which would pull the
// wrong image for non-local container names.
withName: 'UNTAR' {
container = params.preprocessing_docker_name.startsWith("/") ?
params.preprocessing_docker_name : "docker://${params.preprocessing_docker_name}"
}
withName: 'Tombo|DeepMod|METEORE' {
// container = "docker://${params.tombo_docker_name}"
// reserve local image for singularity, or append docker:// header for online image
Expand Down Expand Up @@ -432,6 +441,55 @@ profiles {
}
}

// HPC profile for the sumner2 cluster (SLURM + Singularity).
sumner2 {
    params {
        max_cpus = 72
        max_memory = 768.GB

        gpu_queue = 'gpus' // winter has only one partition
        gpu_qos = 'gpu_training' // or use training; time can be up to 14 days
        gpu_processors = 8
        gpu_memory = '128GB'
        gpu_time = '14.d'
        gpu_gresOptions = 'gpu:1' // null/false if no gpu needed

        cpu_queue = 'compute' // winter has only one partition
        cpu_qos = 'long' // or use training; time can be up to 14 days
        cpu_processors = 8
        cpu_memory = '128GB'
        cpu_time = '14.d'

        queueSize = 24
    }

    process {
        executor = "slurm"
        module = "slurm:singularity"

        // CPU-only processes.
        // FIX: removed the duplicated 'Guppy6Comb' entry that appeared twice
        // in this selector.
        // NOTE(review): 'METEORE' is matched by both this CPU selector and
        // the GPU selector below — confirm which queue it should use.
        withName: 'UNTAR|ALIGNMENT|QCEXPORT|RESQUIGGLE|NANOPOLISH|Tombo|Guppy6Comb|METEORE|CLAIR3|PHASING|CONSENSUS|EVAL|REPORT|NPLSHCOMB|MGLDNCOMB|DPSIGCOMB|DEEPSIGNAL2COMB|GuppyComb|TomboComb|DpmodComb' {
            queue = params.cpu_queue
            cpus = params.cpu_processors
            memory = params.cpu_memory
            time = params.cpu_time
            clusterOptions = "-q ${params.cpu_qos}"
        }

        // GPU processes; --gres is only requested when gpu_gresOptions is set.
        withName: 'ENVCHECK|BASECALL|MEGALODON|Guppy6|Guppy|DEEPSIGNAL|DEEPSIGNAL2|DeepMod|METEORE' {
            queue = params.gpu_queue
            cpus = params.gpu_processors
            memory = params.gpu_memory
            time = params.gpu_time
            clusterOptions = "-q ${params.gpu_qos} ${params.gpu_gresOptions ? '--gres=' + params.gpu_gresOptions : ' '}"
        }
    }

    executor {
        queueSize = params.queueSize
    }
}

// Google cloud computing platform
// ref doc: https://www.nextflow.io/docs/latest/google.html
// ref doc: https://www.nextflow.io/docs/latest/google.html#configuration
Expand Down
46 changes: 46 additions & 0 deletions other_envs/preprocessing/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# @Author : Yang Liu
# @FileName : Dockerfile
# @Software : NANOME project
# @Organization : JAX Li Lab
# @Website : https://github.com/LabShengLi/nanome

# NOTE(review): "ubuntu" floats to the latest tag; consider pinning a
# specific LTS tag (e.g. ubuntu:20.04) for reproducible builds — confirm
# that libnvidia-compute-460-server is available in the pinned release.
FROM ubuntu

# Author and maintainer.
# FIX: MAINTAINER is deprecated — use the maintainer LABEL instead.
LABEL maintainer="Yang Liu <yang.liu@jax.org>"
LABEL description="Nanome project in Li Lab at The Jackson Laboratory" \
      author="yang.liu@jax.org"

ARG BUILD_PACKAGES="wget apt-transport-https procps git curl libnvidia-compute-460-server"
ARG DEBIAN_FRONTEND="noninteractive"

# Install OS packages and trim apt caches to keep the image small.
# The inline DEBIAN_FRONTEND= re-assignment was removed: the ARG above
# already applies to every RUN in this stage.
RUN apt-get -q update && \
    apt-get -q install --yes ${BUILD_PACKAGES} && \
    apt-get autoremove --purge --yes && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Install Miniconda into /opt/conda (-b: batch/no prompts).
# NOTE(review): repo.continuum.io is a legacy host that redirects to
# repo.anaconda.com — consider updating the URL.
RUN wget -q https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O Miniconda.sh && \
    /bin/bash Miniconda.sh -b -p /opt/conda && \
    rm Miniconda.sh

# Adding conda to PATH (key=value ENV form is the current recommended syntax)
ENV PATH=/opt/conda/bin:$PATH

# Create the environment and drop package caches:
COPY preprocessing_env.yml /
RUN conda env create --name preprocessing --file=preprocessing_env.yml && conda clean -a

# Make RUN commands use the new environment:
SHELL ["conda", "run", "-n", "preprocessing", "/bin/bash", "-c"]

# Expose the environment's binaries, libraries and HDF5 plugins to all
# processes started in the container.
ENV PATH=/opt/conda/envs/preprocessing/bin:$PATH
ENV LD_LIBRARY_PATH=/opt/conda/envs/preprocessing/lib:$LD_LIBRARY_PATH
ENV HDF5_PLUGIN_PATH=/opt/conda/envs/preprocessing/hdf5/lib/plugin:$HDF5_PLUGIN_PATH

USER root
WORKDIR /data/

CMD ["bash"]
21 changes: 21 additions & 0 deletions other_envs/preprocessing/preprocessing_env.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# @Author : Yang Liu
# @FileName : preprocessing_env.yml
# @Software : NANOME project
# @Organization : JAX Li Lab
# @Website : https://github.com/LabShengLi/nanome
#
# Conda environment for the UNTAR preprocessing container: provides GNU
# parallel plus pod5/ont-fast5-api for pod5 <-> fast5 handling.
name: preprocessing
# NOTE(review): bioconda's documented setup puts conda-forge at higher
# priority than bioconda — confirm this channel order is intentional.
channels:
- bioconda
- conda-forge
dependencies:
- python=3.9
- pip
- parallel
- h5py
- numpy
# pip-only packages (not needed from conda channels):
- pip:
- ont-fast5-api
- pod5

# how to use:
# conda env create --name preprocessing --file=preprocessing_env.yml