Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
logs
.snakemake
site
__pycache__
output
.tests/illumnia_demux/dry_run_out
.tests/dry_run_out
16 changes: 8 additions & 8 deletions scripts/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def get_current_server():
# biowulf hostnames
re_biowulf_head = (r"biowulf\.nih\.gov", "biowulf")
re_biowulf_compute = (r"cn\d{4}", "biowulf")

# skyline hostnames
re_skyline_head = (r"ai-hpc(submit|n)(\d+)?", "skyline")
re_skyline_compute = (r"ai-hpc(submit|n)(\d+)?", "skyline")
Expand All @@ -48,15 +48,15 @@ def get_current_server():
FRCE_PATH = "COVID-19_Consortium"


# ~~~ labkey configurations ~~~
# ~~~ labkey configurations ~~~
CONTEXT_PATH = "labkey"
LABKEY_CONFIGS = {
"bigsky": {"domain": BIGSKY_DEV, "container_path": BIGSKY_PATH, "context_path": CONTEXT_PATH, "use_ssl": True},
"frce": {"domain": FRCE_PROD, "container_path": FRCE_PATH, "context_path": CONTEXT_PATH, "use_ssl": True}
}


# ~~~ snakemake configurations ~~~
# ~~~ snakemake configurations ~~~
illumina_pipelines = defaultdict(lambda: Path(Path(__file__).parent.parent, "workflow", "Snakefile").resolve())
# can add support for NextSeq2k and bclconvert here
SNAKEFILE = {
Expand All @@ -69,7 +69,7 @@ def get_current_server():


def get_resource_config():
"""Return a dictionary containing server specific references utilized in
"""Return a dictionary containing server specific references utilized in
the workflow for directories or reference files.

Returns:
Expand Down Expand Up @@ -141,7 +141,7 @@ def get_bigsky_seq_dirs():
for this_child_elem in this_dir.iterdir():
try:
elem_checks = [
this_child_elem.is_dir(),
this_child_elem.is_dir(),
Path(this_child_elem, transfer_breadcrumb).exists(),
check_access(this_child_elem, R_OK)
]
Expand All @@ -155,13 +155,13 @@ def get_bigsky_seq_dirs():
def get_tmp_dir(host):
TMP_CONFIGS = {
'skyline': {'user': '/data/scratch/$USER/$SLURM_JOBID', 'global': '/data/scratch/$USER/' + str(uuid4())},
'bigsky': {'user': '/gs1/Scratch/$USER/$SLURM_JOBID', 'global': '/gs1/Scratch/$USER/' + str(uuid4())},
'bigsky': {'user': '/data/scratch/$USER/$SLURM_JOBID', 'global': '/data/scratch/$USER/' + str(uuid4())},
'biowulf': {'user': '/lscratch/$SLURM_JOBID', 'global': '/tmp/$USER/' + str(uuid4())}
}

this_tmp = TMP_CONFIGS[host]['user']

# this directory, if it does not exist,
# this directory, if it does not exist,
if Path(this_tmp).parents[0].exists():
return this_tmp
else:
Expand Down Expand Up @@ -222,4 +222,4 @@ def get_tmp_dir(host):
"mesaur": "/data/openomics/references/genomes/mesaur/2.0/GCF_017639785.1_BCM_Maur_2.0_genomic.fna.gz",
"cynomac": "/data/openomics/references/genomes/cynomac/v2/GCF_012559485.2_MFA1912RKSv2_genomic.fna.gz",
},
}
}
16 changes: 8 additions & 8 deletions scripts/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

def get_all_seq_dirs(top_dir, server):
"""
Gather and return all sequencing directories from the `top_dir`.
Gather and return all sequencing directories from the `top_dir`.
This is tightly coupled at the moment to the directory that is on RML-BigSky.
In the future, we will need to take a look at how to do this more generally.
"""
Expand Down Expand Up @@ -42,7 +42,7 @@ def valid_run_output(output_directory, dry_run=False):
output_directory = Path(output_directory).absolute()
if not output_directory.exists():
output_directory.mkdir(parents=True, mode=0o765)

if not check_access(output_directory, W_OK):
raise PermissionError(f'Can not write to output directory {output_directory}')
return output_directory
Expand Down Expand Up @@ -70,7 +70,7 @@ def valid_fasta(suspect):

if not is_valid:
raise ValueError

return suspect


Expand Down Expand Up @@ -147,7 +147,7 @@ def find_demux_dir(run_dir):

if len(demux_stat_files) != 1:
raise FileNotFoundError

return Path(demux_stat_files[0], '..').absolute()


Expand All @@ -160,7 +160,7 @@ def get_run_directories(runids, seq_dir=None, sheetname=None):
for secondchild in firstchild.iterdir():
seq_contents.append(secondchild)
seq_contents_names = [child for child in map(lambda d: d.name, seq_contents)]

run_paths, invalid_runs = [], []
run_return = []
for run in runids:
Expand Down Expand Up @@ -192,7 +192,7 @@ def get_run_directories(runids, seq_dir=None, sheetname=None):
sheet = Path(run_p, sheetname).absolute()
else:
raise FileNotFoundError(f'Run {rid}({run_p}) does not have a find-able sample sheet.')

this_run_info['samplesheet'] = parse_samplesheet(sheet)
this_run_info.update({info.tag: info.text for run in runinfo_xml.getroot() for info in run \
if info.text is not None and info.text.strip() not in ('\n', '')})
Expand All @@ -201,5 +201,5 @@ def get_run_directories(runids, seq_dir=None, sheetname=None):
if invalid_runs:
raise ValueError('Runs entered are invalid (missing sequencing artifacts or directory does not exist): \n' + \
', '.join(invalid_runs))
return run_return

return run_return
22 changes: 11 additions & 11 deletions scripts/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def valid_run_input(run):


def exec_snakemake(popen_cmd, local=False, dry_run=False, env=None, cwd=None):
# async execution w/ filter:
# async execution w/ filter:
# - https://gist.github.com/DGrady/b713db14a27be0e4e8b2ffc351051c7c
# - https://lysator.liu.se/~bellman/download/asyncproc.py
# - https://gist.github.com/kalebo/1e085ee36de45ffded7e5d9f857265d0
Expand All @@ -113,7 +113,6 @@ def exec_snakemake(popen_cmd, local=False, dry_run=False, env=None, cwd=None):
popen_kwargs['cwd'] = cwd
else:
popen_kwargs['cwd'] = str(Path.cwd())

parent_jobid = None
if local or dry_run:
popen_kwargs['env'].update(os.environ)
Expand Down Expand Up @@ -170,9 +169,7 @@ def get_mods(init=False):
mod_cmd = []

if host == 'bigsky':
mod_cmd.append('source /gs1/apps/user/rmlspack/share/spack/setup-env.sh')
mod_cmd.append('spack load miniconda3@4.11.0')
mod_cmd.append('source activate snakemake7-19-1')
mod_cmd.append('module load snakemake/7.22.0-ufanewz')
elif host == 'skyline':
mod_cmd.append('source /data/openomics/bin/dependencies.sh')
elif host == 'biowulf':
Expand Down Expand Up @@ -220,8 +217,9 @@ def get_mounts(*extras):
raise FileNotFoundError(f"Can't mount {str(bind)}, it doesn't exist!")
file_to, file_from, mode = str(bind), str(bind), 'rw'
mounts.append(file_from + ':' + file_to + ':' + mode)

mounts.append(r'\$TMPDIR:/tmp:rw')

if 'TMPDIR' in os.environ:
mounts.append(os.environ['TMPDIR'] + ':/tmp:rw')

return ','.join(mounts)

Expand Down Expand Up @@ -265,13 +263,15 @@ def exec_pipeline(configs, dry_run=False, local=False):
top_env['PATH'] = os.environ["PATH"]
top_env['SNK_CONFIG'] = str(config_file.absolute())
top_env['SINGULARITY_CACHEDIR'] = str(Path(this_config['out_to'], '.singularity').absolute())
top_env['SINGULARITY_CONTAINALL'] = '1'
top_env['APPTAINER_CONTAINALL'] = '1'
this_cmd = [
"snakemake", "-p", "--use-singularity", "--rerun-incomplete", "--keep-incomplete",
"--rerun-triggers", "mtime", "--verbose", "-s", snake_file,
"snakemake", "-p", "--cores", "2", "--use-singularity", "--rerun-incomplete", "--keep-incomplete",
"--rerun-triggers", "mtime", "--verbose", "-s", str(snake_file),
]

if singularity_binds and not dry_run:
this_cmd.extend(["--singularity-args", f"\"--env 'TMPDIR=/tmp' -C -B '{singularity_binds}'\""])
this_cmd.extend(["--singularity-args", f"\"-B '{singularity_binds}'\""])

if dry_run:
print(f"{esc_colors.OKGREEN}> {esc_colors.ENDC}{esc_colors.UNDERLINE}Dry run{esc_colors.ENDC} " + \
Expand Down Expand Up @@ -314,4 +314,4 @@ def valid_host_pathogen_genomes(host, pathogen):
if not g2:
raise ValueError('Pathogen genome does not exist on the file system.')

return host, pathogen
return host, pathogen
4 changes: 2 additions & 2 deletions workflow/fastq.smk
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ rule kaiju_annotation:
log: config['out_to'] + "/logs/" + config["project"] + "/kaiju/{sids}.log",
threads: 24
resources:
mem_mb = 220000,
mem_mb = 300000,
runtime = 60*24*2
shell:
"""
Expand Down Expand Up @@ -109,7 +109,7 @@ rule kraken_annotation:
log: config['out_to'] + "/logs/" + config["project"] + "/kraken/{sids}.log",
threads: 24
resources:
mem_mb = 220000,
mem_mb = 300000,
runtime = 60*24*2
shell:
"""
Expand Down