Skip to content

Commit ac9e36c

Browse files
authored
Merge pull request #281 from sigven/read_support_patch
Read support patch
2 parents 98712e0 + 8d11270 commit ac9e36c

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

53 files changed

+2084
-526
lines changed

README.md

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,12 @@ PCGR originates from the [Norwegian Cancer Genomics Consortium (NCGC)](https://c
2828

2929
### Top News
3030

31+
- *September 17th 2025:* **2.2.5 release**
32+
- fix missing support for DP/AF filtering; add AD (allelic depth) filtering
33+
- skip processing when no PASS variants are detected in the input VCF
34+
- more streamlined plotting functions in quarto report templates
35+
- [CHANGELOG](https://sigven.github.io/pcgr/articles/CHANGELOG.html)
36+
3137
- *September 8th 2025:* **2.2.4 release**
3238
- various minor bug fixes, addition of `--sex` option for sex-adjusted CNA annotation
3339
- [CHANGELOG](https://sigven.github.io/pcgr/articles/CHANGELOG.html)
@@ -87,7 +93,7 @@ PCGR originates from the [Norwegian Cancer Genomics Consortium (NCGC)](https://c
8793

8894
### Example reports
8995

90-
[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.15068347.svg)](https://doi.org/10.5281/zenodo.15068347)
96+
[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.17140659.svg)](https://doi.org/10.5281/zenodo.17140659)
9197

9298
### Why use PCGR?
9399

pcgr/annoutils.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ def write_pass_vcf(annotated_vcf, logger):
100100
"""
101101
#out_vcf = re.sub(r'\.annotated\.vcf\.gz$','.annotated.pass.vcf',annotated_vcf)
102102
out_vcf = re.sub(r'\.vcf\.gz$', '.pass.vcf', annotated_vcf)
103+
103104
vcf = VCF(annotated_vcf)
104105
w = Writer(out_vcf, vcf)
105106

@@ -118,16 +119,21 @@ def write_pass_vcf(annotated_vcf, logger):
118119
logger.info('Number of non-PASS/REJECTED variant calls: ' +
119120
str(num_rejected))
120121
logger.info('Number of PASSed variant calls: ' + str(num_pass))
122+
123+
124+
vcf_no_pass_variants = False
121125
if num_pass == 0:
126+
vcf_no_pass_variants = True
122127
logger.warning(
123128
'There are zero variants with a \'PASS\' filter in the VCF file')
124-
os.system('bgzip -dc ' + str(annotated_vcf) +
125-
' egrep \'^#\' > ' + str(out_vcf))
129+
os.system(f"bgzip -dc {annotated_vcf} | egrep '^#' > {out_vcf}")
126130
# else:
127-
os.system('bgzip -f ' + str(out_vcf))
128-
os.system('tabix -f -p vcf ' + str(out_vcf) + '.gz')
131+
os.system(f'bgzip -f {out_vcf}')
132+
os.system(f'tabix -f -p vcf {out_vcf}.gz')
133+
134+
#exit(-1)
129135

130-
return
136+
return(vcf_no_pass_variants)
131137

132138

133139
def map_regulatory_variant_annotations(vep_csq_records):

pcgr/arg_checker.py

Lines changed: 97 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -40,51 +40,118 @@ def verify_args(arg_dict):
4040

4141
# check that tumor purity and tumor ploidy are set correctly
4242
if arg_dict['tumor_purity'] is not None:
43-
if not (arg_dict['tumor_purity'] > 0 and arg_dict['tumor_purity'] <= 1):
43+
if not (float(arg_dict['tumor_purity']) > 0 and float(arg_dict['tumor_purity']) <= 1):
4444
err_msg = f"Tumor purity value ('--tumor_purity' = {arg_dict['tumor_purity']}) must be within (0, 1]"
4545
error_message(err_msg, logger)
4646

4747
if arg_dict['tumor_ploidy'] is not None:
48-
if not arg_dict['tumor_ploidy'] > 0:
48+
if not float(arg_dict['tumor_ploidy']) > 0:
4949
err_msg = f"Tumor ploidy value ('--tumor_ploidy' = {arg_dict['tumor_ploidy']}) must be > 0"
5050
error_message(err_msg, logger)
5151

52-
# check that minimum/maximum depth/allelic fractions are set correctly
53-
if int(arg_dict['tumor_dp_min']) < 0:
54-
err_msg = f"Minimum sequencing depth tumor ('tumor_dp_min' = {arg_dict['tumor_dp_min']}) must be >= 0"
52+
## check that allelic support tags are set correctly when minimum/maximum depth/allelic fractions are set
53+
if arg_dict['tumor_dp_tag'] == "_NA_" and arg_dict['tumor_dp_min'] is not None:
54+
err_msg = f"Minimum sequencing depth tumor ('tumor_dp_min' = {arg_dict['tumor_dp_min']}) requires '--tumor_dp_tag' to be set"
5555
error_message(err_msg, logger)
56-
57-
if float(arg_dict['tumor_af_min']) < 0 or float(arg_dict['tumor_af_min']) > 1:
58-
err_msg = f"Minimum AF tumor ('tumor_af_min' = {arg_dict['tumor_af_min']}) must be within [0, 1]"
56+
57+
if arg_dict['tumor_af_tag'] == "_NA_" and (arg_dict['tumor_af_min'] is not None or arg_dict['tumor_ad_min'] is not None):
58+
err_msg = f"Minimum AF/AD tumor ('tumor_af_min' = {arg_dict['tumor_af_min']}, 'tumor_ad_min' = {arg_dict['tumor_ad_min']}) requires '--tumor_af_tag' to be set"
5959
error_message(err_msg, logger)
60-
61-
if int(arg_dict['control_dp_min']) < 0:
62-
err_msg = f"Minimum sequencing depth control ('control_dp_min' = {arg_dict['control_dp_min']}) must be >= 0"
60+
61+
if arg_dict['control_dp_tag'] == "_NA_" and arg_dict['control_dp_min'] is not None:
62+
err_msg = f"Minimum sequencing depth control ('control_dp_min' = {arg_dict['control_dp_min']}) requires '--control_dp_tag' to be set"
6363
error_message(err_msg, logger)
64-
65-
if float(arg_dict['control_af_max']) < 0 or float(arg_dict['control_af_max']) > 1:
66-
err_msg = f"Maximum AF control ('control_af_max' = {arg_dict['control_af_max']}) must be within [0, 1]"
64+
65+
if arg_dict['control_af_tag'] == "_NA_" and (arg_dict['control_af_max'] is not None or arg_dict['control_ad_max'] is not None):
66+
err_msg = f"Maximum AF/AD control ('control_af_max' = {arg_dict['control_af_max']}, 'control_ad_max' = {arg_dict['control_ad_max']}) requires '--control_af_tag' to be set"
6767
error_message(err_msg, logger)
68+
69+
# check that minimum/maximum depth/allelic fractions are set correctly
70+
dp_tumor_set = False
71+
if arg_dict['tumor_dp_min'] is not None:
72+
dp_tumor_set = True
73+
if int(arg_dict['tumor_dp_min']) <= 0:
74+
err_msg = f"Minimum sequencing depth tumor ('tumor_dp_min' = {arg_dict['tumor_dp_min']}) must be > 0"
75+
error_message(err_msg, logger)
76+
77+
if arg_dict['tumor_af_min'] is not None:
78+
if (float(arg_dict['tumor_af_min']) > 1 or float(arg_dict['tumor_af_min']) < 0):
79+
err_msg = f"Minimum AF tumor ('tumor_af_min' = {arg_dict['tumor_af_min']}) must be within [0, 1]"
80+
error_message(err_msg, logger)
81+
82+
dp_control_set = False
83+
if arg_dict['control_dp_min'] is not None:
84+
dp_control_set = True
85+
if int(arg_dict['control_dp_min']) <= 0:
86+
err_msg = f"Minimum sequencing depth control ('control_dp_min' = {arg_dict['control_dp_min']}) must be > 0"
87+
error_message(err_msg, logger)
88+
89+
if arg_dict['tumor_ad_min'] is not None:
90+
err = 0
91+
if int(arg_dict['tumor_ad_min']) <= 0:
92+
err = 1
93+
if dp_tumor_set is True and int(arg_dict['tumor_ad_min']) > int(arg_dict['tumor_dp_min']):
94+
err = 1
95+
if err == 1:
96+
err_msg = (
97+
f"Minimum allelic depth tumor - ('tumor_ad_min' = {arg_dict['tumor_ad_min']}) must be > 0 "
98+
f"and less than or equal to minimum sequencing depth tumor ('tumor_dp_min' = {arg_dict['tumor_dp_min']})"
99+
)
100+
error_message(err_msg, logger)
101+
102+
if arg_dict['control_af_max'] is not None:
103+
if float(arg_dict['control_af_max']) < 0 or float(arg_dict['control_af_max']) > 1:
104+
err_msg = f"Maximum AF control ('control_af_max' = {arg_dict['control_af_max']}) must be within [0, 1]"
105+
error_message(err_msg, logger)
68106

107+
if arg_dict['control_ad_max'] is not None:
108+
err = 0
109+
if int(arg_dict['control_ad_max']) < 0:
110+
err = 1
111+
if dp_control_set is True and int(arg_dict['control_ad_max']) > int(arg_dict['control_dp_min']):
112+
err = 1
113+
if err == 1:
114+
err_msg = (
115+
f"Maximum allelic depth control - ('control_ad_max' = {arg_dict['control_ad_max']}) must be >= 0 "
116+
f"and less than or equal to minimum sequencing depth control ('control_dp_min' = {arg_dict['control_dp_min']})"
117+
)
118+
error_message(err_msg, logger)
69119

70-
# TMB: check that minimum/maximum depth/allelic fractions are set correctly
71-
if int(arg_dict['tmb_dp_min']) < 0:
72-
err_msg = f"Minimum sequencing depth tumor - TMB calculation ('tmb_dp_min' = {arg_dict['tmb_dp_min']}) must be >= 0"
73-
error_message(err_msg, logger)
74-
75-
if int(arg_dict['tmb_dp_min']) > 0 and (int(arg_dict['tmb_dp_min']) < int(arg_dict['tumor_dp_min'])):
76-
err_msg = f"Minimum sequencing depth (tumor) for TMB calculation ('tmb_dp_min' = {str(arg_dict['tmb_dp_min'])}) must be "
77-
err_msg += f"greater or equal to minimum sequencing depth tumor {str(arg_dict['tumor_dp_min'])} (i.e. filter for variant inclusion in report)"
78-
error_message(err_msg, logger)
120+
# TMB: check that minimum/maximum depth/allelic fractions are set correctly
121+
if arg_dict['tmb_dp_min'] is not None:
122+
if int(arg_dict['tmb_dp_min']) <= 0:
123+
err_msg = f"Minimum sequencing depth tumor - TMB calculation ('tmb_dp_min' = {arg_dict['tmb_dp_min']}) must be > 0"
124+
error_message(err_msg, logger)
125+
if dp_tumor_set is True and (int(arg_dict['tmb_dp_min']) < int(arg_dict['tumor_dp_min'])):
126+
err_msg = f"Minimum sequencing depth (tumor) for TMB calculation ('tmb_dp_min' = {str(arg_dict['tmb_dp_min'])}) must be "
127+
err_msg += f"greater or equal to minimum sequencing depth tumor {str(arg_dict['tumor_dp_min'])} (i.e. global filter for variant inclusion in report)"
128+
error_message(err_msg, logger)
129+
130+
if arg_dict['tmb_ad_min'] is not None:
131+
if int(arg_dict['tmb_ad_min']) <= 0:
132+
err_msg = f"Minimum allelic depth tumor - TMB calculation ('tmb_ad_min' = {arg_dict['tmb_ad_min']}) must be > 0"
133+
error_message(err_msg, logger)
134+
if arg_dict['tumor_ad_min'] is not None:
135+
if int(arg_dict['tmb_ad_min']) > 0 and int(arg_dict['tumor_ad_min']) > 0:
136+
if int(arg_dict['tmb_ad_min']) < int(arg_dict['tumor_ad_min']):
137+
err_msg = f"Minimum allelic depth (tumor) for TMB calculation ('tmb_ad_min' = {str(arg_dict['tmb_ad_min'])}) must be "
138+
err_msg += f"greater or equal to minimum allelic depth tumor {str(arg_dict['tumor_ad_min'])} (i.e. global filter for variant inclusion in report)"
139+
error_message(err_msg, logger)
140+
if dp_tumor_set is True and (int(arg_dict['tmb_ad_min']) > int(arg_dict['tmb_dp_min']) and int(arg_dict['tmb_dp_min']) > 0):
141+
err_msg = f"Minimum allelic depth (tumor) for TMB calculation ('tmb_ad_min' = {str(arg_dict['tmb_ad_min'])}) must be "
142+
err_msg += f"less than or equal to minimum sequencing depth (tumor) for TMB calculation ('tmb_dp_min' = {str(arg_dict['tmb_dp_min'])})"
143+
error_message(err_msg, logger)
79144

80-
if float(arg_dict['tmb_af_min']) < 0 or float(arg_dict['tmb_af_min']) > 1:
81-
err_msg = f"Minimum AF (tumor) for TMB calculation ('tmb_af_min' = {arg_dict['tmb_af_min']}) must be within [0, 1]"
82-
error_message(err_msg, logger)
145+
if arg_dict['tmb_af_min'] is not None:
146+
if float(arg_dict['tmb_af_min']) < 0 or float(arg_dict['tmb_af_min']) > 1:
147+
err_msg = f"Minimum AF (tumor) for TMB calculation ('tmb_af_min' = {arg_dict['tmb_af_min']}) must be within [0, 1]"
148+
error_message(err_msg, logger)
83149

84-
if float(arg_dict['tmb_af_min']) > 0 and (float(arg_dict['tmb_af_min']) < float(arg_dict['tumor_af_min'])):
85-
err_msg = f"Minimum AF (tumor) for TMB calculation ('tmb_af_min' = {str(arg_dict['tmb_af_min'])}) must be "
86-
err_msg += f"greater or equal to minimum AF tumor {str(arg_dict['tumor_dp_min'])} (i.e. filter for variant inclusion in report)"
87-
error_message(err_msg, logger)
150+
if arg_dict['tumor_af_min'] is not None:
151+
if float(arg_dict['tmb_af_min']) > 0 and (float(arg_dict['tmb_af_min']) < float(arg_dict['tumor_af_min'])):
152+
err_msg = f"Minimum AF (tumor) for TMB calculation ('tmb_af_min' = {str(arg_dict['tmb_af_min'])}) must be "
153+
err_msg += f"greater or equal to minimum AF tumor ({str(arg_dict['tumor_af_min'])}, i.e. global filter for variant inclusion in report)"
154+
error_message(err_msg, logger)
88155

89156
# Check that coding target size region of sequencing assay is set correctly
90157
if float(arg_dict['effective_target_size_mb']) < 0 or float(arg_dict['effective_target_size_mb']) > float(pcgr_vars.CODING_EXOME_SIZE_MB):

pcgr/config.py

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
#!/usr/bin/env python
22

33
from pcgr import pcgr_vars
4-
from pcgr.utils import check_file_exists, error_message
4+
from pcgr.utils import check_file_exists, error_message, warn_message
55

66
import pandas as pd
77
import os
88
import csv
99

10-
def create_config(arg_dict, workflow = "PCGR"):
10+
def create_config(arg_dict, workflow = "PCGR", logger=None):
1111

1212
conf_options = {}
1313
if workflow == "PCGR" or workflow == "CPSR":
@@ -65,7 +65,21 @@ def create_config(arg_dict, workflow = "PCGR"):
6565
if arg_dict['tumor_ploidy'] is not None:
6666
conf_options['sample_properties']['tumor_ploidy'] = float(arg_dict['tumor_ploidy'])
6767
if arg_dict['sex'] is not None:
68+
sex = str(arg_dict['sex'])
69+
## Breast, Ovary/Fallopian Tube, Uterus, Vulva/Vagina
70+
if arg_dict['tsite'] == 6 or arg_dict['tsite'] == 18 or arg_dict['tsite'] == 29 or arg_dict['tsite'] == 30:
71+
if sex == 'MALE':
72+
if arg_dict['tsite'] == 6:
73+
warn_message(f"Tumor site '{conf_options['sample_properties']['site']}' is typically observed in females - ensure '--sex' option is correctly set", logger)
74+
else:
75+
error_message(f"Tumor site '{conf_options['sample_properties']['site']}' is not observed in males - please check the '--sex' option", logger)
76+
## Prostate, Testis
77+
if arg_dict['tsite'] == 23 or arg_dict['tsite'] == 26:
78+
if sex == 'FEMALE':
79+
error_message(f"Tumor site '{conf_options['sample_properties']['site']}' is not observed in females - please check the '--sex' option", logger)
80+
6881
conf_options['sample_properties']['sex'] = str(arg_dict['sex'])
82+
6983

7084
#conf_options['clinicaltrials'] = {
7185
# 'run': int(arg_dict['include_trials'])
@@ -97,10 +111,12 @@ def create_config(arg_dict, workflow = "PCGR"):
97111

98112
conf_options['somatic_snv'] = {}
99113
conf_options['somatic_snv']['allelic_support'] = {
100-
'tumor_dp_min': int(arg_dict['tumor_dp_min']),
101-
'control_dp_min': int(arg_dict['control_dp_min']),
102-
'tumor_af_min': float(arg_dict['tumor_af_min']),
103-
'control_af_max': float(arg_dict['control_af_max']),
114+
'tumor_dp_min': arg_dict['tumor_dp_min'],
115+
'control_dp_min': arg_dict['control_dp_min'],
116+
'tumor_af_min': arg_dict['tumor_af_min'],
117+
'control_af_max': arg_dict['control_af_max'],
118+
'tumor_ad_min': arg_dict['tumor_ad_min'],
119+
'control_ad_max': arg_dict['control_ad_max'],
104120
'control_dp_tag': str(arg_dict['control_dp_tag']),
105121
'control_af_tag': str(arg_dict['control_af_tag']),
106122
'tumor_dp_tag': str(arg_dict['tumor_dp_tag']),
@@ -119,11 +135,16 @@ def create_config(arg_dict, workflow = "PCGR"):
119135
conf_options['somatic_snv']['msi'] = {
120136
'run': int(arg_dict['estimate_msi'])
121137
}
138+
139+
tmb_dp_min = 0 if arg_dict['tmb_dp_min'] is None else int(arg_dict['tmb_dp_min'])
140+
tmb_af_min = 0.0 if arg_dict['tmb_af_min'] is None else float(arg_dict['tmb_af_min'])
141+
tmb_ad_min = 0 if arg_dict['tmb_ad_min'] is None else int(arg_dict['tmb_ad_min'])
122142
conf_options['somatic_snv']['tmb'] = {
123143
'run': int(arg_dict['estimate_tmb']),
124144
'tmb_display': arg_dict['tmb_display'],
125-
'tmb_dp_min': arg_dict['tmb_dp_min'],
126-
'tmb_af_min': arg_dict['tmb_af_min']
145+
'tmb_dp_min': tmb_dp_min,
146+
'tmb_af_min': tmb_af_min,
147+
'tmb_ad_min': tmb_ad_min
127148
}
128149

129150
for pop in ['nfe', 'fin', 'amr', 'eas', 'sas', 'asj', 'oth', 'afr', 'global']:

0 commit comments

Comments
 (0)