Skip to content

Commit c4860d4

Browse files
authored
Added a '--prefer-mane-transcripts' mode that enforces MANE_Select tagged Gencode transcripts where possible (#9012)
* Added a '--prefer-mane-transcripts' mode that enforces MANE_Select tagged Gencode transcripts where possible
1 parent d056c32 commit c4860d4

29 files changed

+297
-16
lines changed

src/main/java/org/broadinstitute/hellbender/tools/funcotator/BaseFuncotatorArgumentCollection.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,14 @@ public abstract class BaseFuncotatorArgumentCollection implements Serializable {
8080
)
8181
public TranscriptSelectionMode transcriptSelectionMode = FuncotatorArgumentDefinitions.TRANSCRIPT_SELECTION_MODE_DEFAULT_VALUE;
8282

83+
@Advanced
84+
@Argument(
85+
fullName = FuncotatorArgumentDefinitions.PREFER_MANE_TRANSCRIPT_MODE,
86+
optional = true,
87+
doc = "If this flag is set, Funcotator will prefer 'MANE_Plus_Clinical' followed by 'MANE_select' transcripts (including those not tagged 'basic') if one is present for a given variant. If neither tag is present it use the default behavior (only base transcripts)."
88+
)
89+
public boolean MANETranscriptMode = false;
90+
8391
@Argument(
8492
fullName = FuncotatorArgumentDefinitions.TRANSCRIPT_LIST_LONG_NAME,
8593
optional = true,

src/main/java/org/broadinstitute/hellbender/tools/funcotator/FuncotateSegments.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,8 @@ public void onTraversalStart() {
147147
new FlankSettings(0,0),
148148
true,
149149
funcotatorArgs.minNumBasesForValidSegment,
150-
funcotatorArgs.spliceSiteWindow
150+
funcotatorArgs.spliceSiteWindow,
151+
funcotatorArgs.MANETranscriptMode
151152
).stream()
152153
.filter(DataSourceFuncotationFactory::isSupportingSegmentFuncotation)
153154
.collect(Collectors.toList());

src/main/java/org/broadinstitute/hellbender/tools/funcotator/Funcotator.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -794,7 +794,8 @@ public void onTraversalStart() {
794794
new FlankSettings(funcotatorArgs.fivePrimeFlankSize, funcotatorArgs.threePrimeFlankSize),
795795
false,
796796
funcotatorArgs.minNumBasesForValidSegment,
797-
funcotatorArgs.spliceSiteWindow
797+
funcotatorArgs.spliceSiteWindow,
798+
funcotatorArgs.MANETranscriptMode
798799
);
799800

800801
logger.info("Initializing Funcotator Engine...");

src/main/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorArgumentDefinitions.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@ public class FuncotatorArgumentDefinitions {
3636
public static final String TRANSCRIPT_SELECTION_MODE_LONG_NAME = "transcript-selection-mode";
3737
public static final TranscriptSelectionMode TRANSCRIPT_SELECTION_MODE_DEFAULT_VALUE = TranscriptSelectionMode.CANONICAL;
3838

39+
public static final String PREFER_MANE_TRANSCRIPT_MODE = "prefer-mane-transcripts";
40+
3941
/**
4042
* Do not give this a static default value or the integration tests will get hosed.
4143
*/

src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/DataSourceUtils.java

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,7 @@ private static boolean isValidDirectory(final Path p) {
329329
* ignored for those that don't.
330330
* @param minBasesForValidSegment The minimum number of bases for a segment to be considered valid.
331331
* @param spliceSiteWindowSize The number of bases on either side of a splice site for a variant to be a {@link org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotation.VariantClassification#SPLICE_SITE} variant.
332+
* @param preferMANETranscriptsWhereApplicable If this is set, {@link GencodeFuncotationFactory} will only emit MANE transcripts when any are available for a given variant; otherwise it behaves as normal.
332333
* @return A {@link List} of {@link DataSourceFuncotationFactory} given the data source metadata, overrides, and transcript reporting priority information.
333334
*/
334335
public static List<DataSourceFuncotationFactory> createDataSourceFuncotationFactoriesForDataSources(final Map<Path, Properties> dataSourceMetaData,
@@ -340,7 +341,8 @@ public static List<DataSourceFuncotationFactory> createDataSourceFuncotationFact
340341
final FlankSettings flankSettings,
341342
final boolean doAttemptSegmentFuncotationForTranscriptDatasources,
342343
final int minBasesForValidSegment,
343-
final int spliceSiteWindowSize) {
344+
final int spliceSiteWindowSize,
345+
final boolean preferMANETranscriptsWhereApplicable) {
344346
Utils.nonNull(dataSourceMetaData);
345347
Utils.nonNull(annotationOverridesMap);
346348
Utils.nonNull(transcriptSelectionMode);
@@ -379,7 +381,7 @@ public static List<DataSourceFuncotationFactory> createDataSourceFuncotationFact
379381
case GENCODE:
380382
featureInput = createAndRegisterFeatureInputs(path, properties, gatkToolInstance, lookaheadFeatureCachingInBp, GencodeGtfFeature.class, false);
381383
funcotationFactory = DataSourceUtils.createGencodeDataSource(path, properties, annotationOverridesMap, transcriptSelectionMode,
382-
userTranscriptIdSet, featureInput, flankSettings, doAttemptSegmentFuncotationForTranscriptDatasources, minBasesForValidSegment, spliceSiteWindowSize);
384+
userTranscriptIdSet, featureInput, flankSettings, doAttemptSegmentFuncotationForTranscriptDatasources, minBasesForValidSegment, spliceSiteWindowSize, preferMANETranscriptsWhereApplicable);
383385
break;
384386
case VCF:
385387
featureInput = createAndRegisterFeatureInputs(path, properties, gatkToolInstance, lookaheadFeatureCachingInBp, VariantContext.class, false);
@@ -596,7 +598,8 @@ private static GencodeFuncotationFactory createGencodeDataSource(final Path data
596598
final FlankSettings flankSettings,
597599
final boolean isSegmentFuncotationEnabled,
598600
final int minBasesForValidSegment,
599-
final int spliceSiteWindowSize) {
601+
final int spliceSiteWindowSize,
602+
final boolean onlyUseMANETranscriptsWhenApplicable) {
600603
Utils.nonNull(dataSourceFile);
601604
Utils.nonNull(dataSourceProperties);
602605
Utils.nonNull(annotationOverridesMap);
@@ -626,7 +629,8 @@ private static GencodeFuncotationFactory createGencodeDataSource(final Path data
626629
ncbiBuildVersion,
627630
isSegmentFuncotationEnabled,
628631
minBasesForValidSegment,
629-
spliceSiteWindowSize
632+
spliceSiteWindowSize,
633+
onlyUseMANETranscriptsWhenApplicable
630634
);
631635
}
632636

src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/gencode/GencodeFuncotationFactory.java

Lines changed: 47 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,11 @@ public class GencodeFuncotationFactory extends DataSourceFuncotationFactory {
242242
*/
243243
private boolean isSegmentFuncotationEnabled;
244244

245+
/**
246+
* If this is true, only MANE transcripts will be used for funcotation creation when at least one is present.
247+
*/
248+
private boolean preferMANETranscripts;
249+
245250
//==================================================================================================================
246251
// Constructors:
247252

@@ -354,7 +359,7 @@ public GencodeFuncotationFactory(final Path gencodeTranscriptFastaFilePath,
354359

355360
this(gencodeTranscriptFastaFilePath, version, name, transcriptSelectionMode, userRequestedTranscripts,
356361
annotationOverrides, mainFeatureInput, flankSettings, isDataSourceB37, ncbiBuildVersion,
357-
isSegmentFuncotationEnabled, minBasesForValidSegment, FuncotatorUtils.DEFAULT_SPLICE_SITE_WINDOW_SIZE);
362+
isSegmentFuncotationEnabled, minBasesForValidSegment, FuncotatorUtils.DEFAULT_SPLICE_SITE_WINDOW_SIZE, false);
358363
}
359364

360365
/**
@@ -385,7 +390,8 @@ public GencodeFuncotationFactory(final Path gencodeTranscriptFastaFilePath,
385390
final String ncbiBuildVersion,
386391
final boolean isSegmentFuncotationEnabled,
387392
final int minBasesForValidSegment,
388-
final int spliceSiteWindowSize) {
393+
final int spliceSiteWindowSize,
394+
final boolean preferMANETranscriptsWhereApplicable) {
389395

390396
super(mainFeatureInput, minBasesForValidSegment);
391397

@@ -429,6 +435,8 @@ public GencodeFuncotationFactory(final Path gencodeTranscriptFastaFilePath,
429435

430436
// Initialize overrides / defaults:
431437
initializeAnnotationOverrides( annotationOverrides );
438+
439+
this.preferMANETranscripts = preferMANETranscriptsWhereApplicable;
432440
}
433441

434442
private Path localizeGencodeTranscriptFastaFile( final Path gencodeTranscriptFastaFilePath ) {
@@ -622,6 +630,28 @@ private static List<GencodeGtfGeneFeature> convertFeaturesToGencodeGtfGeneFeatur
622630
.collect(Collectors.toList());
623631
}
624632

633+
/**
634+
* If any MANE_Plus_Clinical transcripts are available, return only those; otherwise, if any MANE_Select transcripts are available, return only those; otherwise return only the basic transcripts.
635+
* @param transcripts the list of Gencode transcripts to possibly filter
636+
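* @return the MANE_Plus_Clinical transcripts if any are present, otherwise the MANE_Select transcripts if any are present, otherwise the transcripts tagged basic (which may be an empty list)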
637+
*/
638+
@VisibleForTesting
639+
static List<GencodeGtfTranscriptFeature> retreiveMANESelectModeTranscriptsCriteria(final List<GencodeGtfTranscriptFeature> transcripts) {
640+
final List<GencodeGtfTranscriptFeature> plusClincal = transcripts.stream()
641+
.filter(g -> hasTag(g, MANE_PLUS_CLINICAL)).toList();
642+
if (plusClincal.size() > 0) {
643+
return plusClincal;
644+
}
645+
646+
final List<GencodeGtfTranscriptFeature> maneSelectTranscripts = transcripts.stream()
647+
.filter(g -> hasTag(g, MANE_SELECT)).toList();
648+
649+
if (maneSelectTranscripts.size() > 0) {
650+
return maneSelectTranscripts;
651+
}
652+
653+
return transcripts.stream().filter(GencodeFuncotationFactory::isBasic).collect(Collectors.toList());
654+
}
625655

626656
/**
627657
* {@inheritDoc}
@@ -853,16 +883,21 @@ static boolean isVariantInCodingRegion(final GencodeFuncotation.VariantClassific
853883
*/
854884
private List<GencodeFuncotation> createFuncotationsHelper(final VariantContext variant, final Allele altAllele, final GencodeGtfGeneFeature gtfFeature, final ReferenceContext reference) {
855885

856-
final List<GencodeGtfTranscriptFeature> transcriptList;
886+
List<GencodeGtfTranscriptFeature> transcriptList;
857887

858888
// Only get basic transcripts if we're using data from Gencode:
859889
if ( gtfFeature.getGtfSourceFileType().equals(GencodeGtfCodec.GTF_FILE_TYPE_STRING) ) {
860-
transcriptList = retrieveBasicTranscripts(gtfFeature);
861-
}
862-
else {
890+
if (preferMANETranscripts) {
891+
// Keep only the MANE_Plus_Clinical/MANE_Select transcripts when any are present (otherwise fall back to the basic transcripts):
892+
transcriptList = retreiveMANESelectModeTranscriptsCriteria(gtfFeature.getTranscripts());
893+
} else {
894+
transcriptList = retrieveBasicTranscripts(gtfFeature);
895+
}
896+
} else {
863897
transcriptList = gtfFeature.getTranscripts();
864898
}
865899

900+
866901
return createFuncotationsHelper(variant, altAllele, reference, transcriptList);
867902
}
868903

@@ -979,9 +1014,14 @@ static final GencodeFuncotation createDefaultFuncotationsOnProblemVariant( final
9791014

9801015
private static boolean isBasic(final GencodeGtfTranscriptFeature transcript) {
9811016
// Check if this transcript has the `basic` tag:
1017+
return hasTag(transcript, GencodeGTFFieldConstants.FeatureTag.BASIC);
1018+
}
1019+
1020+
private static boolean hasTag(final GencodeGtfTranscriptFeature transcript, final GencodeGTFFieldConstants.FeatureTag tag) {
1021+
// Check if this transcript has the given tag:
9821022
return transcript.getOptionalFields().stream()
9831023
.filter( f -> f.getName().equals("tag") )
984-
.filter( f -> f.getValue().equals(GencodeGTFFieldConstants.FeatureTag.BASIC.toString()) )
1024+
.filter( f -> f.getValue().equals(tag.toString()) )
9851025
.count() > 0;
9861026
}
9871027

src/test/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorEngineUnitTest.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,8 @@ public void testGetFuncotationFactoriesAndCreateFuncotationMapForVariant(final F
6565
new FlankSettings(0, 0),
6666
false,
6767
FuncotatorUtils.DEFAULT_MIN_NUM_BASES_FOR_VALID_SEGMENT,
68-
FuncotatorUtils.DEFAULT_SPLICE_SITE_WINDOW_SIZE)
68+
FuncotatorUtils.DEFAULT_SPLICE_SITE_WINDOW_SIZE,
69+
false)
6970
);
7071

7172
for (int i = 0; i < entireVcf.getRight().size(); i++) {

0 commit comments

Comments
 (0)