@@ -794,7 +794,7 @@ def estimate_phase_beagle(
794794 VCF's contig names.
795795 panel : str, optional
796796 VCF file corresponding to a reference haplotype panel (compressed or
797- uncompressed). By default, the 1KGP panel in the ``~/ pypgx-bundle``
797+ uncompressed). By default, the 1KGP panel in the ``pypgx-bundle``
798798 directory will be used.
799799 impute : bool, default: False
800800 If True, perform imputation of missing genotypes.
@@ -819,8 +819,7 @@ def estimate_phase_beagle(
819819 metadata ['Program' ] = 'Beagle'
820820
821821 if panel is None :
822- home = os .path .expanduser ('~' )
823- panel = f'{ home } /pypgx-bundle/1kgp/{ assembly } /{ gene } .vcf.gz'
822+ panel = f'{ sdk .get_bundle_path ()} /1kgp/{ assembly } /{ gene } .vcf.gz'
824823
825824 has_chr_prefix = pyvcf .has_chr_prefix (panel )
826825
@@ -839,6 +838,31 @@ def estimate_phase_beagle(
839838 if metadata ['Platform' ] == 'Chip' :
840839 vf1 = vf1 .filter_gsa ()
841840
841+ def run_beagle (vf1 , em ):
842+ with tempfile .TemporaryDirectory () as t :
843+ vf1 .to_file (f'{ t } /input.vcf' )
844+ command = [
845+ 'java' , '-Xmx2g' , '-jar' , beagle ,
846+ f'gt={ t } /input.vcf' ,
847+ f'chrom={ region } ' ,
848+ f'ref={ panel } ' ,
849+ f'out={ t } /output' ,
850+ f'impute={ str (impute ).lower ()} ' ,
851+ f'em={ em } '
852+ ]
853+ subprocess .run (
854+ command ,
855+ check = True ,
856+ stdout = subprocess .DEVNULL ,
857+ stderr = subprocess .PIPE
858+ )
859+ vf3 = pyvcf .VcfFrame .from_file (f'{ t } /output.vcf.gz' )
860+ if common_samples :
861+ vf = vf3 .rename ({f'{ x } _TEMP' : x for x in common_samples })
862+ if has_chr_prefix :
863+ vf = vf3 .update_chr_prefix ('remove' )
864+ return vf3
865+
842866 # Beagle will throw an error if there is only one marker overlapping with
843867 # the reference panel in a given window. This typically occurs when the
844868 # input VCF has very few markers or only one marker. Therefore, these
@@ -863,39 +887,26 @@ def estimate_phase_beagle(
863887 common_samples = list (set (vf1 .samples ) & set (vf2 .samples ))
864888 if common_samples :
865889 vf1 = vf1 .rename ({x : f'{ x } _TEMP' for x in common_samples })
866- with tempfile .TemporaryDirectory () as t :
867- vf1 .to_file (f'{ t } /input.vcf' )
868- command = [
869- 'java' , '-Xmx2g' , '-jar' , beagle ,
870- f'gt={ t } /input.vcf' ,
871- f'chrom={ region } ' ,
872- f'ref={ panel } ' ,
873- f'out={ t } /output' ,
874- f'impute={ str (impute ).lower ()} '
875- ]
876- try :
877- subprocess .run (
878- command ,
879- check = True ,
880- stdout = subprocess .DEVNULL ,
881- stderr = subprocess .PIPE
882- )
883- vf3 = pyvcf .VcfFrame .from_file (f'{ t } /output.vcf.gz' )
884- if common_samples :
885- vf3 = vf3 .rename ({f'{ x } _TEMP' : x for x in common_samples })
886- if has_chr_prefix :
887- vf3 = vf3 .update_chr_prefix ('remove' )
890+
891+ try :
892+ vf3 = run_beagle (vf1 , em = 'true' )
893+ except subprocess .CalledProcessError as e :
894+ message = e .stderr .decode ()
888895 # Beagle may throw an error even when multiple overlapping markers
889896 # exist because they are too distant from each other -- that is,
890897 # located in separate haplotype windows.
891- except subprocess .CalledProcessError as e :
892- message = e .stderr .decode ()
893- if "Window has only one position" in message :
894- warnings .warn ("Beagle: Window has only one position" )
895- vf3 = pyvcf .VcfFrame ([], vf1 .df [0 :0 ])
896- else :
897- print (message )
898- raise e
898+ if "Window has only one position" in message :
899+ warnings .warn ("Beagle: Window has only one position" )
900+ vf3 = pyvcf .VcfFrame ([], vf1 .df [0 :0 ])
901+ # Beagle will throw an error if the expectation-maximization
902+ # algorithm estimates a parameter value outside the permitted
903+ # range. When this happens, we skip the expectation-maximization.
904+ elif "IllegalArgumentException: 1.0" in message :
905+ warnings .warn ("Beagle: Expectation-maximization skipped" )
906+ vf3 = run_beagle (vf1 , em = 'false' )
907+ else :
908+ print (message )
909+ raise e
899910
900911 return sdk .Archive (metadata , vf3 )
901912
@@ -1203,7 +1214,7 @@ def predict_cnv(copy_number, cnv_caller=None):
12031214 Archive file or object with the semantic type CovFrame[CopyNumber].
12041215 cnv_caller : str or pypgx.Archive, optional
12051216 Archive file or object with the semantic type Model[CNV]. By default,
1206- a pre-trained CNV caller in the ``~/ pypgx-bundle`` directory will be
1217+ a pre-trained CNV caller in the ``pypgx-bundle`` directory will be
12071218 used.
12081219
12091220 Returns
@@ -1218,8 +1229,7 @@ def predict_cnv(copy_number, cnv_caller=None):
12181229
12191230 gene = copy_number .metadata ['Gene' ]
12201231 assembly = copy_number .metadata ['Assembly' ]
1221- home = os .path .expanduser ('~' )
1222- model_file = f'{ home } /pypgx-bundle/cnv/{ assembly } /{ gene } .zip'
1232+ model_file = f'{ sdk .get_bundle_path ()} /cnv/{ assembly } /{ gene } .zip'
12231233
12241234 if cnv_caller is None :
12251235 cnv_caller = sdk .Archive .from_file (model_file )
0 commit comments