1010 from ..ard import ARD
1111
1212
13- class GLStringProcessor :
14- """Handles GL string parsing, validation and processing"""
13+ class GLStringHandler :
14+ """Handles GL string parsing, validation and processing
15+
16+ GL (Genotype List) strings represent HLA typing data using standardized
17+ delimiters to express ambiguity and relationships between alleles.
18+ This class processes these complex strings by parsing delimiters and
19+ applying reductions to individual components.
20+ """
1521
1622 def __init__ (self , ard_instance : "ARD" ):
23+ """Initialize the GLStringHandler with an ARD instance
24+
25+ Args:
26+ ard_instance: The main ARD object for database access and configuration
27+ """
1728 self .ard = ard_instance
1829
def process_gl_string(
    self, glstring: str, redux_type: VALID_REDUCTION_TYPE = "lgx"
) -> str:
    """Reduce a GL string by splitting on its outermost delimiter.

    The delimiters are tried in a fixed order and only the first one
    found in ``glstring`` is used for splitting. Every resulting piece
    is handed to ``self.ard.redux`` and the reduced pieces are
    recombined by ``_sorted_unique_gl`` using that same delimiter.

    Delimiters, in the order they are checked (terminology taken from
    the GL String specification):
        ^  locus separator (multilocus unphased genotype)
        |  genotype ambiguity separator
        +  gene-copy separator within a genotype
        ~  haplotype (phased) separator
        /  allele ambiguity separator

    Args:
        glstring: GL string to process,
            e.g. "A*01:01+A*02:01^B*07:02".
        redux_type: Reduction type passed through to ``self.ard.redux``
            for every piece.

    Returns:
        The recombined, reduced GL string, or ``glstring`` unchanged
        when it contains none of the delimiters.

    Raises:
        Whatever ``validate_reduction_type`` raises for an unsupported
        ``redux_type``, and whatever ``validate_gl_string`` raises when
        the "strict" config flag is on (presumably an invalid-allele
        error - confirm against that method).
    """
    validate_reduction_type(redux_type)

    # Structural validation of the whole string happens only in strict mode.
    strict_mode = self.ard._config["strict"]
    if strict_mode:
        self.validate_gl_string(glstring)

    # Order matters: the first delimiter present decides how the string
    # is split; the remaining ones are left inside the pieces for
    # self.ard.redux to deal with.
    for separator in ("^", "|", "+", "~", "/"):
        if separator not in glstring:
            continue
        reduced_parts = []
        for part in glstring.split(separator):
            reduced_parts.append(self.ard.redux(part, redux_type))
        return self._sorted_unique_gl(reduced_parts, separator)

    # No delimiter at all: hand the string back untouched.
    return glstring
5589
5690 def _sorted_unique_gl (self , gls : List [str ], delim : str ) -> str :
57- """Make a list of sorted unique GL Strings separated by delim"""
91+ """Make a list of sorted unique GL Strings separated by delim
92+
93+ Creates a sorted, deduplicated list of GL string components.
94+ Different delimiters have different sorting behaviors:
95+ - '~' preserves original order (no sorting/deduplication)
96+ - '+' sorts but keeps structure intact
97+ - Others flatten, deduplicate, and sort
98+
99+ Args:
100+ gls: List of GL string components to process
101+ delim: Delimiter to use for joining results
102+
103+ Returns:
104+ Sorted and deduplicated GL string components joined by delimiter
105+ """
106+ # Possible allele list (~) preserves original order
58107 if delim == "~" :
59108 return delim .join (gls )
60109
110+ # Allele list (+) sorts but maintains structure
61111 if delim == "+" :
62112 non_empty_gls = filter (lambda s : s != "" , gls )
63113 return delim .join (
@@ -71,6 +121,7 @@ def _sorted_unique_gl(self, gls: List[str], delim: str) -> str:
71121 )
72122 )
73123
124+ # Other delimiters: flatten, deduplicate, and sort
74125 all_gls = []
75126 for gl in gls :
76127 all_gls += gl .split (delim )
@@ -87,7 +138,22 @@ def _sorted_unique_gl(self, gls: List[str], delim: str) -> str:
87138 )
88139
89140 def validate_gl_string (self , glstring : str ) -> bool :
90- """Validate GL string structure and components"""
141+ """Validate GL string structure and components
142+
143+ Recursively validates GL string by parsing delimiters and checking
144+ that all leaf components (individual alleles) are valid according
145+ to the ARD database.
146+
147+ Args:
148+ glstring: GL string to validate
149+
150+ Returns:
151+ True if all components are valid
152+
153+ Raises:
154+ InvalidAlleleError: If any component allele is invalid
155+ """
156+ # Recursively validate components separated by each delimiter type
91157 if "^" in glstring :
92158 return all (map (self .validate_gl_string , glstring .split ("^" )))
93159 if "|" in glstring :
@@ -99,7 +165,7 @@ def validate_gl_string(self, glstring: str) -> bool:
99165 if "/" in glstring :
100166 return all (map (self .validate_gl_string , glstring .split ("/" )))
101167
102- # what falls through here is an allele
168+ # Base case: validate individual allele against database
103169 is_valid_allele = self .ard ._is_valid (glstring )
104170 if not is_valid_allele :
105171 from ..exceptions import InvalidAlleleError
0 commit comments