Skip to content

Commit de49ecf

Browse files
committed
create Specialized Handler Classes for allele, glstring, mac etc.
1 parent 4ecd8c7 commit de49ecf

File tree

9 files changed

+1002
-0
lines changed

9 files changed

+1002
-0
lines changed

pyard/ard_refactored.py

Lines changed: 463 additions & 0 deletions
Large diffs are not rendered by default.

pyard/handlers/__init__.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# -*- coding: utf-8 -*-
2+
3+
from .allele_reducer import AlleleReducer
4+
from .gl_string_processor import GLStringProcessor
5+
from .mac_handler import MACHandler
6+
from .serology_handler import SerologyHandler
7+
from .v2_handler import V2Handler
8+
from .xx_handler import XXHandler
9+
from .shortnull_handler import ShortNullHandler
10+
11+
# Names re-exported by the handlers package; each entry matches one of the
# handler classes imported at the top of this module.
__all__ = [
    "AlleleReducer",
    "GLStringProcessor",
    "MACHandler",
    "SerologyHandler",
    "V2Handler",
    "XXHandler",
    "ShortNullHandler",
]

pyard/handlers/allele_reducer.py

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
# -*- coding: utf-8 -*-
2+
3+
import functools
4+
from typing import TYPE_CHECKING
5+
6+
from ..constants import VALID_REDUCTION_TYPES, expression_chars
7+
from ..exceptions import InvalidAlleleError
8+
from ..misc import get_n_field_allele
9+
10+
if TYPE_CHECKING:
11+
from ..ard import ARD
12+
13+
14+
class AlleleReducer:
    """Reduce a single allele name to a requested reduction type.

    The reducer holds no data of its own: every mapping table, config
    value, database connection and helper it needs is read from the ``ARD``
    instance passed to the constructor.
    """

    def __init__(self, ard_instance: "ARD"):
        # Keep a handle on the owning ARD object; all lookups go through it.
        self.ard = ard_instance

    def reduce_allele(
        self, allele: str, redux_type: VALID_REDUCTION_TYPES, re_ping=True
    ) -> str:
        """Return ``allele`` reduced according to ``redux_type``.

        Branches, in the order they are tried:

        * ``"G"``   - allele found in ``g_group``: the ``dup_g`` entry wins
          when one exists, otherwise the ``g_group`` entry is returned.
        * ``"P"``   - allele found in ``p_group``: that entry is returned.
        * ``"lgx"`` / ``"lg"`` - the ``lgx_group`` entry, or the first two
          ``:``-separated fields when the allele is not in the table;
          ``"lg"`` additionally gets a suffix (see ``_add_lg_suffix``).
        * ``"W"``   - WHO alleles are returned as-is; alleles present in
          ``who_group`` are expanded and sent back through ``ARD.redux``.
        * ``"exon"``, ``"U2"``, ``"S"`` - delegated to the private helpers.
        * anything else (including ``"G"``/``"P"`` misses) - validated by
          ``_handle_default_reduction``.

        :param allele: allele name to reduce
        :param redux_type: one of the valid reduction types
        :param re_ping: accepted for signature compatibility; not read here
        :return: the reduced allele (may be a ``/``-separated list)
        :raises InvalidAlleleError: from the default branch when the allele
            is not recognised
        """
        ard = self.ard

        if redux_type == "G":
            g_groups = ard.ars_mappings.g_group
            if allele in g_groups:
                duplicates = ard.ars_mappings.dup_g
                return duplicates[allele] if allele in duplicates else g_groups[allele]

        if redux_type == "P":
            p_groups = ard.ars_mappings.p_group
            if allele in p_groups:
                return p_groups[allele]

        if redux_type in ("lgx", "lg"):
            lgx_groups = ard.ars_mappings.lgx_group
            if allele in lgx_groups:
                reduced = lgx_groups[allele]
            else:
                # Not in the table: fall back to the first two fields.
                reduced = ":".join(allele.split(":")[:2])
            return self._add_lg_suffix(reduced) if redux_type == "lg" else reduced

        if redux_type == "W":
            if ard._is_who_allele(allele):
                return allele
            who_groups = ard.code_mappings.who_group
            if allele not in who_groups:
                return allele
            return ard.redux("/".join(who_groups[allele]), redux_type)

        # Remaining types map one-to-one onto a helper; everything that did
        # not return above ends up in the default validation helper.
        specialised = {
            "exon": self._handle_exon_reduction,
            "U2": self._handle_u2_reduction,
            "S": self._handle_serology_reduction,
        }
        handler = specialised.get(redux_type, self._handle_default_reduction)
        return handler(allele)

    def _add_lg_suffix(self, redux_allele):
        """Append the lg suffix to every ``/``-separated allele.

        The suffix is ``"ARS"`` when the ``ARS_as_lg`` config entry is
        truthy and ``"g"`` otherwise.
        """
        if "/" in redux_allele:
            return "/".join(
                self._add_lg_suffix(single) for single in redux_allele.split("/")
            )
        suffix = "ARS" if self.ard._config["ARS_as_lg"] else "g"
        return redux_allele + suffix

    def _handle_exon_reduction(self, allele):
        """Reduce ``allele`` for the ``"exon"`` reduction type.

        Alleles listed in ``exon_group`` return their mapped value; when the
        allele ends in an expression character and the mapped value plus
        that character is a short null, that combined name is returned
        instead.  Alleles not in the table are first reduced with ``"W"``
        and, unless that changed nothing or produced exactly two fields,
        reduced again with ``"exon"``.
        """
        exon_groups = self.ard.ars_mappings.exon_group

        if allele not in exon_groups:
            who_expanded = self.ard.redux(allele, "W")
            # Only recurse when the W reduction produced something new that
            # is not a plain two-field name.
            if who_expanded != allele and len(who_expanded.split(":")) != 2:
                return self.ard.redux(who_expanded, "exon")
            return allele

        exon_allele = exon_groups[allele]
        trailing = allele[-1]
        if trailing in expression_chars:
            candidate = exon_allele + trailing
            if self.ard.is_shortnull(candidate):
                return candidate
        return exon_allele

    def _handle_u2_reduction(self, allele):
        """Reduce ``allele`` for the ``"U2"`` reduction type.

        Two-field alleles are returned unchanged.  Otherwise the two-field
        form (expression character preserved) is returned when the database
        knows it, and the ``"lgx"`` reduction is used as the fallback.
        """
        if len(allele.split(":")) == 2:
            return allele

        two_field = get_n_field_allele(allele, 2, preserve_expression=True)
        if self.ard._is_allele_in_db(two_field):
            return two_field
        return self.reduce_allele(allele, "lgx")

    def _handle_serology_reduction(self, allele):
        """Return the sorted, ``/``-joined serology names that list ``allele``.

        Two-field alleles are reduced to ``"lgx"`` first and looked up
        against the ``"lgx_allele_list"`` column.  If no serology lists the
        allele directly and it is a two-field allele, each candidate allele
        list is itself reduced to ``"lgx"`` and searched again.
        """
        # Imported here rather than at module level, as in the original.
        # NOTE(review): presumably to avoid an import cycle - confirm.
        from .. import db
        from ..misc import is_2_field_allele

        if is_2_field_allele(allele):
            allele = self.reduce_allele(allele, "lgx")
            serology_mapping = db.find_serology_for_allele(
                self.ard.db_connection, allele, "lgx_allele_list"
            )
        else:
            serology_mapping = db.find_serology_for_allele(
                self.ard.db_connection, allele
            )

        serology_set = {
            serology
            for serology, allele_list in serology_mapping.items()
            if allele in allele_list.split("/")
        }

        if not serology_set and is_2_field_allele(allele):
            # Second chance: compare against the lgx-reduced allele lists.
            for serology, allele_list in serology_mapping.items():
                reduced_list = self.ard.redux(allele_list, "lgx")
                if allele in reduced_list.split("/"):
                    serology_set.add(serology)

        sort_key = functools.cmp_to_key(self.ard.smart_sort_comparator)
        return "/".join(sorted(serology_set, key=sort_key))

    def _handle_default_reduction(self, allele):
        """Validate ``allele`` and return it unchanged.

        Names ending in ``P`` / ``G`` are accepted when they appear among
        the values of ``p_group`` / ``g_group``; anything else must be
        known to the database.

        :raises InvalidAlleleError: when none of the checks accept the name
        """
        if allele.endswith("P"):
            known_group = allele in self.ard.ars_mappings.p_group.values()
        elif allele.endswith("G"):
            known_group = allele in self.ard.ars_mappings.g_group.values()
        else:
            known_group = False

        if known_group or self.ard._is_allele_in_db(allele):
            return allele
        raise InvalidAlleleError(f"{allele} is an invalid allele.")
pyard/handlers/gl_string_processor.py

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
# -*- coding: utf-8 -*-
2+
3+
import functools
4+
from typing import List, TYPE_CHECKING
5+
6+
from ..constants import VALID_REDUCTION_TYPES
7+
from ..misc import validate_reduction_type
8+
9+
if TYPE_CHECKING:
10+
from ..ard import ARD
11+
12+
13+
class GLStringProcessor:
    """Split, reduce, validate and re-assemble GL strings.

    Reduction of the individual pieces, allele validation, sorting and
    configuration are all provided by the ``ARD`` instance passed to the
    constructor.
    """

    # GL string delimiters in the order they are tested, outermost first.
    _DELIMITERS = ("^", "|", "+", "~", "/")

    def __init__(self, ard_instance: "ARD"):
        self.ard = ard_instance

    def process_gl_string(
        self, glstring: str, redux_type: VALID_REDUCTION_TYPES = "lgx"
    ) -> str:
        """Reduce every component of ``glstring`` and join the results.

        The reduction type is validated first; when the ``strict`` config
        entry is truthy the whole GL string is validated as well.  The
        string is then split on the first delimiter (tested in the order
        ``^ | + ~ /``) that occurs in it, each piece is reduced through
        ``ARD.redux`` and the pieces are recombined by
        ``_sorted_unique_gl``.  A string containing no delimiter is
        returned unchanged.

        :param glstring: GL string to process
        :param redux_type: reduction type applied to each component
        :return: the processed GL string
        """
        validate_reduction_type(redux_type)

        if self.ard._config["strict"]:
            self.validate_gl_string(glstring)

        for delim in self._DELIMITERS:
            if delim in glstring:
                reduced_parts = [
                    self.ard.redux(part, redux_type) for part in glstring.split(delim)
                ]
                return self._sorted_unique_gl(reduced_parts, delim)

        return glstring

    def _sorted_unique_gl(self, gls: List[str], delim: str) -> str:
        """Join ``gls`` with ``delim`` following the per-delimiter rules.

        * ``~``: joined as given - no sorting, no filtering.
        * ``+``: empty entries dropped, the rest sorted (duplicates kept).
        * anything else: every entry is split on ``delim`` again, empty and
          duplicate pieces are dropped and the remainder is sorted.
        """
        if delim == "~":
            return delim.join(gls)

        def _compare(left, right):
            # The config value is read per comparison (as the original
            # lambda did), so it is not touched when nothing is compared.
            return self.ard.smart_sort_comparator(
                left, right, self.ard._config["ignore_allele_with_suffixes"]
            )

        if delim == "+":
            candidates = [gl for gl in gls if gl != ""]
        else:
            flattened = []
            for gl in gls:
                flattened.extend(gl.split(delim))
            candidates = [piece for piece in set(flattened) if piece != ""]

        return delim.join(sorted(candidates, key=functools.cmp_to_key(_compare)))

    def validate_gl_string(self, glstring: str) -> bool:
        """Recursively validate ``glstring``.

        The string is split on the first delimiter (tested in the order
        ``^ | + ~ /``) it contains and every piece is validated in turn.  A
        piece without any delimiter is treated as an allele and checked
        with ``ARD._is_valid``.

        :return: the truthy validation result
        :raises InvalidAlleleError: when an allele fails validation
        """
        for delim in self._DELIMITERS:
            if delim in glstring:
                return all(
                    self.validate_gl_string(piece) for piece in glstring.split(delim)
                )

        # Whatever reaches this point is a single allele.
        verdict = self.ard._is_valid(glstring)
        if verdict:
            return verdict

        from ..exceptions import InvalidAlleleError

        raise InvalidAlleleError(f"{glstring} is not a valid Allele")

pyard/handlers/mac_handler.py

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
# -*- coding: utf-8 -*-
2+
3+
import functools
4+
import sqlite3
5+
from collections import Counter
6+
from typing import Iterable, TYPE_CHECKING
7+
8+
from ..constants import HLA_regex, DEFAULT_CACHE_SIZE
9+
from ..exceptions import InvalidMACError
10+
from .. import db
11+
12+
if TYPE_CHECKING:
13+
from ..ard import ARD
14+
15+
16+
class MACHandler:
    """Validate, expand and look up MACs (Multiple Allele Codes).

    Database access, allele validation and sorting are delegated to the
    ``ARD`` instance passed to the constructor.
    """

    def __init__(self, ard_instance: "ARD"):
        self.ard = ard_instance

    # NOTE(review): ``lru_cache`` on an instance method keys on ``self`` and
    # keeps every handler alive for the lifetime of the cache (ruff B019).
    # Left unchanged because callers may rely on ``is_mac.cache_clear()``.
    @functools.lru_cache(maxsize=DEFAULT_CACHE_SIZE)
    def is_mac(self, allele: str) -> bool:
        """Return whether ``allele`` has the form and content of a MAC.

        The name must consist of exactly two ``:``-separated parts with a
        purely alphabetic second part, and that code must resolve to a
        non-empty allele list in the database.  When the resolved entries
        contain ``:`` themselves, the part of the name after ``*`` must
        additionally equal the most common first field among them.

        A ``sqlite3.OperationalError`` raised during the check is printed
        and the result is ``False``.
        """
        parts = allele.split(":")
        if len(parts) != 2:
            return False

        locus_antigen, code = parts
        if not code.isalpha():
            return False

        try:
            alleles = db.mac_code_to_alleles(self.ard.db_connection, code)
            if alleles:
                if not any(":" in entry for entry in alleles):
                    # Plain code list: a non-empty expansion is enough.
                    return True
                # Entries carry their own first field: the name is only
                # valid for the most frequent one.
                first_fields = Counter(entry.split(":")[0] for entry in alleles)
                valid_antigen = first_fields.most_common(1)[0][0]
                provided_antigen = locus_antigen.split("*")[-1]
                return provided_antigen == valid_antigen
        except sqlite3.OperationalError as e:
            print("Error: ", e)
        return False

    def expand_mac(self, mac_code: str) -> str:
        """Expand ``mac_code`` into a ``/``-separated list of alleles.

        When ``HLA_regex`` matches the code, the text before the first
        ``-`` is stripped for the lookup and ``HLA-`` is prefixed to every
        resulting allele.

        :raises InvalidMACError: when ``is_mac`` rejects ``mac_code``
        """
        if not self.is_mac(mac_code):
            raise InvalidMACError(f"{mac_code} is an invalid MAC.")

        locus_antigen, code = mac_code.split(":")
        if HLA_regex.search(mac_code):
            locus_antigen = locus_antigen.split("-")[1]
            expanded = ["HLA-" + a for a in self._get_alleles(code, locus_antigen)]
        else:
            expanded = self._get_alleles(code, locus_antigen)
        return "/".join(expanded)

    def lookup_mac(self, allelelist_gl: str) -> str:
        """Return the MAC name matching the ``/``-separated allele list.

        Up to three expansions are tried against the database, in order:

        1. only when all alleles share one first field - the sorted, unique
           second fields;
        2. the ``field:field`` parts in the order given;
        3. the same parts sorted with ``ARD.smart_sort_comparator``.

        The first expansion that yields a code produces
        ``<locus>*<first antigen group>:<code>``.

        :raises InvalidMACError: when no expansion maps to a code
        """
        allele_fields = [name.split("*")[1] for name in allelelist_gl.split("/")]
        antigen_groups = sorted({fields.split(":")[0] for fields in allele_fields})

        def _candidate_expansions():
            # Lazily built, so each expansion is computed only if the
            # previous lookup did not succeed.
            if len(antigen_groups) == 1:
                yield "/".join(
                    sorted({fields.split(":")[1] for fields in allele_fields})
                )
            # Try given list order
            yield "/".join(allele_fields)
            # Try sorted list
            yield "/".join(
                sorted(
                    allele_fields,
                    key=functools.cmp_to_key(self.ard.smart_sort_comparator),
                )
            )

        for mac_expansion in _candidate_expansions():
            mac_code = db.alleles_to_mac_code(self.ard.db_connection, mac_expansion)
            if mac_code:
                locus = allelelist_gl.split("*")[0]
                return f"{locus}*{antigen_groups[0]}:{mac_code}"

        raise InvalidMACError(f"{allelelist_gl} does not have a MAC.")

    def _get_alleles(self, code, locus_antigen) -> Iterable[str]:
        """Return the database-known alleles that ``code`` expands to.

        Entries containing ``:`` are prefixed with the locus only
        (``locus*entry``); otherwise each entry is appended to
        ``locus_antigen`` as the second field.  Names rejected by
        ``ARD._is_allele_in_db`` are filtered out.
        """
        entries = db.mac_code_to_alleles(self.ard.db_connection, code)

        colon_flags = [":" in entry for entry in entries]
        if any(colon_flags):
            locus = locus_antigen.split("*")[0]
            expanded = [f"{locus}*{entry}" for entry in entries]
        else:
            expanded = [f"{locus_antigen}:{entry}" for entry in entries]

        return [name for name in expanded if self.ard._is_allele_in_db(name)]

0 commit comments

Comments
 (0)