1
1
use crate :: node:: { Node , ParseError } ;
2
2
use memchr:: memchr2;
3
3
4
+ // 修改 NodeParser 枚举以使用 trait 对象
5
+ pub enum NodeParser {
6
+ Default ,
7
+ Custom ( Box < dyn Fn ( & str ) -> Result < ( String , f64 , f64 ) , ParseError > + Send > ) ,
8
+ }
9
+
10
+ impl Default for NodeParser {
11
+ fn default ( ) -> Self {
12
+ NodeParser :: Default
13
+ }
14
+ }
15
+
16
+ /// Parse the label of a node from a Newick tree string.
17
+ ///
18
+ /// This function takes a string slice representing a node label in a Newick tree string,
19
+ /// and returns the name and length of the node as a tuple.
20
+ ///
21
+ /// # Arguments
22
+ ///
23
+ /// * `label` - A string slice representing the node in a Newick tree string.
4
24
fn parse_label ( label : & str ) -> Result < ( String , f64 ) , ParseError > {
5
25
let label = label. trim_end_matches ( ";" ) . trim_matches ( '\'' ) . to_string ( ) ;
6
26
@@ -31,27 +51,41 @@ fn parse_label(label: &str) -> Result<(String, f64), ParseError> {
31
51
///
32
52
/// # Arguments
33
53
///
34
- /// * `node_bytes ` - A byte slice representing the node in a Newick tree string.
54
+ /// * `node_str ` - A string slice representing the node in a Newick tree string.
35
55
///
36
56
/// # Returns
37
57
///
38
- /// Returns a `Result` containing a tuple of the name and length on success,
58
+ /// Returns a `Result` containing a tuple of the name, bootstrap, and length on success,
39
59
/// or an `Err(ParseError)` on failure.
40
60
///
41
61
/// # Example
42
62
///
43
63
/// ```
44
- /// use gtdb_tree::tree::parse_node ;
64
+ /// use gtdb_tree::tree::parse_node_default ;
45
65
///
46
- /// let node_bytes = b "A:0.1";
47
- /// let (name, bootstrap, length) = parse_node(node_bytes ).unwrap();
66
+ /// let node_str = "A:0.1";
67
+ /// let (name, bootstrap, length) = parse_node_default(node_str ).unwrap();
48
68
/// assert_eq!(name, "A");
49
69
/// assert_eq!(bootstrap, 0.0);
50
70
/// assert_eq!(length, 0.1);
51
71
/// ```
52
- pub fn parse_node ( node_bytes : & [ u8 ] ) -> Result < ( String , f64 , f64 ) , ParseError > {
53
- let node_str = std:: str:: from_utf8 ( node_bytes) . expect ( "UTF-8 sequence" ) ;
54
- // gtdb
72
+ pub fn parse_node_default ( node_str : & str ) -> Result < ( String , f64 , f64 ) , ParseError > {
73
+ // Handle the "AD:0.03347[21.0]" format, i.e. name:length[bootstrap]
74
+ if let Some ( ( name_length, bootstrap_str) ) = node_str. rsplit_once ( '[' ) {
75
+ if let Some ( ( name, length_str) ) = name_length. rsplit_once ( ':' ) {
76
+ let bootstrap = bootstrap_str
77
+ . trim_end_matches ( ']' )
78
+ . parse :: < f64 > ( )
79
+ . map_err ( |_| {
80
+ ParseError :: InvalidFormat ( format ! ( "Invalid bootstrap value: {}" , bootstrap_str) )
81
+ } ) ?;
82
+ let length = length_str. parse :: < f64 > ( ) . map_err ( |_| {
83
+ ParseError :: InvalidFormat ( format ! ( "Invalid length value: {}" , length_str) )
84
+ } ) ?;
85
+ return Ok ( ( name. to_string ( ) , bootstrap, length) ) ;
86
+ }
87
+ }
88
+
55
89
// Check if node_str contains single quotes and ensure they are together
56
90
if node_str. matches ( '\'' ) . count ( ) % 2 != 0 {
57
91
return Err ( ParseError :: InvalidFormat ( format ! (
@@ -102,12 +136,13 @@ pub fn parse_node(node_bytes: &[u8]) -> Result<(String, f64, f64), ParseError> {
102
136
///
103
137
/// ```
104
138
/// use gtdb_tree::tree::parse_tree;
139
+ /// use gtdb_tree::tree::NodeParser;
105
140
///
106
141
/// let newick_str = "((A:0.1,B:0.2):0.3,C:0.4);";
107
- /// let nodes = parse_tree(newick_str).unwrap();
142
+ /// let nodes = parse_tree(newick_str, NodeParser::default() ).unwrap();
108
143
/// assert_eq!(nodes.len(), 5);
109
144
/// ```
110
- pub fn parse_tree ( newick_str : & str ) -> Result < Vec < Node > , ParseError > {
145
+ pub fn parse_tree ( newick_str : & str , parser : NodeParser ) -> Result < Vec < Node > , ParseError > {
111
146
let mut nodes: Vec < Node > = Vec :: new ( ) ;
112
147
let mut pos = 0 ;
113
148
@@ -132,7 +167,16 @@ pub fn parse_tree(newick_str: &str) -> Result<Vec<Node>, ParseError> {
132
167
let end_pos = memchr2 ( b',' , b')' , & bytes[ pos..] ) . unwrap_or ( bytes_len - pos) ;
133
168
let node_end_pos = pos + end_pos;
134
169
let node_bytes = & bytes[ pos..node_end_pos] ;
135
- let ( name, bootstrap, length) = parse_node ( node_bytes) ?;
170
+
171
+ let mut node_str = std:: str:: from_utf8 ( node_bytes) . expect ( "UTF-8 sequence" ) ;
172
+ if node_end_pos == bytes_len {
173
+ node_str = node_str. trim_end_matches ( ';' ) ;
174
+ }
175
+ let ( name, bootstrap, length) = match & parser {
176
+ NodeParser :: Default => parse_node_default ( node_str) ?,
177
+ NodeParser :: Custom ( func) => func ( node_str) ?,
178
+ } ;
179
+
136
180
let node_id = if & bytes[ pos - 1 ] == & b')' {
137
181
stack. pop ( ) . unwrap_or ( 0 )
138
182
} else {
@@ -161,8 +205,9 @@ mod tests {
161
205
use super :: * ;
162
206
163
207
#[ test]
164
- fn test_parse_tree ( ) {
208
+ fn test_parse_tree ( ) -> Result < ( ) , ParseError > {
165
209
let test_cases = vec ! [
210
+ "(A:0.1,B:0.2,(C:0.3,D:0.4)AD:0.03347[21.0]);" ,
166
211
"((A:0.1,B:0.2)'56:F;H;':0.3,C:0.4);" ,
167
212
"(,,(,));" , // no nodes are named
168
213
"(A,B,(C,D));" , // leaf nodes are named
@@ -175,15 +220,15 @@ mod tests {
175
220
] ;
176
221
177
222
for newick_str in test_cases {
178
- match parse_tree ( newick_str) {
179
- Ok ( nodes) => println ! (
180
- "Parsed nodes for '{}': {:?}, len: {}" ,
181
- newick_str,
182
- nodes,
183
- nodes. len( )
184
- ) ,
185
- Err ( e) => println ! ( "Error parsing '{}': {:?}" , newick_str, e) ,
186
- }
223
+ let nodes = parse_tree ( newick_str, NodeParser :: default ( ) ) ?;
224
+ println ! (
225
+ "Parsed nodes for '{}': {:?}, len: {}" ,
226
+ newick_str,
227
+ nodes,
228
+ nodes. len( )
229
+ )
187
230
}
231
+
232
+ Ok ( ( ) )
188
233
}
189
234
}
0 commit comments