Skip to content

Commit 9842ba7

Browse files
committed
fix: flatten choice struct members within sequences
The PubChem PUG View schema[1] frequently uses the following pattern:

```xml
<!-- ... -->
<xs:complexType>
  <xs:sequence>
    <xs:choice>
      <xs:element name="A" type="xs:string"/>
      <xs:element name="B" type="xs:int"/>
    </xs:choice>
    <xs:element name="..." type="xs:string" minOccurs="0"/>
  </xs:sequence>
</xs:complexType>
</xs:element>
<!-- ... -->
```

Prior to this change, the `xs:choice` element was being treated as just another XSD element rather than as a choice. This commit correctly tags enum struct fields so that they are properly flattened.

[1]: https://pubchem.ncbi.nlm.nih.gov/docs/pug-view#section=Formats
1 parent 7c31c6e commit 9842ba7

File tree

6 files changed

+126
-1
lines changed

6 files changed

+126
-1
lines changed

xsd-parser/src/parser/utils.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ pub fn enum_to_field(en: Enum) -> StructField {
7373
name: en.name.clone(),
7474
type_name: en.name.clone(),
7575
subtypes: vec![RsEntity::Enum(en)],
76-
source: StructFieldSource::Element,
76+
source: StructFieldSource::Choice,
7777
..Default::default()
7878
}
7979
}

xsd-parser/tests/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,4 @@ mod tuple_with_vec;
1818
mod type_name_clash;
1919
mod union;
2020
mod xsd_string;
21+
mod sequence_choice;
xsd-parser/tests/sequence_choice/example.xml

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
<Section
2+
xmlns:xs="http://www.w3.org/2001/XMLSchema-instance"
3+
xs:schemaLocation="http://pubchem.ncbi.nlm.nih.gov/pug_view https://pubchem.ncbi.nlm.nih.gov/pug_view/pug_view.xsd"
4+
>
5+
<TOCHeading>Structures</TOCHeading>
6+
<Description>Structure depictions of this compound, including computationally generated two-dimensional (2D) and three-dimensional (3D) structures, as well as experimentally determined 3D single-crystal structures.</Description>
7+
<Section>
8+
<TOCHeading>2D Structure</TOCHeading>
9+
<Description>A two-dimensional (2D) structure representation of the compound. Because this structure is processed through chemical structure standardization (Hähnke et al., J. Cheminform. 2018, 10, 36), it is not necessarily the same as the structures provided by individual data contributors. </Description>
10+
<URL>https://doi.org/10.1186/s13321-018-0293-8</URL>
11+
<DisplayControls>
12+
<MoveToTop>true</MoveToTop>
13+
</DisplayControls>
14+
<Information>
15+
<ReferenceNumber>66</ReferenceNumber>
16+
<Value>
17+
<Boolean>true</Boolean>
18+
</Value>
19+
</Information>
20+
</Section>
21+
<Section>
22+
<TOCHeading>3D Conformer</TOCHeading>
23+
<Description>A three-dimensional (3D) structure representation of the compound. This 3D structure is not experimentally determined, but computed by PubChem. This structure may or may not be the same as the inherent structure of the compound you would expect to see in vacuum or in the gas phase, because the underlying computational algorithm aims to generate a protein-bound structure, which would be observed in a protein-ligand complex. More detailed information on this conformer model can be found in Kim et al., J. Cheminform. 2013, 5, 1.</Description>
24+
<URL>https://doi.org/10.1186/1758-2946-5-1</URL>
25+
<DisplayControls>
26+
<MoveToTop>true</MoveToTop>
27+
</DisplayControls>
28+
<Information>
29+
<ReferenceNumber>66</ReferenceNumber>
30+
<Description>2-Hydroxyethyl Methacrylate</Description>
31+
<Value>
32+
<Number>13360</Number>
33+
</Value>
34+
</Information>
35+
</Section>
36+
</Section>
xsd-parser/tests/sequence_choice/expected.rs

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
#[derive(Default, PartialEq, Debug, YaSerialize, YaDeserialize)]
2+
#[yaserde(namespace = "http://pubchem.ncbi.nlm.nih.gov/pug_view")]
3+
pub struct Section {
4+
#[yaserde(flatten)]
5+
pub section_choice: section::SectionChoice,
6+
7+
#[yaserde(rename = "Description")]
8+
pub description: Option<String>,
9+
10+
#[yaserde(rename = "URL")]
11+
pub url: Option<String>,
12+
}
13+
14+
impl Validate for Section {}
15+
16+
pub mod section {
17+
use super::*;
18+
19+
#[derive(PartialEq, Debug, YaSerialize, YaDeserialize)]
20+
#[yaserde(namespace = "http://pubchem.ncbi.nlm.nih.gov/pug_view")]
21+
22+
pub enum SectionChoice {
23+
#[yaserde(rename = "TOCHeading")]
24+
Tocheading(String),
25+
#[yaserde(rename = "TOCID")]
26+
Tocid(i32),
27+
__Unknown__(String),
28+
}
29+
30+
impl Default for SectionChoice {
31+
fn default() -> SectionChoice {
32+
Self::__Unknown__("No valid variants".into())
33+
}
34+
}
35+
36+
impl Validate for SectionChoice {}
37+
}
xsd-parser/tests/sequence_choice/input.xsd

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<xs:schema
3+
xmlns:xs="http://www.w3.org/2001/XMLSchema"
4+
xmlns="http://pubchem.ncbi.nlm.nih.gov/pug_view"
5+
targetNamespace="http://pubchem.ncbi.nlm.nih.gov/pug_view"
6+
elementFormDefault="qualified"
7+
attributeFormDefault="unqualified">
8+
<xs:element name="Section">
9+
<xs:complexType>
10+
<xs:sequence>
11+
<xs:choice>
12+
<xs:element name="TOCHeading" type="xs:string"/>
13+
<xs:element name="TOCID" type="xs:int"/>
14+
</xs:choice>
15+
<xs:element name="Description" type="xs:string" minOccurs="0"/>
16+
<xs:element name="URL" type="xs:string" minOccurs="0"/>
17+
</xs:sequence>
18+
</xs:complexType>
19+
</xs:element>
20+
</xs:schema>
xsd-parser/tests/sequence_choice/mod.rs

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
use super::utils;
2+
3+
#[test]
4+
fn deserialization_works() {
5+
mod expected {
6+
use xsd_parser::generator::validator::Validate;
7+
use yaserde_derive::{YaDeserialize, YaSerialize};
8+
9+
include!("expected.rs");
10+
}
11+
12+
let ser = include_str!("example.xml");
13+
14+
let de: expected::Section = yaserde::de::from_str(ser).unwrap();
15+
16+
assert_eq!(de, expected::Section {
17+
section_choice: expected::section::SectionChoice::Tocheading("2D Structure".into()),
18+
description: Some("Structure depictions of this compound, including computationally generated two-dimensional (2D) and three-dimensional (3D) structures, as well as experimentally determined 3D single-crystal structures.".into()),
19+
url: None,
20+
});
21+
}
22+
23+
#[test]
24+
fn generator_does_not_panic() {
25+
println!("{}", utils::generate(include_str!("input.xsd")))
26+
}
27+
28+
#[test]
29+
fn generator_output_has_correct_ast() {
30+
utils::ast_test(include_str!("input.xsd"), include_str!("expected.rs"));
31+
}

0 commit comments

Comments
 (0)