Skip to content

Commit 21b6a45

Browse files
authored
Merge pull request #24 from h-2/cleanup
Cleanup & documentation
2 parents 1e3d708 + a7de322 commit 21b6a45

24 files changed

+909
-1307
lines changed

README.md

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,36 @@
1-
# B.I.O -- The Biological Input-Output library
1+
# B.I.O. – the Biological Input/Output library
22

3+
B.I.O. is a C++ library for reading and writing files in the field of Bioinformatics and in particular sequence
4+
analysis. It provides easy-to-use interfaces for the following formats:
35

6+
* Plain I/O: plain-text, CSV, TSV, …
7+
* Map I/O: SAM, BAM, …
8+
* Seq I/O: FastA, FastQ, …
9+
* Var I/O: VCF, BCF, …
10+
11+
The primary goal of this library is to offer higher level abstractions than the C libraries typically used in this
12+
domain (e.g. htslib) while at the same time offering excellent performance.
13+
It hopes to offer a modern, well-integrated design that covers most typical I/O use-cases Bioinformaticians encounter.
14+
15+
The library relies strongly on *Modern C++* and plays well with other Modern C++ libraries.
16+
17+
Please see the [online documentation](TODO) for more details.
18+
19+
## Current state
20+
21+
The library is currently under heavy development. There is no release yet, and all interfaces are subject to change.
22+
23+
## Dependencies
24+
25+
| | requirement | version | comment |
26+
|-------------------|-------------------------------------------|----------|---------------------------------------------|
27+
|**compiler** | [GCC](https://gcc.gnu.org) | ≥ 10 | no other compiler is currently supported! |
28+
|**required libs** | [SeqAn3](https://github.com/seqan/seqan3) | ≥ 3 | |
29+
|**optional libs** | [zlib](https://github.com/madler/zlib) | ≥ 1.2 | required for `*.gz` and `*.bam` file support |
30+
| | [bzip2](https://www.sourceware.org/bzip2) | ≥ 1.0 | required for `*.bz2` file support |
31+
32+
## Usage
33+
34+
* Using the library entails no build steps; it is header-only and can be used as-is.
35+
* A single-header version is available (TODO).
36+
* CMake files are provided for easy integration into applications (and automatic detection/inclusion of dependencies).

include/bio/format/bcf_input_handler.hpp

Lines changed: 173 additions & 194 deletions
Large diffs are not rendered by default.

include/bio/format/bcf_output_handler.hpp

Lines changed: 8 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -555,13 +555,7 @@ class format_output_handler<bcf> : public format_output_handler_base<format_outp
555555
}
556556

557557
//!\brief Overload for n_fmt.
558-
void set_core_n_fmt(auto & field)
559-
{
560-
if constexpr (detail::genotypes_vcf_style_writer_concept<decltype(field)>)
561-
record_core.n_fmt = std::ranges::distance(detail::get_first(field));
562-
else
563-
record_core.n_fmt = detail::range_or_tuple_size(field);
564-
}
558+
void set_core_n_fmt(auto & field) { record_core.n_fmt = detail::range_or_tuple_size(field); }
565559
//!\}
566560

567561
/*!\name Field writers
@@ -651,7 +645,7 @@ class format_output_handler<bcf> : public format_output_handler_base<format_outp
651645
// explicit integer width given in header
652646
if (hdr_entry.other_fields.find("IntegerBits") != hdr_entry.other_fields.end())
653647
{
654-
desc = detail::dynamic_type_id_2_type_descriptor(hdr_entry.type);
648+
desc = detail::value_type_id_2_type_descriptor(hdr_entry.type);
655649
if (!detail::type_descriptor_is_int(desc)) // ignore header value if it isn't intX
656650
desc = c_desc;
657651
}
@@ -665,7 +659,7 @@ class format_output_handler<bcf> : public format_output_handler_base<format_outp
665659

666660
if (verify_header_types)
667661
{
668-
detail::bcf_type_descriptor header_desc = detail::dynamic_type_id_2_type_descriptor(hdr_entry.type);
662+
detail::bcf_type_descriptor header_desc = detail::value_type_id_2_type_descriptor(hdr_entry.type);
669663
if (desc != header_desc || !detail::type_descriptor_is_int(desc) ||
670664
!detail::type_descriptor_is_int(header_desc))
671665
{
@@ -707,7 +701,7 @@ class format_output_handler<bcf> : public format_output_handler_base<format_outp
707701
var_io::header::info_t const & info = header->infos.at(header->idx_to_info_pos().at(idx));
708702

709703
/* VALUE */
710-
if constexpr (detail::is_dynamic_type<value_t>)
704+
if constexpr (detail::is_info_element_value_type<value_t>)
711705
{
712706
auto func = [&](auto & param) { write_typed_data(param, get_desc(param, info)); };
713707
std::visit(func, value);
@@ -950,15 +944,15 @@ class format_output_handler<bcf> : public format_output_handler_base<format_outp
950944
}
951945
};
952946

953-
if constexpr (detail::is_dynamic_vector_type<value_t>)
947+
if constexpr (detail::is_genotype_element_value_type<value_t>)
954948
std::visit(func, value);
955949
else
956950
func(value);
957951
}
958952

959-
//!\brief Overload for GENOTYPES; genotypes_bcf_style.
953+
//!\brief Overload for GENOTYPES.
960954
template <std::ranges::forward_range range_t>
961-
requires(detail::genotype_bcf_style_writer_concept<std::ranges::range_reference_t<range_t>>)
955+
requires(detail::genotype_writer_concept<std::ranges::range_reference_t<range_t>>)
962956
void write_field(vtag_t<field::genotypes> /**/, range_t && range)
963957
{
964958
for (auto && genotype : range)
@@ -967,13 +961,12 @@ class format_output_handler<bcf> : public format_output_handler_base<format_outp
967961

968962
//!\brief Overload for GENOTYPES; tuple of pairs.
969963
template <typename... elem_ts>
970-
requires(detail::genotype_bcf_style_writer_concept<elem_ts> &&...)
964+
requires(detail::genotype_writer_concept<elem_ts> &&...)
971965
void write_field(vtag_t<field::genotypes> /**/, std::tuple<elem_ts...> & tup) // TODO add const version
972966
{
973967
auto func = [&](auto &... field) { (write_genotypes_element(field), ...); };
974968
std::apply(func, tup);
975969
}
976-
// TODO vcf-style
977970
//!\}
978971

979972
//!\brief Write the header.

0 commit comments

Comments
 (0)