diff --git a/docs/Doxyfile.in b/docs/Doxyfile.in index 121a7d952..9b49f0017 100644 --- a/docs/Doxyfile.in +++ b/docs/Doxyfile.in @@ -765,7 +765,7 @@ GENERATE_DEPRECATEDLIST= YES # sections, marked by \if ... \endif and \cond # ... \endcond blocks. -ENABLED_SECTIONS = +ENABLED_SECTIONS = PRIVATE_DOCS # The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the # initial value of a variable or macro / define can have for it to appear in the diff --git a/include/sparrow/timestamp_array.hpp b/include/sparrow/timestamp_array.hpp index ffe60b841..fcab4b74b 100644 --- a/include/sparrow/timestamp_array.hpp +++ b/include/sparrow/timestamp_array.hpp @@ -438,6 +438,44 @@ namespace sparrow */ [[nodiscard]] constexpr const_value_iterator value_cend() const; + /// \cond PRIVATE_DOCS + + /** + * @brief Creates Arrow proxy with specified count of default-initialized timestamp values. + * + * Creates a timestamp array proxy with n elements, each initialized to the default + * timestamp value (Unix epoch: 1970-01-01 00:00:00 UTC). This is useful for creating + * arrays that will be populated later or for allocating space with a known baseline. 
+ * + * @tparam METADATA_RANGE Type of metadata container + * @param timezone Timezone for interpreting all timestamp elements + * @param n Number of elements to create + * @param name Optional name for the array column + * @param metadata Optional metadata key-value pairs + * @return Arrow proxy containing n default-initialized timestamp elements + * + * @pre timezone must be a valid date::time_zone pointer + * @pre n must be >= 0 + * @post Returns proxy with n timestamp elements + * @post All elements contain Unix epoch duration (zero) + * @post No validity bitmap (all elements considered valid) + * @post All elements use the specified timezone for interpretation + * + * @note Efficient for creating large arrays that will be populated later + * @note All elements start with the same baseline value (Unix epoch) + * @note No null values - use other overloads if nullability is needed + * @note Elements can be modified after array construction + * + * @code{.cpp} + * // Create array of 1000 timestamps initialized to Unix epoch + * const auto* utc = date::locate_zone("UTC"); + * auto proxy = timestamp_seconds_array::create_proxy(utc, 1000); + * + * // All elements will represent 1970-01-01 00:00:00 UTC + * timestamp_seconds_array arr(std::move(proxy)); + * // Elements can be modified: arr[0] = some_timestamp; + * @endcode + */ template [[nodiscard]] static arrow_proxy create_proxy( const date::time_zone* timezone, @@ -446,6 +484,34 @@ namespace sparrow std::optional metadata = std::nullopt ); + // + /** + * @brief Creates Arrow proxy from pre-allocated data buffer and validity bitmap. + * + * Creates a timestamp array proxy from an existing data buffer containing duration + * values and an optional validity bitmap. This is the most direct way to create + * a timestamp array when you have pre-processed duration data. 
+ * + * @tparam R Validity bitmap input type + * @tparam METADATA_RANGE Type of metadata container + * @param timezone Timezone for interpreting stored duration values + * @param data_buffer Buffer containing duration values since Unix epoch + * @param bitmaps Validity bitmap or input to create validity bitmap + * @param name Optional name for the array column + * @param metadata Optional metadata key-value pairs + * @return Arrow proxy containing timestamp array with specified data + * + * @pre timezone must be a valid date::time_zone pointer + * @pre data_buffer must contain valid duration values + * @pre If provided, bitmaps must match data_buffer size + * @post Returns proxy with timestamps constructed from data_buffer values + * @post Validity information is properly configured from bitmaps + * @post All timestamps use the specified timezone for interpretation + * + * @note Duration values are interpreted as time since Unix epoch + * @note This method takes ownership of the data_buffer + * @note Validity bitmap is ensured to match array size if provided + */ template < validity_bitmap_input R = validity_bitmap, input_metadata_container METADATA_RANGE = std::vector> @@ -457,7 +523,34 @@ namespace sparrow std::optional metadata = std::nullopt ) -> arrow_proxy; - // range of values (no missing values) + /** + * @brief Creates Arrow proxy from range of timestamp values (no missing values). + * + * Creates a timestamp array proxy from a range of timestamp objects. This method + * extracts duration components from timestamps and creates a dense array without + * null values (unless nullable=true, which adds an empty validity bitmap). 
+ * + * @tparam R Range type containing convertible timestamp values + * @tparam METADATA_RANGE Type of metadata container + * @param timezone Timezone for all timestamps (should match range values) + * @param range Input range of timestamp values + * @param nullable Whether to create validity bitmap (empty if true) + * @param name Optional name for the array column + * @param metadata Optional metadata key-value pairs + * @return Arrow proxy containing timestamp array from range + * + * @pre timezone must be a valid date::time_zone pointer + * @pre Range elements must be convertible to timestamp type T + * @pre All timestamps in range should reference compatible timezone + * @post Returns proxy with timestamps from range + * @post Duration values are extracted and stored efficiently + * @post If nullable=true, empty validity bitmap is created + * @post If nullable=false, no validity bitmap (all values valid) + * + * @note Timezone compatibility is not enforced but recommended + * @note Duration extraction preserves precision of timestamp type + * @note This is optimal for dense timestamp data without nulls + */ template > requires std::convertible_to, T> [[nodiscard]] static auto create_proxy( @@ -468,6 +561,34 @@ namespace sparrow std::optional metadata = std::nullopt ) -> arrow_proxy; + + /** + * @brief Creates Arrow proxy from scalar timestamp value. + * + * Creates a timestamp array proxy from a single timestamp value. This method + * wraps the value in an array structure and provides metadata for Arrow compatibility. 
+ * + * @tparam U Type of the scalar value + * @tparam METADATA_RANGE Type of metadata container + * @param timezone Timezone for interpreting the timestamp value + * @param n Number of elements in the array, each initialized from value + * @param value Scalar timestamp value used for every element + * @param name Optional name for the array column + * @param metadata Optional metadata key-value pairs + * @return Arrow proxy containing timestamp array from scalar value + * + * @pre timezone must be a valid date::time_zone pointer + * @pre n must be >= 0 + * @pre value must be convertible to timestamp type T + * @post Returns proxy with n timestamp elements + * @post Duration values are extracted and stored efficiently + * @post All elements hold the duration extracted from value + * @post All elements use the specified timezone for interpretation + * + * @note Timezone compatibility is not enforced but recommended + * @note Duration extraction preserves precision of timestamp type + * @note No nullable parameter here - use other overloads if nullability is needed + */ template > requires std::convertible_to [[nodiscard]] static arrow_proxy create_proxy( @@ -478,7 +599,36 @@ namespace sparrow std::optional metadata = std::nullopt ); - // range of values, validity_bitmap_input + /** + * @brief Creates Arrow proxy from value range and separate validity information. + * + * Creates a timestamp array proxy from a range of timestamp values and separate + * validity information. This allows precise control over which elements are null + * while providing timestamp values for non-null elements. 
+ * + * @tparam VALUE_RANGE Range type containing timestamp values + * @tparam VALIDITY_RANGE Range or bitmap type for validity information + * @tparam METADATA_RANGE Type of metadata container + * @param timezone Timezone for interpreting timestamp values + * @param values Range of timestamp values (for non-null positions) + * @param validity Validity bitmap or boolean range + * @param name Optional name for the array column + * @param metadata Optional metadata key-value pairs + * @return Arrow proxy containing timestamp array with validity info + * + * @pre timezone must be a valid date::time_zone pointer + * @pre VALUE_RANGE elements must be convertible to timestamp type T + * @pre validity must provide validity information for each element + * @pre Both ranges must have compatible sizes + * @post Returns proxy with timestamps and validity bitmap + * @post Null positions are marked according to validity input + * @post Non-null positions contain duration values from range + * @post All timestamps use the specified timezone + * + * @note Provides fine-grained control over null/non-null elements + * @note Values range should provide meaningful data for non-null positions + * @note Validity information determines the final null count + */ template < std::ranges::input_range VALUE_RANGE, validity_bitmap_input VALIDITY_RANGE, @@ -492,7 +642,34 @@ namespace sparrow std::optional metadata = std::nullopt ); - // range of nullable values + /** + * @brief Creates Arrow proxy from range of nullable timestamp values. + * + * Creates a timestamp array proxy from a range of nullable timestamp objects. + * Each element in the range specifies both its value (if non-null) and whether + * it should be considered null, providing a convenient single-range interface. 
+ * + * @tparam R Range type containing nullable elements + * @tparam METADATA_RANGE Type of metadata container + * @param timezone Timezone for interpreting non-null timestamp values + * @param range Range of nullable timestamp values + * @param name Optional name for the array column + * @param metadata Optional metadata key-value pairs + * @return Arrow proxy containing timestamp array from nullable range + * + * @pre timezone must be a valid date::time_zone pointer + * @pre Range elements must be of type nullable + * @pre Non-null timestamps should use compatible timezone + * @post Returns proxy with timestamps and validity from nullable range + * @post Validity bitmap reflects has_value() state of each element + * @post Non-null elements contain duration values from get() calls + * @post All timestamps use the specified timezone + * + * @note Convenient for ranges where null status is embedded in values + * @note Automatically separates values and validity information + * @note Handles mixed null/non-null data efficiently + * @note Null elements don't require meaningful timestamp values + */ template > requires std::is_same_v, nullable> [[nodiscard]] static arrow_proxy create_proxy( @@ -502,15 +679,58 @@ namespace sparrow std::optional metadata = std::nullopt ); + /** + * @brief Core implementation for creating Arrow proxy from processed data. + * + * This is the fundamental implementation that all other create_proxy methods + * ultimately call. It creates a complete Arrow proxy with schema and array + * from pre-processed duration data and validity information. 
+ * + * The method handles: + * - Arrow schema creation with proper timestamp format and timezone + * - Arrow array creation with validity bitmap and duration data + * - Proper buffer management and ownership transfer + * - Metadata and naming integration + * - Null count calculation and flag setting + * + * @tparam METADATA_RANGE Type of metadata container + * @param timezone Timezone for timestamp interpretation (embedded in schema) + * @param data_buffer Buffer containing duration values since Unix epoch + * @param bitmap Optional validity bitmap (std::nullopt if all values are valid) + * @param name Optional name for the array column + * @param metadata Optional metadata key-value pairs + * @return Complete Arrow proxy ready for timestamp_array construction + * + * @pre timezone must be a valid date::time_zone pointer + * @pre data_buffer must contain valid duration values + * @pre If bitmap is provided, it must match data_buffer size + * @pre Metadata must be valid key-value pairs if provided + * @post Returns complete Arrow proxy with schema and array + * @post Schema includes timezone information in format string + * @post Array buffers contain validity bitmap and duration data + * @post Proper null count and nullable flags are set + * @post All buffer ownership is properly transferred + * + * @note This is the core implementation used by all create_proxy overloads + * @note Creates Arrow-compatible timestamp format with timezone suffix + * @note Handles buffer ownership transfer to prevent memory leaks + * @note Schema format follows Arrow timestamp specification + * @note Supports both nullable and non-nullable configurations + * + * @see Arrow timestamp format specification + * @see ArrowSchema and ArrowArray creation utilities + */ template > [[nodiscard]] static arrow_proxy create_proxy_impl( const date::time_zone* timezone, u8_buffer&& data_buffer, - std::optional&& bitmap_input, + std::optional&& bitmap, std::optional name = std::nullopt, std::optional metadata = 
std::nullopt ); + /// \endcond + // Modifiers /**