37
37
@control_n_jobs (decorated_methods = ["partial_fit" , "_onedal_finalize_fit" ])
38
38
class IncrementalBasicStatistics (BaseEstimator ):
39
39
"""
40
- Incremental estimator for basic statistics.
41
- Allows to compute basic statistics if data are splitted into batches.
40
+ Calculates basic statistics on the given data, allows for computation when the data are split into
41
+ batches. The user can use ``partial_fit`` method to provide a single batch of data or use the ``fit`` method to provide
42
+ the entire dataset.
43
+
42
44
Parameters
43
45
----------
44
46
result_options: string or list, default='all'
@@ -47,10 +49,9 @@ class IncrementalBasicStatistics(BaseEstimator):
47
49
batch_size : int, default=None
48
50
The number of samples to use for each batch. Only used when calling
49
51
``fit``. If ``batch_size`` is ``None``, then ``batch_size``
50
- is inferred from the data and set to ``5 * n_features``, to provide a
51
- balance between approximation accuracy and memory consumption.
52
+ is inferred from the data and set to ``5 * n_features``.
52
53
53
- Attributes (are existing only if corresponding result option exists)
54
+ Attributes
54
55
----------
55
56
min : ndarray of shape (n_features,)
56
57
Minimum of each feature over all samples.
@@ -81,6 +82,38 @@ class IncrementalBasicStatistics(BaseEstimator):
81
82
82
83
second_order_raw_moment : ndarray of shape (n_features,)
83
84
Second order moment of each feature over all samples.
85
+
86
+ n_samples_seen_ : int
87
+ The number of samples processed by the estimator. Will be reset on
88
+ new calls to ``fit``, but increments across ``partial_fit`` calls.
89
+
90
+ batch_size_ : int
91
+ Inferred batch size from ``batch_size``.
92
+
93
+ n_features_in_ : int
94
+ Number of features seen during ``fit`` or ``partial_fit``.
95
+
96
+ Note
97
+ ----
98
+ Attribute exists only if corresponding result option has been provided.
99
+
100
+ Examples
101
+ --------
102
+ >>> import numpy as np
103
+ >>> from sklearnex.basic_statistics import IncrementalBasicStatistics
104
+ >>> incbs = IncrementalBasicStatistics(batch_size=1)
105
+ >>> X = np.array([[1, 2], [3, 4]])
106
+ >>> incbs.partial_fit(X[:1])
107
+ >>> incbs.partial_fit(X[1:])
108
+ >>> incbs.sum_
109
+ np.array([4., 6.])
110
+ >>> incbs.min_
111
+ np.array([1., 2.])
112
+ >>> incbs.fit(X)
113
+ >>> incbs.sum_
114
+ np.array([4., 6.])
115
+ >>> incbs.max_
116
+ np.array([3., 4.])
84
117
"""
85
118
86
119
_onedal_incremental_basic_statistics = staticmethod (onedal_IncrementalBasicStatistics )
@@ -229,14 +262,14 @@ def partial_fit(self, X, sample_weight=None):
229
262
Parameters
230
263
----------
231
264
X : array-like of shape (n_samples, n_features)
232
- Data for compute, where `n_samples` is the number of samples and
233
- `n_features` is the number of features.
265
+ Data for compute, where ``n_samples`` is the number of samples and
266
+ ``n_features`` is the number of features.
234
267
235
268
y : Ignored
236
269
Not used, present for API consistency by convention.
237
270
238
271
sample_weight : array-like of shape (n_samples,), default=None
239
- Weights for compute weighted statistics, where `n_samples` is the number of samples.
272
+ Weights for compute weighted statistics, where ``n_samples`` is the number of samples.
240
273
241
274
Returns
242
275
-------
@@ -261,14 +294,14 @@ def fit(self, X, y=None, sample_weight=None):
261
294
Parameters
262
295
----------
263
296
X : array-like of shape (n_samples, n_features)
264
- Data for compute, where `n_samples` is the number of samples and
265
- `n_features` is the number of features.
297
+ Data for compute, where ``n_samples`` is the number of samples and
298
+ ``n_features`` is the number of features.
266
299
267
300
y : Ignored
268
301
Not used, present for API consistency by convention.
269
302
270
303
sample_weight : array-like of shape (n_samples,), default=None
271
- Weights for compute weighted statistics, where `n_samples` is the number of samples.
304
+ Weights for compute weighted statistics, where ``n_samples`` is the number of samples.
272
305
273
306
Returns
274
307
-------
0 commit comments