Skip to content

Commit a3cc265

Browse files
filter kdocs
1 parent d81af82 commit a3cc265

File tree

4 files changed

+93
-6
lines changed

4 files changed

+93
-6
lines changed

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aliases.kt

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -137,14 +137,16 @@ public typealias ColumnsSelector<T, C> = Selector<ColumnsSelectionDsl<T>, Column
137137
// region filters
138138

139139
/**
140-
* ## Row Filter
140+
* A lambda expression that evaluates a row of the [DataFrame]
141+
* and returns a [Boolean] indicating whether the row should be included in the result.
141142
*
142-
* [RowFilter] is a lambda function expecting a [Boolean] result given an instance of [DataRow]`<T>` as context
143-
* (`this` and `it`).
143+
* The lambda has access to the [`DataRow<T>`][DataRow] both as `this` and as `it`,
144+
* enabling concise and readable conditions.
144145
*
145-
* Return `true` if the row should be included in the result.
146+
* Commonly used in operations such as [filter][org.jetbrains.kotlinx.dataframe.api.filter],
147+
* [drop][org.jetbrains.kotlinx.dataframe.api.drop], and others.
146148
*
147-
* Shorthand for:
149+
* Equivalent to:
148150
* ```kotlin
149151
* DataRow<T>.(it: DataRow<T>) -> Boolean
150152
* ```

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/filter.kt

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,12 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnSet
1414
import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath
1515
import org.jetbrains.kotlinx.dataframe.columns.SingleColumn
1616
import org.jetbrains.kotlinx.dataframe.columns.asColumnSet
17+
import org.jetbrains.kotlinx.dataframe.documentation.DocumentationUrls
1718
import org.jetbrains.kotlinx.dataframe.documentation.DslGrammarTemplateColumnsSelectionDsl.DslGrammarTemplate
19+
import org.jetbrains.kotlinx.dataframe.documentation.ExcludeFromSources
1820
import org.jetbrains.kotlinx.dataframe.documentation.Indent
1921
import org.jetbrains.kotlinx.dataframe.documentation.LineBreak
22+
import org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns
2023
import org.jetbrains.kotlinx.dataframe.impl.columns.TransformableColumnSet
2124
import org.jetbrains.kotlinx.dataframe.impl.getTrueIndices
2225
import org.jetbrains.kotlinx.dataframe.indices
@@ -25,6 +28,12 @@ import kotlin.reflect.KProperty
2528

2629
// region DataColumn
2730

31+
/**
32+
* Returns a new [DataColumn] containing only the elements that match the given [predicate].
33+
*
34+
* @param predicate the condition used to filter the elements in the DataColumn.
35+
* @return a new DataColumn containing elements that satisfy the predicate.
36+
*/
2837
public inline fun <T> DataColumn<T>.filter(predicate: Predicate<T>): DataColumn<T> =
2938
indices
3039
.filter { predicate(get(it)) }
@@ -34,15 +43,88 @@ public inline fun <T> DataColumn<T>.filter(predicate: Predicate<T>): DataColumn<
3443

3544
// region DataFrame
3645

46+
/**
47+
* Filters the rows of this [DataFrame] based on the provided [RowFilter].
48+
* Returns a new [DataFrame] containing only the rows that satisfy the given [predicate].
49+
*
50+
* A [RowFilter] provides each row both as the lambda receiver (`this`) and as its argument (`it`), allowing you to define filtering logic
51+
* using a [Boolean] condition.
52+
*
53+
* @include [SelectingColumns.ColumnGroupsAndNestedColumnsMention]
54+
*
55+
* For more information, see: {@include [DocumentationUrls.Filter]}
56+
*
57+
* See also:
58+
* - [filterBy], which filters rows based on the values in a given [Boolean] column.
59+
* - [drop][DataFrame.drop], which drops rows based on values within the row.
60+
*
61+
* ### Example
62+
* ```kotlin
63+
* // Keep only the rows where the value in the "age" column is greater than 18
64+
* // and the "name/firstName" column starts with 'A'
65+
* df.filter { age > 18 && name.firstName.startsWith("A") }
66+
* ```
67+
*
68+
* @param predicate A lambda that takes a row (twice for compatibility) and returns `true`
69+
* if the row should be included in the result.
70+
* @return A new [DataFrame] containing only the rows that satisfy the predicate.
71+
*/
3772
public inline fun <T> DataFrame<T>.filter(predicate: RowFilter<T>): DataFrame<T> =
3873
indices().filter {
3974
val row = get(it)
4075
predicate(row, row)
4176
}.let { get(it) }
4277

78+
/**
79+
* Filters the rows of this [DataFrame] based on the [Boolean] values in the specified [column].
80+
*
81+
* Returns a new [DataFrame] containing only the rows where the value in the given [column] is `true`.
82+
*
83+
* @include [SelectingColumns.ColumnGroupsAndNestedColumnsMention]
84+
*
85+
* For more information, see: {@include [DocumentationUrls.Filter]}
86+
*
87+
* See also: [filter], which allows filtering rows based on values within the row.
88+
*
89+
* ### This Gather Overload
90+
*/
91+
@ExcludeFromSources
92+
internal interface FilterByDocs
93+
94+
/**
95+
* {@include [FilterByDocs]}
96+
* {@include [SelectingColumns.Dsl]}
97+
*
98+
* ### Examples
99+
* ```kotlin
100+
* // Filter rows by the "isHappy" column
101+
* df.filterBy { isHappy }
102+
*
103+
* // Filter rows by the only `Boolean` column in the DataFrame
104+
* df.filterBy { colsOf<Boolean>().single() }
105+
* ```
106+
*
107+
* @param column A [ColumnSelector] that selects the Boolean column to use for filtering.
108+
* Only rows where the value in this column is `true` will be included.
109+
* @return A new [DataFrame] containing only the rows where the selected column is `true`.
110+
*/
43111
public fun <T> DataFrame<T>.filterBy(column: ColumnSelector<T, Boolean>): DataFrame<T> =
44112
getRows(getColumn(column).toList().getTrueIndices())
45113

114+
/**
115+
* {@include [FilterByDocs]}
116+
* {@include [SelectingColumns.ColumnNames]}
117+
*
118+
* ### Example
119+
* ```kotlin
120+
* // Filter rows by the "isHappy" column
121+
* df.filterBy("isHappy")
122+
* ```
123+
*
124+
* @param column The name of the `Boolean` column to use for filtering.
125+
* Only rows where the value in this column is `true` will be included.
126+
* @return A new [DataFrame] containing only the rows where the specified column is `true`.
127+
*/
46128
public fun <T> DataFrame<T>.filterBy(column: String): DataFrame<T> = filterBy { column.toColumnOf() }
47129

48130
@Deprecated(DEPRECATED_ACCESS_API)

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/gather.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,7 @@ public fun <T, C, K, R> Gather<T, C, K, R>.where(filter: RowValueFilter<T, C>):
204204
* Filters out `null` values from the columns previously selected by [gather],
205205
* keeping only non-null entries.
206206
*
207-
* A special case of [where].
207+
* A special case of [Gather.where].
208208
*
209209
* It's an intermediate step; returns a new [Gather] with filtered value columns.
210210
*

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,4 +110,7 @@ internal interface DocumentationUrls {
110110

111111
/** [See `gather` on the documentation website.]({@include [Url]}/gather.html) */
112112
interface Gather
113+
114+
/** [See `filter` on the documentation website.]({@include [Url]}/filter.html) */
115+
interface Filter
113116
}

0 commit comments

Comments
 (0)