Skip to content

Commit 58012fc

Browse files
Merge pull request #1288 from Kotlin/filter_kdocs
filter kdocs
2 parents d81af82 + 3bf6f8d commit 58012fc

File tree

7 files changed

+56
-48
lines changed

7 files changed

+56
-48
lines changed

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aliases.kt

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -137,14 +137,16 @@ public typealias ColumnsSelector<T, C> = Selector<ColumnsSelectionDsl<T>, Column
137137
// region filters
138138

139139
/**
140-
* ## Row Filter
140+
* A lambda expression that evaluates a row of the [DataFrame]
141+
* and returns a [Boolean] indicating whether the row should be included in the result.
141142
*
142-
* [RowFilter] is a lambda function expecting a [Boolean] result given an instance of [DataRow]`<T>` as context
143-
* (`this` and `it`).
143+
* The lambda has access to the [`DataRow<T>`][DataRow] both as `this` and as `it`,
144+
* enabling concise and readable conditions.
144145
*
145-
* Return `true` if the row should be included in the result.
146+
* Commonly used in operations such as [filter][org.jetbrains.kotlinx.dataframe.api.filter],
147+
* [drop][org.jetbrains.kotlinx.dataframe.api.drop], and others.
146148
*
147-
* Shorthand for:
149+
* Equivalent to:
148150
* ```kotlin
149151
* DataRow<T>.(it: DataRow<T>) -> Boolean
150152
* ```

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/filter.kt

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,17 +14,28 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnSet
1414
import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath
1515
import org.jetbrains.kotlinx.dataframe.columns.SingleColumn
1616
import org.jetbrains.kotlinx.dataframe.columns.asColumnSet
17+
import org.jetbrains.kotlinx.dataframe.documentation.DocumentationUrls
1718
import org.jetbrains.kotlinx.dataframe.documentation.DslGrammarTemplateColumnsSelectionDsl.DslGrammarTemplate
19+
import org.jetbrains.kotlinx.dataframe.documentation.ExcludeFromSources
1820
import org.jetbrains.kotlinx.dataframe.documentation.Indent
1921
import org.jetbrains.kotlinx.dataframe.documentation.LineBreak
22+
import org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns
2023
import org.jetbrains.kotlinx.dataframe.impl.columns.TransformableColumnSet
2124
import org.jetbrains.kotlinx.dataframe.impl.getTrueIndices
2225
import org.jetbrains.kotlinx.dataframe.indices
2326
import org.jetbrains.kotlinx.dataframe.util.DEPRECATED_ACCESS_API
27+
import org.jetbrains.kotlinx.dataframe.util.FILTER_BY
28+
import org.jetbrains.kotlinx.dataframe.util.FILTER_BY_REPLACE
2429
import kotlin.reflect.KProperty
2530

2631
// region DataColumn
2732

33+
/**
34+
* Returns a new [DataColumn] containing only the elements that match the given [predicate].
35+
*
36+
* @param predicate the condition used to filter the elements in the DataColumn.
37+
* @return a new DataColumn containing elements that satisfy the predicate.
38+
*/
2839
public inline fun <T> DataColumn<T>.filter(predicate: Predicate<T>): DataColumn<T> =
2940
indices
3041
.filter { predicate(get(it)) }
@@ -34,21 +45,52 @@ public inline fun <T> DataColumn<T>.filter(predicate: Predicate<T>): DataColumn<
3445

3546
// region DataFrame
3647

48+
/**
49+
* Filters the rows of this [DataFrame] based on the provided [RowFilter].
50+
* Returns a new [DataFrame] containing only the rows that satisfy the given [predicate].
51+
*
52+
* A [RowFilter] receives each row both as `this` and as `it`, allowing you to define filtering logic
53+
* using a [Boolean] condition.
54+
*
55+
* @include [SelectingColumns.ColumnGroupsAndNestedColumnsMention]
56+
*
57+
* For more information, see: {@include [DocumentationUrls.Filter]}
58+
*
59+
* See also:
60+
* - [drop][DataFrame.drop], which drops rows based on values within the row.
61+
* - [distinct][DataFrame.distinct], which filters out rows with duplicated values.
62+
*
63+
* ### Example
64+
* ```kotlin
65+
* // Select rows where the value in the "age" column is greater than 18
66+
* // and the "name/firstName" column starts with 'A'
67+
* df.filter { age > 18 && name.firstName.startsWith("A") }
68+
* ```
69+
*
70+
* @param predicate A lambda that receives the row both as `this` and as `it` and returns `true`
71+
* if the row should be included in the result.
72+
* @return A new [DataFrame] containing only the rows that satisfy the predicate.
73+
*/
3774
public inline fun <T> DataFrame<T>.filter(predicate: RowFilter<T>): DataFrame<T> =
3875
indices().filter {
3976
val row = get(it)
4077
predicate(row, row)
4178
}.let { get(it) }
4279

80+
@Deprecated(message = FILTER_BY, replaceWith = ReplaceWith(FILTER_BY_REPLACE), level = DeprecationLevel.ERROR)
4381
public fun <T> DataFrame<T>.filterBy(column: ColumnSelector<T, Boolean>): DataFrame<T> =
4482
getRows(getColumn(column).toList().getTrueIndices())
4583

84+
@Suppress("DEPRECATION_ERROR")
85+
@Deprecated(message = FILTER_BY, replaceWith = ReplaceWith(FILTER_BY_REPLACE), level = DeprecationLevel.ERROR)
4686
public fun <T> DataFrame<T>.filterBy(column: String): DataFrame<T> = filterBy { column.toColumnOf() }
4787

88+
@Suppress("DEPRECATION_ERROR")
4889
@Deprecated(DEPRECATED_ACCESS_API)
4990
@AccessApiOverload
5091
public fun <T> DataFrame<T>.filterBy(column: ColumnReference<Boolean>): DataFrame<T> = filterBy { column }
5192

93+
@Suppress("DEPRECATION_ERROR")
5294
@Deprecated(DEPRECATED_ACCESS_API)
5395
@AccessApiOverload
5496
public fun <T> DataFrame<T>.filterBy(column: KProperty<Boolean>): DataFrame<T> = filterBy { column.toColumnAccessor() }

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/gather.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,7 @@ public fun <T, C, K, R> Gather<T, C, K, R>.where(filter: RowValueFilter<T, C>):
204204
* Filters out `null` values from the columns previously selected by [gather],
205205
* keeping only non-null entries.
206206
*
207-
* A special case of [where].
207+
* A special case of [Gather.where].
208208
*
209209
* It's an intermediate step; returns a new [Gather] with filtered value columns.
210210
*

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,4 +110,7 @@ internal interface DocumentationUrls {
110110

111111
/** [See `gather` on the documentation website.]({@include [Url]}/gather.html) */
112112
interface Gather
113+
114+
/** [See `filter` on the documentation website.]({@include [Url]}/filter.html) */
115+
interface Filter
113116
}

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,9 @@ internal const val CONVERT_TO_URL_REPLACE = "convertToUrl()"
110110
internal const val TO_URL = "This function is replaced by `toUrl()`. $MESSAGE_1_0"
111111
internal const val TO_URL_REPLACE = "toUrl()"
112112

113+
internal const val FILTER_BY = "This function is deprecated in favor of `filter { }`. $MESSAGE_1_0"
114+
internal const val FILTER_BY_REPLACE = "filter { column }"
115+
113116
// endregion
114117

115118
// region WARNING in 1.0, ERROR in 1.1

core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Access.kt

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ import org.jetbrains.kotlinx.dataframe.api.dropNulls
2020
import org.jetbrains.kotlinx.dataframe.api.dropWhile
2121
import org.jetbrains.kotlinx.dataframe.api.fillNaNs
2222
import org.jetbrains.kotlinx.dataframe.api.filter
23-
import org.jetbrains.kotlinx.dataframe.api.filterBy
2423
import org.jetbrains.kotlinx.dataframe.api.first
2524
import org.jetbrains.kotlinx.dataframe.api.forEach
2625
import org.jetbrains.kotlinx.dataframe.api.gather
@@ -324,22 +323,6 @@ class Access : TestBase() {
324323
// SampleEnd
325324
}
326325

327-
@Test
328-
@TransformDataFrameExpressions
329-
fun filterBy_properties() {
330-
// SampleStart
331-
df.filterBy { isHappy }
332-
// SampleEnd
333-
}
334-
335-
@Test
336-
@TransformDataFrameExpressions
337-
fun filterBy_strings() {
338-
// SampleStart
339-
df.filterBy("isHappy")
340-
// SampleEnd
341-
}
342-
343326
@Test
344327
@TransformDataFrameExpressions
345328
fun dropWhere_properties() {

docs/StardustDocs/topics/filter.md

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -22,28 +22,3 @@ df.filter { "age"<Int>() > 18 && "name"["firstName"]<String>().startsWith("A") }
2222
</tab></tabs>
2323
<inline-frame src="resources/org.jetbrains.kotlinx.dataframe.samples.api.Access.filter.html" width="100%"/>
2424
<!---END-->
25-
26-
## filterBy
27-
28-
Returns [`DataFrame`](DataFrame.md) with rows that have value `true` in the given column of type `Boolean`.
29-
30-
See [column selectors](ColumnSelectors.md) for how to select the column for this operation.
31-
32-
<!---FUN filterBy-->
33-
<tabs>
34-
<tab title="Properties">
35-
36-
```kotlin
37-
df.filterBy { isHappy }
38-
```
39-
40-
</tab>
41-
<tab title="Strings">
42-
43-
```kotlin
44-
df.filterBy("isHappy")
45-
```
46-
47-
</tab></tabs>
48-
<inline-frame src="resources/org.jetbrains.kotlinx.dataframe.samples.api.Access.filterBy.html" width="100%"/>
49-
<!---END-->

0 commit comments

Comments
 (0)