diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aliases.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aliases.kt index 4f1335a9b0..14d0d6e1a9 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aliases.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aliases.kt @@ -137,14 +137,16 @@ public typealias ColumnsSelector = Selector, Column // region filters /** - * ## Row Filter + * A lambda expression that evaluates a row of the [DataFrame] + * and returns a [Boolean] indicating whether the row should be included in the result. * - * [RowFilter] is a lambda function expecting a [Boolean] result given an instance of [DataRow]`` as context - * (`this` and `it`). + * The lambda has access to the [`DataRow`][DataRow] both as `this` and as `it`, + * enabling concise and readable conditions. * - * Return `true` if the row should be included in the result. + * Commonly used in operations such as [filter][org.jetbrains.kotlinx.dataframe.api.filter], + * [drop][org.jetbrains.kotlinx.dataframe.api.drop], and others. 
* - * Shorthand for: + * Equivalent to: * ```kotlin * DataRow.(it: DataRow) -> Boolean * ``` diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/filter.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/filter.kt index b60121aace..5dbbcd3fbf 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/filter.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/filter.kt @@ -14,17 +14,28 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnSet import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath import org.jetbrains.kotlinx.dataframe.columns.SingleColumn import org.jetbrains.kotlinx.dataframe.columns.asColumnSet +import org.jetbrains.kotlinx.dataframe.documentation.DocumentationUrls import org.jetbrains.kotlinx.dataframe.documentation.DslGrammarTemplateColumnsSelectionDsl.DslGrammarTemplate +import org.jetbrains.kotlinx.dataframe.documentation.ExcludeFromSources import org.jetbrains.kotlinx.dataframe.documentation.Indent import org.jetbrains.kotlinx.dataframe.documentation.LineBreak +import org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns import org.jetbrains.kotlinx.dataframe.impl.columns.TransformableColumnSet import org.jetbrains.kotlinx.dataframe.impl.getTrueIndices import org.jetbrains.kotlinx.dataframe.indices import org.jetbrains.kotlinx.dataframe.util.DEPRECATED_ACCESS_API +import org.jetbrains.kotlinx.dataframe.util.FILTER_BY +import org.jetbrains.kotlinx.dataframe.util.FILTER_BY_REPLACE import kotlin.reflect.KProperty // region DataColumn +/** + * Returns a new [DataColumn] containing only the elements that match the given [predicate]. + * + * @param predicate the condition used to filter the elements in the DataColumn. + * @return a new DataColumn containing elements that satisfy the predicate. 
+ */ public inline fun DataColumn.filter(predicate: Predicate): DataColumn = indices .filter { predicate(get(it)) } @@ -34,21 +45,52 @@ public inline fun DataColumn.filter(predicate: Predicate): DataColumn< // region DataFrame +/** + * Filters the rows of this [DataFrame] based on the provided [RowFilter]. + * Returns a new [DataFrame] containing only the rows that satisfy the given [predicate]. + * + * A [RowFilter] provides each row as a lambda argument, allowing you to define filtering logic + * using a [Boolean] condition. + * + * @include [SelectingColumns.ColumnGroupsAndNestedColumnsMention] + * + * For more information, see: {@include [DocumentationUrls.Filter]} + * + * See also: + * - [drop][DataFrame.drop], which drops rows based on values within the row. + * - [distinct][DataFrame.distinct], which filters out rows with duplicated values. + * + * ### Example + * ```kotlin + * // Select rows where the value in the "age" column is greater than 18 + * // and the "name/firstName" column starts with 'A' + * df.filter { age > 18 && name.firstName.startsWith("A") } + * ``` + * + * @param predicate A lambda that receives the row both as `this` and as `it` and returns `true` + * if the row should be included in the result. + * @return A new [DataFrame] containing only the rows that satisfy the predicate. 
+ */ public inline fun DataFrame.filter(predicate: RowFilter): DataFrame = indices().filter { val row = get(it) predicate(row, row) }.let { get(it) } +@Deprecated(message = FILTER_BY, replaceWith = ReplaceWith(FILTER_BY_REPLACE), level = DeprecationLevel.ERROR) public fun DataFrame.filterBy(column: ColumnSelector): DataFrame = getRows(getColumn(column).toList().getTrueIndices()) +@Suppress("DEPRECATION_ERROR") +@Deprecated(message = FILTER_BY, replaceWith = ReplaceWith(FILTER_BY_REPLACE), level = DeprecationLevel.ERROR) public fun DataFrame.filterBy(column: String): DataFrame = filterBy { column.toColumnOf() } +@Suppress("DEPRECATION_ERROR") @Deprecated(DEPRECATED_ACCESS_API) @AccessApiOverload public fun DataFrame.filterBy(column: ColumnReference): DataFrame = filterBy { column } +@Suppress("DEPRECATION_ERROR") @Deprecated(DEPRECATED_ACCESS_API) @AccessApiOverload public fun DataFrame.filterBy(column: KProperty): DataFrame = filterBy { column.toColumnAccessor() } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/gather.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/gather.kt index 552a0c383b..c432c230cc 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/gather.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/gather.kt @@ -204,7 +204,7 @@ public fun Gather.where(filter: RowValueFilter): * Filters out `null` values from the columns previously selected by [gather], * keeping only non-null entries. * - * A special case of [where]. + * A special case of [Gather.where]. * * It's an intermediate step; returns a new [Gather] with filtered value columns. 
* diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt index a1823956fe..19f2f73e30 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt @@ -110,4 +110,7 @@ internal interface DocumentationUrls { /** [See `gather` on the documentation website.]({@include [Url]}/gather.html) */ interface Gather + + /** [See `filter` on the documentation website.]({@include [Url]}/filter.html) */ + interface Filter } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt index 7cb3fa01ff..ea5f4b78cc 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt @@ -110,6 +110,9 @@ internal const val CONVERT_TO_URL_REPLACE = "convertToUrl()" internal const val TO_URL = "This function is replaced by `toUrl()`. $MESSAGE_1_0" internal const val TO_URL_REPLACE = "toUrl()" +internal const val FILTER_BY = "This function is deprecated in favor of `filter { }`. 
$MESSAGE_1_0" +internal const val FILTER_BY_REPLACE = "filter { column }" + // endregion // region WARNING in 1.0, ERROR in 1.1 diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Access.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Access.kt index a798cd386c..760d4609f6 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Access.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Access.kt @@ -20,7 +20,6 @@ import org.jetbrains.kotlinx.dataframe.api.dropNulls import org.jetbrains.kotlinx.dataframe.api.dropWhile import org.jetbrains.kotlinx.dataframe.api.fillNaNs import org.jetbrains.kotlinx.dataframe.api.filter -import org.jetbrains.kotlinx.dataframe.api.filterBy import org.jetbrains.kotlinx.dataframe.api.first import org.jetbrains.kotlinx.dataframe.api.forEach import org.jetbrains.kotlinx.dataframe.api.gather @@ -324,22 +323,6 @@ class Access : TestBase() { // SampleEnd } - @Test - @TransformDataFrameExpressions - fun filterBy_properties() { - // SampleStart - df.filterBy { isHappy } - // SampleEnd - } - - @Test - @TransformDataFrameExpressions - fun filterBy_strings() { - // SampleStart - df.filterBy("isHappy") - // SampleEnd - } - @Test @TransformDataFrameExpressions fun dropWhere_properties() { diff --git a/docs/StardustDocs/topics/filter.md b/docs/StardustDocs/topics/filter.md index e25a15435d..b9dde106b3 100644 --- a/docs/StardustDocs/topics/filter.md +++ b/docs/StardustDocs/topics/filter.md @@ -22,28 +22,3 @@ df.filter { "age"() > 18 && "name"["firstName"]().startsWith("A") } - -## filterBy - -Returns [`DataFrame`](DataFrame.md) with rows that have value `true` in the given column of type `Boolean`. - -See [column selectors](ColumnSelectors.md) for how to select the column for this operation. - - - - - -```kotlin -df.filterBy { isHappy } -``` - - - - -```kotlin -df.filterBy("isHappy") -``` - - - -