Skip to content

Commit 210143a

Browse files
authored
Merge pull request #1052 from Kotlin/merge-improvements
Add more operations to compiler plugin
2 parents 81ec10a + edc9915 commit 210143a

File tree

33 files changed

+1471
-149
lines changed

33 files changed

+1471
-149
lines changed

core/api/core.api

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6003,16 +6003,23 @@ public final class org/jetbrains/kotlinx/dataframe/api/Merge {
60036003
}
60046004

60056005
public final class org/jetbrains/kotlinx/dataframe/api/MergeKt {
6006-
public static final fun asStrings (Lorg/jetbrains/kotlinx/dataframe/api/Merge;)Lorg/jetbrains/kotlinx/dataframe/api/Merge;
6007-
public static final fun by (Lorg/jetbrains/kotlinx/dataframe/api/Merge;Ljava/lang/CharSequence;Ljava/lang/CharSequence;Ljava/lang/CharSequence;ILjava/lang/CharSequence;)Lorg/jetbrains/kotlinx/dataframe/api/Merge;
6008-
public static synthetic fun by$default (Lorg/jetbrains/kotlinx/dataframe/api/Merge;Ljava/lang/CharSequence;Ljava/lang/CharSequence;Ljava/lang/CharSequence;ILjava/lang/CharSequence;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/api/Merge;
6006+
public static final fun asStrings (Lorg/jetbrains/kotlinx/dataframe/api/Merge;)Lorg/jetbrains/kotlinx/dataframe/api/MergeWithTransform;
6007+
public static final fun by (Lorg/jetbrains/kotlinx/dataframe/api/Merge;Ljava/lang/CharSequence;Ljava/lang/CharSequence;Ljava/lang/CharSequence;ILjava/lang/CharSequence;)Lorg/jetbrains/kotlinx/dataframe/api/MergeWithTransform;
6008+
public static synthetic fun by$default (Lorg/jetbrains/kotlinx/dataframe/api/Merge;Ljava/lang/CharSequence;Ljava/lang/CharSequence;Ljava/lang/CharSequence;ILjava/lang/CharSequence;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/api/MergeWithTransform;
60096009
public static final fun into (Lorg/jetbrains/kotlinx/dataframe/api/Merge;Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
6010-
public static final fun into (Lorg/jetbrains/kotlinx/dataframe/api/Merge;Lorg/jetbrains/kotlinx/dataframe/columns/ColumnAccessor;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
60116010
public static final fun into (Lorg/jetbrains/kotlinx/dataframe/api/Merge;Lorg/jetbrains/kotlinx/dataframe/columns/ColumnPath;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
6011+
public static final fun into (Lorg/jetbrains/kotlinx/dataframe/api/MergeWithTransform;Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
6012+
public static final fun into (Lorg/jetbrains/kotlinx/dataframe/api/MergeWithTransform;Lorg/jetbrains/kotlinx/dataframe/columns/ColumnPath;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
60126013
public static final fun intoList (Lorg/jetbrains/kotlinx/dataframe/api/Merge;)Ljava/util/List;
6014+
public static final fun intoList (Lorg/jetbrains/kotlinx/dataframe/api/MergeWithTransform;)Ljava/util/List;
60136015
public static final fun merge (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Lkotlin/jvm/functions/Function2;)Lorg/jetbrains/kotlinx/dataframe/api/Merge;
60146016
public static final fun merge (Lorg/jetbrains/kotlinx/dataframe/DataFrame;[Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/api/Merge;
60156017
public static final fun notNull (Lorg/jetbrains/kotlinx/dataframe/api/Merge;)Lorg/jetbrains/kotlinx/dataframe/api/Merge;
6018+
public static final fun notNullList (Lorg/jetbrains/kotlinx/dataframe/api/Merge;)Lorg/jetbrains/kotlinx/dataframe/api/Merge;
6019+
}
6020+
6021+
public final class org/jetbrains/kotlinx/dataframe/api/MergeWithTransform {
6022+
public fun <init> (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Lkotlin/jvm/functions/Function2;ZLkotlin/jvm/functions/Function2;Lkotlin/reflect/KType;Lorg/jetbrains/kotlinx/dataframe/api/Infer;)V
60166023
}
60176024

60186025
public final class org/jetbrains/kotlinx/dataframe/api/MinKt {

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aggregation/AggregateDsl.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ public abstract class AggregateDsl<out T> :
1717
DataFrame<T>,
1818
ColumnSelectionDsl<T> {
1919

20-
@Interpretable("GroupByInto")
20+
@Interpretable("AggregateDslInto")
2121
public inline infix fun <reified R> R.into(name: String): NamedValue =
2222
internal().yield(pathOf(name), this, typeOf<R>())
2323

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/convert.kt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ public fun <T> Convert<T, *>.to(type: KType): DataFrame<T> = to { it.convertTo(t
110110
public fun <T, C> Convert<T, C>.to(columnConverter: DataFrame<T>.(DataColumn<C>) -> AnyBaseCol): DataFrame<T> =
111111
df.replace(columns).with { columnConverter(df, it) }
112112

113+
@Refine
113114
@Interpretable("With0")
114115
public inline fun <T, C, reified R> Convert<T, C>.with(
115116
infer: Infer = Infer.Nulls,
@@ -126,6 +127,8 @@ public fun <T, C, R> Convert<T, DataRow<C>>.asFrame(
126127
body: ColumnsContainer<T>.(ColumnGroup<C>) -> DataFrame<R>,
127128
): DataFrame<T> = to { body(this, it.asColumnGroup()).asColumnGroup(it.name()) }
128129

130+
@Refine
131+
@Interpretable("PerRowCol")
129132
public inline fun <T, C, reified R> Convert<T, C>.perRowCol(
130133
infer: Infer = Infer.Nulls,
131134
noinline expression: RowColumnExpression<T, C, R>,

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/gather.kt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ public data class Gather<T, C, K, R>(
7474
public fun <T, C, K, R> Gather<T, C, K, R>.into(keyColumn: String, valueColumn: String): DataFrame<T> =
7575
gatherImpl(keyColumn, valueColumn)
7676

77+
@AccessApiOverload
7778
public fun <T, C, K, R> Gather<T, C, K, R>.into(
7879
keyColumn: ColumnAccessor<K>,
7980
valueColumn: ColumnAccessor<R>,

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/into.kt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ import org.jetbrains.kotlinx.dataframe.AnyRow
55
import org.jetbrains.kotlinx.dataframe.DataFrame
66
import org.jetbrains.kotlinx.dataframe.RowExpression
77
import org.jetbrains.kotlinx.dataframe.annotations.AccessApiOverload
8+
import org.jetbrains.kotlinx.dataframe.annotations.Interpretable
9+
import org.jetbrains.kotlinx.dataframe.annotations.Refine
810
import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor
911
import org.jetbrains.kotlinx.dataframe.impl.aggregation.internal
1012
import org.jetbrains.kotlinx.dataframe.impl.aggregation.withExpr
@@ -14,6 +16,8 @@ import kotlin.reflect.typeOf
1416

1517
// region GroupBy
1618

19+
@Refine
20+
@Interpretable("GroupByInto")
1721
public fun <T, G> GroupBy<T, G>.into(column: String): DataFrame<T> = toDataFrame(column)
1822

1923
@AccessApiOverload

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/merge.kt

Lines changed: 52 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ import org.jetbrains.kotlinx.dataframe.ColumnsSelector
55
import org.jetbrains.kotlinx.dataframe.DataFrame
66
import org.jetbrains.kotlinx.dataframe.DataRow
77
import org.jetbrains.kotlinx.dataframe.annotations.AccessApiOverload
8+
import org.jetbrains.kotlinx.dataframe.annotations.Interpretable
9+
import org.jetbrains.kotlinx.dataframe.annotations.Refine
810
import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor
911
import org.jetbrains.kotlinx.dataframe.columns.ColumnPath
1012
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
@@ -16,6 +18,7 @@ import kotlin.reflect.KProperty
1618
import kotlin.reflect.KType
1719
import kotlin.reflect.typeOf
1820

21+
@Interpretable("Merge0")
1922
public fun <T, C> DataFrame<T>.merge(selector: ColumnsSelector<T, C>): Merge<T, C, List<C>> =
2023
Merge(this, selector, false, { it }, typeOf<Any?>(), Infer.Type)
2124

@@ -44,19 +47,51 @@ public data class Merge<T, C, R>(
4447
internal val infer: Infer,
4548
)
4649

47-
public fun <T, C, R> Merge<T, C, R>.notNull(): Merge<T, C, R> = copy(notNull = true)
50+
public class MergeWithTransform<T, C, R>(
51+
internal val df: DataFrame<T>,
52+
internal val selector: ColumnsSelector<T, C>,
53+
internal val notNull: Boolean,
54+
internal val transform: DataRow<T>.(List<C>) -> R,
55+
internal val resultType: KType,
56+
internal val infer: Infer,
57+
)
58+
59+
@Interpretable("MergeId")
60+
public fun <T, C, R> Merge<T, C, R>.notNull(): Merge<T, C & Any, R> = copy(notNull = true) as Merge<T, C & Any, R>
4861

62+
@JvmName("notNullList")
63+
@Interpretable("MergeId")
64+
public fun <T, C, R> Merge<T, C, List<R>>.notNull(): Merge<T, C & Any, List<R & Any>> =
65+
copy(notNull = true) as Merge<T, C & Any, List<R & Any>>
66+
67+
@Refine
68+
@Interpretable("MergeInto0")
69+
public fun <T, C, R> MergeWithTransform<T, C, R>.into(columnName: String): DataFrame<T> = into(pathOf(columnName))
70+
71+
@Refine
72+
@Interpretable("MergeInto0")
4973
public fun <T, C, R> Merge<T, C, R>.into(columnName: String): DataFrame<T> = into(pathOf(columnName))
5074

5175
@AccessApiOverload
52-
public fun <T, C, R> Merge<T, C, R>.into(column: ColumnAccessor<*>): DataFrame<T> = into(column.path())
76+
public inline fun <T, C, reified R> Merge<T, C, R>.into(column: ColumnAccessor<*>): DataFrame<T> = into(column.path())
77+
78+
@AccessApiOverload
79+
public inline fun <T, C, reified R> MergeWithTransform<T, C, R>.into(column: ColumnAccessor<*>): DataFrame<T> =
80+
into(column.path())
5381

5482
public fun <T, C, R> Merge<T, C, R>.intoList(): List<R> =
5583
df.select(selector).rows().map { transform(it, it.values() as List<C>) }
5684

57-
public fun <T, C, R> Merge<T, C, R>.into(path: ColumnPath): DataFrame<T> {
85+
public fun <T, C, R> MergeWithTransform<T, C, R>.intoList(): List<R> =
86+
df.select(selector).rows().map { transform(it, it.values() as List<C>) }
87+
88+
public fun <T, C, R> MergeWithTransform<T, C, R>.into(path: ColumnPath): DataFrame<T> {
5889
// If target path exists, merge into temp path
59-
val mergePath = if (df.getColumnOrNull(path) != null) pathOf(nameGenerator().addUnique("temp")) else path
90+
val mergePath = if (df.getColumnOrNull(path) != null) {
91+
pathOf(df.nameGenerator().addUnique("temp"))
92+
} else {
93+
path
94+
}
6095

6196
// move columns into group
6297
val grouped = df.move(selector).under { mergePath }
@@ -82,16 +117,21 @@ public fun <T, C, R> Merge<T, C, R>.into(path: ColumnPath): DataFrame<T> {
82117
return res
83118
}
84119

85-
public fun <T, C, R> Merge<T, C, R>.asStrings(): Merge<T, C, String> = by(", ")
120+
public fun <T, C, R> Merge<T, C, R>.into(path: ColumnPath): DataFrame<T> =
121+
MergeWithTransform(df, selector, notNull, transform, resultType, infer).into(path)
122+
123+
@Interpretable("MergeId")
124+
public fun <T, C, R> Merge<T, C, R>.asStrings(): MergeWithTransform<T, C, String> = by(", ")
86125

126+
@Interpretable("MergeBy0")
87127
public fun <T, C, R> Merge<T, C, R>.by(
88128
separator: CharSequence = ", ",
89129
prefix: CharSequence = "",
90130
postfix: CharSequence = "",
91131
limit: Int = -1,
92132
truncated: CharSequence = "...",
93-
): Merge<T, C, String> =
94-
Merge(
133+
): MergeWithTransform<T, C, String> =
134+
MergeWithTransform(
95135
df = df,
96136
selector = selector,
97137
notNull = notNull,
@@ -108,7 +148,11 @@ public fun <T, C, R> Merge<T, C, R>.by(
108148
infer = Infer.Nulls,
109149
)
110150

151+
@Interpretable("MergeBy1")
111152
public inline fun <T, C, R, reified V> Merge<T, C, R>.by(
112153
infer: Infer = Infer.Nulls,
113154
crossinline transform: DataRow<T>.(R) -> V,
114-
): Merge<T, C, V> = Merge(df, selector, notNull, { transform(this@by.transform(this, it)) }, typeOf<V>(), infer)
155+
): MergeWithTransform<T, C, V> =
156+
MergeWithTransform(df, selector, notNull, {
157+
transform(this@by.transform(this, it))
158+
}, typeOf<V>(), infer)

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/rename.kt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ import kotlin.reflect.KProperty
2424

2525
// region DataFrame
2626

27+
@Refine
28+
@Interpretable("RenameMapping")
2729
public fun <T> DataFrame<T>.rename(vararg mappings: Pair<String, String>): DataFrame<T> =
2830
rename { mappings.map { it.first.toColumnAccessor() }.toColumnSet() }
2931
.into(*mappings.map { it.second }.toTypedArray())

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/reorder.kt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ import org.jetbrains.kotlinx.dataframe.ColumnsSelector
66
import org.jetbrains.kotlinx.dataframe.DataFrame
77
import org.jetbrains.kotlinx.dataframe.Selector
88
import org.jetbrains.kotlinx.dataframe.annotations.AccessApiOverload
9+
import org.jetbrains.kotlinx.dataframe.annotations.Interpretable
10+
import org.jetbrains.kotlinx.dataframe.annotations.Refine
911
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
1012
import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
1113
import org.jetbrains.kotlinx.dataframe.impl.api.reorderImpl
@@ -52,6 +54,8 @@ public fun <T, V : Comparable<V>> DataFrame<T>.reorderColumnsBy(
5254
inFrameColumns = atAnyDepth,
5355
).reorderImpl(desc, expression)
5456

57+
@Refine
58+
@Interpretable("ReorderColumnsByName")
5559
public fun <T> DataFrame<T>.reorderColumnsByName(atAnyDepth: Boolean = true, desc: Boolean = false): DataFrame<T> =
5660
reorderColumnsBy(atAnyDepth, desc) { name() }
5761

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/KotlinNotebookPluginUtils.kt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import org.jetbrains.kotlinx.dataframe.api.GroupBy
1212
import org.jetbrains.kotlinx.dataframe.api.GroupClause
1313
import org.jetbrains.kotlinx.dataframe.api.InsertClause
1414
import org.jetbrains.kotlinx.dataframe.api.Merge
15+
import org.jetbrains.kotlinx.dataframe.api.MergeWithTransform
1516
import org.jetbrains.kotlinx.dataframe.api.MoveClause
1617
import org.jetbrains.kotlinx.dataframe.api.Pivot
1718
import org.jetbrains.kotlinx.dataframe.api.PivotGroupBy
@@ -165,6 +166,7 @@ public object KotlinNotebookPluginUtils {
165166
is SplitWithTransform<*, *, *>,
166167
is Split<*, *>,
167168
is Merge<*, *, *>,
169+
is MergeWithTransform<*, *, *>,
168170
is Gather<*, *, *, *>,
169171
is Update<*, *>,
170172
is Convert<*, *>,
@@ -213,6 +215,13 @@ public object KotlinNotebookPluginUtils {
213215
),
214216
)
215217

218+
is MergeWithTransform<*, *, *> -> dataframeLike.into(
219+
generateRandomVariationOfColumnName(
220+
"merged",
221+
dataframeLike.df.columnNames(),
222+
),
223+
)
224+
216225
is Gather<*, *, *, *> -> dataframeLike.into(
217226
generateRandomVariationOfColumnName("key", dataframeLike.df.columnNames()),
218227
generateRandomVariationOfColumnName("value", dataframeLike.df.columnNames()),

core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTests.kt

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
1717
import org.jetbrains.kotlinx.dataframe.api.ExcessiveColumns
1818
import org.jetbrains.kotlinx.dataframe.api.GroupBy
1919
import org.jetbrains.kotlinx.dataframe.api.Infer
20+
import org.jetbrains.kotlinx.dataframe.api.Merge
2021
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
2122
import org.jetbrains.kotlinx.dataframe.api.add
2223
import org.jetbrains.kotlinx.dataframe.api.addAll
@@ -190,6 +191,7 @@ import org.jetbrains.kotlinx.dataframe.typeClass
190191
import org.junit.Test
191192
import java.math.BigDecimal
192193
import java.time.LocalDate
194+
import kotlin.reflect.KType
193195
import kotlin.reflect.jvm.jvmErasure
194196
import kotlin.reflect.typeOf
195197

@@ -1397,6 +1399,35 @@ class DataFrameTests : BaseTest() {
13971399
res shouldBe expected
13981400
}
13991401

1402+
@Test
1403+
fun `merge into temp`() {
1404+
dataFrameOf("a", "b", "temp")(1, null, 3)
1405+
.merge { cols("a", "b") }.into("b")
1406+
}
1407+
1408+
inline fun <T, reified C, R> Merge<T, C, R>.typeOfElement() = typeOf<C>()
1409+
1410+
@Test
1411+
fun `merge not null`() {
1412+
val merge = dataFrameOf("a", "b")(1, null).merge { col("a") }
1413+
merge.typeOfElement() shouldBe typeOf<Any?>()
1414+
merge.notNull().typeOfElement() shouldBe typeOf<Any>()
1415+
}
1416+
1417+
inline fun <reified T> List<T>.typeOfElement(): KType = typeOf<List<T>>().arguments[0].type!!
1418+
1419+
@Test
1420+
fun `merge cols into list`() {
1421+
val merge = dataFrameOf("a", "b")(1, null).merge { col("a") }
1422+
merge.intoList().typeOfElement() shouldBe typeOf<List<Any?>>()
1423+
merge.by { it }.intoList().typeOfElement() shouldBe typeOf<List<Any?>>()
1424+
// here we can safely narrow down List<Any?> to List<Any> after notNull because the default transformer creates a List from C
1425+
merge.notNull().intoList().typeOfElement() shouldBe typeOf<List<Any>>()
1426+
// if notNull could go after by { },
1427+
// we wouldn't be able to do so because a non-default transformer could introduce nulls itself:
1428+
merge.notNull().by { listOf(1, null) }.intoList().typeOfElement() shouldBe typeOf<List<Int?>>()
1429+
}
1430+
14001431
@Test
14011432
fun `generic column type`() {
14021433
val d = typed.convert { city }.with { it?.toCharArray()?.toList() ?: emptyList() }

0 commit comments

Comments
 (0)