Skip to content

Commit 56eb806

Browse files
authored
Merge branch 'main' into columnApis3.1
2 parents 997b2b1 + 469f260 commit 56eb806

File tree

5 files changed

+113
-0
lines changed

5 files changed

+113
-0
lines changed

src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/Streaming/DataStreamReaderTests.cs

+23
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,13 @@
44

55
using System.Collections.Generic;
66
using System.IO;
7+
using System.Linq;
8+
using Microsoft.Spark.E2ETest.Utils;
79
using Microsoft.Spark.Sql;
810
using Microsoft.Spark.Sql.Streaming;
911
using Microsoft.Spark.Sql.Types;
1012
using Xunit;
13+
using static Microsoft.Spark.E2ETest.Utils.SQLUtils;
1114

1215
namespace Microsoft.Spark.E2ETest.IpcTests
1316
{
@@ -69,5 +72,25 @@ public void TestSignaturesV2_3_X()
6972
// needs to be set in these scenarios as well.
7073
Assert.IsType<DataFrame>(dsr.Format("json").Option("path", jsonFilePath).Load());
7174
}
75+
76+
/// <summary>
/// Test signatures for APIs introduced in Spark 3.1.*.
/// </summary>
[SkipIfSparkVersionIsLessThan(Versions.V3_1_0)]
public void TestSignaturesV3_1_X()
{
    string tableName = "input_table";
    WithTable(
        _spark,
        new string[] { tableName },
        () =>
        {
            // Back a temp view with an in-memory stream of the ints 1..10,
            // then verify DataStreamReader.Table() returns a DataFrame for it.
            var source = new MemoryStream<int>(_spark);
            source.AddData(Enumerable.Range(1, 10).ToArray());
            source.ToDF().CreateOrReplaceTempView(tableName);

            DataStreamReader reader = _spark.ReadStream();
            Assert.IsType<DataFrame>(reader.Table(tableName));
        });
}
7295
}
7396
}

src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/Streaming/DataStreamWriterTests.cs

+24
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
using Microsoft.Spark.Sql.Types;
1313
using Microsoft.Spark.UnitTest.TestUtils;
1414
using Xunit;
15+
using static Microsoft.Spark.E2ETest.Utils.SQLUtils;
1516
using static Microsoft.Spark.Sql.Functions;
1617

1718
namespace Microsoft.Spark.E2ETest.IpcTests
@@ -67,6 +68,29 @@ public void TestSignaturesV2_3_X()
6768
Assert.IsType<DataStreamWriter>(dsw.Trigger(Trigger.Once()));
6869
}
6970

71+
/// <summary>
/// Test signatures for APIs introduced in Spark 3.1.*.
/// </summary>
[SkipIfSparkVersionIsLessThan(Versions.V3_1_0)]
public void TestSignaturesV3_1_X()
{
    string tableName = "output_table";
    WithTable(
        _spark,
        new string[] { tableName },
        () =>
        {
            // Checkpoint location is required for a streaming write; use a
            // throwaway directory that is cleaned up with the test.
            using var checkpointDir = new TemporaryDirectory();
            var source = new MemoryStream<int>(_spark);
            DataStreamWriter writer = source
                .ToDF()
                .WriteStream()
                .Format("parquet")
                .Option("checkpointLocation", checkpointDir.Path);

            // ToTable() starts the query writing into the managed table.
            Assert.IsType<StreamingQuery>(writer.ToTable(tableName));
        });
}
93+
7094
[SkipIfSparkVersionIsLessThan(Versions.V2_4_0)]
7195
public void TestForeachBatch()
7296
{
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
// See the LICENSE file in the project root for more information.
4+
5+
using System;
6+
using System.Collections.Generic;
7+
using System.Linq;
8+
using Microsoft.Spark.Sql;
9+
10+
namespace Microsoft.Spark.E2ETest.Utils
11+
{
12+
internal static class SQLUtils
{
    /// <summary>
    /// Runs <paramref name="action"/> and then drops every table named in
    /// <paramref name="tableNames"/>, even when the action throws.
    /// </summary>
    /// <param name="spark">The <see cref="SparkSession"/></param>
    /// <param name="tableNames">Names of the tables to drop</param>
    /// <param name="action"><see cref="Action"/> to execute.</param>
    public static void WithTable(SparkSession spark, IEnumerable<string> tableNames, Action action)
    {
        try
        {
            action();
        }
        finally
        {
            // Best-effort cleanup: IF EXISTS makes the drop a no-op for
            // tables the action never created.
            foreach (string name in tableNames)
            {
                spark.Sql($"DROP TABLE IF EXISTS {name}");
            }
        }
    }
}
32+
}

src/csharp/Microsoft.Spark/Sql/Streaming/DataStreamReader.cs

+10
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,16 @@ public DataFrame Load(string path) =>
166166
/// <returns>DataFrame object</returns>
167167
public DataFrame Parquet(string path) => LoadSource("parquet", path);
168168

169+
/// <summary>
/// Define a Streaming DataFrame on a Table. The DataSource corresponding to the table should
/// support streaming mode.
/// </summary>
/// <param name="tableName">Name of the table</param>
/// <returns>DataFrame object</returns>
[Since(Versions.V3_1_0)]
public DataFrame Table(string tableName)
{
    // Delegate to the JVM-side DataStreamReader.table(...) and wrap the result.
    var reference = (JvmObjectReference)_jvmObject.Invoke("table", tableName);
    return new DataFrame(reference);
}
178+
169179
/// <summary>
170180
/// Loads text files and returns a <see cref="DataFrame"/> whose schema starts
171181
/// with a string column named "value", and followed by partitioned columns

src/csharp/Microsoft.Spark/Sql/Streaming/DataStreamWriter.cs

+24
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,30 @@ public StreamingQuery Start(string path = null)
178178
return new StreamingQuery((JvmObjectReference)_jvmObject.Invoke("start"));
179179
}
180180

181+
/// <summary>
182+
/// Starts the execution of the streaming query, which will continually output results to the
183+
/// given table as new data arrives. The returned <see cref="StreamingQuery"/> object can be
184+
/// used to interact with the stream.
185+
/// </summary>
186+
/// <remarks>
187+
/// For v1 table, partitioning columns provided by <see cref="PartitionBy(string[])"/> will be
188+
/// respected no matter the table exists or not. A new table will be created if the table not
189+
/// exists.
190+
///
191+
/// For v2 table, <see cref="PartitionBy(string[])"/> will be ignored if the table already exists.
192+
/// <see cref="PartitionBy(string[])"/> will be respected only if the v2 table does not exist.
193+
/// Besides, the v2 table created by this API lacks some functionalities (e.g., customized
194+
/// properties, options, and serde info). If you need them, please create the v2 table manually
195+
/// before the execution to avoid creating a table with incomplete information.
196+
/// </remarks>
197+
/// <param name="tableName">Name of the table</param>
198+
/// <returns>StreamingQuery object</returns>
199+
[Since(Versions.V3_1_0)]
200+
public StreamingQuery ToTable(string tableName)
201+
{
202+
return new StreamingQuery((JvmObjectReference)_jvmObject.Invoke("toTable", tableName));
203+
}
204+
181205
/// <summary>
182206
/// Sets the output of the streaming query to be processed using the provided
183207
/// writer object. See <see cref="IForeachWriter"/> for more details on the

0 commit comments

Comments
 (0)