Expose DataStreamWriter.PartitionBy(). (#270)

imback82 · web-flow · commit e50d7b24d7e8 · 2019-09-25T21:01:54.000-07:00
diff --git a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/Streaming/DataStreamWriterTests.cs b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/Streaming/DataStreamWriterTests.cs
@@ -0,0 +1,63 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Collections.Generic;
+using Microsoft.Spark.Sql;
+using Microsoft.Spark.Sql.Streaming;
+using Xunit;
+
+namespace Microsoft.Spark.E2ETest.IpcTests
+{
+    /// <summary>
+    /// End-to-end signature tests for <see cref="DataStreamWriter"/>.
+    /// </summary>
+    [Collection("Spark E2E Tests")]
+    public class DataStreamWriterTests
+    {
+        private readonly SparkSession _spark;
+
+        public DataStreamWriterTests(SparkFixture fixture)
+        {
+            _spark = fixture.Spark;
+        }
+
+        /// <summary>
+        /// Calls the DataStreamWriter APIs available up to Spark 2.3.* and checks that
+        /// each call returns a DataStreamWriter.
+        /// </summary>
+        [Fact]
+        public void TestSignaturesV2_3_X()
+        {
+            // Streaming DataFrame read from the "rate" source; it only supplies the writer.
+            DataFrame rateStream =
+                _spark.ReadStream().Format("rate").Option("rowsPerSecond", 1).Load();
+            DataStreamWriter writer = rateStream.WriteStream();
+
+            // Options handed over in bulk through Options().
+            var extraOptions = new Dictionary<string, string>
+            {
+                { "option1", "value1" },
+                { "option2", "value2" }
+            };
+
+            Assert.IsType<DataStreamWriter>(writer.OutputMode("append"));
+            Assert.IsType<DataStreamWriter>(writer.OutputMode(OutputMode.Append));
+
+            Assert.IsType<DataStreamWriter>(writer.Format("json"));
+
+            Assert.IsType<DataStreamWriter>(writer.Option("stringOption", "value"));
+            Assert.IsType<DataStreamWriter>(writer.Option("boolOption", true));
+            Assert.IsType<DataStreamWriter>(writer.Option("longOption", 1L));
+            Assert.IsType<DataStreamWriter>(writer.Option("doubleOption", 3D));
+            Assert.IsType<DataStreamWriter>(writer.Options(extraOptions));
+
+            Assert.IsType<DataStreamWriter>(writer.PartitionBy("age"));
+            Assert.IsType<DataStreamWriter>(writer.PartitionBy("age", "name"));
+
+            Assert.IsType<DataStreamWriter>(writer.QueryName("queryName"));
+
+            Assert.IsType<DataStreamWriter>(writer.Trigger(Trigger.Once()));
+        }
+    }
+}
diff --git a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/Streaming/TriggerTests.cs b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/Streaming/TriggerTests.cs
@@ -11,13 +11,6 @@ namespace Microsoft.Spark.E2ETest.IpcTests
     [Collection("Spark E2E Tests")]
     public class TriggerTests
     {
-        private readonly SparkSession _spark;
-
-        public TriggerTests(SparkFixture fixture)
-        {
-            _spark = fixture.Spark;
-        }
-
         /// <summary>
         /// Test Trigger's static functions
         /// </summary>
diff --git a/src/csharp/Microsoft.Spark/Sql/Streaming/DataStreamWriter.cs b/src/csharp/Microsoft.Spark/Sql/Streaming/DataStreamWriter.cs
@@ -59,6 +59,18 @@ public DataStreamWriter Format(string source)
             return this;
         }
 
+        /// <summary>
+        /// Partitions the output by the given columns on the file system, laying it out
+        /// similar to Hive's partitioning scheme. Forwards to "partitionBy" on the JVM object.
+        /// </summary>
+        /// <param name="colNames">Names of the columns to partition the output by</param>
+        /// <returns>This DataStreamWriter object, so calls can be chained</returns>
+        public DataStreamWriter PartitionBy(params string[] colNames)
+        {
+            _jvmObject.Invoke("partitionBy", (object)colNames);
+            return this;
+        }
+
         /// <summary>
         /// Adds an output option for the underlying data source.
         /// </summary>