Support Spark 3.1.1 #836

Merged · 7 commits · Mar 17, 2021

58 changes: 58 additions & 0 deletions azure-pipelines.yml
@@ -44,6 +44,54 @@ variables:
forwardCompatibleTestOptions_Windows_3_0_2: "--filter FullyQualifiedName=NONE"
forwardCompatibleTestOptions_Linux_3_0_2: $(forwardCompatibleTestOptions_Windows_3_0_2)

Contributor: So it seems that we need 2.0? Assuming we release 1.1, if the client is using 1.1 for Spark 3.1 and the worker installed is 1.0, things will break. Right?

@suhsteve (Member, Author), Mar 14, 2021: Yes, the 1.0.0 Worker will break any time we introduce a version that supports a new minor version of Spark and the user is using that version.

Collaborator: Quick question: why is the above a must? If the Spark minor version doesn't involve changes to how the stream is sent, can't we just do what Terry suggests here?

@suhsteve (Member, Author): No one said it's a must; it's just the current pattern we have set up. The suggestion Terry brought up was already discussed here.
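To make the failure mode concrete, here is a deliberately simplified, hypothetical sketch of the Worker-side version dispatch (the real logic lives in CommandProcessor.cs and TaskContextProcessor.cs; all names below are illustrative):

using System;

internal static class ProcessorResolver
{
    // A 1.0.0-era Worker only knows the Spark versions it shipped with, so a
    // newer Spark minor version falls through to the default arm and throws.
    internal static string Resolve(Version version) =>
        (version.Major, version.Minor) switch
        {
            (2, 3) => "V2_3_X processor",
            (2, 4) => "V2_4_X processor",
            (3, 0) => "V3_0_X processor",
            _ => throw new NotSupportedException($"Spark {version} not supported.")
        };
}

// ProcessorResolver.Resolve(new Version("3.1.1")) throws NotSupportedException,
// which is why a 1.0 Worker breaks under a client targeting Spark 3.1.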

# Skip backwardCompatible tests because Microsoft.Spark.Worker requires Spark 3.1 support in
# CommandProcessor.cs and TaskContextProcessor.cs. Support added in https://github.com/dotnet/spark/pull/836
backwardCompatibleTestOptions_Windows_3_1: "--filter \
(FullyQualifiedName!=Microsoft.Spark.E2ETest.IpcTests.DataFrameTests.TestDataFrameGroupedMapUdf)&\
(FullyQualifiedName!=Microsoft.Spark.E2ETest.IpcTests.DataFrameTests.TestGroupedMapUdf)&\
(FullyQualifiedName!=Microsoft.Spark.E2ETest.UdfTests.UdfComplexTypesTests.TestUdfRegistrationWithReturnAsRowType)&\
(FullyQualifiedName!=Microsoft.Spark.E2ETest.UdfTests.UdfComplexTypesTests.TestUdfWithArrayChain)&\
(FullyQualifiedName!=Microsoft.Spark.E2ETest.UdfTests.UdfComplexTypesTests.TestUdfWithSimpleArrayType)&\
(FullyQualifiedName!=Microsoft.Spark.E2ETest.UdfTests.UdfComplexTypesTests.TestUdfWithMapType)&\
(FullyQualifiedName!=Microsoft.Spark.E2ETest.UdfTests.UdfComplexTypesTests.TestUdfWithRowArrayType)&\
(FullyQualifiedName!=Microsoft.Spark.E2ETest.UdfTests.UdfComplexTypesTests.TestUdfWithReturnAsMapType)&\
(FullyQualifiedName!=Microsoft.Spark.E2ETest.UdfTests.UdfComplexTypesTests.TestUdfWithReturnAsArrayOfArrayType)&\
(FullyQualifiedName!=Microsoft.Spark.E2ETest.UdfTests.UdfComplexTypesTests.TestUdfWithArrayOfArrayType)&\
(FullyQualifiedName!=Microsoft.Spark.E2ETest.UdfTests.UdfComplexTypesTests.TestUdfWithMapOfMapType)&\
(FullyQualifiedName!=Microsoft.Spark.E2ETest.UdfTests.UdfComplexTypesTests.TestUdfWithReturnAsSimpleArrayType)&\
(FullyQualifiedName!=Microsoft.Spark.E2ETest.UdfTests.UdfComplexTypesTests.TestUdfWithRowType)&\
(FullyQualifiedName!=Microsoft.Spark.E2ETest.UdfTests.UdfComplexTypesTests.TestUdfWithReturnAsRowType)&\
(FullyQualifiedName!=Microsoft.Spark.E2ETest.UdfTests.UdfSerDeTests.TestExternalStaticMethodCall)&\
(FullyQualifiedName!=Microsoft.Spark.E2ETest.UdfTests.UdfSerDeTests.TestInitExternalClassInUdf)&\
(FullyQualifiedName!=Microsoft.Spark.E2ETest.UdfTests.UdfSerDeTests.TestUdfClosure)&\
(FullyQualifiedName!=Microsoft.Spark.E2ETest.UdfTests.UdfSimpleTypesTests.TestUdfWithReturnAsDateType)&\
(FullyQualifiedName!=Microsoft.Spark.E2ETest.UdfTests.UdfSimpleTypesTests.TestUdfWithReturnAsTimestampType)&\
(FullyQualifiedName!=Microsoft.Spark.E2ETest.UdfTests.UdfSimpleTypesTests.TestUdfWithDateType)&\
(FullyQualifiedName!=Microsoft.Spark.E2ETest.UdfTests.UdfSimpleTypesTests.TestUdfWithTimestampType)&\
(FullyQualifiedName!=Microsoft.Spark.E2ETest.IpcTests.BroadcastTests.TestMultipleBroadcast)&\
(FullyQualifiedName!=Microsoft.Spark.E2ETest.IpcTests.BroadcastTests.TestUnpersist)&\
(FullyQualifiedName!=Microsoft.Spark.E2ETest.IpcTests.BroadcastTests.TestDestroy)&\
(FullyQualifiedName!=Microsoft.Spark.E2ETest.IpcTests.PairRDDFunctionsTests.TestCollect)&\
(FullyQualifiedName!=Microsoft.Spark.E2ETest.IpcTests.RDDTests.TestPipelinedRDD)&\
(FullyQualifiedName!=Microsoft.Spark.E2ETest.IpcTests.RDDTests.TestMap)&\
(FullyQualifiedName!=Microsoft.Spark.E2ETest.IpcTests.RDDTests.TestFlatMap)&\
(FullyQualifiedName!=Microsoft.Spark.E2ETest.IpcTests.RDDTests.TestMapPartitions)&\
(FullyQualifiedName!=Microsoft.Spark.E2ETest.IpcTests.RDDTests.TestMapPartitionsWithIndex)&\
(FullyQualifiedName!=Microsoft.Spark.E2ETest.IpcTests.RDDTests.TestTextFile)&\
(FullyQualifiedName!=Microsoft.Spark.E2ETest.IpcTests.RDDTests.TestFilter)&\
(FullyQualifiedName!=Microsoft.Spark.E2ETest.IpcTests.DataFrameTests.TestDataFrameVectorUdf)&\
(FullyQualifiedName!=Microsoft.Spark.E2ETest.IpcTests.DataFrameTests.TestVectorUdf)&\
(FullyQualifiedName!=Microsoft.Spark.E2ETest.IpcTests.DataFrameTests.TestWithColumn)&\
(FullyQualifiedName!=Microsoft.Spark.E2ETest.IpcTests.DataFrameTests.TestUDF)&\
(FullyQualifiedName!=Microsoft.Spark.E2ETest.IpcTests.SparkSessionExtensionsTests.TestVersion)&\
(FullyQualifiedName!=Microsoft.Spark.E2ETest.IpcTests.DataStreamWriterTests.TestForeachBatch)&\
(FullyQualifiedName!=Microsoft.Spark.E2ETest.IpcTests.DataStreamWriterTests.TestForeach)"
# Skip all forwardCompatible tests since microsoft-spark-3-1 jar does not get built when
# building forwardCompatible repo.
forwardCompatibleTestOptions_Windows_3_1: "--filter FullyQualifiedName=NONE"
backwardCompatibleTestOptions_Linux_3_1: $(backwardCompatibleTestOptions_Windows_3_1)
forwardCompatibleTestOptions_Linux_3_1: $(forwardCompatibleTestOptions_Windows_3_1)

# Azure DevOps variables are transformed into environment variables; with these
# variables we avoid the first-time experience and telemetry to speed up the build.
DOTNET_CLI_TELEMETRY_OPTOUT: 1
@@ -361,3 +409,13 @@ stages:
testOptions: ""
backwardCompatibleTestOptions: $(backwardCompatibleTestOptions_Linux_3_0)
forwardCompatibleTestOptions: $(forwardCompatibleTestOptions_Linux_3_0_2)
- version: '3.1.1'
jobOptions:
- pool: 'Hosted VS2017'
testOptions: ""
backwardCompatibleTestOptions: $(backwardCompatibleTestOptions_Windows_3_1)
forwardCompatibleTestOptions: $(forwardCompatibleTestOptions_Windows_3_1)
- pool: 'Hosted Ubuntu 1604'
testOptions: ""
backwardCompatibleTestOptions: $(backwardCompatibleTestOptions_Linux_3_1)
forwardCompatibleTestOptions: $(forwardCompatibleTestOptions_Linux_3_1)
@@ -30,7 +30,11 @@ public DeltaTableTests(DeltaFixture fixture)
/// Run the end-to-end scenario from the Delta Quickstart tutorial.
/// </summary>
/// <see cref="https://docs.delta.io/latest/quick-start.html"/>
[SkipIfSparkVersionIsLessThan(Versions.V2_4_2)]
///
/// Delta 0.8.0 is not compatible with Spark 3.1.1
/// Disable Delta tests that have code paths that create an
/// `org.apache.spark.sql.catalyst.expressions.Alias` object.
[SkipIfSparkVersionIsNotInRange(Versions.V2_4_2, Versions.V3_1_1)]
@suhsteve (Member, Author): Disable Delta tests that have code paths that create an org.apache.spark.sql.catalyst.expressions.Alias object. Delta 0.8.0 is not compatible with Spark 3.1.1: delta-io/delta#594

Contributor: I think you should add this comment to the code.
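For reference, a minimal sketch of how a version-range skip attribute like this can be built on xunit. This is hypothetical, not the repo's actual helper; in particular, the bound semantics (inclusive lower, exclusive upper) and the SPARK_VERSION environment-variable lookup are assumptions:

using System;
using Xunit;

public sealed class SkipIfSparkVersionIsNotInRangeAttribute : FactAttribute
{
    public SkipIfSparkVersionIsNotInRangeAttribute(string minInclusive, string maxExclusive)
    {
        // Stand-in for however the test suite resolves the running Spark version.
        var version = new Version(
            Environment.GetEnvironmentVariable("SPARK_VERSION") ?? "0.0.0");
        if (version < new Version(minInclusive) || version >= new Version(maxExclusive))
        {
            // Setting FactAttribute.Skip makes xunit report the test as skipped.
            Skip = $"Skipped on Spark {version}: outside [{minInclusive}, {maxExclusive}).";
        }
    }
}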

public void TestTutorialScenario()
{
using var tempDirectory = new TemporaryDirectory();
@@ -223,7 +227,11 @@ void testWrapper(
/// <summary>
/// Test that methods return the expected signature.
/// </summary>
[SkipIfSparkVersionIsLessThan(Versions.V2_4_2)]
///
/// Delta 0.8.0 is not compatible with Spark 3.1.1
/// Disable Delta tests that have code paths that create an
/// `org.apache.spark.sql.catalyst.expressions.Alias` object.
[SkipIfSparkVersionIsNotInRange(Versions.V2_4_2, Versions.V3_1_1)]
public void TestSignaturesV2_4_X()
{
using var tempDirectory = new TemporaryDirectory();
@@ -69,12 +69,7 @@ public void TestSignaturesV2_3_X()

// TODO: Test dfw.Jdbc without running a local db.

dfw.Option("path", tempDir.Path).SaveAsTable("TestTable");

dfw.InsertInto("TestTable");

dfw.Option("path", $"{tempDir.Path}TestSavePath1").Save();
dfw.Save($"{tempDir.Path}TestSavePath2");
dfw.Save($"{tempDir.Path}TestSavePath1");

dfw.Json($"{tempDir.Path}TestJsonPath");

@@ -85,6 +80,16 @@ public void TestSignaturesV2_3_X()
dfw.Text($"{tempDir.Path}TestTextPath");

dfw.Csv($"{tempDir.Path}TestCsvPath");

@suhsteve (Member, Author): Setting the path option and then calling save is not supported by default unless the spark.sql.legacy.pathOptionBehavior.enabled conf is set. Reversed the order in the test.

https://github.com/apache/spark/blob/bb9abb098a1e26c220546616d655380c479b0e42/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala#L289

Contributor: Same here. I would add this to the code.

dfw.Option("path", tempDir.Path).SaveAsTable("TestTable");

dfw.InsertInto("TestTable");

// In Spark 3.1.1+ setting the `path` Option and then calling .Save(path) is not
// supported unless `spark.sql.legacy.pathOptionBehavior.enabled` conf is set.
// .Json(path), .Parquet(path), etc follow the same code path so the conf
// needs to be set in these scenarios as well.
dfw.Option("path", $"{tempDir.Path}TestSavePath2").Save();
}
}
}
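If the old ordering is ever actually needed, the legacy behavior can be re-enabled via the conf named in the comment above. A minimal sketch (the conf key comes from the linked Spark source; the session setup and paths are illustrative):

using Microsoft.Spark.Sql;

SparkSession spark = SparkSession
    .Builder()
    // Restores the pre-3.1 behavior of honoring the "path" option in Save()/Load().
    .Config("spark.sql.legacy.pathOptionBehavior.enabled", "true")
    .GetOrCreate();

DataFrame df = spark.Range(10);
df.Write().Mode("overwrite").Option("path", "/tmp/TestSavePath1").Save();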
@@ -52,7 +52,6 @@ public void TestSignaturesV2_3_X()
}));

string jsonFilePath = Path.Combine(TestEnvironment.ResourceDirectory, "people.json");
Assert.IsType<DataFrame>(dsr.Format("json").Option("path", jsonFilePath).Load());
Assert.IsType<DataFrame>(dsr.Format("json").Load(jsonFilePath));
Assert.IsType<DataFrame>(dsr.Json(jsonFilePath));
Assert.IsType<DataFrame>(
@@ -63,6 +62,12 @@ public void TestSignaturesV2_3_X()
dsr.Parquet(Path.Combine(TestEnvironment.ResourceDirectory, "users.parquet")));
Assert.IsType<DataFrame>
(dsr.Text(Path.Combine(TestEnvironment.ResourceDirectory, "people.txt")));

// In Spark 3.1.1+ setting the `path` Option and then calling .Load(path) is not
// supported unless `spark.sql.legacy.pathOptionBehavior.enabled` conf is set.
// .Json(path), .Parquet(path), etc follow the same code path so the conf
// needs to be set in these scenarios as well.
Assert.IsType<DataFrame>(dsr.Format("json").Option("path", jsonFilePath).Load());
@suhsteve (Member, Author): Setting the path option and then calling load is not supported by default unless the spark.sql.legacy.pathOptionBehavior.enabled conf is set. Reversed the order in the test.

https://github.com/apache/spark/blob/bb9abb098a1e26c220546616d655380c479b0e42/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala#L259

Contributor: Ditto.

}
}
}
@@ -99,7 +99,7 @@ private static SqlCommand[] ReadSqlCommands(
{
(2, 3) => SqlCommandProcessorV2_3_X.Process(evalType, stream),
(2, 4) => SqlCommandProcessorV2_4_X.Process(evalType, stream),
(3, 0) => SqlCommandProcessorV2_4_X.Process(evalType, stream),
(3, _) => SqlCommandProcessorV2_4_X.Process(evalType, stream),
_ => throw new NotSupportedException($"Spark {version} not supported.")
};
}
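The discard in (3, _) is what makes this dispatch future-proof: it matches every Spark 3.x minor version, not just 3.0. A tiny standalone demonstration of the tuple pattern (the same change is applied in TaskContextProcessor.cs, Broadcast.cs, and DataFrame.cs below):

using System;

static string Pick(Version v) => (v.Major, v.Minor) switch
{
    (2, 4) => "V2_4_X",
    (3, _) => "V2_4_X", // 3.0, 3.1, 3.2, ... all take this arm
    _ => throw new NotSupportedException($"Spark {v} not supported.")
};

Console.WriteLine(Pick(new Version("3.0.0"))); // V2_4_X
Console.WriteLine(Pick(new Version("3.1.1"))); // V2_4_X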
@@ -23,7 +23,7 @@ internal TaskContext Process(Stream stream)
{
(2, 3) => TaskContextProcessorV2_3_X.Process(stream),
(2, 4) => TaskContextProcessorV2_4_X.Process(stream),
(3, 0) => TaskContextProcessorV3_0_X.Process(stream),
(3, _) => TaskContextProcessorV3_0_X.Process(stream),
_ => throw new NotSupportedException($"Spark {_version} not supported.")
};
}
2 changes: 1 addition & 1 deletion src/csharp/Microsoft.Spark/Broadcast.cs
@@ -129,7 +129,7 @@ private JvmObjectReference CreateBroadcast(SparkContext sc, T value)
CreateBroadcast_V2_3_1_AndBelow(javaSparkContext, value),
(2, 3) => CreateBroadcast_V2_3_2_AndAbove(javaSparkContext, sc, value),
(2, 4) => CreateBroadcast_V2_3_2_AndAbove(javaSparkContext, sc, value),
(3, 0) => CreateBroadcast_V2_3_2_AndAbove(javaSparkContext, sc, value),
(3, _) => CreateBroadcast_V2_3_2_AndAbove(javaSparkContext, sc, value),
_ => throw new NotSupportedException($"Spark {version} not supported.")
};
}
2 changes: 1 addition & 1 deletion src/csharp/Microsoft.Spark/Sql/DataFrame.cs
@@ -1057,7 +1057,7 @@ private IEnumerable<Row> GetRows(string funcName, params object[] args)
// string to use for the authentication.
(2, 3, _) => ParseConnectionInfo(result, false),
(2, 4, _) => ParseConnectionInfo(result, false),
(3, 0, _) => ParseConnectionInfo(result, false),
(3, _, _) => ParseConnectionInfo(result, false),
_ => throw new NotSupportedException($"Spark {version} not supported.")
};
}
1 change: 1 addition & 0 deletions src/csharp/Microsoft.Spark/Versions.cs
@@ -13,5 +13,6 @@ internal static class Versions
internal const string V2_4_0 = "2.4.0";
internal const string V2_4_2 = "2.4.2";
internal const string V3_0_0 = "3.0.0";
internal const string V3_1_1 = "3.1.1";
}
}
2 changes: 1 addition & 1 deletion src/scala/microsoft-spark-3-0/pom.xml
@@ -10,7 +10,7 @@
<inceptionYear>2019</inceptionYear>
<properties>
<encoding>UTF-8</encoding>
<scala.version>2.12.8</scala.version>
<scala.version>2.12.10</scala.version>
<scala.binary.version>2.12</scala.binary.version>
<spark.version>3.0.0</spark.version>
</properties>
1 change: 1 addition & 0 deletions src/scala/pom.xml
@@ -14,6 +14,7 @@
<module>microsoft-spark-2-3</module>
<module>microsoft-spark-2-4</module>
<module>microsoft-spark-3-0</module>
<module>microsoft-spark-3-1</module>
</modules>

<pluginRepositories>