-
Notifications
You must be signed in to change notification settings - Fork 327
Support Spark 3.1.1 #836
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Support Spark 3.1.1 #836
Changes from all commits
4042538
91be8f4
ca930cf
4379cb4
9ed9061
ad795c2
2886ffd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -30,7 +30,11 @@ public DeltaTableTests(DeltaFixture fixture) | |
/// Run the end-to-end scenario from the Delta Quickstart tutorial. | ||
/// </summary> | ||
/// <see cref="https://docs.delta.io/latest/quick-start.html"/> | ||
[SkipIfSparkVersionIsLessThan(Versions.V2_4_2)] | ||
/// | ||
/// Delta 0.8.0 is not compatible with Spark 3.1.1 | ||
/// Disable Delta tests that have code paths that create an | ||
/// `org.apache.spark.sql.catalyst.expressions.Alias` object. | ||
[SkipIfSparkVersionIsNotInRange(Versions.V2_4_2, Versions.V3_1_1)] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Disable Delta tests that have code paths that create an [inline code reference truncated in extraction]. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think you should add this comment to the code? |
||
public void TestTutorialScenario() | ||
{ | ||
using var tempDirectory = new TemporaryDirectory(); | ||
|
@@ -223,7 +227,11 @@ void testWrapper( | |
/// <summary> | ||
/// Test that methods return the expected signature. | ||
/// </summary> | ||
[SkipIfSparkVersionIsLessThan(Versions.V2_4_2)] | ||
/// | ||
/// Delta 0.8.0 is not compatible with Spark 3.1.1 | ||
/// Disable Delta tests that have code paths that create an | ||
/// `org.apache.spark.sql.catalyst.expressions.Alias` object. | ||
[SkipIfSparkVersionIsNotInRange(Versions.V2_4_2, Versions.V3_1_1)] | ||
public void TestSignaturesV2_4_X() | ||
{ | ||
using var tempDirectory = new TemporaryDirectory(); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -69,12 +69,7 @@ public void TestSignaturesV2_3_X() | |
|
||
// TODO: Test dfw.Jdbc without running a local db. | ||
|
||
dfw.Option("path", tempDir.Path).SaveAsTable("TestTable"); | ||
|
||
dfw.InsertInto("TestTable"); | ||
|
||
dfw.Option("path", $"{tempDir.Path}TestSavePath1").Save(); | ||
dfw.Save($"{tempDir.Path}TestSavePath2"); | ||
dfw.Save($"{tempDir.Path}TestSavePath1"); | ||
|
||
dfw.Json($"{tempDir.Path}TestJsonPath"); | ||
|
||
|
@@ -85,6 +80,16 @@ public void TestSignaturesV2_3_X() | |
dfw.Text($"{tempDir.Path}TestTextPath"); | ||
|
||
dfw.Csv($"{tempDir.Path}TestCsvPath"); | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Setting [inline code reference truncated in extraction]. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same here. I would add this to the code. |
||
dfw.Option("path", tempDir.Path).SaveAsTable("TestTable"); | ||
|
||
dfw.InsertInto("TestTable"); | ||
|
||
// In Spark 3.1.1+ setting the `path` Option and then calling .Save(path) is not | ||
// supported unless `spark.sql.legacy.pathOptionBehavior.enabled` conf is set. | ||
// .Json(path), .Parquet(path), etc follow the same code path so the conf | ||
// needs to be set in these scenarios as well. | ||
dfw.Option("path", $"{tempDir.Path}TestSavePath2").Save(); | ||
} | ||
} | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -52,7 +52,6 @@ public void TestSignaturesV2_3_X() | |
})); | ||
|
||
string jsonFilePath = Path.Combine(TestEnvironment.ResourceDirectory, "people.json"); | ||
Assert.IsType<DataFrame>(dsr.Format("json").Option("path", jsonFilePath).Load()); | ||
Assert.IsType<DataFrame>(dsr.Format("json").Load(jsonFilePath)); | ||
Assert.IsType<DataFrame>(dsr.Json(jsonFilePath)); | ||
Assert.IsType<DataFrame>( | ||
|
@@ -63,6 +62,12 @@ public void TestSignaturesV2_3_X() | |
dsr.Parquet(Path.Combine(TestEnvironment.ResourceDirectory, "users.parquet"))); | ||
Assert.IsType<DataFrame> | ||
(dsr.Text(Path.Combine(TestEnvironment.ResourceDirectory, "people.txt"))); | ||
|
||
// In Spark 3.1.1+ setting the `path` Option and then calling .Load(path) is not | ||
// supported unless `spark.sql.legacy.pathOptionBehavior.enabled` conf is set. | ||
// .Json(path), .Parquet(path), etc follow the same code path so the conf | ||
// needs to be set in these scenarios as well. | ||
Assert.IsType<DataFrame>(dsr.Format("json").Option("path", jsonFilePath).Load()); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Setting [inline code reference truncated in extraction]. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ditto.
||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So it seems that we need 2.0?
Assuming we release 1.1, if the client is using 1.1 for Spark 3.1, and the worker installed is 1.0, things will break. Right?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, the 1.0.0 Worker will break any time we introduce a version that supports a new minor version of Spark and the user is using that version.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Quick question, why is the above ^ a must? If the Spark minor version doesn't involve changes to how stream is sent, can't we just do what Terry suggests here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No one said it's a must, it's just the current pattern we have it set up with. The suggestion that Terry brought up was already discussed here