Skip to content

Commit 73f8a84

Browse files
committed
[SPARK-53184][PS] melt when "value" has MultiIndex column labels
### What changes were proposed in this pull request? Fix `melt` when "value" has MultiIndex column labels ([SPARK-53184][PS]). ### Why are the changes needed? Ensure pandas on Spark works well under ANSI mode. ### Does this PR introduce _any_ user-facing change? Yes. ### How was this patch tested? Existing tests. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #51914 from xinrong-meng/melt_multi. Authored-by: Xinrong Meng <xinrong@apache.org> Signed-off-by: Xinrong Meng <xinrong@apache.org>
1 parent 1bc8ce0 commit 73f8a84

File tree

2 files changed

+13
-16
lines changed

2 files changed

+13
-16
lines changed

python/pyspark/pandas/frame.py

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -10638,15 +10638,20 @@ def melt(
1063810638
else:
1063910639
var_name = [var_name] # type: ignore[list-item]
1064010640

10641-
value_col_types = [
10642-
self._internal.spark_column_for(label).expr.dataType for label in value_vars
10643-
]
10644-
# If any value column is of StringType, cast all value columns to StringType to avoid
10645-
# ANSI mode errors during explode - mixing strings and integers.
10646-
string_cast_required_type = (
10647-
StringType() if any(isinstance(t, StringType) for t in value_col_types) else None
10648-
)
1064910641
use_cast = is_ansi_mode_enabled(self._internal.spark_frame.sparkSession)
10642+
string_cast_required_type = None
10643+
if use_cast:
10644+
field_by_label = {
10645+
label: field
10646+
for label, field in zip(self._internal.column_labels, self._internal.data_fields)
10647+
}
10648+
10649+
value_col_types = [field_by_label[label].spark_type for label in value_vars]
10650+
# If any value column is of StringType, cast all value columns to StringType to avoid
10651+
# ANSI mode errors during explode - mixing strings and integers.
10652+
string_cast_required_type = (
10653+
StringType() if any(isinstance(t, StringType) for t in value_col_types) else None
10654+
)
1065010655

1065110656
pairs = F.explode(
1065210657
F.array(
@@ -13824,16 +13829,12 @@ def _test() -> None:
1382413829
import uuid
1382513830
from pyspark.sql import SparkSession
1382613831
import pyspark.pandas.frame
13827-
from pyspark.testing.utils import is_ansi_mode_test
1382813832

1382913833
os.chdir(os.environ["SPARK_HOME"])
1383013834

1383113835
globs = pyspark.pandas.frame.__dict__.copy()
1383213836
globs["ps"] = pyspark.pandas
1383313837

13834-
if is_ansi_mode_test:
13835-
del pyspark.pandas.frame.DataFrame.melt.__doc__
13836-
1383713838
spark = (
1383813839
SparkSession.builder.master("local[4]").appName("pyspark.pandas.frame tests").getOrCreate()
1383913840
)

python/pyspark/pandas/namespace.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3879,7 +3879,6 @@ def _test() -> None:
38793879
from pyspark.sql import SparkSession
38803880
import pyspark.pandas.namespace
38813881
from pandas.util.version import Version
3882-
from pyspark.testing.utils import is_ansi_mode_test
38833882

38843883
os.chdir(os.environ["SPARK_HOME"])
38853884

@@ -3893,9 +3892,6 @@ def _test() -> None:
38933892
globs["ps"] = pyspark.pandas
38943893
globs["sf"] = F
38953894

3896-
if is_ansi_mode_test:
3897-
del pyspark.pandas.namespace.melt.__doc__
3898-
38993895
spark = (
39003896
SparkSession.builder.master("local[4]")
39013897
.appName("pyspark.pandas.namespace tests")

0 commit comments

Comments
 (0)