Fix #1334: Update DataFrame.from_records signature and add tests (#1335)

iclectic · web-flow · commit 3033eea474b7 · 2025-08-25T14:07:32.000-04:00
* feat: add type hints and tests for DataFrame.from_records method * Fix DataFrame.from_records type annotations and add pd.Index test- Change np.ndarray to np_2d_array for data parameter- Change SequenceNotStr[str] to ListLike for columns and exclude parameters- Add test case with pd.Index as columns parameterAddresses review feedback from Dr-Irv * Fix DataFrame.from_records type annotations- Update data parameter to use Sequence[SequenceNotStr] | Sequence[Mapping[str, Scalar]] | Mapping[str, Sequence[Scalar]]- Update columns and exclude parameters to use ListLike | None = None- Update index parameter to use SequenceNotStr for better type precisionAddresses review feedback from Dr-Irv on issue #1334 * feat: add type hints and tests for DataFrame.from_records method- Add np_2darray support to data parameter type annotation- Add comprehensive tests for DataFrame.from_records in test_frame.py- Fix NumPy 2.0 compatibility in test (S1 instead of a1)- Test covers np.ndarray, list of tuples, pd.Index columns, and structured arrays- Addresses GitHub issue #1334 * fix: improve DataFrame.from_records type annotations - Update data parameter types to accept Sequence[Mapping[str, Any]] and Mapping[str, SequenceNotStr[Any]] - Add comprehensive tests for np.ndarray, tuples, and mapping inputs - Address GitHub issue #1334 per Dr-Irv feedback - All 207 DataFrame tests pass with no issues * fix: enhance DataFrame.from_records type annotations per issue #1334 The main changes include: - Updated data parameter types from overly restrictive Scalar to more flexible Any types - Added .reshape(2, 2) to numpy array test to handle CI compatibility issues across different numpy versions - Included a test for mapping of sequences using DataFrame constructor (which seems to be the right approach for that data type) All 207 DataFrame tests still pass * fix: enhance DataFrame.from_records type annotations per issue #1334 Addresses Dr-Irv's feedback: - Updated data parameter types from restrictive Scalar to flexible Any - Added .reshape(2, 2) to numpy test for CI compatibility - Added proper dictionary tests (list and single) without tuple conversion - Added Mapping[str, Any] type support for single dictionaries - Used DataFrame constructor for mapping sequences test All tests pass. * fix: change index parameter to list[str] for type compatibility * fix: update DataFrame.from_records index parameter to accept Hashable values - Change index parameter type from SequenceNotStr[str] to SequenceNotStr[Hashable] - Apply black formatting to test files - Resolves CI type checking issues per Dr-Irv feedback This allows index parameter to accept integers and other hashable values, not just strings, matching pandas runtime behavior. * fix: use DataFrame.from_records instead of DataFrame constructor for mapping dict test. Applied black formatting and pre-commit fixes
diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi
@@ -545,10 +545,16 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack):
     @classmethod
     def from_records(
         cls,
-        data,
-        index=...,
-        exclude: SequenceNotStr[str] | None = None,
-        columns: SequenceNotStr[str] | None = None,
+        data: (
+            np_2darray
+            | Sequence[SequenceNotStr]
+            | Sequence[Mapping[str, Any]]
+            | Mapping[str, Any]
+            | Mapping[str, SequenceNotStr[Any]]
+        ),
+        index: str | SequenceNotStr[Hashable] | None = None,
+        columns: ListLike | None = None,
+        exclude: ListLike | None = None,
         coerce_float: bool = False,
         nrows: int | None = None,
     ) -> Self: ...
diff --git a/tests/test_frame.py b/tests/test_frame.py
@@ -4688,3 +4688,125 @@ def test_unstack() -> None:
         ),
         pd.DataFrame,
     )
+
+
+def test_from_records() -> None:
+
+    # test with np.ndarray
+    arr = np.array([[1, "a"], [2, "b"]], dtype=object).reshape(2, 2)
+    check(assert_type(pd.DataFrame.from_records(arr), pd.DataFrame), pd.DataFrame)
+
+    # testing with list of tuples
+    data_tuples = [(1, "a"), (2, "b"), (3, "c")]
+    check(
+        assert_type(
+            pd.DataFrame.from_records(data_tuples, columns=["id", "name"]),
+            pd.DataFrame,
+        ),
+        pd.DataFrame,
+    )
+
+    # testing with pd.Index as columns parameter
+    check(
+        assert_type(
+            pd.DataFrame.from_records(data_tuples, columns=pd.Index(["id", "name"])),
+            pd.DataFrame,
+        ),
+        pd.DataFrame,
+    )
+
+    # Testing with list of tuples (instead of structured array for type compatibility)
+    data_array_tuples = [(1, "a"), (2, "b")]
+    check(
+        assert_type(
+            pd.DataFrame.from_records(data_array_tuples, columns=["id", "name"]),
+            pd.DataFrame,
+        ),
+        pd.DataFrame,
+    )
+
+    # testing with list of dictionaries
+    data_dict_list = [{"id": 1, "name": "a"}, {"id": 2, "name": "b"}]
+    check(
+        assert_type(
+            pd.DataFrame.from_records(data_dict_list, columns=["id", "name"]),
+            pd.DataFrame,
+        ),
+        pd.DataFrame,
+    )
+
+    # test with single dictionary
+    data_single_dict = {"id": 1, "name": "a"}
+    check(
+        assert_type(
+            pd.DataFrame.from_records(data_single_dict, index=["0"]), pd.DataFrame
+        ),
+        pd.DataFrame,
+    )
+
+    # testing with mapping of sequences
+    data_mapping_dict = {"id": [1, 2], "name": ["a", "b"]}
+    check(
+        assert_type(pd.DataFrame.from_records(data_mapping_dict), pd.DataFrame),
+        pd.DataFrame,
+    )
+
+    # Testing with index parameter as string
+    check(
+        assert_type(
+            pd.DataFrame.from_records(data_tuples, columns=["id", "name"], index="id"),
+            pd.DataFrame,
+        ),
+        pd.DataFrame,
+    )
+
+    # Testing with index parameter as sequence
+    check(
+        assert_type(
+            pd.DataFrame.from_records(
+                data_tuples, columns=["id", "name"], index=["id"]
+            ),
+            pd.DataFrame,
+        ),
+        pd.DataFrame,
+    )
+
+    # Testing  with exclude parameter
+    check(
+        assert_type(
+            pd.DataFrame.from_records(
+                [(1, "a", "extra"), (2, "b", "extra")],
+                columns=["id", "name", "extra"],
+                exclude=["extra"],
+            ),
+            pd.DataFrame,
+        ),
+        pd.DataFrame,
+    )
+
+    # Testing with all parameters
+    check(
+        assert_type(
+            pd.DataFrame.from_records(
+                data_tuples,
+                index=None,
+                columns=["id", "name"],
+                exclude=None,
+                coerce_float=True,
+                nrows=2,
+            ),
+            pd.DataFrame,
+        ),
+        pd.DataFrame,
+    )
+
+    # Testing parameter order
+    check(
+        assert_type(
+            pd.DataFrame.from_records(
+                data_tuples, columns=["id", "name"], exclude=None
+            ),
+            pd.DataFrame,
+        ),
+        pd.DataFrame,
+    )