From 80edcb3dc568c123df7852ed92380842b89aad25 Mon Sep 17 00:00:00 2001 From: Abdulaziz Aloqeely <52792999+DAzVise@users.noreply.github.com> Date: Sat, 6 Apr 2024 00:33:14 +0300 Subject: [PATCH 01/26] ENH: Implement PDEP-17 --- doc/source/reference/testing.rst | 6 ++ doc/source/whatsnew/v3.0.0.rst | 6 ++ pandas/_libs/tslibs/timestamps.pyx | 8 ++- pandas/core/frame.py | 49 +++++++++++---- pandas/core/generic.py | 9 +-- pandas/core/groupby/generic.py | 7 ++- pandas/core/indexes/accessors.py | 5 +- pandas/core/internals/api.py | 3 +- pandas/core/reshape/concat.py | 3 +- pandas/core/series.py | 59 ++++++++++++++----- pandas/errors/__init__.py | 36 +++++++++++ pandas/io/feather_format.py | 3 +- pandas/io/parquet.py | 9 ++- pandas/io/parsers/arrow_parser_wrapper.py | 7 ++- .../tests/copy_view/test_copy_deprecation.py | 20 ++++--- pandas/tests/extension/test_arrow.py | 5 +- .../tests/frame/methods/test_reindex_like.py | 6 +- pandas/tests/groupby/test_all_methods.py | 6 +- pandas/tests/groupby/test_apply.py | 4 +- pandas/tests/groupby/test_categorical.py | 8 ++- pandas/tests/groupby/test_groupby_dropna.py | 5 +- pandas/tests/groupby/test_numeric_only.py | 5 +- pandas/tests/groupby/test_raises.py | 27 ++++++--- .../tests/groupby/transform/test_transform.py | 5 +- pandas/tests/io/formats/test_to_markdown.py | 4 +- pandas/tests/io/formats/test_to_string.py | 4 +- .../tests/io/json/test_json_table_schema.py | 4 +- pandas/tests/io/json/test_pandas.py | 43 +++++++------- pandas/tests/io/test_gcs.py | 3 +- pandas/tests/resample/test_datetime_index.py | 3 +- .../scalar/timestamp/test_constructors.py | 9 ++- .../tests/scalar/timestamp/test_timestamp.py | 7 ++- .../series/accessors/test_cat_accessor.py | 4 +- .../series/accessors/test_dt_accessor.py | 3 +- .../tests/series/methods/test_reindex_like.py | 10 ++-- pandas/tests/tools/test_to_datetime.py | 4 +- pandas/tests/util/test_deprecate.py | 10 +++- pandas/tests/util/test_deprecate_kwarg.py | 8 +-- 
.../test_deprecate_nonkeyword_arguments.py | 17 ++++-- .../util/test_pandas_deprecation_warning.py | 25 ++++++++ pandas/util/_decorators.py | 26 +++++--- 41 files changed, 350 insertions(+), 135 deletions(-) create mode 100644 pandas/tests/util/test_pandas_deprecation_warning.py diff --git a/doc/source/reference/testing.rst b/doc/source/reference/testing.rst index 1f164d1aa98b4..d91c7ba081850 100644 --- a/doc/source/reference/testing.rst +++ b/doc/source/reference/testing.rst @@ -51,6 +51,12 @@ Exceptions and warnings errors.OptionError errors.OutOfBoundsDatetime errors.OutOfBoundsTimedelta + errors.PandasChangeWarning + errors.Pandas4Warning + errors.Pandas5Warning + errors.PandasPendingDeprecationWarning + errors.PandasDeprecationWarning + errors.PandasFutureWarning errors.ParserError errors.ParserWarning errors.PerformanceWarning diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 099e5bc48353a..7a213aeaae25b 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -24,6 +24,12 @@ Enhancement1 Enhancement2 ^^^^^^^^^^^^ +New Deprecation Policy +^^^^^^^^^^^^^^^^^^^^^^ +pandas 3.0.0 introduces a new 3-stage deprecation policy: using ``DeprecationWarning`` initially, then switching to ``FutureWarning`` for broader visibility in the last minor version before the next major release, and then removal of the deprecated functionality in the major release. + +This was done to give downstream packages more time to adjust to pandas deprecations, which should reduce the number of warnings that a user gets from code that isn't theirs. + .. 
_whatsnew_300.enhancements.other: Other enhancements diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 390267db8267f..5af4c80d0de8e 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1994,12 +1994,14 @@ class Timestamp(_Timestamp): >>> pd.Timestamp.utcnow() # doctest: +SKIP Timestamp('2020-11-16 22:50:18.092888+0000', tz='UTC') """ + from pandas.errors import Pandas4Warning + warnings.warn( # The stdlib datetime.utcnow is deprecated, so we deprecate to match. # GH#56680 "Timestamp.utcnow is deprecated and will be removed in a future " "version. Use Timestamp.now('UTC') instead.", - FutureWarning, + Pandas4Warning, stacklevel=find_stack_level(), ) return cls.now(UTC) @@ -2036,13 +2038,15 @@ class Timestamp(_Timestamp): >>> pd.Timestamp.utcfromtimestamp(1584199972) Timestamp('2020-03-14 15:32:52+0000', tz='UTC') """ + from pandas.errors import Pandas4Warning + # GH#22451 warnings.warn( # The stdlib datetime.utcfromtimestamp is deprecated, so we deprecate # to match. GH#56680 "Timestamp.utcfromtimestamp is deprecated and will be removed in a " "future version. Use Timestamp.fromtimestamp(ts, 'UTC') instead.", - FutureWarning, + Pandas4Warning, stacklevel=find_stack_level(), ) return cls.fromtimestamp(ts, tz="UTC") diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 44a215ab082b7..c146de59da862 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -55,6 +55,7 @@ from pandas.errors import ( ChainedAssignmentError, InvalidIndexError, + Pandas4Warning, ) from pandas.errors.cow import ( _chained_assignment_method_msg, @@ -11909,7 +11910,9 @@ def all( **kwargs, ) -> Series | bool: ... 
- @deprecate_nonkeyword_arguments(version="4.0", allowed_args=["self"], name="all") + @deprecate_nonkeyword_arguments( + version="4.0", klass=Pandas4Warning, allowed_args=["self"], name="all" + ) @doc(make_doc("all", ndim=1)) def all( self, @@ -11956,7 +11959,9 @@ def min( **kwargs, ) -> Series | Any: ... - @deprecate_nonkeyword_arguments(version="4.0", allowed_args=["self"], name="min") + @deprecate_nonkeyword_arguments( + version="4.0", klass=Pandas4Warning, allowed_args=["self"], name="min" + ) @doc(make_doc("min", ndim=2)) def min( self, @@ -12003,7 +12008,9 @@ def max( **kwargs, ) -> Series | Any: ... - @deprecate_nonkeyword_arguments(version="4.0", allowed_args=["self"], name="max") + @deprecate_nonkeyword_arguments( + version="4.0", klass=Pandas4Warning, allowed_args=["self"], name="max" + ) @doc(make_doc("max", ndim=2)) def max( self, @@ -12019,7 +12026,9 @@ def max( result = result.__finalize__(self, method="max") return result - @deprecate_nonkeyword_arguments(version="4.0", allowed_args=["self"], name="sum") + @deprecate_nonkeyword_arguments( + version="4.0", klass=Pandas4Warning, allowed_args=["self"], name="sum" + ) def sum( self, axis: Axis | None = 0, @@ -12120,7 +12129,9 @@ def sum( result = result.__finalize__(self, method="sum") return result - @deprecate_nonkeyword_arguments(version="4.0", allowed_args=["self"], name="prod") + @deprecate_nonkeyword_arguments( + version="4.0", klass=Pandas4Warning, allowed_args=["self"], name="prod" + ) def prod( self, axis: Axis | None = 0, @@ -12238,7 +12249,9 @@ def mean( **kwargs, ) -> Series | Any: ... - @deprecate_nonkeyword_arguments(version="4.0", allowed_args=["self"], name="mean") + @deprecate_nonkeyword_arguments( + version="4.0", klass=Pandas4Warning, allowed_args=["self"], name="mean" + ) @doc(make_doc("mean", ndim=2)) def mean( self, @@ -12285,7 +12298,9 @@ def median( **kwargs, ) -> Series | Any: ... 
- @deprecate_nonkeyword_arguments(version="4.0", allowed_args=["self"], name="median") + @deprecate_nonkeyword_arguments( + version="4.0", klass=Pandas4Warning, allowed_args=["self"], name="median" + ) @doc(make_doc("median", ndim=2)) def median( self, @@ -12335,7 +12350,9 @@ def sem( **kwargs, ) -> Series | Any: ... - @deprecate_nonkeyword_arguments(version="4.0", allowed_args=["self"], name="sem") + @deprecate_nonkeyword_arguments( + version="4.0", klass=Pandas4Warning, allowed_args=["self"], name="sem" + ) def sem( self, axis: Axis | None = 0, @@ -12455,7 +12472,9 @@ def var( **kwargs, ) -> Series | Any: ... - @deprecate_nonkeyword_arguments(version="4.0", allowed_args=["self"], name="var") + @deprecate_nonkeyword_arguments( + version="4.0", klass=Pandas4Warning, allowed_args=["self"], name="var" + ) def var( self, axis: Axis | None = 0, @@ -12574,7 +12593,9 @@ def std( **kwargs, ) -> Series | Any: ... - @deprecate_nonkeyword_arguments(version="4.0", allowed_args=["self"], name="std") + @deprecate_nonkeyword_arguments( + version="4.0", klass=Pandas4Warning, allowed_args=["self"], name="std" + ) def std( self, axis: Axis | None = 0, @@ -12697,7 +12718,9 @@ def skew( **kwargs, ) -> Series | Any: ... - @deprecate_nonkeyword_arguments(version="4.0", allowed_args=["self"], name="skew") + @deprecate_nonkeyword_arguments( + version="4.0", klass=Pandas4Warning, allowed_args=["self"], name="skew" + ) def skew( self, axis: Axis | None = 0, @@ -12817,7 +12840,9 @@ def kurt( **kwargs, ) -> Series | Any: ... 
- @deprecate_nonkeyword_arguments(version="4.0", allowed_args=["self"], name="kurt") + @deprecate_nonkeyword_arguments( + version="4.0", klass=Pandas4Warning, allowed_args=["self"], name="kurt" + ) def kurt( self, axis: Axis | None = 0, diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 13585d7de6beb..a6b8bd2983ff5 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -90,6 +90,7 @@ AbstractMethodError, ChainedAssignmentError, InvalidIndexError, + Pandas4Warning, ) from pandas.errors.cow import _chained_assignment_method_msg from pandas.util._decorators import ( @@ -2594,7 +2595,7 @@ def to_json( warnings.warn( "The default 'epoch' date format is deprecated and will be removed " "in a future version, please use 'iso' date format instead.", - FutureWarning, + Pandas4Warning, stacklevel=find_stack_level(), ) elif date_format == "epoch": @@ -2602,7 +2603,7 @@ def to_json( warnings.warn( "'epoch' date format is deprecated and will be removed in a future " "version, please use 'iso' date format instead.", - FutureWarning, + Pandas4Warning, stacklevel=find_stack_level(), ) @@ -4381,12 +4382,12 @@ def _check_copy_deprecation(copy): "version. Copy-on-Write is active in pandas since 3.0 which utilizes " "a lazy copy mechanism that defers copies until necessary. 
Use " ".copy() to make an eager copy if necessary.", - DeprecationWarning, + Pandas4Warning, stacklevel=find_stack_level(), ) # issue 58667 - @deprecate_kwarg("method", None) + @deprecate_kwarg("method", klass=Pandas4Warning, new_arg_name=None) @final def reindex_like( self, diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index b520ad69aae96..0a7b23a4f0852 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -27,7 +27,10 @@ from pandas._libs import Interval from pandas._libs.hashtable import duplicated -from pandas.errors import SpecificationError +from pandas.errors import ( + Pandas4Warning, + SpecificationError, +) from pandas.util._decorators import ( Appender, Substitution, @@ -3330,7 +3333,7 @@ def corrwith( """ warnings.warn( "DataFrameGroupBy.corrwith is deprecated", - FutureWarning, + Pandas4Warning, stacklevel=find_stack_level(), ) result = self._op_via_apply( diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index c404323a1168c..b9a0af6b0bc28 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -14,6 +14,7 @@ import numpy as np from pandas._libs import lib +from pandas.errors import Pandas4Warning from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import ( @@ -218,7 +219,7 @@ def to_pytimedelta(self): "in a future version this will return a Series containing python " "datetime.timedelta objects instead of an ndarray. To retain the " "old behavior, call `np.array` on the result", - FutureWarning, + Pandas4Warning, stacklevel=find_stack_level(), ) return cast(ArrowExtensionArray, self._parent.array)._dt_to_pytimedelta() @@ -501,7 +502,7 @@ def to_pytimedelta(self) -> np.ndarray: "in a future version this will return a Series containing python " "datetime.timedelta objects instead of an ndarray. 
To retain the " "old behavior, call `np.array` on the result", - FutureWarning, + Pandas4Warning, stacklevel=find_stack_level(), ) return self._get_values().to_pytimedelta() diff --git a/pandas/core/internals/api.py b/pandas/core/internals/api.py index 04944db2ebd9c..077b855771086 100644 --- a/pandas/core/internals/api.py +++ b/pandas/core/internals/api.py @@ -15,6 +15,7 @@ import numpy as np from pandas._libs.internals import BlockPlacement +from pandas.errors import Pandas4Warning from pandas.core.dtypes.common import pandas_dtype from pandas.core.dtypes.dtypes import ( @@ -93,7 +94,7 @@ def make_block( "make_block is deprecated and will be removed in a future version. " "Use pd.api.internals.create_dataframe_from_blocks or " "(recommended) higher-level public APIs instead.", - DeprecationWarning, + Pandas4Warning, stacklevel=2, ) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 5efaf0dc051bd..c74393363a068 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -17,6 +17,7 @@ import numpy as np from pandas._libs import lib +from pandas.errors import Pandas4Warning from pandas.util._decorators import set_module from pandas.util._exceptions import find_stack_level @@ -392,7 +393,7 @@ def concat( "version. Copy-on-Write is active in pandas since 3.0 which utilizes " "a lazy copy mechanism that defers copies until necessary. Use " ".copy() to make an eager copy if necessary.", - DeprecationWarning, + Pandas4Warning, stacklevel=find_stack_level(), ) if join == "outer": diff --git a/pandas/core/series.py b/pandas/core/series.py index 5ed094349caaa..548b898438c51 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -40,6 +40,7 @@ from pandas.errors import ( ChainedAssignmentError, InvalidIndexError, + Pandas4Warning, ) from pandas.errors.cow import ( _chained_assignment_method_msg, @@ -1472,7 +1473,10 @@ def to_string( ) -> None: ... 
@deprecate_nonkeyword_arguments( - version="4.0", allowed_args=["self", "buf"], name="to_string" + version="4.0", + klass=Pandas4Warning, + allowed_args=["self", "buf"], + name="to_string", ) def to_string( self, @@ -1630,7 +1634,10 @@ def to_markdown( ), ) @deprecate_nonkeyword_arguments( - version="4.0", allowed_args=["self", "buf"], name="to_markdown" + version="4.0", + klass=Pandas4Warning, + allowed_args=["self", "buf"], + name="to_markdown", ) def to_markdown( self, @@ -6643,7 +6650,9 @@ def any( # type: ignore[override] filter_type="bool", ) - @deprecate_nonkeyword_arguments(version="4.0", allowed_args=["self"], name="all") + @deprecate_nonkeyword_arguments( + version="4.0", klass=Pandas4Warning, allowed_args=["self"], name="all" + ) @Appender(make_doc("all", ndim=1)) def all( self, @@ -6663,7 +6672,9 @@ def all( filter_type="bool", ) - @deprecate_nonkeyword_arguments(version="4.0", allowed_args=["self"], name="min") + @deprecate_nonkeyword_arguments( + version="4.0", klass=Pandas4Warning, allowed_args=["self"], name="min" + ) def min( self, axis: Axis | None = 0, @@ -6734,7 +6745,9 @@ def min( self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs ) - @deprecate_nonkeyword_arguments(version="4.0", allowed_args=["self"], name="max") + @deprecate_nonkeyword_arguments( + version="4.0", klass=Pandas4Warning, allowed_args=["self"], name="max" + ) def max( self, axis: Axis | None = 0, @@ -6805,7 +6818,9 @@ def max( self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs ) - @deprecate_nonkeyword_arguments(version="4.0", allowed_args=["self"], name="sum") + @deprecate_nonkeyword_arguments( + version="4.0", klass=Pandas4Warning, allowed_args=["self"], name="sum" + ) def sum( self, axis: Axis | None = None, @@ -6906,7 +6921,9 @@ def sum( **kwargs, ) - @deprecate_nonkeyword_arguments(version="4.0", allowed_args=["self"], name="prod") + @deprecate_nonkeyword_arguments( + version="4.0", klass=Pandas4Warning, allowed_args=["self"], name="prod" + 
) @doc(make_doc("prod", ndim=1)) def prod( self, @@ -6925,7 +6942,9 @@ def prod( **kwargs, ) - @deprecate_nonkeyword_arguments(version="4.0", allowed_args=["self"], name="mean") + @deprecate_nonkeyword_arguments( + version="4.0", klass=Pandas4Warning, allowed_args=["self"], name="mean" + ) def mean( self, axis: Axis | None = 0, @@ -6979,7 +6998,9 @@ def mean( self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs ) - @deprecate_nonkeyword_arguments(version="4.0", allowed_args=["self"], name="median") + @deprecate_nonkeyword_arguments( + version="4.0", klass=Pandas4Warning, allowed_args=["self"], name="median" + ) def median( self, axis: Axis | None = 0, @@ -7060,7 +7081,9 @@ def median( self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs ) - @deprecate_nonkeyword_arguments(version="4.0", allowed_args=["self"], name="sem") + @deprecate_nonkeyword_arguments( + version="4.0", klass=Pandas4Warning, allowed_args=["self"], name="sem" + ) @doc(make_doc("sem", ndim=1)) def sem( self, @@ -7079,7 +7102,9 @@ def sem( **kwargs, ) - @deprecate_nonkeyword_arguments(version="4.0", allowed_args=["self"], name="var") + @deprecate_nonkeyword_arguments( + version="4.0", klass=Pandas4Warning, allowed_args=["self"], name="var" + ) def var( self, axis: Axis | None = None, @@ -7166,7 +7191,9 @@ def var( **kwargs, ) - @deprecate_nonkeyword_arguments(version="4.0", allowed_args=["self"], name="std") + @deprecate_nonkeyword_arguments( + version="4.0", klass=Pandas4Warning, allowed_args=["self"], name="std" + ) @doc(make_doc("std", ndim=1)) def std( self, @@ -7185,7 +7212,9 @@ def std( **kwargs, ) - @deprecate_nonkeyword_arguments(version="4.0", allowed_args=["self"], name="skew") + @deprecate_nonkeyword_arguments( + version="4.0", klass=Pandas4Warning, allowed_args=["self"], name="skew" + ) @doc(make_doc("skew", ndim=1)) def skew( self, @@ -7198,7 +7227,9 @@ def skew( self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs ) - 
@deprecate_nonkeyword_arguments(version="4.0", allowed_args=["self"], name="kurt") + @deprecate_nonkeyword_arguments( + version="4.0", klass=Pandas4Warning, allowed_args=["self"], name="kurt" + ) def kurt( self, axis: Axis | None = 0, diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 2b5bc450e41d6..1179e5d37f414 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -91,6 +91,42 @@ class PerformanceWarning(Warning): """ +class PandasChangeWarning(Warning): + """ + Warning raised for any pending deprecation. + """ + + +class Pandas4Warning(PandasChangeWarning, DeprecationWarning): + """ + Warning raised for a pending deprecation that will be enforced in pandas 4.0. + """ + + +class Pandas5Warning(PandasChangeWarning, PendingDeprecationWarning): + """ + Warning raised for a pending deprecation that will be enforced in pandas 5.0. + """ + + +class PandasPendingDeprecationWarning(PandasChangeWarning, PendingDeprecationWarning): + """ + Warning raised for a pending deprecation that is a PendingDeprecationWarning. + """ + + +class PandasDeprecationWarning(PandasChangeWarning, DeprecationWarning): + """ + Warning raised for a pending deprecation that is a DeprecationWarning. + """ + + +class PandasFutureWarning(PandasChangeWarning, FutureWarning): + """ + Warning raised for a pending deprecation that is a FutureWarning. + """ + + class UnsupportedFunctionCall(ValueError): """ Exception raised when attempting to call a unsupported numpy function. 
diff --git a/pandas/io/feather_format.py b/pandas/io/feather_format.py index 565c53f0f3fc5..ebb678c26db30 100644 --- a/pandas/io/feather_format.py +++ b/pandas/io/feather_format.py @@ -12,6 +12,7 @@ from pandas._libs import lib from pandas.compat._optional import import_optional_dependency +from pandas.errors import Pandas4Warning from pandas.util._decorators import doc from pandas.util._validators import check_dtype_backend @@ -144,7 +145,7 @@ def read_feather( warnings.filterwarnings( "ignore", "make_block is deprecated", - DeprecationWarning, + Pandas4Warning, ) return feather.read_feather( diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 6a5a83088e986..dcbb41f9ad233 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -17,7 +17,10 @@ from pandas._libs import lib from pandas.compat._optional import import_optional_dependency -from pandas.errors import AbstractMethodError +from pandas.errors import ( + AbstractMethodError, + Pandas4Warning, +) from pandas.util._decorators import doc from pandas.util._validators import check_dtype_backend @@ -265,7 +268,7 @@ def read( filterwarnings( "ignore", "make_block is deprecated", - DeprecationWarning, + Pandas4Warning, ) result = arrow_table_to_pandas( pa_table, @@ -393,7 +396,7 @@ def read( filterwarnings( "ignore", "make_block is deprecated", - DeprecationWarning, + Pandas4Warning, ) return parquet_file.to_pandas( columns=columns, filters=filters, **kwargs diff --git a/pandas/io/parsers/arrow_parser_wrapper.py b/pandas/io/parsers/arrow_parser_wrapper.py index 8cadde1ad6537..5edf0432c55fe 100644 --- a/pandas/io/parsers/arrow_parser_wrapper.py +++ b/pandas/io/parsers/arrow_parser_wrapper.py @@ -6,10 +6,13 @@ from pandas._libs import lib from pandas.compat._optional import import_optional_dependency from pandas.errors import ( + Pandas4Warning, ParserError, ParserWarning, ) -from pandas.util._exceptions import find_stack_level +from pandas.util._exceptions import ( + find_stack_level, +) from 
pandas.core.dtypes.common import pandas_dtype from pandas.core.dtypes.inference import is_integer @@ -286,7 +289,7 @@ def read(self) -> DataFrame: warnings.filterwarnings( "ignore", "make_block is deprecated", - DeprecationWarning, + Pandas4Warning, ) frame = arrow_table_to_pandas( table, dtype_backend=dtype_backend, null_to_int64=True diff --git a/pandas/tests/copy_view/test_copy_deprecation.py b/pandas/tests/copy_view/test_copy_deprecation.py index 8ee37213b92ab..67ee804a755fb 100644 --- a/pandas/tests/copy_view/test_copy_deprecation.py +++ b/pandas/tests/copy_view/test_copy_deprecation.py @@ -1,5 +1,7 @@ import pytest +from pandas.errors import Pandas4Warning + import pandas as pd from pandas import ( concat, @@ -38,11 +40,11 @@ def test_copy_deprecation(meth, kwargs): df = df.set_index(["b", "c"]) if meth != "swaplevel": - with tm.assert_produces_warning(DeprecationWarning, match="copy"): + with tm.assert_produces_warning(Pandas4Warning, match="copy"): getattr(df, meth)(copy=False, **kwargs) if meth != "transpose": - with tm.assert_produces_warning(DeprecationWarning, match="copy"): + with tm.assert_produces_warning(Pandas4Warning, match="copy"): getattr(df.a, meth)(copy=False, **kwargs) @@ -50,22 +52,22 @@ def test_copy_deprecation_reindex_like_align(): df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) # Somehow the stack level check is incorrect here with tm.assert_produces_warning( - DeprecationWarning, match="copy", check_stacklevel=False + Pandas4Warning, match="copy", check_stacklevel=False ): df.reindex_like(df, copy=False) with tm.assert_produces_warning( - DeprecationWarning, match="copy", check_stacklevel=False + Pandas4Warning, match="copy", check_stacklevel=False ): df.a.reindex_like(df.a, copy=False) with tm.assert_produces_warning( - DeprecationWarning, match="copy", check_stacklevel=False + Pandas4Warning, match="copy", check_stacklevel=False ): df.align(df, copy=False) with tm.assert_produces_warning( - DeprecationWarning, match="copy", 
check_stacklevel=False + Pandas4Warning, match="copy", check_stacklevel=False ): df.a.align(df.a, copy=False) @@ -74,16 +76,16 @@ def test_copy_deprecation_merge_concat(): df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) with tm.assert_produces_warning( - DeprecationWarning, match="copy", check_stacklevel=False + Pandas4Warning, match="copy", check_stacklevel=False ): df.merge(df, copy=False) with tm.assert_produces_warning( - DeprecationWarning, match="copy", check_stacklevel=False + Pandas4Warning, match="copy", check_stacklevel=False ): merge(df, df, copy=False) with tm.assert_produces_warning( - DeprecationWarning, match="copy", check_stacklevel=False + Pandas4Warning, match="copy", check_stacklevel=False ): concat([df, df], copy=False) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index fc5930ebcd8ac..2aa7aec23c707 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -45,6 +45,7 @@ pa_version_under19p0, pa_version_under20p0, ) +from pandas.errors import Pandas4Warning from pandas.core.dtypes.dtypes import ( ArrowDtype, @@ -2849,14 +2850,14 @@ def test_dt_to_pytimedelta(): ser = pd.Series(data, dtype=ArrowDtype(pa.duration("ns"))) msg = "The behavior of ArrowTemporalProperties.to_pytimedelta is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(Pandas4Warning, match=msg): result = ser.dt.to_pytimedelta() expected = np.array(data, dtype=object) tm.assert_numpy_array_equal(result, expected) assert all(type(res) is timedelta for res in result) msg = "The behavior of TimedeltaProperties.to_pytimedelta is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(Pandas4Warning, match=msg): expected = ser.astype("timedelta64[ns]").dt.to_pytimedelta() tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_reindex_like.py 
b/pandas/tests/frame/methods/test_reindex_like.py index 03968dcbb6314..73e3d2ecd6215 100644 --- a/pandas/tests/frame/methods/test_reindex_like.py +++ b/pandas/tests/frame/methods/test_reindex_like.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas.errors import Pandas4Warning + from pandas import DataFrame import pandas._testing as tm @@ -22,10 +24,10 @@ def test_reindex_like(self, float_frame): def test_reindex_like_methods(self, method, expected_values): df = DataFrame({"x": list(range(5))}) - with tm.assert_produces_warning(FutureWarning): + with tm.assert_produces_warning(Pandas4Warning): result = df.reindex_like(df, method=method, tolerance=0) tm.assert_frame_equal(df, result) - with tm.assert_produces_warning(FutureWarning): + with tm.assert_produces_warning(Pandas4Warning): result = df.reindex_like(df, method=method, tolerance=[0, 0, 0, 0]) tm.assert_frame_equal(df, result) diff --git a/pandas/tests/groupby/test_all_methods.py b/pandas/tests/groupby/test_all_methods.py index 4625c5c27a803..2310c3bf59e15 100644 --- a/pandas/tests/groupby/test_all_methods.py +++ b/pandas/tests/groupby/test_all_methods.py @@ -13,6 +13,8 @@ import pytest +from pandas.errors import Pandas4Warning + import pandas as pd from pandas import DataFrame import pandas._testing as tm @@ -26,7 +28,7 @@ def test_multiindex_group_all_columns_when_empty(groupby_func): method = getattr(gb, groupby_func) args = get_groupby_method_args(groupby_func, df) if groupby_func == "corrwith": - warn = FutureWarning + warn = Pandas4Warning warn_msg = "DataFrameGroupBy.corrwith is deprecated" else: warn = None @@ -72,7 +74,7 @@ def test_dup_labels_output_shape(groupby_func, idx): args = get_groupby_method_args(groupby_func, df) if groupby_func == "corrwith": - warn = FutureWarning + warn = Pandas4Warning warn_msg = "DataFrameGroupBy.corrwith is deprecated" else: warn = None diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 5bf16ee9ad0b8..635393e41bd9d 
100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -6,6 +6,8 @@ import numpy as np import pytest +from pandas.errors import Pandas4Warning + import pandas as pd from pandas import ( DataFrame, @@ -1109,7 +1111,7 @@ def test_apply_is_unchanged_when_other_methods_are_called_first(reduction_func): grp = df.groupby(by="a") args = get_groupby_method_args(reduction_func, df) if reduction_func == "corrwith": - warn = FutureWarning + warn = Pandas4Warning msg = "DataFrameGroupBy.corrwith is deprecated" else: warn = None diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index cae3013642739..97c2be7584837 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -3,6 +3,8 @@ import numpy as np import pytest +from pandas.errors import Pandas4Warning + import pandas as pd from pandas import ( Categorical, @@ -1488,7 +1490,7 @@ def test_dataframe_groupby_on_2_categoricals_when_observed_is_true(reduction_fun args = get_groupby_method_args(reduction_func, df) if reduction_func == "corrwith": - warn = FutureWarning + warn = Pandas4Warning warn_msg = "DataFrameGroupBy.corrwith is deprecated" else: warn = None @@ -1534,7 +1536,7 @@ def test_dataframe_groupby_on_2_categoricals_when_observed_is_false( return if reduction_func == "corrwith": - warn = FutureWarning + warn = Pandas4Warning warn_msg = "DataFrameGroupBy.corrwith is deprecated" else: warn = None @@ -1933,7 +1935,7 @@ def test_category_order_reducer( getattr(gb, reduction_func)(*args) return if reduction_func == "corrwith": - warn = FutureWarning + warn = Pandas4Warning warn_msg = "DataFrameGroupBy.corrwith is deprecated" else: warn = None diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py index 8c4ab42b7be7a..c6368385602f5 100644 --- a/pandas/tests/groupby/test_groupby_dropna.py +++ b/pandas/tests/groupby/test_groupby_dropna.py @@ -2,6 +2,7 
@@ import pytest from pandas.compat.pyarrow import pa_version_under10p1 +from pandas.errors import Pandas4Warning from pandas.core.dtypes.missing import na_value_for_dtype @@ -542,7 +543,7 @@ def test_categorical_reducers(reduction_func, observed, sort, as_index, index_ki gb_filled = df_filled.groupby(keys, observed=observed, sort=sort, as_index=True) if reduction_func == "corrwith": - warn = FutureWarning + warn = Pandas4Warning msg = "DataFrameGroupBy.corrwith is deprecated" else: warn = None @@ -573,7 +574,7 @@ def test_categorical_reducers(reduction_func, observed, sort, as_index, index_ki expected = expected["size"].rename(None) if reduction_func == "corrwith": - warn = FutureWarning + warn = Pandas4Warning msg = "DataFrameGroupBy.corrwith is deprecated" else: warn = None diff --git a/pandas/tests/groupby/test_numeric_only.py b/pandas/tests/groupby/test_numeric_only.py index 99a88a5d8fe7c..b79ca8bf1ee3a 100644 --- a/pandas/tests/groupby/test_numeric_only.py +++ b/pandas/tests/groupby/test_numeric_only.py @@ -3,6 +3,7 @@ import pytest from pandas._libs import lib +from pandas.errors import Pandas4Warning import pandas as pd from pandas import ( @@ -266,7 +267,7 @@ def test_numeric_only(kernel, has_arg, numeric_only, keys): if has_arg and numeric_only is True: # Cases where b does not appear in the result if kernel == "corrwith": - warn = FutureWarning + warn = Pandas4Warning msg = "DataFrameGroupBy.corrwith is deprecated" else: warn = None @@ -311,7 +312,7 @@ def test_numeric_only(kernel, has_arg, numeric_only, keys): msg = "'>' not supported between instances of 'type' and 'type'" with pytest.raises(exception, match=msg): if kernel == "corrwith": - warn = FutureWarning + warn = Pandas4Warning msg = "DataFrameGroupBy.corrwith is deprecated" else: warn = None diff --git a/pandas/tests/groupby/test_raises.py b/pandas/tests/groupby/test_raises.py index 864b9e5d55991..1473b95bf9476 100644 --- a/pandas/tests/groupby/test_raises.py +++ 
b/pandas/tests/groupby/test_raises.py @@ -8,6 +8,8 @@ import numpy as np import pytest +from pandas.errors import Pandas4Warning + from pandas import ( Categorical, DataFrame, @@ -84,9 +86,12 @@ def df_with_cat_col(): return df -def _call_and_check(klass, msg, how, gb, groupby_func, args, warn_msg=""): - warn_klass = None if warn_msg == "" else FutureWarning - with tm.assert_produces_warning(warn_klass, match=warn_msg, check_stacklevel=False): +def _call_and_check( + klass, msg, how, gb, groupby_func, args, warn_category=None, warn_msg="" +): + with tm.assert_produces_warning( + warn_category, match=warn_msg, check_stacklevel=False + ): if klass is None: if how == "method": getattr(gb, groupby_func)(*args) @@ -213,10 +218,12 @@ def test_groupby_raises_string( msg = "Cannot perform reduction 'mean' with string dtype" if groupby_func == "corrwith": + warn_category = Pandas4Warning warn_msg = "DataFrameGroupBy.corrwith is deprecated" else: + warn_category = None warn_msg = "" - _call_and_check(klass, msg, how, gb, groupby_func, args, warn_msg) + _call_and_check(klass, msg, how, gb, groupby_func, args, warn_category, warn_msg) @pytest.mark.parametrize("how", ["agg", "transform"]) @@ -337,10 +344,12 @@ def test_groupby_raises_datetime( }[groupby_func] if groupby_func == "corrwith": + warn_category = Pandas4Warning warn_msg = "DataFrameGroupBy.corrwith is deprecated" else: + warn_category = None warn_msg = "" - _call_and_check(klass, msg, how, gb, groupby_func, args, warn_msg=warn_msg) + _call_and_check(klass, msg, how, gb, groupby_func, args, warn_category, warn_msg) @pytest.mark.parametrize("how", ["agg", "transform"]) @@ -541,10 +550,12 @@ def test_groupby_raises_category( }[groupby_func] if groupby_func == "corrwith": + warn_category = Pandas4Warning warn_msg = "DataFrameGroupBy.corrwith is deprecated" else: + warn_category = None warn_msg = "" - _call_and_check(klass, msg, how, gb, groupby_func, args, warn_msg) + _call_and_check(klass, msg, how, gb, groupby_func, 
args, warn_category, warn_msg) @pytest.mark.parametrize("how", ["agg", "transform"]) @@ -724,7 +735,9 @@ def test_groupby_raises_category_on_category( }[groupby_func] if groupby_func == "corrwith": + warn_category = Pandas4Warning warn_msg = "DataFrameGroupBy.corrwith is deprecated" else: + warn_category = None warn_msg = "" - _call_and_check(klass, msg, how, gb, groupby_func, args, warn_msg) + _call_and_check(klass, msg, how, gb, groupby_func, args, warn_category, warn_msg) diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index fecd20fd6cece..59092385fbf3b 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -4,6 +4,7 @@ import pytest from pandas._libs import lib +from pandas.errors import Pandas4Warning from pandas.core.dtypes.common import ensure_platform_int @@ -1103,7 +1104,7 @@ def test_transform_agg_by_name(request, reduction_func, frame_or_series): args = get_groupby_method_args(reduction_func, obj) if func == "corrwith": - warn = FutureWarning + warn = Pandas4Warning msg = "DataFrameGroupBy.corrwith is deprecated" else: warn = None @@ -1470,7 +1471,7 @@ def test_as_index_no_change(keys, df, groupby_func): gb_as_index_true = df.groupby(keys, as_index=True) gb_as_index_false = df.groupby(keys, as_index=False) if groupby_func == "corrwith": - warn = FutureWarning + warn = Pandas4Warning msg = "DataFrameGroupBy.corrwith is deprecated" else: warn = None diff --git a/pandas/tests/io/formats/test_to_markdown.py b/pandas/tests/io/formats/test_to_markdown.py index f3d9b88cc91e2..9b1e22ce1bb06 100644 --- a/pandas/tests/io/formats/test_to_markdown.py +++ b/pandas/tests/io/formats/test_to_markdown.py @@ -2,6 +2,8 @@ import pytest +from pandas.errors import Pandas4Warning + import pandas as pd import pandas._testing as tm @@ -15,7 +17,7 @@ def test_keyword_deprecation(): "except for the argument 'buf' will be keyword-only." 
) s = pd.Series() - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(Pandas4Warning, match=msg): s.to_markdown(None, "wt") diff --git a/pandas/tests/io/formats/test_to_string.py b/pandas/tests/io/formats/test_to_string.py index 0866581535c2f..1d414cef152c7 100644 --- a/pandas/tests/io/formats/test_to_string.py +++ b/pandas/tests/io/formats/test_to_string.py @@ -12,6 +12,8 @@ from pandas._config import using_string_dtype +from pandas.errors import Pandas4Warning + from pandas import ( CategoricalIndex, DataFrame, @@ -42,7 +44,7 @@ def test_keyword_deprecation(self): "except for the argument 'buf' will be keyword-only." ) s = Series(["a", "b"]) - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(Pandas4Warning, match=msg): s.to_string(None, "NaN") def test_to_string_masked_ea_with_formatter(self): diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index 7936982e4a055..769249ef4c47b 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -7,6 +7,8 @@ import numpy as np import pytest +from pandas.errors import Pandas4Warning + from pandas.core.dtypes.dtypes import ( CategoricalDtype, DatetimeTZDtype, @@ -460,7 +462,7 @@ def test_date_format_raises(self, df_table): "version, please use 'iso' date format instead." 
) with pytest.raises(ValueError, match=error_msg): - with tm.assert_produces_warning(FutureWarning, match=warning_msg): + with tm.assert_produces_warning(Pandas4Warning, match=warning_msg): df_table.to_json(orient="table", date_format="epoch") # others work diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 32eeb30de4b69..62f0cc50aea64 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -15,6 +15,7 @@ from pandas._config import using_string_dtype from pandas.compat import IS64 +from pandas.errors import Pandas4Warning import pandas.util._test_decorators as td import pandas as pd @@ -145,7 +146,7 @@ def test_frame_non_unique_columns(self, orient, data, request): "in a future version, please use 'iso' date format instead." ) if df.iloc[:, 0].dtype == "datetime64[s]": - expected_warning = FutureWarning + expected_warning = Pandas4Warning with tm.assert_produces_warning(expected_warning, match=msg): result = read_json( @@ -780,7 +781,7 @@ def test_series_with_dtype_datetime(self, dtype, expected): "The default 'epoch' date format is deprecated and will be removed " "in a future version, please use 'iso' date format instead." ) - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(Pandas4Warning, match=msg): data = StringIO(s.to_json()) result = read_json(data, typ="series", dtype=dtype) tm.assert_series_equal(result, expected) @@ -831,13 +832,13 @@ def test_convert_dates(self, datetime_series, datetime_frame): "The default 'epoch' date format is deprecated and will be removed " "in a future version, please use 'iso' date format instead." 
) - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(Pandas4Warning, match=msg): json = StringIO(df.to_json()) result = read_json(json) tm.assert_frame_equal(result, df) df["foo"] = 1.0 - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(Pandas4Warning, match=msg): json = StringIO(df.to_json(date_unit="ns")) result = read_json(json, convert_dates=False) @@ -848,7 +849,7 @@ def test_convert_dates(self, datetime_series, datetime_frame): # series ts = Series(Timestamp("20130101").as_unit("ns"), index=datetime_series.index) - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(Pandas4Warning, match=msg): json = StringIO(ts.to_json()) result = read_json(json, typ="series") tm.assert_series_equal(result, ts) @@ -870,7 +871,7 @@ def test_date_index_and_values(self, date_format, as_object, date_typ): expected_warning = None if date_format == "epoch": expected = '{"1577836800000":1577836800000,"null":null}' - expected_warning = FutureWarning + expected_warning = Pandas4Warning else: expected = ( '{"2020-01-01T00:00:00.000":"2020-01-01T00:00:00.000","null":null}' @@ -984,7 +985,7 @@ def test_date_unit(self, unit, datetime_frame): "'epoch' date format is deprecated and will be removed in a future " "version, please use 'iso' date format instead." 
) - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(Pandas4Warning, match=msg): json = df.to_json(date_format="epoch", date_unit=unit) # force date unit @@ -1004,13 +1005,13 @@ def test_date_unit(self, unit, datetime_frame): DataFrame( {"A": ["a", "b", "c"], "B": pd.to_timedelta(np.arange(3), unit="D")} ), - FutureWarning, + Pandas4Warning, ), ( DataFrame( {"A": pd.to_datetime(["2020-01-01", "2020-02-01", "2020-03-01"])} ), - FutureWarning, + Pandas4Warning, ), ], ) @@ -1096,7 +1097,7 @@ def test_doc_example(self): "The default 'epoch' date format is deprecated and will be removed " "in a future version, please use 'iso' date format instead." ) - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(Pandas4Warning, match=msg): json = StringIO(dfj2.to_json()) result = read_json(json, dtype={"ints": np.int64, "bools": np.bool_}) tm.assert_frame_equal(result, result) @@ -1138,20 +1139,20 @@ def test_timedelta(self): "The default 'epoch' date format is deprecated and will be removed " "in a future version, please use 'iso' date format instead." 
) - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(Pandas4Warning, match=msg): result = read_json(StringIO(ser.to_json()), typ="series").apply(converter) tm.assert_series_equal(result, ser) ser = Series([timedelta(23), timedelta(seconds=5)], index=Index([0, 1])) assert ser.dtype == "timedelta64[ns]" - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(Pandas4Warning, match=msg): result = read_json(StringIO(ser.to_json()), typ="series").apply(converter) tm.assert_series_equal(result, ser) frame = DataFrame([timedelta(23), timedelta(seconds=5)]) assert frame[0].dtype == "timedelta64[ns]" - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(Pandas4Warning, match=msg): json = frame.to_json() tm.assert_frame_equal(frame, read_json(StringIO(json)).apply(converter)) @@ -1167,7 +1168,7 @@ def test_timedelta2(self): "The default 'epoch' date format is deprecated and will be removed " "in a future version, please use 'iso' date format instead." 
) - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(Pandas4Warning, match=msg): data = StringIO(frame.to_json(date_unit="ns")) result = read_json(data) result["a"] = pd.to_timedelta(result.a, unit="ns") @@ -1202,7 +1203,7 @@ def test_timedelta_to_json(self, as_object, date_format, timedelta_typ): '{"P1DT0H0M0S":"P1DT0H0M0S","P2DT0H0M0S":"P2DT0H0M0S","null":null}' ) else: - expected_warning = FutureWarning + expected_warning = Pandas4Warning expected = '{"86400000":86400000,"172800000":172800000,"null":null}' if as_object: @@ -1221,7 +1222,7 @@ def test_timedelta_to_json(self, as_object, date_format, timedelta_typ): def test_timedelta_to_json_fractional_precision(self, as_object, timedelta_typ): data = [timedelta_typ(milliseconds=42)] ser = Series(data, index=data) - warn = FutureWarning + warn = Pandas4Warning if as_object: ser = ser.astype(object) warn = None @@ -1315,13 +1316,13 @@ def test_datetime_tz(self): "The default 'epoch' date format is deprecated and will be removed " "in a future version, please use 'iso' date format instead." ) - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(Pandas4Warning, match=msg): expected = df_naive.to_json() assert expected == df.to_json() stz = Series(tz_range) s_naive = Series(tz_naive) - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(Pandas4Warning, match=msg): assert stz.to_json() == s_naive.to_json() def test_sparse(self): @@ -1588,7 +1589,7 @@ def test_to_json_from_json_columns_dtypes(self, orient): "The default 'epoch' date format is deprecated and will be removed " "in a future version, please use 'iso' date format instead." 
) - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(Pandas4Warning, match=msg): dfjson = expected.to_json(orient=orient) result = read_json( @@ -1790,7 +1791,7 @@ def test_timedelta_as_label(self, date_format, key): expected_warning = None if date_format == "epoch": - expected_warning = FutureWarning + expected_warning = Pandas4Warning msg = ( "'epoch' date format is deprecated and will be removed in a future " @@ -2032,7 +2033,7 @@ def test_json_pandas_nulls(self, nulls_fixture): "in a future version, please use 'iso' date format instead." ) if nulls_fixture is pd.NaT: - expected_warning = FutureWarning + expected_warning = Pandas4Warning with tm.assert_produces_warning(expected_warning, match=msg): result = DataFrame([[nulls_fixture]]).to_json() diff --git a/pandas/tests/io/test_gcs.py b/pandas/tests/io/test_gcs.py index f68ef5fa2e0e5..f5a2a811ff433 100644 --- a/pandas/tests/io/test_gcs.py +++ b/pandas/tests/io/test_gcs.py @@ -8,6 +8,7 @@ import pytest from pandas.compat.pyarrow import pa_version_under17p0 +from pandas.errors import Pandas4Warning from pandas import ( DataFrame, @@ -84,7 +85,7 @@ def test_to_read_gcs(gcs_buffer, format, monkeypatch, capsys, request): "The default 'epoch' date format is deprecated and will be removed " "in a future version, please use 'iso' date format instead." 
) - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(Pandas4Warning, match=msg): df1.to_json(path) df2 = read_json(path, convert_dates=["dt"]) elif format == "parquet": diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index f871c0bf0218c..a3f97c818e335 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -8,6 +8,7 @@ from pandas._libs import lib from pandas._typing import DatetimeNaTType from pandas.compat import is_platform_windows +from pandas.errors import Pandas4Warning import pandas.util._test_decorators as td import pandas as pd @@ -1313,7 +1314,7 @@ def test_resample_consistency(unit): s10 = s.reindex(index=i10, method="bfill") s10_2 = s.reindex(index=i10, method="bfill", limit=2) - with tm.assert_produces_warning(FutureWarning): + with tm.assert_produces_warning(Pandas4Warning): rl = s.reindex_like(s10, method="bfill", limit=2) r10_2 = s.resample("10Min").bfill(limit=2) r10 = s.resample("10Min").bfill() diff --git a/pandas/tests/scalar/timestamp/test_constructors.py b/pandas/tests/scalar/timestamp/test_constructors.py index 2c97c4a32e0aa..be624855ca2a5 100644 --- a/pandas/tests/scalar/timestamp/test_constructors.py +++ b/pandas/tests/scalar/timestamp/test_constructors.py @@ -17,7 +17,10 @@ import pytest from pandas._libs.tslibs.dtypes import NpyDatetimeUnit -from pandas.errors import OutOfBoundsDatetime +from pandas.errors import ( + OutOfBoundsDatetime, + Pandas4Warning, +) from pandas import ( NA, @@ -325,13 +328,13 @@ class TestTimestampClassMethodConstructors: def test_utcnow_deprecated(self): # GH#56680 msg = "Timestamp.utcnow is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(Pandas4Warning, match=msg): Timestamp.utcnow() def test_utcfromtimestamp_deprecated(self): # GH#56680 msg = "Timestamp.utcfromtimestamp is deprecated" - with 
tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(Pandas4Warning, match=msg): Timestamp.utcfromtimestamp(43) def test_constructor_strptime(self): diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index 38d0ddfbc13bd..345386025b209 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -30,6 +30,7 @@ tz_compare, ) from pandas.compat import IS64 +from pandas.errors import Pandas4Warning from pandas import ( NaT, @@ -268,7 +269,7 @@ def test_disallow_setting_tz(self, tz): def test_default_to_stdlib_utc(self): msg = "Timestamp.utcnow is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(Pandas4Warning, match=msg): assert Timestamp.utcnow().tz is timezone.utc assert Timestamp.now("UTC").tz is timezone.utc assert Timestamp("2016-01-01", tz="UTC").tz is timezone.utc @@ -313,13 +314,13 @@ def compare(x, y): compare(Timestamp.now("UTC"), datetime.now(timezone.utc)) compare(Timestamp.now("UTC"), datetime.now(tzutc())) msg = "Timestamp.utcnow is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(Pandas4Warning, match=msg): compare(Timestamp.utcnow(), datetime.now(timezone.utc)) compare(Timestamp.today(), datetime.today()) current_time = calendar.timegm(datetime.now().utctimetuple()) msg = "Timestamp.utcfromtimestamp is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(Pandas4Warning, match=msg): ts_utc = Timestamp.utcfromtimestamp(current_time) assert ts_utc.timestamp() == current_time compare( diff --git a/pandas/tests/series/accessors/test_cat_accessor.py b/pandas/tests/series/accessors/test_cat_accessor.py index f017ccd963972..7fb33f7d09d97 100644 --- a/pandas/tests/series/accessors/test_cat_accessor.py +++ 
b/pandas/tests/series/accessors/test_cat_accessor.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas.errors import Pandas4Warning + from pandas import ( Categorical, DataFrame, @@ -202,7 +204,7 @@ def test_dt_accessor_api_for_categorical(self, idx): warn_cls.append(UserWarning) elif func == "to_pytimedelta": # GH 57463 - warn_cls.append(FutureWarning) + warn_cls.append(Pandas4Warning) if warn_cls: warn_cls = tuple(warn_cls) else: diff --git a/pandas/tests/series/accessors/test_dt_accessor.py b/pandas/tests/series/accessors/test_dt_accessor.py index 2c441a6ed91c1..f69c90ced2828 100644 --- a/pandas/tests/series/accessors/test_dt_accessor.py +++ b/pandas/tests/series/accessors/test_dt_accessor.py @@ -11,6 +11,7 @@ import pytest from pandas._libs.tslibs.timezones import maybe_get_tz +from pandas.errors import Pandas4Warning from pandas.core.dtypes.common import ( is_integer_dtype, @@ -193,7 +194,7 @@ def test_dt_namespace_accessor_timedelta(self): tm.assert_index_equal(result.index, ser.index) msg = "The behavior of TimedeltaProperties.to_pytimedelta is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(Pandas4Warning, match=msg): result = ser.dt.to_pytimedelta() assert isinstance(result, np.ndarray) assert result.dtype == object diff --git a/pandas/tests/series/methods/test_reindex_like.py b/pandas/tests/series/methods/test_reindex_like.py index 10b8ac5817636..15cd1502dadfb 100644 --- a/pandas/tests/series/methods/test_reindex_like.py +++ b/pandas/tests/series/methods/test_reindex_like.py @@ -2,6 +2,8 @@ import numpy as np +from pandas.errors import Pandas4Warning + from pandas import Series import pandas._testing as tm @@ -20,7 +22,7 @@ def test_reindex_like(datetime_series): series1 = Series([5, None, None], [day1, day2, day3]) series2 = Series([None, None], [day1, day3]) - with tm.assert_produces_warning(FutureWarning): + with tm.assert_produces_warning(Pandas4Warning): result = 
series1.reindex_like(series2, method="pad") expected = Series([5, np.nan], index=[day1, day3]) tm.assert_series_equal(result, expected) @@ -33,13 +35,13 @@ def test_reindex_like_nearest(): other = ser.reindex(target, method="nearest") expected = Series(np.around(target).astype("int64"), target) - with tm.assert_produces_warning(FutureWarning): + with tm.assert_produces_warning(Pandas4Warning): result = ser.reindex_like(other, method="nearest") tm.assert_series_equal(expected, result) - with tm.assert_produces_warning(FutureWarning): + with tm.assert_produces_warning(Pandas4Warning): result = ser.reindex_like(other, method="nearest", tolerance=1) tm.assert_series_equal(expected, result) - with tm.assert_produces_warning(FutureWarning): + with tm.assert_produces_warning(Pandas4Warning): result = ser.reindex_like(other, method="nearest", tolerance=[1, 2, 3, 4]) tm.assert_series_equal(expected, result) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index b02fab70fb825..74f02a0b33396 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -1051,7 +1051,9 @@ def test_to_datetime_today(self, tz): def test_to_datetime_today_now_unicode_bytes(self, arg): to_datetime([arg]) - @pytest.mark.filterwarnings("ignore:Timestamp.utcnow is deprecated:FutureWarning") + @pytest.mark.filterwarnings( + "ignore:Timestamp.utcnow is deprecated:DeprecationWarning" + ) @pytest.mark.skipif(WASM, reason="tzset is not available on WASM") @pytest.mark.parametrize( "format, expected_ds", diff --git a/pandas/tests/util/test_deprecate.py b/pandas/tests/util/test_deprecate.py index 92f422b8269f5..bdbf2ca2d028d 100644 --- a/pandas/tests/util/test_deprecate.py +++ b/pandas/tests/util/test_deprecate.py @@ -37,7 +37,9 @@ def new_func_with_deprecation(): def test_deprecate_ok(): - depr_func = deprecate("depr_func", new_func, "1.0", msg="Use new_func instead.") + depr_func = deprecate( + "depr_func", new_func, "1.0", 
msg="Use new_func instead.", klass=FutureWarning + ) with tm.assert_produces_warning(FutureWarning): result = depr_func() @@ -48,7 +50,11 @@ def test_deprecate_ok(): def test_deprecate_no_docstring(): depr_func = deprecate( - "depr_func", new_func_no_docstring, "1.0", msg="Use new_func instead." + "depr_func", + new_func_no_docstring, + "1.0", + msg="Use new_func instead.", + klass=FutureWarning, ) with tm.assert_produces_warning(FutureWarning): result = depr_func() diff --git a/pandas/tests/util/test_deprecate_kwarg.py b/pandas/tests/util/test_deprecate_kwarg.py index b165e9fba0e4f..bbd37a379ede1 100644 --- a/pandas/tests/util/test_deprecate_kwarg.py +++ b/pandas/tests/util/test_deprecate_kwarg.py @@ -5,7 +5,7 @@ import pandas._testing as tm -@deprecate_kwarg("old", "new") +@deprecate_kwarg("old", new_arg_name="new", klass=FutureWarning) def _f1(new=False): return new @@ -13,7 +13,7 @@ def _f1(new=False): _f2_mappings = {"yes": True, "no": False} -@deprecate_kwarg("old", "new", _f2_mappings) +@deprecate_kwarg("old", new_arg_name="new", mapping=_f2_mappings, klass=FutureWarning) def _f2(new=False): return new @@ -22,7 +22,7 @@ def _f3_mapping(x): return x + 1 -@deprecate_kwarg("old", "new", _f3_mapping) +@deprecate_kwarg("old", new_arg_name="new", mapping=_f3_mapping, klass=FutureWarning) def _f3(new=0): return new @@ -70,7 +70,7 @@ def f4(new=None): return new -@deprecate_kwarg("old", None) +@deprecate_kwarg("old", new_arg_name=None, klass=FutureWarning) def _f4(old=True, unchanged=True): return old, unchanged diff --git a/pandas/tests/util/test_deprecate_nonkeyword_arguments.py b/pandas/tests/util/test_deprecate_nonkeyword_arguments.py index f81d32b574682..ae2bd0b487019 100644 --- a/pandas/tests/util/test_deprecate_nonkeyword_arguments.py +++ b/pandas/tests/util/test_deprecate_nonkeyword_arguments.py @@ -10,7 +10,7 @@ @deprecate_nonkeyword_arguments( - version="1.1", allowed_args=["a", "b"], name="f_add_inputs" + version="1.1", allowed_args=["a", "b"], 
name="f_add_inputs", klass=FutureWarning ) def f(a, b=0, c=0, d=0): return a + b + c + d @@ -59,7 +59,7 @@ def test_three_arguments_with_name_in_warning(): assert f(6, 3, 3) == 12 -@deprecate_nonkeyword_arguments(version="1.1") +@deprecate_nonkeyword_arguments(version="1.1", klass=FutureWarning) def g(a, b=0, c=0, d=0): with tm.assert_produces_warning(None): return a + b + c + d @@ -88,7 +88,7 @@ def test_three_positional_argument_with_warning_message_analysis(): assert g(6, 3, 3) == 12 -@deprecate_nonkeyword_arguments(version="1.1") +@deprecate_nonkeyword_arguments(version="1.1", klass=FutureWarning) def h(a=0, b=0, c=0, d=0): return a + b + c + d @@ -113,7 +113,7 @@ def test_one_positional_argument_with_warning_message_analysis(): assert h(19) == 19 -@deprecate_nonkeyword_arguments(version="1.1") +@deprecate_nonkeyword_arguments(version="1.1", klass=UserWarning) def i(a=0, /, b=0, *, c=0, d=0): return a + b + c + d @@ -122,8 +122,15 @@ def test_i_signature(): assert str(inspect.signature(i)) == "(*, a=0, b=0, c=0, d=0)" +def test_i_warns_klass(): + with tm.assert_produces_warning(UserWarning): + assert i(1, 2) == 3 + + class Foo: - @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "bar"]) + @deprecate_nonkeyword_arguments( + version=None, allowed_args=["self", "bar"], klass=FutureWarning + ) def baz(self, bar=None, foobar=None): ... 
diff --git a/pandas/tests/util/test_pandas_deprecation_warning.py b/pandas/tests/util/test_pandas_deprecation_warning.py new file mode 100644 index 0000000000000..55b132f843adc --- /dev/null +++ b/pandas/tests/util/test_pandas_deprecation_warning.py @@ -0,0 +1,25 @@ +import warnings + +from pandas.errors import PandasChangeWarning +from pandas.util._decorators import deprecate_kwarg + +import pandas._testing as tm + + +def f1(): + warnings.warn("f1", PandasChangeWarning) + + +def test_function_warns_pandas_deprecation_warning(): + with tm.assert_produces_warning(PandasChangeWarning): + f1() + + +@deprecate_kwarg("old", klass=PandasChangeWarning, new_arg_name="new") +def f2(new=0): + return new + + +def test_decorator_warns_pandas_deprecation_warning(): + with tm.assert_produces_warning(PandasChangeWarning): + f2(old=1) diff --git a/pandas/util/_decorators.py b/pandas/util/_decorators.py index a1a0d51a7c72b..120ad166004c6 100644 --- a/pandas/util/_decorators.py +++ b/pandas/util/_decorators.py @@ -28,8 +28,8 @@ def deprecate( name: str, alternative: Callable[..., Any], version: str, + klass: type[Warning], alt_name: str | None = None, - klass: type[Warning] | None = None, stacklevel: int = 2, msg: str | None = None, ) -> Callable[[F], F]: @@ -52,14 +52,14 @@ def deprecate( Version of pandas in which the method has been deprecated. alt_name : str, optional Name to use in preference of alternative.__name__. - klass : Warning, default FutureWarning + klass : Warning, optional + The warning class to use. stacklevel : int, default 2 msg : str The message to display in the warning. Default is '{name} is deprecated. Use {alt_name} instead.' """ alt_name = alt_name or alternative.__name__ - klass = klass or FutureWarning warning_msg = msg or f"{name} is deprecated, use {alt_name} instead." 
@wraps(alternative) @@ -101,6 +101,7 @@ def wrapper(*args, **kwargs) -> Callable[..., Any]: def deprecate_kwarg( old_arg_name: str, + klass: type[Warning], new_arg_name: str | None, mapping: Mapping[Any, Any] | Callable[[Any], Any] | None = None, stacklevel: int = 2, @@ -111,7 +112,7 @@ def deprecate_kwarg( Parameters ---------- old_arg_name : str - Name of argument in function to deprecate + Name of argument in function to deprecate. new_arg_name : str or None Name of preferred argument in function. Use None to raise warning that ``old_arg_name`` keyword is deprecated. @@ -119,6 +120,9 @@ def deprecate_kwarg( If mapping is present, use it to translate old arguments to new arguments. A callable must do its own value checking; values not found in a dict will be forwarded unchanged. + klass : Warning, optional + The warning class to use. + stacklevel : int, default 2 Examples -------- @@ -153,14 +157,14 @@ def deprecate_kwarg( ... print(cols) >>> f(cols="should raise warning") # doctest: +SKIP FutureWarning: the 'cols' keyword is deprecated and will be removed in a - future version please takes steps to stop use of 'cols' + future version. Please take steps to stop the use of 'cols' should raise warning >>> f(another_param="should not raise warning") # doctest: +SKIP should not raise warning >>> f(cols="should raise warning", another_param="") # doctest: +SKIP FutureWarning: the 'cols' keyword is deprecated and will be removed in a - future version please takes steps to stop use of 'cols' + future version. Please take steps to stop the use of 'cols' should raise warning """ if mapping is not None and not hasattr(mapping, "get") and not callable(mapping): @@ -180,7 +184,7 @@ def wrapper(*args, **kwargs) -> Callable[..., Any]: "will be removed in a future version. 
Please take " f"steps to stop the use of {old_arg_name!r}" ) - warnings.warn(msg, FutureWarning, stacklevel=stacklevel) + warnings.warn(msg, klass, stacklevel=stacklevel) kwargs[old_arg_name] = old_arg_value return func(*args, **kwargs) @@ -201,7 +205,7 @@ def wrapper(*args, **kwargs) -> Callable[..., Any]: f"use {new_arg_name!r} instead." ) - warnings.warn(msg, FutureWarning, stacklevel=stacklevel) + warnings.warn(msg, klass, stacklevel=stacklevel) if kwargs.get(new_arg_name) is not None: msg = ( f"Can only specify {old_arg_name!r} " @@ -264,6 +268,7 @@ def future_version_msg(version: str | None) -> str: def deprecate_nonkeyword_arguments( version: str | None, + klass: type[Warning], allowed_args: list[str] | None = None, name: str | None = None, ) -> Callable[[F], F]: @@ -288,6 +293,9 @@ def deprecate_nonkeyword_arguments( The specific name of the function to show in the warning message. If None, then the Qualified name of the function is used. + + klass : Warning, optional + The warning class to use. """ def decorate(func): @@ -326,7 +334,7 @@ def wrapper(*args, **kwargs): if len(args) > num_allow_args: warnings.warn( msg.format(arguments=_format_argument_list(allow_args)), - FutureWarning, + klass, stacklevel=find_stack_level(), ) return func(*args, **kwargs) From 054398dfb6f50e4a2ffb28a5f337cbdc6af004e9 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Tue, 20 May 2025 18:30:26 -0400 Subject: [PATCH 02/26] Refinements --- pandas/errors/__init__.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 1179e5d37f414..2ed1cc65d3c68 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -93,37 +93,37 @@ class PerformanceWarning(Warning): class PandasChangeWarning(Warning): """ - Warning raised for any pending deprecation. + Warning raised for any an upcoming change. 
""" -class Pandas4Warning(PandasChangeWarning, DeprecationWarning): +class PandasPendingDeprecationWarning(PandasChangeWarning, PendingDeprecationWarning): """ - Warning raised for a pending deprecation that will be enforced in pandas 4.0. + Warning raised for an upcoming change that is a PendingDeprecationWarning. """ -class Pandas5Warning(PandasChangeWarning, PendingDeprecationWarning): +class PandasDeprecationWarning(PandasChangeWarning, DeprecationWarning): """ - Warning raised for a pending deprecation that will be enforced in pandas 5.0. + Warning raised for an upcoming change that is a DeprecationWarning. """ -class PandasPendingDeprecationWarning(PandasChangeWarning, PendingDeprecationWarning): +class PandasFutureWarning(PandasChangeWarning, FutureWarning): """ - Warning raised for a pending deprecation that is a PendingDeprecationWarning. + Warning raised for an upcoming change that is a FutureWarning. """ -class PandasDeprecationWarning(PandasChangeWarning, DeprecationWarning): +class Pandas4Warning(PandasDeprecationWarning): """ - Warning raised for a pending deprecation that is a DeprecationWarning. + Warning raised for an upcoming change that will be enforced in pandas 4.0. """ -class PandasFutureWarning(PandasChangeWarning, FutureWarning): +class Pandas5Warning(PandasPendingDeprecationWarning): """ - Warning raised for a pending deprecation that is a FutureWarning. + Warning raised for an upcoming change that will be enforced in pandas 5.0. 
""" From 4e46d69f0b26d3208d5b61266d651b728bb2bb07 Mon Sep 17 00:00:00 2001 From: richard Date: Wed, 21 May 2025 00:00:21 -0400 Subject: [PATCH 03/26] Refinements --- pandas/core/frame.py | 46 ++++++---------------- pandas/core/generic.py | 2 +- pandas/core/series.py | 56 +++++++-------------------- pandas/errors/__init__.py | 60 +++++++++++++++++++++++++++++ pandas/tests/test_errors.py | 25 ++++++++++++ pandas/tests/util/test_deprecate.py | 10 +++-- pandas/util/_decorators.py | 41 +++++++++++--------- 7 files changed, 141 insertions(+), 99 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c146de59da862..b7bd526c37493 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -11910,9 +11910,7 @@ def all( **kwargs, ) -> Series | bool: ... - @deprecate_nonkeyword_arguments( - version="4.0", klass=Pandas4Warning, allowed_args=["self"], name="all" - ) + @deprecate_nonkeyword_arguments(Pandas4Warning, allowed_args=["self"], name="all") @doc(make_doc("all", ndim=1)) def all( self, @@ -11959,9 +11957,7 @@ def min( **kwargs, ) -> Series | Any: ... - @deprecate_nonkeyword_arguments( - version="4.0", klass=Pandas4Warning, allowed_args=["self"], name="min" - ) + @deprecate_nonkeyword_arguments(Pandas4Warning, allowed_args=["self"], name="min") @doc(make_doc("min", ndim=2)) def min( self, @@ -12008,9 +12004,7 @@ def max( **kwargs, ) -> Series | Any: ... 
- @deprecate_nonkeyword_arguments( - version="4.0", klass=Pandas4Warning, allowed_args=["self"], name="max" - ) + @deprecate_nonkeyword_arguments(Pandas4Warning, allowed_args=["self"], name="max") @doc(make_doc("max", ndim=2)) def max( self, @@ -12026,9 +12020,7 @@ def max( result = result.__finalize__(self, method="max") return result - @deprecate_nonkeyword_arguments( - version="4.0", klass=Pandas4Warning, allowed_args=["self"], name="sum" - ) + @deprecate_nonkeyword_arguments(Pandas4Warning, allowed_args=["self"], name="sum") def sum( self, axis: Axis | None = 0, @@ -12129,9 +12121,7 @@ def sum( result = result.__finalize__(self, method="sum") return result - @deprecate_nonkeyword_arguments( - version="4.0", klass=Pandas4Warning, allowed_args=["self"], name="prod" - ) + @deprecate_nonkeyword_arguments(Pandas4Warning, allowed_args=["self"], name="prod") def prod( self, axis: Axis | None = 0, @@ -12249,9 +12239,7 @@ def mean( **kwargs, ) -> Series | Any: ... - @deprecate_nonkeyword_arguments( - version="4.0", klass=Pandas4Warning, allowed_args=["self"], name="mean" - ) + @deprecate_nonkeyword_arguments(Pandas4Warning, allowed_args=["self"], name="mean") @doc(make_doc("mean", ndim=2)) def mean( self, @@ -12299,7 +12287,7 @@ def median( ) -> Series | Any: ... @deprecate_nonkeyword_arguments( - version="4.0", klass=Pandas4Warning, allowed_args=["self"], name="median" + Pandas4Warning, allowed_args=["self"], name="median" ) @doc(make_doc("median", ndim=2)) def median( @@ -12350,9 +12338,7 @@ def sem( **kwargs, ) -> Series | Any: ... - @deprecate_nonkeyword_arguments( - version="4.0", klass=Pandas4Warning, allowed_args=["self"], name="sem" - ) + @deprecate_nonkeyword_arguments(Pandas4Warning, allowed_args=["self"], name="sem") def sem( self, axis: Axis | None = 0, @@ -12472,9 +12458,7 @@ def var( **kwargs, ) -> Series | Any: ... 
- @deprecate_nonkeyword_arguments( - version="4.0", klass=Pandas4Warning, allowed_args=["self"], name="var" - ) + @deprecate_nonkeyword_arguments(Pandas4Warning, allowed_args=["self"], name="var") def var( self, axis: Axis | None = 0, @@ -12593,9 +12577,7 @@ def std( **kwargs, ) -> Series | Any: ... - @deprecate_nonkeyword_arguments( - version="4.0", klass=Pandas4Warning, allowed_args=["self"], name="std" - ) + @deprecate_nonkeyword_arguments(Pandas4Warning, allowed_args=["self"], name="std") def std( self, axis: Axis | None = 0, @@ -12718,9 +12700,7 @@ def skew( **kwargs, ) -> Series | Any: ... - @deprecate_nonkeyword_arguments( - version="4.0", klass=Pandas4Warning, allowed_args=["self"], name="skew" - ) + @deprecate_nonkeyword_arguments(Pandas4Warning, allowed_args=["self"], name="skew") def skew( self, axis: Axis | None = 0, @@ -12840,9 +12820,7 @@ def kurt( **kwargs, ) -> Series | Any: ... - @deprecate_nonkeyword_arguments( - version="4.0", klass=Pandas4Warning, allowed_args=["self"], name="kurt" - ) + @deprecate_nonkeyword_arguments(Pandas4Warning, allowed_args=["self"], name="kurt") def kurt( self, axis: Axis | None = 0, diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a6b8bd2983ff5..e72fd92236969 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4387,7 +4387,7 @@ def _check_copy_deprecation(copy): ) # issue 58667 - @deprecate_kwarg("method", klass=Pandas4Warning, new_arg_name=None) + @deprecate_kwarg(Pandas4Warning, "method", new_arg_name=None) @final def reindex_like( self, diff --git a/pandas/core/series.py b/pandas/core/series.py index 548b898438c51..f98e8c2563abb 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1473,10 +1473,7 @@ def to_string( ) -> None: ... 
@deprecate_nonkeyword_arguments( - version="4.0", - klass=Pandas4Warning, - allowed_args=["self", "buf"], - name="to_string", + Pandas4Warning, allowed_args=["self", "buf"], name="to_string" ) def to_string( self, @@ -1634,10 +1631,7 @@ def to_markdown( ), ) @deprecate_nonkeyword_arguments( - version="4.0", - klass=Pandas4Warning, - allowed_args=["self", "buf"], - name="to_markdown", + Pandas4Warning, allowed_args=["self", "buf"], name="to_markdown" ) def to_markdown( self, @@ -6650,9 +6644,7 @@ def any( # type: ignore[override] filter_type="bool", ) - @deprecate_nonkeyword_arguments( - version="4.0", klass=Pandas4Warning, allowed_args=["self"], name="all" - ) + @deprecate_nonkeyword_arguments(Pandas4Warning, allowed_args=["self"], name="all") @Appender(make_doc("all", ndim=1)) def all( self, @@ -6672,9 +6664,7 @@ def all( filter_type="bool", ) - @deprecate_nonkeyword_arguments( - version="4.0", klass=Pandas4Warning, allowed_args=["self"], name="min" - ) + @deprecate_nonkeyword_arguments(Pandas4Warning, allowed_args=["self"], name="min") def min( self, axis: Axis | None = 0, @@ -6745,9 +6735,7 @@ def min( self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs ) - @deprecate_nonkeyword_arguments( - version="4.0", klass=Pandas4Warning, allowed_args=["self"], name="max" - ) + @deprecate_nonkeyword_arguments(Pandas4Warning, allowed_args=["self"], name="max") def max( self, axis: Axis | None = 0, @@ -6818,9 +6806,7 @@ def max( self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs ) - @deprecate_nonkeyword_arguments( - version="4.0", klass=Pandas4Warning, allowed_args=["self"], name="sum" - ) + @deprecate_nonkeyword_arguments(Pandas4Warning, allowed_args=["self"], name="sum") def sum( self, axis: Axis | None = None, @@ -6921,9 +6907,7 @@ def sum( **kwargs, ) - @deprecate_nonkeyword_arguments( - version="4.0", klass=Pandas4Warning, allowed_args=["self"], name="prod" - ) + @deprecate_nonkeyword_arguments(Pandas4Warning, allowed_args=["self"], 
name="prod") @doc(make_doc("prod", ndim=1)) def prod( self, @@ -6942,9 +6926,7 @@ def prod( **kwargs, ) - @deprecate_nonkeyword_arguments( - version="4.0", klass=Pandas4Warning, allowed_args=["self"], name="mean" - ) + @deprecate_nonkeyword_arguments(Pandas4Warning, allowed_args=["self"], name="mean") def mean( self, axis: Axis | None = 0, @@ -6999,7 +6981,7 @@ def mean( ) @deprecate_nonkeyword_arguments( - version="4.0", klass=Pandas4Warning, allowed_args=["self"], name="median" + Pandas4Warning, allowed_args=["self"], name="median" ) def median( self, @@ -7081,9 +7063,7 @@ def median( self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs ) - @deprecate_nonkeyword_arguments( - version="4.0", klass=Pandas4Warning, allowed_args=["self"], name="sem" - ) + @deprecate_nonkeyword_arguments(Pandas4Warning, allowed_args=["self"], name="sem") @doc(make_doc("sem", ndim=1)) def sem( self, @@ -7102,9 +7082,7 @@ def sem( **kwargs, ) - @deprecate_nonkeyword_arguments( - version="4.0", klass=Pandas4Warning, allowed_args=["self"], name="var" - ) + @deprecate_nonkeyword_arguments(Pandas4Warning, allowed_args=["self"], name="var") def var( self, axis: Axis | None = None, @@ -7191,9 +7169,7 @@ def var( **kwargs, ) - @deprecate_nonkeyword_arguments( - version="4.0", klass=Pandas4Warning, allowed_args=["self"], name="std" - ) + @deprecate_nonkeyword_arguments(Pandas4Warning, allowed_args=["self"], name="std") @doc(make_doc("std", ndim=1)) def std( self, @@ -7212,9 +7188,7 @@ def std( **kwargs, ) - @deprecate_nonkeyword_arguments( - version="4.0", klass=Pandas4Warning, allowed_args=["self"], name="skew" - ) + @deprecate_nonkeyword_arguments(Pandas4Warning, allowed_args=["self"], name="skew") @doc(make_doc("skew", ndim=1)) def skew( self, @@ -7227,9 +7201,7 @@ def skew( self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs ) - @deprecate_nonkeyword_arguments( - version="4.0", klass=Pandas4Warning, allowed_args=["self"], name="kurt" - ) + 
@deprecate_nonkeyword_arguments(Pandas4Warning, allowed_args=["self"], name="kurt") def kurt( self, axis: Axis | None = 0, diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 2ed1cc65d3c68..57be6a07c464d 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -94,36 +94,90 @@ class PerformanceWarning(Warning): class PandasChangeWarning(Warning): """ Warning raised for any an upcoming change. + + See Also + -------- + errors.Pandas4Warning : Class for deprecations to be enforced in pandas 4.0. + + Examples + -------- + >>> pd.errors.PandasChangeWarning + <class 'pandas.errors.PandasChangeWarning'> """ class PandasPendingDeprecationWarning(PandasChangeWarning, PendingDeprecationWarning): """ Warning raised for an upcoming change that is a PendingDeprecationWarning. + + See Also + -------- + errors.Pandas4Warning : Class for deprecations to be enforced in pandas 4.0. + + Examples + -------- + >>> pd.errors.PandasPendingDeprecationWarning + <class 'pandas.errors.PandasPendingDeprecationWarning'> """ class PandasDeprecationWarning(PandasChangeWarning, DeprecationWarning): """ Warning raised for an upcoming change that is a DeprecationWarning. + + See Also + -------- + errors.Pandas4Warning : Class for deprecations to be enforced in pandas 4.0. + + Examples + -------- + >>> pd.errors.PandasDeprecationWarning + <class 'pandas.errors.PandasDeprecationWarning'> """ class PandasFutureWarning(PandasChangeWarning, FutureWarning): """ Warning raised for an upcoming change that is a FutureWarning. + + See Also + -------- + errors.Pandas4Warning : Class for deprecations to be enforced in pandas 4.0. + + Examples + -------- + >>> pd.errors.PandasFutureWarning + <class 'pandas.errors.PandasFutureWarning'> """ class Pandas4Warning(PandasDeprecationWarning): """ Warning raised for an upcoming change that will be enforced in pandas 4.0. + + See Also + -------- + errors.Pandas4Warning : Class for deprecations to be enforced in pandas 4.0. + + Examples + -------- + >>> pd.errors.Pandas4Warning + <class 'pandas.errors.Pandas4Warning'> """ class Pandas5Warning(PandasPendingDeprecationWarning): """ Warning raised for an upcoming change that will be enforced in pandas 5.0.
+ + See Also + -------- + errors.Pandas4Warning : Class for deprecations to be enforced in pandas 4.0. + + Examples + -------- + >>> pd.errors.Pandas5Warning + <class 'pandas.errors.Pandas5Warning'> """ @@ -968,6 +1022,12 @@ class InvalidComparison(Exception): "OptionError", "OutOfBoundsDatetime", "OutOfBoundsTimedelta", + "Pandas4Warning", + "Pandas5Warning", + "PandasChangeWarning", + "PandasDeprecationWarning", + "PandasFutureWarning", + "PandasPendingDeprecationWarning", "ParserError", "ParserWarning", "PerformanceWarning", diff --git a/pandas/tests/test_errors.py b/pandas/tests/test_errors.py index c5c4b234eb129..02b784a187f88 100644 --- a/pandas/tests/test_errors.py +++ b/pandas/tests/test_errors.py @@ -1,11 +1,19 @@ +import warnings + import pytest from pandas.errors import ( AbstractMethodError, + Pandas4Warning, + Pandas5Warning, + PandasChangeWarning, + PandasDeprecationWarning, + PandasPendingDeprecationWarning, UndefinedVariableError, ) import pandas as pd +import pandas._testing as tm @pytest.mark.parametrize( @@ -102,3 +110,20 @@ def test_AbstractMethodError_classmethod(): xpr = "This method must be defined in the concrete class Foo" with pytest.raises(AbstractMethodError, match=xpr): Foo().method() + + +@pytest.mark.parametrize( + "warn_category, filter_category", + [ + (Pandas4Warning, PandasChangeWarning), + (Pandas4Warning, PandasDeprecationWarning), + (Pandas5Warning, PandasChangeWarning), + (Pandas5Warning, PandasPendingDeprecationWarning), + ], +) +def test_pandas_warnings_filter(warn_category, filter_category): + # https://github.com/pandas-dev/pandas/pull/61468 + # Ensure users can suppress warnings.
+ with tm.assert_produces_warning(None), warnings.catch_warnings(): + warnings.filterwarnings(category=filter_category, action="ignore") + warnings.warn("test", category=warn_category) diff --git a/pandas/tests/util/test_deprecate.py b/pandas/tests/util/test_deprecate.py index bdbf2ca2d028d..94c8fe7fd45d1 100644 --- a/pandas/tests/util/test_deprecate.py +++ b/pandas/tests/util/test_deprecate.py @@ -38,7 +38,7 @@ def new_func_with_deprecation(): def test_deprecate_ok(): depr_func = deprecate( - "depr_func", new_func, "1.0", msg="Use new_func instead.", klass=FutureWarning + FutureWarning, "depr_func", new_func, "1.0", msg="Use new_func instead." ) with tm.assert_produces_warning(FutureWarning): @@ -50,11 +50,11 @@ def test_deprecate_ok(): def test_deprecate_no_docstring(): depr_func = deprecate( + FutureWarning, "depr_func", new_func_no_docstring, "1.0", msg="Use new_func instead.", - klass=FutureWarning, ) with tm.assert_produces_warning(FutureWarning): result = depr_func() @@ -65,5 +65,9 @@ def test_deprecate_wrong_docstring(): msg = "deprecate needs a correctly formatted docstring" with pytest.raises(AssertionError, match=msg): deprecate( - "depr_func", new_func_wrong_docstring, "1.0", msg="Use new_func instead." + FutureWarning, + "depr_func", + new_func_wrong_docstring, + "1.0", + msg="Use new_func instead.", ) diff --git a/pandas/util/_decorators.py b/pandas/util/_decorators.py index 120ad166004c6..803b608d282bf 100644 --- a/pandas/util/_decorators.py +++ b/pandas/util/_decorators.py @@ -25,10 +25,10 @@ def deprecate( + klass: type[Warning], name: str, alternative: Callable[..., Any], version: str, - klass: type[Warning], alt_name: str | None = None, stacklevel: int = 2, msg: str | None = None, @@ -44,6 +44,8 @@ def deprecate( Parameters ---------- + klass : Warning + The warning class to use. name : str Name of function to deprecate. alternative : func @@ -52,8 +54,6 @@ def deprecate( Version of pandas in which the method has been deprecated. 
alt_name : str, optional Name to use in preference of alternative.__name__. - klass : Warning, optional - The warning class to use. stacklevel : int, default 2 msg : str The message to display in the warning. @@ -100,8 +100,8 @@ def wrapper(*args, **kwargs) -> Callable[..., Any]: def deprecate_kwarg( - old_arg_name: str, klass: type[Warning], + old_arg_name: str, new_arg_name: str | None, mapping: Mapping[Any, Any] | Callable[[Any], Any] | None = None, stacklevel: int = 2, @@ -111,6 +111,8 @@ def deprecate_kwarg( Parameters ---------- + klass : Warning + The warning class to use. old_arg_name : str Name of argument in function to deprecate. new_arg_name : str or None @@ -120,15 +122,13 @@ def deprecate_kwarg( If mapping is present, use it to translate old arguments to new arguments. A callable must do its own value checking; values not found in a dict will be forwarded unchanged. - klass : Warning, optional - The warning class to use. stacklevel : int, default 2 Examples -------- The following deprecates 'cols', using 'columns' instead - >>> @deprecate_kwarg(old_arg_name="cols", new_arg_name="columns") + >>> @deprecate_kwarg(FutureWarning, old_arg_name="cols", new_arg_name="columns") ... def f(columns=""): ... print(columns) >>> f(columns="should work ok") @@ -142,7 +142,7 @@ def deprecate_kwarg( >>> f(cols="should error", columns="can't pass do both") # doctest: +SKIP TypeError: Can only specify 'cols' or 'columns', not both - >>> @deprecate_kwarg("old", "new", {"yes": True, "no": False}) + >>> @deprecate_kwarg(FutureWarning, "old", "new", {"yes": True, "no": False}) ... def f(new=False): ... print("yes!" if new else "no!") >>> f(old="yes") # doctest: +SKIP @@ -152,7 +152,7 @@ def deprecate_kwarg( To raise a warning that a keyword will be removed entirely in the future - >>> @deprecate_kwarg(old_arg_name="cols", new_arg_name=None) + >>> @deprecate_kwarg(FutureWarning, old_arg_name="cols", new_arg_name=None) ... def f(cols="", another_param=""): ... 
print(cols) >>> f(cols="should raise warning") # doctest: +SKIP @@ -267,7 +267,6 @@ def future_version_msg(version: str | None) -> str: def deprecate_nonkeyword_arguments( - version: str | None, klass: type[Warning], allowed_args: list[str] | None = None, name: str | None = None, @@ -277,26 +276,30 @@ def deprecate_nonkeyword_arguments( Parameters ---------- - version : str, optional - The version in which positional arguments will become - keyword-only. If None, then the warning message won't - specify any particular version. - + klass : Warning, optional + The warning class to use. allowed_args : list, optional In case of list, it must be the list of names of some first arguments of the decorated functions that are OK to be given as positional arguments. In case of None value, defaults to list of all arguments not having the default value. - name : str, optional The specific name of the function to show in the warning message. If None, then the Qualified name of the function is used. - - klass : Warning, optional - The warning class to use. 
""" + from pandas.errors import ( + Pandas4Warning, + Pandas5Warning, + ) + + if klass is Pandas4Warning: + version = "4.0" + elif klass is Pandas5Warning: + version = "5.0" + else: + raise AssertionError(f"{type(klass)=} must be a versioned warning") def decorate(func): old_sig = inspect.signature(func) From 3f3293f5532ac2973f546c6741203d377139c097 Mon Sep 17 00:00:00 2001 From: richard Date: Wed, 21 May 2025 00:22:31 -0400 Subject: [PATCH 04/26] Fixup --- pandas/tests/util/test_deprecate_kwarg.py | 10 ++--- .../test_deprecate_nonkeyword_arguments.py | 41 ++++++++++--------- .../util/test_pandas_deprecation_warning.py | 2 +- 3 files changed, 27 insertions(+), 26 deletions(-) diff --git a/pandas/tests/util/test_deprecate_kwarg.py b/pandas/tests/util/test_deprecate_kwarg.py index bbd37a379ede1..ae3519638ca03 100644 --- a/pandas/tests/util/test_deprecate_kwarg.py +++ b/pandas/tests/util/test_deprecate_kwarg.py @@ -5,7 +5,7 @@ import pandas._testing as tm -@deprecate_kwarg("old", new_arg_name="new", klass=FutureWarning) +@deprecate_kwarg(FutureWarning, "old", new_arg_name="new") def _f1(new=False): return new @@ -13,7 +13,7 @@ def _f1(new=False): _f2_mappings = {"yes": True, "no": False} -@deprecate_kwarg("old", new_arg_name="new", mapping=_f2_mappings, klass=FutureWarning) +@deprecate_kwarg(FutureWarning, "old", new_arg_name="new", mapping=_f2_mappings) def _f2(new=False): return new @@ -22,7 +22,7 @@ def _f3_mapping(x): return x + 1 -@deprecate_kwarg("old", new_arg_name="new", mapping=_f3_mapping, klass=FutureWarning) +@deprecate_kwarg(FutureWarning, "old", new_arg_name="new", mapping=_f3_mapping) def _f3(new=0): return new @@ -65,12 +65,12 @@ def test_bad_deprecate_kwarg(): with pytest.raises(TypeError, match=msg): - @deprecate_kwarg("old", "new", 0) + @deprecate_kwarg(FutureWarning, "old", "new", 0) def f4(new=None): return new -@deprecate_kwarg("old", new_arg_name=None, klass=FutureWarning) +@deprecate_kwarg(FutureWarning, "old", new_arg_name=None) def 
_f4(old=True, unchanged=True): return old, unchanged diff --git a/pandas/tests/util/test_deprecate_nonkeyword_arguments.py b/pandas/tests/util/test_deprecate_nonkeyword_arguments.py index ae2bd0b487019..8ca6482096da7 100644 --- a/pandas/tests/util/test_deprecate_nonkeyword_arguments.py +++ b/pandas/tests/util/test_deprecate_nonkeyword_arguments.py @@ -4,13 +4,16 @@ import inspect +from pandas.errors import Pandas4Warning from pandas.util._decorators import deprecate_nonkeyword_arguments import pandas._testing as tm +WARNING_CATEGORY = Pandas4Warning + @deprecate_nonkeyword_arguments( - version="1.1", allowed_args=["a", "b"], name="f_add_inputs", klass=FutureWarning + WARNING_CATEGORY, allowed_args=["a", "b"], name="f_add_inputs" ) def f(a, b=0, c=0, d=0): return a + b + c + d @@ -41,25 +44,25 @@ def test_two_and_two_arguments(): def test_three_arguments(): - with tm.assert_produces_warning(FutureWarning): + with tm.assert_produces_warning(WARNING_CATEGORY): assert f(6, 3, 3) == 12 def test_four_arguments(): - with tm.assert_produces_warning(FutureWarning): + with tm.assert_produces_warning(WARNING_CATEGORY): assert f(1, 2, 3, 4) == 10 def test_three_arguments_with_name_in_warning(): msg = ( - "Starting with pandas version 1.1 all arguments of f_add_inputs " + "Starting with pandas version 4.0 all arguments of f_add_inputs " "except for the arguments 'a' and 'b' will be keyword-only." 
) - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(WARNING_CATEGORY, match=msg): assert f(6, 3, 3) == 12 -@deprecate_nonkeyword_arguments(version="1.1", klass=FutureWarning) +@deprecate_nonkeyword_arguments(WARNING_CATEGORY) def g(a, b=0, c=0, d=0): with tm.assert_produces_warning(None): return a + b + c + d @@ -75,20 +78,20 @@ def test_one_and_three_arguments_default_allowed_args(): def test_three_arguments_default_allowed_args(): - with tm.assert_produces_warning(FutureWarning): + with tm.assert_produces_warning(WARNING_CATEGORY): assert g(6, 3, 3) == 12 def test_three_positional_argument_with_warning_message_analysis(): msg = ( - "Starting with pandas version 1.1 all arguments of g " + "Starting with pandas version 4.0 all arguments of g " "except for the argument 'a' will be keyword-only." ) - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(WARNING_CATEGORY, match=msg): assert g(6, 3, 3) == 12 -@deprecate_nonkeyword_arguments(version="1.1", klass=FutureWarning) +@deprecate_nonkeyword_arguments(WARNING_CATEGORY) def h(a=0, b=0, c=0, d=0): return a + b + c + d @@ -103,17 +106,17 @@ def test_all_keyword_arguments(): def test_one_positional_argument(): - with tm.assert_produces_warning(FutureWarning): + with tm.assert_produces_warning(WARNING_CATEGORY): assert h(23) == 23 def test_one_positional_argument_with_warning_message_analysis(): - msg = "Starting with pandas version 1.1 all arguments of h will be keyword-only." - with tm.assert_produces_warning(FutureWarning, match=msg): + msg = "Starting with pandas version 4.0 all arguments of h will be keyword-only." 
+ with tm.assert_produces_warning(WARNING_CATEGORY, match=msg): assert h(19) == 19 -@deprecate_nonkeyword_arguments(version="1.1", klass=UserWarning) +@deprecate_nonkeyword_arguments(WARNING_CATEGORY) def i(a=0, /, b=0, *, c=0, d=0): return a + b + c + d @@ -123,14 +126,12 @@ def test_i_signature(): def test_i_warns_klass(): - with tm.assert_produces_warning(UserWarning): + with tm.assert_produces_warning(WARNING_CATEGORY): assert i(1, 2) == 3 class Foo: - @deprecate_nonkeyword_arguments( - version=None, allowed_args=["self", "bar"], klass=FutureWarning - ) + @deprecate_nonkeyword_arguments(WARNING_CATEGORY, allowed_args=["self", "bar"]) def baz(self, bar=None, foobar=None): ... @@ -140,8 +141,8 @@ def test_foo_signature(): def test_class(): msg = ( - r"In a future version of pandas all arguments of Foo\.baz " + r"Starting with pandas version 4.0 all arguments of Foo\.baz " r"except for the argument \'bar\' will be keyword-only" ) - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(WARNING_CATEGORY, match=msg): Foo().baz("qux", "quox") diff --git a/pandas/tests/util/test_pandas_deprecation_warning.py b/pandas/tests/util/test_pandas_deprecation_warning.py index 55b132f843adc..fe9d04f3ab10d 100644 --- a/pandas/tests/util/test_pandas_deprecation_warning.py +++ b/pandas/tests/util/test_pandas_deprecation_warning.py @@ -15,7 +15,7 @@ def test_function_warns_pandas_deprecation_warning(): f1() -@deprecate_kwarg("old", klass=PandasChangeWarning, new_arg_name="new") +@deprecate_kwarg(PandasChangeWarning, "old", new_arg_name="new") def f2(new=0): return new From 638ca5b46858dbfa8d7718335ad7e2771f850b41 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Mon, 2 Jun 2025 06:46:16 -0400 Subject: [PATCH 05/26] Refinements --- doc/source/whatsnew/v3.0.0.rst | 10 +++++-- pandas/errors/__init__.py | 27 +++++++++++++++++-- pandas/tests/api/test_api.py | 3 ++- pandas/tests/groupby/test_groupby_subclass.py | 4 ++- 
.../test_deprecate_nonkeyword_arguments.py | 4 +-- pandas/util/_decorators.py | 17 +++--------- 6 files changed, 44 insertions(+), 21 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 4511d8a7c8252..d3d0e8aee6a29 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -26,9 +26,15 @@ Enhancement2 New Deprecation Policy ^^^^^^^^^^^^^^^^^^^^^^ -pandas 3.0.0 introduces a new 3-stage deprecation policy: using ``DeprecationWarning`` initially, then switching to ``FutureWarning`` for broader visibility in the last minor version before the next major release, and then removal of the deprecated functionality in the major release. +pandas 3.0.0 introduces a new 3-stage deprecation policy: using ``DeprecationWarning`` initially, then switching to ``FutureWarning`` for broader visibility in the last minor version before the next major release, and then removal of the deprecated functionality in the major release. This was done to give downstream packages more time to adjust to pandas deprecations, which should reduce the amount of warnings that a user gets from code that isn't theirs. See `PDEP 17 <https://pandas.pydata.org/pdeps/0017-backwards-compatibility-and-deprecation-policy.html>`_ for more details. -This was done to give downstream packages more time to adjust to pandas deprecations, which should reduce the amount of warnings that a user gets from code that isn't theirs. +All warnings for upcoming changes in pandas will have the base class :class:`pandas.errors.PandasChangeWarning`. Users may also use the following subclasses to control warnings. + +- :class:`pandas.errors.Pandas4Warning`: Warnings which will be enforced in pandas 4.0. +- :class:`pandas.errors.Pandas5Warning`: Warnings which will be enforced in pandas 5.0. +- :class:`pandas.errors.PandasPendingDeprecationWarning`: Warnings which will emit a ``PendingDeprecationWarning``, independent of the version they will be enforced.
+- :class:`pandas.errors.PandasDeprecationWarning`: Warnings which will emit a ``DeprecationWarning``, independent of the version they will be enforced. +- :class:`pandas.errors.PandasFutureWarning`: Warnings which will emit a ``FutureWarning``, independent of the version they will be enforced. .. _whatsnew_300.enhancements.other: diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 57be6a07c464d..726c90b2653b7 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -4,6 +4,7 @@ from __future__ import annotations +import abc import ctypes from pandas._config.config import OptionError @@ -93,11 +94,12 @@ class PerformanceWarning(Warning): class PandasChangeWarning(Warning): """ - Warning raised for any an upcoming change. + Warning raised for any upcoming change. See Also -------- errors.Pandas4Warning : Class for deprecations to be enforced in pandas 4.0. + errors.PandasFutureWarning : Class for deprecations that will raise a FutureWarning. Examples -------- @@ -105,6 +107,11 @@ class PandasChangeWarning(Warning): """ + @classmethod + @abc.abstractmethod + def version(cls) -> str: + """Version where change will be enforced.""" + class PandasPendingDeprecationWarning(PandasChangeWarning, PendingDeprecationWarning): """ @@ -113,6 +120,7 @@ class PandasPendingDeprecationWarning(PandasChangeWarning, PendingDeprecationWar See Also -------- errors.Pandas4Warning : Class for deprecations to be enforced in pandas 4.0. + errors.PandasFutureWarning : Class for deprecations that will raise a FutureWarning. Examples -------- @@ -128,6 +136,7 @@ class PandasDeprecationWarning(PandasChangeWarning, DeprecationWarning): See Also -------- errors.Pandas4Warning : Class for deprecations to be enforced in pandas 4.0. + errors.PandasFutureWarning : Class for deprecations that will raise a FutureWarning.
Examples -------- @@ -157,7 +166,7 @@ class Pandas4Warning(PandasDeprecationWarning): See Also -------- - errors.Pandas4Warning : Class for deprecations to be enforced in pandas 4.0. + errors.PandasFutureWarning : Class for deprecations that will raise a FutureWarning. Examples -------- @@ -165,6 +174,11 @@ class Pandas4Warning(PandasDeprecationWarning): """ + @classmethod + def version(cls) -> str: + """Version where change will be enforced.""" + return "4.0" + class Pandas5Warning(PandasPendingDeprecationWarning): """ @@ -173,6 +187,7 @@ class Pandas5Warning(PandasPendingDeprecationWarning): See Also -------- errors.Pandas4Warning : Class for deprecations to be enforced in pandas 4.0. + errors.PandasFutureWarning : Class for deprecations that will raise a FutureWarning. Examples -------- @@ -180,6 +195,14 @@ class Pandas5Warning(PandasPendingDeprecationWarning): """ + @classmethod + def version(cls) -> str: + """Version where change will be enforced.""" + return "5.0" + + +_CurrentDeprecationWarning = Pandas4Warning + class UnsupportedFunctionCall(ValueError): """ diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 871e977cbe2f8..b811a62846481 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -367,7 +367,8 @@ def test_api_executors(self): class TestErrors(Base): def test_errors(self): - self.check(pd.errors, pd.errors.__all__, ignored=["ctypes", "cow"]) + ignored = ["abstractmethod", "ctypes", "cow"] + self.check(pd.errors, pd.errors.__all__, ignored=ignored) class TestUtil(Base): diff --git a/pandas/tests/groupby/test_groupby_subclass.py b/pandas/tests/groupby/test_groupby_subclass.py index 3ee9c9ea0c7fd..5ffb3bc147fdf 100644 --- a/pandas/tests/groupby/test_groupby_subclass.py +++ b/pandas/tests/groupby/test_groupby_subclass.py @@ -3,6 +3,8 @@ import numpy as np import pytest +from pandas.errors import Pandas4Warning + from pandas import ( DataFrame, Index, @@ -36,7 +38,7 @@ def 
test_groupby_preserves_subclass(obj, groupby_func): args = get_groupby_method_args(groupby_func, obj) - warn = FutureWarning if groupby_func == "corrwith" else None + warn = Pandas4Warning if groupby_func == "corrwith" else None msg = f"{type(grouped).__name__}.corrwith is deprecated" with tm.assert_produces_warning(warn, match=msg): result1 = getattr(grouped, groupby_func)(*args) diff --git a/pandas/tests/util/test_deprecate_nonkeyword_arguments.py b/pandas/tests/util/test_deprecate_nonkeyword_arguments.py index 8ca6482096da7..7039a13a447a4 100644 --- a/pandas/tests/util/test_deprecate_nonkeyword_arguments.py +++ b/pandas/tests/util/test_deprecate_nonkeyword_arguments.py @@ -4,12 +4,12 @@ import inspect -from pandas.errors import Pandas4Warning +from pandas.errors import _CurrentDeprecationWarning from pandas.util._decorators import deprecate_nonkeyword_arguments import pandas._testing as tm -WARNING_CATEGORY = Pandas4Warning +WARNING_CATEGORY = _CurrentDeprecationWarning @deprecate_nonkeyword_arguments( diff --git a/pandas/util/_decorators.py b/pandas/util/_decorators.py index 803b608d282bf..981dc0c4197d1 100644 --- a/pandas/util/_decorators.py +++ b/pandas/util/_decorators.py @@ -23,6 +23,8 @@ Mapping, ) + from pandas.errors import PandasChangeWarning + def deprecate( klass: type[Warning], @@ -267,7 +269,7 @@ def future_version_msg(version: str | None) -> str: def deprecate_nonkeyword_arguments( - klass: type[Warning], + klass: type[PandasChangeWarning], allowed_args: list[str] | None = None, name: str | None = None, ) -> Callable[[F], F]: @@ -289,17 +291,6 @@ def deprecate_nonkeyword_arguments( message. If None, then the Qualified name of the function is used. 
""" - from pandas.errors import ( - Pandas4Warning, - Pandas5Warning, - ) - - if klass is Pandas4Warning: - version = "4.0" - elif klass is Pandas5Warning: - version = "5.0" - else: - raise AssertionError(f"{type(klass)=} must be a versioned warning") def decorate(func): old_sig = inspect.signature(func) @@ -328,7 +319,7 @@ def decorate(func): num_allow_args = len(allow_args) msg = ( - f"{future_version_msg(version)} all arguments of " + f"{future_version_msg(klass.version())} all arguments of " f"{name or func.__qualname__}{{arguments}} will be keyword-only." ) From e2960b6cb555e06856bede7820f86b7c22d763bd Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Mon, 2 Jun 2025 06:57:25 -0400 Subject: [PATCH 06/26] Add implementation details to PDEP-17 --- doc/source/whatsnew/v3.0.0.rst | 6 +++--- ...0017-backwards-compatibility-and-deprecation-policy.md | 8 ++++++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index d3d0e8aee6a29..83f01220077d9 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -32,9 +32,9 @@ All warnings for upcoming changes in pandas will have the base class :class:`pan - :class:`pandas.errors.Pandas4Warning`: Warnings which will be enforced in pandas 4.0. - :class:`pandas.errors.Pandas4Warning`: Warnings which will be enforced in pandas 5.0. -- :class:`pandas.errors.PandasPendingDeprecationWarning`: Warnings which will emit a ``PendingDeprecationWarning``, independent of the version they will be enforced. -- :class:`pandas.errors.PandasDeprecationWarning`: Warnings which will emit a ``DeprecationWarning``, independent of the version they will be enforced. -- :class:`pandas.errors.PandasFutureWarning`: Warnings which will emit a ``PandasFutureWarning``, independent of the version they will be enforced. 
+- :class:`pandas.errors.PandasPendingDeprecationWarning`: Base class of all warnings which emit a ``PendingDeprecationWarning``, independent of the version they will be enforced. +- :class:`pandas.errors.PandasDeprecationWarning`: Base class of all warnings which emit a ``DeprecationWarning``, independent of the version they will be enforced. +- :class:`pandas.errors.PandasFutureWarning`: Base class of all warnings which emit a ``PandasFutureWarning``, independent of the version they will be enforced. .. _whatsnew_300.enhancements.other: diff --git a/web/pandas/pdeps/0017-backwards-compatibility-and-deprecation-policy.md b/web/pandas/pdeps/0017-backwards-compatibility-and-deprecation-policy.md index b8eba90f399c9..561d774e21261 100644 --- a/web/pandas/pdeps/0017-backwards-compatibility-and-deprecation-policy.md +++ b/web/pandas/pdeps/0017-backwards-compatibility-and-deprecation-policy.md @@ -58,8 +58,12 @@ Additionally, when one introduces a deprecation, they should: ### Which warning class to use -Deprecations should initially use ``DeprecationWarning``, and then be switched to ``FutureWarning`` for broader visibility in the last minor release before the major release they are planned to be removed in. -This implementation detail can be ignored by using the appropriate ``PandasDeprecationWarning`` variable, which will be aliased to the proper warning class based on the pandas version. +Starting in pandas 3.0, pandas will provide version-specific warnings. For example, ``Pandas4Warning`` for all deprecation warnings that will be enforced in pandas 4.0. In addition to these, pandas exposes four additional classes to give users more control over pandas deprecation warnings. + +- :class:`pandas.errors.PandasChangeWarning`: Base class of all pandas deprecation warnings. +- :class:`pandas.errors.PandasPendingDeprecationWarning`: Base class of all warnings which emit a ``PendingDeprecationWarning``, independent of the version they will be enforced.
+- :class:`pandas.errors.PandasDeprecationWarning`: Base class of all warnings which emit a ``DeprecationWarning``, independent of the version they will be enforced. +- :class:`pandas.errors.PandasFutureWarning`: Base class of all warnings which emit a ``PandasFutureWarning``, independent of the version they will be enforced. ### Enforcement of deprecations From 92a4e7e34ab387fe39b3424db7cc1754adf522c3 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Mon, 2 Jun 2025 17:12:06 -0400 Subject: [PATCH 07/26] API test fixup --- pandas/tests/api/test_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index b811a62846481..c2e77b69aadcb 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -367,7 +367,7 @@ def test_api_executors(self): class TestErrors(Base): def test_errors(self): - ignored = ["abstractmethod", "ctypes", "cow"] + ignored = ["_CurrentDeprecationWarning", "abc", "ctypes", "cow"] self.check(pd.errors, pd.errors.__all__, ignored=ignored) From 6bca579939e4005226c8ff45788caef50130c52c Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Tue, 3 Jun 2025 20:42:08 -0400 Subject: [PATCH 08/26] Refinements --- pandas/errors/__init__.py | 30 +++++++++++++++---- pandas/tests/test_errors.py | 15 ++++++++++ .../test_deprecate_nonkeyword_arguments.py | 15 ++++++---- .../util/test_pandas_deprecation_warning.py | 25 ---------------- 4 files changed, 49 insertions(+), 36 deletions(-) delete mode 100644 pandas/tests/util/test_pandas_deprecation_warning.py diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 726c90b2653b7..6097a86498532 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -98,7 +98,10 @@ class PandasChangeWarning(Warning): See Also -------- - errors.Pandas4Warning : Class for deprecations to be enforced in pandas 4.0. 
+ errors.PandasPendingDeprecationWarning : Class for deprecations that will raise a + PendingDeprecationWarning. + errors.PandasDeprecationWarning : Class for deprecations that will raise a + DeprecationWarning. errors.PandasFutureWarning : Class for deprecations that will raise a FutureWarning. Examples @@ -119,7 +122,9 @@ class PandasPendingDeprecationWarning(PandasChangeWarning, PendingDeprecationWar See Also -------- - errors.Pandas4Warning : Class for deprecations to be enforced in pandas 4.0. + errors.PandasChangeWarning: Class for deprecations that will raise any warning. + errors.PandasDeprecationWarning : Class for deprecations that will raise a + DeprecationWarning. errors.PandasFutureWarning : Class for deprecations that will raise a FutureWarning. Examples @@ -135,7 +140,9 @@ class PandasDeprecationWarning(PandasChangeWarning, DeprecationWarning): See Also -------- - errors.Pandas4Warning : Class for deprecations to be enforced in pandas 4.0. + errors.PandasChangeWarning: Class for deprecations that will raise any warning. + errors.PandasPendingDeprecationWarning : Class for deprecations that will raise a + PendingDeprecationWarning. errors.PandasFutureWarning : Class for deprecations that will raise a FutureWarning. Examples @@ -151,7 +158,11 @@ class PandasFutureWarning(PandasChangeWarning, FutureWarning): See Also -------- - errors.Pandas4Warning : Class for deprecations to be enforced in pandas 4.0. + errors.PandasChangeWarning: Class for deprecations that will raise any warning. + errors.PandasPendingDeprecationWarning : Class for deprecations that will raise a + PendingDeprecationWarning. + errors.PandasDeprecationWarning : Class for deprecations that will raise a + DeprecationWarning. Examples -------- @@ -166,6 +177,11 @@ class Pandas4Warning(PandasDeprecationWarning): See Also -------- + errors.PandasChangeWarning: Class for deprecations that will raise any warning. 
+ errors.PandasPendingDeprecationWarning : Class for deprecations that will raise a + PendingDeprecationWarning. + errors.PandasDeprecationWarning : Class for deprecations that will raise a + DeprecationWarning. errors.PandasFutureWarning : Class for deprecations that will raise a FutureWarning. Examples @@ -186,7 +202,11 @@ class Pandas5Warning(PandasPendingDeprecationWarning): See Also -------- - errors.Pandas4Warning : Class for deprecations to be enforced in pandas 4.0. + errors.PandasChangeWarning: Class for deprecations that will raise any warning. + errors.PandasPendingDeprecationWarning : Class for deprecations that will raise a + PendingDeprecationWarning. + errors.PandasDeprecationWarning : Class for deprecations that will raise a + DeprecationWarning. errors.PandasFutureWarning : Class for deprecations that will raise a FutureWarning. Examples diff --git a/pandas/tests/test_errors.py b/pandas/tests/test_errors.py index 02b784a187f88..a9895e89cbf24 100644 --- a/pandas/tests/test_errors.py +++ b/pandas/tests/test_errors.py @@ -112,6 +112,21 @@ def test_AbstractMethodError_classmethod(): Foo().method() +@pytest.mark.parametrize( + "warn_category, catch_category", + [ + (Pandas4Warning, PandasChangeWarning), + (Pandas4Warning, PandasDeprecationWarning), + (Pandas5Warning, PandasChangeWarning), + (Pandas5Warning, PandasPendingDeprecationWarning), + ], +) +def test_pandas_warnings(warn_category, catch_category): + # https://github.com/pandas-dev/pandas/pull/61468 + with tm.assert_produces_warning(catch_category): + warnings.warn("test", category=warn_category) + + @pytest.mark.parametrize( "warn_category, filter_category", [ diff --git a/pandas/tests/util/test_deprecate_nonkeyword_arguments.py b/pandas/tests/util/test_deprecate_nonkeyword_arguments.py index 7039a13a447a4..f9300adffc0d6 100644 --- a/pandas/tests/util/test_deprecate_nonkeyword_arguments.py +++ b/pandas/tests/util/test_deprecate_nonkeyword_arguments.py @@ -55,8 +55,8 @@ def test_four_arguments(): 
def test_three_arguments_with_name_in_warning(): msg = ( - "Starting with pandas version 4.0 all arguments of f_add_inputs " - "except for the arguments 'a' and 'b' will be keyword-only." + f"Starting with pandas version {WARNING_CATEGORY.version()} all arguments of " + "f_add_inputs except for the arguments 'a' and 'b' will be keyword-only." ) with tm.assert_produces_warning(WARNING_CATEGORY, match=msg): assert f(6, 3, 3) == 12 @@ -84,7 +84,7 @@ def test_three_arguments_default_allowed_args(): def test_three_positional_argument_with_warning_message_analysis(): msg = ( - "Starting with pandas version 4.0 all arguments of g " + f"Starting with pandas version {WARNING_CATEGORY.version()} all arguments of g " "except for the argument 'a' will be keyword-only." ) with tm.assert_produces_warning(WARNING_CATEGORY, match=msg): @@ -111,7 +111,10 @@ def test_one_positional_argument(): def test_one_positional_argument_with_warning_message_analysis(): - msg = "Starting with pandas version 4.0 all arguments of h will be keyword-only." + msg = ( + f"Starting with pandas version {WARNING_CATEGORY.version()} all arguments " + "of h will be keyword-only." 
+ ) with tm.assert_produces_warning(WARNING_CATEGORY, match=msg): assert h(19) == 19 @@ -141,8 +144,8 @@ def test_foo_signature(): def test_class(): msg = ( - r"Starting with pandas version 4.0 all arguments of Foo\.baz " - r"except for the argument \'bar\' will be keyword-only" + rf"Starting with pandas version {WARNING_CATEGORY.version()} all arguments " + r"of Foo\.baz except for the argument \'bar\' will be keyword-only" ) with tm.assert_produces_warning(WARNING_CATEGORY, match=msg): Foo().baz("qux", "quox") diff --git a/pandas/tests/util/test_pandas_deprecation_warning.py b/pandas/tests/util/test_pandas_deprecation_warning.py deleted file mode 100644 index fe9d04f3ab10d..0000000000000 --- a/pandas/tests/util/test_pandas_deprecation_warning.py +++ /dev/null @@ -1,25 +0,0 @@ -import warnings - -from pandas.errors import PandasChangeWarning -from pandas.util._decorators import deprecate_kwarg - -import pandas._testing as tm - - -def f1(): - warnings.warn("f1", PandasChangeWarning) - - -def test_function_warns_pandas_deprecation_warning(): - with tm.assert_produces_warning(PandasChangeWarning): - f1() - - -@deprecate_kwarg(PandasChangeWarning, "old", new_arg_name="new") -def f2(new=0): - return new - - -def test_decorator_warns_pandas_deprecation_warning(): - with tm.assert_produces_warning(PandasChangeWarning): - f2(old=1) From 3b9617db2ac54abbc4f604cfefdd404f17c9720c Mon Sep 17 00:00:00 2001 From: Richard Shadrach <45562402+rhshadrach@users.noreply.github.com> Date: Thu, 26 Jun 2025 16:46:39 -0400 Subject: [PATCH 09/26] Update doc/source/whatsnew/v3.0.0.rst Co-authored-by: Simon Hawkins --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 83f01220077d9..217bb3c468982 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -31,7 +31,7 @@ pandas 3.0.0 introduces a new 3-stage deprecation policy: using ``DeprecationWar 
All warnings for upcoming changes in pandas will have the base class :class:`pandas.errors.PandasChangeWarning`. Users may also use the following subclasses to control warnings. - :class:`pandas.errors.Pandas4Warning`: Warnings which will be enforced in pandas 4.0. -- :class:`pandas.errors.Pandas4Warning`: Warnings which will be enforced in pandas 5.0. +- :class:`pandas.errors.Pandas5Warning`: Warnings which will be enforced in pandas 5.0. - :class:`pandas.errors.PandasPendingDeprecationWarning`: Base class of all warnings which emit a ``PendingDeprecationWarning``, independent of the version they will be enforced. - :class:`pandas.errors.PandasDeprecationWarning`: Base class of all warnings which emit a ``DeprecationWarning``, independent of the version they will be enforced. - :class:`pandas.errors.PandasFutureWarning`: Base class of all warnings which emit a ``PandasFutureWarning``, independent of the version they will be enforced. From 087dba6b8f2984abb5175cc593d9f32d57505832 Mon Sep 17 00:00:00 2001 From: Richard Shadrach <45562402+rhshadrach@users.noreply.github.com> Date: Sat, 19 Jul 2025 08:56:40 -0400 Subject: [PATCH 10/26] Update web/pandas/pdeps/0017-backwards-compatibility-and-deprecation-policy.md Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- .../0017-backwards-compatibility-and-deprecation-policy.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web/pandas/pdeps/0017-backwards-compatibility-and-deprecation-policy.md b/web/pandas/pdeps/0017-backwards-compatibility-and-deprecation-policy.md index 561d774e21261..7bceecab897e6 100644 --- a/web/pandas/pdeps/0017-backwards-compatibility-and-deprecation-policy.md +++ b/web/pandas/pdeps/0017-backwards-compatibility-and-deprecation-policy.md @@ -63,7 +63,7 @@ Starting in pandas 3.0, pandas will provide version-specific warnings. For examp - :class:`pandas.errors.PandasChangeWarning`: Base class of all pandas deprecation warnings. 
- :class:`pandas.errors.PandasPendingDeprecationWarning`: Base class of all warnings which emit a ``PendingDeprecationWarning``, independent of the version they will be enforced. - :class:`pandas.errors.PandasDeprecationWarning`: Base class of all warnings which emit a ``DeprecationWarning``, independent of the version they will be enforced. -- :class:`pandas.errors.PandasFutureWarning`: Base class of all warnings which emit a ``PandasFutureWarning``, independent of the version they will be enforced. +- :class:`pandas.errors.PandasFutureWarning`: Base class of all warnings which emit a ``FutureWarning``, independent of the version they will be enforced. ### Enforcement of deprecations From 25405f29601db593824312cc354bf73847347fb1 Mon Sep 17 00:00:00 2001 From: richard Date: Sat, 19 Jul 2025 09:50:27 -0400 Subject: [PATCH 11/26] Debug docs build --- .github/workflows/docbuild-and-upload.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/docbuild-and-upload.yml b/.github/workflows/docbuild-and-upload.yml index ba9e30e088c66..c1eb2a5288bf1 100644 --- a/.github/workflows/docbuild-and-upload.yml +++ b/.github/workflows/docbuild-and-upload.yml @@ -57,7 +57,11 @@ jobs: run: python web/pandas_web.py web/pandas --target-path=web/build - name: Build documentation - run: doc/make.py --warnings-are-errors + run: doc/make.py --warnings-are-errors --num-jobs=1 + + - name: Show error log + if: always() + run: cat /tmp/sphinx-err-svc_vpms.log - name: Build the interactive terminal working-directory: web/interactive_terminal From 9df8b32ea9c5a762687c38cc01948cc5cc87d717 Mon Sep 17 00:00:00 2001 From: richard Date: Sat, 19 Jul 2025 10:14:53 -0400 Subject: [PATCH 12/26] Revert --- .github/workflows/docbuild-and-upload.yml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/.github/workflows/docbuild-and-upload.yml b/.github/workflows/docbuild-and-upload.yml index c1eb2a5288bf1..ba9e30e088c66 100644 --- 
a/.github/workflows/docbuild-and-upload.yml +++ b/.github/workflows/docbuild-and-upload.yml @@ -57,11 +57,7 @@ jobs: run: python web/pandas_web.py web/pandas --target-path=web/build - name: Build documentation - run: doc/make.py --warnings-are-errors --num-jobs=1 - - - name: Show error log - if: always() - run: cat /tmp/sphinx-err-svc_vpms.log + run: doc/make.py --warnings-are-errors - name: Build the interactive terminal working-directory: web/interactive_terminal From 31b2460acd9b93049fb3a9975b7067e787d40b54 Mon Sep 17 00:00:00 2001 From: richard Date: Sat, 19 Jul 2025 10:29:42 -0400 Subject: [PATCH 13/26] Debug docs build --- .github/workflows/docbuild-and-upload.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/docbuild-and-upload.yml b/.github/workflows/docbuild-and-upload.yml index ba9e30e088c66..53e5a14485d98 100644 --- a/.github/workflows/docbuild-and-upload.yml +++ b/.github/workflows/docbuild-and-upload.yml @@ -59,6 +59,10 @@ jobs: - name: Build documentation run: doc/make.py --warnings-are-errors + - name: Show error log + if: always() + run: cat /tmp/sphinx-err-svc_vpms.log + - name: Build the interactive terminal working-directory: web/interactive_terminal run: jupyter lite build From 8e85f5c3f23133dec8e09895e6ddb19327a349a3 Mon Sep 17 00:00:00 2001 From: richard Date: Sat, 19 Jul 2025 11:13:45 -0400 Subject: [PATCH 14/26] Debug docs build --- .github/workflows/docbuild-and-upload.yml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/.github/workflows/docbuild-and-upload.yml b/.github/workflows/docbuild-and-upload.yml index 53e5a14485d98..9186fc7b4e312 100644 --- a/.github/workflows/docbuild-and-upload.yml +++ b/.github/workflows/docbuild-and-upload.yml @@ -57,11 +57,7 @@ jobs: run: python web/pandas_web.py web/pandas --target-path=web/build - name: Build documentation - run: doc/make.py --warnings-are-errors - - - name: Show error log - if: always() - run: cat /tmp/sphinx-err-svc_vpms.log + run: 
doc/make.py --warnings-are-errors --num-jobs=1 - name: Build the interactive terminal working-directory: web/interactive_terminal From ce085826288959a858f0437941730602fa2535f0 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Mon, 21 Jul 2025 17:22:54 -0400 Subject: [PATCH 15/26] Fixup & revert --- .github/workflows/docbuild-and-upload.yml | 2 +- pandas/util/_decorators.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/docbuild-and-upload.yml b/.github/workflows/docbuild-and-upload.yml index 9186fc7b4e312..ba9e30e088c66 100644 --- a/.github/workflows/docbuild-and-upload.yml +++ b/.github/workflows/docbuild-and-upload.yml @@ -57,7 +57,7 @@ jobs: run: python web/pandas_web.py web/pandas --target-path=web/build - name: Build documentation - run: doc/make.py --warnings-are-errors --num-jobs=1 + run: doc/make.py --warnings-are-errors - name: Build the interactive terminal working-directory: web/interactive_terminal diff --git a/pandas/util/_decorators.py b/pandas/util/_decorators.py index 981dc0c4197d1..6a73615534b22 100644 --- a/pandas/util/_decorators.py +++ b/pandas/util/_decorators.py @@ -278,7 +278,7 @@ def deprecate_nonkeyword_arguments( Parameters ---------- - klass : Warning, optional + klass : Warning The warning class to use. 
allowed_args : list, optional In case of list, it must be the list of names of some From 1fcdd4ad0b03220af6fb05ca050131d12dfacb72 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Mon, 21 Jul 2025 17:52:28 -0400 Subject: [PATCH 16/26] Try removing file --- doc/source/whatsnew/index.rst | 1 - doc/source/whatsnew/v0.23.0.rst | 1535 ------------------------------- 2 files changed, 1536 deletions(-) delete mode 100644 doc/source/whatsnew/v0.23.0.rst diff --git a/doc/source/whatsnew/index.rst b/doc/source/whatsnew/index.rst index 9da73c8fd76d4..a28d2e888e982 100644 --- a/doc/source/whatsnew/index.rst +++ b/doc/source/whatsnew/index.rst @@ -167,7 +167,6 @@ Version 0.23 v0.23.3 v0.23.2 v0.23.1 - v0.23.0 Version 0.22 ------------ diff --git a/doc/source/whatsnew/v0.23.0.rst b/doc/source/whatsnew/v0.23.0.rst deleted file mode 100644 index 7f7609edc27b6..0000000000000 --- a/doc/source/whatsnew/v0.23.0.rst +++ /dev/null @@ -1,1535 +0,0 @@ -.. _whatsnew_0230: - -What's new in 0.23.0 (May 15, 2018) ------------------------------------ - -{{ header }} - -.. ipython:: python - :suppress: - - from pandas import * # noqa F401, F403 - - -This is a major release from 0.22.0 and includes a number of API changes, -deprecations, new features, enhancements, and performance improvements along -with a large number of bug fixes. We recommend that all users upgrade to this -version. - -Highlights include: - -- :ref:`Round-trippable JSON format with 'table' orient `. -- :ref:`Instantiation from dicts respects order for Python 3.6+ `. -- :ref:`Dependent column arguments for assign `. -- :ref:`Merging / sorting on a combination of columns and index levels `. -- :ref:`Extending pandas with custom types `. -- :ref:`Excluding unobserved categories from groupby `. -- :ref:`Changes to make output shape of DataFrame.apply consistent `. - -Check the :ref:`API Changes ` and :ref:`deprecations ` before updating. - -.. 
warning:: - - Starting January 1, 2019, pandas feature releases will support Python 3 only. - See `Dropping Python 2.7 `_ for more. - -.. contents:: What's new in v0.23.0 - :local: - :backlinks: none - :depth: 2 - -.. _whatsnew_0230.enhancements: - -New features -~~~~~~~~~~~~ - -.. _whatsnew_0230.enhancements.round-trippable_json: - -JSON read/write round-trippable with ``orient='table'`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -A ``DataFrame`` can now be written to and subsequently read back via JSON while preserving metadata through usage of the ``orient='table'`` argument (see :issue:`18912` and :issue:`9146`). Previously, none of the available ``orient`` values guaranteed the preservation of dtypes and index names, amongst other metadata. - -.. code-block:: ipython - - In [1]: df = pd.DataFrame({'foo': [1, 2, 3, 4], - ...: 'bar': ['a', 'b', 'c', 'd'], - ...: 'baz': pd.date_range('2018-01-01', freq='d', periods=4), - ...: 'qux': pd.Categorical(['a', 'b', 'c', 'c'])}, - ...: index=pd.Index(range(4), name='idx')) - - In [2]: df - Out[2]: - foo bar baz qux - idx - 0 1 a 2018-01-01 a - 1 2 b 2018-01-02 b - 2 3 c 2018-01-03 c - 3 4 d 2018-01-04 c - - [4 rows x 4 columns] - - In [3]: df.dtypes - Out[3]: - foo int64 - bar object - baz datetime64[ns] - qux category - Length: 4, dtype: object - - In [4]: df.to_json('test.json', orient='table') - - In [5]: new_df = pd.read_json('test.json', orient='table') - - In [6]: new_df - Out[6]: - foo bar baz qux - idx - 0 1 a 2018-01-01 a - 1 2 b 2018-01-02 b - 2 3 c 2018-01-03 c - 3 4 d 2018-01-04 c - - [4 rows x 4 columns] - - In [7]: new_df.dtypes - Out[7]: - foo int64 - bar object - baz datetime64[ns] - qux category - Length: 4, dtype: object - -Please note that the string ``index`` is not supported with the round trip format, as it is used by default in ``write_json`` to indicate a missing index name. - -.. 
ipython:: python - :okwarning: - - df.index.name = 'index' - - df.to_json('test.json', orient='table') - new_df = pd.read_json('test.json', orient='table') - new_df - new_df.dtypes - -.. ipython:: python - :suppress: - - import os - os.remove('test.json') - - -.. _whatsnew_0230.enhancements.assign_dependent: - - -Method ``.assign()`` accepts dependent arguments -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The :func:`DataFrame.assign` now accepts dependent keyword arguments for python version later than 3.6 (see also `PEP 468 -`_). Later keyword arguments may now refer to earlier ones if the argument is a callable. See the -:ref:`documentation here ` (:issue:`14207`) - -.. ipython:: python - - df = pd.DataFrame({'A': [1, 2, 3]}) - df - df.assign(B=df.A, C=lambda x: x['A'] + x['B']) - -.. warning:: - - This may subtly change the behavior of your code when you're - using ``.assign()`` to update an existing column. Previously, callables - referring to other variables being updated would get the "old" values - - Previous behavior: - - .. code-block:: ipython - - In [2]: df = pd.DataFrame({"A": [1, 2, 3]}) - - In [3]: df.assign(A=lambda df: df.A + 1, C=lambda df: df.A * -1) - Out[3]: - A C - 0 2 -1 - 1 3 -2 - 2 4 -3 - - New behavior: - - .. ipython:: python - - df.assign(A=df.A + 1, C=lambda df: df.A * -1) - - - -.. _whatsnew_0230.enhancements.merge_on_columns_and_levels: - -Merging on a combination of columns and index levels -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Strings passed to :meth:`DataFrame.merge` as the ``on``, ``left_on``, and ``right_on`` -parameters may now refer to either column names or index level names. -This enables merging ``DataFrame`` instances on a combination of index levels -and columns without resetting indexes. See the :ref:`Merge on columns and -levels ` documentation section. -(:issue:`14355`) - -.. 
ipython:: python - - left_index = pd.Index(['K0', 'K0', 'K1', 'K2'], name='key1') - - left = pd.DataFrame({'A': ['A0', 'A1', 'A2', 'A3'], - 'B': ['B0', 'B1', 'B2', 'B3'], - 'key2': ['K0', 'K1', 'K0', 'K1']}, - index=left_index) - - right_index = pd.Index(['K0', 'K1', 'K2', 'K2'], name='key1') - - right = pd.DataFrame({'C': ['C0', 'C1', 'C2', 'C3'], - 'D': ['D0', 'D1', 'D2', 'D3'], - 'key2': ['K0', 'K0', 'K0', 'K1']}, - index=right_index) - - left.merge(right, on=['key1', 'key2']) - -.. _whatsnew_0230.enhancements.sort_by_columns_and_levels: - -Sorting by a combination of columns and index levels -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Strings passed to :meth:`DataFrame.sort_values` as the ``by`` parameter may -now refer to either column names or index level names. This enables sorting -``DataFrame`` instances by a combination of index levels and columns without -resetting indexes. See the :ref:`Sorting by Indexes and Values -` documentation section. -(:issue:`14353`) - -.. ipython:: python - - # Build MultiIndex - idx = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('a', 2), - ('b', 2), ('b', 1), ('b', 1)]) - idx.names = ['first', 'second'] - - # Build DataFrame - df_multi = pd.DataFrame({'A': np.arange(6, 0, -1)}, - index=idx) - df_multi - - # Sort by 'second' (index) and 'A' (column) - df_multi.sort_values(by=['second', 'A']) - - -.. _whatsnew_023.enhancements.extension: - -Extending pandas with custom types (experimental) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -pandas now supports storing array-like objects that aren't necessarily 1-D NumPy -arrays as columns in a DataFrame or values in a Series. This allows third-party -libraries to implement extensions to NumPy's types, similar to how pandas -implemented categoricals, datetimes with timezones, periods, and intervals. - -As a demonstration, we'll use cyberpandas_, which provides an ``IPArray`` type -for storing ip addresses. - -.. 
code-block:: ipython - - In [1]: from cyberpandas import IPArray - - In [2]: values = IPArray([ - ...: 0, - ...: 3232235777, - ...: 42540766452641154071740215577757643572 - ...: ]) - ...: - ...: - -``IPArray`` isn't a normal 1-D NumPy array, but because it's a pandas -:class:`~pandas.api.extensions.ExtensionArray`, it can be stored properly inside pandas' containers. - -.. code-block:: ipython - - In [3]: ser = pd.Series(values) - - In [4]: ser - Out[4]: - 0 0.0.0.0 - 1 192.168.1.1 - 2 2001:db8:85a3::8a2e:370:7334 - dtype: ip - -Notice that the dtype is ``ip``. The missing value semantics of the underlying -array are respected: - -.. code-block:: ipython - - In [5]: ser.isna() - Out[5]: - 0 True - 1 False - 2 False - dtype: bool - -For more, see the :ref:`extension types ` -documentation. If you build an extension array, publicize it on `the ecosystem page `_. - -.. _cyberpandas: https://cyberpandas.readthedocs.io/en/latest/ - - -.. _whatsnew_0230.enhancements.categorical_grouping: - -New ``observed`` keyword for excluding unobserved categories in ``GroupBy`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Grouping by a categorical includes the unobserved categories in the output. -When grouping by multiple categorical columns, this means you get the cartesian product of all the -categories, including combinations where there are no observations, which can result in a large -number of groups. We have added a keyword ``observed`` to control this behavior, it defaults to -``observed=False`` for backward-compatibility. (:issue:`14942`, :issue:`8138`, :issue:`15217`, :issue:`17594`, :issue:`8669`, :issue:`20583`, :issue:`20902`) - -.. 
ipython:: python - - cat1 = pd.Categorical(["a", "a", "b", "b"], - categories=["a", "b", "z"], ordered=True) - cat2 = pd.Categorical(["c", "d", "c", "d"], - categories=["c", "d", "y"], ordered=True) - df = pd.DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]}) - df['C'] = ['foo', 'bar'] * 2 - df - -To show all values, the previous behavior: - -.. ipython:: python - - df.groupby(['A', 'B', 'C'], observed=False).count() - - -To show only observed values: - -.. ipython:: python - - df.groupby(['A', 'B', 'C'], observed=True).count() - -For pivoting operations, this behavior is *already* controlled by the ``dropna`` keyword: - -.. ipython:: python - - cat1 = pd.Categorical(["a", "a", "b", "b"], - categories=["a", "b", "z"], ordered=True) - cat2 = pd.Categorical(["c", "d", "c", "d"], - categories=["c", "d", "y"], ordered=True) - df = pd.DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]}) - df - - -.. code-block:: ipython - - In [1]: pd.pivot_table(df, values='values', index=['A', 'B'], dropna=True) - - Out[1]: - values - A B - a c 1.0 - d 2.0 - b c 3.0 - d 4.0 - - In [2]: pd.pivot_table(df, values='values', index=['A', 'B'], dropna=False) - - Out[2]: - values - A B - a c 1.0 - d 2.0 - y NaN - b c 3.0 - d 4.0 - y NaN - z c NaN - d NaN - y NaN - - -.. _whatsnew_0230.enhancements.window_raw: - -Rolling/Expanding.apply() accepts ``raw=False`` to pass a ``Series`` to the function -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -:func:`Series.rolling().apply() <.Rolling.apply>`, :func:`DataFrame.rolling().apply() <.Rolling.apply>`, -:func:`Series.expanding().apply() <.Expanding.apply>`, and :func:`DataFrame.expanding().apply() <.Expanding.apply>` have gained a ``raw=None`` parameter. -This is similar to :func:`DataFrame.apply`. This parameter, if ``True`` allows one to send a ``np.ndarray`` to the applied function. If ``False`` a ``Series`` will be passed.
The -default is ``None``, which preserves backward compatibility, so this will default to ``True``, sending an ``np.ndarray``. -In a future version the default will be changed to ``False``, sending a ``Series``. (:issue:`5071`, :issue:`20584`) - -.. ipython:: python - - s = pd.Series(np.arange(5), np.arange(5) + 1) - s - -Pass a ``Series``: - -.. ipython:: python - - s.rolling(2, min_periods=1).apply(lambda x: x.iloc[-1], raw=False) - -Mimic the original behavior of passing a ndarray: - -.. ipython:: python - - s.rolling(2, min_periods=1).apply(lambda x: x[-1], raw=True) - - -.. _whatsnew_0210.enhancements.limit_area: - -``DataFrame.interpolate`` has gained the ``limit_area`` kwarg -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -:meth:`DataFrame.interpolate` has gained a ``limit_area`` parameter to allow further control of which ``NaN`` s are replaced. -Use ``limit_area='inside'`` to fill only NaNs surrounded by valid values or use ``limit_area='outside'`` to fill only ``NaN`` s -outside the existing valid values while preserving those inside. (:issue:`16284`) See the :ref:`full documentation here `. - - -.. ipython:: python - - ser = pd.Series([np.nan, np.nan, 5, np.nan, np.nan, - np.nan, 13, np.nan, np.nan]) - ser - -Fill one consecutive inside value in both directions - -.. ipython:: python - - ser.interpolate(limit_direction='both', limit_area='inside', limit=1) - -Fill all consecutive outside values backward - -.. ipython:: python - - ser.interpolate(limit_direction='backward', limit_area='outside') - -Fill all consecutive outside values in both directions - -.. ipython:: python - - ser.interpolate(limit_direction='both', limit_area='outside') - -.. _whatsnew_0210.enhancements.get_dummies_dtype: - -Function ``get_dummies`` now supports ``dtype`` argument -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The :func:`get_dummies` now accepts a ``dtype`` argument, which specifies a dtype for the new columns. The default remains uint8. 
(:issue:`18330`) - -.. ipython:: python - - df = pd.DataFrame({'a': [1, 2], 'b': [3, 4], 'c': [5, 6]}) - pd.get_dummies(df, columns=['c']).dtypes - pd.get_dummies(df, columns=['c'], dtype=bool).dtypes - - -.. _whatsnew_0230.enhancements.timedelta_mod: - -Timedelta mod method -^^^^^^^^^^^^^^^^^^^^ - -``mod`` (%) and ``divmod`` operations are now defined on ``Timedelta`` objects -when operating with either timedelta-like or with numeric arguments. -See the :ref:`documentation here `. (:issue:`19365`) - -.. ipython:: python - - td = pd.Timedelta(hours=37) - td % pd.Timedelta(minutes=45) - -.. _whatsnew_0230.enhancements.ran_inf: - -Method ``.rank()`` handles ``inf`` values when ``NaN`` are present -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -In previous versions, ``.rank()`` would assign ``inf`` elements ``NaN`` as their ranks. Now ranks are calculated properly. (:issue:`6945`) - -.. ipython:: python - - s = pd.Series([-np.inf, 0, 1, np.nan, np.inf]) - s - -Previous behavior: - -.. code-block:: ipython - - In [11]: s.rank() - Out[11]: - 0 1.0 - 1 2.0 - 2 3.0 - 3 NaN - 4 NaN - dtype: float64 - -Current behavior: - -.. ipython:: python - - s.rank() - -Furthermore, previously if you rank ``inf`` or ``-inf`` values together with ``NaN`` values, the calculation won't distinguish ``NaN`` from infinity when using 'top' or 'bottom' argument. - -.. ipython:: python - - s = pd.Series([np.nan, np.nan, -np.inf, -np.inf]) - s - -Previous behavior: - -.. code-block:: ipython - - In [15]: s.rank(na_option='top') - Out[15]: - 0 2.5 - 1 2.5 - 2 2.5 - 3 2.5 - dtype: float64 - -Current behavior: - -.. 
ipython:: python - - s.rank(na_option='top') - -These bugs were squashed: - -- Bug in :meth:`DataFrame.rank` and :meth:`Series.rank` when ``method='dense'`` and ``pct=True`` in which percentile ranks were not being used with the number of distinct observations (:issue:`15630`) -- Bug in :meth:`Series.rank` and :meth:`DataFrame.rank` when ``ascending='False'`` failed to return correct ranks for infinity if ``NaN`` were present (:issue:`19538`) -- Bug in :func:`DataFrameGroupBy.rank` where ranks were incorrect when both infinity and ``NaN`` were present (:issue:`20561`) - - -.. _whatsnew_0230.enhancements.str_cat_align: - -``Series.str.cat`` has gained the ``join`` kwarg -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Previously, :meth:`Series.str.cat` did not -- in contrast to most of ``pandas`` -- align :class:`Series` on their index before concatenation (see :issue:`18657`). -The method has now gained a keyword ``join`` to control the manner of alignment, see examples below and :ref:`here `. - -In v.0.23 ``join`` will default to None (meaning no alignment), but this default will change to ``'left'`` in a future version of pandas. - -.. ipython:: python - :okwarning: - - s = pd.Series(['a', 'b', 'c', 'd']) - t = pd.Series(['b', 'd', 'e', 'c'], index=[1, 3, 4, 2]) - s.str.cat(t) - s.str.cat(t, join='left', na_rep='-') - -Furthermore, :meth:`Series.str.cat` now works for ``CategoricalIndex`` as well (previously raised a ``ValueError``; see :issue:`20842`). - -.. _whatsnew_0230.enhancements.astype_category: - -``DataFrame.astype`` performs column-wise conversion to ``Categorical`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -:meth:`DataFrame.astype` can now perform column-wise conversion to ``Categorical`` by supplying the string ``'category'`` or -a :class:`~pandas.api.types.CategoricalDtype`. Previously, attempting this would raise a ``NotImplementedError``. 
See the -:ref:`categorical.objectcreation` section of the documentation for more details and examples. (:issue:`12860`, :issue:`18099`) - -Supplying the string ``'category'`` performs column-wise conversion, with only labels appearing in a given column set as categories: - -.. ipython:: python - - df = pd.DataFrame({'A': list('abca'), 'B': list('bccd')}) - df = df.astype('category') - df['A'].dtype - df['B'].dtype - - -Supplying a ``CategoricalDtype`` will make the categories in each column consistent with the supplied dtype: - -.. ipython:: python - - from pandas.api.types import CategoricalDtype - df = pd.DataFrame({'A': list('abca'), 'B': list('bccd')}) - cdt = CategoricalDtype(categories=list('abcd'), ordered=True) - df = df.astype(cdt) - df['A'].dtype - df['B'].dtype - - -.. _whatsnew_0230.enhancements.other: - -Other enhancements -^^^^^^^^^^^^^^^^^^ - -- Unary ``+`` now permitted for ``Series`` and ``DataFrame`` as numeric operator (:issue:`16073`) -- Better support for :meth:`~pandas.io.formats.style.Styler.to_excel` output with the ``xlsxwriter`` engine. (:issue:`16149`) -- :func:`pandas.tseries.frequencies.to_offset` now accepts leading '+' signs e.g. '+1h'. 
(:issue:`18171`) -- :func:`MultiIndex.unique` now supports the ``level=`` argument, to get unique values from a specific index level (:issue:`17896`) -- :class:`pandas.io.formats.style.Styler` now has method ``hide_index()`` to determine whether the index will be rendered in output (:issue:`14194`) -- :class:`pandas.io.formats.style.Styler` now has method ``hide_columns()`` to determine whether columns will be hidden in output (:issue:`14194`) -- Improved wording of ``ValueError`` raised in :func:`to_datetime` when ``unit=`` is passed with a non-convertible value (:issue:`14350`) -- :func:`Series.fillna` now accepts a Series or a dict as a ``value`` for a categorical dtype (:issue:`17033`) -- :func:`pandas.read_clipboard` updated to use qtpy, falling back to PyQt5 and then PyQt4, adding compatibility with Python3 and multiple python-qt bindings (:issue:`17722`) -- Improved wording of ``ValueError`` raised in :func:`read_csv` when the ``usecols`` argument cannot match all columns. (:issue:`17301`) -- :func:`DataFrame.corrwith` now silently drops non-numeric columns when passed a Series. Before, an exception was raised (:issue:`18570`). -- :class:`IntervalIndex` now supports time zone aware ``Interval`` objects (:issue:`18537`, :issue:`18538`) -- :func:`Series` / :func:`DataFrame` tab completion also returns identifiers in the first level of a :func:`MultiIndex`. (:issue:`16326`) -- :func:`read_excel` has gained the ``nrows`` parameter (:issue:`16645`) -- :meth:`DataFrame.append` can now in more cases preserve the type of the calling dataframe's columns (e.g. 
if both are ``CategoricalIndex``) (:issue:`18359`) -- :meth:`DataFrame.to_json` and :meth:`Series.to_json` now accept an ``index`` argument which allows the user to exclude the index from the JSON output (:issue:`17394`) -- ``IntervalIndex.to_tuples()`` has gained the ``na_tuple`` parameter to control whether NA is returned as a tuple of NA, or NA itself (:issue:`18756`) -- ``Categorical.rename_categories``, ``CategoricalIndex.rename_categories`` and :attr:`Series.cat.rename_categories` - can now take a callable as their argument (:issue:`18862`) -- :class:`Interval` and :class:`IntervalIndex` have gained a ``length`` attribute (:issue:`18789`) -- ``Resampler`` objects now have a functioning :attr:`.Resampler.pipe` method. - Previously, calls to ``pipe`` were diverted to the ``mean`` method (:issue:`17905`). -- :func:`~pandas.api.types.is_scalar` now returns ``True`` for ``DateOffset`` objects (:issue:`18943`). -- :func:`DataFrame.pivot` now accepts a list for the ``values=`` kwarg (:issue:`17160`). -- Added :func:`pandas.api.extensions.register_dataframe_accessor`, - :func:`pandas.api.extensions.register_series_accessor`, and - :func:`pandas.api.extensions.register_index_accessor`, accessor for libraries downstream of pandas - to register custom accessors like ``.cat`` on pandas objects. See - :ref:`Registering Custom Accessors ` for more (:issue:`14781`). 
- -- ``IntervalIndex.astype`` now supports conversions between subtypes when passed an ``IntervalDtype`` (:issue:`19197`) -- :class:`IntervalIndex` and its associated constructor methods (``from_arrays``, ``from_breaks``, ``from_tuples``) have gained a ``dtype`` parameter (:issue:`19262`) -- Added :func:`.SeriesGroupBy.is_monotonic_increasing` and :func:`.SeriesGroupBy.is_monotonic_decreasing` (:issue:`17015`) -- For subclassed ``DataFrames``, :func:`DataFrame.apply` will now preserve the ``Series`` subclass (if defined) when passing the data to the applied function (:issue:`19822`) -- :func:`DataFrame.from_dict` now accepts a ``columns`` argument that can be used to specify the column names when ``orient='index'`` is used (:issue:`18529`) -- Added option ``display.html.use_mathjax`` so `MathJax `_ can be disabled when rendering tables in ``Jupyter`` notebooks (:issue:`19856`, :issue:`19824`) -- :func:`DataFrame.replace` now supports the ``method`` parameter, which can be used to specify the replacement method when ``to_replace`` is a scalar, list or tuple and ``value`` is ``None`` (:issue:`19632`) -- :meth:`Timestamp.month_name`, :meth:`DatetimeIndex.month_name`, and :meth:`Series.dt.month_name` are now available (:issue:`12805`) -- :meth:`Timestamp.day_name` and :meth:`DatetimeIndex.day_name` are now available to return day names with a specified locale (:issue:`12806`) -- :meth:`DataFrame.to_sql` now performs a multi-value insert if the underlying connection supports it rather than inserting row by row. - ``SQLAlchemy`` dialects supporting multi-value inserts include: ``mysql``, ``postgresql``, ``sqlite`` and any dialect with ``supports_multivalues_insert``. (:issue:`14315`, :issue:`8953`) -- :func:`read_html` now accepts a ``displayed_only`` keyword argument to control whether or not hidden elements are parsed (``True`` by default) (:issue:`20027`) -- :func:`read_html` now reads all ``<tbody>`` elements in a ``<table>``, not just the first.
(:issue:`20690`) -- :meth:`.Rolling.quantile` and :meth:`.Expanding.quantile` now accept the ``interpolation`` keyword, ``linear`` by default (:issue:`20497`) -- zip compression is supported via ``compression=zip`` in :func:`DataFrame.to_pickle`, :func:`Series.to_pickle`, :func:`DataFrame.to_csv`, :func:`Series.to_csv`, :func:`DataFrame.to_json`, :func:`Series.to_json`. (:issue:`17778`) -- :class:`~pandas.tseries.offsets.WeekOfMonth` constructor now supports ``n=0`` (:issue:`20517`). -- :class:`DataFrame` and :class:`Series` now support matrix multiplication (``@``) operator (:issue:`10259`) for Python>=3.5 -- Updated :meth:`DataFrame.to_gbq` and :meth:`pandas.read_gbq` signature and documentation to reflect changes from - the pandas-gbq library version 0.4.0. Adds intersphinx mapping to pandas-gbq - library. (:issue:`20564`) -- Added new writer for exporting Stata dta files in version 117, ``StataWriter117``. This format supports exporting strings with lengths up to 2,000,000 characters (:issue:`16450`) -- :func:`to_hdf` and :func:`read_hdf` now accept an ``errors`` keyword argument to control encoding error handling (:issue:`20835`) -- :func:`cut` has gained the ``duplicates='raise'|'drop'`` option to control whether to raise on duplicated edges (:issue:`20947`) -- :func:`date_range`, :func:`timedelta_range`, and :func:`interval_range` now return a linearly spaced index if ``start``, ``stop``, and ``periods`` are specified, but ``freq`` is not. (:issue:`20808`, :issue:`20983`, :issue:`20976`) - -.. _whatsnew_0230.api_breaking: - -Backwards incompatible API changes -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. _whatsnew_0230.api_breaking.deps: - -Dependencies have increased minimum versions -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -We have updated our minimum supported versions of dependencies (:issue:`15184`). 
-If installed, we now require: - -+-----------------+-----------------+----------+---------------+ -| Package | Minimum Version | Required | Issue | -+=================+=================+==========+===============+ -| python-dateutil | 2.5.0 | X | :issue:`15184`| -+-----------------+-----------------+----------+---------------+ -| openpyxl | 2.4.0 | | :issue:`15184`| -+-----------------+-----------------+----------+---------------+ -| beautifulsoup4 | 4.2.1 | | :issue:`20082`| -+-----------------+-----------------+----------+---------------+ -| setuptools | 24.2.0 | | :issue:`20698`| -+-----------------+-----------------+----------+---------------+ - -.. _whatsnew_0230.api_breaking.dict_insertion_order: - -Instantiation from dicts preserves dict insertion order for Python 3.6+ -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Until Python 3.6, dicts in Python had no formally defined ordering. For Python -version 3.6 and later, dicts are ordered by insertion order, see -`PEP 468 `_. -pandas will use the dict's insertion order, when creating a ``Series`` or -``DataFrame`` from a dict and you're using Python version 3.6 or -higher. (:issue:`19884`) - -Previous behavior (and current behavior if on Python < 3.6): - -.. code-block:: ipython - - In [16]: pd.Series({'Income': 2000, - ....: 'Expenses': -1500, - ....: 'Taxes': -200, - ....: 'Net result': 300}) - Out[16]: - Expenses -1500 - Income 2000 - Net result 300 - Taxes -200 - dtype: int64 - -Note the Series above is ordered alphabetically by the index values. - -New behavior (for Python >= 3.6): - -.. ipython:: python - - pd.Series({'Income': 2000, - 'Expenses': -1500, - 'Taxes': -200, - 'Net result': 300}) - -Notice that the Series is now ordered by insertion order. This new behavior is -used for all relevant pandas types (``Series``, ``DataFrame``, ``SparseSeries`` -and ``SparseDataFrame``). 
- -If you wish to retain the old behavior while using Python >= 3.6, you can use -``.sort_index()``: - -.. ipython:: python - - pd.Series({'Income': 2000, - 'Expenses': -1500, - 'Taxes': -200, - 'Net result': 300}).sort_index() - -.. _whatsnew_0230.api_breaking.deprecate_panel: - -Deprecate Panel -^^^^^^^^^^^^^^^ - -``Panel`` was deprecated in the 0.20.x release, showing as a ``DeprecationWarning``. Using ``Panel`` will now show a ``FutureWarning``. The recommended way to represent 3-D data are -with a ``MultiIndex`` on a ``DataFrame`` via the :meth:`~Panel.to_frame` or with the `xarray package `__. pandas -provides a :meth:`~Panel.to_xarray` method to automate this conversion (:issue:`13563`, :issue:`18324`). - -.. code-block:: ipython - - In [75]: import pandas._testing as tm - - In [76]: p = tm.makePanel() - - In [77]: p - Out[77]: - - Dimensions: 3 (items) x 3 (major_axis) x 4 (minor_axis) - Items axis: ItemA to ItemC - Major_axis axis: 2000-01-03 00:00:00 to 2000-01-05 00:00:00 - Minor_axis axis: A to D - -Convert to a MultiIndex DataFrame - -.. code-block:: ipython - - In [78]: p.to_frame() - Out[78]: - ItemA ItemB ItemC - major minor - 2000-01-03 A 0.469112 0.721555 0.404705 - B -1.135632 0.271860 -1.039268 - C 0.119209 0.276232 -1.344312 - D -2.104569 0.113648 -0.109050 - 2000-01-04 A -0.282863 -0.706771 0.577046 - B 1.212112 -0.424972 -0.370647 - C -1.044236 -1.087401 0.844885 - D -0.494929 -1.478427 1.643563 - 2000-01-05 A -1.509059 -1.039575 -1.715002 - B -0.173215 0.567020 -1.157892 - C -0.861849 -0.673690 1.075770 - D 1.071804 0.524988 -1.469388 - - [12 rows x 3 columns] - -Convert to an xarray DataArray - -.. 
code-block:: ipython - - In [79]: p.to_xarray() - Out[79]: - - array([[[ 0.469112, -1.135632, 0.119209, -2.104569], - [-0.282863, 1.212112, -1.044236, -0.494929], - [-1.509059, -0.173215, -0.861849, 1.071804]], - - [[ 0.721555, 0.27186 , 0.276232, 0.113648], - [-0.706771, -0.424972, -1.087401, -1.478427], - [-1.039575, 0.56702 , -0.67369 , 0.524988]], - - [[ 0.404705, -1.039268, -1.344312, -0.10905 ], - [ 0.577046, -0.370647, 0.844885, 1.643563], - [-1.715002, -1.157892, 1.07577 , -1.469388]]]) - Coordinates: - * items (items) object 'ItemA' 'ItemB' 'ItemC' - * major_axis (major_axis) datetime64[ns] 2000-01-03 2000-01-04 2000-01-05 - * minor_axis (minor_axis) object 'A' 'B' 'C' 'D' - - -.. _whatsnew_0230.api_breaking.core_common: - -pandas.core.common removals -^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The following error & warning messages are removed from ``pandas.core.common`` (:issue:`13634`, :issue:`19769`): - -- ``PerformanceWarning`` -- ``UnsupportedFunctionCall`` -- ``UnsortedIndexError`` -- ``AbstractMethodError`` - -These are available from import from ``pandas.errors`` (since 0.19.0). - - -.. _whatsnew_0230.api_breaking.apply: - -Changes to make output of ``DataFrame.apply`` consistent -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -:func:`DataFrame.apply` was inconsistent when applying an arbitrary user-defined-function that returned a list-like with ``axis=1``. Several bugs and inconsistencies -are resolved. If the applied function returns a Series, then pandas will return a DataFrame; otherwise a Series will be returned, this includes the case -where a list-like (e.g. ``tuple`` or ``list`` is returned) (:issue:`16353`, :issue:`17437`, :issue:`17970`, :issue:`17348`, :issue:`17892`, :issue:`18573`, -:issue:`17602`, :issue:`18775`, :issue:`18901`, :issue:`18919`). - -.. 
ipython:: python - - df = pd.DataFrame(np.tile(np.arange(3), 6).reshape(6, -1) + 1, - columns=['A', 'B', 'C']) - df - -Previous behavior: if the returned shape happened to match the length of original columns, this would return a ``DataFrame``. -If the return shape did not match, a ``Series`` with lists was returned. - -.. code-block:: python - - In [3]: df.apply(lambda x: [1, 2, 3], axis=1) - Out[3]: - A B C - 0 1 2 3 - 1 1 2 3 - 2 1 2 3 - 3 1 2 3 - 4 1 2 3 - 5 1 2 3 - - In [4]: df.apply(lambda x: [1, 2], axis=1) - Out[4]: - 0 [1, 2] - 1 [1, 2] - 2 [1, 2] - 3 [1, 2] - 4 [1, 2] - 5 [1, 2] - dtype: object - - -New behavior: When the applied function returns a list-like, this will now *always* return a ``Series``. - -.. ipython:: python - - df.apply(lambda x: [1, 2, 3], axis=1) - df.apply(lambda x: [1, 2], axis=1) - -To have expanded columns, you can use ``result_type='expand'`` - -.. ipython:: python - - df.apply(lambda x: [1, 2, 3], axis=1, result_type='expand') - -To broadcast the result across the original columns (the old behaviour for -list-likes of the correct length), you can use ``result_type='broadcast'``. -The shape must match the original columns. - -.. ipython:: python - - df.apply(lambda x: [1, 2, 3], axis=1, result_type='broadcast') - -Returning a ``Series`` allows one to control the exact return structure and column names: - -.. ipython:: python - - df.apply(lambda x: pd.Series([1, 2, 3], index=['D', 'E', 'F']), axis=1) - -.. _whatsnew_0230.api_breaking.concat: - -Concatenation will no longer sort -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -In a future version of pandas :func:`pandas.concat` will no longer sort the non-concatenation axis when it is not already aligned. -The current behavior is the same as the previous (sorting), but now a warning is issued when ``sort`` is not specified and the non-concatenation axis is not aligned (:issue:`4588`). - -.. 
ipython:: python - :okwarning: - - df1 = pd.DataFrame({"a": [1, 2], "b": [1, 2]}, columns=['b', 'a']) - df2 = pd.DataFrame({"a": [4, 5]}) - - pd.concat([df1, df2]) - -To keep the previous behavior (sorting) and silence the warning, pass ``sort=True`` - -.. ipython:: python - - pd.concat([df1, df2], sort=True) - -To accept the future behavior (no sorting), pass ``sort=False`` - -.. ipython:: python - - pd.concat([df1, df2], sort=False) - -Note that this change also applies to :meth:`DataFrame.append`, which has also received a ``sort`` keyword for controlling this behavior. - - -.. _whatsnew_0230.api_breaking.build_changes: - -Build changes -^^^^^^^^^^^^^ - -- Building pandas for development now requires ``cython >= 0.24`` (:issue:`18613`) -- Building from source now explicitly requires ``setuptools`` in ``setup.py`` (:issue:`18113`) -- Updated conda recipe to be in compliance with conda-build 3.0+ (:issue:`18002`) - -.. _whatsnew_0230.api_breaking.index_division_by_zero: - -Index division by zero fills correctly -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Division operations on ``Index`` and subclasses will now fill division of positive numbers by zero with ``np.inf``, division of negative numbers by zero with ``-np.inf`` and ``0 / 0`` with ``np.nan``. This matches existing ``Series`` behavior. (:issue:`19322`, :issue:`19347`) - -Previous behavior: - -.. code-block:: ipython - - In [6]: index = pd.Int64Index([-1, 0, 1]) - - In [7]: index / 0 - Out[7]: Int64Index([0, 0, 0], dtype='int64') - - # Previous behavior yielded different results depending on the type of zero in the divisor - In [8]: index / 0.0 - Out[8]: Float64Index([-inf, nan, inf], dtype='float64') - - In [9]: index = pd.UInt64Index([0, 1]) - - In [10]: index / np.array([0, 0], dtype=np.uint64) - Out[10]: UInt64Index([0, 0], dtype='uint64') - - In [11]: pd.RangeIndex(1, 5) / 0 - ZeroDivisionError: integer division or modulo by zero - -Current behavior: - -..
code-block:: ipython - - In [12]: index = pd.Int64Index([-1, 0, 1]) - # division by zero gives -infinity where negative, - # +infinity where positive, and NaN for 0 / 0 - In [13]: index / 0 - - # The result of division by zero should not depend on - # whether the zero is int or float - In [14]: index / 0.0 - - In [15]: index = pd.UInt64Index([0, 1]) - In [16]: index / np.array([0, 0], dtype=np.uint64) - - In [17]: pd.RangeIndex(1, 5) / 0 - -.. _whatsnew_0230.api_breaking.extract: - -Extraction of matching patterns from strings -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -By default, extracting matching patterns from strings with :func:`str.extract` used to return a -``Series`` if a single group was being extracted (a ``DataFrame`` if more than one group was -extracted). As of pandas 0.23.0 :func:`str.extract` always returns a ``DataFrame``, unless -``expand`` is set to ``False``. Finally, ``None`` was an accepted value for -the ``expand`` parameter (which was equivalent to ``False``), but now raises a ``ValueError``. (:issue:`11386`) - -Previous behavior: - -.. code-block:: ipython - - In [1]: s = pd.Series(['number 10', '12 eggs']) - - In [2]: extracted = s.str.extract(r'.*(\d\d).*') - - In [3]: extracted - Out [3]: - 0 10 - 1 12 - dtype: object - - In [4]: type(extracted) - Out [4]: - pandas.core.series.Series - -New behavior: - -.. ipython:: python - - s = pd.Series(['number 10', '12 eggs']) - extracted = s.str.extract(r'.*(\d\d).*') - extracted - type(extracted) - -To restore previous behavior, simply set ``expand`` to ``False``: - -.. ipython:: python - - s = pd.Series(['number 10', '12 eggs']) - extracted = s.str.extract(r'.*(\d\d).*', expand=False) - extracted - type(extracted) - -.. 
_whatsnew_0230.api_breaking.cdt_ordered: - -Default value for the ``ordered`` parameter of ``CategoricalDtype`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The default value of the ``ordered`` parameter for :class:`~pandas.api.types.CategoricalDtype` has changed from ``False`` to ``None`` to allow updating of ``categories`` without impacting ``ordered``. Behavior should remain consistent for downstream objects, such as :class:`Categorical` (:issue:`18790`) - -In previous versions, the default value for the ``ordered`` parameter was ``False``. This could potentially lead to the ``ordered`` parameter unintentionally being changed from ``True`` to ``False`` when users attempt to update ``categories`` if ``ordered`` is not explicitly specified, as it would silently default to ``False``. The new behavior for ``ordered=None`` is to retain the existing value of ``ordered``. - -New behavior: - -.. code-block:: ipython - - In [2]: from pandas.api.types import CategoricalDtype - - In [3]: cat = pd.Categorical(list('abcaba'), ordered=True, categories=list('cba')) - - In [4]: cat - Out[4]: - [a, b, c, a, b, a] - Categories (3, object): [c < b < a] - - In [5]: cdt = CategoricalDtype(categories=list('cbad')) - - In [6]: cat.astype(cdt) - Out[6]: - [a, b, c, a, b, a] - Categories (4, object): [c < b < a < d] - -Notice in the example above that the converted ``Categorical`` has retained ``ordered=True``. Had the default value for ``ordered`` remained as ``False``, the converted ``Categorical`` would have become unordered, despite ``ordered=False`` never being explicitly specified. To change the value of ``ordered``, explicitly pass it to the new dtype, e.g. ``CategoricalDtype(categories=list('cbad'), ordered=False)``. 
- -Note that the unintentional conversion of ``ordered`` discussed above did not arise in previous versions due to separate bugs that prevented ``astype`` from doing any type of category to category conversion (:issue:`10696`, :issue:`18593`). These bugs have been fixed in this release, and motivated changing the default value of ``ordered``. - -.. _whatsnew_0230.api_breaking.pretty_printing: - -Better pretty-printing of DataFrames in a terminal -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Previously, the default value for the maximum number of columns was -``pd.options.display.max_columns=20``. This meant that relatively wide data -frames would not fit within the terminal width, and pandas would introduce line -breaks to display these 20 columns. This resulted in an output that was -relatively difficult to read: - -.. image:: ../_static/print_df_old.png - -If Python runs in a terminal, the maximum number of columns is now determined -automatically so that the printed data frame fits within the current terminal -width (``pd.options.display.max_columns=0``) (:issue:`17023`). If Python runs -as a Jupyter kernel (such as the Jupyter QtConsole or a Jupyter notebook, as -well as in many IDEs), this value cannot be inferred automatically and is thus -set to ``20`` as in previous versions. In a terminal, this results in a much -nicer output: - -.. image:: ../_static/print_df_new.png - -Note that if you don't like the new default, you can always set this option -yourself. To revert to the old setting, you can run this line: - -.. code-block:: python - - pd.options.display.max_columns = 20 - -.. 
_whatsnew_0230.api.datetimelike: - -Datetimelike API changes -^^^^^^^^^^^^^^^^^^^^^^^^ - -- The default ``Timedelta`` constructor now accepts an ``ISO 8601 Duration`` string as an argument (:issue:`19040`) -- Subtracting ``NaT`` from a :class:`Series` with ``dtype='datetime64[ns]'`` returns a ``Series`` with ``dtype='timedelta64[ns]'`` instead of ``dtype='datetime64[ns]'`` (:issue:`18808`) -- Addition or subtraction of ``NaT`` from :class:`TimedeltaIndex` will return ``TimedeltaIndex`` instead of ``DatetimeIndex`` (:issue:`19124`) -- :func:`DatetimeIndex.shift` and :func:`TimedeltaIndex.shift` will now raise ``NullFrequencyError`` (which subclasses ``ValueError``, which was raised in older versions) when the index object frequency is ``None`` (:issue:`19147`) -- Addition and subtraction of ``NaN`` from a :class:`Series` with ``dtype='timedelta64[ns]'`` will raise a ``TypeError`` instead of treating the ``NaN`` as ``NaT`` (:issue:`19274`) -- ``NaT`` division with :class:`datetime.timedelta` will now return ``NaN`` instead of raising (:issue:`17876`) -- Operations between a :class:`Series` with dtype ``dtype='datetime64[ns]'`` and a :class:`PeriodIndex` will correctly raise ``TypeError`` (:issue:`18850`) -- Subtraction of :class:`Series` with timezone-aware ``dtype='datetime64[ns]'`` with mismatched timezones will raise ``TypeError`` instead of ``ValueError`` (:issue:`18817`) -- :class:`Timestamp` will no longer silently ignore unused or invalid ``tz`` or ``tzinfo`` keyword arguments (:issue:`17690`) -- :class:`Timestamp` will no longer silently ignore invalid ``freq`` arguments (:issue:`5168`) -- :class:`CacheableOffset` and :class:`WeekDay` are no longer available in the ``pandas.tseries.offsets`` module (:issue:`17830`) -- ``pandas.tseries.frequencies.get_freq_group()`` and ``pandas.tseries.frequencies.DAYS`` are removed from the public API (:issue:`18034`) -- :func:`Series.truncate` and :func:`DataFrame.truncate` will raise a ``ValueError`` if the index is not
sorted instead of an unhelpful ``KeyError`` (:issue:`17935`) -- :attr:`Series.first` and :attr:`DataFrame.first` will now raise a ``TypeError`` - rather than ``NotImplementedError`` when index is not a :class:`DatetimeIndex` (:issue:`20725`). -- :attr:`Series.last` and :attr:`DataFrame.last` will now raise a ``TypeError`` - rather than ``NotImplementedError`` when index is not a :class:`DatetimeIndex` (:issue:`20725`). -- Restricted ``DateOffset`` keyword arguments. Previously, ``DateOffset`` subclasses allowed arbitrary keyword arguments which could lead to unexpected behavior. Now, only valid arguments will be accepted. (:issue:`17176`, :issue:`18226`). -- :func:`pandas.merge` provides a more informative error message when trying to merge on timezone-aware and timezone-naive columns (:issue:`15800`) -- For :class:`DatetimeIndex` and :class:`TimedeltaIndex` with ``freq=None``, addition or subtraction of integer-dtyped array or ``Index`` will raise ``NullFrequencyError`` instead of ``TypeError`` (:issue:`19895`) -- :class:`Timestamp` constructor now accepts a ``nanosecond`` keyword or positional argument (:issue:`18898`) -- :class:`DatetimeIndex` will now raise an ``AttributeError`` when the ``tz`` attribute is set after instantiation (:issue:`3746`) -- :class:`DatetimeIndex` with a ``pytz`` timezone will now return a consistent ``pytz`` timezone (:issue:`18595`) - -.. 
_whatsnew_0230.api.other: - -Other API changes -^^^^^^^^^^^^^^^^^ - -- :func:`Series.astype` and :func:`Index.astype` with an incompatible dtype will now raise a ``TypeError`` rather than a ``ValueError`` (:issue:`18231`) -- ``Series`` construction with an ``object`` dtyped tz-aware datetime and ``dtype=object`` specified, will now return an ``object`` dtyped ``Series``, previously this would infer the datetime dtype (:issue:`18231`) -- A :class:`Series` of ``dtype=category`` constructed from an empty ``dict`` will now have categories of ``dtype=object`` rather than ``dtype=float64``, consistently with the case in which an empty list is passed (:issue:`18515`) -- All-NaN levels in a ``MultiIndex`` are now assigned ``float`` rather than ``object`` dtype, promoting consistency with ``Index`` (:issue:`17929`). -- Levels names of a ``MultiIndex`` (when not None) are now required to be unique: trying to create a ``MultiIndex`` with repeated names will raise a ``ValueError`` (:issue:`18872`) -- Both construction and renaming of ``Index``/``MultiIndex`` with non-hashable ``name``/``names`` will now raise ``TypeError`` (:issue:`20527`) -- :func:`Index.map` can now accept ``Series`` and dictionary input objects (:issue:`12756`, :issue:`18482`, :issue:`18509`). -- :func:`DataFrame.unstack` will now default to filling with ``np.nan`` for ``object`` columns. (:issue:`12815`) -- :class:`IntervalIndex` constructor will raise if the ``closed`` parameter conflicts with how the input data is inferred to be closed (:issue:`18421`) -- Inserting missing values into indexes will work for all types of indexes and automatically insert the correct type of missing value (``NaN``, ``NaT``, etc.) regardless of the type passed in (:issue:`18295`) -- When created with duplicate labels, ``MultiIndex`` now raises a ``ValueError``. 
(:issue:`17464`) -- :func:`Series.fillna` now raises a ``TypeError`` instead of a ``ValueError`` when passed a list, tuple or DataFrame as a ``value`` (:issue:`18293`) -- :func:`pandas.DataFrame.merge` no longer casts a ``float`` column to ``object`` when merging on ``int`` and ``float`` columns (:issue:`16572`) -- :func:`pandas.merge` now raises a ``ValueError`` when trying to merge on incompatible data types (:issue:`9780`) -- The default NA value for :class:`UInt64Index` has changed from 0 to ``NaN``, which impacts methods that mask with NA, such as ``UInt64Index.where()`` (:issue:`18398`) -- Refactored ``setup.py`` to use ``find_packages`` instead of explicitly listing out all subpackages (:issue:`18535`) -- Rearranged the order of keyword arguments in :func:`read_excel` to align with :func:`read_csv` (:issue:`16672`) -- :func:`wide_to_long` previously kept numeric-like suffixes as ``object`` dtype. Now they are cast to numeric if possible (:issue:`17627`) -- In :func:`read_excel`, the ``comment`` argument is now exposed as a named parameter (:issue:`18735`) -- Rearranged the order of keyword arguments in :func:`read_excel` to align with :func:`read_csv` (:issue:`16672`) -- The options ``html.border`` and ``mode.use_inf_as_null`` were deprecated in prior versions, these will now show ``FutureWarning`` rather than a ``DeprecationWarning`` (:issue:`19003`) -- :class:`IntervalIndex` and ``IntervalDtype`` no longer support categorical, object, and string subtypes (:issue:`19016`) -- ``IntervalDtype`` now returns ``True`` when compared against ``'interval'`` regardless of subtype, and ``IntervalDtype.name`` now returns ``'interval'`` regardless of subtype (:issue:`18980`) -- ``KeyError`` now raises instead of ``ValueError`` in :meth:`~DataFrame.drop`, :meth:`~Panel.drop`, :meth:`~Series.drop`, :meth:`~Index.drop` when dropping a non-existent element in an axis with duplicates (:issue:`19186`) -- :func:`Series.to_csv` now accepts a ``compression`` argument that works 
in the same way as the ``compression`` argument in :func:`DataFrame.to_csv` (:issue:`18958`) -- Set operations (union, difference...) on :class:`IntervalIndex` with incompatible index types will now raise a ``TypeError`` rather than a ``ValueError`` (:issue:`19329`) -- :class:`DateOffset` objects render more simply, e.g. ```` instead of ```` (:issue:`19403`) -- ``Categorical.fillna`` now validates its ``value`` and ``method`` keyword arguments. It now raises when both or none are specified, matching the behavior of :meth:`Series.fillna` (:issue:`19682`) -- ``pd.to_datetime('today')`` now returns a datetime, consistent with ``pd.Timestamp('today')``; previously ``pd.to_datetime('today')`` returned a ``.normalized()`` datetime (:issue:`19935`) -- :func:`Series.str.replace` now takes an optional ``regex`` keyword which, when set to ``False``, uses literal string replacement rather than regex replacement (:issue:`16808`) -- :func:`DatetimeIndex.strftime` and :func:`PeriodIndex.strftime` now return an ``Index`` instead of a numpy array to be consistent with similar accessors (:issue:`20127`) -- Constructing a Series from a list of length 1 no longer broadcasts this list when a longer index is specified (:issue:`19714`, :issue:`20391`). -- :func:`DataFrame.to_dict` with ``orient='index'`` no longer casts int columns to float for a DataFrame with only int and float columns (:issue:`18580`) -- A user-defined-function that is passed to :func:`Series.rolling().aggregate() <.Rolling.aggregate>`, :func:`DataFrame.rolling().aggregate() <.Rolling.aggregate>`, or its expanding cousins, will now *always* be passed a ``Series``, rather than a ``np.array``; ``.apply()`` only has the ``raw`` keyword, see :ref:`here `. This is consistent with the signatures of ``.aggregate()`` across pandas (:issue:`20584`) -- Rolling and Expanding types raise ``NotImplementedError`` upon iteration (:issue:`11704`). - -.. 
_whatsnew_0230.deprecations: - -Deprecations -~~~~~~~~~~~~ - -- ``Series.from_array`` and ``SparseSeries.from_array`` are deprecated. Use the normal constructor ``Series(..)`` and ``SparseSeries(..)`` instead (:issue:`18213`). -- ``DataFrame.as_matrix`` is deprecated. Use ``DataFrame.values`` instead (:issue:`18458`). -- ``Series.asobject``, ``DatetimeIndex.asobject``, ``PeriodIndex.asobject`` and ``TimeDeltaIndex.asobject`` have been deprecated. Use ``.astype(object)`` instead (:issue:`18572`) -- Grouping by a tuple of keys now emits a ``FutureWarning`` and is deprecated. - In the future, a tuple passed to ``'by'`` will always refer to a single key - that is the actual tuple, instead of treating the tuple as multiple keys. To - retain the previous behavior, use a list instead of a tuple (:issue:`18314`) -- ``Series.valid`` is deprecated. Use :meth:`Series.dropna` instead (:issue:`18800`). -- :func:`read_excel` has deprecated the ``skip_footer`` parameter. Use ``skipfooter`` instead (:issue:`18836`) -- :meth:`ExcelFile.parse` has deprecated ``sheetname`` in favor of ``sheet_name`` for consistency with :func:`read_excel` (:issue:`20920`). -- The ``is_copy`` attribute is deprecated and will be removed in a future version (:issue:`18801`). -- ``IntervalIndex.from_intervals`` is deprecated in favor of the :class:`IntervalIndex` constructor (:issue:`19263`) -- ``DataFrame.from_items`` is deprecated. Use :func:`DataFrame.from_dict` instead, or ``DataFrame.from_dict(OrderedDict())`` if you wish to preserve the key order (:issue:`17320`, :issue:`17312`) -- Indexing a :class:`MultiIndex` or a :class:`FloatIndex` with a list containing some missing keys will now show a :class:`FutureWarning`, which is consistent with other types of indexes (:issue:`17758`). 
- -- The ``broadcast`` parameter of ``.apply()`` is deprecated in favor of ``result_type='broadcast'`` (:issue:`18577`) -- The ``reduce`` parameter of ``.apply()`` is deprecated in favor of ``result_type='reduce'`` (:issue:`18577`) -- The ``order`` parameter of :func:`factorize` is deprecated and will be removed in a future release (:issue:`19727`) -- :attr:`Timestamp.weekday_name`, :attr:`DatetimeIndex.weekday_name`, and :attr:`Series.dt.weekday_name` are deprecated in favor of :meth:`Timestamp.day_name`, :meth:`DatetimeIndex.day_name`, and :meth:`Series.dt.day_name` (:issue:`12806`) - -- ``pandas.tseries.plotting.tsplot`` is deprecated. Use :func:`Series.plot` instead (:issue:`18627`) -- ``Index.summary()`` is deprecated and will be removed in a future version (:issue:`18217`) -- ``NDFrame.get_ftype_counts()`` is deprecated and will be removed in a future version (:issue:`18243`) -- The ``convert_datetime64`` parameter in :func:`DataFrame.to_records` has been deprecated and will be removed in a future version. The NumPy bug motivating this parameter has been resolved. The default value for this parameter has also changed from ``True`` to ``None`` (:issue:`18160`). -- :func:`Series.rolling().apply() <.Rolling.apply>`, :func:`DataFrame.rolling().apply() <.Rolling.apply>`, :func:`Series.expanding().apply() <.Expanding.apply>`, and :func:`DataFrame.expanding().apply() <.Expanding.apply>` have deprecated passing an ``np.array`` by default. One will need to pass the new ``raw`` parameter to be explicit about what is passed (:issue:`20584`) -- The ``data``, ``base``, ``strides``, ``flags`` and ``itemsize`` properties - of the ``Series`` and ``Index`` classes have been deprecated and will be - removed in a future version (:issue:`20419`). -- ``DatetimeIndex.offset`` is deprecated. Use ``DatetimeIndex.freq`` instead (:issue:`20716`) -- Floor division between an integer ndarray and a :class:`Timedelta` is deprecated. 
Divide by :attr:`Timedelta.value` instead (:issue:`19761`) -- Setting ``PeriodIndex.freq`` (which was not guaranteed to work correctly) is deprecated. Use :meth:`PeriodIndex.asfreq` instead (:issue:`20678`) -- ``Index.get_duplicates()`` is deprecated and will be removed in a future version (:issue:`20239`) -- The previous default behavior of negative indices in ``Categorical.take`` is deprecated. In a future version it will change from meaning missing values to meaning positional indices from the right. The future behavior is consistent with :meth:`Series.take` (:issue:`20664`). -- Passing multiple axes to the ``axis`` parameter in :func:`DataFrame.dropna` has been deprecated and will be removed in a future version (:issue:`20987`) - - -.. _whatsnew_0230.prior_deprecations: - -Removal of prior version deprecations/changes -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -- Warnings against the obsolete usage ``Categorical(codes, categories)``, which were emitted for instance when the first two arguments to ``Categorical()`` had different dtypes, and recommended the use of ``Categorical.from_codes``, have now been removed (:issue:`8074`) -- The ``levels`` and ``labels`` attributes of a ``MultiIndex`` can no longer be set directly (:issue:`4039`). -- ``pd.tseries.util.pivot_annual`` has been removed (deprecated since v0.19). Use ``pivot_table`` instead (:issue:`18370`) -- ``pd.tseries.util.isleapyear`` has been removed (deprecated since v0.19). Use ``.is_leap_year`` property in Datetime-likes instead (:issue:`18370`) -- ``pd.ordered_merge`` has been removed (deprecated since v0.19). 
Use ``pd.merge_ordered`` instead (:issue:`18459`) -- The ``SparseList`` class has been removed (:issue:`14007`) -- The ``pandas.io.wb`` and ``pandas.io.data`` stub modules have been removed (:issue:`13735`) -- ``Categorical.from_array`` has been removed (:issue:`13854`) -- The ``freq`` and ``how`` parameters have been removed from the ``rolling``/``expanding``/``ewm`` methods of DataFrame - and Series (deprecated since v0.18). Instead, resample before calling the methods. (:issue:`18601` & :issue:`18668`) -- ``DatetimeIndex.to_datetime``, ``Timestamp.to_datetime``, ``PeriodIndex.to_datetime``, and ``Index.to_datetime`` have been removed (:issue:`8254`, :issue:`14096`, :issue:`14113`) -- :func:`read_csv` has dropped the ``skip_footer`` parameter (:issue:`13386`) -- :func:`read_csv` has dropped the ``as_recarray`` parameter (:issue:`13373`) -- :func:`read_csv` has dropped the ``buffer_lines`` parameter (:issue:`13360`) -- :func:`read_csv` has dropped the ``compact_ints`` and ``use_unsigned`` parameters (:issue:`13323`) -- The ``Timestamp`` class has dropped the ``offset`` attribute in favor of ``freq`` (:issue:`13593`) -- The ``Series``, ``Categorical``, and ``Index`` classes have dropped the ``reshape`` method (:issue:`13012`) -- ``pandas.tseries.frequencies.get_standard_freq`` has been removed in favor of ``pandas.tseries.frequencies.to_offset(freq).rule_code`` (:issue:`13874`) -- The ``freqstr`` keyword has been removed from ``pandas.tseries.frequencies.to_offset`` in favor of ``freq`` (:issue:`13874`) -- The ``Panel4D`` and ``PanelND`` classes have been removed (:issue:`13776`) -- The ``Panel`` class has dropped the ``to_long`` and ``toLong`` methods (:issue:`19077`) -- The options ``display.line_with`` and ``display.height`` are removed in favor of ``display.width`` and ``display.max_rows`` respectively (:issue:`4391`, :issue:`19107`) -- The ``labels`` attribute of the ``Categorical`` class has been removed in favor of :attr:`Categorical.codes` (:issue:`7768`) 
The ``flavor`` parameter has been removed from the :func:`to_sql` method (:issue:`13611`)
sped up through vectorization of underlying methods (:issue:`18092`) -- Improved performance of ``.map()`` with a ``Series/dict`` input (:issue:`15081`) -- The overridden ``Timedelta`` properties of days, seconds and microseconds have been removed, leveraging their built-in Python versions instead (:issue:`18242`) -- ``Series`` construction will reduce the number of copies made of the input data in certain cases (:issue:`17449`) -- Improved performance of :func:`Series.dt.date` and :func:`DatetimeIndex.date` (:issue:`18058`) -- Improved performance of :func:`Series.dt.time` and :func:`DatetimeIndex.time` (:issue:`18461`) -- Improved performance of :func:`IntervalIndex.symmetric_difference` (:issue:`18475`) -- Improved performance of ``DatetimeIndex`` and ``Series`` arithmetic operations with Business-Month and Business-Quarter frequencies (:issue:`18489`) -- :func:`Series` / :func:`DataFrame` tab completion limits to 100 values, for better performance. (:issue:`18587`) -- Improved performance of :func:`DataFrame.median` with ``axis=1`` when bottleneck is not installed (:issue:`16468`) -- Improved performance of :func:`MultiIndex.get_loc` for large indexes, at the cost of a reduction in performance for small ones (:issue:`18519`) -- Improved performance of :func:`MultiIndex.remove_unused_levels` when there are no unused levels, at the cost of a reduction in performance when there are (:issue:`19289`) -- Improved performance of :func:`Index.get_loc` for non-unique indexes (:issue:`19478`) -- Improved performance of pairwise ``.rolling()`` and ``.expanding()`` with ``.cov()`` and ``.corr()`` operations (:issue:`17917`) -- Improved performance of :func:`.GroupBy.rank` (:issue:`15779`) -- Improved performance of variable ``.rolling()`` on ``.min()`` and ``.max()`` (:issue:`19521`) -- Improved performance of :func:`.GroupBy.ffill` and :func:`.GroupBy.bfill` (:issue:`11296`) -- Improved performance of :func:`.GroupBy.any` and :func:`.GroupBy.all` (:issue:`15435`) -- 
Improved performance of :func:`.GroupBy.pct_change` (:issue:`19165`) -- Improved performance of :func:`Series.isin` in the case of categorical dtypes (:issue:`20003`) -- Improved performance of ``getattr(Series, attr)`` when the Series has certain index types. This manifested in slow printing of large Series with a ``DatetimeIndex`` (:issue:`19764`) -- Fixed a performance regression for :func:`GroupBy.nth` and :func:`GroupBy.last` with some object columns (:issue:`19283`) -- Improved performance of :func:`.Categorical.from_codes` (:issue:`18501`) - -.. _whatsnew_0230.docs: - -Documentation changes -~~~~~~~~~~~~~~~~~~~~~ - -Thanks to all of the contributors who participated in the pandas Documentation -Sprint, which took place on March 10th. We had about 500 participants from over -30 locations across the world. You should notice that many of the -:ref:`API docstrings ` have greatly improved. - -There were too many simultaneous contributions to include a release note for each -improvement, but this `GitHub search`_ should give you an idea of how many docstrings -were improved. - -Special thanks to `Marc Garcia`_ for organizing the sprint. For more information, -read the `NumFOCUS blogpost`_ recapping the sprint. - -.. _GitHub search: https://github.com/pandas-dev/pandas/pulls?utf8=%E2%9C%93&q=is%3Apr+label%3ADocs+created%3A2018-03-10..2018-03-15+ -.. _NumFOCUS blogpost: https://www.numfocus.org/blog/worldwide-pandas-sprint/ -.. _Marc Garcia: https://github.com/datapythonista - -- Changed spelling of "numpy" to "NumPy", and "python" to "Python". (:issue:`19017`) -- Consistency when introducing code samples, using either colon or period. - Rewrote some sentences for greater clarity, added more dynamic references - to functions, methods and classes. - (:issue:`18941`, :issue:`18948`, :issue:`18973`, :issue:`19017`) -- Added a reference to :func:`DataFrame.assign` in the concatenate section of the merging documentation (:issue:`18665`) - -.. 
_whatsnew_0230.bug_fixes: - -Bug fixes -~~~~~~~~~ - -Categorical -^^^^^^^^^^^ - -.. warning:: - - A class of bugs were introduced in pandas 0.21 with ``CategoricalDtype`` that - affects the correctness of operations like ``merge``, ``concat``, and - indexing when comparing multiple unordered ``Categorical`` arrays that have - the same categories, but in a different order. We highly recommend upgrading - or manually aligning your categories before doing these operations. - -- Bug in ``Categorical.equals`` returning the wrong result when comparing two - unordered ``Categorical`` arrays with the same categories, but in a different - order (:issue:`16603`) -- Bug in :func:`pandas.api.types.union_categoricals` returning the wrong result - when for unordered categoricals with the categories in a different order. - This affected :func:`pandas.concat` with Categorical data (:issue:`19096`). -- Bug in :func:`pandas.merge` returning the wrong result when joining on an - unordered ``Categorical`` that had the same categories but in a different - order (:issue:`19551`) -- Bug in :meth:`CategoricalIndex.get_indexer` returning the wrong result when - ``target`` was an unordered ``Categorical`` that had the same categories as - ``self`` but in a different order (:issue:`19551`) -- Bug in :meth:`Index.astype` with a categorical dtype where the resultant index is not converted to a :class:`CategoricalIndex` for all types of index (:issue:`18630`) -- Bug in :meth:`Series.astype` and ``Categorical.astype()`` where an existing categorical data does not get updated (:issue:`10696`, :issue:`18593`) -- Bug in :meth:`Series.str.split` with ``expand=True`` incorrectly raising an IndexError on empty strings (:issue:`20002`). 
-- Bug in :class:`Index` constructor with ``dtype=CategoricalDtype(...)`` where ``categories`` and ``ordered`` are not maintained (:issue:`19032`) -- Bug in :class:`Series` constructor with scalar and ``dtype=CategoricalDtype(...)`` where ``categories`` and ``ordered`` are not maintained (:issue:`19565`) -- Bug in ``Categorical.__iter__`` not converting to Python types (:issue:`19909`) -- Bug in :func:`pandas.factorize` returning the unique codes for the ``uniques``. This now returns a ``Categorical`` with the same dtype as the input (:issue:`19721`) -- Bug in :func:`pandas.factorize` including an item for missing values in the ``uniques`` return value (:issue:`19721`) -- Bug in :meth:`Series.take` with categorical data interpreting ``-1`` in ``indices`` as missing value markers, rather than the last element of the Series (:issue:`20664`) - -Datetimelike -^^^^^^^^^^^^ - -- Bug in :func:`Series.__sub__` subtracting a non-nanosecond ``np.datetime64`` object from a ``Series`` gave incorrect results (:issue:`7996`) -- Bug in :class:`DatetimeIndex`, :class:`TimedeltaIndex` addition and subtraction of zero-dimensional integer arrays gave incorrect results (:issue:`19012`) -- Bug in :class:`DatetimeIndex` and :class:`TimedeltaIndex` where adding or subtracting an array-like of ``DateOffset`` objects either raised (``np.array``, ``pd.Index``) or broadcast incorrectly (``pd.Series``) (:issue:`18849`) -- Bug in :func:`Series.__add__` adding Series with dtype ``timedelta64[ns]`` to a timezone-aware ``DatetimeIndex`` incorrectly dropped timezone information (:issue:`13905`) -- Adding a ``Period`` object to a ``datetime`` or ``Timestamp`` object will now correctly raise a ``TypeError`` (:issue:`17983`) -- Bug in :class:`Timestamp` where comparison with an array of ``Timestamp`` objects would result in a ``RecursionError`` (:issue:`15183`) -- Bug in :class:`Series` floor-division where operating on a scalar ``timedelta`` raises an exception (:issue:`18846`) -- Bug in 
:class:`DatetimeIndex` where the repr was not showing high-precision time values at the end of a day (e.g., 23:59:59.999999999) (:issue:`19030`) -- Bug in ``.astype()`` to non-ns timedelta units would hold the incorrect dtype (:issue:`19176`, :issue:`19223`, :issue:`12425`) -- Bug in subtracting :class:`Series` from ``NaT`` incorrectly returning ``NaT`` (:issue:`19158`) -- Bug in :func:`Series.truncate` which raises ``TypeError`` with a monotonic ``PeriodIndex`` (:issue:`17717`) -- Bug in :func:`~DataFrame.pct_change` using ``periods`` and ``freq`` returned different length outputs (:issue:`7292`) -- Bug in comparison of :class:`DatetimeIndex` against ``None`` or ``datetime.date`` objects raising ``TypeError`` for ``==`` and ``!=`` comparisons instead of all-``False`` and all-``True``, respectively (:issue:`19301`) -- Bug in :class:`Timestamp` and :func:`to_datetime` where a string representing a barely out-of-bounds timestamp would be incorrectly rounded down instead of raising ``OutOfBoundsDatetime`` (:issue:`19382`) -- Bug in :func:`Timestamp.floor` :func:`DatetimeIndex.floor` where time stamps far in the future and past were not rounded correctly (:issue:`19206`) -- Bug in :func:`to_datetime` where passing an out-of-bounds datetime with ``errors='coerce'`` and ``utc=True`` would raise ``OutOfBoundsDatetime`` instead of parsing to ``NaT`` (:issue:`19612`) -- Bug in :class:`DatetimeIndex` and :class:`TimedeltaIndex` addition and subtraction where name of the returned object was not always set consistently. 
(:issue:`19744`) -- Bug in :class:`DatetimeIndex` and :class:`TimedeltaIndex` addition and subtraction where operations with numpy arrays raised ``TypeError`` (:issue:`19847`) -- Bug in :class:`DatetimeIndex` and :class:`TimedeltaIndex` where setting the ``freq`` attribute was not fully supported (:issue:`20678`) - -Timedelta -^^^^^^^^^ - -- Bug in :func:`Timedelta.__mul__` where multiplying by ``NaT`` returned ``NaT`` instead of raising a ``TypeError`` (:issue:`19819`) -- Bug in :class:`Series` with ``dtype='timedelta64[ns]'`` where addition or subtraction of ``TimedeltaIndex`` had results cast to ``dtype='int64'`` (:issue:`17250`) -- Bug in :class:`Series` with ``dtype='timedelta64[ns]'`` where addition or subtraction of ``TimedeltaIndex`` could return a ``Series`` with an incorrect name (:issue:`19043`) -- Bug in :func:`Timedelta.__floordiv__` and :func:`Timedelta.__rfloordiv__` dividing by many incompatible numpy objects was incorrectly allowed (:issue:`18846`) -- Bug where dividing a scalar timedelta-like object with :class:`TimedeltaIndex` performed the reciprocal operation (:issue:`19125`) -- Bug in :class:`TimedeltaIndex` where division by a ``Series`` would return a ``TimedeltaIndex`` instead of a ``Series`` (:issue:`19042`) -- Bug in :func:`Timedelta.__add__`, :func:`Timedelta.__sub__` where adding or subtracting a ``np.timedelta64`` object would return another ``np.timedelta64`` instead of a ``Timedelta`` (:issue:`19738`) -- Bug in :func:`Timedelta.__floordiv__`, :func:`Timedelta.__rfloordiv__` where operating with a ``Tick`` object would raise a ``TypeError`` instead of returning a numeric value (:issue:`19738`) -- Bug in :func:`Period.asfreq` where periods near ``datetime(1, 1, 1)`` could be converted incorrectly (:issue:`19643`, :issue:`19834`) -- Bug in :func:`Timedelta.total_seconds` causing precision errors, for example ``Timedelta('30S').total_seconds()==30.000000000000004`` (:issue:`19458`) -- Bug in :func:`Timedelta.__rmod__` where operating 
with a ``numpy.timedelta64`` returned a ``timedelta64`` object instead of a ``Timedelta`` (:issue:`19820`) -- Multiplication of :class:`TimedeltaIndex` by ``TimedeltaIndex`` will now raise ``TypeError`` instead of raising ``ValueError`` in cases of length mismatch (:issue:`19333`) -- Bug in indexing a :class:`TimedeltaIndex` with a ``np.timedelta64`` object which was raising a ``TypeError`` (:issue:`20393`) - - -Timezones -^^^^^^^^^ - -- Bug in creating a ``Series`` from an array that contains both tz-naive and tz-aware values will result in a ``Series`` whose dtype is tz-aware instead of object (:issue:`16406`) -- Bug in comparison of timezone-aware :class:`DatetimeIndex` against ``NaT`` incorrectly raising ``TypeError`` (:issue:`19276`) -- Bug in :meth:`DatetimeIndex.astype` when converting between timezone aware dtypes, and converting from timezone aware to naive (:issue:`18951`) -- Bug in comparing :class:`DatetimeIndex`, which failed to raise ``TypeError`` when attempting to compare timezone-aware and timezone-naive datetimelike objects (:issue:`18162`) -- Bug in localization of a naive, datetime string in a ``Series`` constructor with a ``datetime64[ns, tz]`` dtype (:issue:`174151`) -- :func:`Timestamp.replace` will now handle Daylight Savings transitions gracefully (:issue:`18319`) -- Bug in tz-aware :class:`DatetimeIndex` where addition/subtraction with a :class:`TimedeltaIndex` or array with ``dtype='timedelta64[ns]'`` was incorrect (:issue:`17558`) -- Bug in :func:`DatetimeIndex.insert` where inserting ``NaT`` into a timezone-aware index incorrectly raised (:issue:`16357`) -- Bug in :class:`DataFrame` constructor, where tz-aware Datetimeindex and a given column name will result in an empty ``DataFrame`` (:issue:`19157`) -- Bug in :func:`Timestamp.tz_localize` where localizing a timestamp near the minimum or maximum valid values could overflow and return a timestamp with an incorrect nanosecond value (:issue:`12677`) -- Bug when iterating over 
:class:`DatetimeIndex` that was localized with fixed timezone offset that rounded nanosecond precision to microseconds (:issue:`19603`) -- Bug in :func:`DataFrame.diff` that raised an ``IndexError`` with tz-aware values (:issue:`18578`) -- Bug in :func:`melt` that converted tz-aware dtypes to tz-naive (:issue:`15785`) -- Bug in :func:`Dataframe.count` that raised an ``ValueError``, if :func:`Dataframe.dropna` was called for a single column with timezone-aware values. (:issue:`13407`) - -Offsets -^^^^^^^ - -- Bug in :class:`WeekOfMonth` and :class:`Week` where addition and subtraction did not roll correctly (:issue:`18510`, :issue:`18672`, :issue:`18864`) -- Bug in :class:`WeekOfMonth` and :class:`LastWeekOfMonth` where default keyword arguments for constructor raised ``ValueError`` (:issue:`19142`) -- Bug in :class:`FY5253Quarter`, :class:`LastWeekOfMonth` where rollback and rollforward behavior was inconsistent with addition and subtraction behavior (:issue:`18854`) -- Bug in :class:`FY5253` where ``datetime`` addition and subtraction incremented incorrectly for dates on the year-end but not normalized to midnight (:issue:`18854`) -- Bug in :class:`FY5253` where date offsets could incorrectly raise an ``AssertionError`` in arithmetic operations (:issue:`14774`) - -Numeric -^^^^^^^ -- Bug in :class:`Series` constructor with an int or float list where specifying ``dtype=str``, ``dtype='str'`` or ``dtype='U'`` failed to convert the data elements to strings (:issue:`16605`) -- Bug in :class:`Index` multiplication and division methods where operating with a ``Series`` would return an ``Index`` object instead of a ``Series`` object (:issue:`19042`) -- Bug in the :class:`DataFrame` constructor in which data containing very large positive or very large negative numbers was causing ``OverflowError`` (:issue:`18584`) -- Bug in :class:`Index` constructor with ``dtype='uint64'`` where int-like floats were not coerced to :class:`UInt64Index` (:issue:`18400`) -- Bug in 
:class:`DataFrame` flex arithmetic (e.g. ``df.add(other, fill_value=foo)``) with a ``fill_value`` other than ``None`` failed to raise ``NotImplementedError`` in corner cases where either the frame or ``other`` has length zero (:issue:`19522`) -- Multiplication and division of numeric-dtyped :class:`Index` objects with timedelta-like scalars returns ``TimedeltaIndex`` instead of raising ``TypeError`` (:issue:`19333`) -- Bug where ``NaN`` was returned instead of 0 by :func:`Series.pct_change` and :func:`DataFrame.pct_change` when ``fill_method`` is not ``None`` (:issue:`19873`) - -Strings -^^^^^^^ -- Bug in :func:`Series.str.get` with a dictionary in the values and the index not in the keys, raising ``KeyError`` (:issue:`20671`) - - -Indexing -^^^^^^^^ - -- Bug in :class:`Index` construction from list of mixed type tuples (:issue:`18505`) -- Bug in :func:`Index.drop` when passing a list of both tuples and non-tuples (:issue:`18304`) -- Bug in :func:`DataFrame.drop`, :meth:`Panel.drop`, :meth:`Series.drop`, :meth:`Index.drop` where no ``KeyError`` is raised when dropping a non-existent element from an axis that contains duplicates (:issue:`19186`) -- Bug in indexing a datetimelike ``Index`` that raised ``ValueError`` instead of ``IndexError`` (:issue:`18386`). 
-- :func:`Index.to_series` now accepts ``index`` and ``name`` kwargs (:issue:`18699`) -- :func:`DatetimeIndex.to_series` now accepts ``index`` and ``name`` kwargs (:issue:`18699`) -- Bug in indexing non-scalar value from ``Series`` having non-unique ``Index`` will return value flattened (:issue:`17610`) -- Bug in indexing with iterator containing only missing keys, which raised no error (:issue:`20748`) -- Fixed inconsistency in ``.ix`` between list and scalar keys when the index has integer dtype and does not include the desired keys (:issue:`20753`) -- Bug in ``__setitem__`` when indexing a :class:`DataFrame` with a 2-d boolean ndarray (:issue:`18582`) -- Bug in ``str.extractall`` when there were no matches empty :class:`Index` was returned instead of appropriate :class:`MultiIndex` (:issue:`19034`) -- Bug in :class:`IntervalIndex` where empty and purely NA data was constructed inconsistently depending on the construction method (:issue:`18421`) -- Bug in :func:`IntervalIndex.symmetric_difference` where the symmetric difference with a non-``IntervalIndex`` did not raise (:issue:`18475`) -- Bug in :class:`IntervalIndex` where set operations that returned an empty ``IntervalIndex`` had the wrong dtype (:issue:`19101`) -- Bug in :meth:`DataFrame.drop_duplicates` where no ``KeyError`` is raised when passing in columns that don't exist on the ``DataFrame`` (:issue:`19726`) -- Bug in ``Index`` subclasses constructors that ignore unexpected keyword arguments (:issue:`19348`) -- Bug in :meth:`Index.difference` when taking difference of an ``Index`` with itself (:issue:`20040`) -- Bug in :meth:`DataFrame.first_valid_index` and :meth:`DataFrame.last_valid_index` in presence of entire rows of NaNs in the middle of values (:issue:`20499`). 
-- Bug in :class:`IntervalIndex` where some indexing operations were not supported for overlapping or non-monotonic ``uint64`` data (:issue:`20636`) -- Bug in ``Series.is_unique`` where extraneous output in stderr is shown if Series contains objects with ``__ne__`` defined (:issue:`20661`) -- Bug in ``.loc`` assignment with a single-element list-like incorrectly assigns as a list (:issue:`19474`) -- Bug in partial string indexing on a ``Series/DataFrame`` with a monotonic decreasing ``DatetimeIndex`` (:issue:`19362`) -- Bug in performing in-place operations on a ``DataFrame`` with a duplicate ``Index`` (:issue:`17105`) -- Bug in :meth:`IntervalIndex.get_loc` and :meth:`IntervalIndex.get_indexer` when used with an :class:`IntervalIndex` containing a single interval (:issue:`17284`, :issue:`20921`) -- Bug in ``.loc`` with a ``uint64`` indexer (:issue:`20722`) - -MultiIndex -^^^^^^^^^^ - -- Bug in :func:`MultiIndex.__contains__` where non-tuple keys would return ``True`` even if they had been dropped (:issue:`19027`) -- Bug in :func:`MultiIndex.set_labels` which would cause casting (and potentially clipping) of the new labels if the ``level`` argument is not 0 or a list like [0, 1, ... 
] (:issue:`19057`) -- Bug in :func:`MultiIndex.get_level_values` which would return an invalid index on level of ints with missing values (:issue:`17924`) -- Bug in :func:`MultiIndex.unique` when called on empty :class:`MultiIndex` (:issue:`20568`) -- Bug in :func:`MultiIndex.unique` which would not preserve level names (:issue:`20570`) -- Bug in :func:`MultiIndex.remove_unused_levels` which would fill nan values (:issue:`18417`) -- Bug in :func:`MultiIndex.from_tuples` which would fail to take zipped tuples in python3 (:issue:`18434`) -- Bug in :func:`MultiIndex.get_loc` which would fail to automatically cast values between float and int (:issue:`18818`, :issue:`15994`) -- Bug in :func:`MultiIndex.get_loc` which would cast boolean to integer labels (:issue:`19086`) -- Bug in :func:`MultiIndex.get_loc` which would fail to locate keys containing ``NaN`` (:issue:`18485`) -- Bug in :func:`MultiIndex.get_loc` in large :class:`MultiIndex`, would fail when levels had different dtypes (:issue:`18520`) -- Bug in indexing where nested indexers having only numpy arrays are handled incorrectly (:issue:`19686`) - - -IO -^^ - -- :func:`read_html` now rewinds seekable IO objects after parse failure, before attempting to parse with a new parser. If a parser errors and the object is non-seekable, an informative error is raised suggesting the use of a different parser (:issue:`17975`) -- :meth:`DataFrame.to_html` now has an option to add an id to the leading ``
`` tag (:issue:`8496`) -- Bug in :func:`read_msgpack` with a non existent file is passed in Python 2 (:issue:`15296`) -- Bug in :func:`read_csv` where a ``MultiIndex`` with duplicate columns was not being mangled appropriately (:issue:`18062`) -- Bug in :func:`read_csv` where missing values were not being handled properly when ``keep_default_na=False`` with dictionary ``na_values`` (:issue:`19227`) -- Bug in :func:`read_csv` causing heap corruption on 32-bit, big-endian architectures (:issue:`20785`) -- Bug in :func:`read_sas` where a file with 0 variables gave an ``AttributeError`` incorrectly. Now it gives an ``EmptyDataError`` (:issue:`18184`) -- Bug in :func:`DataFrame.to_latex` where pairs of braces meant to serve as invisible placeholders were escaped (:issue:`18667`) -- Bug in :func:`DataFrame.to_latex` where a ``NaN`` in a ``MultiIndex`` would cause an ``IndexError`` or incorrect output (:issue:`14249`) -- Bug in :func:`DataFrame.to_latex` where a non-string index-level name would result in an ``AttributeError`` (:issue:`19981`) -- Bug in :func:`DataFrame.to_latex` where the combination of an index name and the ``index_names=False`` option would result in incorrect output (:issue:`18326`) -- Bug in :func:`DataFrame.to_latex` where a ``MultiIndex`` with an empty string as its name would result in incorrect output (:issue:`18669`) -- Bug in :func:`DataFrame.to_latex` where missing space characters caused wrong escaping and produced non-valid latex in some cases (:issue:`20859`) -- Bug in :func:`read_json` where large numeric values were causing an ``OverflowError`` (:issue:`18842`) -- Bug in :func:`DataFrame.to_parquet` where an exception was raised if the write destination is S3 (:issue:`19134`) -- :class:`Interval` now supported in :func:`DataFrame.to_excel` for all Excel file types (:issue:`19242`) -- :class:`Timedelta` now supported in :func:`DataFrame.to_excel` for all Excel file types (:issue:`19242`, :issue:`9155`, :issue:`19900`) -- Bug in 
:meth:`pandas.io.stata.StataReader.value_labels` raising an ``AttributeError`` when called on very old files. Now returns an empty dict (:issue:`19417`) -- Bug in :func:`read_pickle` when unpickling objects with :class:`TimedeltaIndex` or :class:`Float64Index` created with pandas prior to version 0.20 (:issue:`19939`) -- Bug in :meth:`pandas.io.json.json_normalize` where sub-records are not properly normalized if any sub-records values are NoneType (:issue:`20030`) -- Bug in ``usecols`` parameter in :func:`read_csv` where error is not raised correctly when passing a string. (:issue:`20529`) -- Bug in :func:`HDFStore.keys` when reading a file with a soft link causes exception (:issue:`20523`) -- Bug in :func:`HDFStore.select_column` where a key which is not a valid store raised an ``AttributeError`` instead of a ``KeyError`` (:issue:`17912`) - -Plotting -^^^^^^^^ - -- Better error message when attempting to plot but matplotlib is not installed (:issue:`19810`). -- :func:`DataFrame.plot` now raises a ``ValueError`` when the ``x`` or ``y`` argument is improperly formed (:issue:`18671`) -- Bug in :func:`DataFrame.plot` when ``x`` and ``y`` arguments given as positions caused incorrect referenced columns for line, bar and area plots (:issue:`20056`) -- Bug in formatting tick labels with ``datetime.time()`` and fractional seconds (:issue:`18478`). -- :meth:`Series.plot.kde` has exposed the args ``ind`` and ``bw_method`` in the docstring (:issue:`18461`). The argument ``ind`` may now also be an integer (number of sample points). 
-- :func:`DataFrame.plot` now supports multiple columns to the ``y`` argument (:issue:`19699`) - - -GroupBy/resample/rolling -^^^^^^^^^^^^^^^^^^^^^^^^ - -- Bug when grouping by a single column and aggregating with a class like ``list`` or ``tuple`` (:issue:`18079`) -- Fixed regression in :func:`DataFrame.groupby` which would not emit an error when called with a tuple key not in the index (:issue:`18798`) -- Bug in :func:`DataFrame.resample` which silently ignored unsupported (or mistyped) options for ``label``, ``closed`` and ``convention`` (:issue:`19303`) -- Bug in :func:`DataFrame.groupby` where tuples were interpreted as lists of keys rather than as keys (:issue:`17979`, :issue:`18249`) -- Bug in :func:`DataFrame.groupby` where aggregation by ``first``/``last``/``min``/``max`` was causing timestamps to lose precision (:issue:`19526`) -- Bug in :func:`DataFrame.transform` where particular aggregation functions were being incorrectly cast to match the dtype(s) of the grouped data (:issue:`19200`) -- Bug in :func:`DataFrame.groupby` passing the ``on=`` kwarg, and subsequently using ``.apply()`` (:issue:`17813`) -- Bug in :func:`DataFrame.resample().aggregate <.Resampler.aggregate>` not raising a ``KeyError`` when aggregating a non-existent column (:issue:`16766`, :issue:`19566`) -- Bug in :func:`DataFrameGroupBy.cumsum` and :func:`DataFrameGroupBy.cumprod` when ``skipna`` was passed (:issue:`19806`) -- Bug in :func:`DataFrame.resample` that dropped timezone information (:issue:`13238`) -- Bug in :func:`DataFrame.groupby` where transformations using ``np.all`` and ``np.any`` were raising a ``ValueError`` (:issue:`20653`) -- Bug in :func:`DataFrame.resample` where ``ffill``, ``bfill``, ``pad``, ``backfill``, ``fillna``, ``interpolate``, and ``asfreq`` were ignoring ``loffset``. 
(:issue:`20744`) -- Bug in :func:`DataFrame.groupby` when applying a function that has mixed data types and the user supplied function can fail on the grouping column (:issue:`20949`) -- Bug in :func:`DataFrameGroupBy.rolling().apply() <.Rolling.apply>` where operations performed against the associated :class:`DataFrameGroupBy` object could impact the inclusion of the grouped item(s) in the result (:issue:`14013`) - -Sparse -^^^^^^ - -- Bug in which creating a :class:`SparseDataFrame` from a dense ``Series`` or an unsupported type raised an uncontrolled exception (:issue:`19374`) -- Bug in :class:`SparseDataFrame.to_csv` causing exception (:issue:`19384`) -- Bug in :class:`SparseSeries.memory_usage` which caused segfault by accessing non sparse elements (:issue:`19368`) -- Bug in constructing a :class:`SparseArray`: if ``data`` is a scalar and ``index`` is defined it will coerce to ``float64`` regardless of scalar's dtype. (:issue:`19163`) - -Reshaping -^^^^^^^^^ - -- Bug in :func:`DataFrame.merge` where referencing a ``CategoricalIndex`` by name, where the ``by`` kwarg would ``KeyError`` (:issue:`20777`) -- Bug in :func:`DataFrame.stack` which fails trying to sort mixed type levels under Python 3 (:issue:`18310`) -- Bug in :func:`DataFrame.unstack` which casts int to float if ``columns`` is a ``MultiIndex`` with unused levels (:issue:`17845`) -- Bug in :func:`DataFrame.unstack` which raises an error if ``index`` is a ``MultiIndex`` with unused labels on the unstacked level (:issue:`18562`) -- Fixed construction of a :class:`Series` from a ``dict`` containing ``NaN`` as key (:issue:`18480`) -- Fixed construction of a :class:`DataFrame` from a ``dict`` containing ``NaN`` as key (:issue:`18455`) -- Disabled construction of a :class:`Series` where len(index) > len(data) = 1, which previously would broadcast the data item, and now raises a ``ValueError`` (:issue:`18819`) -- Suppressed error in the construction of a :class:`DataFrame` from a ``dict`` containing scalar 
values when the corresponding keys are not included in the passed index (:issue:`18600`) - -- Fixed (changed from ``object`` to ``float64``) dtype of :class:`DataFrame` initialized with axes, no data, and ``dtype=int`` (:issue:`19646`) -- Bug in :func:`Series.rank` where ``Series`` containing ``NaT`` modifies the ``Series`` inplace (:issue:`18521`) -- Bug in :func:`cut` which fails when using readonly arrays (:issue:`18773`) -- Bug in :func:`DataFrame.pivot_table` which fails when the ``aggfunc`` arg is of type string. The behavior is now consistent with other methods like ``agg`` and ``apply`` (:issue:`18713`) -- Bug in :func:`DataFrame.merge` in which merging using ``Index`` objects as vectors raised an Exception (:issue:`19038`) -- Bug in :func:`DataFrame.stack`, :func:`DataFrame.unstack`, :func:`Series.unstack` which were not returning subclasses (:issue:`15563`) -- Bug in timezone comparisons, manifesting as a conversion of the index to UTC in ``.concat()`` (:issue:`18523`) -- Bug in :func:`concat` when concatenating sparse and dense series it returns only a ``SparseDataFrame``. Should be a ``DataFrame``. 
(:issue:`18914`, :issue:`18686`, and :issue:`16874`) -- Improved error message for :func:`DataFrame.merge` when there is no common merge key (:issue:`19427`) -- Bug in :func:`DataFrame.join` which does an ``outer`` instead of a ``left`` join when being called with multiple DataFrames and some have non-unique indices (:issue:`19624`) -- :func:`Series.rename` now accepts ``axis`` as a kwarg (:issue:`18589`) -- Bug in :func:`~DataFrame.rename` where an Index of same-length tuples was converted to a MultiIndex (:issue:`19497`) -- Comparisons between :class:`Series` and :class:`Index` would return a ``Series`` with an incorrect name, ignoring the ``Index``'s name attribute (:issue:`19582`) -- Bug in :func:`qcut` where datetime and timedelta data with ``NaT`` present raised a ``ValueError`` (:issue:`19768`) -- Bug in :func:`DataFrame.iterrows`, which would infers strings not compliant to `ISO8601 `_ to datetimes (:issue:`19671`) -- Bug in :class:`Series` constructor with ``Categorical`` where a ``ValueError`` is not raised when an index of different length is given (:issue:`19342`) -- Bug in :meth:`DataFrame.astype` where column metadata is lost when converting to categorical or a dictionary of dtypes (:issue:`19920`) -- Bug in :func:`cut` and :func:`qcut` where timezone information was dropped (:issue:`19872`) -- Bug in :class:`Series` constructor with a ``dtype=str``, previously raised in some cases (:issue:`19853`) -- Bug in :func:`get_dummies`, and :func:`select_dtypes`, where duplicate column names caused incorrect behavior (:issue:`20848`) -- Bug in :func:`isna`, which cannot handle ambiguous typed lists (:issue:`20675`) -- Bug in :func:`concat` which raises an error when concatenating TZ-aware dataframes and all-NaT dataframes (:issue:`12396`) -- Bug in :func:`concat` which raises an error when concatenating empty TZ-aware series (:issue:`18447`) - -Other -^^^^^ - -- Improved error message when attempting to use a Python keyword as an identifier in a ``numexpr`` 
backed query (:issue:`18221`) -- Bug in accessing a :func:`pandas.get_option`, which raised ``KeyError`` rather than ``OptionError`` when looking up a non-existent option key in some cases (:issue:`19789`) -- Bug in :func:`testing.assert_series_equal` and :func:`testing.assert_frame_equal` for Series or DataFrames with differing unicode data (:issue:`20503`) - -.. _whatsnew_0.23.0.contributors: - -Contributors -~~~~~~~~~~~~ - -.. contributors:: v0.22.0..v0.23.0 From ee3f4f2e0f953da75585944afada75bf8502f1cd Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sun, 27 Jul 2025 08:40:48 -0400 Subject: [PATCH 17/26] Restore v0.23.0 --- doc/source/whatsnew/v0.23.0.rst | 1535 +++++++++++++++++++++++++++++++ 1 file changed, 1535 insertions(+) create mode 100644 doc/source/whatsnew/v0.23.0.rst diff --git a/doc/source/whatsnew/v0.23.0.rst b/doc/source/whatsnew/v0.23.0.rst new file mode 100644 index 0000000000000..7f7609edc27b6 --- /dev/null +++ b/doc/source/whatsnew/v0.23.0.rst @@ -0,0 +1,1535 @@ +.. _whatsnew_0230: + +What's new in 0.23.0 (May 15, 2018) +----------------------------------- + +{{ header }} + +.. ipython:: python + :suppress: + + from pandas import * # noqa F401, F403 + + +This is a major release from 0.22.0 and includes a number of API changes, +deprecations, new features, enhancements, and performance improvements along +with a large number of bug fixes. We recommend that all users upgrade to this +version. + +Highlights include: + +- :ref:`Round-trippable JSON format with 'table' orient `. +- :ref:`Instantiation from dicts respects order for Python 3.6+ `. +- :ref:`Dependent column arguments for assign `. +- :ref:`Merging / sorting on a combination of columns and index levels `. +- :ref:`Extending pandas with custom types `. +- :ref:`Excluding unobserved categories from groupby `. +- :ref:`Changes to make output shape of DataFrame.apply consistent `. + +Check the :ref:`API Changes ` and :ref:`deprecations ` before updating. + +.. 
warning:: + + Starting January 1, 2019, pandas feature releases will support Python 3 only. + See `Dropping Python 2.7 `_ for more. + +.. contents:: What's new in v0.23.0 + :local: + :backlinks: none + :depth: 2 + +.. _whatsnew_0230.enhancements: + +New features +~~~~~~~~~~~~ + +.. _whatsnew_0230.enhancements.round-trippable_json: + +JSON read/write round-trippable with ``orient='table'`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A ``DataFrame`` can now be written to and subsequently read back via JSON while preserving metadata through usage of the ``orient='table'`` argument (see :issue:`18912` and :issue:`9146`). Previously, none of the available ``orient`` values guaranteed the preservation of dtypes and index names, amongst other metadata. + +.. code-block:: ipython + + In [1]: df = pd.DataFrame({'foo': [1, 2, 3, 4], + ...: 'bar': ['a', 'b', 'c', 'd'], + ...: 'baz': pd.date_range('2018-01-01', freq='d', periods=4), + ...: 'qux': pd.Categorical(['a', 'b', 'c', 'c'])}, + ...: index=pd.Index(range(4), name='idx')) + + In [2]: df + Out[2]: + foo bar baz qux + idx + 0 1 a 2018-01-01 a + 1 2 b 2018-01-02 b + 2 3 c 2018-01-03 c + 3 4 d 2018-01-04 c + + [4 rows x 4 columns] + + In [3]: df.dtypes + Out[3]: + foo int64 + bar object + baz datetime64[ns] + qux category + Length: 4, dtype: object + + In [4]: df.to_json('test.json', orient='table') + + In [5]: new_df = pd.read_json('test.json', orient='table') + + In [6]: new_df + Out[6]: + foo bar baz qux + idx + 0 1 a 2018-01-01 a + 1 2 b 2018-01-02 b + 2 3 c 2018-01-03 c + 3 4 d 2018-01-04 c + + [4 rows x 4 columns] + + In [7]: new_df.dtypes + Out[7]: + foo int64 + bar object + baz datetime64[ns] + qux category + Length: 4, dtype: object + +Please note that the string ``index`` is not supported with the round trip format, as it is used by default in ``write_json`` to indicate a missing index name. + +.. 
ipython:: python + :okwarning: + + df.index.name = 'index' + + df.to_json('test.json', orient='table') + new_df = pd.read_json('test.json', orient='table') + new_df + new_df.dtypes + +.. ipython:: python + :suppress: + + import os + os.remove('test.json') + + +.. _whatsnew_0230.enhancements.assign_dependent: + + +Method ``.assign()`` accepts dependent arguments +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The :func:`DataFrame.assign` now accepts dependent keyword arguments for python version later than 3.6 (see also `PEP 468 +`_). Later keyword arguments may now refer to earlier ones if the argument is a callable. See the +:ref:`documentation here ` (:issue:`14207`) + +.. ipython:: python + + df = pd.DataFrame({'A': [1, 2, 3]}) + df + df.assign(B=df.A, C=lambda x: x['A'] + x['B']) + +.. warning:: + + This may subtly change the behavior of your code when you're + using ``.assign()`` to update an existing column. Previously, callables + referring to other variables being updated would get the "old" values + + Previous behavior: + + .. code-block:: ipython + + In [2]: df = pd.DataFrame({"A": [1, 2, 3]}) + + In [3]: df.assign(A=lambda df: df.A + 1, C=lambda df: df.A * -1) + Out[3]: + A C + 0 2 -1 + 1 3 -2 + 2 4 -3 + + New behavior: + + .. ipython:: python + + df.assign(A=df.A + 1, C=lambda df: df.A * -1) + + + +.. _whatsnew_0230.enhancements.merge_on_columns_and_levels: + +Merging on a combination of columns and index levels +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Strings passed to :meth:`DataFrame.merge` as the ``on``, ``left_on``, and ``right_on`` +parameters may now refer to either column names or index level names. +This enables merging ``DataFrame`` instances on a combination of index levels +and columns without resetting indexes. See the :ref:`Merge on columns and +levels ` documentation section. +(:issue:`14355`) + +.. 
ipython:: python + + left_index = pd.Index(['K0', 'K0', 'K1', 'K2'], name='key1') + + left = pd.DataFrame({'A': ['A0', 'A1', 'A2', 'A3'], + 'B': ['B0', 'B1', 'B2', 'B3'], + 'key2': ['K0', 'K1', 'K0', 'K1']}, + index=left_index) + + right_index = pd.Index(['K0', 'K1', 'K2', 'K2'], name='key1') + + right = pd.DataFrame({'C': ['C0', 'C1', 'C2', 'C3'], + 'D': ['D0', 'D1', 'D2', 'D3'], + 'key2': ['K0', 'K0', 'K0', 'K1']}, + index=right_index) + + left.merge(right, on=['key1', 'key2']) + +.. _whatsnew_0230.enhancements.sort_by_columns_and_levels: + +Sorting by a combination of columns and index levels +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Strings passed to :meth:`DataFrame.sort_values` as the ``by`` parameter may +now refer to either column names or index level names. This enables sorting +``DataFrame`` instances by a combination of index levels and columns without +resetting indexes. See the :ref:`Sorting by Indexes and Values +` documentation section. +(:issue:`14353`) + +.. ipython:: python + + # Build MultiIndex + idx = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('a', 2), + ('b', 2), ('b', 1), ('b', 1)]) + idx.names = ['first', 'second'] + + # Build DataFrame + df_multi = pd.DataFrame({'A': np.arange(6, 0, -1)}, + index=idx) + df_multi + + # Sort by 'second' (index) and 'A' (column) + df_multi.sort_values(by=['second', 'A']) + + +.. _whatsnew_023.enhancements.extension: + +Extending pandas with custom types (experimental) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +pandas now supports storing array-like objects that aren't necessarily 1-D NumPy +arrays as columns in a DataFrame or values in a Series. This allows third-party +libraries to implement extensions to NumPy's types, similar to how pandas +implemented categoricals, datetimes with timezones, periods, and intervals. + +As a demonstration, we'll use cyberpandas_, which provides an ``IPArray`` type +for storing ip addresses. + +.. 
code-block:: ipython + + In [1]: from cyberpandas import IPArray + + In [2]: values = IPArray([ + ...: 0, + ...: 3232235777, + ...: 42540766452641154071740215577757643572 + ...: ]) + ...: + ...: + +``IPArray`` isn't a normal 1-D NumPy array, but because it's a pandas +:class:`~pandas.api.extensions.ExtensionArray`, it can be stored properly inside pandas' containers. + +.. code-block:: ipython + + In [3]: ser = pd.Series(values) + + In [4]: ser + Out[4]: + 0 0.0.0.0 + 1 192.168.1.1 + 2 2001:db8:85a3::8a2e:370:7334 + dtype: ip + +Notice that the dtype is ``ip``. The missing value semantics of the underlying +array are respected: + +.. code-block:: ipython + + In [5]: ser.isna() + Out[5]: + 0 True + 1 False + 2 False + dtype: bool + +For more, see the :ref:`extension types ` +documentation. If you build an extension array, publicize it on `the ecosystem page `_. + +.. _cyberpandas: https://cyberpandas.readthedocs.io/en/latest/ + + +.. _whatsnew_0230.enhancements.categorical_grouping: + +New ``observed`` keyword for excluding unobserved categories in ``GroupBy`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Grouping by a categorical includes the unobserved categories in the output. +When grouping by multiple categorical columns, this means you get the cartesian product of all the +categories, including combinations where there are no observations, which can result in a large +number of groups. We have added a keyword ``observed`` to control this behavior, it defaults to +``observed=False`` for backward-compatibility. (:issue:`14942`, :issue:`8138`, :issue:`15217`, :issue:`17594`, :issue:`8669`, :issue:`20583`, :issue:`20902`) + +.. 
ipython:: python + + cat1 = pd.Categorical(["a", "a", "b", "b"], + categories=["a", "b", "z"], ordered=True) + cat2 = pd.Categorical(["c", "d", "c", "d"], + categories=["c", "d", "y"], ordered=True) + df = pd.DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]}) + df['C'] = ['foo', 'bar'] * 2 + df + +To show all values, the previous behavior: + +.. ipython:: python + + df.groupby(['A', 'B', 'C'], observed=False).count() + + +To show only observed values: + +.. ipython:: python + + df.groupby(['A', 'B', 'C'], observed=True).count() + +For pivoting operations, this behavior is *already* controlled by the ``dropna`` keyword: + +.. ipython:: python + + cat1 = pd.Categorical(["a", "a", "b", "b"], + categories=["a", "b", "z"], ordered=True) + cat2 = pd.Categorical(["c", "d", "c", "d"], + categories=["c", "d", "y"], ordered=True) + df = pd.DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]}) + df + + +.. code-block:: ipython + + In [1]: pd.pivot_table(df, values='values', index=['A', 'B'], dropna=True) + + Out[1]: + values + A B + a c 1.0 + d 2.0 + b c 3.0 + d 4.0 + + In [2]: pd.pivot_table(df, values='values', index=['A', 'B'], dropna=False) + + Out[2]: + values + A B + a c 1.0 + d 2.0 + y NaN + b c 3.0 + d 4.0 + y NaN + z c NaN + d NaN + y NaN + + +.. _whatsnew_0230.enhancements.window_raw: + +Rolling/Expanding.apply() accepts ``raw=False`` to pass a ``Series`` to the function +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:func:`Series.rolling().apply() <.Rolling.apply>`, :func:`DataFrame.rolling().apply() <.Rolling.apply>`, +:func:`Series.expanding().apply() <.Expanding.apply>`, and :func:`DataFrame.expanding().apply() <.Expanding.apply>` have gained a ``raw=None`` parameter. +This is similar to :func:`DataFrame.apply`. This parameter, if ``True``, allows one to send a ``np.ndarray`` to the applied function. If ``False``, a ``Series`` will be passed. 
The +default is ``None``, which preserves backward compatibility, so this will default to ``True``, sending an ``np.ndarray``. +In a future version the default will be changed to ``False``, sending a ``Series``. (:issue:`5071`, :issue:`20584`) + +.. ipython:: python + + s = pd.Series(np.arange(5), np.arange(5) + 1) + s + +Pass a ``Series``: + +.. ipython:: python + + s.rolling(2, min_periods=1).apply(lambda x: x.iloc[-1], raw=False) + +Mimic the original behavior of passing a ndarray: + +.. ipython:: python + + s.rolling(2, min_periods=1).apply(lambda x: x[-1], raw=True) + + +.. _whatsnew_0210.enhancements.limit_area: + +``DataFrame.interpolate`` has gained the ``limit_area`` kwarg +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:meth:`DataFrame.interpolate` has gained a ``limit_area`` parameter to allow further control of which ``NaN`` s are replaced. +Use ``limit_area='inside'`` to fill only NaNs surrounded by valid values or use ``limit_area='outside'`` to fill only ``NaN`` s +outside the existing valid values while preserving those inside. (:issue:`16284`) See the :ref:`full documentation here `. + + +.. ipython:: python + + ser = pd.Series([np.nan, np.nan, 5, np.nan, np.nan, + np.nan, 13, np.nan, np.nan]) + ser + +Fill one consecutive inside value in both directions + +.. ipython:: python + + ser.interpolate(limit_direction='both', limit_area='inside', limit=1) + +Fill all consecutive outside values backward + +.. ipython:: python + + ser.interpolate(limit_direction='backward', limit_area='outside') + +Fill all consecutive outside values in both directions + +.. ipython:: python + + ser.interpolate(limit_direction='both', limit_area='outside') + +.. _whatsnew_0210.enhancements.get_dummies_dtype: + +Function ``get_dummies`` now supports ``dtype`` argument +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The :func:`get_dummies` now accepts a ``dtype`` argument, which specifies a dtype for the new columns. The default remains uint8. 
(:issue:`18330`) + +.. ipython:: python + + df = pd.DataFrame({'a': [1, 2], 'b': [3, 4], 'c': [5, 6]}) + pd.get_dummies(df, columns=['c']).dtypes + pd.get_dummies(df, columns=['c'], dtype=bool).dtypes + + +.. _whatsnew_0230.enhancements.timedelta_mod: + +Timedelta mod method +^^^^^^^^^^^^^^^^^^^^ + +``mod`` (%) and ``divmod`` operations are now defined on ``Timedelta`` objects +when operating with either timedelta-like or with numeric arguments. +See the :ref:`documentation here `. (:issue:`19365`) + +.. ipython:: python + + td = pd.Timedelta(hours=37) + td % pd.Timedelta(minutes=45) + +.. _whatsnew_0230.enhancements.ran_inf: + +Method ``.rank()`` handles ``inf`` values when ``NaN`` are present +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In previous versions, ``.rank()`` would assign ``inf`` elements ``NaN`` as their ranks. Now ranks are calculated properly. (:issue:`6945`) + +.. ipython:: python + + s = pd.Series([-np.inf, 0, 1, np.nan, np.inf]) + s + +Previous behavior: + +.. code-block:: ipython + + In [11]: s.rank() + Out[11]: + 0 1.0 + 1 2.0 + 2 3.0 + 3 NaN + 4 NaN + dtype: float64 + +Current behavior: + +.. ipython:: python + + s.rank() + +Furthermore, previously if you rank ``inf`` or ``-inf`` values together with ``NaN`` values, the calculation won't distinguish ``NaN`` from infinity when using 'top' or 'bottom' argument. + +.. ipython:: python + + s = pd.Series([np.nan, np.nan, -np.inf, -np.inf]) + s + +Previous behavior: + +.. code-block:: ipython + + In [15]: s.rank(na_option='top') + Out[15]: + 0 2.5 + 1 2.5 + 2 2.5 + 3 2.5 + dtype: float64 + +Current behavior: + +.. 
ipython:: python + + s.rank(na_option='top') + +These bugs were squashed: + +- Bug in :meth:`DataFrame.rank` and :meth:`Series.rank` when ``method='dense'`` and ``pct=True`` in which percentile ranks were not being used with the number of distinct observations (:issue:`15630`) +- Bug in :meth:`Series.rank` and :meth:`DataFrame.rank` when ``ascending='False'`` failed to return correct ranks for infinity if ``NaN`` were present (:issue:`19538`) +- Bug in :func:`DataFrameGroupBy.rank` where ranks were incorrect when both infinity and ``NaN`` were present (:issue:`20561`) + + +.. _whatsnew_0230.enhancements.str_cat_align: + +``Series.str.cat`` has gained the ``join`` kwarg +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previously, :meth:`Series.str.cat` did not -- in contrast to most of ``pandas`` -- align :class:`Series` on their index before concatenation (see :issue:`18657`). +The method has now gained a keyword ``join`` to control the manner of alignment, see examples below and :ref:`here `. + +In v.0.23 ``join`` will default to None (meaning no alignment), but this default will change to ``'left'`` in a future version of pandas. + +.. ipython:: python + :okwarning: + + s = pd.Series(['a', 'b', 'c', 'd']) + t = pd.Series(['b', 'd', 'e', 'c'], index=[1, 3, 4, 2]) + s.str.cat(t) + s.str.cat(t, join='left', na_rep='-') + +Furthermore, :meth:`Series.str.cat` now works for ``CategoricalIndex`` as well (previously raised a ``ValueError``; see :issue:`20842`). + +.. _whatsnew_0230.enhancements.astype_category: + +``DataFrame.astype`` performs column-wise conversion to ``Categorical`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:meth:`DataFrame.astype` can now perform column-wise conversion to ``Categorical`` by supplying the string ``'category'`` or +a :class:`~pandas.api.types.CategoricalDtype`. Previously, attempting this would raise a ``NotImplementedError``. 
See the +:ref:`categorical.objectcreation` section of the documentation for more details and examples. (:issue:`12860`, :issue:`18099`) + +Supplying the string ``'category'`` performs column-wise conversion, with only labels appearing in a given column set as categories: + +.. ipython:: python + + df = pd.DataFrame({'A': list('abca'), 'B': list('bccd')}) + df = df.astype('category') + df['A'].dtype + df['B'].dtype + + +Supplying a ``CategoricalDtype`` will make the categories in each column consistent with the supplied dtype: + +.. ipython:: python + + from pandas.api.types import CategoricalDtype + df = pd.DataFrame({'A': list('abca'), 'B': list('bccd')}) + cdt = CategoricalDtype(categories=list('abcd'), ordered=True) + df = df.astype(cdt) + df['A'].dtype + df['B'].dtype + + +.. _whatsnew_0230.enhancements.other: + +Other enhancements +^^^^^^^^^^^^^^^^^^ + +- Unary ``+`` now permitted for ``Series`` and ``DataFrame`` as numeric operator (:issue:`16073`) +- Better support for :meth:`~pandas.io.formats.style.Styler.to_excel` output with the ``xlsxwriter`` engine. (:issue:`16149`) +- :func:`pandas.tseries.frequencies.to_offset` now accepts leading '+' signs e.g. '+1h'. 
(:issue:`18171`) +- :func:`MultiIndex.unique` now supports the ``level=`` argument, to get unique values from a specific index level (:issue:`17896`) +- :class:`pandas.io.formats.style.Styler` now has method ``hide_index()`` to determine whether the index will be rendered in output (:issue:`14194`) +- :class:`pandas.io.formats.style.Styler` now has method ``hide_columns()`` to determine whether columns will be hidden in output (:issue:`14194`) +- Improved wording of ``ValueError`` raised in :func:`to_datetime` when ``unit=`` is passed with a non-convertible value (:issue:`14350`) +- :func:`Series.fillna` now accepts a Series or a dict as a ``value`` for a categorical dtype (:issue:`17033`) +- :func:`pandas.read_clipboard` updated to use qtpy, falling back to PyQt5 and then PyQt4, adding compatibility with Python3 and multiple python-qt bindings (:issue:`17722`) +- Improved wording of ``ValueError`` raised in :func:`read_csv` when the ``usecols`` argument cannot match all columns. (:issue:`17301`) +- :func:`DataFrame.corrwith` now silently drops non-numeric columns when passed a Series. Before, an exception was raised (:issue:`18570`). +- :class:`IntervalIndex` now supports time zone aware ``Interval`` objects (:issue:`18537`, :issue:`18538`) +- :func:`Series` / :func:`DataFrame` tab completion also returns identifiers in the first level of a :func:`MultiIndex`. (:issue:`16326`) +- :func:`read_excel` has gained the ``nrows`` parameter (:issue:`16645`) +- :meth:`DataFrame.append` can now in more cases preserve the type of the calling dataframe's columns (e.g. 
if both are ``CategoricalIndex``) (:issue:`18359`) +- :meth:`DataFrame.to_json` and :meth:`Series.to_json` now accept an ``index`` argument which allows the user to exclude the index from the JSON output (:issue:`17394`) +- ``IntervalIndex.to_tuples()`` has gained the ``na_tuple`` parameter to control whether NA is returned as a tuple of NA, or NA itself (:issue:`18756`) +- ``Categorical.rename_categories``, ``CategoricalIndex.rename_categories`` and :attr:`Series.cat.rename_categories` + can now take a callable as their argument (:issue:`18862`) +- :class:`Interval` and :class:`IntervalIndex` have gained a ``length`` attribute (:issue:`18789`) +- ``Resampler`` objects now have a functioning :attr:`.Resampler.pipe` method. + Previously, calls to ``pipe`` were diverted to the ``mean`` method (:issue:`17905`). +- :func:`~pandas.api.types.is_scalar` now returns ``True`` for ``DateOffset`` objects (:issue:`18943`). +- :func:`DataFrame.pivot` now accepts a list for the ``values=`` kwarg (:issue:`17160`). +- Added :func:`pandas.api.extensions.register_dataframe_accessor`, + :func:`pandas.api.extensions.register_series_accessor`, and + :func:`pandas.api.extensions.register_index_accessor`, accessor for libraries downstream of pandas + to register custom accessors like ``.cat`` on pandas objects. See + :ref:`Registering Custom Accessors ` for more (:issue:`14781`). 
+ +- ``IntervalIndex.astype`` now supports conversions between subtypes when passed an ``IntervalDtype`` (:issue:`19197`) +- :class:`IntervalIndex` and its associated constructor methods (``from_arrays``, ``from_breaks``, ``from_tuples``) have gained a ``dtype`` parameter (:issue:`19262`) +- Added :func:`.SeriesGroupBy.is_monotonic_increasing` and :func:`.SeriesGroupBy.is_monotonic_decreasing` (:issue:`17015`) +- For subclassed ``DataFrames``, :func:`DataFrame.apply` will now preserve the ``Series`` subclass (if defined) when passing the data to the applied function (:issue:`19822`) +- :func:`DataFrame.from_dict` now accepts a ``columns`` argument that can be used to specify the column names when ``orient='index'`` is used (:issue:`18529`) +- Added option ``display.html.use_mathjax`` so `MathJax <https://www.mathjax.org/>`_ can be disabled when rendering tables in ``Jupyter`` notebooks (:issue:`19856`, :issue:`19824`) +- :func:`DataFrame.replace` now supports the ``method`` parameter, which can be used to specify the replacement method when ``to_replace`` is a scalar, list or tuple and ``value`` is ``None`` (:issue:`19632`) +- :meth:`Timestamp.month_name`, :meth:`DatetimeIndex.month_name`, and :meth:`Series.dt.month_name` are now available (:issue:`12805`) +- :meth:`Timestamp.day_name` and :meth:`DatetimeIndex.day_name` are now available to return day names with a specified locale (:issue:`12806`) +- :meth:`DataFrame.to_sql` now performs a multi-value insert if the underlying connection supports it rather than inserting row by row. + ``SQLAlchemy`` dialects supporting multi-value inserts include: ``mysql``, ``postgresql``, ``sqlite`` and any dialect with ``supports_multivalues_insert``. (:issue:`14315`, :issue:`8953`) +- :func:`read_html` now accepts a ``displayed_only`` keyword argument to control whether or not hidden elements are parsed (``True`` by default) (:issue:`20027`) +- :func:`read_html` now reads all ``<tbody>`` elements in a ``
``, not just the first. (:issue:`20690`) +- :meth:`.Rolling.quantile` and :meth:`.Expanding.quantile` now accept the ``interpolation`` keyword, ``linear`` by default (:issue:`20497`) +- zip compression is supported via ``compression=zip`` in :func:`DataFrame.to_pickle`, :func:`Series.to_pickle`, :func:`DataFrame.to_csv`, :func:`Series.to_csv`, :func:`DataFrame.to_json`, :func:`Series.to_json`. (:issue:`17778`) +- :class:`~pandas.tseries.offsets.WeekOfMonth` constructor now supports ``n=0`` (:issue:`20517`). +- :class:`DataFrame` and :class:`Series` now support matrix multiplication (``@``) operator (:issue:`10259`) for Python>=3.5 +- Updated :meth:`DataFrame.to_gbq` and :meth:`pandas.read_gbq` signature and documentation to reflect changes from + the pandas-gbq library version 0.4.0. Adds intersphinx mapping to pandas-gbq + library. (:issue:`20564`) +- Added new writer for exporting Stata dta files in version 117, ``StataWriter117``. This format supports exporting strings with lengths up to 2,000,000 characters (:issue:`16450`) +- :func:`to_hdf` and :func:`read_hdf` now accept an ``errors`` keyword argument to control encoding error handling (:issue:`20835`) +- :func:`cut` has gained the ``duplicates='raise'|'drop'`` option to control whether to raise on duplicated edges (:issue:`20947`) +- :func:`date_range`, :func:`timedelta_range`, and :func:`interval_range` now return a linearly spaced index if ``start``, ``stop``, and ``periods`` are specified, but ``freq`` is not. (:issue:`20808`, :issue:`20983`, :issue:`20976`) + +.. _whatsnew_0230.api_breaking: + +Backwards incompatible API changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. _whatsnew_0230.api_breaking.deps: + +Dependencies have increased minimum versions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +We have updated our minimum supported versions of dependencies (:issue:`15184`). 
+If installed, we now require: + ++-----------------+-----------------+----------+---------------+ +| Package | Minimum Version | Required | Issue | ++=================+=================+==========+===============+ +| python-dateutil | 2.5.0 | X | :issue:`15184`| ++-----------------+-----------------+----------+---------------+ +| openpyxl | 2.4.0 | | :issue:`15184`| ++-----------------+-----------------+----------+---------------+ +| beautifulsoup4 | 4.2.1 | | :issue:`20082`| ++-----------------+-----------------+----------+---------------+ +| setuptools | 24.2.0 | | :issue:`20698`| ++-----------------+-----------------+----------+---------------+ + +.. _whatsnew_0230.api_breaking.dict_insertion_order: + +Instantiation from dicts preserves dict insertion order for Python 3.6+ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Until Python 3.6, dicts in Python had no formally defined ordering. For Python +version 3.6 and later, dicts are ordered by insertion order, see +`PEP 468 `_. +pandas will use the dict's insertion order, when creating a ``Series`` or +``DataFrame`` from a dict and you're using Python version 3.6 or +higher. (:issue:`19884`) + +Previous behavior (and current behavior if on Python < 3.6): + +.. code-block:: ipython + + In [16]: pd.Series({'Income': 2000, + ....: 'Expenses': -1500, + ....: 'Taxes': -200, + ....: 'Net result': 300}) + Out[16]: + Expenses -1500 + Income 2000 + Net result 300 + Taxes -200 + dtype: int64 + +Note the Series above is ordered alphabetically by the index values. + +New behavior (for Python >= 3.6): + +.. ipython:: python + + pd.Series({'Income': 2000, + 'Expenses': -1500, + 'Taxes': -200, + 'Net result': 300}) + +Notice that the Series is now ordered by insertion order. This new behavior is +used for all relevant pandas types (``Series``, ``DataFrame``, ``SparseSeries`` +and ``SparseDataFrame``). 
+ +If you wish to retain the old behavior while using Python >= 3.6, you can use +``.sort_index()``: + +.. ipython:: python + + pd.Series({'Income': 2000, + 'Expenses': -1500, + 'Taxes': -200, + 'Net result': 300}).sort_index() + +.. _whatsnew_0230.api_breaking.deprecate_panel: + +Deprecate Panel +^^^^^^^^^^^^^^^ + +``Panel`` was deprecated in the 0.20.x release, showing as a ``DeprecationWarning``. Using ``Panel`` will now show a ``FutureWarning``. The recommended way to represent 3-D data is +with a ``MultiIndex`` on a ``DataFrame`` via the :meth:`~Panel.to_frame` method or with the `xarray package <https://xarray.pydata.org/en/stable/>`__. pandas +provides a :meth:`~Panel.to_xarray` method to automate this conversion (:issue:`13563`, :issue:`18324`). + +.. code-block:: ipython + + In [75]: import pandas._testing as tm + + In [76]: p = tm.makePanel() + + In [77]: p + Out[77]: + <class 'pandas.core.panel.Panel'> + Dimensions: 3 (items) x 3 (major_axis) x 4 (minor_axis) + Items axis: ItemA to ItemC + Major_axis axis: 2000-01-03 00:00:00 to 2000-01-05 00:00:00 + Minor_axis axis: A to D + +Convert to a MultiIndex DataFrame + +.. code-block:: ipython + + In [78]: p.to_frame() + Out[78]: + ItemA ItemB ItemC + major minor + 2000-01-03 A 0.469112 0.721555 0.404705 + B -1.135632 0.271860 -1.039268 + C 0.119209 0.276232 -1.344312 + D -2.104569 0.113648 -0.109050 + 2000-01-04 A -0.282863 -0.706771 0.577046 + B 1.212112 -0.424972 -0.370647 + C -1.044236 -1.087401 0.844885 + D -0.494929 -1.478427 1.643563 + 2000-01-05 A -1.509059 -1.039575 -1.715002 + B -0.173215 0.567020 -1.157892 + C -0.861849 -0.673690 1.075770 + D 1.071804 0.524988 -1.469388 + + [12 rows x 3 columns] + +Convert to an xarray DataArray + +..
code-block:: ipython + + In [79]: p.to_xarray() + Out[79]: + <xarray.DataArray (items: 3, major_axis: 3, minor_axis: 4)> + array([[[ 0.469112, -1.135632, 0.119209, -2.104569], + [-0.282863, 1.212112, -1.044236, -0.494929], + [-1.509059, -0.173215, -0.861849, 1.071804]], + + [[ 0.721555, 0.27186 , 0.276232, 0.113648], + [-0.706771, -0.424972, -1.087401, -1.478427], + [-1.039575, 0.56702 , -0.67369 , 0.524988]], + + [[ 0.404705, -1.039268, -1.344312, -0.10905 ], + [ 0.577046, -0.370647, 0.844885, 1.643563], + [-1.715002, -1.157892, 1.07577 , -1.469388]]]) + Coordinates: + * items (items) object 'ItemA' 'ItemB' 'ItemC' + * major_axis (major_axis) datetime64[ns] 2000-01-03 2000-01-04 2000-01-05 + * minor_axis (minor_axis) object 'A' 'B' 'C' 'D' + + +.. _whatsnew_0230.api_breaking.core_common: + +pandas.core.common removals +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The following error & warning messages are removed from ``pandas.core.common`` (:issue:`13634`, :issue:`19769`): + +- ``PerformanceWarning`` +- ``UnsupportedFunctionCall`` +- ``UnsortedIndexError`` +- ``AbstractMethodError`` + +These are available for import from ``pandas.errors`` (since 0.19.0). + + +.. _whatsnew_0230.api_breaking.apply: + +Changes to make output of ``DataFrame.apply`` consistent +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:func:`DataFrame.apply` was inconsistent when applying an arbitrary user-defined-function that returned a list-like with ``axis=1``. Several bugs and inconsistencies +are resolved. If the applied function returns a Series, then pandas will return a DataFrame; otherwise a Series will be returned. This includes the case +where a list-like (e.g. ``tuple`` or ``list``) is returned (:issue:`16353`, :issue:`17437`, :issue:`17970`, :issue:`17348`, :issue:`17892`, :issue:`18573`, +:issue:`17602`, :issue:`18775`, :issue:`18901`, :issue:`18919`). + +..
ipython:: python + + df = pd.DataFrame(np.tile(np.arange(3), 6).reshape(6, -1) + 1, + columns=['A', 'B', 'C']) + df + +Previous behavior: if the returned shape happened to match the length of original columns, this would return a ``DataFrame``. +If the return shape did not match, a ``Series`` with lists was returned. + +.. code-block:: python + + In [3]: df.apply(lambda x: [1, 2, 3], axis=1) + Out[3]: + A B C + 0 1 2 3 + 1 1 2 3 + 2 1 2 3 + 3 1 2 3 + 4 1 2 3 + 5 1 2 3 + + In [4]: df.apply(lambda x: [1, 2], axis=1) + Out[4]: + 0 [1, 2] + 1 [1, 2] + 2 [1, 2] + 3 [1, 2] + 4 [1, 2] + 5 [1, 2] + dtype: object + + +New behavior: When the applied function returns a list-like, this will now *always* return a ``Series``. + +.. ipython:: python + + df.apply(lambda x: [1, 2, 3], axis=1) + df.apply(lambda x: [1, 2], axis=1) + +To have expanded columns, you can use ``result_type='expand'`` + +.. ipython:: python + + df.apply(lambda x: [1, 2, 3], axis=1, result_type='expand') + +To broadcast the result across the original columns (the old behaviour for +list-likes of the correct length), you can use ``result_type='broadcast'``. +The shape must match the original columns. + +.. ipython:: python + + df.apply(lambda x: [1, 2, 3], axis=1, result_type='broadcast') + +Returning a ``Series`` allows one to control the exact return structure and column names: + +.. ipython:: python + + df.apply(lambda x: pd.Series([1, 2, 3], index=['D', 'E', 'F']), axis=1) + +.. _whatsnew_0230.api_breaking.concat: + +Concatenation will no longer sort +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In a future version of pandas :func:`pandas.concat` will no longer sort the non-concatenation axis when it is not already aligned. +The current behavior is the same as the previous (sorting), but now a warning is issued when ``sort`` is not specified and the non-concatenation axis is not aligned (:issue:`4588`). + +.. 
ipython:: python + :okwarning: + + df1 = pd.DataFrame({"a": [1, 2], "b": [1, 2]}, columns=['b', 'a']) + df2 = pd.DataFrame({"a": [4, 5]}) + + pd.concat([df1, df2]) + +To keep the previous behavior (sorting) and silence the warning, pass ``sort=True`` + +.. ipython:: python + + pd.concat([df1, df2], sort=True) + +To accept the future behavior (no sorting), pass ``sort=False`` + +.. ipython:: python + + pd.concat([df1, df2], sort=False) + +Note that this change also applies to :meth:`DataFrame.append`, which has also received a ``sort`` keyword for controlling this behavior. + + +.. _whatsnew_0230.api_breaking.build_changes: + +Build changes +^^^^^^^^^^^^^ + +- Building pandas for development now requires ``cython >= 0.24`` (:issue:`18613`) +- Building from source now explicitly requires ``setuptools`` in ``setup.py`` (:issue:`18113`) +- Updated conda recipe to be in compliance with conda-build 3.0+ (:issue:`18002`) + +.. _whatsnew_0230.api_breaking.index_division_by_zero: + +Index division by zero fills correctly +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Division operations on ``Index`` and subclasses will now fill division of positive numbers by zero with ``np.inf``, division of negative numbers by zero with ``-np.inf`` and ``0 / 0`` with ``np.nan``. This matches existing ``Series`` behavior. (:issue:`19322`, :issue:`19347`) + +Previous behavior: + +.. code-block:: ipython + + In [6]: index = pd.Int64Index([-1, 0, 1]) + + In [7]: index / 0 + Out[7]: Int64Index([0, 0, 0], dtype='int64') + + # Previous behavior yielded different results depending on the type of zero in the divisor + In [8]: index / 0.0 + Out[8]: Float64Index([-inf, nan, inf], dtype='float64') + + In [9]: index = pd.UInt64Index([0, 1]) + + In [10]: index / np.array([0, 0], dtype=np.uint64) + Out[10]: UInt64Index([0, 0], dtype='uint64') + + In [11]: pd.RangeIndex(1, 5) / 0 + ZeroDivisionError: integer division or modulo by zero + +Current behavior: + +..
code-block:: ipython + + In [12]: index = pd.Int64Index([-1, 0, 1]) + # division by zero gives -infinity where negative, + # +infinity where positive, and NaN for 0 / 0 + In [13]: index / 0 + + # The result of division by zero should not depend on + # whether the zero is int or float + In [14]: index / 0.0 + + In [15]: index = pd.UInt64Index([0, 1]) + In [16]: index / np.array([0, 0], dtype=np.uint64) + + In [17]: pd.RangeIndex(1, 5) / 0 + +.. _whatsnew_0230.api_breaking.extract: + +Extraction of matching patterns from strings +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +By default, extracting matching patterns from strings with :func:`str.extract` used to return a +``Series`` if a single group was being extracted (a ``DataFrame`` if more than one group was +extracted). As of pandas 0.23.0 :func:`str.extract` always returns a ``DataFrame``, unless +``expand`` is set to ``False``. Finally, ``None`` was an accepted value for +the ``expand`` parameter (which was equivalent to ``False``), but now raises a ``ValueError``. (:issue:`11386`) + +Previous behavior: + +.. code-block:: ipython + + In [1]: s = pd.Series(['number 10', '12 eggs']) + + In [2]: extracted = s.str.extract(r'.*(\d\d).*') + + In [3]: extracted + Out [3]: + 0 10 + 1 12 + dtype: object + + In [4]: type(extracted) + Out [4]: + pandas.core.series.Series + +New behavior: + +.. ipython:: python + + s = pd.Series(['number 10', '12 eggs']) + extracted = s.str.extract(r'.*(\d\d).*') + extracted + type(extracted) + +To restore previous behavior, simply set ``expand`` to ``False``: + +.. ipython:: python + + s = pd.Series(['number 10', '12 eggs']) + extracted = s.str.extract(r'.*(\d\d).*', expand=False) + extracted + type(extracted) + +.. 
_whatsnew_0230.api_breaking.cdt_ordered: + +Default value for the ``ordered`` parameter of ``CategoricalDtype`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The default value of the ``ordered`` parameter for :class:`~pandas.api.types.CategoricalDtype` has changed from ``False`` to ``None`` to allow updating of ``categories`` without impacting ``ordered``. Behavior should remain consistent for downstream objects, such as :class:`Categorical` (:issue:`18790`) + +In previous versions, the default value for the ``ordered`` parameter was ``False``. This could potentially lead to the ``ordered`` parameter unintentionally being changed from ``True`` to ``False`` when users attempt to update ``categories`` if ``ordered`` is not explicitly specified, as it would silently default to ``False``. The new behavior for ``ordered=None`` is to retain the existing value of ``ordered``. + +New behavior: + +.. code-block:: ipython + + In [2]: from pandas.api.types import CategoricalDtype + + In [3]: cat = pd.Categorical(list('abcaba'), ordered=True, categories=list('cba')) + + In [4]: cat + Out[4]: + [a, b, c, a, b, a] + Categories (3, object): [c < b < a] + + In [5]: cdt = CategoricalDtype(categories=list('cbad')) + + In [6]: cat.astype(cdt) + Out[6]: + [a, b, c, a, b, a] + Categories (4, object): [c < b < a < d] + +Notice in the example above that the converted ``Categorical`` has retained ``ordered=True``. Had the default value for ``ordered`` remained as ``False``, the converted ``Categorical`` would have become unordered, despite ``ordered=False`` never being explicitly specified. To change the value of ``ordered``, explicitly pass it to the new dtype, e.g. ``CategoricalDtype(categories=list('cbad'), ordered=False)``. 
+ +Note that the unintentional conversion of ``ordered`` discussed above did not arise in previous versions due to separate bugs that prevented ``astype`` from doing any type of category to category conversion (:issue:`10696`, :issue:`18593`). These bugs have been fixed in this release, and motivated changing the default value of ``ordered``. + +.. _whatsnew_0230.api_breaking.pretty_printing: + +Better pretty-printing of DataFrames in a terminal +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Previously, the default value for the maximum number of columns was +``pd.options.display.max_columns=20``. This meant that relatively wide data +frames would not fit within the terminal width, and pandas would introduce line +breaks to display these 20 columns. This resulted in an output that was +relatively difficult to read: + +.. image:: ../_static/print_df_old.png + +If Python runs in a terminal, the maximum number of columns is now determined +automatically so that the printed data frame fits within the current terminal +width (``pd.options.display.max_columns=0``) (:issue:`17023`). If Python runs +as a Jupyter kernel (such as the Jupyter QtConsole or a Jupyter notebook, as +well as in many IDEs), this value cannot be inferred automatically and is thus +set to ``20`` as in previous versions. In a terminal, this results in a much +nicer output: + +.. image:: ../_static/print_df_new.png + +Note that if you don't like the new default, you can always set this option +yourself. To revert to the old setting, you can run this line: + +.. code-block:: python + + pd.options.display.max_columns = 20 + +.. 
_whatsnew_0230.api.datetimelike: + +Datetimelike API changes +^^^^^^^^^^^^^^^^^^^^^^^^ + +- The default ``Timedelta`` constructor now accepts an ``ISO 8601 Duration`` string as an argument (:issue:`19040`) +- Subtracting ``NaT`` from a :class:`Series` with ``dtype='datetime64[ns]'`` returns a ``Series`` with ``dtype='timedelta64[ns]'`` instead of ``dtype='datetime64[ns]'`` (:issue:`18808`) +- Addition or subtraction of ``NaT`` from :class:`TimedeltaIndex` will return ``TimedeltaIndex`` instead of ``DatetimeIndex`` (:issue:`19124`) +- :func:`DatetimeIndex.shift` and :func:`TimedeltaIndex.shift` will now raise ``NullFrequencyError`` (which subclasses ``ValueError``, which was raised in older versions) when the index object frequency is ``None`` (:issue:`19147`) +- Addition and subtraction of ``NaN`` from a :class:`Series` with ``dtype='timedelta64[ns]'`` will raise a ``TypeError`` instead of treating the ``NaN`` as ``NaT`` (:issue:`19274`) +- ``NaT`` division with :class:`datetime.timedelta` will now return ``NaN`` instead of raising (:issue:`17876`) +- Operations between a :class:`Series` with ``dtype='datetime64[ns]'`` and a :class:`PeriodIndex` will correctly raise ``TypeError`` (:issue:`18850`) +- Subtraction of :class:`Series` with timezone-aware ``dtype='datetime64[ns]'`` with mismatched timezones will raise ``TypeError`` instead of ``ValueError`` (:issue:`18817`) +- :class:`Timestamp` will no longer silently ignore unused or invalid ``tz`` or ``tzinfo`` keyword arguments (:issue:`17690`) +- :class:`Timestamp` will no longer silently ignore invalid ``freq`` arguments (:issue:`5168`) +- :class:`CacheableOffset` and :class:`WeekDay` are no longer available in the ``pandas.tseries.offsets`` module (:issue:`17830`) +- ``pandas.tseries.frequencies.get_freq_group()`` and ``pandas.tseries.frequencies.DAYS`` are removed from the public API (:issue:`18034`) +- :func:`Series.truncate` and :func:`DataFrame.truncate` will raise a ``ValueError`` if the index is not
sorted instead of an unhelpful ``KeyError`` (:issue:`17935`) +- :attr:`Series.first` and :attr:`DataFrame.first` will now raise a ``TypeError`` + rather than ``NotImplementedError`` when index is not a :class:`DatetimeIndex` (:issue:`20725`). +- :attr:`Series.last` and :attr:`DataFrame.last` will now raise a ``TypeError`` + rather than ``NotImplementedError`` when index is not a :class:`DatetimeIndex` (:issue:`20725`). +- Restricted ``DateOffset`` keyword arguments. Previously, ``DateOffset`` subclasses allowed arbitrary keyword arguments which could lead to unexpected behavior. Now, only valid arguments will be accepted. (:issue:`17176`, :issue:`18226`). +- :func:`pandas.merge` provides a more informative error message when trying to merge on timezone-aware and timezone-naive columns (:issue:`15800`) +- For :class:`DatetimeIndex` and :class:`TimedeltaIndex` with ``freq=None``, addition or subtraction of integer-dtyped array or ``Index`` will raise ``NullFrequencyError`` instead of ``TypeError`` (:issue:`19895`) +- :class:`Timestamp` constructor now accepts a ``nanosecond`` keyword or positional argument (:issue:`18898`) +- :class:`DatetimeIndex` will now raise an ``AttributeError`` when the ``tz`` attribute is set after instantiation (:issue:`3746`) +- :class:`DatetimeIndex` with a ``pytz`` timezone will now return a consistent ``pytz`` timezone (:issue:`18595`) + +.. 
_whatsnew_0230.api.other: + +Other API changes +^^^^^^^^^^^^^^^^^ + +- :func:`Series.astype` and :func:`Index.astype` with an incompatible dtype will now raise a ``TypeError`` rather than a ``ValueError`` (:issue:`18231`) +- ``Series`` construction with an ``object`` dtyped tz-aware datetime and ``dtype=object`` specified, will now return an ``object`` dtyped ``Series``, previously this would infer the datetime dtype (:issue:`18231`) +- A :class:`Series` of ``dtype=category`` constructed from an empty ``dict`` will now have categories of ``dtype=object`` rather than ``dtype=float64``, consistently with the case in which an empty list is passed (:issue:`18515`) +- All-NaN levels in a ``MultiIndex`` are now assigned ``float`` rather than ``object`` dtype, promoting consistency with ``Index`` (:issue:`17929`). +- Levels names of a ``MultiIndex`` (when not None) are now required to be unique: trying to create a ``MultiIndex`` with repeated names will raise a ``ValueError`` (:issue:`18872`) +- Both construction and renaming of ``Index``/``MultiIndex`` with non-hashable ``name``/``names`` will now raise ``TypeError`` (:issue:`20527`) +- :func:`Index.map` can now accept ``Series`` and dictionary input objects (:issue:`12756`, :issue:`18482`, :issue:`18509`). +- :func:`DataFrame.unstack` will now default to filling with ``np.nan`` for ``object`` columns. (:issue:`12815`) +- :class:`IntervalIndex` constructor will raise if the ``closed`` parameter conflicts with how the input data is inferred to be closed (:issue:`18421`) +- Inserting missing values into indexes will work for all types of indexes and automatically insert the correct type of missing value (``NaN``, ``NaT``, etc.) regardless of the type passed in (:issue:`18295`) +- When created with duplicate labels, ``MultiIndex`` now raises a ``ValueError``. 
(:issue:`17464`) +- :func:`Series.fillna` now raises a ``TypeError`` instead of a ``ValueError`` when passed a list, tuple or DataFrame as a ``value`` (:issue:`18293`) +- :func:`pandas.DataFrame.merge` no longer casts a ``float`` column to ``object`` when merging on ``int`` and ``float`` columns (:issue:`16572`) +- :func:`pandas.merge` now raises a ``ValueError`` when trying to merge on incompatible data types (:issue:`9780`) +- The default NA value for :class:`UInt64Index` has changed from 0 to ``NaN``, which impacts methods that mask with NA, such as ``UInt64Index.where()`` (:issue:`18398`) +- Refactored ``setup.py`` to use ``find_packages`` instead of explicitly listing out all subpackages (:issue:`18535`) +- Rearranged the order of keyword arguments in :func:`read_excel` to align with :func:`read_csv` (:issue:`16672`) +- :func:`wide_to_long` previously kept numeric-like suffixes as ``object`` dtype. Now they are cast to numeric if possible (:issue:`17627`) +- In :func:`read_excel`, the ``comment`` argument is now exposed as a named parameter (:issue:`18735`) +- Rearranged the order of keyword arguments in :func:`read_excel` to align with :func:`read_csv` (:issue:`16672`) +- The options ``html.border`` and ``mode.use_inf_as_null`` were deprecated in prior versions, these will now show ``FutureWarning`` rather than a ``DeprecationWarning`` (:issue:`19003`) +- :class:`IntervalIndex` and ``IntervalDtype`` no longer support categorical, object, and string subtypes (:issue:`19016`) +- ``IntervalDtype`` now returns ``True`` when compared against ``'interval'`` regardless of subtype, and ``IntervalDtype.name`` now returns ``'interval'`` regardless of subtype (:issue:`18980`) +- ``KeyError`` now raises instead of ``ValueError`` in :meth:`~DataFrame.drop`, :meth:`~Panel.drop`, :meth:`~Series.drop`, :meth:`~Index.drop` when dropping a non-existent element in an axis with duplicates (:issue:`19186`) +- :func:`Series.to_csv` now accepts a ``compression`` argument that works 
in the same way as the ``compression`` argument in :func:`DataFrame.to_csv` (:issue:`18958`) +- Set operations (union, difference...) on :class:`IntervalIndex` with incompatible index types will now raise a ``TypeError`` rather than a ``ValueError`` (:issue:`19329`) +- :class:`DateOffset` objects render more simply, e.g. ```` instead of ```` (:issue:`19403`) +- ``Categorical.fillna`` now validates its ``value`` and ``method`` keyword arguments. It now raises when both or none are specified, matching the behavior of :meth:`Series.fillna` (:issue:`19682`) +- ``pd.to_datetime('today')`` now returns a datetime, consistent with ``pd.Timestamp('today')``; previously ``pd.to_datetime('today')`` returned a ``.normalized()`` datetime (:issue:`19935`) +- :func:`Series.str.replace` now takes an optional ``regex`` keyword which, when set to ``False``, uses literal string replacement rather than regex replacement (:issue:`16808`) +- :func:`DatetimeIndex.strftime` and :func:`PeriodIndex.strftime` now return an ``Index`` instead of a numpy array to be consistent with similar accessors (:issue:`20127`) +- Constructing a Series from a list of length 1 no longer broadcasts this list when a longer index is specified (:issue:`19714`, :issue:`20391`). +- :func:`DataFrame.to_dict` with ``orient='index'`` no longer casts int columns to float for a DataFrame with only int and float columns (:issue:`18580`) +- A user-defined-function that is passed to :func:`Series.rolling().aggregate() <.Rolling.aggregate>`, :func:`DataFrame.rolling().aggregate() <.Rolling.aggregate>`, or its expanding cousins, will now *always* be passed a ``Series``, rather than a ``np.array``; ``.apply()`` only has the ``raw`` keyword, see :ref:`here `. This is consistent with the signatures of ``.aggregate()`` across pandas (:issue:`20584`) +- Rolling and Expanding types raise ``NotImplementedError`` upon iteration (:issue:`11704`). + +.. 
_whatsnew_0230.deprecations: + +Deprecations +~~~~~~~~~~~~ + +- ``Series.from_array`` and ``SparseSeries.from_array`` are deprecated. Use the normal constructor ``Series(..)`` and ``SparseSeries(..)`` instead (:issue:`18213`). +- ``DataFrame.as_matrix`` is deprecated. Use ``DataFrame.values`` instead (:issue:`18458`). +- ``Series.asobject``, ``DatetimeIndex.asobject``, ``PeriodIndex.asobject`` and ``TimeDeltaIndex.asobject`` have been deprecated. Use ``.astype(object)`` instead (:issue:`18572`) +- Grouping by a tuple of keys now emits a ``FutureWarning`` and is deprecated. + In the future, a tuple passed to ``'by'`` will always refer to a single key + that is the actual tuple, instead of treating the tuple as multiple keys. To + retain the previous behavior, use a list instead of a tuple (:issue:`18314`) +- ``Series.valid`` is deprecated. Use :meth:`Series.dropna` instead (:issue:`18800`). +- :func:`read_excel` has deprecated the ``skip_footer`` parameter. Use ``skipfooter`` instead (:issue:`18836`) +- :meth:`ExcelFile.parse` has deprecated ``sheetname`` in favor of ``sheet_name`` for consistency with :func:`read_excel` (:issue:`20920`). +- The ``is_copy`` attribute is deprecated and will be removed in a future version (:issue:`18801`). +- ``IntervalIndex.from_intervals`` is deprecated in favor of the :class:`IntervalIndex` constructor (:issue:`19263`) +- ``DataFrame.from_items`` is deprecated. Use :func:`DataFrame.from_dict` instead, or ``DataFrame.from_dict(OrderedDict())`` if you wish to preserve the key order (:issue:`17320`, :issue:`17312`) +- Indexing a :class:`MultiIndex` or a :class:`FloatIndex` with a list containing some missing keys will now show a :class:`FutureWarning`, which is consistent with other types of indexes (:issue:`17758`). 
+ +- The ``broadcast`` parameter of ``.apply()`` is deprecated in favor of ``result_type='broadcast'`` (:issue:`18577`) +- The ``reduce`` parameter of ``.apply()`` is deprecated in favor of ``result_type='reduce'`` (:issue:`18577`) +- The ``order`` parameter of :func:`factorize` is deprecated and will be removed in a future release (:issue:`19727`) +- :attr:`Timestamp.weekday_name`, :attr:`DatetimeIndex.weekday_name`, and :attr:`Series.dt.weekday_name` are deprecated in favor of :meth:`Timestamp.day_name`, :meth:`DatetimeIndex.day_name`, and :meth:`Series.dt.day_name` (:issue:`12806`) + +- ``pandas.tseries.plotting.tsplot`` is deprecated. Use :func:`Series.plot` instead (:issue:`18627`) +- ``Index.summary()`` is deprecated and will be removed in a future version (:issue:`18217`) +- ``NDFrame.get_ftype_counts()`` is deprecated and will be removed in a future version (:issue:`18243`) +- The ``convert_datetime64`` parameter in :func:`DataFrame.to_records` has been deprecated and will be removed in a future version. The NumPy bug motivating this parameter has been resolved. The default value for this parameter has also changed from ``True`` to ``None`` (:issue:`18160`). +- :func:`Series.rolling().apply() <.Rolling.apply>`, :func:`DataFrame.rolling().apply() <.Rolling.apply>`, :func:`Series.expanding().apply() <.Expanding.apply>`, and :func:`DataFrame.expanding().apply() <.Expanding.apply>` have deprecated passing an ``np.array`` by default. One will need to pass the new ``raw`` parameter to be explicit about what is passed (:issue:`20584`) +- The ``data``, ``base``, ``strides``, ``flags`` and ``itemsize`` properties + of the ``Series`` and ``Index`` classes have been deprecated and will be + removed in a future version (:issue:`20419`). +- ``DatetimeIndex.offset`` is deprecated. Use ``DatetimeIndex.freq`` instead (:issue:`20716`) +- Floor division between an integer ndarray and a :class:`Timedelta` is deprecated. 
Divide by :attr:`Timedelta.value` instead (:issue:`19761`) +- Setting ``PeriodIndex.freq`` (which was not guaranteed to work correctly) is deprecated. Use :meth:`PeriodIndex.asfreq` instead (:issue:`20678`) +- ``Index.get_duplicates()`` is deprecated and will be removed in a future version (:issue:`20239`) +- The previous default behavior of negative indices in ``Categorical.take`` is deprecated. In a future version it will change from meaning missing values to meaning positional indices from the right. The future behavior is consistent with :meth:`Series.take` (:issue:`20664`). +- Passing multiple axes to the ``axis`` parameter in :func:`DataFrame.dropna` has been deprecated and will be removed in a future version (:issue:`20987`) + + +.. _whatsnew_0230.prior_deprecations: + +Removal of prior version deprecations/changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- Warnings against the obsolete usage ``Categorical(codes, categories)``, which were emitted for instance when the first two arguments to ``Categorical()`` had different dtypes, and recommended the use of ``Categorical.from_codes``, have now been removed (:issue:`8074`) +- The ``levels`` and ``labels`` attributes of a ``MultiIndex`` can no longer be set directly (:issue:`4039`). +- ``pd.tseries.util.pivot_annual`` has been removed (deprecated since v0.19). Use ``pivot_table`` instead (:issue:`18370`) +- ``pd.tseries.util.isleapyear`` has been removed (deprecated since v0.19). Use ``.is_leap_year`` property in Datetime-likes instead (:issue:`18370`) +- ``pd.ordered_merge`` has been removed (deprecated since v0.19). 
Use ``pd.merge_ordered`` instead (:issue:`18459`) +- The ``SparseList`` class has been removed (:issue:`14007`) +- The ``pandas.io.wb`` and ``pandas.io.data`` stub modules have been removed (:issue:`13735`) +- ``Categorical.from_array`` has been removed (:issue:`13854`) +- The ``freq`` and ``how`` parameters have been removed from the ``rolling``/``expanding``/``ewm`` methods of DataFrame + and Series (deprecated since v0.18). Instead, resample before calling the methods. (:issue:`18601` & :issue:`18668`) +- ``DatetimeIndex.to_datetime``, ``Timestamp.to_datetime``, ``PeriodIndex.to_datetime``, and ``Index.to_datetime`` have been removed (:issue:`8254`, :issue:`14096`, :issue:`14113`) +- :func:`read_csv` has dropped the ``skip_footer`` parameter (:issue:`13386`) +- :func:`read_csv` has dropped the ``as_recarray`` parameter (:issue:`13373`) +- :func:`read_csv` has dropped the ``buffer_lines`` parameter (:issue:`13360`) +- :func:`read_csv` has dropped the ``compact_ints`` and ``use_unsigned`` parameters (:issue:`13323`) +- The ``Timestamp`` class has dropped the ``offset`` attribute in favor of ``freq`` (:issue:`13593`) +- The ``Series``, ``Categorical``, and ``Index`` classes have dropped the ``reshape`` method (:issue:`13012`) +- ``pandas.tseries.frequencies.get_standard_freq`` has been removed in favor of ``pandas.tseries.frequencies.to_offset(freq).rule_code`` (:issue:`13874`) +- The ``freqstr`` keyword has been removed from ``pandas.tseries.frequencies.to_offset`` in favor of ``freq`` (:issue:`13874`) +- The ``Panel4D`` and ``PanelND`` classes have been removed (:issue:`13776`) +- The ``Panel`` class has dropped the ``to_long`` and ``toLong`` methods (:issue:`19077`) +- The options ``display.line_width`` and ``display.height`` are removed in favor of ``display.width`` and ``display.max_rows`` respectively (:issue:`4391`, :issue:`19107`) +- The ``labels`` attribute of the ``Categorical`` class has been removed in favor of :attr:`Categorical.codes` (:issue:`7768`) 
+- The ``flavor`` parameter has been removed from the :func:`to_sql` method (:issue:`13611`) +- The modules ``pandas.tools.hashing`` and ``pandas.util.hashing`` have been removed (:issue:`16223`) +- The top-level functions ``pd.rolling_*``, ``pd.expanding_*`` and ``pd.ewm*`` have been removed (Deprecated since v0.18). + Instead, use the DataFrame/Series methods :attr:`~DataFrame.rolling`, :attr:`~DataFrame.expanding` and :attr:`~DataFrame.ewm` (:issue:`18723`) +- Imports from ``pandas.core.common`` for functions such as ``is_datetime64_dtype`` are now removed. These are located in ``pandas.api.types``. (:issue:`13634`, :issue:`19769`) +- The ``infer_dst`` keyword in :meth:`Series.tz_localize`, :meth:`DatetimeIndex.tz_localize` + and :class:`DatetimeIndex` has been removed. ``infer_dst=True`` is equivalent to + ``ambiguous='infer'``, and ``infer_dst=False`` to ``ambiguous='raise'`` (:issue:`7963`). +- When ``.resample()`` was changed from an eager to a lazy operation, like ``.groupby()`` in v0.18.0, we put in place compatibility (with a ``FutureWarning``), + so operations would continue to work. This is now fully removed, so a ``Resampler`` will no longer forward compat operations (:issue:`20554`) +- Remove long deprecated ``axis=None`` parameter from ``.replace()`` (:issue:`20271`) + +.. _whatsnew_0230.performance: + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- Indexers on ``Series`` or ``DataFrame`` no longer create a reference cycle (:issue:`17956`) +- Added a keyword argument, ``cache``, to :func:`to_datetime` that improved the performance of converting duplicate datetime arguments (:issue:`11665`) +- :class:`DateOffset` arithmetic performance is improved (:issue:`18218`) +- Converting a ``Series`` of ``Timedelta`` objects to days, seconds, etc... 
sped up through vectorization of underlying methods (:issue:`18092`) +- Improved performance of ``.map()`` with a ``Series/dict`` input (:issue:`15081`) +- The overridden ``Timedelta`` properties of days, seconds and microseconds have been removed, leveraging their built-in Python versions instead (:issue:`18242`) +- ``Series`` construction will reduce the number of copies made of the input data in certain cases (:issue:`17449`) +- Improved performance of :func:`Series.dt.date` and :func:`DatetimeIndex.date` (:issue:`18058`) +- Improved performance of :func:`Series.dt.time` and :func:`DatetimeIndex.time` (:issue:`18461`) +- Improved performance of :func:`IntervalIndex.symmetric_difference` (:issue:`18475`) +- Improved performance of ``DatetimeIndex`` and ``Series`` arithmetic operations with Business-Month and Business-Quarter frequencies (:issue:`18489`) +- :func:`Series` / :func:`DataFrame` tab completion limits to 100 values, for better performance. (:issue:`18587`) +- Improved performance of :func:`DataFrame.median` with ``axis=1`` when bottleneck is not installed (:issue:`16468`) +- Improved performance of :func:`MultiIndex.get_loc` for large indexes, at the cost of a reduction in performance for small ones (:issue:`18519`) +- Improved performance of :func:`MultiIndex.remove_unused_levels` when there are no unused levels, at the cost of a reduction in performance when there are (:issue:`19289`) +- Improved performance of :func:`Index.get_loc` for non-unique indexes (:issue:`19478`) +- Improved performance of pairwise ``.rolling()`` and ``.expanding()`` with ``.cov()`` and ``.corr()`` operations (:issue:`17917`) +- Improved performance of :func:`.GroupBy.rank` (:issue:`15779`) +- Improved performance of variable ``.rolling()`` on ``.min()`` and ``.max()`` (:issue:`19521`) +- Improved performance of :func:`.GroupBy.ffill` and :func:`.GroupBy.bfill` (:issue:`11296`) +- Improved performance of :func:`.GroupBy.any` and :func:`.GroupBy.all` (:issue:`15435`) +- 
Improved performance of :func:`.GroupBy.pct_change` (:issue:`19165`) +- Improved performance of :func:`Series.isin` in the case of categorical dtypes (:issue:`20003`) +- Improved performance of ``getattr(Series, attr)`` when the Series has certain index types. This manifested in slow printing of large Series with a ``DatetimeIndex`` (:issue:`19764`) +- Fixed a performance regression for :func:`GroupBy.nth` and :func:`GroupBy.last` with some object columns (:issue:`19283`) +- Improved performance of :func:`.Categorical.from_codes` (:issue:`18501`) + +.. _whatsnew_0230.docs: + +Documentation changes +~~~~~~~~~~~~~~~~~~~~~ + +Thanks to all of the contributors who participated in the pandas Documentation +Sprint, which took place on March 10th. We had about 500 participants from over +30 locations across the world. You should notice that many of the +:ref:`API docstrings <api>` have greatly improved. + +There were too many simultaneous contributions to include a release note for each +improvement, but this `GitHub search`_ should give you an idea of how many docstrings +were improved. + +Special thanks to `Marc Garcia`_ for organizing the sprint. For more information, +read the `NumFOCUS blogpost`_ recapping the sprint. + +.. _GitHub search: https://github.com/pandas-dev/pandas/pulls?utf8=%E2%9C%93&q=is%3Apr+label%3ADocs+created%3A2018-03-10..2018-03-15+ +.. _NumFOCUS blogpost: https://www.numfocus.org/blog/worldwide-pandas-sprint/ +.. _Marc Garcia: https://github.com/datapythonista + +- Changed spelling of "numpy" to "NumPy", and "python" to "Python". (:issue:`19017`) +- Consistency when introducing code samples, using either colon or period. + Rewrote some sentences for greater clarity, added more dynamic references + to functions, methods and classes. + (:issue:`18941`, :issue:`18948`, :issue:`18973`, :issue:`19017`) +- Added a reference to :func:`DataFrame.assign` in the concatenate section of the merging documentation (:issue:`18665`) + +.. 
_whatsnew_0230.bug_fixes: + +Bug fixes +~~~~~~~~~ + +Categorical +^^^^^^^^^^^ + +.. warning:: + + A class of bugs was introduced in pandas 0.21 with ``CategoricalDtype`` that + affects the correctness of operations like ``merge``, ``concat``, and + indexing when comparing multiple unordered ``Categorical`` arrays that have + the same categories, but in a different order. We highly recommend upgrading + or manually aligning your categories before doing these operations. + +- Bug in ``Categorical.equals`` returning the wrong result when comparing two + unordered ``Categorical`` arrays with the same categories, but in a different + order (:issue:`16603`) +- Bug in :func:`pandas.api.types.union_categoricals` returning the wrong result + for unordered categoricals with the categories in a different order. + This affected :func:`pandas.concat` with Categorical data (:issue:`19096`). +- Bug in :func:`pandas.merge` returning the wrong result when joining on an + unordered ``Categorical`` that had the same categories but in a different + order (:issue:`19551`) +- Bug in :meth:`CategoricalIndex.get_indexer` returning the wrong result when + ``target`` was an unordered ``Categorical`` that had the same categories as + ``self`` but in a different order (:issue:`19551`) +- Bug in :meth:`Index.astype` with a categorical dtype where the resultant index is not converted to a :class:`CategoricalIndex` for all types of index (:issue:`18630`) +- Bug in :meth:`Series.astype` and ``Categorical.astype()`` where an existing categorical data does not get updated (:issue:`10696`, :issue:`18593`) +- Bug in :meth:`Series.str.split` with ``expand=True`` incorrectly raising an IndexError on empty strings (:issue:`20002`). 
+- Bug in :class:`Index` constructor with ``dtype=CategoricalDtype(...)`` where ``categories`` and ``ordered`` are not maintained (:issue:`19032`) +- Bug in :class:`Series` constructor with scalar and ``dtype=CategoricalDtype(...)`` where ``categories`` and ``ordered`` are not maintained (:issue:`19565`) +- Bug in ``Categorical.__iter__`` not converting to Python types (:issue:`19909`) +- Bug in :func:`pandas.factorize` returning the unique codes for the ``uniques``. This now returns a ``Categorical`` with the same dtype as the input (:issue:`19721`) +- Bug in :func:`pandas.factorize` including an item for missing values in the ``uniques`` return value (:issue:`19721`) +- Bug in :meth:`Series.take` with categorical data interpreting ``-1`` in ``indices`` as missing value markers, rather than the last element of the Series (:issue:`20664`) + +Datetimelike +^^^^^^^^^^^^ + +- Bug in :func:`Series.__sub__` subtracting a non-nanosecond ``np.datetime64`` object from a ``Series`` gave incorrect results (:issue:`7996`) +- Bug in :class:`DatetimeIndex`, :class:`TimedeltaIndex` addition and subtraction of zero-dimensional integer arrays gave incorrect results (:issue:`19012`) +- Bug in :class:`DatetimeIndex` and :class:`TimedeltaIndex` where adding or subtracting an array-like of ``DateOffset`` objects either raised (``np.array``, ``pd.Index``) or broadcast incorrectly (``pd.Series``) (:issue:`18849`) +- Bug in :func:`Series.__add__` adding Series with dtype ``timedelta64[ns]`` to a timezone-aware ``DatetimeIndex`` incorrectly dropped timezone information (:issue:`13905`) +- Adding a ``Period`` object to a ``datetime`` or ``Timestamp`` object will now correctly raise a ``TypeError`` (:issue:`17983`) +- Bug in :class:`Timestamp` where comparison with an array of ``Timestamp`` objects would result in a ``RecursionError`` (:issue:`15183`) +- Bug in :class:`Series` floor-division where operating on a scalar ``timedelta`` raises an exception (:issue:`18846`) +- Bug in 
:class:`DatetimeIndex` where the repr was not showing high-precision time values at the end of a day (e.g., 23:59:59.999999999) (:issue:`19030`) +- Bug in ``.astype()`` to non-ns timedelta units would hold the incorrect dtype (:issue:`19176`, :issue:`19223`, :issue:`12425`) +- Bug in subtracting :class:`Series` from ``NaT`` incorrectly returning ``NaT`` (:issue:`19158`) +- Bug in :func:`Series.truncate` which raises ``TypeError`` with a monotonic ``PeriodIndex`` (:issue:`17717`) +- Bug in :func:`~DataFrame.pct_change` using ``periods`` and ``freq`` returned different length outputs (:issue:`7292`) +- Bug in comparison of :class:`DatetimeIndex` against ``None`` or ``datetime.date`` objects raising ``TypeError`` for ``==`` and ``!=`` comparisons instead of all-``False`` and all-``True``, respectively (:issue:`19301`) +- Bug in :class:`Timestamp` and :func:`to_datetime` where a string representing a barely out-of-bounds timestamp would be incorrectly rounded down instead of raising ``OutOfBoundsDatetime`` (:issue:`19382`) +- Bug in :func:`Timestamp.floor` :func:`DatetimeIndex.floor` where time stamps far in the future and past were not rounded correctly (:issue:`19206`) +- Bug in :func:`to_datetime` where passing an out-of-bounds datetime with ``errors='coerce'`` and ``utc=True`` would raise ``OutOfBoundsDatetime`` instead of parsing to ``NaT`` (:issue:`19612`) +- Bug in :class:`DatetimeIndex` and :class:`TimedeltaIndex` addition and subtraction where name of the returned object was not always set consistently. 
(:issue:`19744`) +- Bug in :class:`DatetimeIndex` and :class:`TimedeltaIndex` addition and subtraction where operations with numpy arrays raised ``TypeError`` (:issue:`19847`) +- Bug in :class:`DatetimeIndex` and :class:`TimedeltaIndex` where setting the ``freq`` attribute was not fully supported (:issue:`20678`) + +Timedelta +^^^^^^^^^ + +- Bug in :func:`Timedelta.__mul__` where multiplying by ``NaT`` returned ``NaT`` instead of raising a ``TypeError`` (:issue:`19819`) +- Bug in :class:`Series` with ``dtype='timedelta64[ns]'`` where addition or subtraction of ``TimedeltaIndex`` had results cast to ``dtype='int64'`` (:issue:`17250`) +- Bug in :class:`Series` with ``dtype='timedelta64[ns]'`` where addition or subtraction of ``TimedeltaIndex`` could return a ``Series`` with an incorrect name (:issue:`19043`) +- Bug in :func:`Timedelta.__floordiv__` and :func:`Timedelta.__rfloordiv__` dividing by many incompatible numpy objects was incorrectly allowed (:issue:`18846`) +- Bug where dividing a scalar timedelta-like object with :class:`TimedeltaIndex` performed the reciprocal operation (:issue:`19125`) +- Bug in :class:`TimedeltaIndex` where division by a ``Series`` would return a ``TimedeltaIndex`` instead of a ``Series`` (:issue:`19042`) +- Bug in :func:`Timedelta.__add__`, :func:`Timedelta.__sub__` where adding or subtracting a ``np.timedelta64`` object would return another ``np.timedelta64`` instead of a ``Timedelta`` (:issue:`19738`) +- Bug in :func:`Timedelta.__floordiv__`, :func:`Timedelta.__rfloordiv__` where operating with a ``Tick`` object would raise a ``TypeError`` instead of returning a numeric value (:issue:`19738`) +- Bug in :func:`Period.asfreq` where periods near ``datetime(1, 1, 1)`` could be converted incorrectly (:issue:`19643`, :issue:`19834`) +- Bug in :func:`Timedelta.total_seconds` causing precision errors, for example ``Timedelta('30S').total_seconds()==30.000000000000004`` (:issue:`19458`) +- Bug in :func:`Timedelta.__rmod__` where operating 
with a ``numpy.timedelta64`` returned a ``timedelta64`` object instead of a ``Timedelta`` (:issue:`19820`) +- Multiplication of :class:`TimedeltaIndex` by ``TimedeltaIndex`` will now raise ``TypeError`` instead of raising ``ValueError`` in cases of length mismatch (:issue:`19333`) +- Bug in indexing a :class:`TimedeltaIndex` with a ``np.timedelta64`` object which was raising a ``TypeError`` (:issue:`20393`) + + +Timezones +^^^^^^^^^ + +- Bug in creating a ``Series`` from an array that contains both tz-naive and tz-aware values will result in a ``Series`` whose dtype is tz-aware instead of object (:issue:`16406`) +- Bug in comparison of timezone-aware :class:`DatetimeIndex` against ``NaT`` incorrectly raising ``TypeError`` (:issue:`19276`) +- Bug in :meth:`DatetimeIndex.astype` when converting between timezone aware dtypes, and converting from timezone aware to naive (:issue:`18951`) +- Bug in comparing :class:`DatetimeIndex`, which failed to raise ``TypeError`` when attempting to compare timezone-aware and timezone-naive datetimelike objects (:issue:`18162`) +- Bug in localization of a naive, datetime string in a ``Series`` constructor with a ``datetime64[ns, tz]`` dtype (:issue:`17415`) +- :func:`Timestamp.replace` will now handle Daylight Savings transitions gracefully (:issue:`18319`) +- Bug in tz-aware :class:`DatetimeIndex` where addition/subtraction with a :class:`TimedeltaIndex` or array with ``dtype='timedelta64[ns]'`` was incorrect (:issue:`17558`) +- Bug in :func:`DatetimeIndex.insert` where inserting ``NaT`` into a timezone-aware index incorrectly raised (:issue:`16357`) +- Bug in :class:`DataFrame` constructor, where tz-aware Datetimeindex and a given column name will result in an empty ``DataFrame`` (:issue:`19157`) +- Bug in :func:`Timestamp.tz_localize` where localizing a timestamp near the minimum or maximum valid values could overflow and return a timestamp with an incorrect nanosecond value (:issue:`12677`) +- Bug when iterating over 
:class:`DatetimeIndex` that was localized with fixed timezone offset that rounded nanosecond precision to microseconds (:issue:`19603`) +- Bug in :func:`DataFrame.diff` that raised an ``IndexError`` with tz-aware values (:issue:`18578`) +- Bug in :func:`melt` that converted tz-aware dtypes to tz-naive (:issue:`15785`) +- Bug in :func:`DataFrame.count` that raised a ``ValueError``, if :func:`DataFrame.dropna` was called for a single column with timezone-aware values. (:issue:`13407`) + +Offsets +^^^^^^^ + +- Bug in :class:`WeekOfMonth` and :class:`Week` where addition and subtraction did not roll correctly (:issue:`18510`, :issue:`18672`, :issue:`18864`) +- Bug in :class:`WeekOfMonth` and :class:`LastWeekOfMonth` where default keyword arguments for constructor raised ``ValueError`` (:issue:`19142`) +- Bug in :class:`FY5253Quarter`, :class:`LastWeekOfMonth` where rollback and rollforward behavior was inconsistent with addition and subtraction behavior (:issue:`18854`) +- Bug in :class:`FY5253` where ``datetime`` addition and subtraction incremented incorrectly for dates on the year-end but not normalized to midnight (:issue:`18854`) +- Bug in :class:`FY5253` where date offsets could incorrectly raise an ``AssertionError`` in arithmetic operations (:issue:`14774`) + +Numeric +^^^^^^^ +- Bug in :class:`Series` constructor with an int or float list where specifying ``dtype=str``, ``dtype='str'`` or ``dtype='U'`` failed to convert the data elements to strings (:issue:`16605`) +- Bug in :class:`Index` multiplication and division methods where operating with a ``Series`` would return an ``Index`` object instead of a ``Series`` object (:issue:`19042`) +- Bug in the :class:`DataFrame` constructor in which data containing very large positive or very large negative numbers was causing ``OverflowError`` (:issue:`18584`) +- Bug in :class:`Index` constructor with ``dtype='uint64'`` where int-like floats were not coerced to :class:`UInt64Index` (:issue:`18400`) +- Bug in 
:class:`DataFrame` flex arithmetic (e.g. ``df.add(other, fill_value=foo)``) with a ``fill_value`` other than ``None`` failed to raise ``NotImplementedError`` in corner cases where either the frame or ``other`` has length zero (:issue:`19522`) +- Multiplication and division of numeric-dtyped :class:`Index` objects with timedelta-like scalars returns ``TimedeltaIndex`` instead of raising ``TypeError`` (:issue:`19333`) +- Bug where ``NaN`` was returned instead of 0 by :func:`Series.pct_change` and :func:`DataFrame.pct_change` when ``fill_method`` is not ``None`` (:issue:`19873`) + +Strings +^^^^^^^ +- Bug in :func:`Series.str.get` with a dictionary in the values and the index not in the keys, raising ``KeyError`` (:issue:`20671`) + + +Indexing +^^^^^^^^ + +- Bug in :class:`Index` construction from list of mixed type tuples (:issue:`18505`) +- Bug in :func:`Index.drop` when passing a list of both tuples and non-tuples (:issue:`18304`) +- Bug in :func:`DataFrame.drop`, :meth:`Panel.drop`, :meth:`Series.drop`, :meth:`Index.drop` where no ``KeyError`` is raised when dropping a non-existent element from an axis that contains duplicates (:issue:`19186`) +- Bug in indexing a datetimelike ``Index`` that raised ``ValueError`` instead of ``IndexError`` (:issue:`18386`). 
+- :func:`Index.to_series` now accepts ``index`` and ``name`` kwargs (:issue:`18699`) +- :func:`DatetimeIndex.to_series` now accepts ``index`` and ``name`` kwargs (:issue:`18699`) +- Bug in indexing non-scalar value from ``Series`` having non-unique ``Index`` will return value flattened (:issue:`17610`) +- Bug in indexing with iterator containing only missing keys, which raised no error (:issue:`20748`) +- Fixed inconsistency in ``.ix`` between list and scalar keys when the index has integer dtype and does not include the desired keys (:issue:`20753`) +- Bug in ``__setitem__`` when indexing a :class:`DataFrame` with a 2-d boolean ndarray (:issue:`18582`) +- Bug in ``str.extractall`` when there were no matches empty :class:`Index` was returned instead of appropriate :class:`MultiIndex` (:issue:`19034`) +- Bug in :class:`IntervalIndex` where empty and purely NA data was constructed inconsistently depending on the construction method (:issue:`18421`) +- Bug in :func:`IntervalIndex.symmetric_difference` where the symmetric difference with a non-``IntervalIndex`` did not raise (:issue:`18475`) +- Bug in :class:`IntervalIndex` where set operations that returned an empty ``IntervalIndex`` had the wrong dtype (:issue:`19101`) +- Bug in :meth:`DataFrame.drop_duplicates` where no ``KeyError`` is raised when passing in columns that don't exist on the ``DataFrame`` (:issue:`19726`) +- Bug in ``Index`` subclasses constructors that ignore unexpected keyword arguments (:issue:`19348`) +- Bug in :meth:`Index.difference` when taking difference of an ``Index`` with itself (:issue:`20040`) +- Bug in :meth:`DataFrame.first_valid_index` and :meth:`DataFrame.last_valid_index` in presence of entire rows of NaNs in the middle of values (:issue:`20499`). 
+- Bug in :class:`IntervalIndex` where some indexing operations were not supported for overlapping or non-monotonic ``uint64`` data (:issue:`20636`) +- Bug in ``Series.is_unique`` where extraneous output in stderr is shown if Series contains objects with ``__ne__`` defined (:issue:`20661`) +- Bug in ``.loc`` assignment with a single-element list-like incorrectly assigns as a list (:issue:`19474`) +- Bug in partial string indexing on a ``Series/DataFrame`` with a monotonic decreasing ``DatetimeIndex`` (:issue:`19362`) +- Bug in performing in-place operations on a ``DataFrame`` with a duplicate ``Index`` (:issue:`17105`) +- Bug in :meth:`IntervalIndex.get_loc` and :meth:`IntervalIndex.get_indexer` when used with an :class:`IntervalIndex` containing a single interval (:issue:`17284`, :issue:`20921`) +- Bug in ``.loc`` with a ``uint64`` indexer (:issue:`20722`) + +MultiIndex +^^^^^^^^^^ + +- Bug in :func:`MultiIndex.__contains__` where non-tuple keys would return ``True`` even if they had been dropped (:issue:`19027`) +- Bug in :func:`MultiIndex.set_labels` which would cause casting (and potentially clipping) of the new labels if the ``level`` argument is not 0 or a list like [0, 1, ... 
] (:issue:`19057`) +- Bug in :func:`MultiIndex.get_level_values` which would return an invalid index on level of ints with missing values (:issue:`17924`) +- Bug in :func:`MultiIndex.unique` when called on empty :class:`MultiIndex` (:issue:`20568`) +- Bug in :func:`MultiIndex.unique` which would not preserve level names (:issue:`20570`) +- Bug in :func:`MultiIndex.remove_unused_levels` which would fill nan values (:issue:`18417`) +- Bug in :func:`MultiIndex.from_tuples` which would fail to take zipped tuples in python3 (:issue:`18434`) +- Bug in :func:`MultiIndex.get_loc` which would fail to automatically cast values between float and int (:issue:`18818`, :issue:`15994`) +- Bug in :func:`MultiIndex.get_loc` which would cast boolean to integer labels (:issue:`19086`) +- Bug in :func:`MultiIndex.get_loc` which would fail to locate keys containing ``NaN`` (:issue:`18485`) +- Bug in :func:`MultiIndex.get_loc` in large :class:`MultiIndex`, would fail when levels had different dtypes (:issue:`18520`) +- Bug in indexing where nested indexers having only numpy arrays are handled incorrectly (:issue:`19686`) + + +IO +^^ + +- :func:`read_html` now rewinds seekable IO objects after parse failure, before attempting to parse with a new parser. If a parser errors and the object is non-seekable, an informative error is raised suggesting the use of a different parser (:issue:`17975`) +- :meth:`DataFrame.to_html` now has an option to add an id to the leading ``<table>`` tag (:issue:`8496`) +- Bug in :func:`read_msgpack` when a non-existent file is passed in Python 2 (:issue:`15296`) +- Bug in :func:`read_csv` where a ``MultiIndex`` with duplicate columns was not being mangled appropriately (:issue:`18062`) +- Bug in :func:`read_csv` where missing values were not being handled properly when ``keep_default_na=False`` with dictionary ``na_values`` (:issue:`19227`) +- Bug in :func:`read_csv` causing heap corruption on 32-bit, big-endian architectures (:issue:`20785`) +- Bug in :func:`read_sas` where a file with 0 variables gave an ``AttributeError`` incorrectly. Now it gives an ``EmptyDataError`` (:issue:`18184`) +- Bug in :func:`DataFrame.to_latex` where pairs of braces meant to serve as invisible placeholders were escaped (:issue:`18667`) +- Bug in :func:`DataFrame.to_latex` where a ``NaN`` in a ``MultiIndex`` would cause an ``IndexError`` or incorrect output (:issue:`14249`) +- Bug in :func:`DataFrame.to_latex` where a non-string index-level name would result in an ``AttributeError`` (:issue:`19981`) +- Bug in :func:`DataFrame.to_latex` where the combination of an index name and the ``index_names=False`` option would result in incorrect output (:issue:`18326`) +- Bug in :func:`DataFrame.to_latex` where a ``MultiIndex`` with an empty string as its name would result in incorrect output (:issue:`18669`) +- Bug in :func:`DataFrame.to_latex` where missing space characters caused wrong escaping and produced non-valid latex in some cases (:issue:`20859`) +- Bug in :func:`read_json` where large numeric values were causing an ``OverflowError`` (:issue:`18842`) +- Bug in :func:`DataFrame.to_parquet` where an exception was raised if the write destination is S3 (:issue:`19134`) +- :class:`Interval` now supported in :func:`DataFrame.to_excel` for all Excel file types (:issue:`19242`) +- :class:`Timedelta` now supported in :func:`DataFrame.to_excel` for all Excel file types (:issue:`19242`, :issue:`9155`, :issue:`19900`) +- Bug in 
:meth:`pandas.io.stata.StataReader.value_labels` raising an ``AttributeError`` when called on very old files. Now returns an empty dict (:issue:`19417`) +- Bug in :func:`read_pickle` when unpickling objects with :class:`TimedeltaIndex` or :class:`Float64Index` created with pandas prior to version 0.20 (:issue:`19939`) +- Bug in :meth:`pandas.io.json.json_normalize` where sub-records are not properly normalized if any sub-records values are NoneType (:issue:`20030`) +- Bug in ``usecols`` parameter in :func:`read_csv` where error is not raised correctly when passing a string. (:issue:`20529`) +- Bug in :func:`HDFStore.keys` when reading a file with a soft link causes exception (:issue:`20523`) +- Bug in :func:`HDFStore.select_column` where a key which is not a valid store raised an ``AttributeError`` instead of a ``KeyError`` (:issue:`17912`) + +Plotting +^^^^^^^^ + +- Better error message when attempting to plot but matplotlib is not installed (:issue:`19810`). +- :func:`DataFrame.plot` now raises a ``ValueError`` when the ``x`` or ``y`` argument is improperly formed (:issue:`18671`) +- Bug in :func:`DataFrame.plot` when ``x`` and ``y`` arguments given as positions caused incorrect referenced columns for line, bar and area plots (:issue:`20056`) +- Bug in formatting tick labels with ``datetime.time()`` and fractional seconds (:issue:`18478`). +- :meth:`Series.plot.kde` has exposed the args ``ind`` and ``bw_method`` in the docstring (:issue:`18461`). The argument ``ind`` may now also be an integer (number of sample points). 
+- :func:`DataFrame.plot` now supports multiple columns to the ``y`` argument (:issue:`19699`) + + +GroupBy/resample/rolling +^^^^^^^^^^^^^^^^^^^^^^^^ + +- Bug when grouping by a single column and aggregating with a class like ``list`` or ``tuple`` (:issue:`18079`) +- Fixed regression in :func:`DataFrame.groupby` which would not emit an error when called with a tuple key not in the index (:issue:`18798`) +- Bug in :func:`DataFrame.resample` which silently ignored unsupported (or mistyped) options for ``label``, ``closed`` and ``convention`` (:issue:`19303`) +- Bug in :func:`DataFrame.groupby` where tuples were interpreted as lists of keys rather than as keys (:issue:`17979`, :issue:`18249`) +- Bug in :func:`DataFrame.groupby` where aggregation by ``first``/``last``/``min``/``max`` was causing timestamps to lose precision (:issue:`19526`) +- Bug in :func:`DataFrame.transform` where particular aggregation functions were being incorrectly cast to match the dtype(s) of the grouped data (:issue:`19200`) +- Bug in :func:`DataFrame.groupby` passing the ``on=`` kwarg, and subsequently using ``.apply()`` (:issue:`17813`) +- Bug in :func:`DataFrame.resample().aggregate <.Resampler.aggregate>` not raising a ``KeyError`` when aggregating a non-existent column (:issue:`16766`, :issue:`19566`) +- Bug in :func:`DataFrameGroupBy.cumsum` and :func:`DataFrameGroupBy.cumprod` when ``skipna`` was passed (:issue:`19806`) +- Bug in :func:`DataFrame.resample` that dropped timezone information (:issue:`13238`) +- Bug in :func:`DataFrame.groupby` where transformations using ``np.all`` and ``np.any`` were raising a ``ValueError`` (:issue:`20653`) +- Bug in :func:`DataFrame.resample` where ``ffill``, ``bfill``, ``pad``, ``backfill``, ``fillna``, ``interpolate``, and ``asfreq`` were ignoring ``loffset``. 
(:issue:`20744`) +- Bug in :func:`DataFrame.groupby` when applying a function that has mixed data types and the user supplied function can fail on the grouping column (:issue:`20949`) +- Bug in :func:`DataFrameGroupBy.rolling().apply() <.Rolling.apply>` where operations performed against the associated :class:`DataFrameGroupBy` object could impact the inclusion of the grouped item(s) in the result (:issue:`14013`) + +Sparse +^^^^^^ + +- Bug in which creating a :class:`SparseDataFrame` from a dense ``Series`` or an unsupported type raised an uncontrolled exception (:issue:`19374`) +- Bug in :class:`SparseDataFrame.to_csv` causing exception (:issue:`19384`) +- Bug in :class:`SparseSeries.memory_usage` which caused segfault by accessing non sparse elements (:issue:`19368`) +- Bug in constructing a :class:`SparseArray`: if ``data`` is a scalar and ``index`` is defined it will coerce to ``float64`` regardless of scalar's dtype. (:issue:`19163`) + +Reshaping +^^^^^^^^^ + +- Bug in :func:`DataFrame.merge` where referencing a ``CategoricalIndex`` by name, where the ``by`` kwarg would ``KeyError`` (:issue:`20777`) +- Bug in :func:`DataFrame.stack` which fails trying to sort mixed type levels under Python 3 (:issue:`18310`) +- Bug in :func:`DataFrame.unstack` which casts int to float if ``columns`` is a ``MultiIndex`` with unused levels (:issue:`17845`) +- Bug in :func:`DataFrame.unstack` which raises an error if ``index`` is a ``MultiIndex`` with unused labels on the unstacked level (:issue:`18562`) +- Fixed construction of a :class:`Series` from a ``dict`` containing ``NaN`` as key (:issue:`18480`) +- Fixed construction of a :class:`DataFrame` from a ``dict`` containing ``NaN`` as key (:issue:`18455`) +- Disabled construction of a :class:`Series` where len(index) > len(data) = 1, which previously would broadcast the data item, and now raises a ``ValueError`` (:issue:`18819`) +- Suppressed error in the construction of a :class:`DataFrame` from a ``dict`` containing scalar 
values when the corresponding keys are not included in the passed index (:issue:`18600`) + +- Fixed (changed from ``object`` to ``float64``) dtype of :class:`DataFrame` initialized with axes, no data, and ``dtype=int`` (:issue:`19646`) +- Bug in :func:`Series.rank` where ``Series`` containing ``NaT`` modifies the ``Series`` inplace (:issue:`18521`) +- Bug in :func:`cut` which fails when using readonly arrays (:issue:`18773`) +- Bug in :func:`DataFrame.pivot_table` which fails when the ``aggfunc`` arg is of type string. The behavior is now consistent with other methods like ``agg`` and ``apply`` (:issue:`18713`) +- Bug in :func:`DataFrame.merge` in which merging using ``Index`` objects as vectors raised an Exception (:issue:`19038`) +- Bug in :func:`DataFrame.stack`, :func:`DataFrame.unstack`, :func:`Series.unstack` which were not returning subclasses (:issue:`15563`) +- Bug in timezone comparisons, manifesting as a conversion of the index to UTC in ``.concat()`` (:issue:`18523`) +- Bug in :func:`concat` when concatenating sparse and dense series it returns only a ``SparseDataFrame``. Should be a ``DataFrame``. 
(:issue:`18914`, :issue:`18686`, and :issue:`16874`) +- Improved error message for :func:`DataFrame.merge` when there is no common merge key (:issue:`19427`) +- Bug in :func:`DataFrame.join` which does an ``outer`` instead of a ``left`` join when being called with multiple DataFrames and some have non-unique indices (:issue:`19624`) +- :func:`Series.rename` now accepts ``axis`` as a kwarg (:issue:`18589`) +- Bug in :func:`~DataFrame.rename` where an Index of same-length tuples was converted to a MultiIndex (:issue:`19497`) +- Comparisons between :class:`Series` and :class:`Index` would return a ``Series`` with an incorrect name, ignoring the ``Index``'s name attribute (:issue:`19582`) +- Bug in :func:`qcut` where datetime and timedelta data with ``NaT`` present raised a ``ValueError`` (:issue:`19768`) +- Bug in :func:`DataFrame.iterrows`, which would infers strings not compliant to `ISO8601 `_ to datetimes (:issue:`19671`) +- Bug in :class:`Series` constructor with ``Categorical`` where a ``ValueError`` is not raised when an index of different length is given (:issue:`19342`) +- Bug in :meth:`DataFrame.astype` where column metadata is lost when converting to categorical or a dictionary of dtypes (:issue:`19920`) +- Bug in :func:`cut` and :func:`qcut` where timezone information was dropped (:issue:`19872`) +- Bug in :class:`Series` constructor with a ``dtype=str``, previously raised in some cases (:issue:`19853`) +- Bug in :func:`get_dummies`, and :func:`select_dtypes`, where duplicate column names caused incorrect behavior (:issue:`20848`) +- Bug in :func:`isna`, which cannot handle ambiguous typed lists (:issue:`20675`) +- Bug in :func:`concat` which raises an error when concatenating TZ-aware dataframes and all-NaT dataframes (:issue:`12396`) +- Bug in :func:`concat` which raises an error when concatenating empty TZ-aware series (:issue:`18447`) + +Other +^^^^^ + +- Improved error message when attempting to use a Python keyword as an identifier in a ``numexpr`` 
backed query (:issue:`18221`) +- Bug in accessing a :func:`pandas.get_option`, which raised ``KeyError`` rather than ``OptionError`` when looking up a non-existent option key in some cases (:issue:`19789`) +- Bug in :func:`testing.assert_series_equal` and :func:`testing.assert_frame_equal` for Series or DataFrames with differing unicode data (:issue:`20503`) + +.. _whatsnew_0.23.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.22.0..v0.23.0 From dbb7e4600c828cd2281bea3c80871907fa93119b Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sun, 27 Jul 2025 08:56:16 -0400 Subject: [PATCH 18/26] Cleanup v0.23.0.rst --- doc/source/whatsnew/v0.23.0.rst | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.rst b/doc/source/whatsnew/v0.23.0.rst index 7f7609edc27b6..e942046b945fe 100644 --- a/doc/source/whatsnew/v0.23.0.rst +++ b/doc/source/whatsnew/v0.23.0.rst @@ -54,7 +54,7 @@ A ``DataFrame`` can now be written to and subsequently read back via JSON while In [1]: df = pd.DataFrame({'foo': [1, 2, 3, 4], ...: 'bar': ['a', 'b', 'c', 'd'], - ...: 'baz': pd.date_range('2018-01-01', freq='d', periods=4), + ...: 'baz': pd.date_range('2018-01-01', freq='D', periods=4), ...: 'qux': pd.Categorical(['a', 'b', 'c', 'c'])}, ...: index=pd.Index(range(4), name='idx')) @@ -516,7 +516,6 @@ The method has now gained a keyword ``join`` to control the manner of alignment, In v.0.23 ``join`` will default to None (meaning no alignment), but this default will change to ``'left'`` in a future version of pandas. .. ipython:: python - :okwarning: s = pd.Series(['a', 'b', 'c', 'd']) t = pd.Series(['b', 'd', 'e', 'c'], index=[1, 3, 4, 2]) @@ -856,7 +855,6 @@ In a future version of pandas :func:`pandas.concat` will no longer sort the non- The current behavior is the same as the previous (sorting), but now a warning is issued when ``sort`` is not specified and the non-concatenation axis is not aligned (:issue:`4588`). .. 
ipython:: python - :okwarning: df1 = pd.DataFrame({"a": [1, 2], "b": [1, 2]}, columns=['b', 'a']) df2 = pd.DataFrame({"a": [4, 5]}) @@ -871,7 +869,7 @@ To keep the previous behavior (sorting) and silence the warning, pass ``sort=Tru To accept the future behavior (no sorting), pass ``sort=False`` -.. ipython +.. ipython:: python pd.concat([df1, df2], sort=False) From f45b45a8e07e10649cbf8aae8b321af8a57bc8cc Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sun, 27 Jul 2025 09:09:13 -0400 Subject: [PATCH 19/26] Debug --- .github/workflows/code-checks.yml | 376 +++++----- .github/workflows/unit-tests.yml | 846 ++++++++++----------- doc/source/whatsnew/v0.23.0.rst | 1142 ----------------------------- 3 files changed, 611 insertions(+), 1753 deletions(-) diff --git a/.github/workflows/code-checks.yml b/.github/workflows/code-checks.yml index 728019b06e053..f5e77b75c97df 100644 --- a/.github/workflows/code-checks.yml +++ b/.github/workflows/code-checks.yml @@ -1,188 +1,188 @@ -name: Code Checks - -on: - push: - branches: - - main - - 2.3.x - pull_request: - branches: - - main - - 2.3.x - -env: - ENV_FILE: environment.yml - PANDAS_CI: 1 - -permissions: - contents: read - -# pre-commit run by https://pre-commit.ci/ -jobs: - docstring_typing_manual_hooks: - name: Docstring validation, typing, and other manual pre-commit hooks - runs-on: ubuntu-24.04 - defaults: - run: - shell: bash -el {0} - - concurrency: - # https://github.community/t/concurrecy-not-work-for-push/183068/7 - group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-code-checks - cancel-in-progress: true - - steps: - - name: Checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Set up Conda - uses: ./.github/actions/setup-conda - - - name: Build Pandas - id: build - uses: ./.github/actions/build_pandas - with: - editable: false - - # The following checks are independent of each other and should still be run if one fails - - # TODO: The doctests have to be run first 
right now, since the Cython doctests only work - # with pandas installed in non-editable mode - # This can be removed once pytest-cython doesn't require C extensions to be installed inplace - - - name: Extra installs - # https://pytest-qt.readthedocs.io/en/latest/troubleshooting.html#github-actions-azure-pipelines-travis-ci-and-gitlab-ci-cd - run: sudo apt-get update && sudo apt-get install -y libegl1 libopengl0 - - - name: Run doctests - run: cd ci && ./code_checks.sh doctests - if: ${{ steps.build.outcome == 'success' && always() }} - - - name: Install pandas in editable mode - id: build-editable - if: ${{ steps.build.outcome == 'success' && always() }} - uses: ./.github/actions/build_pandas - with: - editable: true - - - name: Check for no warnings when building single-page docs - run: ci/code_checks.sh single-docs - if: ${{ steps.build.outcome == 'success' && always() }} - - - name: Run checks on imported code - run: ci/code_checks.sh code - if: ${{ steps.build.outcome == 'success' && always() }} - - - name: Run docstring validation - run: ci/code_checks.sh docstrings - if: ${{ steps.build.outcome == 'success' && always() }} - - - name: Run check of documentation notebooks - run: ci/code_checks.sh notebooks - if: ${{ steps.build.outcome == 'success' && always() }} - - - name: Use existing environment for type checking - run: | - echo $PATH >> $GITHUB_PATH - echo "PYTHONHOME=$PYTHONHOME" >> $GITHUB_ENV - echo "PYTHONPATH=$PYTHONPATH" >> $GITHUB_ENV - if: ${{ steps.build.outcome == 'success' && always() }} - - - name: Typing - uses: pre-commit/action@v3.0.1 - with: - extra_args: --verbose --hook-stage manual --all-files - if: ${{ steps.build.outcome == 'success' && always() }} - - - name: Run docstring validation script tests - run: pytest scripts - if: ${{ steps.build.outcome == 'success' && always() }} - - asv-benchmarks: - name: ASV Benchmarks - runs-on: ubuntu-24.04 - defaults: - run: - shell: bash -el {0} - - concurrency: - # 
https://github.community/t/concurrecy-not-work-for-push/183068/7 - group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-asv-benchmarks - cancel-in-progress: true - - steps: - - name: Checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Set up Conda - uses: ./.github/actions/setup-conda - - - name: Build Pandas - id: build - uses: ./.github/actions/build_pandas - - - name: Run ASV benchmarks - run: | - cd asv_bench - asv machine --yes - asv run --quick --dry-run --durations=30 --python=same --show-stderr - - build_docker_dev_environment: - name: Build Docker Dev Environment - runs-on: ubuntu-24.04 - defaults: - run: - shell: bash -el {0} - - concurrency: - # https://github.community/t/concurrecy-not-work-for-push/183068/7 - group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-build_docker_dev_environment - cancel-in-progress: true - - steps: - - name: Clean up dangling images - run: docker image prune -f - - - name: Checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Build image - run: docker build --pull --no-cache --tag pandas-dev-env . 
- - - name: Show environment - run: docker run --rm pandas-dev-env python -c "import pandas as pd; print(pd.show_versions())" - - requirements-dev-text-installable: - name: Test install requirements-dev.txt - runs-on: ubuntu-24.04 - - concurrency: - # https://github.community/t/concurrecy-not-work-for-push/183068/7 - group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-requirements-dev-text-installable - cancel-in-progress: true - - steps: - - name: Checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Setup Python - id: setup_python - uses: actions/setup-python@v5 - with: - python-version: '3.10' - cache: 'pip' - cache-dependency-path: 'requirements-dev.txt' - - - name: Install requirements-dev.txt - run: pip install -r requirements-dev.txt - - - name: Check Pip Cache Hit - run: echo ${{ steps.setup_python.outputs.cache-hit }} +#name: Code Checks +# +#on: +# push: +# branches: +# - main +# - 2.3.x +# pull_request: +# branches: +# - main +# - 2.3.x +# +#env: +# ENV_FILE: environment.yml +# PANDAS_CI: 1 +# +#permissions: +# contents: read +# +## pre-commit run by https://pre-commit.ci/ +#jobs: +# docstring_typing_manual_hooks: +# name: Docstring validation, typing, and other manual pre-commit hooks +# runs-on: ubuntu-24.04 +# defaults: +# run: +# shell: bash -el {0} +# +# concurrency: +# # https://github.community/t/concurrecy-not-work-for-push/183068/7 +# group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-code-checks +# cancel-in-progress: true +# +# steps: +# - name: Checkout +# uses: actions/checkout@v4 +# with: +# fetch-depth: 0 +# +# - name: Set up Conda +# uses: ./.github/actions/setup-conda +# +# - name: Build Pandas +# id: build +# uses: ./.github/actions/build_pandas +# with: +# editable: false +# +# # The following checks are independent of each other and should still be run if one fails +# +# # TODO: The doctests have to be run first right now, since the Cython doctests only work +# # 
with pandas installed in non-editable mode +# # This can be removed once pytest-cython doesn't require C extensions to be installed inplace +# +# - name: Extra installs +# # https://pytest-qt.readthedocs.io/en/latest/troubleshooting.html#github-actions-azure-pipelines-travis-ci-and-gitlab-ci-cd +# run: sudo apt-get update && sudo apt-get install -y libegl1 libopengl0 +# +# - name: Run doctests +# run: cd ci && ./code_checks.sh doctests +# if: ${{ steps.build.outcome == 'success' && always() }} +# +# - name: Install pandas in editable mode +# id: build-editable +# if: ${{ steps.build.outcome == 'success' && always() }} +# uses: ./.github/actions/build_pandas +# with: +# editable: true +# +# - name: Check for no warnings when building single-page docs +# run: ci/code_checks.sh single-docs +# if: ${{ steps.build.outcome == 'success' && always() }} +# +# - name: Run checks on imported code +# run: ci/code_checks.sh code +# if: ${{ steps.build.outcome == 'success' && always() }} +# +# - name: Run docstring validation +# run: ci/code_checks.sh docstrings +# if: ${{ steps.build.outcome == 'success' && always() }} +# +# - name: Run check of documentation notebooks +# run: ci/code_checks.sh notebooks +# if: ${{ steps.build.outcome == 'success' && always() }} +# +# - name: Use existing environment for type checking +# run: | +# echo $PATH >> $GITHUB_PATH +# echo "PYTHONHOME=$PYTHONHOME" >> $GITHUB_ENV +# echo "PYTHONPATH=$PYTHONPATH" >> $GITHUB_ENV +# if: ${{ steps.build.outcome == 'success' && always() }} +# +# - name: Typing +# uses: pre-commit/action@v3.0.1 +# with: +# extra_args: --verbose --hook-stage manual --all-files +# if: ${{ steps.build.outcome == 'success' && always() }} +# +# - name: Run docstring validation script tests +# run: pytest scripts +# if: ${{ steps.build.outcome == 'success' && always() }} +# +# asv-benchmarks: +# name: ASV Benchmarks +# runs-on: ubuntu-24.04 +# defaults: +# run: +# shell: bash -el {0} +# +# concurrency: +# # 
https://github.community/t/concurrecy-not-work-for-push/183068/7 +# group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-asv-benchmarks +# cancel-in-progress: true +# +# steps: +# - name: Checkout +# uses: actions/checkout@v4 +# with: +# fetch-depth: 0 +# +# - name: Set up Conda +# uses: ./.github/actions/setup-conda +# +# - name: Build Pandas +# id: build +# uses: ./.github/actions/build_pandas +# +# - name: Run ASV benchmarks +# run: | +# cd asv_bench +# asv machine --yes +# asv run --quick --dry-run --durations=30 --python=same --show-stderr +# +# build_docker_dev_environment: +# name: Build Docker Dev Environment +# runs-on: ubuntu-24.04 +# defaults: +# run: +# shell: bash -el {0} +# +# concurrency: +# # https://github.community/t/concurrecy-not-work-for-push/183068/7 +# group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-build_docker_dev_environment +# cancel-in-progress: true +# +# steps: +# - name: Clean up dangling images +# run: docker image prune -f +# +# - name: Checkout +# uses: actions/checkout@v4 +# with: +# fetch-depth: 0 +# +# - name: Build image +# run: docker build --pull --no-cache --tag pandas-dev-env . 
+# +# - name: Show environment +# run: docker run --rm pandas-dev-env python -c "import pandas as pd; print(pd.show_versions())" +# +# requirements-dev-text-installable: +# name: Test install requirements-dev.txt +# runs-on: ubuntu-24.04 +# +# concurrency: +# # https://github.community/t/concurrecy-not-work-for-push/183068/7 +# group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-requirements-dev-text-installable +# cancel-in-progress: true +# +# steps: +# - name: Checkout +# uses: actions/checkout@v4 +# with: +# fetch-depth: 0 +# +# - name: Setup Python +# id: setup_python +# uses: actions/setup-python@v5 +# with: +# python-version: '3.10' +# cache: 'pip' +# cache-dependency-path: 'requirements-dev.txt' +# +# - name: Install requirements-dev.txt +# run: pip install -r requirements-dev.txt +# +# - name: Check Pip Cache Hit +# run: echo ${{ steps.setup_python.outputs.cache-hit }} diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 412f27cba9c4f..d951287ab808b 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -1,423 +1,423 @@ -name: Unit Tests - -on: - push: - branches: - - main - - 2.3.x - pull_request: - branches: - - main - - 2.3.x - paths-ignore: - - "doc/**" - - "web/**" - -permissions: - contents: read - -defaults: - run: - shell: bash -el {0} - -jobs: - ubuntu: - runs-on: ${{ matrix.platform }} - timeout-minutes: 90 - strategy: - matrix: - platform: [ubuntu-24.04, ubuntu-24.04-arm] - env_file: [actions-310.yaml, actions-311.yaml, actions-312.yaml, actions-313.yaml] - # Prevent the include jobs from overriding other jobs - pattern: [""] - pandas_future_infer_string: ["1"] - include: - - name: "Downstream Compat" - env_file: actions-311-downstream_compat.yaml - pattern: "not slow and not network and not single_cpu" - pytest_target: "pandas/tests/test_downstream.py" - platform: ubuntu-24.04 - - name: "Minimum Versions" - env_file: actions-310-minimum_versions.yaml - 
pattern: "not slow and not network and not single_cpu" - platform: ubuntu-24.04 - - name: "Freethreading" - env_file: actions-313-freethreading.yaml - pattern: "not slow and not network and not single_cpu" - platform: ubuntu-24.04 - - name: "Without PyArrow" - env_file: actions-312.yaml - pattern: "not slow and not network and not single_cpu" - platform: ubuntu-24.04 - - name: "Locale: it_IT" - env_file: actions-311.yaml - pattern: "not slow and not network and not single_cpu" - extra_apt: "language-pack-it" - # Use the utf8 version as the default, it has no bad side-effect. - lang: "it_IT.utf8" - lc_all: "it_IT.utf8" - # Also install it_IT (its encoding is ISO8859-1) but do not activate it. - # It will be temporarily activated during tests with locale.setlocale - extra_loc: "it_IT" - platform: ubuntu-24.04 - - name: "Locale: zh_CN" - env_file: actions-311.yaml - pattern: "not slow and not network and not single_cpu" - extra_apt: "language-pack-zh-hans" - # Use the utf8 version as the default, it has no bad side-effect. - lang: "zh_CN.utf8" - lc_all: "zh_CN.utf8" - # Also install zh_CN (its encoding is gb2312) but do not activate it. 
- # It will be temporarily activated during tests with locale.setlocale - extra_loc: "zh_CN" - platform: ubuntu-24.04 - - name: "Past no infer strings" - env_file: actions-312.yaml - pandas_future_infer_string: "0" - platform: ubuntu-24.04 - - name: "Numpy Dev" - env_file: actions-311-numpydev.yaml - pattern: "not slow and not network and not single_cpu" - test_args: "-W error::DeprecationWarning -W error::FutureWarning" - platform: ubuntu-24.04 - - name: "Pyarrow Nightly" - env_file: actions-311-pyarrownightly.yaml - pattern: "not slow and not network and not single_cpu" - platform: ubuntu-24.04 - fail-fast: false - name: ${{ matrix.name || format('{0} {1}', matrix.platform, matrix.env_file) }} - env: - PATTERN: ${{ matrix.pattern }} - LANG: ${{ matrix.lang || 'C.UTF-8' }} - LC_ALL: ${{ matrix.lc_all || '' }} - PANDAS_CI: '1' - PANDAS_FUTURE_INFER_STRING: ${{ matrix.pandas_future_infer_string || '1' }} - TEST_ARGS: ${{ matrix.test_args || '' }} - PYTEST_WORKERS: 'auto' - PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }} - # Clipboard tests - QT_QPA_PLATFORM: offscreen - REMOVE_PYARROW: ${{ matrix.name == 'Without PyArrow' && '1' || '0' }} - concurrency: - # https://github.community/t/concurrecy-not-work-for-push/183068/7 - group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}-${{ matrix.pandas_future_infer_string }}-${{ matrix.platform }} - cancel-in-progress: true - - services: - mysql: - image: mysql:9 - env: - MYSQL_ALLOW_EMPTY_PASSWORD: yes - MYSQL_DATABASE: pandas - options: >- - --health-cmd "mysqladmin ping" - --health-interval 10s - --health-timeout 5s - --health-retries 5 - ports: - - 3306:3306 - - postgres: - image: postgres:17 - env: - PGUSER: postgres - POSTGRES_USER: postgres - POSTGRES_PASSWORD: postgres - POSTGRES_DB: pandas - options: >- - --health-cmd pg_isready - --health-interval 10s - --health-timeout 5s - --health-retries 5 - ports: - - 
5432:5432 - - moto: - image: motoserver/moto:5.0.27 - ports: - - 5000:5000 - - steps: - - name: Checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Extra installs - # https://pytest-qt.readthedocs.io/en/latest/troubleshooting.html#github-actions-azure-pipelines-travis-ci-and-gitlab-ci-cd - run: sudo apt-get update && sudo apt-get install -y libegl1 libopengl0 ${{ matrix.extra_apt || ''}} - - - name: Generate extra locales - # These extra locales will be available for locale.setlocale() calls in tests - run: sudo locale-gen ${{ matrix.extra_loc }} - if: ${{ matrix.extra_loc }} - - - name: Set up Conda - uses: ./.github/actions/setup-conda - with: - environment-file: ci/deps/${{ matrix.env_file }} - - - name: Build Pandas - id: build - uses: ./.github/actions/build_pandas - with: - # xref https://github.com/cython/cython/issues/6870 - werror: ${{ matrix.name != 'Freethreading' }} - - - name: Test (not single_cpu) - uses: ./.github/actions/run-tests - env: - # Set pattern to not single_cpu if not already set - PATTERN: ${{ env.PATTERN == '' && 'not single_cpu' || matrix.pattern }} - - - name: Test (single_cpu) - uses: ./.github/actions/run-tests - env: - PATTERN: 'single_cpu' - PYTEST_WORKERS: 0 - if: ${{ matrix.pattern == '' && (always() && steps.build.outcome == 'success')}} - - macos-windows: - timeout-minutes: 90 - strategy: - matrix: - # Note: Don't use macOS latest since macos 14 appears to be arm64 only - os: [macos-13, macos-14, windows-latest] - env_file: [actions-310.yaml, actions-311.yaml, actions-312.yaml, actions-313.yaml] - fail-fast: false - runs-on: ${{ matrix.os }} - name: ${{ format('{0} {1}', matrix.os, matrix.env_file) }} - concurrency: - # https://github.community/t/concurrecy-not-work-for-push/183068/7 - group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.os }} - cancel-in-progress: true - env: - PANDAS_CI: 1 - PYTEST_TARGET: pandas - PATTERN: "not slow and not db 
and not network and not single_cpu" - PYTEST_WORKERS: 'auto' - - steps: - - name: Checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Set up Conda - uses: ./.github/actions/setup-conda - with: - environment-file: ci/deps/${{ matrix.env_file }} - - - name: Build Pandas - uses: ./.github/actions/build_pandas - - - name: Test - uses: ./.github/actions/run-tests - - Linux-32-bit: - runs-on: ubuntu-24.04 - container: - image: quay.io/pypa/manylinux2014_i686 - options: --platform linux/386 - steps: - - name: Checkout pandas Repo - # actions/checkout does not work since it requires node - run: | - git config --global --add safe.directory $PWD - - if [ $GITHUB_EVENT_NAME != pull_request ]; then - git clone --recursive --branch=$GITHUB_REF_NAME https://github.com/${GITHUB_REPOSITORY}.git $GITHUB_WORKSPACE - git reset --hard $GITHUB_SHA - else - git clone --recursive https://github.com/${GITHUB_REPOSITORY}.git $GITHUB_WORKSPACE - git fetch origin $GITHUB_REF:my_ref_name - git checkout $GITHUB_BASE_REF - git -c user.email="you@example.com" merge --no-commit my_ref_name - fi - - name: Build environment and Run Tests - # https://github.com/numpy/numpy/issues/24703#issuecomment-1722379388 - # Note: Pinned to Cython 3.0.10 to avoid numerical instability in 32-bit environments - # https://github.com/pandas-dev/pandas/pull/61423 - run: | - /opt/python/cp313-cp313/bin/python -m venv ~/virtualenvs/pandas-dev - . ~/virtualenvs/pandas-dev/bin/activate - python -m pip install --no-cache-dir -U pip wheel setuptools meson[ninja]==1.2.1 meson-python==0.13.1 - python -m pip install numpy -Csetup-args="-Dallow-noblas=true" - python -m pip install --no-cache-dir versioneer[toml] cython==3.0.10 python-dateutil pytest>=7.3.2 pytest-xdist>=3.4.0 hypothesis>=6.84.0 - python -m pip install --no-cache-dir --no-build-isolation -e . 
-Csetup-args="--werror" - python -m pip list --no-cache-dir - PANDAS_CI=1 python -m pytest -m 'not slow and not network and not clipboard and not single_cpu' pandas --junitxml=test-data.xml - concurrency: - # https://github.community/t/concurrecy-not-work-for-push/183068/7 - group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-32bit - cancel-in-progress: true - - Linux-Musl: - runs-on: ubuntu-24.04 - container: - image: quay.io/pypa/musllinux_1_2_x86_64 - steps: - - name: Checkout pandas Repo - # actions/checkout does not work since it requires node - run: | - git config --global --add safe.directory $PWD - - if [ $GITHUB_EVENT_NAME != pull_request ]; then - git clone --recursive --branch=$GITHUB_REF_NAME https://github.com/${GITHUB_REPOSITORY}.git $GITHUB_WORKSPACE - git reset --hard $GITHUB_SHA - else - git clone --recursive https://github.com/${GITHUB_REPOSITORY}.git $GITHUB_WORKSPACE - git fetch origin $GITHUB_REF:my_ref_name - git checkout $GITHUB_BASE_REF - git -c user.email="you@example.com" merge --no-commit my_ref_name - fi - - name: Configure System Packages - run: | - apk update - apk add musl-locales - - name: Build environment - run: | - /opt/python/cp313-cp313/bin/python -m venv ~/virtualenvs/pandas-dev - . ~/virtualenvs/pandas-dev/bin/activate - python -m pip install --no-cache-dir -U pip wheel setuptools meson-python==0.13.1 meson[ninja]==1.2.1 - python -m pip install --no-cache-dir versioneer[toml] cython numpy python-dateutil pytest>=7.3.2 pytest-xdist>=3.4.0 hypothesis>=6.84.0 - python -m pip install --no-cache-dir --no-build-isolation -e . -Csetup-args="--werror" - python -m pip list --no-cache-dir - - - name: Run Tests - run: | - . 
~/virtualenvs/pandas-dev/bin/activate - PANDAS_CI=1 python -m pytest -m 'not slow and not network and not clipboard and not single_cpu' pandas --junitxml=test-data.xml - concurrency: - # https://github.community/t/concurrecy-not-work-for-push/183068/7 - group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-musl - cancel-in-progress: true - - python-dev: - # This job may or may not run depending on the state of the next - # unreleased Python version. DO NOT DELETE IT. - # - # In general, this will remain frozen(present, but not running) until: - # - The next unreleased Python version has released beta 1 - # - This version should be available on GitHub Actions. - # - Our required build/runtime dependencies(numpy, Cython, python-dateutil) - # support that unreleased Python version. - # To unfreeze, comment out the ``if: false`` condition, and make sure you update - # the name of the workflow and Python version in actions/setup-python ``python-version:`` - # - # After it has been unfrozen, this file should remain unfrozen(present, and running) until: - # - The next Python version has been officially released. - # OR - # - Most/All of our optional dependencies support the next Python version AND - # - The next Python version has released a rc(we are guaranteed a stable ABI). - # To freeze this file, uncomment out the ``if: false`` condition, and migrate the jobs - # to the corresponding posix/windows-macos/sdist etc. workflows. - # Feel free to modify this comment as necessary. 
- if: false - defaults: - run: - shell: bash -eou pipefail {0} - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - # Separate out macOS 13 and 14, since macOS 14 is arm64 only - os: [ubuntu-24.04, macOS-13, macOS-14, windows-latest] - - timeout-minutes: 90 - - concurrency: - # https://github.community/t/concurrecy-not-work-for-push/183068/7 - group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.os }}-python-dev - cancel-in-progress: true - - env: - PYTEST_WORKERS: "auto" - PANDAS_CI: 1 - PATTERN: "not slow and not network and not clipboard and not single_cpu" - PYTEST_TARGET: pandas - - steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Set up Python Dev Version - uses: actions/setup-python@v5 - with: - python-version: '3.13-dev' - - - name: Build Environment - run: | - python --version - python -m pip install --upgrade pip setuptools wheel meson[ninja]==1.2.1 meson-python==0.13.1 - python -m pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple numpy - python -m pip install versioneer[toml] python-dateutil tzdata cython hypothesis>=6.84.0 pytest>=7.3.2 pytest-xdist>=3.4.0 pytest-cov - python -m pip install -ve . --no-build-isolation --no-index --no-deps -Csetup-args="--werror" - python -m pip list - - - name: Run Tests - uses: ./.github/actions/run-tests - - # NOTE: this job must be kept in sync with the Pyodide build job in wheels.yml - emscripten: - # Note: the Python version, Emscripten toolchain version are determined - # by the Pyodide version. 
The appropriate versions can be found in the - # Pyodide repodata.json "info" field, or in the Makefile.envs file: - # https://github.com/pyodide/pyodide/blob/stable/Makefile.envs#L2 - # The Node.js version can be determined via Pyodide: - # https://pyodide.org/en/stable/usage/index.html#node-js - name: Pyodide build - runs-on: ubuntu-24.04 - concurrency: - # https://github.community/t/concurrecy-not-work-for-push/183068/7 - group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-wasm - cancel-in-progress: true - steps: - - name: Checkout pandas Repo - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Set up Python for pyodide-build - id: setup-python - uses: actions/setup-python@v5 - with: - python-version: '3.12' - - - name: Set up Emscripten toolchain - uses: mymindstorm/setup-emsdk@v14 - with: - version: '3.1.58' - actions-cache-folder: emsdk-cache - - - name: Install pyodide-build - run: pip install "pyodide-build>=0.29.2" - - - name: Build pandas for Pyodide - run: | - pyodide build - - - name: Set up Node.js - uses: actions/setup-node@v4 - with: - node-version: '20' - - - name: Set up Pyodide virtual environment - env: - pyodide-version: '0.27.1' - run: | - pyodide xbuildenv install ${{ env.pyodide-version }} - pyodide venv .venv-pyodide - source .venv-pyodide/bin/activate - pip install dist/*.whl - - - name: Test pandas for Pyodide - env: - PANDAS_CI: 1 - run: | - source .venv-pyodide/bin/activate - pip install pytest hypothesis - # do not import pandas from the checked out repo - cd .. 
- python -c 'import pandas as pd; pd.test(extra_args=["-m not clipboard and not single_cpu and not slow and not network and not db"])' +#name: Unit Tests +# +#on: +# push: +# branches: +# - main +# - 2.3.x +# pull_request: +# branches: +# - main +# - 2.3.x +# paths-ignore: +# - "doc/**" +# - "web/**" +# +#permissions: +# contents: read +# +#defaults: +# run: +# shell: bash -el {0} +# +#jobs: +# ubuntu: +# runs-on: ${{ matrix.platform }} +# timeout-minutes: 90 +# strategy: +# matrix: +# platform: [ubuntu-24.04, ubuntu-24.04-arm] +# env_file: [actions-310.yaml, actions-311.yaml, actions-312.yaml, actions-313.yaml] +# # Prevent the include jobs from overriding other jobs +# pattern: [""] +# pandas_future_infer_string: ["1"] +# include: +# - name: "Downstream Compat" +# env_file: actions-311-downstream_compat.yaml +# pattern: "not slow and not network and not single_cpu" +# pytest_target: "pandas/tests/test_downstream.py" +# platform: ubuntu-24.04 +# - name: "Minimum Versions" +# env_file: actions-310-minimum_versions.yaml +# pattern: "not slow and not network and not single_cpu" +# platform: ubuntu-24.04 +# - name: "Freethreading" +# env_file: actions-313-freethreading.yaml +# pattern: "not slow and not network and not single_cpu" +# platform: ubuntu-24.04 +# - name: "Without PyArrow" +# env_file: actions-312.yaml +# pattern: "not slow and not network and not single_cpu" +# platform: ubuntu-24.04 +# - name: "Locale: it_IT" +# env_file: actions-311.yaml +# pattern: "not slow and not network and not single_cpu" +# extra_apt: "language-pack-it" +# # Use the utf8 version as the default, it has no bad side-effect. +# lang: "it_IT.utf8" +# lc_all: "it_IT.utf8" +# # Also install it_IT (its encoding is ISO8859-1) but do not activate it. 
+# # It will be temporarily activated during tests with locale.setlocale +# extra_loc: "it_IT" +# platform: ubuntu-24.04 +# - name: "Locale: zh_CN" +# env_file: actions-311.yaml +# pattern: "not slow and not network and not single_cpu" +# extra_apt: "language-pack-zh-hans" +# # Use the utf8 version as the default, it has no bad side-effect. +# lang: "zh_CN.utf8" +# lc_all: "zh_CN.utf8" +# # Also install zh_CN (its encoding is gb2312) but do not activate it. +# # It will be temporarily activated during tests with locale.setlocale +# extra_loc: "zh_CN" +# platform: ubuntu-24.04 +# - name: "Past no infer strings" +# env_file: actions-312.yaml +# pandas_future_infer_string: "0" +# platform: ubuntu-24.04 +# - name: "Numpy Dev" +# env_file: actions-311-numpydev.yaml +# pattern: "not slow and not network and not single_cpu" +# test_args: "-W error::DeprecationWarning -W error::FutureWarning" +# platform: ubuntu-24.04 +# - name: "Pyarrow Nightly" +# env_file: actions-311-pyarrownightly.yaml +# pattern: "not slow and not network and not single_cpu" +# platform: ubuntu-24.04 +# fail-fast: false +# name: ${{ matrix.name || format('{0} {1}', matrix.platform, matrix.env_file) }} +# env: +# PATTERN: ${{ matrix.pattern }} +# LANG: ${{ matrix.lang || 'C.UTF-8' }} +# LC_ALL: ${{ matrix.lc_all || '' }} +# PANDAS_CI: '1' +# PANDAS_FUTURE_INFER_STRING: ${{ matrix.pandas_future_infer_string || '1' }} +# TEST_ARGS: ${{ matrix.test_args || '' }} +# PYTEST_WORKERS: 'auto' +# PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }} +# # Clipboard tests +# QT_QPA_PLATFORM: offscreen +# REMOVE_PYARROW: ${{ matrix.name == 'Without PyArrow' && '1' || '0' }} +# concurrency: +# # https://github.community/t/concurrecy-not-work-for-push/183068/7 +# group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}-${{ matrix.pandas_future_infer_string }}-${{ matrix.platform }} +# cancel-in-progress: true +# +# 
services: +# mysql: +# image: mysql:9 +# env: +# MYSQL_ALLOW_EMPTY_PASSWORD: yes +# MYSQL_DATABASE: pandas +# options: >- +# --health-cmd "mysqladmin ping" +# --health-interval 10s +# --health-timeout 5s +# --health-retries 5 +# ports: +# - 3306:3306 +# +# postgres: +# image: postgres:17 +# env: +# PGUSER: postgres +# POSTGRES_USER: postgres +# POSTGRES_PASSWORD: postgres +# POSTGRES_DB: pandas +# options: >- +# --health-cmd pg_isready +# --health-interval 10s +# --health-timeout 5s +# --health-retries 5 +# ports: +# - 5432:5432 +# +# moto: +# image: motoserver/moto:5.0.27 +# ports: +# - 5000:5000 +# +# steps: +# - name: Checkout +# uses: actions/checkout@v4 +# with: +# fetch-depth: 0 +# +# - name: Extra installs +# # https://pytest-qt.readthedocs.io/en/latest/troubleshooting.html#github-actions-azure-pipelines-travis-ci-and-gitlab-ci-cd +# run: sudo apt-get update && sudo apt-get install -y libegl1 libopengl0 ${{ matrix.extra_apt || ''}} +# +# - name: Generate extra locales +# # These extra locales will be available for locale.setlocale() calls in tests +# run: sudo locale-gen ${{ matrix.extra_loc }} +# if: ${{ matrix.extra_loc }} +# +# - name: Set up Conda +# uses: ./.github/actions/setup-conda +# with: +# environment-file: ci/deps/${{ matrix.env_file }} +# +# - name: Build Pandas +# id: build +# uses: ./.github/actions/build_pandas +# with: +# # xref https://github.com/cython/cython/issues/6870 +# werror: ${{ matrix.name != 'Freethreading' }} +# +# - name: Test (not single_cpu) +# uses: ./.github/actions/run-tests +# env: +# # Set pattern to not single_cpu if not already set +# PATTERN: ${{ env.PATTERN == '' && 'not single_cpu' || matrix.pattern }} +# +# - name: Test (single_cpu) +# uses: ./.github/actions/run-tests +# env: +# PATTERN: 'single_cpu' +# PYTEST_WORKERS: 0 +# if: ${{ matrix.pattern == '' && (always() && steps.build.outcome == 'success')}} +# +# macos-windows: +# timeout-minutes: 90 +# strategy: +# matrix: +# # Note: Don't use macOS latest since 
macos 14 appears to be arm64 only +# os: [macos-13, macos-14, windows-latest] +# env_file: [actions-310.yaml, actions-311.yaml, actions-312.yaml, actions-313.yaml] +# fail-fast: false +# runs-on: ${{ matrix.os }} +# name: ${{ format('{0} {1}', matrix.os, matrix.env_file) }} +# concurrency: +# # https://github.community/t/concurrecy-not-work-for-push/183068/7 +# group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.os }} +# cancel-in-progress: true +# env: +# PANDAS_CI: 1 +# PYTEST_TARGET: pandas +# PATTERN: "not slow and not db and not network and not single_cpu" +# PYTEST_WORKERS: 'auto' +# +# steps: +# - name: Checkout +# uses: actions/checkout@v4 +# with: +# fetch-depth: 0 +# +# - name: Set up Conda +# uses: ./.github/actions/setup-conda +# with: +# environment-file: ci/deps/${{ matrix.env_file }} +# +# - name: Build Pandas +# uses: ./.github/actions/build_pandas +# +# - name: Test +# uses: ./.github/actions/run-tests +# +# Linux-32-bit: +# runs-on: ubuntu-24.04 +# container: +# image: quay.io/pypa/manylinux2014_i686 +# options: --platform linux/386 +# steps: +# - name: Checkout pandas Repo +# # actions/checkout does not work since it requires node +# run: | +# git config --global --add safe.directory $PWD +# +# if [ $GITHUB_EVENT_NAME != pull_request ]; then +# git clone --recursive --branch=$GITHUB_REF_NAME https://github.com/${GITHUB_REPOSITORY}.git $GITHUB_WORKSPACE +# git reset --hard $GITHUB_SHA +# else +# git clone --recursive https://github.com/${GITHUB_REPOSITORY}.git $GITHUB_WORKSPACE +# git fetch origin $GITHUB_REF:my_ref_name +# git checkout $GITHUB_BASE_REF +# git -c user.email="you@example.com" merge --no-commit my_ref_name +# fi +# - name: Build environment and Run Tests +# # https://github.com/numpy/numpy/issues/24703#issuecomment-1722379388 +# # Note: Pinned to Cython 3.0.10 to avoid numerical instability in 32-bit environments +# # https://github.com/pandas-dev/pandas/pull/61423 +# run: | 
+# /opt/python/cp313-cp313/bin/python -m venv ~/virtualenvs/pandas-dev +# . ~/virtualenvs/pandas-dev/bin/activate +# python -m pip install --no-cache-dir -U pip wheel setuptools meson[ninja]==1.2.1 meson-python==0.13.1 +# python -m pip install numpy -Csetup-args="-Dallow-noblas=true" +# python -m pip install --no-cache-dir versioneer[toml] cython==3.0.10 python-dateutil pytest>=7.3.2 pytest-xdist>=3.4.0 hypothesis>=6.84.0 +# python -m pip install --no-cache-dir --no-build-isolation -e . -Csetup-args="--werror" +# python -m pip list --no-cache-dir +# PANDAS_CI=1 python -m pytest -m 'not slow and not network and not clipboard and not single_cpu' pandas --junitxml=test-data.xml +# concurrency: +# # https://github.community/t/concurrecy-not-work-for-push/183068/7 +# group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-32bit +# cancel-in-progress: true +# +# Linux-Musl: +# runs-on: ubuntu-24.04 +# container: +# image: quay.io/pypa/musllinux_1_2_x86_64 +# steps: +# - name: Checkout pandas Repo +# # actions/checkout does not work since it requires node +# run: | +# git config --global --add safe.directory $PWD +# +# if [ $GITHUB_EVENT_NAME != pull_request ]; then +# git clone --recursive --branch=$GITHUB_REF_NAME https://github.com/${GITHUB_REPOSITORY}.git $GITHUB_WORKSPACE +# git reset --hard $GITHUB_SHA +# else +# git clone --recursive https://github.com/${GITHUB_REPOSITORY}.git $GITHUB_WORKSPACE +# git fetch origin $GITHUB_REF:my_ref_name +# git checkout $GITHUB_BASE_REF +# git -c user.email="you@example.com" merge --no-commit my_ref_name +# fi +# - name: Configure System Packages +# run: | +# apk update +# apk add musl-locales +# - name: Build environment +# run: | +# /opt/python/cp313-cp313/bin/python -m venv ~/virtualenvs/pandas-dev +# . 
~/virtualenvs/pandas-dev/bin/activate +# python -m pip install --no-cache-dir -U pip wheel setuptools meson-python==0.13.1 meson[ninja]==1.2.1 +# python -m pip install --no-cache-dir versioneer[toml] cython numpy python-dateutil pytest>=7.3.2 pytest-xdist>=3.4.0 hypothesis>=6.84.0 +# python -m pip install --no-cache-dir --no-build-isolation -e . -Csetup-args="--werror" +# python -m pip list --no-cache-dir +# +# - name: Run Tests +# run: | +# . ~/virtualenvs/pandas-dev/bin/activate +# PANDAS_CI=1 python -m pytest -m 'not slow and not network and not clipboard and not single_cpu' pandas --junitxml=test-data.xml +# concurrency: +# # https://github.community/t/concurrecy-not-work-for-push/183068/7 +# group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-musl +# cancel-in-progress: true +# +# python-dev: +# # This job may or may not run depending on the state of the next +# # unreleased Python version. DO NOT DELETE IT. +# # +# # In general, this will remain frozen(present, but not running) until: +# # - The next unreleased Python version has released beta 1 +# # - This version should be available on GitHub Actions. +# # - Our required build/runtime dependencies(numpy, Cython, python-dateutil) +# # support that unreleased Python version. +# # To unfreeze, comment out the ``if: false`` condition, and make sure you update +# # the name of the workflow and Python version in actions/setup-python ``python-version:`` +# # +# # After it has been unfrozen, this file should remain unfrozen(present, and running) until: +# # - The next Python version has been officially released. +# # OR +# # - Most/All of our optional dependencies support the next Python version AND +# # - The next Python version has released a rc(we are guaranteed a stable ABI). +# # To freeze this file, uncomment out the ``if: false`` condition, and migrate the jobs +# # to the corresponding posix/windows-macos/sdist etc. workflows. +# # Feel free to modify this comment as necessary. 
+# if: false +# defaults: +# run: +# shell: bash -eou pipefail {0} +# runs-on: ${{ matrix.os }} +# strategy: +# fail-fast: false +# matrix: +# # Separate out macOS 13 and 14, since macOS 14 is arm64 only +# os: [ubuntu-24.04, macOS-13, macOS-14, windows-latest] +# +# timeout-minutes: 90 +# +# concurrency: +# # https://github.community/t/concurrecy-not-work-for-push/183068/7 +# group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.os }}-python-dev +# cancel-in-progress: true +# +# env: +# PYTEST_WORKERS: "auto" +# PANDAS_CI: 1 +# PATTERN: "not slow and not network and not clipboard and not single_cpu" +# PYTEST_TARGET: pandas +# +# steps: +# - uses: actions/checkout@v4 +# with: +# fetch-depth: 0 +# +# - name: Set up Python Dev Version +# uses: actions/setup-python@v5 +# with: +# python-version: '3.13-dev' +# +# - name: Build Environment +# run: | +# python --version +# python -m pip install --upgrade pip setuptools wheel meson[ninja]==1.2.1 meson-python==0.13.1 +# python -m pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple numpy +# python -m pip install versioneer[toml] python-dateutil tzdata cython hypothesis>=6.84.0 pytest>=7.3.2 pytest-xdist>=3.4.0 pytest-cov +# python -m pip install -ve . --no-build-isolation --no-index --no-deps -Csetup-args="--werror" +# python -m pip list +# +# - name: Run Tests +# uses: ./.github/actions/run-tests +# +# # NOTE: this job must be kept in sync with the Pyodide build job in wheels.yml +# emscripten: +# # Note: the Python version, Emscripten toolchain version are determined +# # by the Pyodide version. 
The appropriate versions can be found in the +# # Pyodide repodata.json "info" field, or in the Makefile.envs file: +# # https://github.com/pyodide/pyodide/blob/stable/Makefile.envs#L2 +# # The Node.js version can be determined via Pyodide: +# # https://pyodide.org/en/stable/usage/index.html#node-js +# name: Pyodide build +# runs-on: ubuntu-24.04 +# concurrency: +# # https://github.community/t/concurrecy-not-work-for-push/183068/7 +# group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-wasm +# cancel-in-progress: true +# steps: +# - name: Checkout pandas Repo +# uses: actions/checkout@v4 +# with: +# fetch-depth: 0 +# +# - name: Set up Python for pyodide-build +# id: setup-python +# uses: actions/setup-python@v5 +# with: +# python-version: '3.12' +# +# - name: Set up Emscripten toolchain +# uses: mymindstorm/setup-emsdk@v14 +# with: +# version: '3.1.58' +# actions-cache-folder: emsdk-cache +# +# - name: Install pyodide-build +# run: pip install "pyodide-build>=0.29.2" +# +# - name: Build pandas for Pyodide +# run: | +# pyodide build +# +# - name: Set up Node.js +# uses: actions/setup-node@v4 +# with: +# node-version: '20' +# +# - name: Set up Pyodide virtual environment +# env: +# pyodide-version: '0.27.1' +# run: | +# pyodide xbuildenv install ${{ env.pyodide-version }} +# pyodide venv .venv-pyodide +# source .venv-pyodide/bin/activate +# pip install dist/*.whl +# +# - name: Test pandas for Pyodide +# env: +# PANDAS_CI: 1 +# run: | +# source .venv-pyodide/bin/activate +# pip install pytest hypothesis +# # do not import pandas from the checked out repo +# cd .. 
+# python -c 'import pandas as pd; pd.test(extra_args=["-m not clipboard and not single_cpu and not slow and not network and not db"])' diff --git a/doc/source/whatsnew/v0.23.0.rst b/doc/source/whatsnew/v0.23.0.rst index e942046b945fe..b9140840a5f7e 100644 --- a/doc/source/whatsnew/v0.23.0.rst +++ b/doc/source/whatsnew/v0.23.0.rst @@ -382,1148 +382,6 @@ Mimic the original behavior of passing a ndarray: .. _whatsnew_0210.enhancements.limit_area: -``DataFrame.interpolate`` has gained the ``limit_area`` kwarg -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -:meth:`DataFrame.interpolate` has gained a ``limit_area`` parameter to allow further control of which ``NaN`` s are replaced. -Use ``limit_area='inside'`` to fill only NaNs surrounded by valid values or use ``limit_area='outside'`` to fill only ``NaN`` s -outside the existing valid values while preserving those inside. (:issue:`16284`) See the :ref:`full documentation here `. - - -.. ipython:: python - - ser = pd.Series([np.nan, np.nan, 5, np.nan, np.nan, - np.nan, 13, np.nan, np.nan]) - ser - -Fill one consecutive inside value in both directions - -.. ipython:: python - - ser.interpolate(limit_direction='both', limit_area='inside', limit=1) - -Fill all consecutive outside values backward - -.. ipython:: python - - ser.interpolate(limit_direction='backward', limit_area='outside') - -Fill all consecutive outside values in both directions - -.. ipython:: python - - ser.interpolate(limit_direction='both', limit_area='outside') - -.. _whatsnew_0210.enhancements.get_dummies_dtype: - -Function ``get_dummies`` now supports ``dtype`` argument -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The :func:`get_dummies` now accepts a ``dtype`` argument, which specifies a dtype for the new columns. The default remains uint8. (:issue:`18330`) - -.. 
ipython:: python - - df = pd.DataFrame({'a': [1, 2], 'b': [3, 4], 'c': [5, 6]}) - pd.get_dummies(df, columns=['c']).dtypes - pd.get_dummies(df, columns=['c'], dtype=bool).dtypes - - -.. _whatsnew_0230.enhancements.timedelta_mod: - -Timedelta mod method -^^^^^^^^^^^^^^^^^^^^ - -``mod`` (%) and ``divmod`` operations are now defined on ``Timedelta`` objects -when operating with either timedelta-like or with numeric arguments. -See the :ref:`documentation here `. (:issue:`19365`) - -.. ipython:: python - - td = pd.Timedelta(hours=37) - td % pd.Timedelta(minutes=45) - -.. _whatsnew_0230.enhancements.ran_inf: - -Method ``.rank()`` handles ``inf`` values when ``NaN`` are present -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -In previous versions, ``.rank()`` would assign ``inf`` elements ``NaN`` as their ranks. Now ranks are calculated properly. (:issue:`6945`) - -.. ipython:: python - - s = pd.Series([-np.inf, 0, 1, np.nan, np.inf]) - s - -Previous behavior: - -.. code-block:: ipython - - In [11]: s.rank() - Out[11]: - 0 1.0 - 1 2.0 - 2 3.0 - 3 NaN - 4 NaN - dtype: float64 - -Current behavior: - -.. ipython:: python - - s.rank() - -Furthermore, previously if you rank ``inf`` or ``-inf`` values together with ``NaN`` values, the calculation won't distinguish ``NaN`` from infinity when using 'top' or 'bottom' argument. - -.. ipython:: python - - s = pd.Series([np.nan, np.nan, -np.inf, -np.inf]) - s - -Previous behavior: - -.. code-block:: ipython - - In [15]: s.rank(na_option='top') - Out[15]: - 0 2.5 - 1 2.5 - 2 2.5 - 3 2.5 - dtype: float64 - -Current behavior: - -.. 
ipython:: python - - s.rank(na_option='top') - -These bugs were squashed: - -- Bug in :meth:`DataFrame.rank` and :meth:`Series.rank` when ``method='dense'`` and ``pct=True`` in which percentile ranks were not being used with the number of distinct observations (:issue:`15630`) -- Bug in :meth:`Series.rank` and :meth:`DataFrame.rank` when ``ascending='False'`` failed to return correct ranks for infinity if ``NaN`` were present (:issue:`19538`) -- Bug in :func:`DataFrameGroupBy.rank` where ranks were incorrect when both infinity and ``NaN`` were present (:issue:`20561`) - - -.. _whatsnew_0230.enhancements.str_cat_align: - -``Series.str.cat`` has gained the ``join`` kwarg -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Previously, :meth:`Series.str.cat` did not -- in contrast to most of ``pandas`` -- align :class:`Series` on their index before concatenation (see :issue:`18657`). -The method has now gained a keyword ``join`` to control the manner of alignment, see examples below and :ref:`here `. - -In v.0.23 ``join`` will default to None (meaning no alignment), but this default will change to ``'left'`` in a future version of pandas. - -.. ipython:: python - - s = pd.Series(['a', 'b', 'c', 'd']) - t = pd.Series(['b', 'd', 'e', 'c'], index=[1, 3, 4, 2]) - s.str.cat(t) - s.str.cat(t, join='left', na_rep='-') - -Furthermore, :meth:`Series.str.cat` now works for ``CategoricalIndex`` as well (previously raised a ``ValueError``; see :issue:`20842`). - -.. _whatsnew_0230.enhancements.astype_category: - -``DataFrame.astype`` performs column-wise conversion to ``Categorical`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -:meth:`DataFrame.astype` can now perform column-wise conversion to ``Categorical`` by supplying the string ``'category'`` or -a :class:`~pandas.api.types.CategoricalDtype`. Previously, attempting this would raise a ``NotImplementedError``. 
See the -:ref:`categorical.objectcreation` section of the documentation for more details and examples. (:issue:`12860`, :issue:`18099`) - -Supplying the string ``'category'`` performs column-wise conversion, with only labels appearing in a given column set as categories: - -.. ipython:: python - - df = pd.DataFrame({'A': list('abca'), 'B': list('bccd')}) - df = df.astype('category') - df['A'].dtype - df['B'].dtype - - -Supplying a ``CategoricalDtype`` will make the categories in each column consistent with the supplied dtype: - -.. ipython:: python - - from pandas.api.types import CategoricalDtype - df = pd.DataFrame({'A': list('abca'), 'B': list('bccd')}) - cdt = CategoricalDtype(categories=list('abcd'), ordered=True) - df = df.astype(cdt) - df['A'].dtype - df['B'].dtype - - -.. _whatsnew_0230.enhancements.other: - -Other enhancements -^^^^^^^^^^^^^^^^^^ - -- Unary ``+`` now permitted for ``Series`` and ``DataFrame`` as numeric operator (:issue:`16073`) -- Better support for :meth:`~pandas.io.formats.style.Styler.to_excel` output with the ``xlsxwriter`` engine. (:issue:`16149`) -- :func:`pandas.tseries.frequencies.to_offset` now accepts leading '+' signs e.g. '+1h'. 
(:issue:`18171`) -- :func:`MultiIndex.unique` now supports the ``level=`` argument, to get unique values from a specific index level (:issue:`17896`) -- :class:`pandas.io.formats.style.Styler` now has method ``hide_index()`` to determine whether the index will be rendered in output (:issue:`14194`) -- :class:`pandas.io.formats.style.Styler` now has method ``hide_columns()`` to determine whether columns will be hidden in output (:issue:`14194`) -- Improved wording of ``ValueError`` raised in :func:`to_datetime` when ``unit=`` is passed with a non-convertible value (:issue:`14350`) -- :func:`Series.fillna` now accepts a Series or a dict as a ``value`` for a categorical dtype (:issue:`17033`) -- :func:`pandas.read_clipboard` updated to use qtpy, falling back to PyQt5 and then PyQt4, adding compatibility with Python3 and multiple python-qt bindings (:issue:`17722`) -- Improved wording of ``ValueError`` raised in :func:`read_csv` when the ``usecols`` argument cannot match all columns. (:issue:`17301`) -- :func:`DataFrame.corrwith` now silently drops non-numeric columns when passed a Series. Before, an exception was raised (:issue:`18570`). -- :class:`IntervalIndex` now supports time zone aware ``Interval`` objects (:issue:`18537`, :issue:`18538`) -- :func:`Series` / :func:`DataFrame` tab completion also returns identifiers in the first level of a :func:`MultiIndex`. (:issue:`16326`) -- :func:`read_excel` has gained the ``nrows`` parameter (:issue:`16645`) -- :meth:`DataFrame.append` can now in more cases preserve the type of the calling dataframe's columns (e.g. 
if both are ``CategoricalIndex``) (:issue:`18359`) -- :meth:`DataFrame.to_json` and :meth:`Series.to_json` now accept an ``index`` argument which allows the user to exclude the index from the JSON output (:issue:`17394`) -- ``IntervalIndex.to_tuples()`` has gained the ``na_tuple`` parameter to control whether NA is returned as a tuple of NA, or NA itself (:issue:`18756`) -- ``Categorical.rename_categories``, ``CategoricalIndex.rename_categories`` and :attr:`Series.cat.rename_categories` - can now take a callable as their argument (:issue:`18862`) -- :class:`Interval` and :class:`IntervalIndex` have gained a ``length`` attribute (:issue:`18789`) -- ``Resampler`` objects now have a functioning :attr:`.Resampler.pipe` method. - Previously, calls to ``pipe`` were diverted to the ``mean`` method (:issue:`17905`). -- :func:`~pandas.api.types.is_scalar` now returns ``True`` for ``DateOffset`` objects (:issue:`18943`). -- :func:`DataFrame.pivot` now accepts a list for the ``values=`` kwarg (:issue:`17160`). -- Added :func:`pandas.api.extensions.register_dataframe_accessor`, - :func:`pandas.api.extensions.register_series_accessor`, and - :func:`pandas.api.extensions.register_index_accessor`, accessor for libraries downstream of pandas - to register custom accessors like ``.cat`` on pandas objects. See - :ref:`Registering Custom Accessors ` for more (:issue:`14781`). 
- -- ``IntervalIndex.astype`` now supports conversions between subtypes when passed an ``IntervalDtype`` (:issue:`19197`) -- :class:`IntervalIndex` and its associated constructor methods (``from_arrays``, ``from_breaks``, ``from_tuples``) have gained a ``dtype`` parameter (:issue:`19262`) -- Added :func:`.SeriesGroupBy.is_monotonic_increasing` and :func:`.SeriesGroupBy.is_monotonic_decreasing` (:issue:`17015`) -- For subclassed ``DataFrames``, :func:`DataFrame.apply` will now preserve the ``Series`` subclass (if defined) when passing the data to the applied function (:issue:`19822`) -- :func:`DataFrame.from_dict` now accepts a ``columns`` argument that can be used to specify the column names when ``orient='index'`` is used (:issue:`18529`) -- Added option ``display.html.use_mathjax`` so `MathJax `_ can be disabled when rendering tables in ``Jupyter`` notebooks (:issue:`19856`, :issue:`19824`) -- :func:`DataFrame.replace` now supports the ``method`` parameter, which can be used to specify the replacement method when ``to_replace`` is a scalar, list or tuple and ``value`` is ``None`` (:issue:`19632`) -- :meth:`Timestamp.month_name`, :meth:`DatetimeIndex.month_name`, and :meth:`Series.dt.month_name` are now available (:issue:`12805`) -- :meth:`Timestamp.day_name` and :meth:`DatetimeIndex.day_name` are now available to return day names with a specified locale (:issue:`12806`) -- :meth:`DataFrame.to_sql` now performs a multi-value insert if the underlying connection supports itk rather than inserting row by row. - ``SQLAlchemy`` dialects supporting multi-value inserts include: ``mysql``, ``postgresql``, ``sqlite`` and any dialect with ``supports_multivalues_insert``. (:issue:`14315`, :issue:`8953`) -- :func:`read_html` now accepts a ``displayed_only`` keyword argument to controls whether or not hidden elements are parsed (``True`` by default) (:issue:`20027`) -- :func:`read_html` now reads all ```` elements in a ``
``, not just the first. (:issue:`20690`) -- :meth:`.Rolling.quantile` and :meth:`.Expanding.quantile` now accept the ``interpolation`` keyword, ``linear`` by default (:issue:`20497`) -- zip compression is supported via ``compression=zip`` in :func:`DataFrame.to_pickle`, :func:`Series.to_pickle`, :func:`DataFrame.to_csv`, :func:`Series.to_csv`, :func:`DataFrame.to_json`, :func:`Series.to_json`. (:issue:`17778`) -- :class:`~pandas.tseries.offsets.WeekOfMonth` constructor now supports ``n=0`` (:issue:`20517`). -- :class:`DataFrame` and :class:`Series` now support matrix multiplication (``@``) operator (:issue:`10259`) for Python>=3.5 -- Updated :meth:`DataFrame.to_gbq` and :meth:`pandas.read_gbq` signature and documentation to reflect changes from - the pandas-gbq library version 0.4.0. Adds intersphinx mapping to pandas-gbq - library. (:issue:`20564`) -- Added new writer for exporting Stata dta files in version 117, ``StataWriter117``. This format supports exporting strings with lengths up to 2,000,000 characters (:issue:`16450`) -- :func:`to_hdf` and :func:`read_hdf` now accept an ``errors`` keyword argument to control encoding error handling (:issue:`20835`) -- :func:`cut` has gained the ``duplicates='raise'|'drop'`` option to control whether to raise on duplicated edges (:issue:`20947`) -- :func:`date_range`, :func:`timedelta_range`, and :func:`interval_range` now return a linearly spaced index if ``start``, ``stop``, and ``periods`` are specified, but ``freq`` is not. (:issue:`20808`, :issue:`20983`, :issue:`20976`) - -.. _whatsnew_0230.api_breaking: - -Backwards incompatible API changes -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. _whatsnew_0230.api_breaking.deps: - -Dependencies have increased minimum versions -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -We have updated our minimum supported versions of dependencies (:issue:`15184`). 
-If installed, we now require: - -+-----------------+-----------------+----------+---------------+ -| Package | Minimum Version | Required | Issue | -+=================+=================+==========+===============+ -| python-dateutil | 2.5.0 | X | :issue:`15184`| -+-----------------+-----------------+----------+---------------+ -| openpyxl | 2.4.0 | | :issue:`15184`| -+-----------------+-----------------+----------+---------------+ -| beautifulsoup4 | 4.2.1 | | :issue:`20082`| -+-----------------+-----------------+----------+---------------+ -| setuptools | 24.2.0 | | :issue:`20698`| -+-----------------+-----------------+----------+---------------+ - -.. _whatsnew_0230.api_breaking.dict_insertion_order: - -Instantiation from dicts preserves dict insertion order for Python 3.6+ -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Until Python 3.6, dicts in Python had no formally defined ordering. For Python -version 3.6 and later, dicts are ordered by insertion order, see -`PEP 468 `_. -pandas will use the dict's insertion order, when creating a ``Series`` or -``DataFrame`` from a dict and you're using Python version 3.6 or -higher. (:issue:`19884`) - -Previous behavior (and current behavior if on Python < 3.6): - -.. code-block:: ipython - - In [16]: pd.Series({'Income': 2000, - ....: 'Expenses': -1500, - ....: 'Taxes': -200, - ....: 'Net result': 300}) - Out[16]: - Expenses -1500 - Income 2000 - Net result 300 - Taxes -200 - dtype: int64 - -Note the Series above is ordered alphabetically by the index values. - -New behavior (for Python >= 3.6): - -.. ipython:: python - - pd.Series({'Income': 2000, - 'Expenses': -1500, - 'Taxes': -200, - 'Net result': 300}) - -Notice that the Series is now ordered by insertion order. This new behavior is -used for all relevant pandas types (``Series``, ``DataFrame``, ``SparseSeries`` -and ``SparseDataFrame``). 
- -If you wish to retain the old behavior while using Python >= 3.6, you can use -``.sort_index()``: - -.. ipython:: python - - pd.Series({'Income': 2000, - 'Expenses': -1500, - 'Taxes': -200, - 'Net result': 300}).sort_index() - -.. _whatsnew_0230.api_breaking.deprecate_panel: - -Deprecate Panel -^^^^^^^^^^^^^^^ - -``Panel`` was deprecated in the 0.20.x release, showing as a ``DeprecationWarning``. Using ``Panel`` will now show a ``FutureWarning``. The recommended way to represent 3-D data are -with a ``MultiIndex`` on a ``DataFrame`` via the :meth:`~Panel.to_frame` or with the `xarray package `__. pandas -provides a :meth:`~Panel.to_xarray` method to automate this conversion (:issue:`13563`, :issue:`18324`). - -.. code-block:: ipython - - In [75]: import pandas._testing as tm - - In [76]: p = tm.makePanel() - - In [77]: p - Out[77]: - - Dimensions: 3 (items) x 3 (major_axis) x 4 (minor_axis) - Items axis: ItemA to ItemC - Major_axis axis: 2000-01-03 00:00:00 to 2000-01-05 00:00:00 - Minor_axis axis: A to D - -Convert to a MultiIndex DataFrame - -.. code-block:: ipython - - In [78]: p.to_frame() - Out[78]: - ItemA ItemB ItemC - major minor - 2000-01-03 A 0.469112 0.721555 0.404705 - B -1.135632 0.271860 -1.039268 - C 0.119209 0.276232 -1.344312 - D -2.104569 0.113648 -0.109050 - 2000-01-04 A -0.282863 -0.706771 0.577046 - B 1.212112 -0.424972 -0.370647 - C -1.044236 -1.087401 0.844885 - D -0.494929 -1.478427 1.643563 - 2000-01-05 A -1.509059 -1.039575 -1.715002 - B -0.173215 0.567020 -1.157892 - C -0.861849 -0.673690 1.075770 - D 1.071804 0.524988 -1.469388 - - [12 rows x 3 columns] - -Convert to an xarray DataArray - -.. 
code-block:: ipython - - In [79]: p.to_xarray() - Out[79]: - - array([[[ 0.469112, -1.135632, 0.119209, -2.104569], - [-0.282863, 1.212112, -1.044236, -0.494929], - [-1.509059, -0.173215, -0.861849, 1.071804]], - - [[ 0.721555, 0.27186 , 0.276232, 0.113648], - [-0.706771, -0.424972, -1.087401, -1.478427], - [-1.039575, 0.56702 , -0.67369 , 0.524988]], - - [[ 0.404705, -1.039268, -1.344312, -0.10905 ], - [ 0.577046, -0.370647, 0.844885, 1.643563], - [-1.715002, -1.157892, 1.07577 , -1.469388]]]) - Coordinates: - * items (items) object 'ItemA' 'ItemB' 'ItemC' - * major_axis (major_axis) datetime64[ns] 2000-01-03 2000-01-04 2000-01-05 - * minor_axis (minor_axis) object 'A' 'B' 'C' 'D' - - -.. _whatsnew_0230.api_breaking.core_common: - -pandas.core.common removals -^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The following error & warning messages are removed from ``pandas.core.common`` (:issue:`13634`, :issue:`19769`): - -- ``PerformanceWarning`` -- ``UnsupportedFunctionCall`` -- ``UnsortedIndexError`` -- ``AbstractMethodError`` - -These are available from import from ``pandas.errors`` (since 0.19.0). - - -.. _whatsnew_0230.api_breaking.apply: - -Changes to make output of ``DataFrame.apply`` consistent -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -:func:`DataFrame.apply` was inconsistent when applying an arbitrary user-defined-function that returned a list-like with ``axis=1``. Several bugs and inconsistencies -are resolved. If the applied function returns a Series, then pandas will return a DataFrame; otherwise a Series will be returned, this includes the case -where a list-like (e.g. ``tuple`` or ``list`` is returned) (:issue:`16353`, :issue:`17437`, :issue:`17970`, :issue:`17348`, :issue:`17892`, :issue:`18573`, -:issue:`17602`, :issue:`18775`, :issue:`18901`, :issue:`18919`). - -.. 
ipython:: python - - df = pd.DataFrame(np.tile(np.arange(3), 6).reshape(6, -1) + 1, - columns=['A', 'B', 'C']) - df - -Previous behavior: if the returned shape happened to match the length of original columns, this would return a ``DataFrame``. -If the return shape did not match, a ``Series`` with lists was returned. - -.. code-block:: python - - In [3]: df.apply(lambda x: [1, 2, 3], axis=1) - Out[3]: - A B C - 0 1 2 3 - 1 1 2 3 - 2 1 2 3 - 3 1 2 3 - 4 1 2 3 - 5 1 2 3 - - In [4]: df.apply(lambda x: [1, 2], axis=1) - Out[4]: - 0 [1, 2] - 1 [1, 2] - 2 [1, 2] - 3 [1, 2] - 4 [1, 2] - 5 [1, 2] - dtype: object - - -New behavior: When the applied function returns a list-like, this will now *always* return a ``Series``. - -.. ipython:: python - - df.apply(lambda x: [1, 2, 3], axis=1) - df.apply(lambda x: [1, 2], axis=1) - -To have expanded columns, you can use ``result_type='expand'`` - -.. ipython:: python - - df.apply(lambda x: [1, 2, 3], axis=1, result_type='expand') - -To broadcast the result across the original columns (the old behaviour for -list-likes of the correct length), you can use ``result_type='broadcast'``. -The shape must match the original columns. - -.. ipython:: python - - df.apply(lambda x: [1, 2, 3], axis=1, result_type='broadcast') - -Returning a ``Series`` allows one to control the exact return structure and column names: - -.. ipython:: python - - df.apply(lambda x: pd.Series([1, 2, 3], index=['D', 'E', 'F']), axis=1) - -.. _whatsnew_0230.api_breaking.concat: - -Concatenation will no longer sort -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -In a future version of pandas :func:`pandas.concat` will no longer sort the non-concatenation axis when it is not already aligned. -The current behavior is the same as the previous (sorting), but now a warning is issued when ``sort`` is not specified and the non-concatenation axis is not aligned (:issue:`4588`). - -.. 
ipython:: python - - df1 = pd.DataFrame({"a": [1, 2], "b": [1, 2]}, columns=['b', 'a']) - df2 = pd.DataFrame({"a": [4, 5]}) - - pd.concat([df1, df2]) - -To keep the previous behavior (sorting) and silence the warning, pass ``sort=True`` - -.. ipython:: python - - pd.concat([df1, df2], sort=True) - -To accept the future behavior (no sorting), pass ``sort=False`` - -.. ipython:: python - - pd.concat([df1, df2], sort=False) - -Note that this change also applies to :meth:`DataFrame.append`, which has also received a ``sort`` keyword for controlling this behavior. - - -.. _whatsnew_0230.api_breaking.build_changes: - -Build changes -^^^^^^^^^^^^^ - -- Building pandas for development now requires ``cython >= 0.24`` (:issue:`18613`) -- Building from source now explicitly requires ``setuptools`` in ``setup.py`` (:issue:`18113`) -- Updated conda recipe to be in compliance with conda-build 3.0+ (:issue:`18002`) - -.. _whatsnew_0230.api_breaking.index_division_by_zero: - -Index division by zero fills correctly -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Division operations on ``Index`` and subclasses will now fill division of positive numbers by zero with ``np.inf``, division of negative numbers by zero with ``-np.inf`` and ``0 / 0`` with ``np.nan``. This matches existing ``Series`` behavior. (:issue:`19322`, :issue:`19347`) - -Previous behavior: - -.. code-block:: ipython - - In [6]: index = pd.Int64Index([-1, 0, 1]) - - In [7]: index / 0 - Out[7]: Int64Index([0, 0, 0], dtype='int64') - - # Previous behavior yielded different results depending on the type of zero in the divisor - In [8]: index / 0.0 - Out[8]: Float64Index([-inf, nan, inf], dtype='float64') - - In [9]: index = pd.UInt64Index([0, 1]) - - In [10]: index / np.array([0, 0], dtype=np.uint64) - Out[10]: UInt64Index([0, 0], dtype='uint64') - - In [11]: pd.RangeIndex(1, 5) / 0 - ZeroDivisionError: integer division or modulo by zero - -Current behavior: - -.. 
code-block:: ipython - - In [12]: index = pd.Int64Index([-1, 0, 1]) - # division by zero gives -infinity where negative, - # +infinity where positive, and NaN for 0 / 0 - In [13]: index / 0 - - # The result of division by zero should not depend on - # whether the zero is int or float - In [14]: index / 0.0 - - In [15]: index = pd.UInt64Index([0, 1]) - In [16]: index / np.array([0, 0], dtype=np.uint64) - - In [17]: pd.RangeIndex(1, 5) / 0 - -.. _whatsnew_0230.api_breaking.extract: - -Extraction of matching patterns from strings -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -By default, extracting matching patterns from strings with :func:`str.extract` used to return a -``Series`` if a single group was being extracted (a ``DataFrame`` if more than one group was -extracted). As of pandas 0.23.0 :func:`str.extract` always returns a ``DataFrame``, unless -``expand`` is set to ``False``. Finally, ``None`` was an accepted value for -the ``expand`` parameter (which was equivalent to ``False``), but now raises a ``ValueError``. (:issue:`11386`) - -Previous behavior: - -.. code-block:: ipython - - In [1]: s = pd.Series(['number 10', '12 eggs']) - - In [2]: extracted = s.str.extract(r'.*(\d\d).*') - - In [3]: extracted - Out [3]: - 0 10 - 1 12 - dtype: object - - In [4]: type(extracted) - Out [4]: - pandas.core.series.Series - -New behavior: - -.. ipython:: python - - s = pd.Series(['number 10', '12 eggs']) - extracted = s.str.extract(r'.*(\d\d).*') - extracted - type(extracted) - -To restore previous behavior, simply set ``expand`` to ``False``: - -.. ipython:: python - - s = pd.Series(['number 10', '12 eggs']) - extracted = s.str.extract(r'.*(\d\d).*', expand=False) - extracted - type(extracted) - -.. 
_whatsnew_0230.api_breaking.cdt_ordered: - -Default value for the ``ordered`` parameter of ``CategoricalDtype`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The default value of the ``ordered`` parameter for :class:`~pandas.api.types.CategoricalDtype` has changed from ``False`` to ``None`` to allow updating of ``categories`` without impacting ``ordered``. Behavior should remain consistent for downstream objects, such as :class:`Categorical` (:issue:`18790`) - -In previous versions, the default value for the ``ordered`` parameter was ``False``. This could potentially lead to the ``ordered`` parameter unintentionally being changed from ``True`` to ``False`` when users attempt to update ``categories`` if ``ordered`` is not explicitly specified, as it would silently default to ``False``. The new behavior for ``ordered=None`` is to retain the existing value of ``ordered``. - -New behavior: - -.. code-block:: ipython - - In [2]: from pandas.api.types import CategoricalDtype - - In [3]: cat = pd.Categorical(list('abcaba'), ordered=True, categories=list('cba')) - - In [4]: cat - Out[4]: - [a, b, c, a, b, a] - Categories (3, object): [c < b < a] - - In [5]: cdt = CategoricalDtype(categories=list('cbad')) - - In [6]: cat.astype(cdt) - Out[6]: - [a, b, c, a, b, a] - Categories (4, object): [c < b < a < d] - -Notice in the example above that the converted ``Categorical`` has retained ``ordered=True``. Had the default value for ``ordered`` remained as ``False``, the converted ``Categorical`` would have become unordered, despite ``ordered=False`` never being explicitly specified. To change the value of ``ordered``, explicitly pass it to the new dtype, e.g. ``CategoricalDtype(categories=list('cbad'), ordered=False)``. 
- -Note that the unintentional conversion of ``ordered`` discussed above did not arise in previous versions due to separate bugs that prevented ``astype`` from doing any type of category to category conversion (:issue:`10696`, :issue:`18593`). These bugs have been fixed in this release, and motivated changing the default value of ``ordered``. - -.. _whatsnew_0230.api_breaking.pretty_printing: - -Better pretty-printing of DataFrames in a terminal -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Previously, the default value for the maximum number of columns was -``pd.options.display.max_columns=20``. This meant that relatively wide data -frames would not fit within the terminal width, and pandas would introduce line -breaks to display these 20 columns. This resulted in an output that was -relatively difficult to read: - -.. image:: ../_static/print_df_old.png - -If Python runs in a terminal, the maximum number of columns is now determined -automatically so that the printed data frame fits within the current terminal -width (``pd.options.display.max_columns=0``) (:issue:`17023`). If Python runs -as a Jupyter kernel (such as the Jupyter QtConsole or a Jupyter notebook, as -well as in many IDEs), this value cannot be inferred automatically and is thus -set to ``20`` as in previous versions. In a terminal, this results in a much -nicer output: - -.. image:: ../_static/print_df_new.png - -Note that if you don't like the new default, you can always set this option -yourself. To revert to the old setting, you can run this line: - -.. code-block:: python - - pd.options.display.max_columns = 20 - -.. 
_whatsnew_0230.api.datetimelike: - -Datetimelike API changes -^^^^^^^^^^^^^^^^^^^^^^^^ - -- The default ``Timedelta`` constructor now accepts an ``ISO 8601 Duration`` string as an argument (:issue:`19040`) -- Subtracting ``NaT`` from a :class:`Series` with ``dtype='datetime64[ns]'`` returns a ``Series`` with ``dtype='timedelta64[ns]'`` instead of ``dtype='datetime64[ns]'`` (:issue:`18808`) -- Addition or subtraction of ``NaT`` from :class:`TimedeltaIndex` will return ``TimedeltaIndex`` instead of ``DatetimeIndex`` (:issue:`19124`) -- :func:`DatetimeIndex.shift` and :func:`TimedeltaIndex.shift` will now raise ``NullFrequencyError`` (which subclasses ``ValueError``, which was raised in older versions) when the index object frequency is ``None`` (:issue:`19147`) -- Addition and subtraction of ``NaN`` from a :class:`Series` with ``dtype='timedelta64[ns]'`` will raise a ``TypeError`` instead of treating the ``NaN`` as ``NaT`` (:issue:`19274`) -- ``NaT`` division with :class:`datetime.timedelta` will now return ``NaN`` instead of raising (:issue:`17876`) -- Operations between a :class:`Series` with dtype ``dtype='datetime64[ns]'`` and a :class:`PeriodIndex` will correctly raise ``TypeError`` (:issue:`18850`) -- Subtraction of :class:`Series` with timezone-aware ``dtype='datetime64[ns]'`` with mismatched timezones will raise ``TypeError`` instead of ``ValueError`` (:issue:`18817`) -- :class:`Timestamp` will no longer silently ignore unused or invalid ``tz`` or ``tzinfo`` keyword arguments (:issue:`17690`) -- :class:`Timestamp` will no longer silently ignore invalid ``freq`` arguments (:issue:`5168`) -- :class:`CacheableOffset` and :class:`WeekDay` are no longer available in the ``pandas.tseries.offsets`` module (:issue:`17830`) -- ``pandas.tseries.frequencies.get_freq_group()`` and ``pandas.tseries.frequencies.DAYS`` are removed from the public API (:issue:`18034`) -- :func:`Series.truncate` and :func:`DataFrame.truncate` will raise a ``ValueError`` if the index is not 
sorted instead of an unhelpful ``KeyError`` (:issue:`17935`) -- :attr:`Series.first` and :attr:`DataFrame.first` will now raise a ``TypeError`` - rather than ``NotImplementedError`` when index is not a :class:`DatetimeIndex` (:issue:`20725`). -- :attr:`Series.last` and :attr:`DataFrame.last` will now raise a ``TypeError`` - rather than ``NotImplementedError`` when index is not a :class:`DatetimeIndex` (:issue:`20725`). -- Restricted ``DateOffset`` keyword arguments. Previously, ``DateOffset`` subclasses allowed arbitrary keyword arguments which could lead to unexpected behavior. Now, only valid arguments will be accepted. (:issue:`17176`, :issue:`18226`). -- :func:`pandas.merge` provides a more informative error message when trying to merge on timezone-aware and timezone-naive columns (:issue:`15800`) -- For :class:`DatetimeIndex` and :class:`TimedeltaIndex` with ``freq=None``, addition or subtraction of integer-dtyped array or ``Index`` will raise ``NullFrequencyError`` instead of ``TypeError`` (:issue:`19895`) -- :class:`Timestamp` constructor now accepts a ``nanosecond`` keyword or positional argument (:issue:`18898`) -- :class:`DatetimeIndex` will now raise an ``AttributeError`` when the ``tz`` attribute is set after instantiation (:issue:`3746`) -- :class:`DatetimeIndex` with a ``pytz`` timezone will now return a consistent ``pytz`` timezone (:issue:`18595`) - -.. 
_whatsnew_0230.api.other: - -Other API changes -^^^^^^^^^^^^^^^^^ - -- :func:`Series.astype` and :func:`Index.astype` with an incompatible dtype will now raise a ``TypeError`` rather than a ``ValueError`` (:issue:`18231`) -- ``Series`` construction with an ``object`` dtyped tz-aware datetime and ``dtype=object`` specified, will now return an ``object`` dtyped ``Series``, previously this would infer the datetime dtype (:issue:`18231`) -- A :class:`Series` of ``dtype=category`` constructed from an empty ``dict`` will now have categories of ``dtype=object`` rather than ``dtype=float64``, consistently with the case in which an empty list is passed (:issue:`18515`) -- All-NaN levels in a ``MultiIndex`` are now assigned ``float`` rather than ``object`` dtype, promoting consistency with ``Index`` (:issue:`17929`). -- Levels names of a ``MultiIndex`` (when not None) are now required to be unique: trying to create a ``MultiIndex`` with repeated names will raise a ``ValueError`` (:issue:`18872`) -- Both construction and renaming of ``Index``/``MultiIndex`` with non-hashable ``name``/``names`` will now raise ``TypeError`` (:issue:`20527`) -- :func:`Index.map` can now accept ``Series`` and dictionary input objects (:issue:`12756`, :issue:`18482`, :issue:`18509`). -- :func:`DataFrame.unstack` will now default to filling with ``np.nan`` for ``object`` columns. (:issue:`12815`) -- :class:`IntervalIndex` constructor will raise if the ``closed`` parameter conflicts with how the input data is inferred to be closed (:issue:`18421`) -- Inserting missing values into indexes will work for all types of indexes and automatically insert the correct type of missing value (``NaN``, ``NaT``, etc.) regardless of the type passed in (:issue:`18295`) -- When created with duplicate labels, ``MultiIndex`` now raises a ``ValueError``. 
(:issue:`17464`) -- :func:`Series.fillna` now raises a ``TypeError`` instead of a ``ValueError`` when passed a list, tuple or DataFrame as a ``value`` (:issue:`18293`) -- :func:`pandas.DataFrame.merge` no longer casts a ``float`` column to ``object`` when merging on ``int`` and ``float`` columns (:issue:`16572`) -- :func:`pandas.merge` now raises a ``ValueError`` when trying to merge on incompatible data types (:issue:`9780`) -- The default NA value for :class:`UInt64Index` has changed from 0 to ``NaN``, which impacts methods that mask with NA, such as ``UInt64Index.where()`` (:issue:`18398`) -- Refactored ``setup.py`` to use ``find_packages`` instead of explicitly listing out all subpackages (:issue:`18535`) -- Rearranged the order of keyword arguments in :func:`read_excel` to align with :func:`read_csv` (:issue:`16672`) -- :func:`wide_to_long` previously kept numeric-like suffixes as ``object`` dtype. Now they are cast to numeric if possible (:issue:`17627`) -- In :func:`read_excel`, the ``comment`` argument is now exposed as a named parameter (:issue:`18735`) -- Rearranged the order of keyword arguments in :func:`read_excel` to align with :func:`read_csv` (:issue:`16672`) -- The options ``html.border`` and ``mode.use_inf_as_null`` were deprecated in prior versions, these will now show ``FutureWarning`` rather than a ``DeprecationWarning`` (:issue:`19003`) -- :class:`IntervalIndex` and ``IntervalDtype`` no longer support categorical, object, and string subtypes (:issue:`19016`) -- ``IntervalDtype`` now returns ``True`` when compared against ``'interval'`` regardless of subtype, and ``IntervalDtype.name`` now returns ``'interval'`` regardless of subtype (:issue:`18980`) -- ``KeyError`` now raises instead of ``ValueError`` in :meth:`~DataFrame.drop`, :meth:`~Panel.drop`, :meth:`~Series.drop`, :meth:`~Index.drop` when dropping a non-existent element in an axis with duplicates (:issue:`19186`) -- :func:`Series.to_csv` now accepts a ``compression`` argument that works 
in the same way as the ``compression`` argument in :func:`DataFrame.to_csv` (:issue:`18958`) -- Set operations (union, difference...) on :class:`IntervalIndex` with incompatible index types will now raise a ``TypeError`` rather than a ``ValueError`` (:issue:`19329`) -- :class:`DateOffset` objects render more simply, e.g. ``<DateOffset: days=1>`` instead of ``<DateOffset: kwds={'days': 1}>`` (:issue:`19403`) -- ``Categorical.fillna`` now validates its ``value`` and ``method`` keyword arguments. It now raises when both or none are specified, matching the behavior of :meth:`Series.fillna` (:issue:`19682`) -- ``pd.to_datetime('today')`` now returns a datetime, consistent with ``pd.Timestamp('today')``; previously ``pd.to_datetime('today')`` returned a ``.normalized()`` datetime (:issue:`19935`) -- :func:`Series.str.replace` now takes an optional ``regex`` keyword which, when set to ``False``, uses literal string replacement rather than regex replacement (:issue:`16808`) -- :func:`DatetimeIndex.strftime` and :func:`PeriodIndex.strftime` now return an ``Index`` instead of a numpy array to be consistent with similar accessors (:issue:`20127`) -- Constructing a Series from a list of length 1 no longer broadcasts this list when a longer index is specified (:issue:`19714`, :issue:`20391`). -- :func:`DataFrame.to_dict` with ``orient='index'`` no longer casts int columns to float for a DataFrame with only int and float columns (:issue:`18580`) -- A user-defined-function that is passed to :func:`Series.rolling().aggregate() <.Rolling.aggregate>`, :func:`DataFrame.rolling().aggregate() <.Rolling.aggregate>`, or its expanding cousins, will now *always* be passed a ``Series``, rather than a ``np.array``; ``.apply()`` only has the ``raw`` keyword, see :ref:`here <whatsnew_0230.enhancements.window_raw>`. This is consistent with the signatures of ``.aggregate()`` across pandas (:issue:`20584`) -- Rolling and Expanding types raise ``NotImplementedError`` upon iteration (:issue:`11704`). - -.. 
_whatsnew_0230.deprecations: - -Deprecations -~~~~~~~~~~~~ - -- ``Series.from_array`` and ``SparseSeries.from_array`` are deprecated. Use the normal constructor ``Series(..)`` and ``SparseSeries(..)`` instead (:issue:`18213`). -- ``DataFrame.as_matrix`` is deprecated. Use ``DataFrame.values`` instead (:issue:`18458`). -- ``Series.asobject``, ``DatetimeIndex.asobject``, ``PeriodIndex.asobject`` and ``TimeDeltaIndex.asobject`` have been deprecated. Use ``.astype(object)`` instead (:issue:`18572`) -- Grouping by a tuple of keys now emits a ``FutureWarning`` and is deprecated. - In the future, a tuple passed to ``'by'`` will always refer to a single key - that is the actual tuple, instead of treating the tuple as multiple keys. To - retain the previous behavior, use a list instead of a tuple (:issue:`18314`) -- ``Series.valid`` is deprecated. Use :meth:`Series.dropna` instead (:issue:`18800`). -- :func:`read_excel` has deprecated the ``skip_footer`` parameter. Use ``skipfooter`` instead (:issue:`18836`) -- :meth:`ExcelFile.parse` has deprecated ``sheetname`` in favor of ``sheet_name`` for consistency with :func:`read_excel` (:issue:`20920`). -- The ``is_copy`` attribute is deprecated and will be removed in a future version (:issue:`18801`). -- ``IntervalIndex.from_intervals`` is deprecated in favor of the :class:`IntervalIndex` constructor (:issue:`19263`) -- ``DataFrame.from_items`` is deprecated. Use :func:`DataFrame.from_dict` instead, or ``DataFrame.from_dict(OrderedDict())`` if you wish to preserve the key order (:issue:`17320`, :issue:`17312`) -- Indexing a :class:`MultiIndex` or a :class:`FloatIndex` with a list containing some missing keys will now show a :class:`FutureWarning`, which is consistent with other types of indexes (:issue:`17758`). 
- -- The ``broadcast`` parameter of ``.apply()`` is deprecated in favor of ``result_type='broadcast'`` (:issue:`18577`) -- The ``reduce`` parameter of ``.apply()`` is deprecated in favor of ``result_type='reduce'`` (:issue:`18577`) -- The ``order`` parameter of :func:`factorize` is deprecated and will be removed in a future release (:issue:`19727`) -- :attr:`Timestamp.weekday_name`, :attr:`DatetimeIndex.weekday_name`, and :attr:`Series.dt.weekday_name` are deprecated in favor of :meth:`Timestamp.day_name`, :meth:`DatetimeIndex.day_name`, and :meth:`Series.dt.day_name` (:issue:`12806`) - -- ``pandas.tseries.plotting.tsplot`` is deprecated. Use :func:`Series.plot` instead (:issue:`18627`) -- ``Index.summary()`` is deprecated and will be removed in a future version (:issue:`18217`) -- ``NDFrame.get_ftype_counts()`` is deprecated and will be removed in a future version (:issue:`18243`) -- The ``convert_datetime64`` parameter in :func:`DataFrame.to_records` has been deprecated and will be removed in a future version. The NumPy bug motivating this parameter has been resolved. The default value for this parameter has also changed from ``True`` to ``None`` (:issue:`18160`). -- :func:`Series.rolling().apply() <.Rolling.apply>`, :func:`DataFrame.rolling().apply() <.Rolling.apply>`, :func:`Series.expanding().apply() <.Expanding.apply>`, and :func:`DataFrame.expanding().apply() <.Expanding.apply>` have deprecated passing an ``np.array`` by default. One will need to pass the new ``raw`` parameter to be explicit about what is passed (:issue:`20584`) -- The ``data``, ``base``, ``strides``, ``flags`` and ``itemsize`` properties - of the ``Series`` and ``Index`` classes have been deprecated and will be - removed in a future version (:issue:`20419`). -- ``DatetimeIndex.offset`` is deprecated. Use ``DatetimeIndex.freq`` instead (:issue:`20716`) -- Floor division between an integer ndarray and a :class:`Timedelta` is deprecated. 
Divide by :attr:`Timedelta.value` instead (:issue:`19761`) -- Setting ``PeriodIndex.freq`` (which was not guaranteed to work correctly) is deprecated. Use :meth:`PeriodIndex.asfreq` instead (:issue:`20678`) -- ``Index.get_duplicates()`` is deprecated and will be removed in a future version (:issue:`20239`) -- The previous default behavior of negative indices in ``Categorical.take`` is deprecated. In a future version it will change from meaning missing values to meaning positional indices from the right. The future behavior is consistent with :meth:`Series.take` (:issue:`20664`). -- Passing multiple axes to the ``axis`` parameter in :func:`DataFrame.dropna` has been deprecated and will be removed in a future version (:issue:`20987`) - - -.. _whatsnew_0230.prior_deprecations: - -Removal of prior version deprecations/changes -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -- Warnings against the obsolete usage ``Categorical(codes, categories)``, which were emitted for instance when the first two arguments to ``Categorical()`` had different dtypes, and recommended the use of ``Categorical.from_codes``, have now been removed (:issue:`8074`) -- The ``levels`` and ``labels`` attributes of a ``MultiIndex`` can no longer be set directly (:issue:`4039`). -- ``pd.tseries.util.pivot_annual`` has been removed (deprecated since v0.19). Use ``pivot_table`` instead (:issue:`18370`) -- ``pd.tseries.util.isleapyear`` has been removed (deprecated since v0.19). Use ``.is_leap_year`` property in Datetime-likes instead (:issue:`18370`) -- ``pd.ordered_merge`` has been removed (deprecated since v0.19). 
Use ``pd.merge_ordered`` instead (:issue:`18459`) -- The ``SparseList`` class has been removed (:issue:`14007`) -- The ``pandas.io.wb`` and ``pandas.io.data`` stub modules have been removed (:issue:`13735`) -- ``Categorical.from_array`` has been removed (:issue:`13854`) -- The ``freq`` and ``how`` parameters have been removed from the ``rolling``/``expanding``/``ewm`` methods of DataFrame - and Series (deprecated since v0.18). Instead, resample before calling the methods. (:issue:`18601` & :issue:`18668`) -- ``DatetimeIndex.to_datetime``, ``Timestamp.to_datetime``, ``PeriodIndex.to_datetime``, and ``Index.to_datetime`` have been removed (:issue:`8254`, :issue:`14096`, :issue:`14113`) -- :func:`read_csv` has dropped the ``skip_footer`` parameter (:issue:`13386`) -- :func:`read_csv` has dropped the ``as_recarray`` parameter (:issue:`13373`) -- :func:`read_csv` has dropped the ``buffer_lines`` parameter (:issue:`13360`) -- :func:`read_csv` has dropped the ``compact_ints`` and ``use_unsigned`` parameters (:issue:`13323`) -- The ``Timestamp`` class has dropped the ``offset`` attribute in favor of ``freq`` (:issue:`13593`) -- The ``Series``, ``Categorical``, and ``Index`` classes have dropped the ``reshape`` method (:issue:`13012`) -- ``pandas.tseries.frequencies.get_standard_freq`` has been removed in favor of ``pandas.tseries.frequencies.to_offset(freq).rule_code`` (:issue:`13874`) -- The ``freqstr`` keyword has been removed from ``pandas.tseries.frequencies.to_offset`` in favor of ``freq`` (:issue:`13874`) -- The ``Panel4D`` and ``PanelND`` classes have been removed (:issue:`13776`) -- The ``Panel`` class has dropped the ``to_long`` and ``toLong`` methods (:issue:`19077`) -- The options ``display.line_with`` and ``display.height`` are removed in favor of ``display.width`` and ``display.max_rows`` respectively (:issue:`4391`, :issue:`19107`) -- The ``labels`` attribute of the ``Categorical`` class has been removed in favor of :attr:`Categorical.codes` (:issue:`7768`) 
-- The ``flavor`` parameter has been removed from :func:`to_sql` method (:issue:`13611`) -- The modules ``pandas.tools.hashing`` and ``pandas.util.hashing`` have been removed (:issue:`16223`) -- The top-level functions ``pd.rolling_*``, ``pd.expanding_*`` and ``pd.ewm*`` have been removed (Deprecated since v0.18). - Instead, use the DataFrame/Series methods :attr:`~DataFrame.rolling`, :attr:`~DataFrame.expanding` and :attr:`~DataFrame.ewm` (:issue:`18723`) -- Imports from ``pandas.core.common`` for functions such as ``is_datetime64_dtype`` are now removed. These are located in ``pandas.api.types``. (:issue:`13634`, :issue:`19769`) -- The ``infer_dst`` keyword in :meth:`Series.tz_localize`, :meth:`DatetimeIndex.tz_localize` - and :class:`DatetimeIndex` have been removed. ``infer_dst=True`` is equivalent to - ``ambiguous='infer'``, and ``infer_dst=False`` to ``ambiguous='raise'`` (:issue:`7963`). -- When ``.resample()`` was changed from an eager to a lazy operation, like ``.groupby()`` in v0.18.0, we put in place compatibility (with a ``FutureWarning``), - so operations would continue to work. This is now fully removed, so a ``Resampler`` will no longer forward compat operations (:issue:`20554`) -- Remove long deprecated ``axis=None`` parameter from ``.replace()`` (:issue:`20271`) - -.. _whatsnew_0230.performance: - -Performance improvements -~~~~~~~~~~~~~~~~~~~~~~~~ - -- Indexers on ``Series`` or ``DataFrame`` no longer create a reference cycle (:issue:`17956`) -- Added a keyword argument, ``cache``, to :func:`to_datetime` that improved the performance of converting duplicate datetime arguments (:issue:`11665`) -- :class:`DateOffset` arithmetic performance is improved (:issue:`18218`) -- Converting a ``Series`` of ``Timedelta`` objects to days, seconds, etc... 
sped up through vectorization of underlying methods (:issue:`18092`) -- Improved performance of ``.map()`` with a ``Series/dict`` input (:issue:`15081`) -- The overridden ``Timedelta`` properties of days, seconds and microseconds have been removed, leveraging their built-in Python versions instead (:issue:`18242`) -- ``Series`` construction will reduce the number of copies made of the input data in certain cases (:issue:`17449`) -- Improved performance of :func:`Series.dt.date` and :func:`DatetimeIndex.date` (:issue:`18058`) -- Improved performance of :func:`Series.dt.time` and :func:`DatetimeIndex.time` (:issue:`18461`) -- Improved performance of :func:`IntervalIndex.symmetric_difference` (:issue:`18475`) -- Improved performance of ``DatetimeIndex`` and ``Series`` arithmetic operations with Business-Month and Business-Quarter frequencies (:issue:`18489`) -- :func:`Series` / :func:`DataFrame` tab completion limits to 100 values, for better performance. (:issue:`18587`) -- Improved performance of :func:`DataFrame.median` with ``axis=1`` when bottleneck is not installed (:issue:`16468`) -- Improved performance of :func:`MultiIndex.get_loc` for large indexes, at the cost of a reduction in performance for small ones (:issue:`18519`) -- Improved performance of :func:`MultiIndex.remove_unused_levels` when there are no unused levels, at the cost of a reduction in performance when there are (:issue:`19289`) -- Improved performance of :func:`Index.get_loc` for non-unique indexes (:issue:`19478`) -- Improved performance of pairwise ``.rolling()`` and ``.expanding()`` with ``.cov()`` and ``.corr()`` operations (:issue:`17917`) -- Improved performance of :func:`.GroupBy.rank` (:issue:`15779`) -- Improved performance of variable ``.rolling()`` on ``.min()`` and ``.max()`` (:issue:`19521`) -- Improved performance of :func:`.GroupBy.ffill` and :func:`.GroupBy.bfill` (:issue:`11296`) -- Improved performance of :func:`.GroupBy.any` and :func:`.GroupBy.all` (:issue:`15435`) -- 
Improved performance of :func:`.GroupBy.pct_change` (:issue:`19165`) -- Improved performance of :func:`Series.isin` in the case of categorical dtypes (:issue:`20003`) -- Improved performance of ``getattr(Series, attr)`` when the Series has certain index types. This manifested in slow printing of large Series with a ``DatetimeIndex`` (:issue:`19764`) -- Fixed a performance regression for :func:`GroupBy.nth` and :func:`GroupBy.last` with some object columns (:issue:`19283`) -- Improved performance of :func:`.Categorical.from_codes` (:issue:`18501`) - -.. _whatsnew_0230.docs: - -Documentation changes -~~~~~~~~~~~~~~~~~~~~~ - -Thanks to all of the contributors who participated in the pandas Documentation -Sprint, which took place on March 10th. We had about 500 participants from over -30 locations across the world. You should notice that many of the -:ref:`API docstrings <api>` have greatly improved. - -There were too many simultaneous contributions to include a release note for each -improvement, but this `GitHub search`_ should give you an idea of how many docstrings -were improved. - -Special thanks to `Marc Garcia`_ for organizing the sprint. For more information, -read the `NumFOCUS blogpost`_ recapping the sprint. - -.. _GitHub search: https://github.com/pandas-dev/pandas/pulls?utf8=%E2%9C%93&q=is%3Apr+label%3ADocs+created%3A2018-03-10..2018-03-15+ -.. _NumFOCUS blogpost: https://www.numfocus.org/blog/worldwide-pandas-sprint/ -.. _Marc Garcia: https://github.com/datapythonista - -- Changed spelling of "numpy" to "NumPy", and "python" to "Python". (:issue:`19017`) -- Consistency when introducing code samples, using either colon or period. - Rewrote some sentences for greater clarity, added more dynamic references - to functions, methods and classes. - (:issue:`18941`, :issue:`18948`, :issue:`18973`, :issue:`19017`) -- Added a reference to :func:`DataFrame.assign` in the concatenate section of the merging documentation (:issue:`18665`) - -.. 
_whatsnew_0230.bug_fixes: - -Bug fixes -~~~~~~~~~ - -Categorical -^^^^^^^^^^^ - -.. warning:: - - A class of bugs was introduced in pandas 0.21 with ``CategoricalDtype`` that - affects the correctness of operations like ``merge``, ``concat``, and - indexing when comparing multiple unordered ``Categorical`` arrays that have - the same categories, but in a different order. We highly recommend upgrading - or manually aligning your categories before doing these operations. - -- Bug in ``Categorical.equals`` returning the wrong result when comparing two - unordered ``Categorical`` arrays with the same categories, but in a different - order (:issue:`16603`) -- Bug in :func:`pandas.api.types.union_categoricals` returning the wrong result - for unordered categoricals with the categories in a different order. - This affected :func:`pandas.concat` with Categorical data (:issue:`19096`). -- Bug in :func:`pandas.merge` returning the wrong result when joining on an - unordered ``Categorical`` that had the same categories but in a different - order (:issue:`19551`) -- Bug in :meth:`CategoricalIndex.get_indexer` returning the wrong result when - ``target`` was an unordered ``Categorical`` that had the same categories as - ``self`` but in a different order (:issue:`19551`) -- Bug in :meth:`Index.astype` with a categorical dtype where the resultant index is not converted to a :class:`CategoricalIndex` for all types of index (:issue:`18630`) -- Bug in :meth:`Series.astype` and ``Categorical.astype()`` where an existing categorical data does not get updated (:issue:`10696`, :issue:`18593`) -- Bug in :meth:`Series.str.split` with ``expand=True`` incorrectly raising an IndexError on empty strings (:issue:`20002`). 
-- Bug in :class:`Index` constructor with ``dtype=CategoricalDtype(...)`` where ``categories`` and ``ordered`` are not maintained (:issue:`19032`) -- Bug in :class:`Series` constructor with scalar and ``dtype=CategoricalDtype(...)`` where ``categories`` and ``ordered`` are not maintained (:issue:`19565`) -- Bug in ``Categorical.__iter__`` not converting to Python types (:issue:`19909`) -- Bug in :func:`pandas.factorize` returning the unique codes for the ``uniques``. This now returns a ``Categorical`` with the same dtype as the input (:issue:`19721`) -- Bug in :func:`pandas.factorize` including an item for missing values in the ``uniques`` return value (:issue:`19721`) -- Bug in :meth:`Series.take` with categorical data interpreting ``-1`` in ``indices`` as missing value markers, rather than the last element of the Series (:issue:`20664`) - -Datetimelike -^^^^^^^^^^^^ - -- Bug in :func:`Series.__sub__` subtracting a non-nanosecond ``np.datetime64`` object from a ``Series`` gave incorrect results (:issue:`7996`) -- Bug in :class:`DatetimeIndex`, :class:`TimedeltaIndex` addition and subtraction of zero-dimensional integer arrays gave incorrect results (:issue:`19012`) -- Bug in :class:`DatetimeIndex` and :class:`TimedeltaIndex` where adding or subtracting an array-like of ``DateOffset`` objects either raised (``np.array``, ``pd.Index``) or broadcast incorrectly (``pd.Series``) (:issue:`18849`) -- Bug in :func:`Series.__add__` adding Series with dtype ``timedelta64[ns]`` to a timezone-aware ``DatetimeIndex`` incorrectly dropped timezone information (:issue:`13905`) -- Adding a ``Period`` object to a ``datetime`` or ``Timestamp`` object will now correctly raise a ``TypeError`` (:issue:`17983`) -- Bug in :class:`Timestamp` where comparison with an array of ``Timestamp`` objects would result in a ``RecursionError`` (:issue:`15183`) -- Bug in :class:`Series` floor-division where operating on a scalar ``timedelta`` raises an exception (:issue:`18846`) -- Bug in 
:class:`DatetimeIndex` where the repr was not showing high-precision time values at the end of a day (e.g., 23:59:59.999999999) (:issue:`19030`) -- Bug in ``.astype()`` to non-ns timedelta units would hold the incorrect dtype (:issue:`19176`, :issue:`19223`, :issue:`12425`) -- Bug in subtracting :class:`Series` from ``NaT`` incorrectly returning ``NaT`` (:issue:`19158`) -- Bug in :func:`Series.truncate` which raises ``TypeError`` with a monotonic ``PeriodIndex`` (:issue:`17717`) -- Bug in :func:`~DataFrame.pct_change` using ``periods`` and ``freq`` returned different length outputs (:issue:`7292`) -- Bug in comparison of :class:`DatetimeIndex` against ``None`` or ``datetime.date`` objects raising ``TypeError`` for ``==`` and ``!=`` comparisons instead of all-``False`` and all-``True``, respectively (:issue:`19301`) -- Bug in :class:`Timestamp` and :func:`to_datetime` where a string representing a barely out-of-bounds timestamp would be incorrectly rounded down instead of raising ``OutOfBoundsDatetime`` (:issue:`19382`) -- Bug in :func:`Timestamp.floor` :func:`DatetimeIndex.floor` where time stamps far in the future and past were not rounded correctly (:issue:`19206`) -- Bug in :func:`to_datetime` where passing an out-of-bounds datetime with ``errors='coerce'`` and ``utc=True`` would raise ``OutOfBoundsDatetime`` instead of parsing to ``NaT`` (:issue:`19612`) -- Bug in :class:`DatetimeIndex` and :class:`TimedeltaIndex` addition and subtraction where name of the returned object was not always set consistently. 
(:issue:`19744`) -- Bug in :class:`DatetimeIndex` and :class:`TimedeltaIndex` addition and subtraction where operations with numpy arrays raised ``TypeError`` (:issue:`19847`) -- Bug in :class:`DatetimeIndex` and :class:`TimedeltaIndex` where setting the ``freq`` attribute was not fully supported (:issue:`20678`) - -Timedelta -^^^^^^^^^ - -- Bug in :func:`Timedelta.__mul__` where multiplying by ``NaT`` returned ``NaT`` instead of raising a ``TypeError`` (:issue:`19819`) -- Bug in :class:`Series` with ``dtype='timedelta64[ns]'`` where addition or subtraction of ``TimedeltaIndex`` had results cast to ``dtype='int64'`` (:issue:`17250`) -- Bug in :class:`Series` with ``dtype='timedelta64[ns]'`` where addition or subtraction of ``TimedeltaIndex`` could return a ``Series`` with an incorrect name (:issue:`19043`) -- Bug in :func:`Timedelta.__floordiv__` and :func:`Timedelta.__rfloordiv__` dividing by many incompatible numpy objects was incorrectly allowed (:issue:`18846`) -- Bug where dividing a scalar timedelta-like object with :class:`TimedeltaIndex` performed the reciprocal operation (:issue:`19125`) -- Bug in :class:`TimedeltaIndex` where division by a ``Series`` would return a ``TimedeltaIndex`` instead of a ``Series`` (:issue:`19042`) -- Bug in :func:`Timedelta.__add__`, :func:`Timedelta.__sub__` where adding or subtracting a ``np.timedelta64`` object would return another ``np.timedelta64`` instead of a ``Timedelta`` (:issue:`19738`) -- Bug in :func:`Timedelta.__floordiv__`, :func:`Timedelta.__rfloordiv__` where operating with a ``Tick`` object would raise a ``TypeError`` instead of returning a numeric value (:issue:`19738`) -- Bug in :func:`Period.asfreq` where periods near ``datetime(1, 1, 1)`` could be converted incorrectly (:issue:`19643`, :issue:`19834`) -- Bug in :func:`Timedelta.total_seconds` causing precision errors, for example ``Timedelta('30S').total_seconds()==30.000000000000004`` (:issue:`19458`) -- Bug in :func:`Timedelta.__rmod__` where operating 
with a ``numpy.timedelta64`` returned a ``timedelta64`` object instead of a ``Timedelta`` (:issue:`19820`) -- Multiplication of :class:`TimedeltaIndex` by ``TimedeltaIndex`` will now raise ``TypeError`` instead of raising ``ValueError`` in cases of length mismatch (:issue:`19333`) -- Bug in indexing a :class:`TimedeltaIndex` with a ``np.timedelta64`` object which was raising a ``TypeError`` (:issue:`20393`) - - -Timezones -^^^^^^^^^ - -- Bug in creating a ``Series`` from an array that contains both tz-naive and tz-aware values will result in a ``Series`` whose dtype is tz-aware instead of object (:issue:`16406`) -- Bug in comparison of timezone-aware :class:`DatetimeIndex` against ``NaT`` incorrectly raising ``TypeError`` (:issue:`19276`) -- Bug in :meth:`DatetimeIndex.astype` when converting between timezone aware dtypes, and converting from timezone aware to naive (:issue:`18951`) -- Bug in comparing :class:`DatetimeIndex`, which failed to raise ``TypeError`` when attempting to compare timezone-aware and timezone-naive datetimelike objects (:issue:`18162`) -- Bug in localization of a naive, datetime string in a ``Series`` constructor with a ``datetime64[ns, tz]`` dtype (:issue:`174151`) -- :func:`Timestamp.replace` will now handle Daylight Savings transitions gracefully (:issue:`18319`) -- Bug in tz-aware :class:`DatetimeIndex` where addition/subtraction with a :class:`TimedeltaIndex` or array with ``dtype='timedelta64[ns]'`` was incorrect (:issue:`17558`) -- Bug in :func:`DatetimeIndex.insert` where inserting ``NaT`` into a timezone-aware index incorrectly raised (:issue:`16357`) -- Bug in :class:`DataFrame` constructor, where tz-aware Datetimeindex and a given column name will result in an empty ``DataFrame`` (:issue:`19157`) -- Bug in :func:`Timestamp.tz_localize` where localizing a timestamp near the minimum or maximum valid values could overflow and return a timestamp with an incorrect nanosecond value (:issue:`12677`) -- Bug when iterating over 
:class:`DatetimeIndex` that was localized with fixed timezone offset that rounded nanosecond precision to microseconds (:issue:`19603`) -- Bug in :func:`DataFrame.diff` that raised an ``IndexError`` with tz-aware values (:issue:`18578`) -- Bug in :func:`melt` that converted tz-aware dtypes to tz-naive (:issue:`15785`) -- Bug in :func:`Dataframe.count` that raised an ``ValueError``, if :func:`Dataframe.dropna` was called for a single column with timezone-aware values. (:issue:`13407`) - -Offsets -^^^^^^^ - -- Bug in :class:`WeekOfMonth` and :class:`Week` where addition and subtraction did not roll correctly (:issue:`18510`, :issue:`18672`, :issue:`18864`) -- Bug in :class:`WeekOfMonth` and :class:`LastWeekOfMonth` where default keyword arguments for constructor raised ``ValueError`` (:issue:`19142`) -- Bug in :class:`FY5253Quarter`, :class:`LastWeekOfMonth` where rollback and rollforward behavior was inconsistent with addition and subtraction behavior (:issue:`18854`) -- Bug in :class:`FY5253` where ``datetime`` addition and subtraction incremented incorrectly for dates on the year-end but not normalized to midnight (:issue:`18854`) -- Bug in :class:`FY5253` where date offsets could incorrectly raise an ``AssertionError`` in arithmetic operations (:issue:`14774`) - -Numeric -^^^^^^^ -- Bug in :class:`Series` constructor with an int or float list where specifying ``dtype=str``, ``dtype='str'`` or ``dtype='U'`` failed to convert the data elements to strings (:issue:`16605`) -- Bug in :class:`Index` multiplication and division methods where operating with a ``Series`` would return an ``Index`` object instead of a ``Series`` object (:issue:`19042`) -- Bug in the :class:`DataFrame` constructor in which data containing very large positive or very large negative numbers was causing ``OverflowError`` (:issue:`18584`) -- Bug in :class:`Index` constructor with ``dtype='uint64'`` where int-like floats were not coerced to :class:`UInt64Index` (:issue:`18400`) -- Bug in 
:class:`DataFrame` flex arithmetic (e.g. ``df.add(other, fill_value=foo)``) with a ``fill_value`` other than ``None`` failed to raise ``NotImplementedError`` in corner cases where either the frame or ``other`` has length zero (:issue:`19522`) -- Multiplication and division of numeric-dtyped :class:`Index` objects with timedelta-like scalars returns ``TimedeltaIndex`` instead of raising ``TypeError`` (:issue:`19333`) -- Bug where ``NaN`` was returned instead of 0 by :func:`Series.pct_change` and :func:`DataFrame.pct_change` when ``fill_method`` is not ``None`` (:issue:`19873`) - -Strings -^^^^^^^ -- Bug in :func:`Series.str.get` with a dictionary in the values and the index not in the keys, raising ``KeyError`` (:issue:`20671`) - - -Indexing -^^^^^^^^ - -- Bug in :class:`Index` construction from list of mixed type tuples (:issue:`18505`) -- Bug in :func:`Index.drop` when passing a list of both tuples and non-tuples (:issue:`18304`) -- Bug in :func:`DataFrame.drop`, :meth:`Panel.drop`, :meth:`Series.drop`, :meth:`Index.drop` where no ``KeyError`` is raised when dropping a non-existent element from an axis that contains duplicates (:issue:`19186`) -- Bug in indexing a datetimelike ``Index`` that raised ``ValueError`` instead of ``IndexError`` (:issue:`18386`). 
-- :func:`Index.to_series` now accepts ``index`` and ``name`` kwargs (:issue:`18699`) -- :func:`DatetimeIndex.to_series` now accepts ``index`` and ``name`` kwargs (:issue:`18699`) -- Bug in indexing non-scalar value from ``Series`` having non-unique ``Index`` will return value flattened (:issue:`17610`) -- Bug in indexing with iterator containing only missing keys, which raised no error (:issue:`20748`) -- Fixed inconsistency in ``.ix`` between list and scalar keys when the index has integer dtype and does not include the desired keys (:issue:`20753`) -- Bug in ``__setitem__`` when indexing a :class:`DataFrame` with a 2-d boolean ndarray (:issue:`18582`) -- Bug in ``str.extractall`` when there were no matches empty :class:`Index` was returned instead of appropriate :class:`MultiIndex` (:issue:`19034`) -- Bug in :class:`IntervalIndex` where empty and purely NA data was constructed inconsistently depending on the construction method (:issue:`18421`) -- Bug in :func:`IntervalIndex.symmetric_difference` where the symmetric difference with a non-``IntervalIndex`` did not raise (:issue:`18475`) -- Bug in :class:`IntervalIndex` where set operations that returned an empty ``IntervalIndex`` had the wrong dtype (:issue:`19101`) -- Bug in :meth:`DataFrame.drop_duplicates` where no ``KeyError`` is raised when passing in columns that don't exist on the ``DataFrame`` (:issue:`19726`) -- Bug in ``Index`` subclasses constructors that ignore unexpected keyword arguments (:issue:`19348`) -- Bug in :meth:`Index.difference` when taking difference of an ``Index`` with itself (:issue:`20040`) -- Bug in :meth:`DataFrame.first_valid_index` and :meth:`DataFrame.last_valid_index` in presence of entire rows of NaNs in the middle of values (:issue:`20499`). 
-- Bug in :class:`IntervalIndex` where some indexing operations were not supported for overlapping or non-monotonic ``uint64`` data (:issue:`20636`) -- Bug in ``Series.is_unique`` where extraneous output in stderr is shown if Series contains objects with ``__ne__`` defined (:issue:`20661`) -- Bug in ``.loc`` assignment with a single-element list-like incorrectly assigns as a list (:issue:`19474`) -- Bug in partial string indexing on a ``Series/DataFrame`` with a monotonic decreasing ``DatetimeIndex`` (:issue:`19362`) -- Bug in performing in-place operations on a ``DataFrame`` with a duplicate ``Index`` (:issue:`17105`) -- Bug in :meth:`IntervalIndex.get_loc` and :meth:`IntervalIndex.get_indexer` when used with an :class:`IntervalIndex` containing a single interval (:issue:`17284`, :issue:`20921`) -- Bug in ``.loc`` with a ``uint64`` indexer (:issue:`20722`) - -MultiIndex -^^^^^^^^^^ - -- Bug in :func:`MultiIndex.__contains__` where non-tuple keys would return ``True`` even if they had been dropped (:issue:`19027`) -- Bug in :func:`MultiIndex.set_labels` which would cause casting (and potentially clipping) of the new labels if the ``level`` argument is not 0 or a list like [0, 1, ... 
] (:issue:`19057`) -- Bug in :func:`MultiIndex.get_level_values` which would return an invalid index on level of ints with missing values (:issue:`17924`) -- Bug in :func:`MultiIndex.unique` when called on empty :class:`MultiIndex` (:issue:`20568`) -- Bug in :func:`MultiIndex.unique` which would not preserve level names (:issue:`20570`) -- Bug in :func:`MultiIndex.remove_unused_levels` which would fill nan values (:issue:`18417`) -- Bug in :func:`MultiIndex.from_tuples` which would fail to take zipped tuples in python3 (:issue:`18434`) -- Bug in :func:`MultiIndex.get_loc` which would fail to automatically cast values between float and int (:issue:`18818`, :issue:`15994`) -- Bug in :func:`MultiIndex.get_loc` which would cast boolean to integer labels (:issue:`19086`) -- Bug in :func:`MultiIndex.get_loc` which would fail to locate keys containing ``NaN`` (:issue:`18485`) -- Bug in :func:`MultiIndex.get_loc` in large :class:`MultiIndex`, would fail when levels had different dtypes (:issue:`18520`) -- Bug in indexing where nested indexers having only numpy arrays are handled incorrectly (:issue:`19686`) - - -IO -^^ - -- :func:`read_html` now rewinds seekable IO objects after parse failure, before attempting to parse with a new parser. If a parser errors and the object is non-seekable, an informative error is raised suggesting the use of a different parser (:issue:`17975`) -- :meth:`DataFrame.to_html` now has an option to add an id to the leading ``
`` tag (:issue:`8496`) -- Bug in :func:`read_msgpack` with a non existent file is passed in Python 2 (:issue:`15296`) -- Bug in :func:`read_csv` where a ``MultiIndex`` with duplicate columns was not being mangled appropriately (:issue:`18062`) -- Bug in :func:`read_csv` where missing values were not being handled properly when ``keep_default_na=False`` with dictionary ``na_values`` (:issue:`19227`) -- Bug in :func:`read_csv` causing heap corruption on 32-bit, big-endian architectures (:issue:`20785`) -- Bug in :func:`read_sas` where a file with 0 variables gave an ``AttributeError`` incorrectly. Now it gives an ``EmptyDataError`` (:issue:`18184`) -- Bug in :func:`DataFrame.to_latex` where pairs of braces meant to serve as invisible placeholders were escaped (:issue:`18667`) -- Bug in :func:`DataFrame.to_latex` where a ``NaN`` in a ``MultiIndex`` would cause an ``IndexError`` or incorrect output (:issue:`14249`) -- Bug in :func:`DataFrame.to_latex` where a non-string index-level name would result in an ``AttributeError`` (:issue:`19981`) -- Bug in :func:`DataFrame.to_latex` where the combination of an index name and the ``index_names=False`` option would result in incorrect output (:issue:`18326`) -- Bug in :func:`DataFrame.to_latex` where a ``MultiIndex`` with an empty string as its name would result in incorrect output (:issue:`18669`) -- Bug in :func:`DataFrame.to_latex` where missing space characters caused wrong escaping and produced non-valid latex in some cases (:issue:`20859`) -- Bug in :func:`read_json` where large numeric values were causing an ``OverflowError`` (:issue:`18842`) -- Bug in :func:`DataFrame.to_parquet` where an exception was raised if the write destination is S3 (:issue:`19134`) -- :class:`Interval` now supported in :func:`DataFrame.to_excel` for all Excel file types (:issue:`19242`) -- :class:`Timedelta` now supported in :func:`DataFrame.to_excel` for all Excel file types (:issue:`19242`, :issue:`9155`, :issue:`19900`) -- Bug in 
:meth:`pandas.io.stata.StataReader.value_labels` raising an ``AttributeError`` when called on very old files. Now returns an empty dict (:issue:`19417`) -- Bug in :func:`read_pickle` when unpickling objects with :class:`TimedeltaIndex` or :class:`Float64Index` created with pandas prior to version 0.20 (:issue:`19939`) -- Bug in :meth:`pandas.io.json.json_normalize` where sub-records are not properly normalized if any sub-records values are NoneType (:issue:`20030`) -- Bug in ``usecols`` parameter in :func:`read_csv` where error is not raised correctly when passing a string. (:issue:`20529`) -- Bug in :func:`HDFStore.keys` when reading a file with a soft link causes exception (:issue:`20523`) -- Bug in :func:`HDFStore.select_column` where a key which is not a valid store raised an ``AttributeError`` instead of a ``KeyError`` (:issue:`17912`) - -Plotting -^^^^^^^^ - -- Better error message when attempting to plot but matplotlib is not installed (:issue:`19810`). -- :func:`DataFrame.plot` now raises a ``ValueError`` when the ``x`` or ``y`` argument is improperly formed (:issue:`18671`) -- Bug in :func:`DataFrame.plot` when ``x`` and ``y`` arguments given as positions caused incorrect referenced columns for line, bar and area plots (:issue:`20056`) -- Bug in formatting tick labels with ``datetime.time()`` and fractional seconds (:issue:`18478`). -- :meth:`Series.plot.kde` has exposed the args ``ind`` and ``bw_method`` in the docstring (:issue:`18461`). The argument ``ind`` may now also be an integer (number of sample points). 
-- :func:`DataFrame.plot` now supports multiple columns to the ``y`` argument (:issue:`19699`) - - -GroupBy/resample/rolling -^^^^^^^^^^^^^^^^^^^^^^^^ - -- Bug when grouping by a single column and aggregating with a class like ``list`` or ``tuple`` (:issue:`18079`) -- Fixed regression in :func:`DataFrame.groupby` which would not emit an error when called with a tuple key not in the index (:issue:`18798`) -- Bug in :func:`DataFrame.resample` which silently ignored unsupported (or mistyped) options for ``label``, ``closed`` and ``convention`` (:issue:`19303`) -- Bug in :func:`DataFrame.groupby` where tuples were interpreted as lists of keys rather than as keys (:issue:`17979`, :issue:`18249`) -- Bug in :func:`DataFrame.groupby` where aggregation by ``first``/``last``/``min``/``max`` was causing timestamps to lose precision (:issue:`19526`) -- Bug in :func:`DataFrame.transform` where particular aggregation functions were being incorrectly cast to match the dtype(s) of the grouped data (:issue:`19200`) -- Bug in :func:`DataFrame.groupby` passing the ``on=`` kwarg, and subsequently using ``.apply()`` (:issue:`17813`) -- Bug in :func:`DataFrame.resample().aggregate <.Resampler.aggregate>` not raising a ``KeyError`` when aggregating a non-existent column (:issue:`16766`, :issue:`19566`) -- Bug in :func:`DataFrameGroupBy.cumsum` and :func:`DataFrameGroupBy.cumprod` when ``skipna`` was passed (:issue:`19806`) -- Bug in :func:`DataFrame.resample` that dropped timezone information (:issue:`13238`) -- Bug in :func:`DataFrame.groupby` where transformations using ``np.all`` and ``np.any`` were raising a ``ValueError`` (:issue:`20653`) -- Bug in :func:`DataFrame.resample` where ``ffill``, ``bfill``, ``pad``, ``backfill``, ``fillna``, ``interpolate``, and ``asfreq`` were ignoring ``loffset``. 
(:issue:`20744`) -- Bug in :func:`DataFrame.groupby` when applying a function that has mixed data types and the user supplied function can fail on the grouping column (:issue:`20949`) -- Bug in :func:`DataFrameGroupBy.rolling().apply() <.Rolling.apply>` where operations performed against the associated :class:`DataFrameGroupBy` object could impact the inclusion of the grouped item(s) in the result (:issue:`14013`) - -Sparse -^^^^^^ - -- Bug in which creating a :class:`SparseDataFrame` from a dense ``Series`` or an unsupported type raised an uncontrolled exception (:issue:`19374`) -- Bug in :class:`SparseDataFrame.to_csv` causing exception (:issue:`19384`) -- Bug in :class:`SparseSeries.memory_usage` which caused segfault by accessing non sparse elements (:issue:`19368`) -- Bug in constructing a :class:`SparseArray`: if ``data`` is a scalar and ``index`` is defined it will coerce to ``float64`` regardless of scalar's dtype. (:issue:`19163`) - -Reshaping -^^^^^^^^^ - -- Bug in :func:`DataFrame.merge` where referencing a ``CategoricalIndex`` by name, where the ``by`` kwarg would ``KeyError`` (:issue:`20777`) -- Bug in :func:`DataFrame.stack` which fails trying to sort mixed type levels under Python 3 (:issue:`18310`) -- Bug in :func:`DataFrame.unstack` which casts int to float if ``columns`` is a ``MultiIndex`` with unused levels (:issue:`17845`) -- Bug in :func:`DataFrame.unstack` which raises an error if ``index`` is a ``MultiIndex`` with unused labels on the unstacked level (:issue:`18562`) -- Fixed construction of a :class:`Series` from a ``dict`` containing ``NaN`` as key (:issue:`18480`) -- Fixed construction of a :class:`DataFrame` from a ``dict`` containing ``NaN`` as key (:issue:`18455`) -- Disabled construction of a :class:`Series` where len(index) > len(data) = 1, which previously would broadcast the data item, and now raises a ``ValueError`` (:issue:`18819`) -- Suppressed error in the construction of a :class:`DataFrame` from a ``dict`` containing scalar 
values when the corresponding keys are not included in the passed index (:issue:`18600`) - -- Fixed (changed from ``object`` to ``float64``) dtype of :class:`DataFrame` initialized with axes, no data, and ``dtype=int`` (:issue:`19646`) -- Bug in :func:`Series.rank` where ``Series`` containing ``NaT`` modifies the ``Series`` inplace (:issue:`18521`) -- Bug in :func:`cut` which fails when using readonly arrays (:issue:`18773`) -- Bug in :func:`DataFrame.pivot_table` which fails when the ``aggfunc`` arg is of type string. The behavior is now consistent with other methods like ``agg`` and ``apply`` (:issue:`18713`) -- Bug in :func:`DataFrame.merge` in which merging using ``Index`` objects as vectors raised an Exception (:issue:`19038`) -- Bug in :func:`DataFrame.stack`, :func:`DataFrame.unstack`, :func:`Series.unstack` which were not returning subclasses (:issue:`15563`) -- Bug in timezone comparisons, manifesting as a conversion of the index to UTC in ``.concat()`` (:issue:`18523`) -- Bug in :func:`concat` when concatenating sparse and dense series it returns only a ``SparseDataFrame``. Should be a ``DataFrame``. 
(:issue:`18914`, :issue:`18686`, and :issue:`16874`) -- Improved error message for :func:`DataFrame.merge` when there is no common merge key (:issue:`19427`) -- Bug in :func:`DataFrame.join` which does an ``outer`` instead of a ``left`` join when being called with multiple DataFrames and some have non-unique indices (:issue:`19624`) -- :func:`Series.rename` now accepts ``axis`` as a kwarg (:issue:`18589`) -- Bug in :func:`~DataFrame.rename` where an Index of same-length tuples was converted to a MultiIndex (:issue:`19497`) -- Comparisons between :class:`Series` and :class:`Index` would return a ``Series`` with an incorrect name, ignoring the ``Index``'s name attribute (:issue:`19582`) -- Bug in :func:`qcut` where datetime and timedelta data with ``NaT`` present raised a ``ValueError`` (:issue:`19768`) -- Bug in :func:`DataFrame.iterrows`, which would infers strings not compliant to `ISO8601 `_ to datetimes (:issue:`19671`) -- Bug in :class:`Series` constructor with ``Categorical`` where a ``ValueError`` is not raised when an index of different length is given (:issue:`19342`) -- Bug in :meth:`DataFrame.astype` where column metadata is lost when converting to categorical or a dictionary of dtypes (:issue:`19920`) -- Bug in :func:`cut` and :func:`qcut` where timezone information was dropped (:issue:`19872`) -- Bug in :class:`Series` constructor with a ``dtype=str``, previously raised in some cases (:issue:`19853`) -- Bug in :func:`get_dummies`, and :func:`select_dtypes`, where duplicate column names caused incorrect behavior (:issue:`20848`) -- Bug in :func:`isna`, which cannot handle ambiguous typed lists (:issue:`20675`) -- Bug in :func:`concat` which raises an error when concatenating TZ-aware dataframes and all-NaT dataframes (:issue:`12396`) -- Bug in :func:`concat` which raises an error when concatenating empty TZ-aware series (:issue:`18447`) - -Other -^^^^^ - -- Improved error message when attempting to use a Python keyword as an identifier in a ``numexpr`` 
backed query (:issue:`18221`) -- Bug in accessing a :func:`pandas.get_option`, which raised ``KeyError`` rather than ``OptionError`` when looking up a non-existent option key in some cases (:issue:`19789`) -- Bug in :func:`testing.assert_series_equal` and :func:`testing.assert_frame_equal` for Series or DataFrames with differing unicode data (:issue:`20503`) .. _whatsnew_0.23.0.contributors: From d984c1c349fcec1cf5b9e508a8ff303df0fd01f6 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sun, 27 Jul 2025 09:48:04 -0400 Subject: [PATCH 20/26] Debug --- doc/source/whatsnew/v0.23.0.rst | 189 -------------------------------- 1 file changed, 189 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.rst b/doc/source/whatsnew/v0.23.0.rst index b9140840a5f7e..25edf3b9935d5 100644 --- a/doc/source/whatsnew/v0.23.0.rst +++ b/doc/source/whatsnew/v0.23.0.rst @@ -194,195 +194,6 @@ levels ` documentation section. .. _whatsnew_0230.enhancements.sort_by_columns_and_levels: -Sorting by a combination of columns and index levels -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Strings passed to :meth:`DataFrame.sort_values` as the ``by`` parameter may -now refer to either column names or index level names. This enables sorting -``DataFrame`` instances by a combination of index levels and columns without -resetting indexes. See the :ref:`Sorting by Indexes and Values -` documentation section. -(:issue:`14353`) - -.. ipython:: python - - # Build MultiIndex - idx = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('a', 2), - ('b', 2), ('b', 1), ('b', 1)]) - idx.names = ['first', 'second'] - - # Build DataFrame - df_multi = pd.DataFrame({'A': np.arange(6, 0, -1)}, - index=idx) - df_multi - - # Sort by 'second' (index) and 'A' (column) - df_multi.sort_values(by=['second', 'A']) - - -.. 
_whatsnew_023.enhancements.extension: - -Extending pandas with custom types (experimental) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -pandas now supports storing array-like objects that aren't necessarily 1-D NumPy -arrays as columns in a DataFrame or values in a Series. This allows third-party -libraries to implement extensions to NumPy's types, similar to how pandas -implemented categoricals, datetimes with timezones, periods, and intervals. - -As a demonstration, we'll use cyberpandas_, which provides an ``IPArray`` type -for storing ip addresses. - -.. code-block:: ipython - - In [1]: from cyberpandas import IPArray - - In [2]: values = IPArray([ - ...: 0, - ...: 3232235777, - ...: 42540766452641154071740215577757643572 - ...: ]) - ...: - ...: - -``IPArray`` isn't a normal 1-D NumPy array, but because it's a pandas -:class:`~pandas.api.extensions.ExtensionArray`, it can be stored properly inside pandas' containers. - -.. code-block:: ipython - - In [3]: ser = pd.Series(values) - - In [4]: ser - Out[4]: - 0 0.0.0.0 - 1 192.168.1.1 - 2 2001:db8:85a3::8a2e:370:7334 - dtype: ip - -Notice that the dtype is ``ip``. The missing value semantics of the underlying -array are respected: - -.. code-block:: ipython - - In [5]: ser.isna() - Out[5]: - 0 True - 1 False - 2 False - dtype: bool - -For more, see the :ref:`extension types ` -documentation. If you build an extension array, publicize it on `the ecosystem page `_. - -.. _cyberpandas: https://cyberpandas.readthedocs.io/en/latest/ - - -.. _whatsnew_0230.enhancements.categorical_grouping: - -New ``observed`` keyword for excluding unobserved categories in ``GroupBy`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Grouping by a categorical includes the unobserved categories in the output. 
-When grouping by multiple categorical columns, this means you get the cartesian product of all the -categories, including combinations where there are no observations, which can result in a large -number of groups. We have added a keyword ``observed`` to control this behavior, it defaults to -``observed=False`` for backward-compatibility. (:issue:`14942`, :issue:`8138`, :issue:`15217`, :issue:`17594`, :issue:`8669`, :issue:`20583`, :issue:`20902`) - -.. ipython:: python - - cat1 = pd.Categorical(["a", "a", "b", "b"], - categories=["a", "b", "z"], ordered=True) - cat2 = pd.Categorical(["c", "d", "c", "d"], - categories=["c", "d", "y"], ordered=True) - df = pd.DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]}) - df['C'] = ['foo', 'bar'] * 2 - df - -To show all values, the previous behavior: - -.. ipython:: python - - df.groupby(['A', 'B', 'C'], observed=False).count() - - -To show only observed values: - -.. ipython:: python - - df.groupby(['A', 'B', 'C'], observed=True).count() - -For pivoting operations, this behavior is *already* controlled by the ``dropna`` keyword: - -.. ipython:: python - - cat1 = pd.Categorical(["a", "a", "b", "b"], - categories=["a", "b", "z"], ordered=True) - cat2 = pd.Categorical(["c", "d", "c", "d"], - categories=["c", "d", "y"], ordered=True) - df = pd.DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]}) - df - - -.. code-block:: ipython - - In [1]: pd.pivot_table(df, values='values', index=['A', 'B'], dropna=True) - - Out[1]: - values - A B - a c 1.0 - d 2.0 - b c 3.0 - d 4.0 - - In [2]: pd.pivot_table(df, values='values', index=['A', 'B'], dropna=False) - - Out[2]: - values - A B - a c 1.0 - d 2.0 - y NaN - b c 3.0 - d 4.0 - y NaN - z c NaN - d NaN - y NaN - - -.. 
_whatsnew_0230.enhancements.window_raw: - -Rolling/Expanding.apply() accepts ``raw=False`` to pass a ``Series`` to the function -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -:func:`Series.rolling().apply() <.Rolling.apply>`, :func:`DataFrame.rolling().apply() <.Rolling.apply>`, -:func:`Series.expanding().apply() <.Expanding.apply>`, and :func:`DataFrame.expanding().apply() <.Expanding.apply>` have gained a ``raw=None`` parameter. -This is similar to :func:`DataFame.apply`. This parameter, if ``True`` allows one to send a ``np.ndarray`` to the applied function. If ``False`` a ``Series`` will be passed. The -default is ``None``, which preserves backward compatibility, so this will default to ``True``, sending an ``np.ndarray``. -In a future version the default will be changed to ``False``, sending a ``Series``. (:issue:`5071`, :issue:`20584`) - -.. ipython:: python - - s = pd.Series(np.arange(5), np.arange(5) + 1) - s - -Pass a ``Series``: - -.. ipython:: python - - s.rolling(2, min_periods=1).apply(lambda x: x.iloc[-1], raw=False) - -Mimic the original behavior of passing a ndarray: - -.. ipython:: python - - s.rolling(2, min_periods=1).apply(lambda x: x[-1], raw=True) - - -.. _whatsnew_0210.enhancements.limit_area: - - .. 
_whatsnew_0.23.0.contributors: Contributors From a92650d9909cdf69fab3d38f7511255d978da969 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sun, 27 Jul 2025 10:03:08 -0400 Subject: [PATCH 21/26] Debug --- doc/source/whatsnew/v0.23.0.rst | 91 --------------------------------- 1 file changed, 91 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.rst b/doc/source/whatsnew/v0.23.0.rst index 25edf3b9935d5..68fbaf315e8d6 100644 --- a/doc/source/whatsnew/v0.23.0.rst +++ b/doc/source/whatsnew/v0.23.0.rst @@ -102,97 +102,6 @@ A ``DataFrame`` can now be written to and subsequently read back via JSON while Please note that the string ``index`` is not supported with the round trip format, as it is used by default in ``write_json`` to indicate a missing index name. -.. ipython:: python - :okwarning: - - df.index.name = 'index' - - df.to_json('test.json', orient='table') - new_df = pd.read_json('test.json', orient='table') - new_df - new_df.dtypes - -.. ipython:: python - :suppress: - - import os - os.remove('test.json') - - -.. _whatsnew_0230.enhancements.assign_dependent: - - -Method ``.assign()`` accepts dependent arguments -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The :func:`DataFrame.assign` now accepts dependent keyword arguments for python version later than 3.6 (see also `PEP 468 -`_). Later keyword arguments may now refer to earlier ones if the argument is a callable. See the -:ref:`documentation here ` (:issue:`14207`) - -.. ipython:: python - - df = pd.DataFrame({'A': [1, 2, 3]}) - df - df.assign(B=df.A, C=lambda x: x['A'] + x['B']) - -.. warning:: - - This may subtly change the behavior of your code when you're - using ``.assign()`` to update an existing column. Previously, callables - referring to other variables being updated would get the "old" values - - Previous behavior: - - .. 
code-block:: ipython - - In [2]: df = pd.DataFrame({"A": [1, 2, 3]}) - - In [3]: df.assign(A=lambda df: df.A + 1, C=lambda df: df.A * -1) - Out[3]: - A C - 0 2 -1 - 1 3 -2 - 2 4 -3 - - New behavior: - - .. ipython:: python - - df.assign(A=df.A + 1, C=lambda df: df.A * -1) - - - -.. _whatsnew_0230.enhancements.merge_on_columns_and_levels: - -Merging on a combination of columns and index levels -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Strings passed to :meth:`DataFrame.merge` as the ``on``, ``left_on``, and ``right_on`` -parameters may now refer to either column names or index level names. -This enables merging ``DataFrame`` instances on a combination of index levels -and columns without resetting indexes. See the :ref:`Merge on columns and -levels ` documentation section. -(:issue:`14355`) - -.. ipython:: python - - left_index = pd.Index(['K0', 'K0', 'K1', 'K2'], name='key1') - - left = pd.DataFrame({'A': ['A0', 'A1', 'A2', 'A3'], - 'B': ['B0', 'B1', 'B2', 'B3'], - 'key2': ['K0', 'K1', 'K0', 'K1']}, - index=left_index) - - right_index = pd.Index(['K0', 'K1', 'K2', 'K2'], name='key1') - - right = pd.DataFrame({'C': ['C0', 'C1', 'C2', 'C3'], - 'D': ['D0', 'D1', 'D2', 'D3'], - 'key2': ['K0', 'K0', 'K0', 'K1']}, - index=right_index) - - left.merge(right, on=['key1', 'key2']) - -.. _whatsnew_0230.enhancements.sort_by_columns_and_levels: .. 
_whatsnew_0.23.0.contributors: From a034332c9429ad5cc41d09b06acf7a72064ecce6 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sun, 27 Jul 2025 10:23:43 -0400 Subject: [PATCH 22/26] Debug --- doc/source/whatsnew/v0.23.0.rst | 113 ++++++++------------------------ 1 file changed, 29 insertions(+), 84 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.rst b/doc/source/whatsnew/v0.23.0.rst index 68fbaf315e8d6..628e7ed88f48a 100644 --- a/doc/source/whatsnew/v0.23.0.rst +++ b/doc/source/whatsnew/v0.23.0.rst @@ -11,101 +11,46 @@ What's new in 0.23.0 (May 15, 2018) from pandas import * # noqa F401, F403 -This is a major release from 0.22.0 and includes a number of API changes, -deprecations, new features, enhancements, and performance improvements along -with a large number of bug fixes. We recommend that all users upgrade to this -version. - -Highlights include: - -- :ref:`Round-trippable JSON format with 'table' orient `. -- :ref:`Instantiation from dicts respects order for Python 3.6+ `. -- :ref:`Dependent column arguments for assign `. -- :ref:`Merging / sorting on a combination of columns and index levels `. -- :ref:`Extending pandas with custom types `. -- :ref:`Excluding unobserved categories from groupby `. -- :ref:`Changes to make output shape of DataFrame.apply consistent `. - -Check the :ref:`API Changes ` and :ref:`deprecations ` before updating. - -.. warning:: - - Starting January 1, 2019, pandas feature releases will support Python 3 only. - See `Dropping Python 2.7 `_ for more. - -.. contents:: What's new in v0.23.0 - :local: - :backlinks: none - :depth: 2 - -.. _whatsnew_0230.enhancements: - -New features -~~~~~~~~~~~~ - -.. 
_whatsnew_0230.enhancements.round-trippable_json: - -JSON read/write round-trippable with ``orient='table'`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -A ``DataFrame`` can now be written to and subsequently read back via JSON while preserving metadata through usage of the ``orient='table'`` argument (see :issue:`18912` and :issue:`9146`). Previously, none of the available ``orient`` values guaranteed the preservation of dtypes and index names, amongst other metadata. - -.. code-block:: ipython +.. ipython:: python + :okwarning: - In [1]: df = pd.DataFrame({'foo': [1, 2, 3, 4], - ...: 'bar': ['a', 'b', 'c', 'd'], - ...: 'baz': pd.date_range('2018-01-01', freq='D', periods=4), - ...: 'qux': pd.Categorical(['a', 'b', 'c', 'c'])}, - ...: index=pd.Index(range(4), name='idx')) + df.index.name = 'index' - In [2]: df - Out[2]: - foo bar baz qux - idx - 0 1 a 2018-01-01 a - 1 2 b 2018-01-02 b - 2 3 c 2018-01-03 c - 3 4 d 2018-01-04 c + df.to_json('test.json', orient='table') + new_df = pd.read_json('test.json', orient='table') + new_df + new_df.dtypes - [4 rows x 4 columns] +.. ipython:: python + :suppress: - In [3]: df.dtypes - Out[3]: - foo int64 - bar object - baz datetime64[ns] - qux category - Length: 4, dtype: object + import os + os.remove('test.json') - In [4]: df.to_json('test.json', orient='table') +.. ipython:: python - In [5]: new_df = pd.read_json('test.json', orient='table') + df = pd.DataFrame({'A': [1, 2, 3]}) + df + df.assign(B=df.A, C=lambda x: x['A'] + x['B']) - In [6]: new_df - Out[6]: - foo bar baz qux - idx - 0 1 a 2018-01-01 a - 1 2 b 2018-01-02 b - 2 3 c 2018-01-03 c - 3 4 d 2018-01-04 c + .. ipython:: python - [4 rows x 4 columns] + df.assign(A=df.A + 1, C=lambda df: df.A * -1) - In [7]: new_df.dtypes - Out[7]: - foo int64 - bar object - baz datetime64[ns] - qux category - Length: 4, dtype: object +.. 
ipython:: python -Please note that the string ``index`` is not supported with the round trip format, as it is used by default in ``write_json`` to indicate a missing index name. + left_index = pd.Index(['K0', 'K0', 'K1', 'K2'], name='key1') + left = pd.DataFrame({'A': ['A0', 'A1', 'A2', 'A3'], + 'B': ['B0', 'B1', 'B2', 'B3'], + 'key2': ['K0', 'K1', 'K0', 'K1']}, + index=left_index) -.. _whatsnew_0.23.0.contributors: + right_index = pd.Index(['K0', 'K1', 'K2', 'K2'], name='key1') -Contributors -~~~~~~~~~~~~ + right = pd.DataFrame({'C': ['C0', 'C1', 'C2', 'C3'], + 'D': ['D0', 'D1', 'D2', 'D3'], + 'key2': ['K0', 'K0', 'K0', 'K1']}, + index=right_index) -.. contributors:: v0.22.0..v0.23.0 + left.merge(right, on=['key1', 'key2']) From 0b0a8501d467c5f146bdae409a7fb6fb6d79e3a7 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sun, 27 Jul 2025 10:33:50 -0400 Subject: [PATCH 23/26] Debug --- doc/source/whatsnew/v0.23.0.rst | 34 --------------------------------- 1 file changed, 34 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.rst b/doc/source/whatsnew/v0.23.0.rst index 628e7ed88f48a..b2e0b6533e929 100644 --- a/doc/source/whatsnew/v0.23.0.rst +++ b/doc/source/whatsnew/v0.23.0.rst @@ -20,37 +20,3 @@ What's new in 0.23.0 (May 15, 2018) new_df = pd.read_json('test.json', orient='table') new_df new_df.dtypes - -.. ipython:: python - :suppress: - - import os - os.remove('test.json') - -.. ipython:: python - - df = pd.DataFrame({'A': [1, 2, 3]}) - df - df.assign(B=df.A, C=lambda x: x['A'] + x['B']) - - .. ipython:: python - - df.assign(A=df.A + 1, C=lambda df: df.A * -1) - -.. 
ipython:: python - - left_index = pd.Index(['K0', 'K0', 'K1', 'K2'], name='key1') - - left = pd.DataFrame({'A': ['A0', 'A1', 'A2', 'A3'], - 'B': ['B0', 'B1', 'B2', 'B3'], - 'key2': ['K0', 'K1', 'K0', 'K1']}, - index=left_index) - - right_index = pd.Index(['K0', 'K1', 'K2', 'K2'], name='key1') - - right = pd.DataFrame({'C': ['C0', 'C1', 'C2', 'C3'], - 'D': ['D0', 'D1', 'D2', 'D3'], - 'key2': ['K0', 'K0', 'K0', 'K1']}, - index=right_index) - - left.merge(right, on=['key1', 'key2']) From 747f5ce2a8210a45ab7b5c154f1be2e57a05db5a Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sun, 27 Jul 2025 10:55:37 -0400 Subject: [PATCH 24/26] Debug --- doc/source/whatsnew/v0.23.0.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/doc/source/whatsnew/v0.23.0.rst b/doc/source/whatsnew/v0.23.0.rst index b2e0b6533e929..29e13780dd326 100644 --- a/doc/source/whatsnew/v0.23.0.rst +++ b/doc/source/whatsnew/v0.23.0.rst @@ -14,6 +14,16 @@ What's new in 0.23.0 (May 15, 2018) .. ipython:: python :okwarning: + df = pd.DataFrame( + { + 'foo': [1, 2, 3, 4], + 'bar': ['a', 'b', 'c', 'd'], + 'baz': pd.date_range('2018-01-01', freq='d', periods=4), + 'qux': pd.Categorical(['a', 'b', 'c', 'c']) + }, + index=pd.Index(range(4), name='idx') + ) + df.index.name = 'index' df.to_json('test.json', orient='table') From 0b111fa89680a90b4186142881c272dd7bb68e6e Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sun, 27 Jul 2025 11:15:27 -0400 Subject: [PATCH 25/26] No whammies! 
--- doc/source/whatsnew/v0.23.0.rst | 1513 +++++++++++++++++++++++++++++++ 1 file changed, 1513 insertions(+) diff --git a/doc/source/whatsnew/v0.23.0.rst b/doc/source/whatsnew/v0.23.0.rst index 29e13780dd326..bc1841331d700 100644 --- a/doc/source/whatsnew/v0.23.0.rst +++ b/doc/source/whatsnew/v0.23.0.rst @@ -11,6 +11,97 @@ What's new in 0.23.0 (May 15, 2018) from pandas import * # noqa F401, F403 +This is a major release from 0.22.0 and includes a number of API changes, +deprecations, new features, enhancements, and performance improvements along +with a large number of bug fixes. We recommend that all users upgrade to this +version. + +Highlights include: + +- :ref:`Round-trippable JSON format with 'table' orient `. +- :ref:`Instantiation from dicts respects order for Python 3.6+ `. +- :ref:`Dependent column arguments for assign `. +- :ref:`Merging / sorting on a combination of columns and index levels `. +- :ref:`Extending pandas with custom types `. +- :ref:`Excluding unobserved categories from groupby `. +- :ref:`Changes to make output shape of DataFrame.apply consistent `. + +Check the :ref:`API Changes ` and :ref:`deprecations ` before updating. + +.. warning:: + + Starting January 1, 2019, pandas feature releases will support Python 3 only. + See `Dropping Python 2.7 `_ for more. + +.. contents:: What's new in v0.23.0 + :local: + :backlinks: none + :depth: 2 + +.. _whatsnew_0230.enhancements: + +New features +~~~~~~~~~~~~ + +.. _whatsnew_0230.enhancements.round-trippable_json: + +JSON read/write round-trippable with ``orient='table'`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A ``DataFrame`` can now be written to and subsequently read back via JSON while preserving metadata through usage of the ``orient='table'`` argument (see :issue:`18912` and :issue:`9146`). Previously, none of the available ``orient`` values guaranteed the preservation of dtypes and index names, amongst other metadata. + +.. 
code-block:: ipython + + In [1]: df = pd.DataFrame({'foo': [1, 2, 3, 4], + ...: 'bar': ['a', 'b', 'c', 'd'], + ...: 'baz': pd.date_range('2018-01-01', freq='d', periods=4), + ...: 'qux': pd.Categorical(['a', 'b', 'c', 'c'])}, + ...: index=pd.Index(range(4), name='idx')) + + In [2]: df + Out[2]: + foo bar baz qux + idx + 0 1 a 2018-01-01 a + 1 2 b 2018-01-02 b + 2 3 c 2018-01-03 c + 3 4 d 2018-01-04 c + + [4 rows x 4 columns] + + In [3]: df.dtypes + Out[3]: + foo int64 + bar object + baz datetime64[ns] + qux category + Length: 4, dtype: object + + In [4]: df.to_json('test.json', orient='table') + + In [5]: new_df = pd.read_json('test.json', orient='table') + + In [6]: new_df + Out[6]: + foo bar baz qux + idx + 0 1 a 2018-01-01 a + 1 2 b 2018-01-02 b + 2 3 c 2018-01-03 c + 3 4 d 2018-01-04 c + + [4 rows x 4 columns] + + In [7]: new_df.dtypes + Out[7]: + foo int64 + bar object + baz datetime64[ns] + qux category + Length: 4, dtype: object + +Please note that the string ``index`` is not supported with the round trip format, as it is used by default in ``write_json`` to indicate a missing index name. + .. ipython:: python :okwarning: @@ -30,3 +121,1425 @@ What's new in 0.23.0 (May 15, 2018) new_df = pd.read_json('test.json', orient='table') new_df new_df.dtypes + +.. ipython:: python + :suppress: + + import os + os.remove('test.json') + + +.. _whatsnew_0230.enhancements.assign_dependent: + + +Method ``.assign()`` accepts dependent arguments +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The :func:`DataFrame.assign` now accepts dependent keyword arguments for python version later than 3.6 (see also `PEP 468 +`_). Later keyword arguments may now refer to earlier ones if the argument is a callable. See the +:ref:`documentation here ` (:issue:`14207`) + +.. ipython:: python + + df = pd.DataFrame({'A': [1, 2, 3]}) + df + df.assign(B=df.A, C=lambda x: x['A'] + x['B']) + +.. 
warning:: + + This may subtly change the behavior of your code when you're + using ``.assign()`` to update an existing column. Previously, callables + referring to other variables being updated would get the "old" values + + Previous behavior: + + .. code-block:: ipython + + In [2]: df = pd.DataFrame({"A": [1, 2, 3]}) + + In [3]: df.assign(A=lambda df: df.A + 1, C=lambda df: df.A * -1) + Out[3]: + A C + 0 2 -1 + 1 3 -2 + 2 4 -3 + + New behavior: + + .. ipython:: python + + df.assign(A=df.A + 1, C=lambda df: df.A * -1) + + + +.. _whatsnew_0230.enhancements.merge_on_columns_and_levels: + +Merging on a combination of columns and index levels +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Strings passed to :meth:`DataFrame.merge` as the ``on``, ``left_on``, and ``right_on`` +parameters may now refer to either column names or index level names. +This enables merging ``DataFrame`` instances on a combination of index levels +and columns without resetting indexes. See the :ref:`Merge on columns and +levels ` documentation section. +(:issue:`14355`) + +.. ipython:: python + + left_index = pd.Index(['K0', 'K0', 'K1', 'K2'], name='key1') + + left = pd.DataFrame({'A': ['A0', 'A1', 'A2', 'A3'], + 'B': ['B0', 'B1', 'B2', 'B3'], + 'key2': ['K0', 'K1', 'K0', 'K1']}, + index=left_index) + + right_index = pd.Index(['K0', 'K1', 'K2', 'K2'], name='key1') + + right = pd.DataFrame({'C': ['C0', 'C1', 'C2', 'C3'], + 'D': ['D0', 'D1', 'D2', 'D3'], + 'key2': ['K0', 'K0', 'K0', 'K1']}, + index=right_index) + + left.merge(right, on=['key1', 'key2']) + +.. _whatsnew_0230.enhancements.sort_by_columns_and_levels: + +Sorting by a combination of columns and index levels +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Strings passed to :meth:`DataFrame.sort_values` as the ``by`` parameter may +now refer to either column names or index level names. This enables sorting +``DataFrame`` instances by a combination of index levels and columns without +resetting indexes. 
See the :ref:`Sorting by Indexes and Values +` documentation section. +(:issue:`14353`) + +.. ipython:: python + + # Build MultiIndex + idx = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('a', 2), + ('b', 2), ('b', 1), ('b', 1)]) + idx.names = ['first', 'second'] + + # Build DataFrame + df_multi = pd.DataFrame({'A': np.arange(6, 0, -1)}, + index=idx) + df_multi + + # Sort by 'second' (index) and 'A' (column) + df_multi.sort_values(by=['second', 'A']) + + +.. _whatsnew_023.enhancements.extension: + +Extending pandas with custom types (experimental) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +pandas now supports storing array-like objects that aren't necessarily 1-D NumPy +arrays as columns in a DataFrame or values in a Series. This allows third-party +libraries to implement extensions to NumPy's types, similar to how pandas +implemented categoricals, datetimes with timezones, periods, and intervals. + +As a demonstration, we'll use cyberpandas_, which provides an ``IPArray`` type +for storing ip addresses. + +.. code-block:: ipython + + In [1]: from cyberpandas import IPArray + + In [2]: values = IPArray([ + ...: 0, + ...: 3232235777, + ...: 42540766452641154071740215577757643572 + ...: ]) + ...: + ...: + +``IPArray`` isn't a normal 1-D NumPy array, but because it's a pandas +:class:`~pandas.api.extensions.ExtensionArray`, it can be stored properly inside pandas' containers. + +.. code-block:: ipython + + In [3]: ser = pd.Series(values) + + In [4]: ser + Out[4]: + 0 0.0.0.0 + 1 192.168.1.1 + 2 2001:db8:85a3::8a2e:370:7334 + dtype: ip + +Notice that the dtype is ``ip``. The missing value semantics of the underlying +array are respected: + +.. code-block:: ipython + + In [5]: ser.isna() + Out[5]: + 0 True + 1 False + 2 False + dtype: bool + +For more, see the :ref:`extension types ` +documentation. If you build an extension array, publicize it on `the ecosystem page `_. + +.. _cyberpandas: https://cyberpandas.readthedocs.io/en/latest/ + + +.. 
_whatsnew_0230.enhancements.categorical_grouping: + +New ``observed`` keyword for excluding unobserved categories in ``GroupBy`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Grouping by a categorical includes the unobserved categories in the output. +When grouping by multiple categorical columns, this means you get the cartesian product of all the +categories, including combinations where there are no observations, which can result in a large +number of groups. We have added a keyword ``observed`` to control this behavior, it defaults to +``observed=False`` for backward-compatibility. (:issue:`14942`, :issue:`8138`, :issue:`15217`, :issue:`17594`, :issue:`8669`, :issue:`20583`, :issue:`20902`) + +.. ipython:: python + + cat1 = pd.Categorical(["a", "a", "b", "b"], + categories=["a", "b", "z"], ordered=True) + cat2 = pd.Categorical(["c", "d", "c", "d"], + categories=["c", "d", "y"], ordered=True) + df = pd.DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]}) + df['C'] = ['foo', 'bar'] * 2 + df + +To show all values, the previous behavior: + +.. ipython:: python + + df.groupby(['A', 'B', 'C'], observed=False).count() + + +To show only observed values: + +.. ipython:: python + + df.groupby(['A', 'B', 'C'], observed=True).count() + +For pivoting operations, this behavior is *already* controlled by the ``dropna`` keyword: + +.. ipython:: python + + cat1 = pd.Categorical(["a", "a", "b", "b"], + categories=["a", "b", "z"], ordered=True) + cat2 = pd.Categorical(["c", "d", "c", "d"], + categories=["c", "d", "y"], ordered=True) + df = pd.DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]}) + df + + +.. 
code-block:: ipython + + In [1]: pd.pivot_table(df, values='values', index=['A', 'B'], dropna=True) + + Out[1]: + values + A B + a c 1.0 + d 2.0 + b c 3.0 + d 4.0 + + In [2]: pd.pivot_table(df, values='values', index=['A', 'B'], dropna=False) + + Out[2]: + values + A B + a c 1.0 + d 2.0 + y NaN + b c 3.0 + d 4.0 + y NaN + z c NaN + d NaN + y NaN + + +.. _whatsnew_0230.enhancements.window_raw: + +Rolling/Expanding.apply() accepts ``raw=False`` to pass a ``Series`` to the function +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:func:`Series.rolling().apply() <.Rolling.apply>`, :func:`DataFrame.rolling().apply() <.Rolling.apply>`, +:func:`Series.expanding().apply() <.Expanding.apply>`, and :func:`DataFrame.expanding().apply() <.Expanding.apply>` have gained a ``raw=None`` parameter. +This is similar to :func:`DataFrame.apply`. This parameter, if ``True`` allows one to send a ``np.ndarray`` to the applied function. If ``False`` a ``Series`` will be passed. The +default is ``None``, which preserves backward compatibility, so this will default to ``True``, sending an ``np.ndarray``. +In a future version the default will be changed to ``False``, sending a ``Series``. (:issue:`5071`, :issue:`20584`) + +.. ipython:: python + + s = pd.Series(np.arange(5), np.arange(5) + 1) + s + +Pass a ``Series``: + +.. ipython:: python + + s.rolling(2, min_periods=1).apply(lambda x: x.iloc[-1], raw=False) + +Mimic the original behavior of passing an ndarray: + +.. ipython:: python + + s.rolling(2, min_periods=1).apply(lambda x: x[-1], raw=True) + + +.. _whatsnew_0210.enhancements.limit_area: + +``DataFrame.interpolate`` has gained the ``limit_area`` kwarg +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:meth:`DataFrame.interpolate` has gained a ``limit_area`` parameter to allow further control of which ``NaN`` s are replaced.
+Use ``limit_area='inside'`` to fill only NaNs surrounded by valid values or use ``limit_area='outside'`` to fill only ``NaN`` s +outside the existing valid values while preserving those inside. (:issue:`16284`) See the :ref:`full documentation here `. + + +.. ipython:: python + + ser = pd.Series([np.nan, np.nan, 5, np.nan, np.nan, + np.nan, 13, np.nan, np.nan]) + ser + +Fill one consecutive inside value in both directions + +.. ipython:: python + + ser.interpolate(limit_direction='both', limit_area='inside', limit=1) + +Fill all consecutive outside values backward + +.. ipython:: python + + ser.interpolate(limit_direction='backward', limit_area='outside') + +Fill all consecutive outside values in both directions + +.. ipython:: python + + ser.interpolate(limit_direction='both', limit_area='outside') + +.. _whatsnew_0210.enhancements.get_dummies_dtype: + +Function ``get_dummies`` now supports ``dtype`` argument +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The :func:`get_dummies` now accepts a ``dtype`` argument, which specifies a dtype for the new columns. The default remains uint8. (:issue:`18330`) + +.. ipython:: python + + df = pd.DataFrame({'a': [1, 2], 'b': [3, 4], 'c': [5, 6]}) + pd.get_dummies(df, columns=['c']).dtypes + pd.get_dummies(df, columns=['c'], dtype=bool).dtypes + + +.. _whatsnew_0230.enhancements.timedelta_mod: + +Timedelta mod method +^^^^^^^^^^^^^^^^^^^^ + +``mod`` (%) and ``divmod`` operations are now defined on ``Timedelta`` objects +when operating with either timedelta-like or with numeric arguments. +See the :ref:`documentation here `. (:issue:`19365`) + +.. ipython:: python + + td = pd.Timedelta(hours=37) + td % pd.Timedelta(minutes=45) + +.. _whatsnew_0230.enhancements.ran_inf: + +Method ``.rank()`` handles ``inf`` values when ``NaN`` are present +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In previous versions, ``.rank()`` would assign ``inf`` elements ``NaN`` as their ranks. 
Now ranks are calculated properly. (:issue:`6945`) + +.. ipython:: python + + s = pd.Series([-np.inf, 0, 1, np.nan, np.inf]) + s + +Previous behavior: + +.. code-block:: ipython + + In [11]: s.rank() + Out[11]: + 0 1.0 + 1 2.0 + 2 3.0 + 3 NaN + 4 NaN + dtype: float64 + +Current behavior: + +.. ipython:: python + + s.rank() + +Furthermore, previously if you rank ``inf`` or ``-inf`` values together with ``NaN`` values, the calculation won't distinguish ``NaN`` from infinity when using 'top' or 'bottom' argument. + +.. ipython:: python + + s = pd.Series([np.nan, np.nan, -np.inf, -np.inf]) + s + +Previous behavior: + +.. code-block:: ipython + + In [15]: s.rank(na_option='top') + Out[15]: + 0 2.5 + 1 2.5 + 2 2.5 + 3 2.5 + dtype: float64 + +Current behavior: + +.. ipython:: python + + s.rank(na_option='top') + +These bugs were squashed: + +- Bug in :meth:`DataFrame.rank` and :meth:`Series.rank` when ``method='dense'`` and ``pct=True`` in which percentile ranks were not being used with the number of distinct observations (:issue:`15630`) +- Bug in :meth:`Series.rank` and :meth:`DataFrame.rank` when ``ascending='False'`` failed to return correct ranks for infinity if ``NaN`` were present (:issue:`19538`) +- Bug in :func:`DataFrameGroupBy.rank` where ranks were incorrect when both infinity and ``NaN`` were present (:issue:`20561`) + + +.. _whatsnew_0230.enhancements.str_cat_align: + +``Series.str.cat`` has gained the ``join`` kwarg +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previously, :meth:`Series.str.cat` did not -- in contrast to most of ``pandas`` -- align :class:`Series` on their index before concatenation (see :issue:`18657`). +The method has now gained a keyword ``join`` to control the manner of alignment, see examples below and :ref:`here `. + +In v.0.23 ``join`` will default to None (meaning no alignment), but this default will change to ``'left'`` in a future version of pandas. + +.. 
ipython:: python + :okwarning: + + s = pd.Series(['a', 'b', 'c', 'd']) + t = pd.Series(['b', 'd', 'e', 'c'], index=[1, 3, 4, 2]) + s.str.cat(t) + s.str.cat(t, join='left', na_rep='-') + +Furthermore, :meth:`Series.str.cat` now works for ``CategoricalIndex`` as well (previously raised a ``ValueError``; see :issue:`20842`). + +.. _whatsnew_0230.enhancements.astype_category: + +``DataFrame.astype`` performs column-wise conversion to ``Categorical`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:meth:`DataFrame.astype` can now perform column-wise conversion to ``Categorical`` by supplying the string ``'category'`` or +a :class:`~pandas.api.types.CategoricalDtype`. Previously, attempting this would raise a ``NotImplementedError``. See the +:ref:`categorical.objectcreation` section of the documentation for more details and examples. (:issue:`12860`, :issue:`18099`) + +Supplying the string ``'category'`` performs column-wise conversion, with only labels appearing in a given column set as categories: + +.. ipython:: python + + df = pd.DataFrame({'A': list('abca'), 'B': list('bccd')}) + df = df.astype('category') + df['A'].dtype + df['B'].dtype + + +Supplying a ``CategoricalDtype`` will make the categories in each column consistent with the supplied dtype: + +.. ipython:: python + + from pandas.api.types import CategoricalDtype + df = pd.DataFrame({'A': list('abca'), 'B': list('bccd')}) + cdt = CategoricalDtype(categories=list('abcd'), ordered=True) + df = df.astype(cdt) + df['A'].dtype + df['B'].dtype + + +.. _whatsnew_0230.enhancements.other: + +Other enhancements +^^^^^^^^^^^^^^^^^^ + +- Unary ``+`` now permitted for ``Series`` and ``DataFrame`` as numeric operator (:issue:`16073`) +- Better support for :meth:`~pandas.io.formats.style.Styler.to_excel` output with the ``xlsxwriter`` engine. (:issue:`16149`) +- :func:`pandas.tseries.frequencies.to_offset` now accepts leading '+' signs e.g. '+1h'. 
(:issue:`18171`) +- :func:`MultiIndex.unique` now supports the ``level=`` argument, to get unique values from a specific index level (:issue:`17896`) +- :class:`pandas.io.formats.style.Styler` now has method ``hide_index()`` to determine whether the index will be rendered in output (:issue:`14194`) +- :class:`pandas.io.formats.style.Styler` now has method ``hide_columns()`` to determine whether columns will be hidden in output (:issue:`14194`) +- Improved wording of ``ValueError`` raised in :func:`to_datetime` when ``unit=`` is passed with a non-convertible value (:issue:`14350`) +- :func:`Series.fillna` now accepts a Series or a dict as a ``value`` for a categorical dtype (:issue:`17033`) +- :func:`pandas.read_clipboard` updated to use qtpy, falling back to PyQt5 and then PyQt4, adding compatibility with Python3 and multiple python-qt bindings (:issue:`17722`) +- Improved wording of ``ValueError`` raised in :func:`read_csv` when the ``usecols`` argument cannot match all columns. (:issue:`17301`) +- :func:`DataFrame.corrwith` now silently drops non-numeric columns when passed a Series. Before, an exception was raised (:issue:`18570`). +- :class:`IntervalIndex` now supports time zone aware ``Interval`` objects (:issue:`18537`, :issue:`18538`) +- :func:`Series` / :func:`DataFrame` tab completion also returns identifiers in the first level of a :func:`MultiIndex`. (:issue:`16326`) +- :func:`read_excel` has gained the ``nrows`` parameter (:issue:`16645`) +- :meth:`DataFrame.append` can now in more cases preserve the type of the calling dataframe's columns (e.g. 
if both are ``CategoricalIndex``) (:issue:`18359`) +- :meth:`DataFrame.to_json` and :meth:`Series.to_json` now accept an ``index`` argument which allows the user to exclude the index from the JSON output (:issue:`17394`) +- ``IntervalIndex.to_tuples()`` has gained the ``na_tuple`` parameter to control whether NA is returned as a tuple of NA, or NA itself (:issue:`18756`) +- ``Categorical.rename_categories``, ``CategoricalIndex.rename_categories`` and :attr:`Series.cat.rename_categories` + can now take a callable as their argument (:issue:`18862`) +- :class:`Interval` and :class:`IntervalIndex` have gained a ``length`` attribute (:issue:`18789`) +- ``Resampler`` objects now have a functioning :attr:`.Resampler.pipe` method. + Previously, calls to ``pipe`` were diverted to the ``mean`` method (:issue:`17905`). +- :func:`~pandas.api.types.is_scalar` now returns ``True`` for ``DateOffset`` objects (:issue:`18943`). +- :func:`DataFrame.pivot` now accepts a list for the ``values=`` kwarg (:issue:`17160`). +- Added :func:`pandas.api.extensions.register_dataframe_accessor`, + :func:`pandas.api.extensions.register_series_accessor`, and + :func:`pandas.api.extensions.register_index_accessor`, accessor for libraries downstream of pandas + to register custom accessors like ``.cat`` on pandas objects. See + :ref:`Registering Custom Accessors ` for more (:issue:`14781`). 
+ +- ``IntervalIndex.astype`` now supports conversions between subtypes when passed an ``IntervalDtype`` (:issue:`19197`) +- :class:`IntervalIndex` and its associated constructor methods (``from_arrays``, ``from_breaks``, ``from_tuples``) have gained a ``dtype`` parameter (:issue:`19262`) +- Added :func:`.SeriesGroupBy.is_monotonic_increasing` and :func:`.SeriesGroupBy.is_monotonic_decreasing` (:issue:`17015`) +- For subclassed ``DataFrames``, :func:`DataFrame.apply` will now preserve the ``Series`` subclass (if defined) when passing the data to the applied function (:issue:`19822`) +- :func:`DataFrame.from_dict` now accepts a ``columns`` argument that can be used to specify the column names when ``orient='index'`` is used (:issue:`18529`) +- Added option ``display.html.use_mathjax`` so `MathJax `_ can be disabled when rendering tables in ``Jupyter`` notebooks (:issue:`19856`, :issue:`19824`) +- :func:`DataFrame.replace` now supports the ``method`` parameter, which can be used to specify the replacement method when ``to_replace`` is a scalar, list or tuple and ``value`` is ``None`` (:issue:`19632`) +- :meth:`Timestamp.month_name`, :meth:`DatetimeIndex.month_name`, and :meth:`Series.dt.month_name` are now available (:issue:`12805`) +- :meth:`Timestamp.day_name` and :meth:`DatetimeIndex.day_name` are now available to return day names with a specified locale (:issue:`12806`) +- :meth:`DataFrame.to_sql` now performs a multi-value insert if the underlying connection supports it rather than inserting row by row. + ``SQLAlchemy`` dialects supporting multi-value inserts include: ``mysql``, ``postgresql``, ``sqlite`` and any dialect with ``supports_multivalues_insert``. (:issue:`14315`, :issue:`8953`) +- :func:`read_html` now accepts a ``displayed_only`` keyword argument to control whether or not hidden elements are parsed (``True`` by default) (:issue:`20027`) +- :func:`read_html` now reads all ```` elements in a ``
``, not just the first. (:issue:`20690`) +- :meth:`.Rolling.quantile` and :meth:`.Expanding.quantile` now accept the ``interpolation`` keyword, ``linear`` by default (:issue:`20497`) +- zip compression is supported via ``compression=zip`` in :func:`DataFrame.to_pickle`, :func:`Series.to_pickle`, :func:`DataFrame.to_csv`, :func:`Series.to_csv`, :func:`DataFrame.to_json`, :func:`Series.to_json`. (:issue:`17778`) +- :class:`~pandas.tseries.offsets.WeekOfMonth` constructor now supports ``n=0`` (:issue:`20517`). +- :class:`DataFrame` and :class:`Series` now support matrix multiplication (``@``) operator (:issue:`10259`) for Python>=3.5 +- Updated :meth:`DataFrame.to_gbq` and :meth:`pandas.read_gbq` signature and documentation to reflect changes from + the pandas-gbq library version 0.4.0. Adds intersphinx mapping to pandas-gbq + library. (:issue:`20564`) +- Added new writer for exporting Stata dta files in version 117, ``StataWriter117``. This format supports exporting strings with lengths up to 2,000,000 characters (:issue:`16450`) +- :func:`to_hdf` and :func:`read_hdf` now accept an ``errors`` keyword argument to control encoding error handling (:issue:`20835`) +- :func:`cut` has gained the ``duplicates='raise'|'drop'`` option to control whether to raise on duplicated edges (:issue:`20947`) +- :func:`date_range`, :func:`timedelta_range`, and :func:`interval_range` now return a linearly spaced index if ``start``, ``stop``, and ``periods`` are specified, but ``freq`` is not. (:issue:`20808`, :issue:`20983`, :issue:`20976`) + +.. _whatsnew_0230.api_breaking: + +Backwards incompatible API changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. _whatsnew_0230.api_breaking.deps: + +Dependencies have increased minimum versions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +We have updated our minimum supported versions of dependencies (:issue:`15184`). 
+If installed, we now require: + ++-----------------+-----------------+----------+---------------+ +| Package | Minimum Version | Required | Issue | ++=================+=================+==========+===============+ +| python-dateutil | 2.5.0 | X | :issue:`15184`| ++-----------------+-----------------+----------+---------------+ +| openpyxl | 2.4.0 | | :issue:`15184`| ++-----------------+-----------------+----------+---------------+ +| beautifulsoup4 | 4.2.1 | | :issue:`20082`| ++-----------------+-----------------+----------+---------------+ +| setuptools | 24.2.0 | | :issue:`20698`| ++-----------------+-----------------+----------+---------------+ + +.. _whatsnew_0230.api_breaking.dict_insertion_order: + +Instantiation from dicts preserves dict insertion order for Python 3.6+ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Until Python 3.6, dicts in Python had no formally defined ordering. For Python +version 3.6 and later, dicts are ordered by insertion order, see +`PEP 468 `_. +pandas will use the dict's insertion order, when creating a ``Series`` or +``DataFrame`` from a dict and you're using Python version 3.6 or +higher. (:issue:`19884`) + +Previous behavior (and current behavior if on Python < 3.6): + +.. code-block:: ipython + + In [16]: pd.Series({'Income': 2000, + ....: 'Expenses': -1500, + ....: 'Taxes': -200, + ....: 'Net result': 300}) + Out[16]: + Expenses -1500 + Income 2000 + Net result 300 + Taxes -200 + dtype: int64 + +Note the Series above is ordered alphabetically by the index values. + +New behavior (for Python >= 3.6): + +.. ipython:: python + + pd.Series({'Income': 2000, + 'Expenses': -1500, + 'Taxes': -200, + 'Net result': 300}) + +Notice that the Series is now ordered by insertion order. This new behavior is +used for all relevant pandas types (``Series``, ``DataFrame``, ``SparseSeries`` +and ``SparseDataFrame``). 
+ +If you wish to retain the old behavior while using Python >= 3.6, you can use +``.sort_index()``: + +.. ipython:: python + + pd.Series({'Income': 2000, + 'Expenses': -1500, + 'Taxes': -200, + 'Net result': 300}).sort_index() + +.. _whatsnew_0230.api_breaking.deprecate_panel: + +Deprecate Panel +^^^^^^^^^^^^^^^ + +``Panel`` was deprecated in the 0.20.x release, showing as a ``DeprecationWarning``. Using ``Panel`` will now show a ``FutureWarning``. The recommended way to represent 3-D data are +with a ``MultiIndex`` on a ``DataFrame`` via the :meth:`~Panel.to_frame` or with the `xarray package `__. pandas +provides a :meth:`~Panel.to_xarray` method to automate this conversion (:issue:`13563`, :issue:`18324`). + +.. code-block:: ipython + + In [75]: import pandas._testing as tm + + In [76]: p = tm.makePanel() + + In [77]: p + Out[77]: + + Dimensions: 3 (items) x 3 (major_axis) x 4 (minor_axis) + Items axis: ItemA to ItemC + Major_axis axis: 2000-01-03 00:00:00 to 2000-01-05 00:00:00 + Minor_axis axis: A to D + +Convert to a MultiIndex DataFrame + +.. code-block:: ipython + + In [78]: p.to_frame() + Out[78]: + ItemA ItemB ItemC + major minor + 2000-01-03 A 0.469112 0.721555 0.404705 + B -1.135632 0.271860 -1.039268 + C 0.119209 0.276232 -1.344312 + D -2.104569 0.113648 -0.109050 + 2000-01-04 A -0.282863 -0.706771 0.577046 + B 1.212112 -0.424972 -0.370647 + C -1.044236 -1.087401 0.844885 + D -0.494929 -1.478427 1.643563 + 2000-01-05 A -1.509059 -1.039575 -1.715002 + B -0.173215 0.567020 -1.157892 + C -0.861849 -0.673690 1.075770 + D 1.071804 0.524988 -1.469388 + + [12 rows x 3 columns] + +Convert to an xarray DataArray + +.. 
code-block:: ipython + + In [79]: p.to_xarray() + Out[79]: + <xarray.DataArray (items: 3, major_axis: 3, minor_axis: 4)> + array([[[ 0.469112, -1.135632, 0.119209, -2.104569], + [-0.282863, 1.212112, -1.044236, -0.494929], + [-1.509059, -0.173215, -0.861849, 1.071804]], + + [[ 0.721555, 0.27186 , 0.276232, 0.113648], + [-0.706771, -0.424972, -1.087401, -1.478427], + [-1.039575, 0.56702 , -0.67369 , 0.524988]], + + [[ 0.404705, -1.039268, -1.344312, -0.10905 ], + [ 0.577046, -0.370647, 0.844885, 1.643563], + [-1.715002, -1.157892, 1.07577 , -1.469388]]]) + Coordinates: + * items (items) object 'ItemA' 'ItemB' 'ItemC' + * major_axis (major_axis) datetime64[ns] 2000-01-03 2000-01-04 2000-01-05 + * minor_axis (minor_axis) object 'A' 'B' 'C' 'D' + + +.. _whatsnew_0230.api_breaking.core_common: + +pandas.core.common removals +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The following error & warning messages are removed from ``pandas.core.common`` (:issue:`13634`, :issue:`19769`): + +- ``PerformanceWarning`` +- ``UnsupportedFunctionCall`` +- ``UnsortedIndexError`` +- ``AbstractMethodError`` + +These are available for import from ``pandas.errors`` (since 0.19.0). + + +.. _whatsnew_0230.api_breaking.apply: + +Changes to make output of ``DataFrame.apply`` consistent +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:func:`DataFrame.apply` was inconsistent when applying an arbitrary user-defined-function that returned a list-like with ``axis=1``. Several bugs and inconsistencies +are resolved. If the applied function returns a Series, then pandas will return a DataFrame; otherwise a Series will be returned, this includes the case +where a list-like (e.g. ``tuple`` or ``list``) is returned (:issue:`16353`, :issue:`17437`, :issue:`17970`, :issue:`17348`, :issue:`17892`, :issue:`18573`, +:issue:`17602`, :issue:`18775`, :issue:`18901`, :issue:`18919`). + +..
ipython:: python + + df = pd.DataFrame(np.tile(np.arange(3), 6).reshape(6, -1) + 1, + columns=['A', 'B', 'C']) + df + +Previous behavior: if the returned shape happened to match the length of original columns, this would return a ``DataFrame``. +If the return shape did not match, a ``Series`` with lists was returned. + +.. code-block:: python + + In [3]: df.apply(lambda x: [1, 2, 3], axis=1) + Out[3]: + A B C + 0 1 2 3 + 1 1 2 3 + 2 1 2 3 + 3 1 2 3 + 4 1 2 3 + 5 1 2 3 + + In [4]: df.apply(lambda x: [1, 2], axis=1) + Out[4]: + 0 [1, 2] + 1 [1, 2] + 2 [1, 2] + 3 [1, 2] + 4 [1, 2] + 5 [1, 2] + dtype: object + + +New behavior: When the applied function returns a list-like, this will now *always* return a ``Series``. + +.. ipython:: python + + df.apply(lambda x: [1, 2, 3], axis=1) + df.apply(lambda x: [1, 2], axis=1) + +To have expanded columns, you can use ``result_type='expand'`` + +.. ipython:: python + + df.apply(lambda x: [1, 2, 3], axis=1, result_type='expand') + +To broadcast the result across the original columns (the old behaviour for +list-likes of the correct length), you can use ``result_type='broadcast'``. +The shape must match the original columns. + +.. ipython:: python + + df.apply(lambda x: [1, 2, 3], axis=1, result_type='broadcast') + +Returning a ``Series`` allows one to control the exact return structure and column names: + +.. ipython:: python + + df.apply(lambda x: pd.Series([1, 2, 3], index=['D', 'E', 'F']), axis=1) + +.. _whatsnew_0230.api_breaking.concat: + +Concatenation will no longer sort +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In a future version of pandas :func:`pandas.concat` will no longer sort the non-concatenation axis when it is not already aligned. +The current behavior is the same as the previous (sorting), but now a warning is issued when ``sort`` is not specified and the non-concatenation axis is not aligned (:issue:`4588`). + +.. 
ipython:: python + :okwarning: + + df1 = pd.DataFrame({"a": [1, 2], "b": [1, 2]}, columns=['b', 'a']) + df2 = pd.DataFrame({"a": [4, 5]}) + + pd.concat([df1, df2]) + +To keep the previous behavior (sorting) and silence the warning, pass ``sort=True`` + +.. ipython:: python + + pd.concat([df1, df2], sort=True) + +To accept the future behavior (no sorting), pass ``sort=False`` + +.. ipython:: python + + pd.concat([df1, df2], sort=False) + +Note that this change also applies to :meth:`DataFrame.append`, which has also received a ``sort`` keyword for controlling this behavior. + + +.. _whatsnew_0230.api_breaking.build_changes: + +Build changes +^^^^^^^^^^^^^ + +- Building pandas for development now requires ``cython >= 0.24`` (:issue:`18613`) +- Building from source now explicitly requires ``setuptools`` in ``setup.py`` (:issue:`18113`) +- Updated conda recipe to be in compliance with conda-build 3.0+ (:issue:`18002`) + +.. _whatsnew_0230.api_breaking.index_division_by_zero: + +Index division by zero fills correctly +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Division operations on ``Index`` and subclasses will now fill division of positive numbers by zero with ``np.inf``, division of negative numbers by zero with ``-np.inf`` and ``0 / 0`` with ``np.nan``. This matches existing ``Series`` behavior. (:issue:`19322`, :issue:`19347`) + +Previous behavior: + +.. code-block:: ipython + + In [6]: index = pd.Int64Index([-1, 0, 1]) + + In [7]: index / 0 + Out[7]: Int64Index([0, 0, 0], dtype='int64') + + # Previous behavior yielded different results depending on the type of zero in the divisor + In [8]: index / 0.0 + Out[8]: Float64Index([-inf, nan, inf], dtype='float64') + + In [9]: index = pd.UInt64Index([0, 1]) + + In [10]: index / np.array([0, 0], dtype=np.uint64) + Out[10]: UInt64Index([0, 0], dtype='uint64') + + In [11]: pd.RangeIndex(1, 5) / 0 + ZeroDivisionError: integer division or modulo by zero + +Current behavior: + +..
code-block:: ipython + + In [12]: index = pd.Int64Index([-1, 0, 1]) + # division by zero gives -infinity where negative, + # +infinity where positive, and NaN for 0 / 0 + In [13]: index / 0 + + # The result of division by zero should not depend on + # whether the zero is int or float + In [14]: index / 0.0 + + In [15]: index = pd.UInt64Index([0, 1]) + In [16]: index / np.array([0, 0], dtype=np.uint64) + + In [17]: pd.RangeIndex(1, 5) / 0 + +.. _whatsnew_0230.api_breaking.extract: + +Extraction of matching patterns from strings +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +By default, extracting matching patterns from strings with :func:`str.extract` used to return a +``Series`` if a single group was being extracted (a ``DataFrame`` if more than one group was +extracted). As of pandas 0.23.0 :func:`str.extract` always returns a ``DataFrame``, unless +``expand`` is set to ``False``. Finally, ``None`` was an accepted value for +the ``expand`` parameter (which was equivalent to ``False``), but now raises a ``ValueError``. (:issue:`11386`) + +Previous behavior: + +.. code-block:: ipython + + In [1]: s = pd.Series(['number 10', '12 eggs']) + + In [2]: extracted = s.str.extract(r'.*(\d\d).*') + + In [3]: extracted + Out [3]: + 0 10 + 1 12 + dtype: object + + In [4]: type(extracted) + Out [4]: + pandas.core.series.Series + +New behavior: + +.. ipython:: python + + s = pd.Series(['number 10', '12 eggs']) + extracted = s.str.extract(r'.*(\d\d).*') + extracted + type(extracted) + +To restore previous behavior, simply set ``expand`` to ``False``: + +.. ipython:: python + + s = pd.Series(['number 10', '12 eggs']) + extracted = s.str.extract(r'.*(\d\d).*', expand=False) + extracted + type(extracted) + +.. 
_whatsnew_0230.api_breaking.cdt_ordered: + +Default value for the ``ordered`` parameter of ``CategoricalDtype`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The default value of the ``ordered`` parameter for :class:`~pandas.api.types.CategoricalDtype` has changed from ``False`` to ``None`` to allow updating of ``categories`` without impacting ``ordered``. Behavior should remain consistent for downstream objects, such as :class:`Categorical` (:issue:`18790`) + +In previous versions, the default value for the ``ordered`` parameter was ``False``. This could potentially lead to the ``ordered`` parameter unintentionally being changed from ``True`` to ``False`` when users attempt to update ``categories`` if ``ordered`` is not explicitly specified, as it would silently default to ``False``. The new behavior for ``ordered=None`` is to retain the existing value of ``ordered``. + +New behavior: + +.. code-block:: ipython + + In [2]: from pandas.api.types import CategoricalDtype + + In [3]: cat = pd.Categorical(list('abcaba'), ordered=True, categories=list('cba')) + + In [4]: cat + Out[4]: + [a, b, c, a, b, a] + Categories (3, object): [c < b < a] + + In [5]: cdt = CategoricalDtype(categories=list('cbad')) + + In [6]: cat.astype(cdt) + Out[6]: + [a, b, c, a, b, a] + Categories (4, object): [c < b < a < d] + +Notice in the example above that the converted ``Categorical`` has retained ``ordered=True``. Had the default value for ``ordered`` remained as ``False``, the converted ``Categorical`` would have become unordered, despite ``ordered=False`` never being explicitly specified. To change the value of ``ordered``, explicitly pass it to the new dtype, e.g. ``CategoricalDtype(categories=list('cbad'), ordered=False)``. 
+ +Note that the unintentional conversion of ``ordered`` discussed above did not arise in previous versions due to separate bugs that prevented ``astype`` from doing any type of category to category conversion (:issue:`10696`, :issue:`18593`). These bugs have been fixed in this release, and motivated changing the default value of ``ordered``. + +.. _whatsnew_0230.api_breaking.pretty_printing: + +Better pretty-printing of DataFrames in a terminal +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Previously, the default value for the maximum number of columns was +``pd.options.display.max_columns=20``. This meant that relatively wide data +frames would not fit within the terminal width, and pandas would introduce line +breaks to display these 20 columns. This resulted in an output that was +relatively difficult to read: + +.. image:: ../_static/print_df_old.png + +If Python runs in a terminal, the maximum number of columns is now determined +automatically so that the printed data frame fits within the current terminal +width (``pd.options.display.max_columns=0``) (:issue:`17023`). If Python runs +as a Jupyter kernel (such as the Jupyter QtConsole or a Jupyter notebook, as +well as in many IDEs), this value cannot be inferred automatically and is thus +set to ``20`` as in previous versions. In a terminal, this results in a much +nicer output: + +.. image:: ../_static/print_df_new.png + +Note that if you don't like the new default, you can always set this option +yourself. To revert to the old setting, you can run this line: + +.. code-block:: python + + pd.options.display.max_columns = 20 + +.. 
_whatsnew_0230.api.datetimelike: + +Datetimelike API changes +^^^^^^^^^^^^^^^^^^^^^^^^ + +- The default ``Timedelta`` constructor now accepts an ``ISO 8601 Duration`` string as an argument (:issue:`19040`) +- Subtracting ``NaT`` from a :class:`Series` with ``dtype='datetime64[ns]'`` returns a ``Series`` with ``dtype='timedelta64[ns]'`` instead of ``dtype='datetime64[ns]'`` (:issue:`18808`) +- Addition or subtraction of ``NaT`` from :class:`TimedeltaIndex` will return ``TimedeltaIndex`` instead of ``DatetimeIndex`` (:issue:`19124`) +- :func:`DatetimeIndex.shift` and :func:`TimedeltaIndex.shift` will now raise ``NullFrequencyError`` (which subclasses ``ValueError``, which was raised in older versions) when the index object frequency is ``None`` (:issue:`19147`) +- Addition and subtraction of ``NaN`` from a :class:`Series` with ``dtype='timedelta64[ns]'`` will raise a ``TypeError`` instead of treating the ``NaN`` as ``NaT`` (:issue:`19274`) +- ``NaT`` division with :class:`datetime.timedelta` will now return ``NaN`` instead of raising (:issue:`17876`) +- Operations between a :class:`Series` with dtype ``dtype='datetime64[ns]'`` and a :class:`PeriodIndex` will correctly raise ``TypeError`` (:issue:`18850`) +- Subtraction of :class:`Series` with timezone-aware ``dtype='datetime64[ns]'`` with mismatched timezones will raise ``TypeError`` instead of ``ValueError`` (:issue:`18817`) +- :class:`Timestamp` will no longer silently ignore unused or invalid ``tz`` or ``tzinfo`` keyword arguments (:issue:`17690`) +- :class:`Timestamp` will no longer silently ignore invalid ``freq`` arguments (:issue:`5168`) +- :class:`CacheableOffset` and :class:`WeekDay` are no longer available in the ``pandas.tseries.offsets`` module (:issue:`17830`) +- ``pandas.tseries.frequencies.get_freq_group()`` and ``pandas.tseries.frequencies.DAYS`` are removed from the public API (:issue:`18034`) +- :func:`Series.truncate` and :func:`DataFrame.truncate` will raise a ``ValueError`` if the index is not
sorted instead of an unhelpful ``KeyError`` (:issue:`17935`) +- :attr:`Series.first` and :attr:`DataFrame.first` will now raise a ``TypeError`` + rather than ``NotImplementedError`` when index is not a :class:`DatetimeIndex` (:issue:`20725`). +- :attr:`Series.last` and :attr:`DataFrame.last` will now raise a ``TypeError`` + rather than ``NotImplementedError`` when index is not a :class:`DatetimeIndex` (:issue:`20725`). +- Restricted ``DateOffset`` keyword arguments. Previously, ``DateOffset`` subclasses allowed arbitrary keyword arguments which could lead to unexpected behavior. Now, only valid arguments will be accepted. (:issue:`17176`, :issue:`18226`). +- :func:`pandas.merge` provides a more informative error message when trying to merge on timezone-aware and timezone-naive columns (:issue:`15800`) +- For :class:`DatetimeIndex` and :class:`TimedeltaIndex` with ``freq=None``, addition or subtraction of integer-dtyped array or ``Index`` will raise ``NullFrequencyError`` instead of ``TypeError`` (:issue:`19895`) +- :class:`Timestamp` constructor now accepts a ``nanosecond`` keyword or positional argument (:issue:`18898`) +- :class:`DatetimeIndex` will now raise an ``AttributeError`` when the ``tz`` attribute is set after instantiation (:issue:`3746`) +- :class:`DatetimeIndex` with a ``pytz`` timezone will now return a consistent ``pytz`` timezone (:issue:`18595`) + +.. 
_whatsnew_0230.api.other: + +Other API changes +^^^^^^^^^^^^^^^^^ + +- :func:`Series.astype` and :func:`Index.astype` with an incompatible dtype will now raise a ``TypeError`` rather than a ``ValueError`` (:issue:`18231`) +- ``Series`` construction with an ``object`` dtyped tz-aware datetime and ``dtype=object`` specified, will now return an ``object`` dtyped ``Series``, previously this would infer the datetime dtype (:issue:`18231`) +- A :class:`Series` of ``dtype=category`` constructed from an empty ``dict`` will now have categories of ``dtype=object`` rather than ``dtype=float64``, consistently with the case in which an empty list is passed (:issue:`18515`) +- All-NaN levels in a ``MultiIndex`` are now assigned ``float`` rather than ``object`` dtype, promoting consistency with ``Index`` (:issue:`17929`). +- Levels names of a ``MultiIndex`` (when not None) are now required to be unique: trying to create a ``MultiIndex`` with repeated names will raise a ``ValueError`` (:issue:`18872`) +- Both construction and renaming of ``Index``/``MultiIndex`` with non-hashable ``name``/``names`` will now raise ``TypeError`` (:issue:`20527`) +- :func:`Index.map` can now accept ``Series`` and dictionary input objects (:issue:`12756`, :issue:`18482`, :issue:`18509`). +- :func:`DataFrame.unstack` will now default to filling with ``np.nan`` for ``object`` columns. (:issue:`12815`) +- :class:`IntervalIndex` constructor will raise if the ``closed`` parameter conflicts with how the input data is inferred to be closed (:issue:`18421`) +- Inserting missing values into indexes will work for all types of indexes and automatically insert the correct type of missing value (``NaN``, ``NaT``, etc.) regardless of the type passed in (:issue:`18295`) +- When created with duplicate labels, ``MultiIndex`` now raises a ``ValueError``. 
(:issue:`17464`) +- :func:`Series.fillna` now raises a ``TypeError`` instead of a ``ValueError`` when passed a list, tuple or DataFrame as a ``value`` (:issue:`18293`) +- :func:`pandas.DataFrame.merge` no longer casts a ``float`` column to ``object`` when merging on ``int`` and ``float`` columns (:issue:`16572`) +- :func:`pandas.merge` now raises a ``ValueError`` when trying to merge on incompatible data types (:issue:`9780`) +- The default NA value for :class:`UInt64Index` has changed from 0 to ``NaN``, which impacts methods that mask with NA, such as ``UInt64Index.where()`` (:issue:`18398`) +- Refactored ``setup.py`` to use ``find_packages`` instead of explicitly listing out all subpackages (:issue:`18535`) +- Rearranged the order of keyword arguments in :func:`read_excel` to align with :func:`read_csv` (:issue:`16672`) +- :func:`wide_to_long` previously kept numeric-like suffixes as ``object`` dtype. Now they are cast to numeric if possible (:issue:`17627`) +- In :func:`read_excel`, the ``comment`` argument is now exposed as a named parameter (:issue:`18735`) +- Rearranged the order of keyword arguments in :func:`read_excel` to align with :func:`read_csv` (:issue:`16672`) +- The options ``html.border`` and ``mode.use_inf_as_null`` were deprecated in prior versions, these will now show ``FutureWarning`` rather than a ``DeprecationWarning`` (:issue:`19003`) +- :class:`IntervalIndex` and ``IntervalDtype`` no longer support categorical, object, and string subtypes (:issue:`19016`) +- ``IntervalDtype`` now returns ``True`` when compared against ``'interval'`` regardless of subtype, and ``IntervalDtype.name`` now returns ``'interval'`` regardless of subtype (:issue:`18980`) +- ``KeyError`` now raises instead of ``ValueError`` in :meth:`~DataFrame.drop`, :meth:`~Panel.drop`, :meth:`~Series.drop`, :meth:`~Index.drop` when dropping a non-existent element in an axis with duplicates (:issue:`19186`) +- :func:`Series.to_csv` now accepts a ``compression`` argument that works 
in the same way as the ``compression`` argument in :func:`DataFrame.to_csv` (:issue:`18958`) +- Set operations (union, difference...) on :class:`IntervalIndex` with incompatible index types will now raise a ``TypeError`` rather than a ``ValueError`` (:issue:`19329`) +- :class:`DateOffset` objects render more simply, e.g. ``<DateOffset: days=1>`` instead of ``<DateOffset: kwds={'days': 1}>`` (:issue:`19403`) +- ``Categorical.fillna`` now validates its ``value`` and ``method`` keyword arguments. It now raises when both or none are specified, matching the behavior of :meth:`Series.fillna` (:issue:`19682`) +- ``pd.to_datetime('today')`` now returns a datetime, consistent with ``pd.Timestamp('today')``; previously ``pd.to_datetime('today')`` returned a ``.normalized()`` datetime (:issue:`19935`) +- :func:`Series.str.replace` now takes an optional ``regex`` keyword which, when set to ``False``, uses literal string replacement rather than regex replacement (:issue:`16808`) +- :func:`DatetimeIndex.strftime` and :func:`PeriodIndex.strftime` now return an ``Index`` instead of a numpy array to be consistent with similar accessors (:issue:`20127`) +- Constructing a Series from a list of length 1 no longer broadcasts this list when a longer index is specified (:issue:`19714`, :issue:`20391`). +- :func:`DataFrame.to_dict` with ``orient='index'`` no longer casts int columns to float for a DataFrame with only int and float columns (:issue:`18580`) +- A user-defined-function that is passed to :func:`Series.rolling().aggregate() <.Rolling.aggregate>`, :func:`DataFrame.rolling().aggregate() <.Rolling.aggregate>`, or its expanding cousins, will now *always* be passed a ``Series``, rather than a ``np.array``; ``.apply()`` only has the ``raw`` keyword, see :ref:`here <whatsnew_0230.enhancements.window_raw>`. This is consistent with the signatures of ``.aggregate()`` across pandas (:issue:`20584`) +- Rolling and Expanding types raise ``NotImplementedError`` upon iteration (:issue:`11704`). + +..
_whatsnew_0230.deprecations: + +Deprecations +~~~~~~~~~~~~ + +- ``Series.from_array`` and ``SparseSeries.from_array`` are deprecated. Use the normal constructor ``Series(..)`` and ``SparseSeries(..)`` instead (:issue:`18213`). +- ``DataFrame.as_matrix`` is deprecated. Use ``DataFrame.values`` instead (:issue:`18458`). +- ``Series.asobject``, ``DatetimeIndex.asobject``, ``PeriodIndex.asobject`` and ``TimeDeltaIndex.asobject`` have been deprecated. Use ``.astype(object)`` instead (:issue:`18572`) +- Grouping by a tuple of keys now emits a ``FutureWarning`` and is deprecated. + In the future, a tuple passed to ``'by'`` will always refer to a single key + that is the actual tuple, instead of treating the tuple as multiple keys. To + retain the previous behavior, use a list instead of a tuple (:issue:`18314`) +- ``Series.valid`` is deprecated. Use :meth:`Series.dropna` instead (:issue:`18800`). +- :func:`read_excel` has deprecated the ``skip_footer`` parameter. Use ``skipfooter`` instead (:issue:`18836`) +- :meth:`ExcelFile.parse` has deprecated ``sheetname`` in favor of ``sheet_name`` for consistency with :func:`read_excel` (:issue:`20920`). +- The ``is_copy`` attribute is deprecated and will be removed in a future version (:issue:`18801`). +- ``IntervalIndex.from_intervals`` is deprecated in favor of the :class:`IntervalIndex` constructor (:issue:`19263`) +- ``DataFrame.from_items`` is deprecated. Use :func:`DataFrame.from_dict` instead, or ``DataFrame.from_dict(OrderedDict())`` if you wish to preserve the key order (:issue:`17320`, :issue:`17312`) +- Indexing a :class:`MultiIndex` or a :class:`FloatIndex` with a list containing some missing keys will now show a :class:`FutureWarning`, which is consistent with other types of indexes (:issue:`17758`). 
+ +- The ``broadcast`` parameter of ``.apply()`` is deprecated in favor of ``result_type='broadcast'`` (:issue:`18577`) +- The ``reduce`` parameter of ``.apply()`` is deprecated in favor of ``result_type='reduce'`` (:issue:`18577`) +- The ``order`` parameter of :func:`factorize` is deprecated and will be removed in a future release (:issue:`19727`) +- :attr:`Timestamp.weekday_name`, :attr:`DatetimeIndex.weekday_name`, and :attr:`Series.dt.weekday_name` are deprecated in favor of :meth:`Timestamp.day_name`, :meth:`DatetimeIndex.day_name`, and :meth:`Series.dt.day_name` (:issue:`12806`) + +- ``pandas.tseries.plotting.tsplot`` is deprecated. Use :func:`Series.plot` instead (:issue:`18627`) +- ``Index.summary()`` is deprecated and will be removed in a future version (:issue:`18217`) +- ``NDFrame.get_ftype_counts()`` is deprecated and will be removed in a future version (:issue:`18243`) +- The ``convert_datetime64`` parameter in :func:`DataFrame.to_records` has been deprecated and will be removed in a future version. The NumPy bug motivating this parameter has been resolved. The default value for this parameter has also changed from ``True`` to ``None`` (:issue:`18160`). +- :func:`Series.rolling().apply() <.Rolling.apply>`, :func:`DataFrame.rolling().apply() <.Rolling.apply>`, :func:`Series.expanding().apply() <.Expanding.apply>`, and :func:`DataFrame.expanding().apply() <.Expanding.apply>` have deprecated passing an ``np.array`` by default. One will need to pass the new ``raw`` parameter to be explicit about what is passed (:issue:`20584`) +- The ``data``, ``base``, ``strides``, ``flags`` and ``itemsize`` properties + of the ``Series`` and ``Index`` classes have been deprecated and will be + removed in a future version (:issue:`20419`). +- ``DatetimeIndex.offset`` is deprecated. Use ``DatetimeIndex.freq`` instead (:issue:`20716`) +- Floor division between an integer ndarray and a :class:`Timedelta` is deprecated. 
Divide by :attr:`Timedelta.value` instead (:issue:`19761`) +- Setting ``PeriodIndex.freq`` (which was not guaranteed to work correctly) is deprecated. Use :meth:`PeriodIndex.asfreq` instead (:issue:`20678`) +- ``Index.get_duplicates()`` is deprecated and will be removed in a future version (:issue:`20239`) +- The previous default behavior of negative indices in ``Categorical.take`` is deprecated. In a future version it will change from meaning missing values to meaning positional indices from the right. The future behavior is consistent with :meth:`Series.take` (:issue:`20664`). +- Passing multiple axes to the ``axis`` parameter in :func:`DataFrame.dropna` has been deprecated and will be removed in a future version (:issue:`20987`) + + +.. _whatsnew_0230.prior_deprecations: + +Removal of prior version deprecations/changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- Warnings against the obsolete usage ``Categorical(codes, categories)``, which were emitted for instance when the first two arguments to ``Categorical()`` had different dtypes, and recommended the use of ``Categorical.from_codes``, have now been removed (:issue:`8074`) +- The ``levels`` and ``labels`` attributes of a ``MultiIndex`` can no longer be set directly (:issue:`4039`). +- ``pd.tseries.util.pivot_annual`` has been removed (deprecated since v0.19). Use ``pivot_table`` instead (:issue:`18370`) +- ``pd.tseries.util.isleapyear`` has been removed (deprecated since v0.19). Use ``.is_leap_year`` property in Datetime-likes instead (:issue:`18370`) +- ``pd.ordered_merge`` has been removed (deprecated since v0.19). 
Use ``pd.merge_ordered`` instead (:issue:`18459`) +- The ``SparseList`` class has been removed (:issue:`14007`) +- The ``pandas.io.wb`` and ``pandas.io.data`` stub modules have been removed (:issue:`13735`) +- ``Categorical.from_array`` has been removed (:issue:`13854`) +- The ``freq`` and ``how`` parameters have been removed from the ``rolling``/``expanding``/``ewm`` methods of DataFrame + and Series (deprecated since v0.18). Instead, resample before calling the methods. (:issue:`18601` & :issue:`18668`) +- ``DatetimeIndex.to_datetime``, ``Timestamp.to_datetime``, ``PeriodIndex.to_datetime``, and ``Index.to_datetime`` have been removed (:issue:`8254`, :issue:`14096`, :issue:`14113`) +- :func:`read_csv` has dropped the ``skip_footer`` parameter (:issue:`13386`) +- :func:`read_csv` has dropped the ``as_recarray`` parameter (:issue:`13373`) +- :func:`read_csv` has dropped the ``buffer_lines`` parameter (:issue:`13360`) +- :func:`read_csv` has dropped the ``compact_ints`` and ``use_unsigned`` parameters (:issue:`13323`) +- The ``Timestamp`` class has dropped the ``offset`` attribute in favor of ``freq`` (:issue:`13593`) +- The ``Series``, ``Categorical``, and ``Index`` classes have dropped the ``reshape`` method (:issue:`13012`) +- ``pandas.tseries.frequencies.get_standard_freq`` has been removed in favor of ``pandas.tseries.frequencies.to_offset(freq).rule_code`` (:issue:`13874`) +- The ``freqstr`` keyword has been removed from ``pandas.tseries.frequencies.to_offset`` in favor of ``freq`` (:issue:`13874`) +- The ``Panel4D`` and ``PanelND`` classes have been removed (:issue:`13776`) +- The ``Panel`` class has dropped the ``to_long`` and ``toLong`` methods (:issue:`19077`) +- The options ``display.line_with`` and ``display.height`` are removed in favor of ``display.width`` and ``display.max_rows`` respectively (:issue:`4391`, :issue:`19107`) +- The ``labels`` attribute of the ``Categorical`` class has been removed in favor of :attr:`Categorical.codes` (:issue:`7768`) 
+- The ``flavor`` parameter has been removed from the :func:`to_sql` method (:issue:`13611`) +- The modules ``pandas.tools.hashing`` and ``pandas.util.hashing`` have been removed (:issue:`16223`) +- The top-level functions ``pd.rolling_*``, ``pd.expanding_*`` and ``pd.ewm*`` have been removed (Deprecated since v0.18). + Instead, use the DataFrame/Series methods :attr:`~DataFrame.rolling`, :attr:`~DataFrame.expanding` and :attr:`~DataFrame.ewm` (:issue:`18723`) +- Imports from ``pandas.core.common`` for functions such as ``is_datetime64_dtype`` are now removed. These are located in ``pandas.api.types``. (:issue:`13634`, :issue:`19769`) +- The ``infer_dst`` keyword in :meth:`Series.tz_localize`, :meth:`DatetimeIndex.tz_localize` + and :class:`DatetimeIndex` has been removed. ``infer_dst=True`` is equivalent to + ``ambiguous='infer'``, and ``infer_dst=False`` to ``ambiguous='raise'`` (:issue:`7963`). +- When ``.resample()`` was changed from an eager to a lazy operation, like ``.groupby()`` in v0.18.0, we put in place compatibility (with a ``FutureWarning``), + so operations would continue to work. This is now fully removed, so a ``Resampler`` will no longer forward compat operations (:issue:`20554`) +- Remove long deprecated ``axis=None`` parameter from ``.replace()`` (:issue:`20271`) + +.. _whatsnew_0230.performance: + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- Indexers on ``Series`` or ``DataFrame`` no longer create a reference cycle (:issue:`17956`) +- Added a keyword argument, ``cache``, to :func:`to_datetime` that improved the performance of converting duplicate datetime arguments (:issue:`11665`) +- :class:`DateOffset` arithmetic performance is improved (:issue:`18218`) +- Converting a ``Series`` of ``Timedelta`` objects to days, seconds, etc...
sped up through vectorization of underlying methods (:issue:`18092`) +- Improved performance of ``.map()`` with a ``Series/dict`` input (:issue:`15081`) +- The overridden ``Timedelta`` properties of days, seconds and microseconds have been removed, leveraging their built-in Python versions instead (:issue:`18242`) +- ``Series`` construction will reduce the number of copies made of the input data in certain cases (:issue:`17449`) +- Improved performance of :func:`Series.dt.date` and :func:`DatetimeIndex.date` (:issue:`18058`) +- Improved performance of :func:`Series.dt.time` and :func:`DatetimeIndex.time` (:issue:`18461`) +- Improved performance of :func:`IntervalIndex.symmetric_difference` (:issue:`18475`) +- Improved performance of ``DatetimeIndex`` and ``Series`` arithmetic operations with Business-Month and Business-Quarter frequencies (:issue:`18489`) +- :func:`Series` / :func:`DataFrame` tab completion limits to 100 values, for better performance. (:issue:`18587`) +- Improved performance of :func:`DataFrame.median` with ``axis=1`` when bottleneck is not installed (:issue:`16468`) +- Improved performance of :func:`MultiIndex.get_loc` for large indexes, at the cost of a reduction in performance for small ones (:issue:`18519`) +- Improved performance of :func:`MultiIndex.remove_unused_levels` when there are no unused levels, at the cost of a reduction in performance when there are (:issue:`19289`) +- Improved performance of :func:`Index.get_loc` for non-unique indexes (:issue:`19478`) +- Improved performance of pairwise ``.rolling()`` and ``.expanding()`` with ``.cov()`` and ``.corr()`` operations (:issue:`17917`) +- Improved performance of :func:`.GroupBy.rank` (:issue:`15779`) +- Improved performance of variable ``.rolling()`` on ``.min()`` and ``.max()`` (:issue:`19521`) +- Improved performance of :func:`.GroupBy.ffill` and :func:`.GroupBy.bfill` (:issue:`11296`) +- Improved performance of :func:`.GroupBy.any` and :func:`.GroupBy.all` (:issue:`15435`) +- 
Improved performance of :func:`.GroupBy.pct_change` (:issue:`19165`) +- Improved performance of :func:`Series.isin` in the case of categorical dtypes (:issue:`20003`) +- Improved performance of ``getattr(Series, attr)`` when the Series has certain index types. This manifested in slow printing of large Series with a ``DatetimeIndex`` (:issue:`19764`) +- Fixed a performance regression for :func:`GroupBy.nth` and :func:`GroupBy.last` with some object columns (:issue:`19283`) +- Improved performance of :func:`.Categorical.from_codes` (:issue:`18501`) + +.. _whatsnew_0230.docs: + +Documentation changes +~~~~~~~~~~~~~~~~~~~~~ + +Thanks to all of the contributors who participated in the pandas Documentation +Sprint, which took place on March 10th. We had about 500 participants from over +30 locations across the world. You should notice that many of the +:ref:`API docstrings <api>` have greatly improved. + +There were too many simultaneous contributions to include a release note for each +improvement, but this `GitHub search`_ should give you an idea of how many docstrings +were improved. + +Special thanks to `Marc Garcia`_ for organizing the sprint. For more information, +read the `NumFOCUS blogpost`_ recapping the sprint. + +.. _GitHub search: https://github.com/pandas-dev/pandas/pulls?utf8=%E2%9C%93&q=is%3Apr+label%3ADocs+created%3A2018-03-10..2018-03-15+ +.. _NumFOCUS blogpost: https://www.numfocus.org/blog/worldwide-pandas-sprint/ +.. _Marc Garcia: https://github.com/datapythonista + +- Changed spelling of "numpy" to "NumPy", and "python" to "Python". (:issue:`19017`) +- Consistency when introducing code samples, using either colon or period. + Rewrote some sentences for greater clarity, added more dynamic references + to functions, methods and classes. + (:issue:`18941`, :issue:`18948`, :issue:`18973`, :issue:`19017`) +- Added a reference to :func:`DataFrame.assign` in the concatenate section of the merging documentation (:issue:`18665`) + +.. 
_whatsnew_0230.bug_fixes: + +Bug fixes +~~~~~~~~~ + +Categorical +^^^^^^^^^^^ + +.. warning:: + + A class of bugs was introduced in pandas 0.21 with ``CategoricalDtype`` that + affects the correctness of operations like ``merge``, ``concat``, and + indexing when comparing multiple unordered ``Categorical`` arrays that have + the same categories, but in a different order. We highly recommend upgrading + or manually aligning your categories before doing these operations. + +- Bug in ``Categorical.equals`` returning the wrong result when comparing two + unordered ``Categorical`` arrays with the same categories, but in a different + order (:issue:`16603`) +- Bug in :func:`pandas.api.types.union_categoricals` returning the wrong result + for unordered categoricals with the categories in a different order. + This affected :func:`pandas.concat` with Categorical data (:issue:`19096`). +- Bug in :func:`pandas.merge` returning the wrong result when joining on an + unordered ``Categorical`` that had the same categories but in a different + order (:issue:`19551`) +- Bug in :meth:`CategoricalIndex.get_indexer` returning the wrong result when + ``target`` was an unordered ``Categorical`` that had the same categories as + ``self`` but in a different order (:issue:`19551`) +- Bug in :meth:`Index.astype` with a categorical dtype where the resultant index is not converted to a :class:`CategoricalIndex` for all types of index (:issue:`18630`) +- Bug in :meth:`Series.astype` and ``Categorical.astype()`` where existing categorical data does not get updated (:issue:`10696`, :issue:`18593`) +- Bug in :meth:`Series.str.split` with ``expand=True`` incorrectly raising an IndexError on empty strings (:issue:`20002`). 
+- Bug in :class:`Index` constructor with ``dtype=CategoricalDtype(...)`` where ``categories`` and ``ordered`` are not maintained (:issue:`19032`) +- Bug in :class:`Series` constructor with scalar and ``dtype=CategoricalDtype(...)`` where ``categories`` and ``ordered`` are not maintained (:issue:`19565`) +- Bug in ``Categorical.__iter__`` not converting to Python types (:issue:`19909`) +- Bug in :func:`pandas.factorize` returning the unique codes for the ``uniques``. This now returns a ``Categorical`` with the same dtype as the input (:issue:`19721`) +- Bug in :func:`pandas.factorize` including an item for missing values in the ``uniques`` return value (:issue:`19721`) +- Bug in :meth:`Series.take` with categorical data interpreting ``-1`` in ``indices`` as missing value markers, rather than the last element of the Series (:issue:`20664`) + +Datetimelike +^^^^^^^^^^^^ + +- Bug in :func:`Series.__sub__` subtracting a non-nanosecond ``np.datetime64`` object from a ``Series`` gave incorrect results (:issue:`7996`) +- Bug in :class:`DatetimeIndex`, :class:`TimedeltaIndex` addition and subtraction of zero-dimensional integer arrays gave incorrect results (:issue:`19012`) +- Bug in :class:`DatetimeIndex` and :class:`TimedeltaIndex` where adding or subtracting an array-like of ``DateOffset`` objects either raised (``np.array``, ``pd.Index``) or broadcast incorrectly (``pd.Series``) (:issue:`18849`) +- Bug in :func:`Series.__add__` adding Series with dtype ``timedelta64[ns]`` to a timezone-aware ``DatetimeIndex`` incorrectly dropped timezone information (:issue:`13905`) +- Adding a ``Period`` object to a ``datetime`` or ``Timestamp`` object will now correctly raise a ``TypeError`` (:issue:`17983`) +- Bug in :class:`Timestamp` where comparison with an array of ``Timestamp`` objects would result in a ``RecursionError`` (:issue:`15183`) +- Bug in :class:`Series` floor-division where operating on a scalar ``timedelta`` raises an exception (:issue:`18846`) +- Bug in 
:class:`DatetimeIndex` where the repr was not showing high-precision time values at the end of a day (e.g., 23:59:59.999999999) (:issue:`19030`) +- Bug in ``.astype()`` to non-ns timedelta units would hold the incorrect dtype (:issue:`19176`, :issue:`19223`, :issue:`12425`) +- Bug in subtracting :class:`Series` from ``NaT`` incorrectly returning ``NaT`` (:issue:`19158`) +- Bug in :func:`Series.truncate` which raises ``TypeError`` with a monotonic ``PeriodIndex`` (:issue:`17717`) +- Bug in :func:`~DataFrame.pct_change` using ``periods`` and ``freq`` returned different length outputs (:issue:`7292`) +- Bug in comparison of :class:`DatetimeIndex` against ``None`` or ``datetime.date`` objects raising ``TypeError`` for ``==`` and ``!=`` comparisons instead of all-``False`` and all-``True``, respectively (:issue:`19301`) +- Bug in :class:`Timestamp` and :func:`to_datetime` where a string representing a barely out-of-bounds timestamp would be incorrectly rounded down instead of raising ``OutOfBoundsDatetime`` (:issue:`19382`) +- Bug in :func:`Timestamp.floor` :func:`DatetimeIndex.floor` where time stamps far in the future and past were not rounded correctly (:issue:`19206`) +- Bug in :func:`to_datetime` where passing an out-of-bounds datetime with ``errors='coerce'`` and ``utc=True`` would raise ``OutOfBoundsDatetime`` instead of parsing to ``NaT`` (:issue:`19612`) +- Bug in :class:`DatetimeIndex` and :class:`TimedeltaIndex` addition and subtraction where name of the returned object was not always set consistently. 
(:issue:`19744`) +- Bug in :class:`DatetimeIndex` and :class:`TimedeltaIndex` addition and subtraction where operations with numpy arrays raised ``TypeError`` (:issue:`19847`) +- Bug in :class:`DatetimeIndex` and :class:`TimedeltaIndex` where setting the ``freq`` attribute was not fully supported (:issue:`20678`) + +Timedelta +^^^^^^^^^ + +- Bug in :func:`Timedelta.__mul__` where multiplying by ``NaT`` returned ``NaT`` instead of raising a ``TypeError`` (:issue:`19819`) +- Bug in :class:`Series` with ``dtype='timedelta64[ns]'`` where addition or subtraction of ``TimedeltaIndex`` had results cast to ``dtype='int64'`` (:issue:`17250`) +- Bug in :class:`Series` with ``dtype='timedelta64[ns]'`` where addition or subtraction of ``TimedeltaIndex`` could return a ``Series`` with an incorrect name (:issue:`19043`) +- Bug in :func:`Timedelta.__floordiv__` and :func:`Timedelta.__rfloordiv__` dividing by many incompatible numpy objects was incorrectly allowed (:issue:`18846`) +- Bug where dividing a scalar timedelta-like object with :class:`TimedeltaIndex` performed the reciprocal operation (:issue:`19125`) +- Bug in :class:`TimedeltaIndex` where division by a ``Series`` would return a ``TimedeltaIndex`` instead of a ``Series`` (:issue:`19042`) +- Bug in :func:`Timedelta.__add__`, :func:`Timedelta.__sub__` where adding or subtracting a ``np.timedelta64`` object would return another ``np.timedelta64`` instead of a ``Timedelta`` (:issue:`19738`) +- Bug in :func:`Timedelta.__floordiv__`, :func:`Timedelta.__rfloordiv__` where operating with a ``Tick`` object would raise a ``TypeError`` instead of returning a numeric value (:issue:`19738`) +- Bug in :func:`Period.asfreq` where periods near ``datetime(1, 1, 1)`` could be converted incorrectly (:issue:`19643`, :issue:`19834`) +- Bug in :func:`Timedelta.total_seconds` causing precision errors, for example ``Timedelta('30S').total_seconds()==30.000000000000004`` (:issue:`19458`) +- Bug in :func:`Timedelta.__rmod__` where operating 
with a ``numpy.timedelta64`` returned a ``timedelta64`` object instead of a ``Timedelta`` (:issue:`19820`) +- Multiplication of :class:`TimedeltaIndex` by ``TimedeltaIndex`` will now raise ``TypeError`` instead of raising ``ValueError`` in cases of length mismatch (:issue:`19333`) +- Bug in indexing a :class:`TimedeltaIndex` with a ``np.timedelta64`` object which was raising a ``TypeError`` (:issue:`20393`) + + +Timezones +^^^^^^^^^ + +- Bug in creating a ``Series`` from an array that contains both tz-naive and tz-aware values will result in a ``Series`` whose dtype is tz-aware instead of object (:issue:`16406`) +- Bug in comparison of timezone-aware :class:`DatetimeIndex` against ``NaT`` incorrectly raising ``TypeError`` (:issue:`19276`) +- Bug in :meth:`DatetimeIndex.astype` when converting between timezone aware dtypes, and converting from timezone aware to naive (:issue:`18951`) +- Bug in comparing :class:`DatetimeIndex`, which failed to raise ``TypeError`` when attempting to compare timezone-aware and timezone-naive datetimelike objects (:issue:`18162`) +- Bug in localization of a naive, datetime string in a ``Series`` constructor with a ``datetime64[ns, tz]`` dtype (:issue:`174151`) +- :func:`Timestamp.replace` will now handle Daylight Savings transitions gracefully (:issue:`18319`) +- Bug in tz-aware :class:`DatetimeIndex` where addition/subtraction with a :class:`TimedeltaIndex` or array with ``dtype='timedelta64[ns]'`` was incorrect (:issue:`17558`) +- Bug in :func:`DatetimeIndex.insert` where inserting ``NaT`` into a timezone-aware index incorrectly raised (:issue:`16357`) +- Bug in :class:`DataFrame` constructor, where tz-aware Datetimeindex and a given column name will result in an empty ``DataFrame`` (:issue:`19157`) +- Bug in :func:`Timestamp.tz_localize` where localizing a timestamp near the minimum or maximum valid values could overflow and return a timestamp with an incorrect nanosecond value (:issue:`12677`) +- Bug when iterating over 
:class:`DatetimeIndex` that was localized with fixed timezone offset that rounded nanosecond precision to microseconds (:issue:`19603`) +- Bug in :func:`DataFrame.diff` that raised an ``IndexError`` with tz-aware values (:issue:`18578`) +- Bug in :func:`melt` that converted tz-aware dtypes to tz-naive (:issue:`15785`) +- Bug in :func:`DataFrame.count` that raised a ``ValueError`` if :func:`DataFrame.dropna` was called for a single column with timezone-aware values. (:issue:`13407`) + +Offsets +^^^^^^^ + +- Bug in :class:`WeekOfMonth` and :class:`Week` where addition and subtraction did not roll correctly (:issue:`18510`, :issue:`18672`, :issue:`18864`) +- Bug in :class:`WeekOfMonth` and :class:`LastWeekOfMonth` where default keyword arguments for constructor raised ``ValueError`` (:issue:`19142`) +- Bug in :class:`FY5253Quarter`, :class:`LastWeekOfMonth` where rollback and rollforward behavior was inconsistent with addition and subtraction behavior (:issue:`18854`) +- Bug in :class:`FY5253` where ``datetime`` addition and subtraction incremented incorrectly for dates on the year-end but not normalized to midnight (:issue:`18854`) +- Bug in :class:`FY5253` where date offsets could incorrectly raise an ``AssertionError`` in arithmetic operations (:issue:`14774`) + +Numeric +^^^^^^^ +- Bug in :class:`Series` constructor with an int or float list where specifying ``dtype=str``, ``dtype='str'`` or ``dtype='U'`` failed to convert the data elements to strings (:issue:`16605`) +- Bug in :class:`Index` multiplication and division methods where operating with a ``Series`` would return an ``Index`` object instead of a ``Series`` object (:issue:`19042`) +- Bug in the :class:`DataFrame` constructor in which data containing very large positive or very large negative numbers was causing ``OverflowError`` (:issue:`18584`) +- Bug in :class:`Index` constructor with ``dtype='uint64'`` where int-like floats were not coerced to :class:`UInt64Index` (:issue:`18400`) +- Bug in 
:class:`DataFrame` flex arithmetic (e.g. ``df.add(other, fill_value=foo)``) with a ``fill_value`` other than ``None`` failed to raise ``NotImplementedError`` in corner cases where either the frame or ``other`` has length zero (:issue:`19522`) +- Multiplication and division of numeric-dtyped :class:`Index` objects with timedelta-like scalars returns ``TimedeltaIndex`` instead of raising ``TypeError`` (:issue:`19333`) +- Bug where ``NaN`` was returned instead of 0 by :func:`Series.pct_change` and :func:`DataFrame.pct_change` when ``fill_method`` is not ``None`` (:issue:`19873`) + +Strings +^^^^^^^ +- Bug in :func:`Series.str.get` with a dictionary in the values and the index not in the keys, raising ``KeyError`` (:issue:`20671`) + + +Indexing +^^^^^^^^ + +- Bug in :class:`Index` construction from list of mixed type tuples (:issue:`18505`) +- Bug in :func:`Index.drop` when passing a list of both tuples and non-tuples (:issue:`18304`) +- Bug in :func:`DataFrame.drop`, :meth:`Panel.drop`, :meth:`Series.drop`, :meth:`Index.drop` where no ``KeyError`` is raised when dropping a non-existent element from an axis that contains duplicates (:issue:`19186`) +- Bug in indexing a datetimelike ``Index`` that raised ``ValueError`` instead of ``IndexError`` (:issue:`18386`). 
+- :func:`Index.to_series` now accepts ``index`` and ``name`` kwargs (:issue:`18699`) +- :func:`DatetimeIndex.to_series` now accepts ``index`` and ``name`` kwargs (:issue:`18699`) +- Bug in indexing non-scalar value from ``Series`` having non-unique ``Index`` will return value flattened (:issue:`17610`) +- Bug in indexing with iterator containing only missing keys, which raised no error (:issue:`20748`) +- Fixed inconsistency in ``.ix`` between list and scalar keys when the index has integer dtype and does not include the desired keys (:issue:`20753`) +- Bug in ``__setitem__`` when indexing a :class:`DataFrame` with a 2-d boolean ndarray (:issue:`18582`) +- Bug in ``str.extractall`` when there were no matches empty :class:`Index` was returned instead of appropriate :class:`MultiIndex` (:issue:`19034`) +- Bug in :class:`IntervalIndex` where empty and purely NA data was constructed inconsistently depending on the construction method (:issue:`18421`) +- Bug in :func:`IntervalIndex.symmetric_difference` where the symmetric difference with a non-``IntervalIndex`` did not raise (:issue:`18475`) +- Bug in :class:`IntervalIndex` where set operations that returned an empty ``IntervalIndex`` had the wrong dtype (:issue:`19101`) +- Bug in :meth:`DataFrame.drop_duplicates` where no ``KeyError`` is raised when passing in columns that don't exist on the ``DataFrame`` (:issue:`19726`) +- Bug in ``Index`` subclasses constructors that ignore unexpected keyword arguments (:issue:`19348`) +- Bug in :meth:`Index.difference` when taking difference of an ``Index`` with itself (:issue:`20040`) +- Bug in :meth:`DataFrame.first_valid_index` and :meth:`DataFrame.last_valid_index` in presence of entire rows of NaNs in the middle of values (:issue:`20499`). 
+- Bug in :class:`IntervalIndex` where some indexing operations were not supported for overlapping or non-monotonic ``uint64`` data (:issue:`20636`) +- Bug in ``Series.is_unique`` where extraneous output in stderr is shown if Series contains objects with ``__ne__`` defined (:issue:`20661`) +- Bug in ``.loc`` assignment with a single-element list-like incorrectly assigns as a list (:issue:`19474`) +- Bug in partial string indexing on a ``Series/DataFrame`` with a monotonic decreasing ``DatetimeIndex`` (:issue:`19362`) +- Bug in performing in-place operations on a ``DataFrame`` with a duplicate ``Index`` (:issue:`17105`) +- Bug in :meth:`IntervalIndex.get_loc` and :meth:`IntervalIndex.get_indexer` when used with an :class:`IntervalIndex` containing a single interval (:issue:`17284`, :issue:`20921`) +- Bug in ``.loc`` with a ``uint64`` indexer (:issue:`20722`) + +MultiIndex +^^^^^^^^^^ + +- Bug in :func:`MultiIndex.__contains__` where non-tuple keys would return ``True`` even if they had been dropped (:issue:`19027`) +- Bug in :func:`MultiIndex.set_labels` which would cause casting (and potentially clipping) of the new labels if the ``level`` argument is not 0 or a list like [0, 1, ... 
] (:issue:`19057`) +- Bug in :func:`MultiIndex.get_level_values` which would return an invalid index on level of ints with missing values (:issue:`17924`) +- Bug in :func:`MultiIndex.unique` when called on empty :class:`MultiIndex` (:issue:`20568`) +- Bug in :func:`MultiIndex.unique` which would not preserve level names (:issue:`20570`) +- Bug in :func:`MultiIndex.remove_unused_levels` which would fill nan values (:issue:`18417`) +- Bug in :func:`MultiIndex.from_tuples` which would fail to take zipped tuples in python3 (:issue:`18434`) +- Bug in :func:`MultiIndex.get_loc` which would fail to automatically cast values between float and int (:issue:`18818`, :issue:`15994`) +- Bug in :func:`MultiIndex.get_loc` which would cast boolean to integer labels (:issue:`19086`) +- Bug in :func:`MultiIndex.get_loc` which would fail to locate keys containing ``NaN`` (:issue:`18485`) +- Bug in :func:`MultiIndex.get_loc` in large :class:`MultiIndex`, would fail when levels had different dtypes (:issue:`18520`) +- Bug in indexing where nested indexers having only numpy arrays are handled incorrectly (:issue:`19686`) + + +IO +^^ + +- :func:`read_html` now rewinds seekable IO objects after parse failure, before attempting to parse with a new parser. If a parser errors and the object is non-seekable, an informative error is raised suggesting the use of a different parser (:issue:`17975`) +- :meth:`DataFrame.to_html` now has an option to add an id to the leading ``
`` tag (:issue:`8496`) +- Bug in :func:`read_msgpack` with a non existent file is passed in Python 2 (:issue:`15296`) +- Bug in :func:`read_csv` where a ``MultiIndex`` with duplicate columns was not being mangled appropriately (:issue:`18062`) +- Bug in :func:`read_csv` where missing values were not being handled properly when ``keep_default_na=False`` with dictionary ``na_values`` (:issue:`19227`) +- Bug in :func:`read_csv` causing heap corruption on 32-bit, big-endian architectures (:issue:`20785`) +- Bug in :func:`read_sas` where a file with 0 variables gave an ``AttributeError`` incorrectly. Now it gives an ``EmptyDataError`` (:issue:`18184`) +- Bug in :func:`DataFrame.to_latex` where pairs of braces meant to serve as invisible placeholders were escaped (:issue:`18667`) +- Bug in :func:`DataFrame.to_latex` where a ``NaN`` in a ``MultiIndex`` would cause an ``IndexError`` or incorrect output (:issue:`14249`) +- Bug in :func:`DataFrame.to_latex` where a non-string index-level name would result in an ``AttributeError`` (:issue:`19981`) +- Bug in :func:`DataFrame.to_latex` where the combination of an index name and the ``index_names=False`` option would result in incorrect output (:issue:`18326`) +- Bug in :func:`DataFrame.to_latex` where a ``MultiIndex`` with an empty string as its name would result in incorrect output (:issue:`18669`) +- Bug in :func:`DataFrame.to_latex` where missing space characters caused wrong escaping and produced non-valid latex in some cases (:issue:`20859`) +- Bug in :func:`read_json` where large numeric values were causing an ``OverflowError`` (:issue:`18842`) +- Bug in :func:`DataFrame.to_parquet` where an exception was raised if the write destination is S3 (:issue:`19134`) +- :class:`Interval` now supported in :func:`DataFrame.to_excel` for all Excel file types (:issue:`19242`) +- :class:`Timedelta` now supported in :func:`DataFrame.to_excel` for all Excel file types (:issue:`19242`, :issue:`9155`, :issue:`19900`) +- Bug in 
:meth:`pandas.io.stata.StataReader.value_labels` raising an ``AttributeError`` when called on very old files. Now returns an empty dict (:issue:`19417`) +- Bug in :func:`read_pickle` when unpickling objects with :class:`TimedeltaIndex` or :class:`Float64Index` created with pandas prior to version 0.20 (:issue:`19939`) +- Bug in :meth:`pandas.io.json.json_normalize` where sub-records are not properly normalized if any sub-records values are NoneType (:issue:`20030`) +- Bug in ``usecols`` parameter in :func:`read_csv` where error is not raised correctly when passing a string. (:issue:`20529`) +- Bug in :func:`HDFStore.keys` when reading a file with a soft link causes exception (:issue:`20523`) +- Bug in :func:`HDFStore.select_column` where a key which is not a valid store raised an ``AttributeError`` instead of a ``KeyError`` (:issue:`17912`) + +Plotting +^^^^^^^^ + +- Better error message when attempting to plot but matplotlib is not installed (:issue:`19810`). +- :func:`DataFrame.plot` now raises a ``ValueError`` when the ``x`` or ``y`` argument is improperly formed (:issue:`18671`) +- Bug in :func:`DataFrame.plot` when ``x`` and ``y`` arguments given as positions caused incorrect referenced columns for line, bar and area plots (:issue:`20056`) +- Bug in formatting tick labels with ``datetime.time()`` and fractional seconds (:issue:`18478`). +- :meth:`Series.plot.kde` has exposed the args ``ind`` and ``bw_method`` in the docstring (:issue:`18461`). The argument ``ind`` may now also be an integer (number of sample points). 
+- :func:`DataFrame.plot` now supports multiple columns to the ``y`` argument (:issue:`19699`) + + +GroupBy/resample/rolling +^^^^^^^^^^^^^^^^^^^^^^^^ + +- Bug when grouping by a single column and aggregating with a class like ``list`` or ``tuple`` (:issue:`18079`) +- Fixed regression in :func:`DataFrame.groupby` which would not emit an error when called with a tuple key not in the index (:issue:`18798`) +- Bug in :func:`DataFrame.resample` which silently ignored unsupported (or mistyped) options for ``label``, ``closed`` and ``convention`` (:issue:`19303`) +- Bug in :func:`DataFrame.groupby` where tuples were interpreted as lists of keys rather than as keys (:issue:`17979`, :issue:`18249`) +- Bug in :func:`DataFrame.groupby` where aggregation by ``first``/``last``/``min``/``max`` was causing timestamps to lose precision (:issue:`19526`) +- Bug in :func:`DataFrame.transform` where particular aggregation functions were being incorrectly cast to match the dtype(s) of the grouped data (:issue:`19200`) +- Bug in :func:`DataFrame.groupby` passing the ``on=`` kwarg, and subsequently using ``.apply()`` (:issue:`17813`) +- Bug in :func:`DataFrame.resample().aggregate <.Resampler.aggregate>` not raising a ``KeyError`` when aggregating a non-existent column (:issue:`16766`, :issue:`19566`) +- Bug in :func:`DataFrameGroupBy.cumsum` and :func:`DataFrameGroupBy.cumprod` when ``skipna`` was passed (:issue:`19806`) +- Bug in :func:`DataFrame.resample` that dropped timezone information (:issue:`13238`) +- Bug in :func:`DataFrame.groupby` where transformations using ``np.all`` and ``np.any`` were raising a ``ValueError`` (:issue:`20653`) +- Bug in :func:`DataFrame.resample` where ``ffill``, ``bfill``, ``pad``, ``backfill``, ``fillna``, ``interpolate``, and ``asfreq`` were ignoring ``loffset``. 
(:issue:`20744`) +- Bug in :func:`DataFrame.groupby` when applying a function that has mixed data types and the user supplied function can fail on the grouping column (:issue:`20949`) +- Bug in :func:`DataFrameGroupBy.rolling().apply() <.Rolling.apply>` where operations performed against the associated :class:`DataFrameGroupBy` object could impact the inclusion of the grouped item(s) in the result (:issue:`14013`) + +Sparse +^^^^^^ + +- Bug in which creating a :class:`SparseDataFrame` from a dense ``Series`` or an unsupported type raised an uncontrolled exception (:issue:`19374`) +- Bug in :class:`SparseDataFrame.to_csv` causing exception (:issue:`19384`) +- Bug in :class:`SparseSeries.memory_usage` which caused segfault by accessing non sparse elements (:issue:`19368`) +- Bug in constructing a :class:`SparseArray`: if ``data`` is a scalar and ``index`` is defined it will coerce to ``float64`` regardless of scalar's dtype. (:issue:`19163`) + +Reshaping +^^^^^^^^^ + +- Bug in :func:`DataFrame.merge` where referencing a ``CategoricalIndex`` by name, where the ``by`` kwarg would ``KeyError`` (:issue:`20777`) +- Bug in :func:`DataFrame.stack` which fails trying to sort mixed type levels under Python 3 (:issue:`18310`) +- Bug in :func:`DataFrame.unstack` which casts int to float if ``columns`` is a ``MultiIndex`` with unused levels (:issue:`17845`) +- Bug in :func:`DataFrame.unstack` which raises an error if ``index`` is a ``MultiIndex`` with unused labels on the unstacked level (:issue:`18562`) +- Fixed construction of a :class:`Series` from a ``dict`` containing ``NaN`` as key (:issue:`18480`) +- Fixed construction of a :class:`DataFrame` from a ``dict`` containing ``NaN`` as key (:issue:`18455`) +- Disabled construction of a :class:`Series` where len(index) > len(data) = 1, which previously would broadcast the data item, and now raises a ``ValueError`` (:issue:`18819`) +- Suppressed error in the construction of a :class:`DataFrame` from a ``dict`` containing scalar 
values when the corresponding keys are not included in the passed index (:issue:`18600`) + +- Fixed (changed from ``object`` to ``float64``) dtype of :class:`DataFrame` initialized with axes, no data, and ``dtype=int`` (:issue:`19646`) +- Bug in :func:`Series.rank` where ``Series`` containing ``NaT`` modifies the ``Series`` inplace (:issue:`18521`) +- Bug in :func:`cut` which fails when using readonly arrays (:issue:`18773`) +- Bug in :func:`DataFrame.pivot_table` which fails when the ``aggfunc`` arg is of type string. The behavior is now consistent with other methods like ``agg`` and ``apply`` (:issue:`18713`) +- Bug in :func:`DataFrame.merge` in which merging using ``Index`` objects as vectors raised an Exception (:issue:`19038`) +- Bug in :func:`DataFrame.stack`, :func:`DataFrame.unstack`, :func:`Series.unstack` which were not returning subclasses (:issue:`15563`) +- Bug in timezone comparisons, manifesting as a conversion of the index to UTC in ``.concat()`` (:issue:`18523`) +- Bug in :func:`concat` when concatenating sparse and dense series it returns only a ``SparseDataFrame``. Should be a ``DataFrame``. 
(:issue:`18914`, :issue:`18686`, and :issue:`16874`) +- Improved error message for :func:`DataFrame.merge` when there is no common merge key (:issue:`19427`) +- Bug in :func:`DataFrame.join` which does an ``outer`` instead of a ``left`` join when being called with multiple DataFrames and some have non-unique indices (:issue:`19624`) +- :func:`Series.rename` now accepts ``axis`` as a kwarg (:issue:`18589`) +- Bug in :func:`~DataFrame.rename` where an Index of same-length tuples was converted to a MultiIndex (:issue:`19497`) +- Comparisons between :class:`Series` and :class:`Index` would return a ``Series`` with an incorrect name, ignoring the ``Index``'s name attribute (:issue:`19582`) +- Bug in :func:`qcut` where datetime and timedelta data with ``NaT`` present raised a ``ValueError`` (:issue:`19768`) +- Bug in :func:`DataFrame.iterrows`, which would infer strings not compliant with `ISO8601 <https://en.wikipedia.org/wiki/ISO_8601>`_ as datetimes (:issue:`19671`) +- Bug in :class:`Series` constructor with ``Categorical`` where a ``ValueError`` is not raised when an index of different length is given (:issue:`19342`) +- Bug in :meth:`DataFrame.astype` where column metadata is lost when converting to categorical or a dictionary of dtypes (:issue:`19920`) +- Bug in :func:`cut` and :func:`qcut` where timezone information was dropped (:issue:`19872`) +- Bug in :class:`Series` constructor with a ``dtype=str``, previously raised in some cases (:issue:`19853`) +- Bug in :func:`get_dummies`, and :func:`select_dtypes`, where duplicate column names caused incorrect behavior (:issue:`20848`) +- Bug in :func:`isna`, which cannot handle ambiguous typed lists (:issue:`20675`) +- Bug in :func:`concat` which raises an error when concatenating TZ-aware dataframes and all-NaT dataframes (:issue:`12396`) +- Bug in :func:`concat` which raises an error when concatenating empty TZ-aware series (:issue:`18447`) + +Other +^^^^^ + +- Improved error message when attempting to use a Python keyword as an identifier in a ``numexpr`` 
backed query (:issue:`18221`) +- Bug in accessing a :func:`pandas.get_option`, which raised ``KeyError`` rather than ``OptionError`` when looking up a non-existent option key in some cases (:issue:`19789`) +- Bug in :func:`testing.assert_series_equal` and :func:`testing.assert_frame_equal` for Series or DataFrames with differing unicode data (:issue:`20503`) + +.. _whatsnew_0.23.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.22.0..v0.23.0 From 7273d4f621ec47e82df916b31bf2ca39874f9ab1 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sun, 27 Jul 2025 11:37:22 -0400 Subject: [PATCH 26/26] Finish up --- .github/workflows/code-checks.yml | 376 ++++++------- .github/workflows/unit-tests.yml | 846 +++++++++++++++--------------- doc/source/whatsnew/index.rst | 1 + 3 files changed, 612 insertions(+), 611 deletions(-) diff --git a/.github/workflows/code-checks.yml b/.github/workflows/code-checks.yml index f5e77b75c97df..728019b06e053 100644 --- a/.github/workflows/code-checks.yml +++ b/.github/workflows/code-checks.yml @@ -1,188 +1,188 @@ -#name: Code Checks -# -#on: -# push: -# branches: -# - main -# - 2.3.x -# pull_request: -# branches: -# - main -# - 2.3.x -# -#env: -# ENV_FILE: environment.yml -# PANDAS_CI: 1 -# -#permissions: -# contents: read -# -## pre-commit run by https://pre-commit.ci/ -#jobs: -# docstring_typing_manual_hooks: -# name: Docstring validation, typing, and other manual pre-commit hooks -# runs-on: ubuntu-24.04 -# defaults: -# run: -# shell: bash -el {0} -# -# concurrency: -# # https://github.community/t/concurrecy-not-work-for-push/183068/7 -# group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-code-checks -# cancel-in-progress: true -# -# steps: -# - name: Checkout -# uses: actions/checkout@v4 -# with: -# fetch-depth: 0 -# -# - name: Set up Conda -# uses: ./.github/actions/setup-conda -# -# - name: Build Pandas -# id: build -# uses: ./.github/actions/build_pandas -# with: -# editable: false -# -# # 
The following checks are independent of each other and should still be run if one fails -# -# # TODO: The doctests have to be run first right now, since the Cython doctests only work -# # with pandas installed in non-editable mode -# # This can be removed once pytest-cython doesn't require C extensions to be installed inplace -# -# - name: Extra installs -# # https://pytest-qt.readthedocs.io/en/latest/troubleshooting.html#github-actions-azure-pipelines-travis-ci-and-gitlab-ci-cd -# run: sudo apt-get update && sudo apt-get install -y libegl1 libopengl0 -# -# - name: Run doctests -# run: cd ci && ./code_checks.sh doctests -# if: ${{ steps.build.outcome == 'success' && always() }} -# -# - name: Install pandas in editable mode -# id: build-editable -# if: ${{ steps.build.outcome == 'success' && always() }} -# uses: ./.github/actions/build_pandas -# with: -# editable: true -# -# - name: Check for no warnings when building single-page docs -# run: ci/code_checks.sh single-docs -# if: ${{ steps.build.outcome == 'success' && always() }} -# -# - name: Run checks on imported code -# run: ci/code_checks.sh code -# if: ${{ steps.build.outcome == 'success' && always() }} -# -# - name: Run docstring validation -# run: ci/code_checks.sh docstrings -# if: ${{ steps.build.outcome == 'success' && always() }} -# -# - name: Run check of documentation notebooks -# run: ci/code_checks.sh notebooks -# if: ${{ steps.build.outcome == 'success' && always() }} -# -# - name: Use existing environment for type checking -# run: | -# echo $PATH >> $GITHUB_PATH -# echo "PYTHONHOME=$PYTHONHOME" >> $GITHUB_ENV -# echo "PYTHONPATH=$PYTHONPATH" >> $GITHUB_ENV -# if: ${{ steps.build.outcome == 'success' && always() }} -# -# - name: Typing -# uses: pre-commit/action@v3.0.1 -# with: -# extra_args: --verbose --hook-stage manual --all-files -# if: ${{ steps.build.outcome == 'success' && always() }} -# -# - name: Run docstring validation script tests -# run: pytest scripts -# if: ${{ steps.build.outcome == 
'success' && always() }} -# -# asv-benchmarks: -# name: ASV Benchmarks -# runs-on: ubuntu-24.04 -# defaults: -# run: -# shell: bash -el {0} -# -# concurrency: -# # https://github.community/t/concurrecy-not-work-for-push/183068/7 -# group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-asv-benchmarks -# cancel-in-progress: true -# -# steps: -# - name: Checkout -# uses: actions/checkout@v4 -# with: -# fetch-depth: 0 -# -# - name: Set up Conda -# uses: ./.github/actions/setup-conda -# -# - name: Build Pandas -# id: build -# uses: ./.github/actions/build_pandas -# -# - name: Run ASV benchmarks -# run: | -# cd asv_bench -# asv machine --yes -# asv run --quick --dry-run --durations=30 --python=same --show-stderr -# -# build_docker_dev_environment: -# name: Build Docker Dev Environment -# runs-on: ubuntu-24.04 -# defaults: -# run: -# shell: bash -el {0} -# -# concurrency: -# # https://github.community/t/concurrecy-not-work-for-push/183068/7 -# group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-build_docker_dev_environment -# cancel-in-progress: true -# -# steps: -# - name: Clean up dangling images -# run: docker image prune -f -# -# - name: Checkout -# uses: actions/checkout@v4 -# with: -# fetch-depth: 0 -# -# - name: Build image -# run: docker build --pull --no-cache --tag pandas-dev-env . 
-# -# - name: Show environment -# run: docker run --rm pandas-dev-env python -c "import pandas as pd; print(pd.show_versions())" -# -# requirements-dev-text-installable: -# name: Test install requirements-dev.txt -# runs-on: ubuntu-24.04 -# -# concurrency: -# # https://github.community/t/concurrecy-not-work-for-push/183068/7 -# group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-requirements-dev-text-installable -# cancel-in-progress: true -# -# steps: -# - name: Checkout -# uses: actions/checkout@v4 -# with: -# fetch-depth: 0 -# -# - name: Setup Python -# id: setup_python -# uses: actions/setup-python@v5 -# with: -# python-version: '3.10' -# cache: 'pip' -# cache-dependency-path: 'requirements-dev.txt' -# -# - name: Install requirements-dev.txt -# run: pip install -r requirements-dev.txt -# -# - name: Check Pip Cache Hit -# run: echo ${{ steps.setup_python.outputs.cache-hit }} +name: Code Checks + +on: + push: + branches: + - main + - 2.3.x + pull_request: + branches: + - main + - 2.3.x + +env: + ENV_FILE: environment.yml + PANDAS_CI: 1 + +permissions: + contents: read + +# pre-commit run by https://pre-commit.ci/ +jobs: + docstring_typing_manual_hooks: + name: Docstring validation, typing, and other manual pre-commit hooks + runs-on: ubuntu-24.04 + defaults: + run: + shell: bash -el {0} + + concurrency: + # https://github.community/t/concurrecy-not-work-for-push/183068/7 + group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-code-checks + cancel-in-progress: true + + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set up Conda + uses: ./.github/actions/setup-conda + + - name: Build Pandas + id: build + uses: ./.github/actions/build_pandas + with: + editable: false + + # The following checks are independent of each other and should still be run if one fails + + # TODO: The doctests have to be run first right now, since the Cython doctests only work + # with pandas installed 
in non-editable mode + # This can be removed once pytest-cython doesn't require C extensions to be installed inplace + + - name: Extra installs + # https://pytest-qt.readthedocs.io/en/latest/troubleshooting.html#github-actions-azure-pipelines-travis-ci-and-gitlab-ci-cd + run: sudo apt-get update && sudo apt-get install -y libegl1 libopengl0 + + - name: Run doctests + run: cd ci && ./code_checks.sh doctests + if: ${{ steps.build.outcome == 'success' && always() }} + + - name: Install pandas in editable mode + id: build-editable + if: ${{ steps.build.outcome == 'success' && always() }} + uses: ./.github/actions/build_pandas + with: + editable: true + + - name: Check for no warnings when building single-page docs + run: ci/code_checks.sh single-docs + if: ${{ steps.build.outcome == 'success' && always() }} + + - name: Run checks on imported code + run: ci/code_checks.sh code + if: ${{ steps.build.outcome == 'success' && always() }} + + - name: Run docstring validation + run: ci/code_checks.sh docstrings + if: ${{ steps.build.outcome == 'success' && always() }} + + - name: Run check of documentation notebooks + run: ci/code_checks.sh notebooks + if: ${{ steps.build.outcome == 'success' && always() }} + + - name: Use existing environment for type checking + run: | + echo $PATH >> $GITHUB_PATH + echo "PYTHONHOME=$PYTHONHOME" >> $GITHUB_ENV + echo "PYTHONPATH=$PYTHONPATH" >> $GITHUB_ENV + if: ${{ steps.build.outcome == 'success' && always() }} + + - name: Typing + uses: pre-commit/action@v3.0.1 + with: + extra_args: --verbose --hook-stage manual --all-files + if: ${{ steps.build.outcome == 'success' && always() }} + + - name: Run docstring validation script tests + run: pytest scripts + if: ${{ steps.build.outcome == 'success' && always() }} + + asv-benchmarks: + name: ASV Benchmarks + runs-on: ubuntu-24.04 + defaults: + run: + shell: bash -el {0} + + concurrency: + # https://github.community/t/concurrecy-not-work-for-push/183068/7 + group: ${{ github.event_name == 'push' 
&& github.run_number || github.ref }}-asv-benchmarks + cancel-in-progress: true + + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set up Conda + uses: ./.github/actions/setup-conda + + - name: Build Pandas + id: build + uses: ./.github/actions/build_pandas + + - name: Run ASV benchmarks + run: | + cd asv_bench + asv machine --yes + asv run --quick --dry-run --durations=30 --python=same --show-stderr + + build_docker_dev_environment: + name: Build Docker Dev Environment + runs-on: ubuntu-24.04 + defaults: + run: + shell: bash -el {0} + + concurrency: + # https://github.community/t/concurrecy-not-work-for-push/183068/7 + group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-build_docker_dev_environment + cancel-in-progress: true + + steps: + - name: Clean up dangling images + run: docker image prune -f + + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Build image + run: docker build --pull --no-cache --tag pandas-dev-env . 
+ + - name: Show environment + run: docker run --rm pandas-dev-env python -c "import pandas as pd; print(pd.show_versions())" + + requirements-dev-text-installable: + name: Test install requirements-dev.txt + runs-on: ubuntu-24.04 + + concurrency: + # https://github.community/t/concurrecy-not-work-for-push/183068/7 + group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-requirements-dev-text-installable + cancel-in-progress: true + + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Setup Python + id: setup_python + uses: actions/setup-python@v5 + with: + python-version: '3.10' + cache: 'pip' + cache-dependency-path: 'requirements-dev.txt' + + - name: Install requirements-dev.txt + run: pip install -r requirements-dev.txt + + - name: Check Pip Cache Hit + run: echo ${{ steps.setup_python.outputs.cache-hit }} diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index d951287ab808b..412f27cba9c4f 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -1,423 +1,423 @@ -#name: Unit Tests -# -#on: -# push: -# branches: -# - main -# - 2.3.x -# pull_request: -# branches: -# - main -# - 2.3.x -# paths-ignore: -# - "doc/**" -# - "web/**" -# -#permissions: -# contents: read -# -#defaults: -# run: -# shell: bash -el {0} -# -#jobs: -# ubuntu: -# runs-on: ${{ matrix.platform }} -# timeout-minutes: 90 -# strategy: -# matrix: -# platform: [ubuntu-24.04, ubuntu-24.04-arm] -# env_file: [actions-310.yaml, actions-311.yaml, actions-312.yaml, actions-313.yaml] -# # Prevent the include jobs from overriding other jobs -# pattern: [""] -# pandas_future_infer_string: ["1"] -# include: -# - name: "Downstream Compat" -# env_file: actions-311-downstream_compat.yaml -# pattern: "not slow and not network and not single_cpu" -# pytest_target: "pandas/tests/test_downstream.py" -# platform: ubuntu-24.04 -# - name: "Minimum Versions" -# env_file: 
actions-310-minimum_versions.yaml -# pattern: "not slow and not network and not single_cpu" -# platform: ubuntu-24.04 -# - name: "Freethreading" -# env_file: actions-313-freethreading.yaml -# pattern: "not slow and not network and not single_cpu" -# platform: ubuntu-24.04 -# - name: "Without PyArrow" -# env_file: actions-312.yaml -# pattern: "not slow and not network and not single_cpu" -# platform: ubuntu-24.04 -# - name: "Locale: it_IT" -# env_file: actions-311.yaml -# pattern: "not slow and not network and not single_cpu" -# extra_apt: "language-pack-it" -# # Use the utf8 version as the default, it has no bad side-effect. -# lang: "it_IT.utf8" -# lc_all: "it_IT.utf8" -# # Also install it_IT (its encoding is ISO8859-1) but do not activate it. -# # It will be temporarily activated during tests with locale.setlocale -# extra_loc: "it_IT" -# platform: ubuntu-24.04 -# - name: "Locale: zh_CN" -# env_file: actions-311.yaml -# pattern: "not slow and not network and not single_cpu" -# extra_apt: "language-pack-zh-hans" -# # Use the utf8 version as the default, it has no bad side-effect. -# lang: "zh_CN.utf8" -# lc_all: "zh_CN.utf8" -# # Also install zh_CN (its encoding is gb2312) but do not activate it. 
-# # It will be temporarily activated during tests with locale.setlocale -# extra_loc: "zh_CN" -# platform: ubuntu-24.04 -# - name: "Past no infer strings" -# env_file: actions-312.yaml -# pandas_future_infer_string: "0" -# platform: ubuntu-24.04 -# - name: "Numpy Dev" -# env_file: actions-311-numpydev.yaml -# pattern: "not slow and not network and not single_cpu" -# test_args: "-W error::DeprecationWarning -W error::FutureWarning" -# platform: ubuntu-24.04 -# - name: "Pyarrow Nightly" -# env_file: actions-311-pyarrownightly.yaml -# pattern: "not slow and not network and not single_cpu" -# platform: ubuntu-24.04 -# fail-fast: false -# name: ${{ matrix.name || format('{0} {1}', matrix.platform, matrix.env_file) }} -# env: -# PATTERN: ${{ matrix.pattern }} -# LANG: ${{ matrix.lang || 'C.UTF-8' }} -# LC_ALL: ${{ matrix.lc_all || '' }} -# PANDAS_CI: '1' -# PANDAS_FUTURE_INFER_STRING: ${{ matrix.pandas_future_infer_string || '1' }} -# TEST_ARGS: ${{ matrix.test_args || '' }} -# PYTEST_WORKERS: 'auto' -# PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }} -# # Clipboard tests -# QT_QPA_PLATFORM: offscreen -# REMOVE_PYARROW: ${{ matrix.name == 'Without PyArrow' && '1' || '0' }} -# concurrency: -# # https://github.community/t/concurrecy-not-work-for-push/183068/7 -# group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}-${{ matrix.pandas_future_infer_string }}-${{ matrix.platform }} -# cancel-in-progress: true -# -# services: -# mysql: -# image: mysql:9 -# env: -# MYSQL_ALLOW_EMPTY_PASSWORD: yes -# MYSQL_DATABASE: pandas -# options: >- -# --health-cmd "mysqladmin ping" -# --health-interval 10s -# --health-timeout 5s -# --health-retries 5 -# ports: -# - 3306:3306 -# -# postgres: -# image: postgres:17 -# env: -# PGUSER: postgres -# POSTGRES_USER: postgres -# POSTGRES_PASSWORD: postgres -# POSTGRES_DB: pandas -# options: >- -# --health-cmd pg_isready -# --health-interval 10s -# 
--health-timeout 5s -# --health-retries 5 -# ports: -# - 5432:5432 -# -# moto: -# image: motoserver/moto:5.0.27 -# ports: -# - 5000:5000 -# -# steps: -# - name: Checkout -# uses: actions/checkout@v4 -# with: -# fetch-depth: 0 -# -# - name: Extra installs -# # https://pytest-qt.readthedocs.io/en/latest/troubleshooting.html#github-actions-azure-pipelines-travis-ci-and-gitlab-ci-cd -# run: sudo apt-get update && sudo apt-get install -y libegl1 libopengl0 ${{ matrix.extra_apt || ''}} -# -# - name: Generate extra locales -# # These extra locales will be available for locale.setlocale() calls in tests -# run: sudo locale-gen ${{ matrix.extra_loc }} -# if: ${{ matrix.extra_loc }} -# -# - name: Set up Conda -# uses: ./.github/actions/setup-conda -# with: -# environment-file: ci/deps/${{ matrix.env_file }} -# -# - name: Build Pandas -# id: build -# uses: ./.github/actions/build_pandas -# with: -# # xref https://github.com/cython/cython/issues/6870 -# werror: ${{ matrix.name != 'Freethreading' }} -# -# - name: Test (not single_cpu) -# uses: ./.github/actions/run-tests -# env: -# # Set pattern to not single_cpu if not already set -# PATTERN: ${{ env.PATTERN == '' && 'not single_cpu' || matrix.pattern }} -# -# - name: Test (single_cpu) -# uses: ./.github/actions/run-tests -# env: -# PATTERN: 'single_cpu' -# PYTEST_WORKERS: 0 -# if: ${{ matrix.pattern == '' && (always() && steps.build.outcome == 'success')}} -# -# macos-windows: -# timeout-minutes: 90 -# strategy: -# matrix: -# # Note: Don't use macOS latest since macos 14 appears to be arm64 only -# os: [macos-13, macos-14, windows-latest] -# env_file: [actions-310.yaml, actions-311.yaml, actions-312.yaml, actions-313.yaml] -# fail-fast: false -# runs-on: ${{ matrix.os }} -# name: ${{ format('{0} {1}', matrix.os, matrix.env_file) }} -# concurrency: -# # https://github.community/t/concurrecy-not-work-for-push/183068/7 -# group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ 
matrix.os }} -# cancel-in-progress: true -# env: -# PANDAS_CI: 1 -# PYTEST_TARGET: pandas -# PATTERN: "not slow and not db and not network and not single_cpu" -# PYTEST_WORKERS: 'auto' -# -# steps: -# - name: Checkout -# uses: actions/checkout@v4 -# with: -# fetch-depth: 0 -# -# - name: Set up Conda -# uses: ./.github/actions/setup-conda -# with: -# environment-file: ci/deps/${{ matrix.env_file }} -# -# - name: Build Pandas -# uses: ./.github/actions/build_pandas -# -# - name: Test -# uses: ./.github/actions/run-tests -# -# Linux-32-bit: -# runs-on: ubuntu-24.04 -# container: -# image: quay.io/pypa/manylinux2014_i686 -# options: --platform linux/386 -# steps: -# - name: Checkout pandas Repo -# # actions/checkout does not work since it requires node -# run: | -# git config --global --add safe.directory $PWD -# -# if [ $GITHUB_EVENT_NAME != pull_request ]; then -# git clone --recursive --branch=$GITHUB_REF_NAME https://github.com/${GITHUB_REPOSITORY}.git $GITHUB_WORKSPACE -# git reset --hard $GITHUB_SHA -# else -# git clone --recursive https://github.com/${GITHUB_REPOSITORY}.git $GITHUB_WORKSPACE -# git fetch origin $GITHUB_REF:my_ref_name -# git checkout $GITHUB_BASE_REF -# git -c user.email="you@example.com" merge --no-commit my_ref_name -# fi -# - name: Build environment and Run Tests -# # https://github.com/numpy/numpy/issues/24703#issuecomment-1722379388 -# # Note: Pinned to Cython 3.0.10 to avoid numerical instability in 32-bit environments -# # https://github.com/pandas-dev/pandas/pull/61423 -# run: | -# /opt/python/cp313-cp313/bin/python -m venv ~/virtualenvs/pandas-dev -# . 
~/virtualenvs/pandas-dev/bin/activate -# python -m pip install --no-cache-dir -U pip wheel setuptools meson[ninja]==1.2.1 meson-python==0.13.1 -# python -m pip install numpy -Csetup-args="-Dallow-noblas=true" -# python -m pip install --no-cache-dir versioneer[toml] cython==3.0.10 python-dateutil pytest>=7.3.2 pytest-xdist>=3.4.0 hypothesis>=6.84.0 -# python -m pip install --no-cache-dir --no-build-isolation -e . -Csetup-args="--werror" -# python -m pip list --no-cache-dir -# PANDAS_CI=1 python -m pytest -m 'not slow and not network and not clipboard and not single_cpu' pandas --junitxml=test-data.xml -# concurrency: -# # https://github.community/t/concurrecy-not-work-for-push/183068/7 -# group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-32bit -# cancel-in-progress: true -# -# Linux-Musl: -# runs-on: ubuntu-24.04 -# container: -# image: quay.io/pypa/musllinux_1_2_x86_64 -# steps: -# - name: Checkout pandas Repo -# # actions/checkout does not work since it requires node -# run: | -# git config --global --add safe.directory $PWD -# -# if [ $GITHUB_EVENT_NAME != pull_request ]; then -# git clone --recursive --branch=$GITHUB_REF_NAME https://github.com/${GITHUB_REPOSITORY}.git $GITHUB_WORKSPACE -# git reset --hard $GITHUB_SHA -# else -# git clone --recursive https://github.com/${GITHUB_REPOSITORY}.git $GITHUB_WORKSPACE -# git fetch origin $GITHUB_REF:my_ref_name -# git checkout $GITHUB_BASE_REF -# git -c user.email="you@example.com" merge --no-commit my_ref_name -# fi -# - name: Configure System Packages -# run: | -# apk update -# apk add musl-locales -# - name: Build environment -# run: | -# /opt/python/cp313-cp313/bin/python -m venv ~/virtualenvs/pandas-dev -# . 
~/virtualenvs/pandas-dev/bin/activate -# python -m pip install --no-cache-dir -U pip wheel setuptools meson-python==0.13.1 meson[ninja]==1.2.1 -# python -m pip install --no-cache-dir versioneer[toml] cython numpy python-dateutil pytest>=7.3.2 pytest-xdist>=3.4.0 hypothesis>=6.84.0 -# python -m pip install --no-cache-dir --no-build-isolation -e . -Csetup-args="--werror" -# python -m pip list --no-cache-dir -# -# - name: Run Tests -# run: | -# . ~/virtualenvs/pandas-dev/bin/activate -# PANDAS_CI=1 python -m pytest -m 'not slow and not network and not clipboard and not single_cpu' pandas --junitxml=test-data.xml -# concurrency: -# # https://github.community/t/concurrecy-not-work-for-push/183068/7 -# group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-musl -# cancel-in-progress: true -# -# python-dev: -# # This job may or may not run depending on the state of the next -# # unreleased Python version. DO NOT DELETE IT. -# # -# # In general, this will remain frozen(present, but not running) until: -# # - The next unreleased Python version has released beta 1 -# # - This version should be available on GitHub Actions. -# # - Our required build/runtime dependencies(numpy, Cython, python-dateutil) -# # support that unreleased Python version. -# # To unfreeze, comment out the ``if: false`` condition, and make sure you update -# # the name of the workflow and Python version in actions/setup-python ``python-version:`` -# # -# # After it has been unfrozen, this file should remain unfrozen(present, and running) until: -# # - The next Python version has been officially released. -# # OR -# # - Most/All of our optional dependencies support the next Python version AND -# # - The next Python version has released a rc(we are guaranteed a stable ABI). -# # To freeze this file, uncomment out the ``if: false`` condition, and migrate the jobs -# # to the corresponding posix/windows-macos/sdist etc. workflows. -# # Feel free to modify this comment as necessary. 
-# if: false -# defaults: -# run: -# shell: bash -eou pipefail {0} -# runs-on: ${{ matrix.os }} -# strategy: -# fail-fast: false -# matrix: -# # Separate out macOS 13 and 14, since macOS 14 is arm64 only -# os: [ubuntu-24.04, macOS-13, macOS-14, windows-latest] -# -# timeout-minutes: 90 -# -# concurrency: -# # https://github.community/t/concurrecy-not-work-for-push/183068/7 -# group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.os }}-python-dev -# cancel-in-progress: true -# -# env: -# PYTEST_WORKERS: "auto" -# PANDAS_CI: 1 -# PATTERN: "not slow and not network and not clipboard and not single_cpu" -# PYTEST_TARGET: pandas -# -# steps: -# - uses: actions/checkout@v4 -# with: -# fetch-depth: 0 -# -# - name: Set up Python Dev Version -# uses: actions/setup-python@v5 -# with: -# python-version: '3.13-dev' -# -# - name: Build Environment -# run: | -# python --version -# python -m pip install --upgrade pip setuptools wheel meson[ninja]==1.2.1 meson-python==0.13.1 -# python -m pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple numpy -# python -m pip install versioneer[toml] python-dateutil tzdata cython hypothesis>=6.84.0 pytest>=7.3.2 pytest-xdist>=3.4.0 pytest-cov -# python -m pip install -ve . --no-build-isolation --no-index --no-deps -Csetup-args="--werror" -# python -m pip list -# -# - name: Run Tests -# uses: ./.github/actions/run-tests -# -# # NOTE: this job must be kept in sync with the Pyodide build job in wheels.yml -# emscripten: -# # Note: the Python version, Emscripten toolchain version are determined -# # by the Pyodide version. 
The appropriate versions can be found in the -# # Pyodide repodata.json "info" field, or in the Makefile.envs file: -# # https://github.com/pyodide/pyodide/blob/stable/Makefile.envs#L2 -# # The Node.js version can be determined via Pyodide: -# # https://pyodide.org/en/stable/usage/index.html#node-js -# name: Pyodide build -# runs-on: ubuntu-24.04 -# concurrency: -# # https://github.community/t/concurrecy-not-work-for-push/183068/7 -# group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-wasm -# cancel-in-progress: true -# steps: -# - name: Checkout pandas Repo -# uses: actions/checkout@v4 -# with: -# fetch-depth: 0 -# -# - name: Set up Python for pyodide-build -# id: setup-python -# uses: actions/setup-python@v5 -# with: -# python-version: '3.12' -# -# - name: Set up Emscripten toolchain -# uses: mymindstorm/setup-emsdk@v14 -# with: -# version: '3.1.58' -# actions-cache-folder: emsdk-cache -# -# - name: Install pyodide-build -# run: pip install "pyodide-build>=0.29.2" -# -# - name: Build pandas for Pyodide -# run: | -# pyodide build -# -# - name: Set up Node.js -# uses: actions/setup-node@v4 -# with: -# node-version: '20' -# -# - name: Set up Pyodide virtual environment -# env: -# pyodide-version: '0.27.1' -# run: | -# pyodide xbuildenv install ${{ env.pyodide-version }} -# pyodide venv .venv-pyodide -# source .venv-pyodide/bin/activate -# pip install dist/*.whl -# -# - name: Test pandas for Pyodide -# env: -# PANDAS_CI: 1 -# run: | -# source .venv-pyodide/bin/activate -# pip install pytest hypothesis -# # do not import pandas from the checked out repo -# cd .. 
-# python -c 'import pandas as pd; pd.test(extra_args=["-m not clipboard and not single_cpu and not slow and not network and not db"])' +name: Unit Tests + +on: + push: + branches: + - main + - 2.3.x + pull_request: + branches: + - main + - 2.3.x + paths-ignore: + - "doc/**" + - "web/**" + +permissions: + contents: read + +defaults: + run: + shell: bash -el {0} + +jobs: + ubuntu: + runs-on: ${{ matrix.platform }} + timeout-minutes: 90 + strategy: + matrix: + platform: [ubuntu-24.04, ubuntu-24.04-arm] + env_file: [actions-310.yaml, actions-311.yaml, actions-312.yaml, actions-313.yaml] + # Prevent the include jobs from overriding other jobs + pattern: [""] + pandas_future_infer_string: ["1"] + include: + - name: "Downstream Compat" + env_file: actions-311-downstream_compat.yaml + pattern: "not slow and not network and not single_cpu" + pytest_target: "pandas/tests/test_downstream.py" + platform: ubuntu-24.04 + - name: "Minimum Versions" + env_file: actions-310-minimum_versions.yaml + pattern: "not slow and not network and not single_cpu" + platform: ubuntu-24.04 + - name: "Freethreading" + env_file: actions-313-freethreading.yaml + pattern: "not slow and not network and not single_cpu" + platform: ubuntu-24.04 + - name: "Without PyArrow" + env_file: actions-312.yaml + pattern: "not slow and not network and not single_cpu" + platform: ubuntu-24.04 + - name: "Locale: it_IT" + env_file: actions-311.yaml + pattern: "not slow and not network and not single_cpu" + extra_apt: "language-pack-it" + # Use the utf8 version as the default, it has no bad side-effect. + lang: "it_IT.utf8" + lc_all: "it_IT.utf8" + # Also install it_IT (its encoding is ISO8859-1) but do not activate it. 
+ # It will be temporarily activated during tests with locale.setlocale + extra_loc: "it_IT" + platform: ubuntu-24.04 + - name: "Locale: zh_CN" + env_file: actions-311.yaml + pattern: "not slow and not network and not single_cpu" + extra_apt: "language-pack-zh-hans" + # Use the utf8 version as the default, it has no bad side-effect. + lang: "zh_CN.utf8" + lc_all: "zh_CN.utf8" + # Also install zh_CN (its encoding is gb2312) but do not activate it. + # It will be temporarily activated during tests with locale.setlocale + extra_loc: "zh_CN" + platform: ubuntu-24.04 + - name: "Past no infer strings" + env_file: actions-312.yaml + pandas_future_infer_string: "0" + platform: ubuntu-24.04 + - name: "Numpy Dev" + env_file: actions-311-numpydev.yaml + pattern: "not slow and not network and not single_cpu" + test_args: "-W error::DeprecationWarning -W error::FutureWarning" + platform: ubuntu-24.04 + - name: "Pyarrow Nightly" + env_file: actions-311-pyarrownightly.yaml + pattern: "not slow and not network and not single_cpu" + platform: ubuntu-24.04 + fail-fast: false + name: ${{ matrix.name || format('{0} {1}', matrix.platform, matrix.env_file) }} + env: + PATTERN: ${{ matrix.pattern }} + LANG: ${{ matrix.lang || 'C.UTF-8' }} + LC_ALL: ${{ matrix.lc_all || '' }} + PANDAS_CI: '1' + PANDAS_FUTURE_INFER_STRING: ${{ matrix.pandas_future_infer_string || '1' }} + TEST_ARGS: ${{ matrix.test_args || '' }} + PYTEST_WORKERS: 'auto' + PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }} + # Clipboard tests + QT_QPA_PLATFORM: offscreen + REMOVE_PYARROW: ${{ matrix.name == 'Without PyArrow' && '1' || '0' }} + concurrency: + # https://github.community/t/concurrecy-not-work-for-push/183068/7 + group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}-${{ matrix.pandas_future_infer_string }}-${{ matrix.platform }} + cancel-in-progress: true + + services: + mysql: + image: mysql:9 + env: + 
MYSQL_ALLOW_EMPTY_PASSWORD: yes + MYSQL_DATABASE: pandas + options: >- + --health-cmd "mysqladmin ping" + --health-interval 10s + --health-timeout 5s + --health-retries 5 + ports: + - 3306:3306 + + postgres: + image: postgres:17 + env: + PGUSER: postgres + POSTGRES_USER: postgres + POSTGRES_PASSWORD: postgres + POSTGRES_DB: pandas + options: >- + --health-cmd pg_isready + --health-interval 10s + --health-timeout 5s + --health-retries 5 + ports: + - 5432:5432 + + moto: + image: motoserver/moto:5.0.27 + ports: + - 5000:5000 + + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Extra installs + # https://pytest-qt.readthedocs.io/en/latest/troubleshooting.html#github-actions-azure-pipelines-travis-ci-and-gitlab-ci-cd + run: sudo apt-get update && sudo apt-get install -y libegl1 libopengl0 ${{ matrix.extra_apt || ''}} + + - name: Generate extra locales + # These extra locales will be available for locale.setlocale() calls in tests + run: sudo locale-gen ${{ matrix.extra_loc }} + if: ${{ matrix.extra_loc }} + + - name: Set up Conda + uses: ./.github/actions/setup-conda + with: + environment-file: ci/deps/${{ matrix.env_file }} + + - name: Build Pandas + id: build + uses: ./.github/actions/build_pandas + with: + # xref https://github.com/cython/cython/issues/6870 + werror: ${{ matrix.name != 'Freethreading' }} + + - name: Test (not single_cpu) + uses: ./.github/actions/run-tests + env: + # Set pattern to not single_cpu if not already set + PATTERN: ${{ env.PATTERN == '' && 'not single_cpu' || matrix.pattern }} + + - name: Test (single_cpu) + uses: ./.github/actions/run-tests + env: + PATTERN: 'single_cpu' + PYTEST_WORKERS: 0 + if: ${{ matrix.pattern == '' && (always() && steps.build.outcome == 'success')}} + + macos-windows: + timeout-minutes: 90 + strategy: + matrix: + # Note: Don't use macOS latest since macos 14 appears to be arm64 only + os: [macos-13, macos-14, windows-latest] + env_file: [actions-310.yaml, actions-311.yaml, 
actions-312.yaml, actions-313.yaml] + fail-fast: false + runs-on: ${{ matrix.os }} + name: ${{ format('{0} {1}', matrix.os, matrix.env_file) }} + concurrency: + # https://github.community/t/concurrecy-not-work-for-push/183068/7 + group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.os }} + cancel-in-progress: true + env: + PANDAS_CI: 1 + PYTEST_TARGET: pandas + PATTERN: "not slow and not db and not network and not single_cpu" + PYTEST_WORKERS: 'auto' + + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set up Conda + uses: ./.github/actions/setup-conda + with: + environment-file: ci/deps/${{ matrix.env_file }} + + - name: Build Pandas + uses: ./.github/actions/build_pandas + + - name: Test + uses: ./.github/actions/run-tests + + Linux-32-bit: + runs-on: ubuntu-24.04 + container: + image: quay.io/pypa/manylinux2014_i686 + options: --platform linux/386 + steps: + - name: Checkout pandas Repo + # actions/checkout does not work since it requires node + run: | + git config --global --add safe.directory $PWD + + if [ $GITHUB_EVENT_NAME != pull_request ]; then + git clone --recursive --branch=$GITHUB_REF_NAME https://github.com/${GITHUB_REPOSITORY}.git $GITHUB_WORKSPACE + git reset --hard $GITHUB_SHA + else + git clone --recursive https://github.com/${GITHUB_REPOSITORY}.git $GITHUB_WORKSPACE + git fetch origin $GITHUB_REF:my_ref_name + git checkout $GITHUB_BASE_REF + git -c user.email="you@example.com" merge --no-commit my_ref_name + fi + - name: Build environment and Run Tests + # https://github.com/numpy/numpy/issues/24703#issuecomment-1722379388 + # Note: Pinned to Cython 3.0.10 to avoid numerical instability in 32-bit environments + # https://github.com/pandas-dev/pandas/pull/61423 + run: | + /opt/python/cp313-cp313/bin/python -m venv ~/virtualenvs/pandas-dev + . 
~/virtualenvs/pandas-dev/bin/activate + python -m pip install --no-cache-dir -U pip wheel setuptools meson[ninja]==1.2.1 meson-python==0.13.1 + python -m pip install numpy -Csetup-args="-Dallow-noblas=true" + python -m pip install --no-cache-dir versioneer[toml] cython==3.0.10 python-dateutil pytest>=7.3.2 pytest-xdist>=3.4.0 hypothesis>=6.84.0 + python -m pip install --no-cache-dir --no-build-isolation -e . -Csetup-args="--werror" + python -m pip list --no-cache-dir + PANDAS_CI=1 python -m pytest -m 'not slow and not network and not clipboard and not single_cpu' pandas --junitxml=test-data.xml + concurrency: + # https://github.community/t/concurrecy-not-work-for-push/183068/7 + group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-32bit + cancel-in-progress: true + + Linux-Musl: + runs-on: ubuntu-24.04 + container: + image: quay.io/pypa/musllinux_1_2_x86_64 + steps: + - name: Checkout pandas Repo + # actions/checkout does not work since it requires node + run: | + git config --global --add safe.directory $PWD + + if [ $GITHUB_EVENT_NAME != pull_request ]; then + git clone --recursive --branch=$GITHUB_REF_NAME https://github.com/${GITHUB_REPOSITORY}.git $GITHUB_WORKSPACE + git reset --hard $GITHUB_SHA + else + git clone --recursive https://github.com/${GITHUB_REPOSITORY}.git $GITHUB_WORKSPACE + git fetch origin $GITHUB_REF:my_ref_name + git checkout $GITHUB_BASE_REF + git -c user.email="you@example.com" merge --no-commit my_ref_name + fi + - name: Configure System Packages + run: | + apk update + apk add musl-locales + - name: Build environment + run: | + /opt/python/cp313-cp313/bin/python -m venv ~/virtualenvs/pandas-dev + . 
~/virtualenvs/pandas-dev/bin/activate + python -m pip install --no-cache-dir -U pip wheel setuptools meson-python==0.13.1 meson[ninja]==1.2.1 + python -m pip install --no-cache-dir versioneer[toml] cython numpy python-dateutil pytest>=7.3.2 pytest-xdist>=3.4.0 hypothesis>=6.84.0 + python -m pip install --no-cache-dir --no-build-isolation -e . -Csetup-args="--werror" + python -m pip list --no-cache-dir + + - name: Run Tests + run: | + . ~/virtualenvs/pandas-dev/bin/activate + PANDAS_CI=1 python -m pytest -m 'not slow and not network and not clipboard and not single_cpu' pandas --junitxml=test-data.xml + concurrency: + # https://github.community/t/concurrecy-not-work-for-push/183068/7 + group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-musl + cancel-in-progress: true + + python-dev: + # This job may or may not run depending on the state of the next + # unreleased Python version. DO NOT DELETE IT. + # + # In general, this will remain frozen (present, but not running) until: + # - The next unreleased Python version has released beta 1 + # - This version should be available on GitHub Actions. + # - Our required build/runtime dependencies (numpy, Cython, python-dateutil) + # support that unreleased Python version. + # To unfreeze, comment out the ``if: false`` condition, and make sure you update + # the name of the workflow and Python version in actions/setup-python ``python-version:`` + # + # After it has been unfrozen, this job should remain unfrozen (present and running) until: + # - The next Python version has been officially released. + # OR + # - Most/All of our optional dependencies support the next Python version AND + # - The next Python version has released an rc (we are guaranteed a stable ABI). + # To freeze this job, uncomment the ``if: false`` condition, and migrate the jobs + # to the corresponding posix/windows-macos/sdist etc. workflows. + # Feel free to modify this comment as necessary. 
+ if: false + defaults: + run: + shell: bash -eou pipefail {0} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + # Separate out macOS 13 and 14, since macOS 14 is arm64 only + os: [ubuntu-24.04, macOS-13, macOS-14, windows-latest] + + timeout-minutes: 90 + + concurrency: + # https://github.community/t/concurrecy-not-work-for-push/183068/7 + group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.os }}-python-dev + cancel-in-progress: true + + env: + PYTEST_WORKERS: "auto" + PANDAS_CI: 1 + PATTERN: "not slow and not network and not clipboard and not single_cpu" + PYTEST_TARGET: pandas + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set up Python Dev Version + uses: actions/setup-python@v5 + with: + python-version: '3.13-dev' + + - name: Build Environment + run: | + python --version + python -m pip install --upgrade pip setuptools wheel meson[ninja]==1.2.1 meson-python==0.13.1 + python -m pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple numpy + python -m pip install versioneer[toml] python-dateutil tzdata cython hypothesis>=6.84.0 pytest>=7.3.2 pytest-xdist>=3.4.0 pytest-cov + python -m pip install -ve . --no-build-isolation --no-index --no-deps -Csetup-args="--werror" + python -m pip list + + - name: Run Tests + uses: ./.github/actions/run-tests + + # NOTE: this job must be kept in sync with the Pyodide build job in wheels.yml + emscripten: + # Note: the Python version, Emscripten toolchain version are determined + # by the Pyodide version. 
The appropriate versions can be found in the + # Pyodide repodata.json "info" field, or in the Makefile.envs file: + # https://github.com/pyodide/pyodide/blob/stable/Makefile.envs#L2 + # The Node.js version can be determined via Pyodide: + # https://pyodide.org/en/stable/usage/index.html#node-js + name: Pyodide build + runs-on: ubuntu-24.04 + concurrency: + # https://github.community/t/concurrecy-not-work-for-push/183068/7 + group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-wasm + cancel-in-progress: true + steps: + - name: Checkout pandas Repo + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set up Python for pyodide-build + id: setup-python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + + - name: Set up Emscripten toolchain + uses: mymindstorm/setup-emsdk@v14 + with: + version: '3.1.58' + actions-cache-folder: emsdk-cache + + - name: Install pyodide-build + run: pip install "pyodide-build>=0.29.2" + + - name: Build pandas for Pyodide + run: | + pyodide build + + - name: Set up Node.js + uses: actions/setup-node@v4 + with: + node-version: '20' + + - name: Set up Pyodide virtual environment + env: + pyodide-version: '0.27.1' + run: | + pyodide xbuildenv install ${{ env.pyodide-version }} + pyodide venv .venv-pyodide + source .venv-pyodide/bin/activate + pip install dist/*.whl + + - name: Test pandas for Pyodide + env: + PANDAS_CI: 1 + run: | + source .venv-pyodide/bin/activate + pip install pytest hypothesis + # do not import pandas from the checked out repo + cd .. + python -c 'import pandas as pd; pd.test(extra_args=["-m not clipboard and not single_cpu and not slow and not network and not db"])' diff --git a/doc/source/whatsnew/index.rst b/doc/source/whatsnew/index.rst index a63eb0db4c6ba..dfa87184c21fe 100644 --- a/doc/source/whatsnew/index.rst +++ b/doc/source/whatsnew/index.rst @@ -168,6 +168,7 @@ Version 0.23 v0.23.3 v0.23.2 v0.23.1 + v0.23.0 Version 0.22 ------------