Skip to content

Commit 3b02d68

Browse files
Merge remote-tracking branch 'upstream/main'
2 parents b81dd0f + 8de38e8 commit 3b02d68

File tree

212 files changed

+2404
-1571
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

212 files changed

+2404
-1571
lines changed

.github/workflows/unit-tests.yml

Lines changed: 8 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ jobs:
3030
env_file: [actions-310.yaml, actions-311.yaml, actions-312.yaml, actions-313.yaml]
3131
# Prevent the include jobs from overriding other jobs
3232
pattern: [""]
33-
pandas_future_infer_string: ["0"]
33+
pandas_future_infer_string: ["1"]
3434
include:
3535
- name: "Downstream Compat"
3636
env_file: actions-311-downstream_compat.yaml
@@ -45,6 +45,10 @@ jobs:
4545
env_file: actions-313-freethreading.yaml
4646
pattern: "not slow and not network and not single_cpu"
4747
platform: ubuntu-24.04
48+
- name: "Without PyArrow"
49+
env_file: actions-312.yaml
50+
pattern: "not slow and not network and not single_cpu"
51+
platform: ubuntu-24.04
4852
- name: "Locale: it_IT"
4953
env_file: actions-311.yaml
5054
pattern: "not slow and not network and not single_cpu"
@@ -67,18 +71,9 @@ jobs:
6771
# It will be temporarily activated during tests with locale.setlocale
6872
extra_loc: "zh_CN"
6973
platform: ubuntu-24.04
70-
- name: "Future infer strings"
74+
- name: "Past no infer strings"
7175
env_file: actions-312.yaml
72-
pandas_future_infer_string: "1"
73-
platform: ubuntu-24.04
74-
- name: "Future infer strings (without pyarrow)"
75-
env_file: actions-311.yaml
76-
pandas_future_infer_string: "1"
77-
platform: ubuntu-24.04
78-
- name: "Pypy"
79-
env_file: actions-pypy-39.yaml
80-
pattern: "not slow and not network and not single_cpu"
81-
test_args: "--max-worker-restart 0"
76+
pandas_future_infer_string: "0"
8277
platform: ubuntu-24.04
8378
- name: "Numpy Dev"
8479
env_file: actions-311-numpydev.yaml
@@ -88,7 +83,6 @@ jobs:
8883
- name: "Pyarrow Nightly"
8984
env_file: actions-311-pyarrownightly.yaml
9085
pattern: "not slow and not network and not single_cpu"
91-
pandas_future_infer_string: "1"
9286
platform: ubuntu-24.04
9387
fail-fast: false
9488
name: ${{ matrix.name || format('{0} {1}', matrix.platform, matrix.env_file) }}
@@ -103,7 +97,7 @@ jobs:
10397
PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }}
10498
# Clipboard tests
10599
QT_QPA_PLATFORM: offscreen
106-
REMOVE_PYARROW: ${{ matrix.name == 'Future infer strings (without pyarrow)' && '1' || '0' }}
100+
REMOVE_PYARROW: ${{ matrix.name == 'Without PyArrow' && '1' || '0' }}
107101
concurrency:
108102
# https://github.community/t/concurrecy-not-work-for-push/183068/7
109103
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}-${{ matrix.pandas_future_infer_string }}-${{ matrix.platform }}
@@ -140,9 +134,6 @@ jobs:
140134

141135
moto:
142136
image: motoserver/moto:5.0.27
143-
env:
144-
AWS_ACCESS_KEY_ID: foobar_key
145-
AWS_SECRET_ACCESS_KEY: foobar_secret
146137
ports:
147138
- 5000:5000
148139

@@ -172,12 +163,9 @@ jobs:
172163
with:
173164
# xref https://github.com/cython/cython/issues/6870
174165
werror: ${{ matrix.name != 'Freethreading' }}
175-
# TODO: Re-enable once Pypy has Pypy 3.10 on conda-forge
176-
if: ${{ matrix.name != 'Pypy' }}
177166

178167
- name: Test (not single_cpu)
179168
uses: ./.github/actions/run-tests
180-
if: ${{ matrix.name != 'Pypy' }}
181169
env:
182170
# Set pattern to not single_cpu if not already set
183171
PATTERN: ${{ env.PATTERN == '' && 'not single_cpu' || matrix.pattern }}

.github/workflows/wheels.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,6 @@ jobs:
101101
- [macos-14, macosx_arm64]
102102
- [windows-2022, win_amd64]
103103
- [windows-11-arm, win_arm64]
104-
# TODO: support PyPy?
105104
python: [["cp310", "3.10"], ["cp311", "3.11"], ["cp312", "3.12"], ["cp313", "3.13"], ["cp313t", "3.13"]]
106105
include:
107106
# Build Pyodide wheels and upload them to Anaconda.org

.pre-commit-config.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ ci:
1919
skip: [pyright, mypy]
2020
repos:
2121
- repo: https://github.com/astral-sh/ruff-pre-commit
22-
rev: v0.11.12
22+
rev: v0.12.2
2323
hooks:
2424
- id: ruff
2525
args: [--exit-non-zero-on-fix]
@@ -47,7 +47,7 @@ repos:
4747
types_or: [python, rst, markdown, cython, c]
4848
additional_dependencies: [tomli]
4949
- repo: https://github.com/MarcoGorelli/cython-lint
50-
rev: v0.16.6
50+
rev: v0.16.7
5151
hooks:
5252
- id: cython-lint
5353
- id: double-quote-cython-strings
@@ -95,14 +95,14 @@ repos:
9595
- id: sphinx-lint
9696
args: ["--enable", "all", "--disable", "line-too-long"]
9797
- repo: https://github.com/pre-commit/mirrors-clang-format
98-
rev: v20.1.5
98+
rev: v20.1.7
9999
hooks:
100100
- id: clang-format
101101
files: ^pandas/_libs/src|^pandas/_libs/include
102102
args: [-i]
103103
types_or: [c, c++]
104104
- repo: https://github.com/trim21/pre-commit-mirror-meson
105-
rev: v1.8.1
105+
rev: v1.8.2
106106
hooks:
107107
- id: meson-fmt
108108
args: ['--inplace']

AUTHORS.md

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,12 @@ About the Copyright Holders
77
led by Wes McKinney. AQR released the source under this license in 2009.
88
* Copyright (c) 2011-2012, Lambda Foundry, Inc.
99

10-
Wes is now an employee of Lambda Foundry, and remains the pandas project
10+
Wes became an employee of Lambda Foundry, and remained the pandas project
1111
lead.
1212
* Copyright (c) 2011-2012, PyData Development Team
1313

1414
The PyData Development Team is the collection of developers of the PyData
15-
project. This includes all of the PyData sub-projects, including pandas. The
15+
project. This includes all of the PyData sub-projects, such as pandas. The
1616
core team that coordinates development on GitHub can be found here:
1717
https://github.com/pydata.
1818

@@ -23,11 +23,11 @@ Our Copyright Policy
2323

2424
PyData uses a shared copyright model. Each contributor maintains copyright
2525
over their contributions to PyData. However, it is important to note that
26-
these contributions are typically only changes to the repositories. Thus,
26+
these contributions are typically limited to changes to the repositories. Thus,
2727
the PyData source code, in its entirety, is not the copyright of any single
2828
person or institution. Instead, it is the collective copyright of the
2929
entire PyData Development Team. If individual contributors want to maintain
30-
a record of what changes/contributions they have specific copyright on,
30+
a record of the specific changes or contributions they hold copyright to,
3131
they should indicate their copyright in the commit message of the change
3232
when they commit the change to one of the PyData repositories.
3333

@@ -50,7 +50,7 @@ Other licenses can be found in the LICENSES directory.
5050
License
5151
=======
5252

53-
pandas is distributed under a 3-clause ("Simplified" or "New") BSD
53+
pandas is distributed under the 3-clause ("Simplified" or "New") BSD
5454
license. Parts of NumPy, SciPy, numpydoc, and bottleneck, which all have
55-
BSD-compatible licenses, are included. Their licenses follow the pandas
55+
BSD-compatible licenses, are included. Their licenses are compatible with the pandas
5656
license.

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ All contributions, bug reports, bug fixes, documentation improvements, enhanceme
175175

176176
A detailed overview of how to contribute can be found in the **[contributing guide](https://pandas.pydata.org/docs/dev/development/contributing.html)**.
177177

178-
If you are simply looking to start working with the pandas codebase, navigate to the [GitHub "issues" tab](https://github.com/pandas-dev/pandas/issues) and start looking through interesting issues. There are a number of issues listed under [Docs](https://github.com/pandas-dev/pandas/issues?labels=Docs&sort=updated&state=open) and [good first issue](https://github.com/pandas-dev/pandas/issues?labels=good+first+issue&sort=updated&state=open) where you could start out.
178+
If you are simply looking to start working with the pandas codebase, navigate to the [GitHub "issues" tab](https://github.com/pandas-dev/pandas/issues) and start looking through interesting issues. There are a number of issues listed under [Docs](https://github.com/pandas-dev/pandas/issues?q=is%3Aissue%20state%3Aopen%20label%3ADocs%20sort%3Aupdated-desc) and [good first issue](https://github.com/pandas-dev/pandas/issues?q=is%3Aissue%20state%3Aopen%20label%3A%22good%20first%20issue%22%20sort%3Aupdated-desc) where you could start out.
179179

180180
You can also triage issues which may include reproducing bug reports, or asking for vital information such as version numbers or reproduction instructions. If you would like to start triaging issues, one easy way to get started is to [subscribe to pandas on CodeTriage](https://www.codetriage.com/pandas-dev/pandas).
181181

asv_bench/benchmarks/gil.py

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636
from .pandas_vb_common import BaseIO # isort:skip
3737

3838

39-
def test_parallel(num_threads=2, kwargs_list=None):
39+
def run_parallel(num_threads=2, kwargs_list=None):
4040
"""
4141
Decorator to run the same function multiple times in parallel.
4242
@@ -95,7 +95,7 @@ def setup(self, threads, method):
9595
{"key": np.random.randint(0, ngroups, size=N), "data": np.random.randn(N)}
9696
)
9797

98-
@test_parallel(num_threads=threads)
98+
@run_parallel(num_threads=threads)
9999
def parallel():
100100
getattr(df.groupby("key")["data"], method)()
101101

@@ -123,7 +123,7 @@ def setup(self, threads):
123123
ngroups = 10**3
124124
data = Series(np.random.randint(0, ngroups, size=size))
125125

126-
@test_parallel(num_threads=threads)
126+
@run_parallel(num_threads=threads)
127127
def get_groups():
128128
data.groupby(data).groups
129129

@@ -142,7 +142,7 @@ def setup(self, dtype):
142142
df = DataFrame({"col": np.arange(N, dtype=dtype)})
143143
indexer = np.arange(100, len(df) - 100)
144144

145-
@test_parallel(num_threads=2)
145+
@run_parallel(num_threads=2)
146146
def parallel_take1d():
147147
take_nd(df["col"].values, indexer)
148148

@@ -163,7 +163,7 @@ def setup(self):
163163
k = 5 * 10**5
164164
kwargs_list = [{"arr": np.random.randn(N)}, {"arr": np.random.randn(N)}]
165165

166-
@test_parallel(num_threads=2, kwargs_list=kwargs_list)
166+
@run_parallel(num_threads=2, kwargs_list=kwargs_list)
167167
def parallel_kth_smallest(arr):
168168
algos.kth_smallest(arr, k)
169169

@@ -180,42 +180,42 @@ def setup(self):
180180
self.period = self.dti.to_period("D")
181181

182182
def time_datetime_field_year(self):
183-
@test_parallel(num_threads=2)
183+
@run_parallel(num_threads=2)
184184
def run(dti):
185185
dti.year
186186

187187
run(self.dti)
188188

189189
def time_datetime_field_day(self):
190-
@test_parallel(num_threads=2)
190+
@run_parallel(num_threads=2)
191191
def run(dti):
192192
dti.day
193193

194194
run(self.dti)
195195

196196
def time_datetime_field_daysinmonth(self):
197-
@test_parallel(num_threads=2)
197+
@run_parallel(num_threads=2)
198198
def run(dti):
199199
dti.days_in_month
200200

201201
run(self.dti)
202202

203203
def time_datetime_field_normalize(self):
204-
@test_parallel(num_threads=2)
204+
@run_parallel(num_threads=2)
205205
def run(dti):
206206
dti.normalize()
207207

208208
run(self.dti)
209209

210210
def time_datetime_to_period(self):
211-
@test_parallel(num_threads=2)
211+
@run_parallel(num_threads=2)
212212
def run(dti):
213213
dti.to_period("s")
214214

215215
run(self.dti)
216216

217217
def time_period_to_datetime(self):
218-
@test_parallel(num_threads=2)
218+
@run_parallel(num_threads=2)
219219
def run(period):
220220
period.to_timestamp()
221221

@@ -232,7 +232,7 @@ def setup(self, method):
232232
if hasattr(DataFrame, "rolling"):
233233
df = DataFrame(arr).rolling(win)
234234

235-
@test_parallel(num_threads=2)
235+
@run_parallel(num_threads=2)
236236
def parallel_rolling():
237237
getattr(df, method)()
238238

@@ -249,7 +249,7 @@ def parallel_rolling():
249249
"std": rolling_std,
250250
}
251251

252-
@test_parallel(num_threads=2)
252+
@run_parallel(num_threads=2)
253253
def parallel_rolling():
254254
rolling[method](arr, win)
255255

@@ -286,7 +286,7 @@ def setup(self, dtype):
286286
self.fname = f"__test_{dtype}__.csv"
287287
df.to_csv(self.fname)
288288

289-
@test_parallel(num_threads=2)
289+
@run_parallel(num_threads=2)
290290
def parallel_read_csv():
291291
read_csv(self.fname)
292292

@@ -305,7 +305,7 @@ class ParallelFactorize:
305305
def setup(self, threads):
306306
strings = Index([f"i-{i}" for i in range(100000)], dtype=object)
307307

308-
@test_parallel(num_threads=threads)
308+
@run_parallel(num_threads=threads)
309309
def parallel():
310310
factorize(strings)
311311

asv_bench/benchmarks/io/csv.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,25 @@ def time_frame(self, kind):
5353
self.df.to_csv(self.fname)
5454

5555

56+
class ToCSVFloatFormatVariants(BaseIO):
57+
fname = "__test__.csv"
58+
59+
def setup(self):
60+
self.df = DataFrame(np.random.default_rng(seed=42).random((1000, 1000)))
61+
62+
def time_old_style_percent_format(self):
63+
self.df.to_csv(self.fname, float_format="%.6f")
64+
65+
def time_new_style_brace_format(self):
66+
self.df.to_csv(self.fname, float_format="{:.6f}")
67+
68+
def time_new_style_thousands_format(self):
69+
self.df.to_csv(self.fname, float_format="{:,.2f}")
70+
71+
def time_callable_format(self):
72+
self.df.to_csv(self.fname, float_format=lambda x: f"{x:.6f}")
73+
74+
5675
class ToCSVMultiIndexUnusedLevels(BaseIO):
5776
fname = "__test__.csv"
5877

ci/code_checks.sh

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,9 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
5858

5959
MSG='Python and Cython Doctests' ; echo "$MSG"
6060
python -c 'import pandas as pd; pd.test(run_doctests=True)'
61-
RET=$(($RET + $?)) ; echo "$MSG" "DONE"
61+
# TEMP don't let doctests fail the build until all string dtype changes are fixed
62+
# RET=$(($RET + $?)) ; echo "$MSG" "DONE"
63+
echo "$MSG" "DONE"
6264

6365
fi
6466

@@ -72,6 +74,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
7274
-i "pandas.Series.dt PR01" `# Accessors are implemented as classes, but we do not document the Parameters section` \
7375
-i "pandas.Period.freq GL08" \
7476
-i "pandas.Period.ordinal GL08" \
77+
-i "pandas.errors.IncompatibleFrequency SA01,SS06,EX01" \
7578
-i "pandas.core.groupby.DataFrameGroupBy.plot PR02" \
7679
-i "pandas.core.groupby.SeriesGroupBy.plot PR02" \
7780
-i "pandas.core.resample.Resampler.quantile PR01,PR07" \

ci/deps/actions-310-minimum_versions.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ dependencies:
2222

2323
# required dependencies
2424
- python-dateutil=2.8.2
25-
- numpy=1.23.5
25+
- numpy=1.26.0
2626

2727
# optional dependencies
2828
- beautifulsoup4=4.12.3
@@ -41,7 +41,7 @@ dependencies:
4141
- qtpy=2.3.0
4242
- openpyxl=3.1.2
4343
- psycopg2=2.9.6
44-
- pyarrow=10.0.1
44+
- pyarrow=12.0.1
4545
- pyiceberg=0.7.1
4646
- pymysql=1.1.0
4747
- pyqt=5.15.9
@@ -62,4 +62,4 @@ dependencies:
6262
- pip:
6363
- adbc-driver-postgresql==0.10.0
6464
- adbc-driver-sqlite==0.8.0
65-
- tzdata==2022.7
65+
- tzdata==2023.3

ci/deps/actions-310.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ dependencies:
3939
- qtpy>=2.3.0
4040
- openpyxl>=3.1.2
4141
- psycopg2>=2.9.6
42-
- pyarrow>=10.0.1
42+
- pyarrow>=12.0.1
4343
- pyiceberg>=0.7.1
4444
- pymysql>=1.1.0
4545
- pyqt>=5.15.9
@@ -60,4 +60,4 @@ dependencies:
6060
- pip:
6161
- adbc-driver-postgresql>=0.10.0
6262
- adbc-driver-sqlite>=0.8.0
63-
- tzdata>=2022.7
63+
- tzdata>=2023.3

0 commit comments

Comments
 (0)