
Commit 2cc47a1

Merge branch 'pandas-dev:main' into shiny-new-feature
2 parents 3e8c1e4 + 8de38e8 commit 2cc47a1

148 files changed, +1641 / -814 lines


.github/workflows/unit-tests.yml

Lines changed: 8 additions & 17 deletions
@@ -30,7 +30,7 @@ jobs:
 env_file: [actions-310.yaml, actions-311.yaml, actions-312.yaml, actions-313.yaml]
 # Prevent the include jobs from overriding other jobs
 pattern: [""]
-pandas_future_infer_string: ["0"]
+pandas_future_infer_string: ["1"]
 include:
 - name: "Downstream Compat"
 env_file: actions-311-downstream_compat.yaml
@@ -45,6 +45,10 @@ jobs:
 env_file: actions-313-freethreading.yaml
 pattern: "not slow and not network and not single_cpu"
 platform: ubuntu-24.04
+- name: "Without PyArrow"
+env_file: actions-312.yaml
+pattern: "not slow and not network and not single_cpu"
+platform: ubuntu-24.04
 - name: "Locale: it_IT"
 env_file: actions-311.yaml
 pattern: "not slow and not network and not single_cpu"
@@ -67,18 +71,9 @@ jobs:
 # It will be temporarily activated during tests with locale.setlocale
 extra_loc: "zh_CN"
 platform: ubuntu-24.04
-- name: "Future infer strings"
+- name: "Past no infer strings"
 env_file: actions-312.yaml
-pandas_future_infer_string: "1"
-platform: ubuntu-24.04
-- name: "Future infer strings (without pyarrow)"
-env_file: actions-311.yaml
-pandas_future_infer_string: "1"
-platform: ubuntu-24.04
-- name: "Pypy"
-env_file: actions-pypy-39.yaml
-pattern: "not slow and not network and not single_cpu"
-test_args: "--max-worker-restart 0"
+pandas_future_infer_string: "0"
 platform: ubuntu-24.04
 - name: "Numpy Dev"
 env_file: actions-311-numpydev.yaml
@@ -88,7 +83,6 @@ jobs:
 - name: "Pyarrow Nightly"
 env_file: actions-311-pyarrownightly.yaml
 pattern: "not slow and not network and not single_cpu"
-pandas_future_infer_string: "1"
 platform: ubuntu-24.04
 fail-fast: false
 name: ${{ matrix.name || format('{0} {1}', matrix.platform, matrix.env_file) }}
@@ -103,7 +97,7 @@ jobs:
 PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }}
 # Clipboard tests
 QT_QPA_PLATFORM: offscreen
-REMOVE_PYARROW: ${{ matrix.name == 'Future infer strings (without pyarrow)' && '1' || '0' }}
+REMOVE_PYARROW: ${{ matrix.name == 'Without PyArrow' && '1' || '0' }}
 concurrency:
 # https://github.community/t/concurrecy-not-work-for-push/183068/7
 group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}-${{ matrix.pandas_future_infer_string }}-${{ matrix.platform }}
@@ -169,12 +163,9 @@ jobs:
 with:
 # xref https://github.com/cython/cython/issues/6870
 werror: ${{ matrix.name != 'Freethreading' }}
-# TODO: Re-enable once Pypy has Pypy 3.10 on conda-forge
-if: ${{ matrix.name != 'Pypy' }}

 - name: Test (not single_cpu)
 uses: ./.github/actions/run-tests
-if: ${{ matrix.name != 'Pypy' }}
 env:
 # Set pattern to not single_cpu if not already set
 PATTERN: ${{ env.PATTERN == '' && 'not single_cpu' || matrix.pattern }}
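Net effect of this change: pandas_future_infer_string now defaults to "1" across the build matrix, a single "Past no infer strings" job keeps the legacy "0" setting, and the dedicated PyArrow-less job is renamed "Without PyArrow" (with REMOVE_PYARROW keyed off the new name). As a rough illustration of what the matrix flag toggles at the Python level — a minimal sketch, assuming a pandas 2.x-or-newer build where the future.infer_string option exists; exact dtype reprs vary by version and by whether PyArrow is installed:

    import pandas as pd

    # Legacy behaviour (what the "Past no infer strings" job pins):
    # plain Python strings land in an object-dtype Series.
    pd.set_option("future.infer_string", False)
    print(pd.Series(["a", "b"]).dtype)  # object

    # Future behaviour, now the matrix default:
    # string data is inferred as a dedicated string dtype instead of object.
    pd.set_option("future.infer_string", True)
    print(pd.Series(["a", "b"]).dtype)  # a string dtype (repr depends on version / PyArrow)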

.github/workflows/wheels.yml

Lines changed: 0 additions & 1 deletion
@@ -101,7 +101,6 @@ jobs:
 - [macos-14, macosx_arm64]
 - [windows-2022, win_amd64]
 - [windows-11-arm, win_arm64]
-# TODO: support PyPy?
 python: [["cp310", "3.10"], ["cp311", "3.11"], ["cp312", "3.12"], ["cp313", "3.13"], ["cp313t", "3.13"]]
 include:
 # Build Pyodide wheels and upload them to Anaconda.org

.pre-commit-config.yaml

Lines changed: 4 additions & 4 deletions
@@ -19,7 +19,7 @@ ci:
 skip: [pyright, mypy]
 repos:
 - repo: https://github.com/astral-sh/ruff-pre-commit
-rev: v0.11.12
+rev: v0.12.2
 hooks:
 - id: ruff
 args: [--exit-non-zero-on-fix]
@@ -47,7 +47,7 @@ repos:
 types_or: [python, rst, markdown, cython, c]
 additional_dependencies: [tomli]
 - repo: https://github.com/MarcoGorelli/cython-lint
-rev: v0.16.6
+rev: v0.16.7
 hooks:
 - id: cython-lint
 - id: double-quote-cython-strings
@@ -95,14 +95,14 @@ repos:
 - id: sphinx-lint
 args: ["--enable", "all", "--disable", "line-too-long"]
 - repo: https://github.com/pre-commit/mirrors-clang-format
-rev: v20.1.5
+rev: v20.1.7
 hooks:
 - id: clang-format
 files: ^pandas/_libs/src|^pandas/_libs/include
 args: [-i]
 types_or: [c, c++]
 - repo: https://github.com/trim21/pre-commit-mirror-meson
-rev: v1.8.1
+rev: v1.8.2
 hooks:
 - id: meson-fmt
 args: ['--inplace']

README.md

Lines changed: 1 addition & 1 deletion
@@ -175,7 +175,7 @@ All contributions, bug reports, bug fixes, documentation improvements, enhanceme

 A detailed overview on how to contribute can be found in the **[contributing guide](https://pandas.pydata.org/docs/dev/development/contributing.html)**.

-If you are simply looking to start working with the pandas codebase, navigate to the [GitHub "issues" tab](https://github.com/pandas-dev/pandas/issues) and start looking through interesting issues. There are a number of issues listed under [Docs](https://github.com/pandas-dev/pandas/issues?labels=Docs&sort=updated&state=open) and [good first issue](https://github.com/pandas-dev/pandas/issues?labels=good+first+issue&sort=updated&state=open) where you could start out.
+If you are simply looking to start working with the pandas codebase, navigate to the [GitHub "issues" tab](https://github.com/pandas-dev/pandas/issues) and start looking through interesting issues. There are a number of issues listed under [Docs](https://github.com/pandas-dev/pandas/issues?q=is%3Aissue%20state%3Aopen%20label%3ADocs%20sort%3Aupdated-desc) and [good first issue](https://github.com/pandas-dev/pandas/issues?q=is%3Aissue%20state%3Aopen%20label%3A%22good%20first%20issue%22%20sort%3Aupdated-desc) where you could start out.

 You can also triage issues which may include reproducing bug reports, or asking for vital information such as version numbers or reproduction instructions. If you would like to start triaging issues, one easy way to get started is to [subscribe to pandas on CodeTriage](https://www.codetriage.com/pandas-dev/pandas).

asv_bench/benchmarks/gil.py

Lines changed: 15 additions & 15 deletions
@@ -36,7 +36,7 @@
 from .pandas_vb_common import BaseIO # isort:skip


-def test_parallel(num_threads=2, kwargs_list=None):
+def run_parallel(num_threads=2, kwargs_list=None):
 """
 Decorator to run the same function multiple times in parallel.

@@ -95,7 +95,7 @@ def setup(self, threads, method):
 {"key": np.random.randint(0, ngroups, size=N), "data": np.random.randn(N)}
 )

-@test_parallel(num_threads=threads)
+@run_parallel(num_threads=threads)
 def parallel():
 getattr(df.groupby("key")["data"], method)()

@@ -123,7 +123,7 @@ def setup(self, threads):
 ngroups = 10**3
 data = Series(np.random.randint(0, ngroups, size=size))

-@test_parallel(num_threads=threads)
+@run_parallel(num_threads=threads)
 def get_groups():
 data.groupby(data).groups

@@ -142,7 +142,7 @@ def setup(self, dtype):
 df = DataFrame({"col": np.arange(N, dtype=dtype)})
 indexer = np.arange(100, len(df) - 100)

-@test_parallel(num_threads=2)
+@run_parallel(num_threads=2)
 def parallel_take1d():
 take_nd(df["col"].values, indexer)

@@ -163,7 +163,7 @@ def setup(self):
 k = 5 * 10**5
 kwargs_list = [{"arr": np.random.randn(N)}, {"arr": np.random.randn(N)}]

-@test_parallel(num_threads=2, kwargs_list=kwargs_list)
+@run_parallel(num_threads=2, kwargs_list=kwargs_list)
 def parallel_kth_smallest(arr):
 algos.kth_smallest(arr, k)

@@ -180,42 +180,42 @@ def setup(self):
 self.period = self.dti.to_period("D")

 def time_datetime_field_year(self):
-@test_parallel(num_threads=2)
+@run_parallel(num_threads=2)
 def run(dti):
 dti.year

 run(self.dti)

 def time_datetime_field_day(self):
-@test_parallel(num_threads=2)
+@run_parallel(num_threads=2)
 def run(dti):
 dti.day

 run(self.dti)

 def time_datetime_field_daysinmonth(self):
-@test_parallel(num_threads=2)
+@run_parallel(num_threads=2)
 def run(dti):
 dti.days_in_month

 run(self.dti)

 def time_datetime_field_normalize(self):
-@test_parallel(num_threads=2)
+@run_parallel(num_threads=2)
 def run(dti):
 dti.normalize()

 run(self.dti)

 def time_datetime_to_period(self):
-@test_parallel(num_threads=2)
+@run_parallel(num_threads=2)
 def run(dti):
 dti.to_period("s")

 run(self.dti)

 def time_period_to_datetime(self):
-@test_parallel(num_threads=2)
+@run_parallel(num_threads=2)
 def run(period):
 period.to_timestamp()

@@ -232,7 +232,7 @@ def setup(self, method):
 if hasattr(DataFrame, "rolling"):
 df = DataFrame(arr).rolling(win)

-@test_parallel(num_threads=2)
+@run_parallel(num_threads=2)
 def parallel_rolling():
 getattr(df, method)()

@@ -249,7 +249,7 @@ def parallel_rolling():
 "std": rolling_std,
 }

-@test_parallel(num_threads=2)
+@run_parallel(num_threads=2)
 def parallel_rolling():
 rolling[method](arr, win)

@@ -286,7 +286,7 @@ def setup(self, dtype):
 self.fname = f"__test_{dtype}__.csv"
 df.to_csv(self.fname)

-@test_parallel(num_threads=2)
+@run_parallel(num_threads=2)
 def parallel_read_csv():
 read_csv(self.fname)

@@ -305,7 +305,7 @@ class ParallelFactorize:
 def setup(self, threads):
 strings = Index([f"i-{i}" for i in range(100000)], dtype=object)

-@test_parallel(num_threads=threads)
+@run_parallel(num_threads=threads)
 def parallel():
 factorize(strings)
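The only change in this file is the rename of the test_parallel helper to run_parallel (so the test_ prefix no longer makes it look like a collectable test); its behaviour is unchanged: a decorator that runs the wrapped function in several threads at once. A simplified, illustrative sketch of such a decorator follows — it approximates, but is not, the pandas implementation in asv_bench/benchmarks/gil.py:

    from threading import Thread

    def run_parallel(num_threads=2, kwargs_list=None):
        """Decorator to run the same function multiple times in parallel."""
        def wrapper(func):
            def inner(*args, **kwargs):
                if kwargs_list is not None:
                    # One kwargs dict per thread, as in the kth_smallest benchmark.
                    assert len(kwargs_list) == num_threads
                    threads = [
                        Thread(target=func, args=args, kwargs=kwargs_list[i])
                        for i in range(num_threads)
                    ]
                else:
                    threads = [
                        Thread(target=func, args=args, kwargs=kwargs)
                        for _ in range(num_threads)
                    ]
                for t in threads:
                    t.start()
                for t in threads:
                    t.join()
            return inner
        return wrapper

    # Usage mirrors the benchmarks above: decorate a closure, then call it once.
    @run_parallel(num_threads=2)
    def compute():
        sum(range(10**6))

    compute()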

asv_bench/benchmarks/io/csv.py

Lines changed: 19 additions & 0 deletions
@@ -53,6 +53,25 @@ def time_frame(self, kind):
 self.df.to_csv(self.fname)


+class ToCSVFloatFormatVariants(BaseIO):
+fname = "__test__.csv"
+
+def setup(self):
+self.df = DataFrame(np.random.default_rng(seed=42).random((1000, 1000)))
+
+def time_old_style_percent_format(self):
+self.df.to_csv(self.fname, float_format="%.6f")
+
+def time_new_style_brace_format(self):
+self.df.to_csv(self.fname, float_format="{:.6f}")
+
+def time_new_style_thousands_format(self):
+self.df.to_csv(self.fname, float_format="{:,.2f}")
+
+def time_callable_format(self):
+self.df.to_csv(self.fname, float_format=lambda x: f"{x:.6f}")
+
+
 class ToCSVMultiIndexUnusedLevels(BaseIO):
 fname = "__test__.csv"
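The new ToCSVFloatFormatVariants benchmark times the float_format variants accepted by DataFrame.to_csv: an old-style percent string, new-style brace format strings (including one with a thousands separator), and an arbitrary callable. A short usage sketch of the same variants; the brace-string and callable forms are assumed to require a pandas as new as the one this benchmark targets:

    import numpy as np
    import pandas as pd

    df = pd.DataFrame(np.random.default_rng(seed=42).random((3, 3)))

    # With no path argument, to_csv returns the CSV text, so the formatting is easy to inspect.
    print(df.to_csv(float_format="%.6f"))                # old-style percent format
    print(df.to_csv(float_format="{:.6f}"))              # new-style brace format
    print(df.to_csv(float_format="{:,.2f}"))             # brace format with thousands separator
    print(df.to_csv(float_format=lambda x: f"{x:.6f}"))  # arbitrary callable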

ci/code_checks.sh

Lines changed: 4 additions & 1 deletion
@@ -58,7 +58,9 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then

 MSG='Python and Cython Doctests' ; echo "$MSG"
 python -c 'import pandas as pd; pd.test(run_doctests=True)'
-RET=$(($RET + $?)) ; echo "$MSG" "DONE"
+# TEMP don't let doctests fail the build until all string dtype changes are fixed
+# RET=$(($RET + $?)) ; echo "$MSG" "DONE"
+echo "$MSG" "DONE"

 fi

@@ -72,6 +74,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
 -i "pandas.Series.dt PR01" `# Accessors are implemented as classes, but we do not document the Parameters section` \
 -i "pandas.Period.freq GL08" \
 -i "pandas.Period.ordinal GL08" \
+-i "pandas.errors.IncompatibleFrequency SA01,SS06,EX01" \
 -i "pandas.core.groupby.DataFrameGroupBy.plot PR02" \
 -i "pandas.core.groupby.SeriesGroupBy.plot PR02" \
 -i "pandas.core.resample.Resampler.quantile PR01,PR07" \

ci/deps/actions-310-minimum_versions.yaml

Lines changed: 2 additions & 2 deletions
@@ -22,7 +22,7 @@ dependencies:

 # required dependencies
 - python-dateutil=2.8.2
-- numpy=1.23.5
+- numpy=1.26.0

 # optional dependencies
 - beautifulsoup4=4.12.3
@@ -62,4 +62,4 @@ dependencies:
 - pip:
 - adbc-driver-postgresql==0.10.0
 - adbc-driver-sqlite==0.8.0
-- tzdata==2022.7
+- tzdata==2023.3

ci/deps/actions-310.yaml

Lines changed: 1 addition & 1 deletion
@@ -60,4 +60,4 @@ dependencies:
 - pip:
 - adbc-driver-postgresql>=0.10.0
 - adbc-driver-sqlite>=0.8.0
-- tzdata>=2022.7
+- tzdata>=2023.3

ci/deps/actions-311-downstream_compat.yaml

Lines changed: 1 addition & 1 deletion
@@ -73,4 +73,4 @@ dependencies:
 - pip:
 - adbc-driver-postgresql>=0.10.0
 - adbc-driver-sqlite>=0.8.0
-- tzdata>=2022.7
+- tzdata>=2023.3
