Skip to content

Commit 212c9f2

Browse files
shoyer and Xarray-Beam authors
authored and committed
Improve xarray_beam.Dataset __repr__.
The `__repr__` now replaces dask array representations within the template with `...` for brevity and clarity, because the dask chunks don't necessarily match the `xarray_beam` chunks. The test is updated to check for a more complete and accurate representation.

PiperOrigin-RevId: 828189621
1 parent 7d98a7d commit 212c9f2

File tree

3 files changed

+19
-13
lines changed

3 files changed

+19
-13
lines changed

xarray_beam/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,4 +55,4 @@
5555
DatasetToZarr as DatasetToZarr,
5656
)
5757

58-
__version__ = '0.11.4' # automatically synchronized to pyproject.toml
58+
__version__ = '0.11.5' # automatically synchronized to pyproject.toml

xarray_beam/_src/dataset.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -537,7 +537,10 @@ def chunk_count(self) -> int:
537537
)
538538

539539
def __repr__(self):
540-
base = repr(self.template)
540+
template_repr = repr(self.template)
541+
# replace dask.array reprs with ..., both for the sake of brevity and
542+
# because the dask chunks are not the same as the Dataset chunks.
543+
template_repr = re.sub(r'dask.array\<.*\>', '...', template_repr)
541544
chunks_str = ', '.join(
542545
[f'{k}: {v}' for k, v in self.chunks.items()]
543546
+ [f'split_vars={self.split_vars}']
@@ -551,7 +554,7 @@ def __repr__(self):
551554
f'PTransform: {self._ptransform}\n'
552555
f'Chunks: {chunk_size} ({chunks_str})\n'
553556
f'Template: {total_size} ({chunk_count} chunk{plural})\n'
554-
+ textwrap.indent('\n'.join(base.split('\n')[1:]), ' ' * 4)
557+
+ textwrap.indent('\n'.join(template_repr.split('\n')[1:]), ' ' * 4)
555558
)
556559

557560
@classmethod

xarray_beam/_src/dataset_test.py

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,12 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414
import re
15+
import textwrap
1516

1617
from absl.testing import absltest
1718
from absl.testing import parameterized
1819
import apache_beam as beam
1920
import numpy as np
20-
import pandas as pd
2121
import xarray
2222
import xarray_beam as xbeam
2323
from xarray_beam._src import dataset as xbeam_dataset
@@ -475,16 +475,19 @@ class DatasetTest(test_util.TestCase):
475475

476476
def test_repr(self):
477477
ds = xarray.Dataset({'foo': ('x', np.arange(10))})
478-
beam_ds = xbeam.Dataset.from_xarray(ds, {'x': 5})
479-
self.assertRegex(
478+
beam_ds = xbeam.Dataset.from_xarray(ds, {'x': 5}, label='my_label')
479+
self.assertEqual(
480480
repr(beam_ds),
481-
re.escape(
482-
'<xarray_beam.Dataset>\n'
483-
'PTransform: <DatasetToChunks>\n'
484-
'Chunks: 40B (x: 5, split_vars=False)\n'
485-
'Template: 80B (2 chunks)\n'
486-
' Dimensions:'
487-
).replace('DatasetToChunks', 'DatasetToChunks.*'),
481+
textwrap.dedent("""\
482+
<xarray_beam.Dataset>
483+
PTransform: <DatasetToChunks(PTransform) label=[my_label]>
484+
Chunks: 40B (x: 5, split_vars=False)
485+
Template: 80B (2 chunks)
486+
Dimensions: (x: 10)
487+
Dimensions without coordinates: x
488+
Data variables:
489+
foo (x) int64 80B ...
490+
""").strip(),
488491
)
489492

490493
def test_from_ptransform(self):

0 commit comments

Comments
 (0)