Skip to content

Commit a755269

Browse files
tomwhite authored and mergify[bot] committed
Change Dataset.dims to Dataset.sizes to address Xarray FutureWarning
1 parent 2ab47b5 commit a755269

31 files changed

+97
-97
lines changed

sgkit/display.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -125,7 +125,7 @@ def truncate(ds: xr.Dataset, max_sizes: Mapping[Hashable, int]) -> xr.Dataset:
125125
"""
126126
sel = dict()
127127
for dim, size in max_sizes.items():
128-
if ds.dims[dim] <= size:
128+
if ds.sizes[dim] <= size:
129129
# No truncation required
130130
pass
131131
else:
@@ -194,7 +194,7 @@ def display_genotypes(
194194
ds_calls = set_index_if_unique(ds_calls, "variants", variant_index)
195195
# convert call genotypes to strings
196196
calls = ds_calls["call_genotype"].values
197-
max_chars = max(2, len(str(ds.dims["alleles"] - 1)))
197+
max_chars = max(2, len(str(ds.sizes["alleles"] - 1)))
198198
if "call_genotype_phased" in ds_calls:
199199
phased = ds_calls["call_genotype_phased"].values
200200
else:

sgkit/io/bgen/bgen_reader.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -486,8 +486,8 @@ def rechunk_bgen(
486486
if isinstance(output, Path):
487487
output = str(output)
488488

489-
chunk_length = min(chunk_length, ds.dims["variants"])
490-
chunk_width = min(chunk_width, ds.dims["samples"])
489+
chunk_length = min(chunk_length, ds.sizes["variants"])
490+
chunk_width = min(chunk_width, ds.sizes["samples"])
491491

492492
if pack:
493493
ds = pack_variables(ds)

sgkit/io/plink/plink_writer.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -90,9 +90,9 @@ def write_plink(
9090
raise ValueError(
9191
"Either `path` or all 3 of `{bed,bim,fam}_path` must be specified but not both"
9292
)
93-
if "ploidy" in ds.dims and ds.dims["ploidy"] != 2:
93+
if "ploidy" in ds.sizes and ds.sizes["ploidy"] != 2:
9494
raise ValueError("write_plink only works for diploid genotypes")
95-
if "alleles" in ds.dims and ds.dims["alleles"] != 2:
95+
if "alleles" in ds.sizes and ds.sizes["alleles"] != 2:
9696
raise ValueError("write_plink only works for biallelic genotypes")
9797

9898
if path:

sgkit/io/vcf/vcf_reader.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -571,8 +571,8 @@ def vcf_to_zarr_sequential(
571571

572572
if first_variants_chunk:
573573
# limit chunk width to actual number of samples seen in first chunk
574-
if ds.dims["samples"] > 0:
575-
chunk_width = min(chunk_width, ds.dims["samples"])
574+
if ds.sizes["samples"] > 0:
575+
chunk_width = min(chunk_width, ds.sizes["samples"])
576576

577577
# ensure that booleans are not stored as int8 by xarray https://github.com/pydata/xarray/issues/4386
578578
for var in ds.data_vars:

sgkit/io/vcf/vcf_writer.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -147,7 +147,7 @@ def write_vcf(
147147

148148
print(vcf_header, end="", file=output)
149149

150-
if input.dims["variants"] == 0:
150+
if input.sizes["variants"] == 0:
151151
return
152152

153153
header_info_fields = _info_fields(vcf_header)
@@ -174,8 +174,8 @@ def dataset_chunk_to_vcf(
174174

175175
ds = ds.load() # load dataset chunk into memory
176176

177-
n_variants = ds.dims["variants"] # number of variants in this chunk
178-
n_samples = ds.dims["samples"] # number of samples in whole dataset
177+
n_variants = ds.sizes["variants"] # number of variants in this chunk
178+
n_samples = ds.sizes["samples"] # number of samples in whole dataset
179179

180180
# fixed fields
181181

sgkit/model.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -169,8 +169,8 @@ def create_genotype_dosage_dataset(
169169

170170
def num_contigs(ds: xr.Dataset) -> ArrayLike:
171171
"""Return the number of contigs in a dataset."""
172-
if DIM_CONTIG in ds.dims:
173-
return ds.dims[DIM_CONTIG]
172+
if DIM_CONTIG in ds.sizes:
173+
return ds.sizes[DIM_CONTIG]
174174
else:
175175
return len(ds.attrs["contigs"])
176176

sgkit/stats/aggregation.py

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -75,7 +75,7 @@ def count_call_alleles(
7575
from .aggregation_numba_fns import count_alleles
7676

7777
variables.validate(ds, {call_genotype: variables.call_genotype_spec})
78-
n_alleles = ds.dims["alleles"]
78+
n_alleles = ds.sizes["alleles"]
7979
G = da.asarray(ds[call_genotype])
8080
shape = (G.chunks[0], G.chunks[1], n_alleles)
8181
# use numpy array to avoid dask task dependencies between chunks
@@ -170,8 +170,8 @@ def count_variant_alleles(
170170
from .aggregation_numba_fns import count_alleles
171171

172172
variables.validate(ds, {call_genotype: variables.call_genotype_spec})
173-
n_alleles = ds.dims["alleles"]
174-
n_variant = ds.dims["variants"]
173+
n_alleles = ds.sizes["alleles"]
174+
n_variant = ds.sizes["variants"]
175175
G = da.asarray(ds[call_genotype]).reshape((n_variant, -1))
176176
shape = (G.chunks[0], n_alleles)
177177
# use uint64 dummy array to return uint64 counts array
@@ -227,7 +227,7 @@ def count_cohort_alleles(
227227
>>> ds = sg.simulate_genotype_call_dataset(n_variant=5, n_sample=4)
228228
229229
>>> # Divide samples into two cohorts
230-
>>> ds["sample_cohort"] = xr.DataArray(np.repeat([0, 1], ds.dims["samples"] // 2), dims="samples")
230+
>>> ds["sample_cohort"] = xr.DataArray(np.repeat([0, 1], ds.sizes["samples"] // 2), dims="samples")
231231
>>> sg.display_genotypes(ds) # doctest: +NORMALIZE_WHITESPACE
232232
samples S0 S1 S2 S3
233233
variants
@@ -364,8 +364,8 @@ def count_variant_genotypes(
364364
mixed_ploidy = ds[call_genotype].attrs.get("mixed_ploidy", False)
365365
if mixed_ploidy:
366366
raise ValueError("Mixed-ploidy dataset")
367-
ploidy = ds.dims["ploidy"]
368-
n_alleles = ds.dims["alleles"]
367+
ploidy = ds.sizes["ploidy"]
368+
n_alleles = ds.sizes["alleles"]
369369
n_genotypes = _comb_with_replacement(n_alleles, ploidy)
370370
G = da.asarray(ds[call_genotype].data)
371371
N = np.empty(n_genotypes, np.uint64)
@@ -432,8 +432,8 @@ def genotype_coords(
432432
"""
433433
from .conversion_numba_fns import _comb_with_replacement, _index_as_genotype
434434

435-
n_alleles = ds.dims["alleles"]
436-
ploidy = ds.dims["ploidy"]
435+
n_alleles = ds.sizes["alleles"]
436+
ploidy = ds.sizes["ploidy"]
437437
n_genotypes = _comb_with_replacement(n_alleles, ploidy)
438438
max_chars = len(str(n_alleles - 1))
439439
# dummy variable for ploidy dim also specifies output dtype
@@ -553,7 +553,7 @@ def cohort_allele_frequencies(
553553
>>> ds = sg.simulate_genotype_call_dataset(n_variant=5, n_sample=4)
554554
555555
>>> # Divide samples into two cohorts
556-
>>> ds["sample_cohort"] = xr.DataArray(np.repeat([0, 1], ds.dims["samples"] // 2), dims="samples")
556+
>>> ds["sample_cohort"] = xr.DataArray(np.repeat([0, 1], ds.sizes["samples"] // 2), dims="samples")
557557
>>> sg.display_genotypes(ds) # doctest: +NORMALIZE_WHITESPACE
558558
samples S0 S1 S2 S3
559559
variants

sgkit/stats/association.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -210,7 +210,7 @@ def gwas_linear_regression(
210210

211211
if len(covariates) == 0:
212212
if add_intercept:
213-
X = da.ones((ds.dims["samples"], 1), dtype=np.float32)
213+
X = da.ones((ds.sizes["samples"], 1), dtype=np.float32)
214214
else:
215215
raise ValueError("add_intercept must be True if no covariates specified")
216216
else:

sgkit/stats/conversion.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -100,7 +100,7 @@ def convert_call_to_index(
100100
raise ValueError("Mixed-ploidy dataset")
101101
G = da.asarray(ds[call_genotype].data)
102102
shape = G.chunks[0:2]
103-
if ds.dims.get("alleles") == 2: # default to general case
103+
if ds.sizes.get("alleles") == 2: # default to general case
104104
X = da.map_blocks(
105105
biallelic_genotype_call_index,
106106
G,
@@ -169,10 +169,10 @@ def convert_probability_to_call(
169169
variables.validate(
170170
ds, {call_genotype_probability: variables.call_genotype_probability_spec}
171171
)
172-
if ds.dims["genotypes"] != 3:
172+
if ds.sizes["genotypes"] != 3:
173173
raise NotImplementedError(
174174
f"Hard call conversion only supported for diploid, biallelic genotypes; "
175-
f"num genotypes in provided probabilities array = {ds.dims['genotypes']}."
175+
f"num genotypes in provided probabilities array = {ds.sizes['genotypes']}."
176176
)
177177
GP = da.asarray(ds[call_genotype_probability])
178178
# Remove chunking in genotypes dimension, if present

sgkit/stats/grm.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -176,7 +176,7 @@ def genomic_relationship(
176176
[1, 0, 0],
177177
[1, 1, 2]], dtype=uint8)
178178
>>> # use sample population frequency as ancestral frequency
179-
>>> ds["sample_frequency"] = ds.call_dosage.mean(dim="samples") / ds.dims["ploidy"]
179+
>>> ds["sample_frequency"] = ds.call_dosage.mean(dim="samples") / ds.sizes["ploidy"]
180180
>>> ds = sg.genomic_relationship(ds, ancestral_frequency="sample_frequency")
181181
>>> ds.stat_genomic_relationship.values # doctest: +NORMALIZE_WHITESPACE
182182
array([[ 0.93617021, -0.21276596, -0.72340426],
@@ -208,7 +208,7 @@ def genomic_relationship(
208208
[ 2., 2., 0., 0.]])
209209
>>> ds["sample_frequency"] = ds.call_dosage.mean(
210210
... dim="samples", skipna=True
211-
... ) / ds.dims["ploidy"]
211+
... ) / ds.sizes["ploidy"]
212212
>>> ds = sg.genomic_relationship(
213213
... ds, ancestral_frequency="sample_frequency", skipna=True
214214
... )
@@ -249,7 +249,7 @@ def genomic_relationship(
249249
[2. , 2. , 0. , 0. ]])
250250
>>> ds["sample_frequency"] = ds.call_dosage.mean(
251251
... dim="samples", skipna=True
252-
... ) / ds.dims["ploidy"]
252+
... ) / ds.sizes["ploidy"]
253253
>>> ds = sg.genomic_relationship(
254254
... ds,
255255
... call_dosage="call_dosage_imputed",
@@ -293,7 +293,7 @@ def genomic_relationship(
293293

294294
estimator = estimator or EST_VAN_RADEN
295295
# TODO: raise on mixed ploidy
296-
ploidy = ploidy or ds.dims.get("ploidy")
296+
ploidy = ploidy or ds.sizes.get("ploidy")
297297
if ploidy is None:
298298
raise ValueError("Ploidy must be specified when the ploidy dimension is absent")
299299
dosage = da.array(ds[call_dosage].data)

0 commit comments

Comments
 (0)