Skip to content

Commit 9763263

Browse files
committed
feat: WIP generic vector agg/apply methods
1 parent f8f79ad commit 9763263

File tree

2 files changed

+165
-43
lines changed

2 files changed

+165
-43
lines changed

docs/overview.ipynb

Lines changed: 17 additions & 17 deletions
Large diffs are not rendered by default.

multilandpy/core.py

Lines changed: 148 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import shutil
44
import tempfile
55
import warnings
6-
from collections.abc import Callable, Iterable
6+
from collections.abc import Callable, Iterable, Mapping, Sequence
77
from os import path
88

99
import geopandas as gpd
@@ -159,6 +159,151 @@ def building_gdf(self) -> gpd.GeoDataFrame | None:
159159
).to_crs(self.CRS)
160160
return self._building_gdf
161161

162+
@staticmethod
def _compute_vector(
    gdf: gpd.GeoDataFrame | gpd.GeoSeries | utils.PathType,
    site_gser: gpd.GeoSeries,
    buffer_dists: Iterable[float],
    _site_gb_to_feature_ser: Callable,
    *args,
    **kwargs: utils.KwargsType,
) -> pd.DataFrame:
    """Compute per-site vector features at several buffer distances.

    For every buffer distance, the sites are buffered, spatially joined with the
    vector data and grouped by site; the resulting group-by object is handed to
    `_site_gb_to_feature_ser`, which reduces it to one row of features per site.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame, geopandas.GeoSeries or path-like
        Vector data to compute features from. A geo-series is converted to a
        single-column geo-data frame, and anything that is neither a geo-series
        nor a geo-data frame is read with `geopandas.read_file`.
    site_gser : geopandas.GeoSeries
        Site locations to compute features for. If its index is unnamed, the
        index is named "site_id".
    buffer_dists : iterable of numeric
        The buffer distances, passed to the `buffer` method of `site_gser`.
    _site_gb_to_feature_ser : callable
        Function that receives the per-site group-by object (followed by `args`
        and `kwargs`) and returns a pandas data frame or series indexed by site.
    *args, **kwargs
        Extra positional and keyword arguments forwarded to
        `_site_gb_to_feature_ser`.

    Returns
    -------
    features : pandas.DataFrame
        The computed features, indexed by site (first level) and "buffer_dist"
        (second level), sorted by index, with missing values filled with 0.
    """
    # TODO: DRY with `_multiscale_raster_feature_df`
    # `sjoin` only accepts a geo-data frame as its right operand, so normalize the
    # other advertised input types up front.
    if isinstance(gdf, gpd.GeoSeries):
        gdf = gdf.to_frame(name="geometry")
    elif not isinstance(gdf, gpd.GeoDataFrame):
        # NOTE(review): the loaded data is used in its own CRS - confirm whether it
        # should be reprojected to the CRS of `site_gser`.
        gdf = gpd.read_file(gdf)

    site_index_name = site_gser.index.name
    if site_index_name is None:
        # the index needs a name so that it can be reset into a column and grouped by
        site_index_name = "site_id"
        site_gser = site_gser.rename_axis(site_index_name)

    feature_dfs = []
    for buffer_dist in buffer_dists:
        site_gb = (
            site_gser.buffer(buffer_dist)
            .to_frame(name="geometry")
            .sjoin(gdf)
            .reset_index(site_index_name)
            .groupby(by=site_index_name)
        )
        features = _site_gb_to_feature_ser(site_gb, *args, **kwargs)
        if isinstance(features, pd.Series):
            # a single feature per site comes back as a series, which has no
            # `assign` method
            features = features.to_frame()
        feature_dfs.append(features.assign(buffer_dist=buffer_dist))

    return (
        pd.concat(
            feature_dfs,
            axis="rows",
        )
        .fillna(0)
        .set_index("buffer_dist", append=True)
        .sort_index()
    )
201+
202+
@staticmethod
def compute_vector_agg(
    data: gpd.GeoDataFrame | gpd.GeoSeries | utils.PathType,
    site_gser: gpd.GeoSeries,
    buffer_dists: Iterable[float],
    agg_func: Callable | str | Sequence | Mapping,
    *,
    agg_func_args: Sequence | None = None,
    **agg_func_kwargs: utils.KwargsType,
) -> pd.DataFrame:
    """Compute vector aggregation features.

    Parameters
    ----------
    data : geopandas.GeoDataFrame, geopandas.GeoSeries or path-like
        Vector data to compute features.
    site_gser : geopandas.GeoSeries
        Site locations (point geometries) to compute features.
    buffer_dists : iterable of numeric
        The buffer distances to compute features, in the units of the CRS of
        `site_gser` (the sites are the geometries being buffered).
    agg_func : callable, str, sequence or mapping
        The aggregation to apply to the data, passed to the `agg` method of the
        group-by object. This can be a callable, a string (e.g., "sum", "mean"),
        a sequence of strings (e.g., ["sum", "mean"]), or a mapping of column names
        to aggregation functions (e.g., {"building_area": "sum"}).
    agg_func_args : sequence, optional
        Additional positional arguments to pass to the `agg` method after
        `agg_func`.
    **agg_func_kwargs : mapping, optional
        Additional keyword arguments to pass to the `agg` method.

    Returns
    -------
    features : pandas.DataFrame
        The computed features for each site (first-level index) and buffer distance
        (second-level index).
    """
    if agg_func_args is None:
        agg_func_args = ()
    # `agg_func_kwargs` is collected through `**`, so it is always a dict and needs
    # no `None` fallback.

    def _agg(site_gb):
        # reduce the per-site groups with the user-provided aggregation
        return site_gb.agg(agg_func, *agg_func_args, **agg_func_kwargs)

    return MultiScaleFeatureComputer._compute_vector(
        data, site_gser, buffer_dists, _agg
    )
250+
251+
@staticmethod
def compute_vector_apply(
    data: gpd.GeoDataFrame | gpd.GeoSeries | utils.PathType,
    site_gser: gpd.GeoSeries,
    buffer_dists: Iterable[float],
    apply_func: Callable,
    *,
    include_groups: bool = False,
    apply_func_args: Sequence | None = None,
    **apply_func_kwargs: utils.KwargsType,
) -> pd.DataFrame:
    """Compute vector features using a groupby-apply approach.

    Parameters
    ----------
    data : geopandas.GeoDataFrame, geopandas.GeoSeries or path-like
        Vector data to compute features.
    site_gser : geopandas.GeoSeries
        Site locations (point geometries) to compute features.
    buffer_dists : iterable of numeric
        The buffer distances to compute features, in the units of the CRS of
        `site_gser` (the sites are the geometries being buffered).
    apply_func : callable
        The function to apply to the data, passed to the `progress_apply` method
        of the group-by object.
    include_groups : bool, default False
        If True, apply the function to the groupings in the case that they are
        columns of the DataFrame (passed as the `include_groups` argument of the
        `apply` method).
    apply_func_args : sequence, optional
        Additional positional arguments to pass to `apply_func`.
    **apply_func_kwargs : mapping, optional
        Keyword arguments to pass to `apply_func`.

    Returns
    -------
    features : pandas.DataFrame
        The computed features for each site (first-level index) and buffer distance
        (second-level index).
    """
    if apply_func_args is None:
        apply_func_args = ()
    # `apply_func_kwargs` is collected through `**`, so it is always a dict and
    # needs no `None` fallback.

    def _apply(site_gb):
        # NOTE(review): `progress_apply` is not a plain pandas method; presumably
        # it is registered elsewhere in this module (e.g., via tqdm) - confirm.
        return site_gb.progress_apply(
            apply_func,
            *apply_func_args,
            include_groups=include_groups,
            **apply_func_kwargs,
        )

    return MultiScaleFeatureComputer._compute_vector(
        data, site_gser, buffer_dists, _apply
    )
306+
162307
def compute_building_features(
163308
self,
164309
site_gser: gpd.GeoSeries,
@@ -223,31 +368,8 @@ def _compute_building_area_vol(_building_gdf):
223368
else:
224369
_compute_features = _compute_building_area
225370

226-
# TODO: DRY with `_multiscale_raster_feature_df`
227-
site_index_name = site_gser.index.name
228-
if site_index_name is None:
229-
site_index_name = "site_id"
230-
site_gser = site_gser.rename_axis(site_index_name)
231-
232-
return (
233-
pd.concat(
234-
[
235-
(
236-
site_gser.buffer(buffer_dist)
237-
.to_frame(name="geometry")
238-
.sjoin(building_gdf)
239-
.reset_index(site_index_name)
240-
.groupby(by=site_index_name)
241-
.progress_apply(_compute_features, include_groups=False)
242-
# / (np.pi * buffer_dist**2)
243-
).assign(buffer_dist=buffer_dist)
244-
for buffer_dist in buffer_dists
245-
],
246-
axis="rows",
247-
)
248-
.fillna(0)
249-
.set_index("buffer_dist", append=True)
250-
.sort_index()
371+
return MultiScaleFeatureComputer.compute_vector_apply(
372+
building_gdf, site_gser, buffer_dists, _compute_features
251373
)
252374

253375
@staticmethod

0 commit comments

Comments
 (0)