|
3 | 3 | import shutil
|
4 | 4 | import tempfile
|
5 | 5 | import warnings
|
6 |
| -from collections.abc import Callable, Iterable |
| 6 | +from collections.abc import Callable, Iterable, Mapping, Sequence |
7 | 7 | from os import path
|
8 | 8 |
|
9 | 9 | import geopandas as gpd
|
@@ -159,6 +159,151 @@ def building_gdf(self) -> gpd.GeoDataFrame | None:
|
159 | 159 | ).to_crs(self.CRS)
|
160 | 160 | return self._building_gdf
|
161 | 161 |
|
@staticmethod
def _compute_vector(
    gdf: gpd.GeoDataFrame | gpd.GeoSeries | utils.PathType,
    site_gser: gpd.GeoSeries,
    buffer_dists: Iterable[float],
    _site_gb_to_feature_ser: Callable,
    *args,
    **kwargs: utils.KwargsType,
) -> pd.DataFrame:
    """Compute per-site, per-buffer-distance features from vector data.

    For each buffer distance, every site geometry is buffered, spatially joined
    with the vector data, and the joined rows are grouped by site. The group-by
    object is then handed to `_site_gb_to_feature_ser`, and the results for all
    buffer distances are stacked into a single data frame.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame, geopandas.GeoSeries or path-like
        Vector data to join with the site buffers. Path-like inputs are read
        with `geopandas.read_file`. No reprojection is performed here.
    site_gser : geopandas.GeoSeries
        Site geometries. If its index has no name, "site_id" is used.
    buffer_dists : iterable of numeric
        Buffer distances, passed as-is to `site_gser.buffer`.
    _site_gb_to_feature_ser : callable
        Function receiving the per-site group-by object (followed by `*args` and
        `**kwargs`) and returning the features as a data frame or series.
    *args, **kwargs
        Extra arguments forwarded to `_site_gb_to_feature_ser`.

    Returns
    -------
    features : pandas.DataFrame
        The stacked features, with "buffer_dist" appended as the last index
        level, missing values filled with 0 and the index sorted.
    """
    # `sjoin` needs a GeoDataFrame on the right-hand side, so normalize the other
    # advertised input types first.
    if isinstance(gdf, gpd.GeoSeries):
        gdf = gdf.to_frame(name="geometry")
    elif not isinstance(gdf, gpd.GeoDataFrame):
        gdf = gpd.read_file(gdf)

    # TODO: DRY with `_multiscale_raster_feature_df`
    site_index_name = site_gser.index.name
    if site_index_name is None:
        # the index is reset to a column and used as group-by key, so it needs a name
        site_index_name = "site_id"
        site_gser = site_gser.rename_axis(site_index_name)

    feature_dfs = []
    for buffer_dist in buffer_dists:
        site_gb = (
            site_gser.buffer(buffer_dist)
            .to_frame(name="geometry")
            .sjoin(gdf)
            .reset_index(site_index_name)
            .groupby(by=site_index_name)
        )
        features = _site_gb_to_feature_ser(site_gb, *args, **kwargs)
        if isinstance(features, pd.Series):
            # `assign` only exists on data frames; promote single-feature results
            features = features.to_frame()
        feature_dfs.append(features.assign(buffer_dist=buffer_dist))

    return (
        pd.concat(
            feature_dfs,
            axis="rows",
        )
        .fillna(0)
        .set_index("buffer_dist", append=True)
        .sort_index()
    )
| 201 | + |
@staticmethod
def compute_vector_agg(
    data: gpd.GeoDataFrame | gpd.GeoSeries | utils.PathType,
    site_gser: gpd.GeoSeries,
    buffer_dists: Iterable[float],
    agg_func: Callable | str | Sequence | Mapping,
    *,
    agg_func_args: Sequence | None = None,
    **agg_func_kwargs: utils.KwargsType,
) -> pd.DataFrame:
    """Compute vector aggregation features.

    Parameters
    ----------
    data : geopandas.GeoDataFrame, geopandas.GeoSeries or path-like
        Vector data to compute features.
    site_gser : geopandas.GeoSeries
        Site locations (point geometries) to compute features.
    buffer_dists : iterable of numeric
        The buffer distances to compute features, in the units of the
        `site_gser` CRS (they are passed to `site_gser.buffer`).
    agg_func : callable, str, sequence or mapping
        The aggregation to apply to the data, passed to the `agg` method of the
        group-by object. This can be a callable, a string (e.g., "sum", "mean"),
        a sequence of strings (e.g., ["sum", "mean"]), or a mapping of column names
        to aggregation functions (e.g., {"building_area": "sum"}).
    agg_func_args : sequence, optional
        Additional positional arguments passed to the `agg` method after
        `agg_func`.
    **agg_func_kwargs : mapping, optional
        Additional keyword arguments passed to the `agg` method.

    Returns
    -------
    features : pandas.DataFrame
        The computed features for each site and buffer distance (the latter is
        appended as the last index level, named "buffer_dist").
    """
    # `**agg_func_kwargs` is always a dict, so only the positional extras need a
    # default.
    if agg_func_args is None:
        agg_func_args = ()

    def _agg(site_gb):
        return site_gb.agg(agg_func, *agg_func_args, **agg_func_kwargs)

    return MultiScaleFeatureComputer._compute_vector(
        data, site_gser, buffer_dists, _agg
    )
| 250 | + |
@staticmethod
def compute_vector_apply(
    data: gpd.GeoDataFrame | gpd.GeoSeries | utils.PathType,
    site_gser: gpd.GeoSeries,
    buffer_dists: Iterable[float],
    apply_func: Callable,
    *,
    include_groups: bool = False,
    apply_func_args: Sequence | None = None,
    **apply_func_kwargs: utils.KwargsType,
) -> pd.DataFrame:
    """Compute vector features using a groupby-apply approach.

    Parameters
    ----------
    data : geopandas.GeoDataFrame, geopandas.GeoSeries or path-like
        Vector data to compute features.
    site_gser : geopandas.GeoSeries
        Site locations (point geometries) to compute features.
    buffer_dists : iterable of numeric
        The buffer distances to compute features, in the units of the
        `site_gser` CRS (they are passed to `site_gser.buffer`).
    apply_func : callable
        The function to apply to the data, passed to the `progress_apply` method
        of the group-by object.
    include_groups : bool, default False
        If True, apply the function to the groupings in the case that they are
        columns of the DataFrame (passed as the `include_groups` argument of the
        `apply` method).
    apply_func_args : sequence, optional
        Additional positional arguments to pass to `apply_func`.
    **apply_func_kwargs : mapping, optional
        Keyword arguments to pass to `apply_func`.

    Returns
    -------
    features : pandas.DataFrame
        The computed features for each site and buffer distance (the latter is
        appended as the last index level, named "buffer_dist").
    """
    # `**apply_func_kwargs` is always a dict, so only the positional extras need a
    # default.
    if apply_func_args is None:
        apply_func_args = ()

    def _apply(site_gb):
        # NOTE(review): `progress_apply` presumably relies on `tqdm.pandas()` having
        # been registered elsewhere in this module - confirm.
        return site_gb.progress_apply(
            apply_func,
            *apply_func_args,
            include_groups=include_groups,
            **apply_func_kwargs,
        )

    return MultiScaleFeatureComputer._compute_vector(
        data, site_gser, buffer_dists, _apply
    )
| 306 | + |
162 | 307 | def compute_building_features(
|
163 | 308 | self,
|
164 | 309 | site_gser: gpd.GeoSeries,
|
@@ -223,31 +368,8 @@ def _compute_building_area_vol(_building_gdf):
|
223 | 368 | else:
|
224 | 369 | _compute_features = _compute_building_area
|
225 | 370 |
|
226 |
| - # TODO: DRY with `_multiscale_raster_feature_df` |
227 |
| - site_index_name = site_gser.index.name |
228 |
| - if site_index_name is None: |
229 |
| - site_index_name = "site_id" |
230 |
| - site_gser = site_gser.rename_axis(site_index_name) |
231 |
| - |
232 |
| - return ( |
233 |
| - pd.concat( |
234 |
| - [ |
235 |
| - ( |
236 |
| - site_gser.buffer(buffer_dist) |
237 |
| - .to_frame(name="geometry") |
238 |
| - .sjoin(building_gdf) |
239 |
| - .reset_index(site_index_name) |
240 |
| - .groupby(by=site_index_name) |
241 |
| - .progress_apply(_compute_features, include_groups=False) |
242 |
| - # / (np.pi * buffer_dist**2) |
243 |
| - ).assign(buffer_dist=buffer_dist) |
244 |
| - for buffer_dist in buffer_dists |
245 |
| - ], |
246 |
| - axis="rows", |
247 |
| - ) |
248 |
| - .fillna(0) |
249 |
| - .set_index("buffer_dist", append=True) |
250 |
| - .sort_index() |
| 371 | + return MultiScaleFeatureComputer.compute_vector_apply( |
| 372 | + building_gdf, site_gser, buffer_dists, _compute_features |
251 | 373 | )
|
252 | 374 |
|
253 | 375 | @staticmethod
|
|
0 commit comments