diff --git a/polytope_feature/datacube/backends/qubed_fdb.py b/polytope_feature/datacube/backends/qubed_fdb.py
deleted file mode 100644
index c3b339b2..00000000
--- a/polytope_feature/datacube/backends/qubed_fdb.py
+++ /dev/null
@@ -1,407 +0,0 @@
-import logging
-import operator
-from copy import deepcopy
-from itertools import product
-
-import requests
-from qubed import Qube
-
-from ...utility.exceptions import BadGridError, BadRequestError
-from ...utility.geometry import nearest_pt
-from .datacube import Datacube, TensorIndexTree
-
-
-class FDBDatacube(Datacube):
-    def __init__(
-        self, gj, config=None, axis_options=None, compressed_axes_options=[], alternative_axes=[], context=None
-    ):
-        if config is None:
-            config = {}
-        if context is None:
-            context = {}
-
-        super().__init__(axis_options, compressed_axes_options)
-
-        logging.info("Created an FDB datacube with options: " + str(axis_options))
-
-        self.unwanted_path = {}
-        self.axis_options = axis_options
-
-        # partial_request = config
-        # Find values in the level 3 FDB datacube
-
-        self.gj = gj
-        self.fdb_tree = Qube.from_json(
-            requests.get(
-                "https://github.com/ecmwf/qubed/raw/refs/heads/main/tests/example_qubes/climate_dt.json"
-            ).json()
-        )
-
-        if len(alternative_axes) == 0:
-            logging.info("Find GribJump axes for %s", context)
-            # TODO: change to get the axes from
-            # self.fdb_coordinates = self.gj.axes(partial_request, ctx=context)
-            self.fdb_coordinates = self.fdb_tree.axes()
-            logging.info("Retrieved available GribJump axes for %s", context)
-            if len(self.fdb_coordinates) == 0:
-                raise BadRequestError({})
-        else:
-            self.fdb_coordinates = {}
-            for axis_config in alternative_axes:
-                self.fdb_coordinates[axis_config.axis_name] = axis_config.values
-
-        fdb_coordinates_copy = deepcopy(self.fdb_coordinates)
-        for axis, vals in fdb_coordinates_copy.items():
-            if len(vals) == 1:
-                if vals[0] == "":
-                    self.fdb_coordinates.pop(axis)
-
-        logging.info("Axes returned from GribJump are: " + str(self.fdb_coordinates))
-
-        self.fdb_coordinates["values"] = []
-        for name, values in self.fdb_coordinates.items():
-            values.sort()
-            options = None
-            for opt in self.axis_options:
-                if opt.axis_name == name:
-                    options = opt
-
-            self._check_and_add_axes(options, name, values)
-            self.treated_axes.append(name)
-            self.complete_axes.append(name)
-
-        # add other options to axis which were just created above like "lat" for the mapper transformations for eg
-        for name in self._axes:
-            if name not in self.treated_axes:
-                options = None
-                for opt in self.axis_options:
-                    if opt.axis_name == name:
-                        options = opt
-
-                val = self._axes[name].type
-                self._check_and_add_axes(options, name, val)
-
-        logging.info("Polytope created axes for %s", self._axes.keys())
-
-    # def check_branching_axes(self, request):
-    #     polytopes = request.polytopes()
-    #     for polytope in polytopes:
-    #         for ax in polytope._axes:
-    #             if ax == "levtype":
-    #                 (upper, lower, idx) = polytope.extents(ax)
-    #                 if "sfc" in polytope.points[idx]:
-    #                     self.fdb_coordinates.pop("levelist", None)
-
-    #             if ax == "param":
-    #                 (upper, lower, idx) = polytope.extents(ax)
-    #                 if "140251" not in polytope.points[idx]:
-    #                     self.fdb_coordinates.pop("direction", None)
-    #                     self.fdb_coordinates.pop("frequency", None)
-    #                 else:
-    #                     # special param with direction and frequency
-    #                     if len(polytope.points[idx]) > 1:
-    #                         raise ValueError(
-    #                             "Param 251 is part of a special branching of the datacube. Please request it separately."  # noqa: E501
-    #                         )
-    #     self.fdb_coordinates.pop("quantile", None)
-    #     self.fdb_coordinates.pop("year", None)
-    #     self.fdb_coordinates.pop("month", None)
-
-    #     # NOTE: verify that we also remove the axis object for axes we've removed here
-    #     axes_to_remove = set(self.complete_axes) - set(self.fdb_coordinates.keys())
-
-    #     # Remove the keys from self._axes
-    #     for axis_name in axes_to_remove:
-    #         self._axes.pop(axis_name, None)
-
-    def get(self, requests: TensorIndexTree, context=None):
-        if context is None:
-            context = {}
-        if len(requests.children) == 0:
-            return requests
-        fdb_requests = []
-        fdb_requests_decoding_info = []
-        self.get_fdb_requests(requests, fdb_requests, fdb_requests_decoding_info)
-
-        # here, loop through the fdb requests and request from gj and directly add to the nodes
-        complete_list_complete_uncompressed_requests = []
-        complete_fdb_decoding_info = []
-        for j, compressed_request in enumerate(fdb_requests):
-            uncompressed_request = {}
-
-            # Need to determine the possible decompressed requests
-
-            # find the possible combinations of compressed indices
-            interm_branch_tuple_values = []
-            for key in compressed_request[0].keys():
-                interm_branch_tuple_values.append(compressed_request[0][key])
-            request_combis = product(*interm_branch_tuple_values)
-
-            # Need to extract the possible requests and add them to the right nodes
-            for combi in request_combis:
-                uncompressed_request = {}
-                for i, key in enumerate(compressed_request[0].keys()):
-                    uncompressed_request[key] = combi[i]
-                complete_uncompressed_request = (uncompressed_request, compressed_request[1], self.grid_md5_hash)
-                complete_list_complete_uncompressed_requests.append(complete_uncompressed_request)
-                complete_fdb_decoding_info.append(fdb_requests_decoding_info[j])
-
-        if logging.root.level <= logging.DEBUG:
-            printed_list_to_gj = complete_list_complete_uncompressed_requests[::1000]
-            logging.debug("The requests we give GribJump are: %s", printed_list_to_gj)
-        logging.info("Requests given to GribJump extract for %s", context)
-        try:
-            output_values = self.gj.extract(complete_list_complete_uncompressed_requests, context)
-        except Exception as e:
-            if "BadValue: Grid hash mismatch" in str(e):
-                logging.info("Error is: %s", e)
-                raise BadGridError()
-            else:
-                raise e
-
-        logging.info("Requests extracted from GribJump for %s", context)
-        if logging.root.level <= logging.DEBUG:
-            printed_output_values = output_values[::1000]
-            logging.debug("GribJump outputs: %s", printed_output_values)
-        self.assign_fdb_output_to_nodes(output_values, complete_fdb_decoding_info)
-
-    def get_fdb_requests(
-        self,
-        requests: TensorIndexTree,
-        fdb_requests=[],
-        fdb_requests_decoding_info=[],
-        leaf_path=None,
-    ):
-        if leaf_path is None:
-            leaf_path = {}
-
-        # First when request node is root, go to its children
-        if requests.axis.name == "root":
-            logging.debug("Looking for data for the tree")
-
-            for c in requests.children:
-                self.get_fdb_requests(c, fdb_requests, fdb_requests_decoding_info)
-        # If request node has no children, we have a leaf so need to assign fdb values to it
-        else:
-            key_value_path = {requests.axis.name: requests.values}
-            ax = requests.axis
-            (key_value_path, leaf_path, self.unwanted_path) = ax.unmap_path_key(
-                key_value_path, leaf_path, self.unwanted_path
-            )
-            leaf_path.update(key_value_path)
-            if len(requests.children[0].children[0].children) == 0:
-                # find the fdb_requests and associated nodes to which to add results
-                (path, current_start_idxs, fdb_node_ranges, lat_length) = self.get_2nd_last_values(requests, leaf_path)
-                (
-                    original_indices,
-                    sorted_request_ranges,
-                    fdb_node_ranges,
-                ) = self.sort_fdb_request_ranges(current_start_idxs, lat_length, fdb_node_ranges)
-                fdb_requests.append((path, sorted_request_ranges))
-                fdb_requests_decoding_info.append((original_indices, fdb_node_ranges))
-
-            # Otherwise remap the path for this key and iterate again over children
-            else:
-                for c in requests.children:
-                    self.get_fdb_requests(c, fdb_requests, fdb_requests_decoding_info, leaf_path)
-
-    def remove_duplicates_in_request_ranges(self, fdb_node_ranges, current_start_idxs):
-        seen_indices = set()
-        for i, idxs_list in enumerate(current_start_idxs):
-            for k, sub_lat_idxs in enumerate(idxs_list):
-                actual_fdb_node = fdb_node_ranges[i][k]
-                original_fdb_node_range_vals = []
-                new_current_start_idx = []
-                for j, idx in enumerate(sub_lat_idxs):
-                    if idx not in seen_indices:
-                        # NOTE: need to remove it from the values in the corresponding tree node
-                        # NOTE: need to read just the range we give to gj
-                        original_fdb_node_range_vals.append(actual_fdb_node[0].values[j])
-                        seen_indices.add(idx)
-                        new_current_start_idx.append(idx)
-                if original_fdb_node_range_vals != []:
-                    actual_fdb_node[0].values = tuple(original_fdb_node_range_vals)
-                else:
-                    # there are no values on this node anymore so can remove it
-                    actual_fdb_node[0].remove_branch()
-                if len(new_current_start_idx) == 0:
-                    current_start_idxs[i].pop(k)
-                else:
-                    current_start_idxs[i][k] = new_current_start_idx
-        return (fdb_node_ranges, current_start_idxs)
-
-    def nearest_lat_lon_search(self, requests):
-        if len(self.nearest_search) != 0:
-            first_ax_name = requests.children[0].axis.name
-            second_ax_name = requests.children[0].children[0].axis.name
-
-            if first_ax_name not in self.nearest_search.keys() or second_ax_name not in self.nearest_search.keys():
-                raise Exception("nearest point search axes are wrong")
-
-            second_ax = requests.children[0].children[0].axis
-
-            nearest_pts = [
-                [lat_val, second_ax._remap_val_to_axis_range(lon_val)]
-                for (lat_val, lon_val) in zip(
-                    self.nearest_search[first_ax_name][0], self.nearest_search[second_ax_name][0]
-                )
-            ]
-
-            found_latlon_pts = []
-            for lat_child in requests.children:
-                for lon_child in lat_child.children:
-                    found_latlon_pts.append([lat_child.values, lon_child.values])
-
-            # now find the nearest lat lon to the points requested
-            nearest_latlons = []
-            for pt in nearest_pts:
-                nearest_latlon = nearest_pt(found_latlon_pts, pt)
-                nearest_latlons.append(nearest_latlon)
-
-            # need to remove the branches that do not fit
-            lat_children_values = [child.values for child in requests.children]
-            for i in range(len(lat_children_values)):
-                lat_child_val = lat_children_values[i]
-                lat_child = [child for child in requests.children if child.values == lat_child_val][0]
-                if lat_child.values not in [(latlon[0],) for latlon in nearest_latlons]:
-                    lat_child.remove_branch()
-                else:
-                    possible_lons = [latlon[1] for latlon in nearest_latlons if (latlon[0],) == lat_child.values]
-                    lon_children_values = [child.values for child in lat_child.children]
-                    for j in range(len(lon_children_values)):
-                        lon_child_val = lon_children_values[j]
-                        lon_child = [child for child in lat_child.children if child.values == lon_child_val][0]
-                        for value in lon_child.values:
-                            if value not in possible_lons:
-                                lon_child.remove_compressed_branch(value)
-
-    def get_2nd_last_values(self, requests, leaf_path=None):
-        if leaf_path is None:
-            leaf_path = {}
-        # In this function, we recursively loop over the last two layers of the tree and store the indices of the
-        # request ranges in those layers
-        self.nearest_lat_lon_search(requests)
-
-        lat_length = len(requests.children)
-        current_start_idxs = [False] * lat_length
-        fdb_node_ranges = [False] * lat_length
-        for i in range(len(requests.children)):
-            lat_child = requests.children[i]
-            lon_length = len(lat_child.children)
-            current_start_idxs[i] = [None] * lon_length
-            fdb_node_ranges[i] = [[TensorIndexTree.root for y in range(lon_length)] for x in range(lon_length)]
-            current_start_idx = deepcopy(current_start_idxs[i])
-            fdb_range_nodes = deepcopy(fdb_node_ranges[i])
-            key_value_path = {lat_child.axis.name: lat_child.values}
-            ax = lat_child.axis
-            (key_value_path, leaf_path, self.unwanted_path) = ax.unmap_path_key(
-                key_value_path, leaf_path, self.unwanted_path
-            )
-            leaf_path.update(key_value_path)
-            (current_start_idxs[i], fdb_node_ranges[i]) = self.get_last_layer_before_leaf(
-                lat_child, leaf_path, current_start_idx, fdb_range_nodes
-            )
-
-        leaf_path_copy = deepcopy(leaf_path)
-        leaf_path_copy.pop("values", None)
-        return (leaf_path_copy, current_start_idxs, fdb_node_ranges, lat_length)
-
-    def get_last_layer_before_leaf(self, requests, leaf_path, current_idx, fdb_range_n):
-        current_idx = [[] for i in range(len(requests.children))]
-        fdb_range_n = [[] for i in range(len(requests.children))]
-        for i, c in enumerate(requests.children):
-            # now c are the leaves of the initial tree
-            key_value_path = {c.axis.name: c.values}
-            ax = c.axis
-            (key_value_path, leaf_path, self.unwanted_path) = ax.unmap_path_key(
-                key_value_path, leaf_path, self.unwanted_path
-            )
-            # TODO: change this to accommodate non consecutive indexes being compressed too
-            current_idx[i].extend(key_value_path["values"])
-            fdb_range_n[i].append(c)
-        return (current_idx, fdb_range_n)
-
-    def assign_fdb_output_to_nodes(self, output_values, fdb_requests_decoding_info):
-        for k in range(len(output_values)):
-            request_output_values = output_values[k]
-            (
-                original_indices,
-                fdb_node_ranges,
-            ) = fdb_requests_decoding_info[k]
-            sorted_fdb_range_nodes = [fdb_node_ranges[i] for i in original_indices]
-            for i in range(len(sorted_fdb_range_nodes)):
-                n = sorted_fdb_range_nodes[i][0]
-                if len(request_output_values[0]) == 0:
-                    # If we are here, no data was found for this path in the fdb
-                    none_array = [None] * len(n.values)
-                    n.result.extend(none_array)
-                else:
-                    interm_request_output_values = request_output_values[0][i][0]
-                    n.result.extend(interm_request_output_values)
-
-    def sort_fdb_request_ranges(self, current_start_idx, lat_length, fdb_node_ranges):
-        (new_fdb_node_ranges, new_current_start_idx) = self.remove_duplicates_in_request_ranges(
-            fdb_node_ranges, current_start_idx
-        )
-        interm_request_ranges = []
-        # TODO: modify the start indexes to have as many arrays as the request ranges
-        new_fdb_node_ranges = []
-        for i in range(lat_length):
-            interm_fdb_nodes = fdb_node_ranges[i]
-            old_interm_start_idx = current_start_idx[i]
-            for j in range(len(old_interm_start_idx)):
-                # TODO: if we sorted the cyclic values in increasing order on the tree too,
-                # then we wouldn't have to sort here?
-                sorted_list = sorted(enumerate(old_interm_start_idx[j]), key=lambda x: x[1])
-                original_indices_idx, interm_start_idx = zip(*sorted_list)
-                for interm_fdb_nodes_obj in interm_fdb_nodes[j]:
-                    interm_fdb_nodes_obj.values = tuple([interm_fdb_nodes_obj.values[k] for k in original_indices_idx])
-                if abs(interm_start_idx[-1] + 1 - interm_start_idx[0]) <= len(interm_start_idx):
-                    current_request_ranges = (interm_start_idx[0], interm_start_idx[-1] + 1)
-                    interm_request_ranges.append(current_request_ranges)
-                    new_fdb_node_ranges.append(interm_fdb_nodes[j])
-                else:
-                    jumps = list(map(operator.sub, interm_start_idx[1:], interm_start_idx[:-1]))
-                    last_idx = 0
-                    for k, jump in enumerate(jumps):
-                        if jump > 1:
-                            current_request_ranges = (interm_start_idx[last_idx], interm_start_idx[k] + 1)
-                            new_fdb_node_ranges.append(interm_fdb_nodes[j])
-                            last_idx = k + 1
-                            interm_request_ranges.append(current_request_ranges)
-                        if k == len(interm_start_idx) - 2:
-                            current_request_ranges = (interm_start_idx[last_idx], interm_start_idx[-1] + 1)
-                            interm_request_ranges.append(current_request_ranges)
-                            new_fdb_node_ranges.append(interm_fdb_nodes[j])
-        request_ranges_with_idx = list(enumerate(interm_request_ranges))
-        sorted_list = sorted(request_ranges_with_idx, key=lambda x: x[1][0])
-        original_indices, sorted_request_ranges = zip(*sorted_list)
-        return (original_indices, sorted_request_ranges, new_fdb_node_ranges)
-
-    def datacube_natural_indexes(self, axis, subarray):
-        indexes = subarray.get(axis.name, None)
-        return indexes
-
-    def select(self, path, unmapped_path):
-        return self.fdb_coordinates
-
-    def ax_vals(self, name):
-        return self.fdb_coordinates.get(name, None)
-
-    def prep_tree_encoding(self, node, unwanted_path=None):
-        # TODO: prepare the tree for protobuf encoding
-        # ie transform all axes for gribjump and adding the index property on the leaves
-        if unwanted_path is None:
-            unwanted_path = {}
-
-        ax = node.axis
-        (new_node, unwanted_path) = ax.unmap_tree_node(node, unwanted_path)
-
-        if len(node.children) != 0:
-            for c in new_node.children:
-                self.prep_tree_encoding(c, unwanted_path)
-
-    def prep_tree_decoding(self, tree):
-        # TODO: transform the tree after decoding from protobuf
-        # ie unstransform all axes from gribjump and put the indexes back as a leaf/extra node
-        pass
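The deleted backend bundled two small mechanisms that are easy to lose in the 400-line removal: `get()` expanded each compressed request path into its cross-product of concrete GribJump requests via `itertools.product`, and `sort_fdb_request_ranges()` coalesced the sorted grid-value indices into contiguous half-open `(start, end)` ranges, starting a new range wherever consecutive indices jump by more than one. A minimal sketch of both steps (the helper names here are illustrative, not part of the codebase):

```python
# Distilled sketch of two steps from the deleted FDBDatacube.get() pipeline;
# the function names are illustrative and do not exist in the codebase.
from itertools import product


def decompress_request(compressed: dict) -> list[dict]:
    # Expand a compressed path into all concrete request combinations, e.g.
    # {"date": ("20220811", "20220812"), "param": ("164",)} yields two requests.
    keys = list(compressed.keys())
    return [dict(zip(keys, combi)) for combi in product(*compressed.values())]


def coalesce_ranges(sorted_idxs: list[int]) -> list[tuple[int, int]]:
    # Turn sorted indices into contiguous half-open ranges, splitting on gaps:
    # [3, 4, 5, 9, 10] -> [(3, 6), (9, 11)]
    ranges, start = [], sorted_idxs[0]
    for prev, nxt in zip(sorted_idxs, sorted_idxs[1:]):
        if nxt - prev > 1:  # a jump > 1 closes the current range
            ranges.append((start, prev + 1))
            start = nxt
    ranges.append((start, sorted_idxs[-1] + 1))
    return ranges


assert decompress_request({"date": ("a", "b"), "param": ("164",)}) == [
    {"date": "a", "param": "164"},
    {"date": "b", "param": "164"},
]
assert coalesce_ranges([3, 4, 5, 9, 10]) == [(3, 6), (9, 11)]
```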
diff --git a/polytope_feature/engine/qubed_slicer.py b/polytope_feature/engine/qubed_slicer.py
index fac8e961..081b2503 100644
--- a/polytope_feature/engine/qubed_slicer.py
+++ b/polytope_feature/engine/qubed_slicer.py
@@ -13,6 +13,7 @@
 )
 from .engine import Engine
 from .slicing_tools import slice
+from copy import deepcopy
 
 
 class QubedSlicer(Engine):
@@ -130,69 +131,6 @@ def _slice(self, q: Qube, polytopes, datacube, datacube_transformations, metadat
         if metadata_idx_stack is None:
             metadata_idx_stack = [[0]]
 
-        if len(q.children) == 0:
-            # add "fake" axes and their nodes in order -> what about merged axes??
-            mapper_transformation = None
-            for transformation in datacube_transformations:
-                if isinstance(transformation, DatacubeMapper):
-                    mapper_transformation = transformation
-            if not mapper_transformation:
-                # There is no grid mapping
-                pass
-            else:
-                # Slice on the two grid axes
-                grid_axes = mapper_transformation._mapped_axes()
-
-                # Handle first grid axis
-                polytopes_on_axis = find_polytopes_on_axis(grid_axes[0], polytopes)
-
-                for poly in polytopes_on_axis:
-                    ax = datacube._axes[grid_axes[0]]
-                    lower, upper, slice_axis_idx = poly.extents(grid_axes[0])
-
-                    found_vals, _ = self.find_values_between(poly, ax, None, datacube, lower, upper)
-
-                    if len(found_vals) == 0:
-                        continue
-
-                    sliced_polys = self.get_sliced_polys(found_vals, ax, grid_axes[0], poly, slice_axis_idx)
-                    # decide if axis should be compressed or not according to polytope
-                    # NOTE: actually the first grid axis will never be compressed
-
-                    # if it's not compressed, need to separate into different nodes to append to the tree
-                    for i, found_val in enumerate(found_vals):
-                        found_val = self.remap_values(ax, found_val)
-                        child_polytopes = [p for p in polytopes if p != poly]
-                        if sliced_polys[i]:
-                            child_polytopes.append(sliced_polys[i])
-
-                        second_axis_vals = mapper_transformation.second_axis_vals([found_val])
-                        flattened_path = {grid_axes[0]: (found_val,)}
-                        # get second axis children through slicing
-                        children = self._slice_second_grid_axis(
-                            grid_axes[1],
-                            child_polytopes,
-                            datacube,
-                            datacube_transformations,
-                            second_axis_vals,
-                            flattened_path,
-                        )
-                        # If this node used to have children but now has none due to filtering, skip it.
-                        if not children:
-                            continue
-                        if isinstance(found_val, pd.Timedelta) or isinstance(found_val, pd.Timestamp):
-                            found_val = [str(found_val)]
-
-                        # TODO: remap the found_val using self.remap_values like in the hullslicer
-
-                        # TODO: when we have an axis that we would like to merge with another, we should skip the node creation here
-                        # and instead keep/cache the value to merge with the node from before??
-
-                        qube_node = Qube.make_node(
-                            key=grid_axes[0], values=QEnum([found_val]), metadata={}, children=children
-                        )
-                        result.append(qube_node)
-
         for i, child in enumerate(q.children):
             # find polytopes which are defined on axis child.key
             polytopes_on_axis = find_polytopes_on_axis(child.key, polytopes)
@@ -252,10 +190,79 @@ def find_metadata(metadata_idx):
                 qube_node = Qube.make_node(
                     key=child.key, values=QEnum(new_found_vals), metadata=metadata, children=children
                 )
+                if not children:
+                    # We've reached the end of the qube
+                    # qube_node.sliced_polys = sliced_polys
+                    qube_node.sliced_polys = polytopes
                 result.append(qube_node)
 
         return result
 
+    def slice_grid_axes(self, q: Qube, datacube, datacube_transformations):
+        # TODO: here, instead of checking if the qube is at the leaves, we instead give it the sub-tree and go to its leaves
+        # TODO: we then find the remaining sliced_polys to continue slicing and slice along lat + lon like before
+        # TODO: we then return the completed tree
+        compressed_leaves = [leaf for leaf in q.compressed_leaf_nodes()]
+        actual_leaves = deepcopy(compressed_leaves)
+        for j, leaf in enumerate(actual_leaves):
+            # for leaf in q.compressed_leaf_nodes():
+            result = []
+            mapper_transformation = None
+            for transformation in datacube_transformations:
+                if isinstance(transformation, DatacubeMapper):
+                    mapper_transformation = transformation
+            if not mapper_transformation:
+                # There is no grid mapping
+                pass
+            else:
+                grid_axes = mapper_transformation._mapped_axes()
+                polytopes = leaf.sliced_polys
+
+                # Handle first grid axis
+                polytopes_on_axis = find_polytopes_on_axis(grid_axes[0], polytopes)
+
+                for poly in polytopes_on_axis:
+                    ax = datacube._axes[grid_axes[0]]
+                    lower, upper, slice_axis_idx = poly.extents(grid_axes[0])
+
+                    found_vals, _ = self.find_values_between(poly, ax, None, datacube, lower, upper)
+
+                    if len(found_vals) == 0:
+                        continue
+
+                    sliced_polys = self.get_sliced_polys(found_vals, ax, grid_axes[0], poly, slice_axis_idx)
+                    # decide if axis should be compressed or not according to polytope
+                    # NOTE: actually the first grid axis will never be compressed
+
+                    # if it's not compressed, need to separate into different nodes to append to the tree
+                    for i, found_val in enumerate(found_vals):
+                        found_val = self.remap_values(ax, found_val)
+                        child_polytopes = [p for p in polytopes if p != poly]
+                        if sliced_polys[i]:
+                            child_polytopes.append(sliced_polys[i])
+
+                        second_axis_vals = mapper_transformation.second_axis_vals([found_val])
+                        flattened_path = {grid_axes[0]: (found_val,)}
+                        # get second axis children through slicing
+                        children = self._slice_second_grid_axis(
+                            grid_axes[1],
+                            child_polytopes,
+                            datacube,
+                            datacube_transformations,
+                            second_axis_vals,
+                            flattened_path,
+                        )
+                        # If this node used to have children but now has none due to filtering, skip it.
+                        if not children:
+                            continue
+
+                        qube_node = Qube.make_node(
+                            key=grid_axes[0], values=QEnum([found_val]), metadata={}, children=children
+                        )
+                        result.append(qube_node)
+            # leaf.children = result
+            compressed_leaves[j].children = result
+
     def _slice_second_grid_axis(
         self, axis_name, polytopes, datacube, datacube_transformations, second_axis_vals, path
     ) -> list[Qube]:
@@ -293,7 +300,15 @@ def _slice_second_grid_axis(
     def slice_tree(self, datacube, final_polys):
         q = datacube.q
         datacube_transformations = datacube.datacube_transformations
-        return Qube.make_root(self._slice(q, final_polys, datacube, datacube_transformations))
+        # create sub-qube without grid first
+        partial_qube = Qube.make_root(self._slice(q, final_polys, datacube, datacube_transformations))
+        # recompress this sub-qube
+        # partial_qube.compress()
+        partial_qube = partial_qube.compress_w_leaf_attrs("sliced_polys")
+        # complete the qube with grid axes and return it
+        complete_qube = self.slice_grid_axes(partial_qube, datacube, datacube_transformations)
+        # return complete_qube
+        return partial_qube
 
     def build_tree(self, polytopes_to_slice, datacube):
         groups, input_axes = group(polytopes_to_slice)
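Note that `slice_grid_axes` has no `return` statement, so `complete_qube` in the new `slice_tree` is `None`; the method works entirely by side effect, which is why `slice_tree` returns `partial_qube`. It iterates over a `deepcopy` of the compressed leaves (so reads of `sliced_polys` and the traversal itself stay independent of the mutation) while grafting the grid sub-trees onto the original leaves by index. A toy sketch of that pattern, using a hypothetical `Node` class rather than the real `qubed.Qube`:

```python
# Toy illustration (hypothetical Node class, not the real qubed.Qube) of the
# pattern slice_grid_axes relies on: iterate over a deepcopy of the leaves,
# then graft new children onto the original nodes in place.
from copy import deepcopy
from dataclasses import dataclass, field


@dataclass
class Node:
    key: str
    children: list["Node"] = field(default_factory=list)


root = Node("date", [Node("leaf-a"), Node("leaf-b")])
compressed_leaves = root.children
actual_leaves = deepcopy(compressed_leaves)  # stable snapshot to iterate over

for j, leaf in enumerate(actual_leaves):
    grid_subtree = [Node("latitude", [Node("longitude")])]
    compressed_leaves[j].children = grid_subtree  # mutate the original tree

# the original tree was completed by side effect
assert root.children[0].children[0].key == "latitude"
```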
-# ConvexPolytope(["expver"], [['0001']]), -# ConvexPolytope(["experiment"], [['ssp3-7.0']]), -# ConvexPolytope(["generation"], [["1"]]), -# ConvexPolytope(["levtype"], [["sfc"]]), -# ConvexPolytope(["activity"], [["scenariomip"]]), -# ConvexPolytope(["dataset"], [["climate-dt"]]), -# ConvexPolytope(["class"], [["d1"]]), -# ConvexPolytope(["date"], [[pd.Timestamp("20220811")], [pd.Timestamp("20220912")]]), -# ConvexPolytope(["latitude", "longitude"], [[0, 0], [0.5, 0.5], [0, 0.5]]) -# ] - -# # TODO: add lat and lon axes -# datacube_axes = {"param": UnsliceableDatacubeAxis(), -# "time": PandasTimedeltaDatacubeAxis(), -# "resolution": UnsliceableDatacubeAxis(), -# "type": UnsliceableDatacubeAxis(), -# "model": UnsliceableDatacubeAxis(), -# "stream": UnsliceableDatacubeAxis(), -# "realization": UnsliceableDatacubeAxis(), -# "expver": UnsliceableDatacubeAxis(), -# "experiment": UnsliceableDatacubeAxis(), -# "generation": UnsliceableDatacubeAxis(), -# "levtype": UnsliceableDatacubeAxis(), -# "activity": UnsliceableDatacubeAxis(), -# "dataset": UnsliceableDatacubeAxis(), -# "class": UnsliceableDatacubeAxis(), -# "date": PandasTimestampDatacubeAxis(), -# "latitude": FloatDatacubeAxis(), -# "longitude": FloatDatacubeAxis()} - -# time_val = pd.Timedelta(hours=0, minutes=0) -# date_val = pd.Timestamp("20300101T000000") - - -# # TODO: add grid axis transformation -# datacube_transformations = { -# "time": TypeChangeStrToTimedelta("time", time_val), -# "date": TypeChangeStrToTimestamp("date", date_val), -# "values": NestedHealpixGridMapper("values", ["latitude", "longitude"], 1024) -# } - - -# sliced_tree = actual_slice(fdb_tree, combi_polytopes, datacube_axes, datacube_transformations) - - -# print("THE FINAL RESULT IS") -# print(sliced_tree) - -# # TODO: treat the transformations to talk to the qubed tree, maybe do it - -# # TODO: start iterating fdb_tree and creating a new request tree - -# # print(fdb_tree.) 
- - -# # Select("step", [0]), -# # Select("levtype", ["sfc"]), -# # Select("date", [pd.Timestamp("20231102T000000")]), -# # Select("domain", ["g"]), -# # Select("expver", ["0001"]), -# # Select("param", ["167"]), -# # Select("class", ["od"]), -# # Select("stream", ["oper"]), -# # Select("type", ["fc"]), -# # Box(["latitude", "longitude"], [0, 0], [80, 80]), diff --git a/tests/test_qubed_extraction_engine.py b/tests/test_qubed_extraction_engine.py index a1a6c744..bee87a3a 100644 --- a/tests/test_qubed_extraction_engine.py +++ b/tests/test_qubed_extraction_engine.py @@ -2,14 +2,11 @@ import pandas as pd import pygribjump as gj -import pytest import requests from qubed import Qube -from polytope_feature.datacube.backends.fdb import FDBDatacube from polytope_feature.datacube.backends.qubed import QubedDatacube from polytope_feature.datacube.datacube_axis import ( - FloatDatacubeAxis, PandasTimedeltaDatacubeAxis, PandasTimestampDatacubeAxis, UnsliceableDatacubeAxis, @@ -18,7 +15,6 @@ NestedHealpixGridMapper, ) -# from polytope_feature.datacube.backends.test_qubed_slicing import actual_slice from polytope_feature.datacube.transformations.datacube_type_change.datacube_type_change import ( TypeChangeStrToTimedelta, TypeChangeStrToTimestamp, @@ -26,7 +22,7 @@ from polytope_feature.engine.hullslicer import HullSlicer from polytope_feature.engine.qubed_slicer import QubedSlicer from polytope_feature.polytope import Polytope, Request -from polytope_feature.shapes import Box, ConvexPolytope, Select, Span +from polytope_feature.shapes import ConvexPolytope, Select def find_relevant_subcube_from_request(request, qube_url): @@ -87,8 +83,6 @@ def find_relevant_subcube_from_request(request, qube_url): "dataset": UnsliceableDatacubeAxis(), "class": UnsliceableDatacubeAxis(), "date": PandasTimestampDatacubeAxis(), - # "latitude": FloatDatacubeAxis(), - # "longitude": FloatDatacubeAxis() } time_val = pd.Timedelta(hours=0, minutes=0) @@ -107,10 +101,6 @@ def find_relevant_subcube_from_request(request, qube_url): "axis_config": [ {"axis_name": "step", "transformations": [{"name": "type_change", "type": "int"}]}, {"axis_name": "number", "transformations": [{"name": "type_change", "type": "int"}]}, - # { - # "axis_name": "date", - # "transformations": [{"name": "merge", "other_axis": "time", "linkers": ["T", "00"]}], - # }, {"axis_name": "date", "transformations": [{"name": "type_change", "type": "date"}]}, {"axis_name": "time", "transformations": [{"name": "type_change", "type": "time"}]}, { @@ -136,24 +126,6 @@ def find_relevant_subcube_from_request(request, qube_url): "type", ], "pre_path": {"class": "od", "expver": "0001", "levtype": "sfc", "stream": "oper"}, - # "datacube_axes": {"param": UnsliceableDatacubeAxis(), - # "time": PandasTimedeltaDatacubeAxis(), - # "resolution": UnsliceableDatacubeAxis(), - # "type": UnsliceableDatacubeAxis(), - # "model": UnsliceableDatacubeAxis(), - # "stream": UnsliceableDatacubeAxis(), - # "realization": UnsliceableDatacubeAxis(), - # "expver": UnsliceableDatacubeAxis(), - # "experiment": UnsliceableDatacubeAxis(), - # "generation": UnsliceableDatacubeAxis(), - # "levtype": UnsliceableDatacubeAxis(), - # "activity": UnsliceableDatacubeAxis(), - # "dataset": UnsliceableDatacubeAxis(), - # "class": UnsliceableDatacubeAxis(), - # "date": PandasTimestampDatacubeAxis(), - # # "latitude": FloatDatacubeAxis(), - # # "longitude": FloatDatacubeAxis() - # } "datacube_axes": { "param": "UnsliceableDatacubeAxis", "time": "PandasTimedeltaDatacubeAxis", @@ -173,18 +145,6 @@ def 
diff --git a/tests/test_qubed_extraction_engine.py b/tests/test_qubed_extraction_engine.py
index a1a6c744..bee87a3a 100644
--- a/tests/test_qubed_extraction_engine.py
+++ b/tests/test_qubed_extraction_engine.py
@@ -2,14 +2,11 @@
 
 import pandas as pd
 import pygribjump as gj
-import pytest
 import requests
 from qubed import Qube
 
-from polytope_feature.datacube.backends.fdb import FDBDatacube
 from polytope_feature.datacube.backends.qubed import QubedDatacube
 from polytope_feature.datacube.datacube_axis import (
-    FloatDatacubeAxis,
    PandasTimedeltaDatacubeAxis,
    PandasTimestampDatacubeAxis,
    UnsliceableDatacubeAxis,
@@ -18,7 +15,6 @@
     NestedHealpixGridMapper,
 )
 
-# from polytope_feature.datacube.backends.test_qubed_slicing import actual_slice
 from polytope_feature.datacube.transformations.datacube_type_change.datacube_type_change import (
     TypeChangeStrToTimedelta,
     TypeChangeStrToTimestamp,
@@ -26,7 +22,7 @@
 from polytope_feature.engine.hullslicer import HullSlicer
 from polytope_feature.engine.qubed_slicer import QubedSlicer
 from polytope_feature.polytope import Polytope, Request
-from polytope_feature.shapes import Box, ConvexPolytope, Select, Span
+from polytope_feature.shapes import ConvexPolytope, Select
 
 
 def find_relevant_subcube_from_request(request, qube_url):
@@ -87,8 +83,6 @@ def find_relevant_subcube_from_request(request, qube_url):
     "dataset": UnsliceableDatacubeAxis(),
     "class": UnsliceableDatacubeAxis(),
     "date": PandasTimestampDatacubeAxis(),
-    # "latitude": FloatDatacubeAxis(),
-    # "longitude": FloatDatacubeAxis()
 }
 
 time_val = pd.Timedelta(hours=0, minutes=0)
@@ -107,10 +101,6 @@ def find_relevant_subcube_from_request(request, qube_url):
     "axis_config": [
         {"axis_name": "step", "transformations": [{"name": "type_change", "type": "int"}]},
         {"axis_name": "number", "transformations": [{"name": "type_change", "type": "int"}]},
-        # {
-        #     "axis_name": "date",
-        #     "transformations": [{"name": "merge", "other_axis": "time", "linkers": ["T", "00"]}],
-        # },
         {"axis_name": "date", "transformations": [{"name": "type_change", "type": "date"}]},
         {"axis_name": "time", "transformations": [{"name": "type_change", "type": "time"}]},
         {
@@ -136,24 +126,6 @@ def find_relevant_subcube_from_request(request, qube_url):
         "type",
     ],
     "pre_path": {"class": "od", "expver": "0001", "levtype": "sfc", "stream": "oper"},
-    # "datacube_axes": {"param": UnsliceableDatacubeAxis(),
-    #                   "time": PandasTimedeltaDatacubeAxis(),
-    #                   "resolution": UnsliceableDatacubeAxis(),
-    #                   "type": UnsliceableDatacubeAxis(),
-    #                   "model": UnsliceableDatacubeAxis(),
-    #                   "stream": UnsliceableDatacubeAxis(),
-    #                   "realization": UnsliceableDatacubeAxis(),
-    #                   "expver": UnsliceableDatacubeAxis(),
-    #                   "experiment": UnsliceableDatacubeAxis(),
-    #                   "generation": UnsliceableDatacubeAxis(),
-    #                   "levtype": UnsliceableDatacubeAxis(),
-    #                   "activity": UnsliceableDatacubeAxis(),
-    #                   "dataset": UnsliceableDatacubeAxis(),
-    #                   "class": UnsliceableDatacubeAxis(),
-    #                   "date": PandasTimestampDatacubeAxis(),
-    #                   # "latitude": FloatDatacubeAxis(),
-    #                   # "longitude": FloatDatacubeAxis()
-    #                   }
     "datacube_axes": {
         "param": "UnsliceableDatacubeAxis",
         "time": "PandasTimedeltaDatacubeAxis",
@@ -173,18 +145,6 @@ def find_relevant_subcube_from_request(request, qube_url):
     },
 }
 
-# request = Request(
-#     Select("step", [0]),
-#     Select("levtype", ["sfc"]),
-#     Select("date", [pd.Timestamp("20230625T120000")]),
-#     Select("domain", ["g"]),
-#     Select("expver", ["0001"]),
-#     Select("param", ["167"]),
-#     Select("class", ["od"]),
-#     Select("stream", ["oper"]),
-#     Select("type", ["an"]),
-#     Box(["latitude", "longitude"], [0, 0], [0.2, 0.2]),
-# )
 
 request = Request(
     ConvexPolytope(["param"], [[164]]),
@@ -203,13 +163,11 @@ def find_relevant_subcube_from_request(request, qube_url):
     ConvexPolytope(["class"], [["d1"]]),
     ConvexPolytope(["date"], [[pd.Timestamp("20220811")]]),
     ConvexPolytope(["latitude", "longitude"], [[0, 0], [0.5, 0.5], [0, 0.5]])
-    # ConvexPolytope(["latitude", "longitude"], [[0, 0], [-0.5, -0.5], [0, -0.5]])
 )
 
 qubeddatacube = QubedDatacube(fdb_tree, datacube_axes, datacube_transformations)
 slicer = QubedSlicer()
 self_API = Polytope(
-    # datacube=qubeddatacube,
     datacube=fdb_tree,
     engine=slicer,
     options=options,
@@ -231,10 +189,6 @@ def find_relevant_subcube_from_request(request, qube_url):
     "axis_config": [
         {"axis_name": "step", "transformations": [{"name": "type_change", "type": "int"}]},
         {"axis_name": "number", "transformations": [{"name": "type_change", "type": "int"}]},
-        # {
-        #     "axis_name": "date",
-        #     "transformations": [{"name": "merge", "other_axis": "time", "linkers": ["T", "00"]}],
-        # },
         {"axis_name": "date", "transformations": [{"name": "type_change", "type": "date"}]},
         {"axis_name": "time", "transformations": [{"name": "type_change", "type": "time"}]},
         {
@@ -248,27 +202,12 @@ def find_relevant_subcube_from_request(request, qube_url):
     ],
     "compressed_axes_config": [
         "longitude",
-        # "latitude",
-        # "levtype",
-        # "step",
-        # "date",
-        # "domain",
-        # "expver",
-        # "param",
-        # "class",
-        # "stream",
-        # "type",
     ],
     "pre_path": {"class": "d1", "model": "ifs-nemo", "resolution": "high"},
 }
 
 fdbdatacube = gj.GribJump()
 slicer = HullSlicer()
-# self_API = Polytope(
-#     datacube=fdbdatacube,
-#     engine=slicer,
-#     options=options,
-# )
 
 
 request = Request(
@@ -297,10 +236,3 @@ def find_relevant_subcube_from_request(request, qube_url):
 
 print("TIME EXTRACTING USING GJ NORMAL")
 print(time4 - time3)
-
-
-# print(result)
-
-# print(result.leaves)
-
-# sliced_tree = actual_slice(fdb_tree, combi_polytopes, datacube_axes, datacube_transformations)
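Condensed, the qubed extraction path this test exercises is the wiring below (`options` and `request` stand for the dictionaries/objects defined in the test above; every call shown appears verbatim in the test). Note that the test still constructs a `QubedDatacube` but hands the raw `fdb_tree` to `Polytope`; the commented-out `datacube=qubeddatacube` alternative is what this diff removes:

```python
# Condensed from the test above, not a new API: load a Qube, then retrieve
# through Polytope with the QubedSlicer engine.
import requests
from qubed import Qube

from polytope_feature.engine.qubed_slicer import QubedSlicer
from polytope_feature.polytope import Polytope

QUBE_URL = "https://github.com/ecmwf/qubed/raw/refs/heads/main/tests/example_qubes/climate_dt.json"

fdb_tree = Qube.from_json(requests.get(QUBE_URL).json())
# `options` and `request` as defined in the test above
polytope = Polytope(datacube=fdb_tree, engine=QubedSlicer(), options=options)
result = polytope.retrieve(request)
```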
diff --git a/tests/test_qubed_extraction_engine_w_metadata.py b/tests/test_qubed_extraction_engine_w_metadata.py
index 7156303d..b4089665 100644
--- a/tests/test_qubed_extraction_engine_w_metadata.py
+++ b/tests/test_qubed_extraction_engine_w_metadata.py
@@ -2,14 +2,11 @@
 
 import pandas as pd
 import pygribjump as gj
-import pytest
 import requests
 from qubed import Qube
 
-from polytope_feature.datacube.backends.fdb import FDBDatacube
 from polytope_feature.datacube.backends.qubed import QubedDatacube
 from polytope_feature.datacube.datacube_axis import (
-    FloatDatacubeAxis,
    PandasTimedeltaDatacubeAxis,
    PandasTimestampDatacubeAxis,
    UnsliceableDatacubeAxis,
@@ -18,7 +15,6 @@
     NestedHealpixGridMapper,
 )
 
-# from polytope_feature.datacube.backends.test_qubed_slicing import actual_slice
 from polytope_feature.datacube.transformations.datacube_type_change.datacube_type_change import (
     TypeChangeStrToTimedelta,
     TypeChangeStrToTimestamp,
@@ -26,7 +22,7 @@
 from polytope_feature.engine.hullslicer import HullSlicer
 from polytope_feature.engine.qubed_slicer import QubedSlicer
 from polytope_feature.polytope import Polytope, Request
-from polytope_feature.shapes import Box, ConvexPolytope, Select, Span
+from polytope_feature.shapes import ConvexPolytope, Select
 
 
 def find_relevant_subcube_from_request(request, qube_url):
@@ -52,26 +48,6 @@ def find_relevant_subcube_from_request(request, qube_url):
     ).json()
 )
 
-
-# combi_polytopes = [
-#     ConvexPolytope(["param"], [["164"]]),
-#     ConvexPolytope(["time"], [[pd.Timedelta(hours=0, minutes=0)], [pd.Timedelta(hours=12, minutes=0)]]),
-#     ConvexPolytope(["resolution"], [["high"]]),
-#     ConvexPolytope(["type"], [["fc"]]),
-#     ConvexPolytope(["model"], [['ifs-nemo']]),
-#     ConvexPolytope(["stream"], [["clte"]]),
-#     ConvexPolytope(["realization"], ["1"]),
-#     ConvexPolytope(["expver"], [['0001']]),
-#     ConvexPolytope(["experiment"], [['ssp3-7.0']]),
-#     ConvexPolytope(["generation"], [["1"]]),
-#     ConvexPolytope(["levtype"], [["sfc"]]),
-#     ConvexPolytope(["activity"], [["scenariomip"]]),
-#     ConvexPolytope(["dataset"], [["climate-dt"]]),
-#     ConvexPolytope(["class"], [["d1"]]),
-#     ConvexPolytope(["date"], [[pd.Timestamp("20220811")], [pd.Timestamp("20220912")]]),
-#     ConvexPolytope(["latitude", "longitude"], [[0, 0], [0.5, 0.5], [0, 0.5]])
-# ]
-
 # TODO: add lat and lon axes
 datacube_axes = {
     "param": UnsliceableDatacubeAxis(),
@@ -89,8 +65,6 @@ def find_relevant_subcube_from_request(request, qube_url):
     "dataset": UnsliceableDatacubeAxis(),
     "class": UnsliceableDatacubeAxis(),
     "date": PandasTimestampDatacubeAxis(),
-    # "latitude": FloatDatacubeAxis(),
-    # "longitude": FloatDatacubeAxis()
 }
 
 time_val = pd.Timedelta(hours=0, minutes=0)
@@ -109,10 +83,6 @@ def find_relevant_subcube_from_request(request, qube_url):
     "axis_config": [
         {"axis_name": "step", "transformations": [{"name": "type_change", "type": "int"}]},
         {"axis_name": "number", "transformations": [{"name": "type_change", "type": "int"}]},
-        # {
-        #     "axis_name": "date",
-        #     "transformations": [{"name": "merge", "other_axis": "time", "linkers": ["T", "00"]}],
-        # },
         {"axis_name": "date", "transformations": [{"name": "type_change", "type": "date"}]},
         {"axis_name": "time", "transformations": [{"name": "type_change", "type": "time"}]},
         {
@@ -152,18 +122,6 @@ def find_relevant_subcube_from_request(request, qube_url):
     },
 }
 
-# request = Request(
-#     Select("step", [0]),
-#     Select("levtype", ["sfc"]),
-#     Select("date", [pd.Timestamp("20230625T120000")]),
-#     Select("domain", ["g"]),
-#     Select("expver", ["0001"]),
-#     Select("param", ["167"]),
-#     Select("class", ["od"]),
-#     Select("stream", ["oper"]),
-#     Select("type", ["an"]),
-#     Box(["latitude", "longitude"], [0, 0], [0.2, 0.2]),
-# )
 
 request = Request(
     ConvexPolytope(["param"], [["167"]]),
@@ -177,20 +135,17 @@ def find_relevant_subcube_from_request(request, qube_url):
     ConvexPolytope(["step"], [[1]]),
     ConvexPolytope(["date"], [[pd.Timestamp("20240407")]]),
     ConvexPolytope(["latitude", "longitude"], [[0, 0], [0.5, 0.5], [0, 0.5]])
-    # ConvexPolytope(["latitude", "longitude"], [[0, 0], [-0.5, -0.5], [0, -0.5]])
 )
 
-# print(fdb_tree)
+
 qubeddatacube = QubedDatacube(fdb_tree, datacube_axes, datacube_transformations)
 slicer = QubedSlicer()
 self_API = Polytope(
-    # datacube=qubeddatacube,
     datacube=fdb_tree,
     engine=slicer,
     options=options,
 )
 time1 = time.time()
 result = self_API.retrieve(request)
-# result = self_API.slice(request.polytopes())
 time2 = time.time()
 print(result)
@@ -205,10 +160,6 @@ def find_relevant_subcube_from_request(request, qube_url):
     "axis_config": [
         {"axis_name": "step", "transformations": [{"name": "type_change", "type": "int"}]},
         {"axis_name": "number", "transformations": [{"name": "type_change", "type": "int"}]},
-        # {
-        #     "axis_name": "date",
-        #     "transformations": [{"name": "merge", "other_axis": "time", "linkers": ["T", "00"]}],
-        # },
         {"axis_name": "date", "transformations": [{"name": "type_change", "type": "date"}]},
         {"axis_name": "time", "transformations": [{"name": "type_change", "type": "time"}]},
         {
@@ -222,27 +173,12 @@ def find_relevant_subcube_from_request(request, qube_url):
     ],
     "compressed_axes_config": [
         "longitude",
-        # "latitude",
-        # "levtype",
-        # "step",
-        # "date",
-        # "domain",
-        # "expver",
-        # "param",
-        # "class",
-        # "stream",
-        # "type",
     ],
     "pre_path": {"class": "d1", "model": "ifs-nemo", "resolution": "high"},
 }
 
 fdbdatacube = gj.GribJump()
 slicer = HullSlicer()
-# self_API = Polytope(
-#     datacube=fdbdatacube,
-#     engine=slicer,
-#     options=options,
-# )
 
 
 request = Request(
@@ -265,16 +201,9 @@ def find_relevant_subcube_from_request(request, qube_url):
 )
 
 time3 = time.time()
-# result = self_API.retrieve(request)
+result = self_API.retrieve(request)
 # result = self_API.slice(request.polytopes())
 time4 = time.time()
 
 print("TIME EXTRACTING USING GJ NORMAL")
 print(time4 - time3)
-
-
-# print(result)
-
-# print(result.leaves)
-
-# sliced_tree = actual_slice(fdb_tree, combi_polytopes, datacube_axes, datacube_transformations)
diff --git a/tests/test_qubed_extraction_service.py b/tests/test_qubed_extraction_service.py
index c13f71cc..14c448e3 100644
--- a/tests/test_qubed_extraction_service.py
+++ b/tests/test_qubed_extraction_service.py
@@ -1,15 +1,11 @@
 import time
 
 import pandas as pd
-import pygribjump as gj
-import pytest
 import requests
 from qubed import Qube
 
-from polytope_feature.datacube.backends.fdb import FDBDatacube
 from polytope_feature.datacube.backends.qubed import QubedDatacube
 from polytope_feature.datacube.datacube_axis import (
-    FloatDatacubeAxis,
    PandasTimedeltaDatacubeAxis,
    PandasTimestampDatacubeAxis,
    UnsliceableDatacubeAxis,
@@ -18,15 +14,14 @@
     NestedHealpixGridMapper,
 )
 
-# from polytope_feature.datacube.backends.test_qubed_slicing import actual_slice
 from polytope_feature.datacube.transformations.datacube_type_change.datacube_type_change import (
     TypeChangeStrToTimedelta,
     TypeChangeStrToTimestamp,
 )
-from polytope_feature.engine.hullslicer import HullSlicer
+
 from polytope_feature.engine.qubed_slicer import QubedSlicer
 from polytope_feature.polytope import Polytope, Request
-from polytope_feature.shapes import Box, ConvexPolytope, Select, Span
+from polytope_feature.shapes import ConvexPolytope, Select
 
 
 def find_relevant_subcube_from_request(request, qube_url):
@@ -58,27 +53,6 @@ def get_fdb_tree(request):
 )
 
-# print(fdb_tree)
-
-# combi_polytopes = [
-#     ConvexPolytope(["param"], [["164"]]),
-#     ConvexPolytope(["time"], [[pd.Timedelta(hours=0, minutes=0)], [pd.Timedelta(hours=12, minutes=0)]]),
-#     ConvexPolytope(["resolution"], [["high"]]),
-#     ConvexPolytope(["type"], [["fc"]]),
-#     ConvexPolytope(["model"], [['ifs-nemo']]),
-#     ConvexPolytope(["stream"], [["clte"]]),
-#     ConvexPolytope(["realization"], ["1"]),
-#     ConvexPolytope(["expver"], [['0001']]),
-#     ConvexPolytope(["experiment"], [['ssp3-7.0']]),
-#     ConvexPolytope(["generation"], [["1"]]),
-#     ConvexPolytope(["levtype"], [["sfc"]]),
-#     ConvexPolytope(["activity"], [["scenariomip"]]),
-#     ConvexPolytope(["dataset"], [["climate-dt"]]),
-#     ConvexPolytope(["class"], [["d1"]]),
-#     ConvexPolytope(["date"], [[pd.Timestamp("20220811")], [pd.Timestamp("20220912")]]),
-#     ConvexPolytope(["latitude", "longitude"], [[0, 0], [0.5, 0.5], [0, 0.5]])
-# ]
-
 # TODO: add lat and lon axes
 datacube_axes = {
     "param": UnsliceableDatacubeAxis(),
@@ -96,8 +70,6 @@ def get_fdb_tree(request):
     "dataset": UnsliceableDatacubeAxis(),
     "class": UnsliceableDatacubeAxis(),
     "date": PandasTimestampDatacubeAxis(),
-    # "latitude": FloatDatacubeAxis(),
-    # "longitude": FloatDatacubeAxis()
 }
 
 time_val = pd.Timedelta(hours=0, minutes=0)
@@ -119,10 +91,6 @@ def get_fdb_tree(request):
         {"axis_name": "realization", "transformations": [{"name": "type_change", "type": "int"}]},
         {"axis_name": "generation", "transformations": [{"name": "type_change", "type": "int"}]},
         {"axis_name": "number", "transformations": [{"name": "type_change", "type": "int"}]},
-        # {
-        #     "axis_name": "date",
-        #     "transformations": [{"name": "merge", "other_axis": "time", "linkers": ["T", "00"]}],
-        # },
         {"axis_name": "date", "transformations": [{"name": "type_change", "type": "date"}]},
         {"axis_name": "time", "transformations": [{"name": "type_change", "type": "time"}]},
         {
@@ -155,12 +123,9 @@ def get_fdb_tree(request):
         "type": "UnsliceableDatacubeAxis",
         "model": "UnsliceableDatacubeAxis",
         "stream": "UnsliceableDatacubeAxis",
-        # "realization": "UnsliceableDatacubeAxis",
         "realization": "IntDatacubeAxis",
-        # "expver": "UnsliceableDatacubeAxis",
         "expver": "IntDatacubeAxis",
         "experiment": "UnsliceableDatacubeAxis",
-        # "generation": "UnsliceableDatacubeAxis",
         "generation": "IntDatacubeAxis",
         "levtype": "UnsliceableDatacubeAxis",
         "activity": "UnsliceableDatacubeAxis",
@@ -170,58 +135,33 @@ def get_fdb_tree(request):
-# request = Request(
-#     Select("step", [0]),
-#     Select("levtype", ["sfc"]),
-#     Select("date", [pd.Timestamp("20230625T120000")]),
-#     Select("domain", ["g"]),
-#     Select("expver", ["0001"]),
-#     Select("param", ["167"]),
-#     Select("class", ["od"]),
-#     Select("stream", ["oper"]),
-#     Select("type", ["an"]),
-#     Box(["latitude", "longitude"], [0, 0], [0.2, 0.2]),
-# )
 
 request = Request(
-    # ConvexPolytope(["param"], [["164"]]),
-    # Select("param", ["164"]),
     Select("param", ["165"]),
     ConvexPolytope(["time"], [[pd.Timedelta(hours=0, minutes=0)], [pd.Timedelta(hours=3, minutes=0)]]),
     ConvexPolytope(["resolution"], [["high"]]),
     ConvexPolytope(["type"], [["fc"]]),
-    # ConvexPolytope(["model"], [['ifs-nemo']]),
-    # Select("model", ["ifs-nemo"]),
     Select("model", ["icon"]),
     Select("stream", ["clte"]),
-    # ConvexPolytope(["stream"], [["clte"]]),
     ConvexPolytope(["realization"], ["1"]),
     ConvexPolytope(["expver"], [["0001"]]),
     ConvexPolytope(["experiment"], [["ssp3-7.0"]]),
-    # ConvexPolytope(["generation"], [["1"]]),
     Select("generation", [1]),
     ConvexPolytope(["levtype"], [["sfc"]]),
-    # ConvexPolytope(["activity"], [["scenariomip"]]),
     Select("activity", ["scenariomip"]),
     ConvexPolytope(["dataset"], [["climate-dt"]]),
     ConvexPolytope(["class"], [["d1"]]),
-    # ConvexPolytope(["date"], [[pd.Timestamp("20220811")]]),
     ConvexPolytope(["date"], [[pd.Timestamp("20200908")]]),
     ConvexPolytope(["latitude", "longitude"], [[0, 0], [5, 5], [0, 5]]),
 )
 
-# fdb_tree = get_fdb_tree(request)
 path_to_qube = "../qubed/"
 full_qube_path = path_to_qube + "tests/example_qubes/climate_dt_with_metadata.json"
 fdb_tree = Qube.load(full_qube_path)
 
-print("HERE WE HAVE THE FDB TREE")
-print(fdb_tree)
-
 qubeddatacube = QubedDatacube(fdb_tree, datacube_axes, datacube_transformations)
 slicer = QubedSlicer()
 self_API = Polytope(
-    # datacube=qubeddatacube,
     datacube=fdb_tree,
     engine=slicer,
     options=options,
 )
 
 time1 = time.time()
@@ -230,7 +170,6 @@ def get_fdb_tree(request):
 result = self_API.retrieve(request)
 time2 = time.time()
 
-# print(result)
 print("TIME EXTRACTING USING QUBED")
 print(time2 - time1)
 
@@ -305,10 +244,3 @@ def get_fdb_tree(request):
 
 # print("TIME EXTRACTING USING GJ NORMAL")
 # print(time4 - time3)
-
-
-# # print(result)
-
-# # print(result.leaves)
-
-# # sliced_tree = actual_slice(fdb_tree, combi_polytopes, datacube_axes, datacube_transformations)