From 5040e15cfef4ef16d8c49e196617b3ae4143739c Mon Sep 17 00:00:00 2001 From: Gianluca Ficarelli Date: Thu, 13 Nov 2025 15:42:58 +0100 Subject: [PATCH 1/4] Improve performance of the query used for pagination --- app/queries/common.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/app/queries/common.py b/app/queries/common.py index df59a28a..a9b53de8 100644 --- a/app/queries/common.py +++ b/app/queries/common.py @@ -319,6 +319,17 @@ def router_read_many[T: BaseModel, I: Identifiable]( # noqa: PLR0913 ) if apply_data_query_operations: + # Select the minimum set of columns needed in the subquery, always including the id + sort_columns = [getattr(c, "element", c) for c in data_query._order_by_clauses] # noqa: SLF001 + subq = data_query.with_only_columns(*sort_columns).subquery() + # Build the new data_query using the subquery because it's more performant, + # especially when using pagination and selecting a big offset. + # The rows selected in the outer query must be sorted again for deterministic results. + data_query = ( + sa.select(db_model_class) + .join(subq, subq.c.id == db_model_class.id) + .order_by(*data_query._order_by_clauses) # noqa: SLF001 + ) data_query = apply_data_query_operations(data_query) # unique is needed b/c it contains results that include joined eager loads against collections From b680c2970c9744a76c39f38985ebcb20349a85ba Mon Sep 17 00:00:00 2001 From: Gianluca Ficarelli Date: Fri, 14 Nov 2025 19:20:35 +0100 Subject: [PATCH 2/4] Sort the selected page with the same conditions --- app/db/model.py | 3 --- app/queries/common.py | 52 ++++++++++++++++++++++++++++++++++--------- 2 files changed, 41 insertions(+), 14 deletions(-) diff --git a/app/db/model.py b/app/db/model.py index 51eec901..5efbede5 100644 --- a/app/db/model.py +++ b/app/db/model.py @@ -483,7 +483,6 @@ def mtypes(cls) -> Mapped[list["MTypeClass"]]: primaryjoin=f"{cls.__name__}.id == MTypeClassification.entity_id", secondary="mtype_classification", uselist=True, - order_by="MTypeClass.pref_label", passive_deletes=True, ) @@ -500,7 +499,6 @@ def etypes(cls) -> Mapped[list["ETypeClass"]]: primaryjoin=f"{cls.__name__}.id == ETypeClassification.entity_id", secondary="etype_classification", uselist=True, - order_by="ETypeClass.pref_label", passive_deletes=True, ) @@ -702,7 +700,6 @@ class EModel( secondary="ion_channel_model__emodel", uselist=True, viewonly=True, - order_by="IonChannelModel.creation_date", ) __mapper_args__ = {"polymorphic_identity": __tablename__} # noqa: RUF012 diff --git a/app/queries/common.py b/app/queries/common.py index a9b53de8..be684843 100644 --- a/app/queries/common.py +++ b/app/queries/common.py @@ -4,6 +4,7 @@ from fastapi import HTTPException from pydantic import BaseModel from sqlalchemy.orm import Session +from sqlalchemy.sql import operators from app.db.auth import ( constrain_to_accessible_entities, @@ -235,6 +236,45 @@ def get_or_create_user_agent(db: Session, user_profile: UserProfile) -> Agent: return db_agent +def _with_subquery[I: Identifiable]( + data_query: sa.Select[tuple[I]], + db_model_class: type[I], +) -> sa.Select[tuple[I]]: + """Build and return a new data_query using a subquery. + + This is more performant when: + + - using pagination and requesting a large offset, and + - needing many columns for building the results, but not all of them are needed for filtering. + """ + order_by_clauses = data_query._order_by_clauses # noqa: SLF001 + # Get the plain columns needed in the subquery, without DESC/ASC if UnaryExpression + sort_columns = [getattr(obc, "element", obc) for obc in order_by_clauses] + subq = data_query.with_only_columns(*sort_columns).subquery() + # Dict of modifiers as found in UnaryExpression. + modifiers = { + operators.desc_op: lambda x: x.desc(), + operators.asc_op: lambda x: x.asc(), + } + # Ensure that the rows selected in the outer query are sorted again for deterministic results. + outer_order_by = [] + for obc in order_by_clauses: + col = getattr(obc, "element", obc) + if not col.key: + msg = f"Can't determine column key for order-by expression: {col!r}" + raise RuntimeError(msg) + sub_col = subq.c[col.key] + if modifier := getattr(obc, "modifier", None): + sub_col = modifiers[modifier](sub_col) + outer_order_by.append(sub_col) + # Build the resulting query + return ( + sa.select(db_model_class) + .join(subq, subq.c.id == db_model_class.id) + .order_by(*outer_order_by) + ) + + def router_read_many[T: BaseModel, I: Identifiable]( # noqa: PLR0913 *, db: Session, @@ -319,17 +359,7 @@ def router_read_many[T: BaseModel, I: Identifiable]( # noqa: PLR0913 ) if apply_data_query_operations: - # Select the minimum set of columns needed in the subquery, always including the id - sort_columns = [getattr(c, "element", c) for c in data_query._order_by_clauses] # noqa: SLF001 - subq = data_query.with_only_columns(*sort_columns).subquery() - # Build the new data_query using the subquery because it's more performant, - # especially when using pagination and selecting a big offset. - # The rows selected in the outer query must be sorted again for deterministic results. - data_query = ( - sa.select(db_model_class) - .join(subq, subq.c.id == db_model_class.id) - .order_by(*data_query._order_by_clauses) # noqa: SLF001 - ) + data_query = _with_subquery(data_query=data_query, db_model_class=db_model_class) data_query = apply_data_query_operations(data_query) # unique is needed b/c it contains results that include joined eager loads against collections From 7a167f15fbafbba11d25cceef6eb263302604c59 Mon Sep 17 00:00:00 2001 From: Gianluca Ficarelli Date: Mon, 17 Nov 2025 09:05:59 +0100 Subject: [PATCH 3/4] Use labels --- app/queries/common.py | 42 ++++++++++++------- app/service/measurement_annotation.py | 2 +- .../scientific_artifact_external_url_link.py | 26 +----------- .../scientific_artifact_publication_link.py | 24 +---------- 4 files changed, 31 insertions(+), 63 deletions(-) diff --git a/app/queries/common.py b/app/queries/common.py index be684843..be2bae53 100644 --- a/app/queries/common.py +++ b/app/queries/common.py @@ -248,30 +248,40 @@ def _with_subquery[I: Identifiable]( - needing many columns for building the results, but not all of them are needed for filtering. """ order_by_clauses = data_query._order_by_clauses # noqa: SLF001 - # Get the plain columns needed in the subquery, without DESC/ASC if UnaryExpression - sort_columns = [getattr(obc, "element", obc) for obc in order_by_clauses] - subq = data_query.with_only_columns(*sort_columns).subquery() - # Dict of modifiers as found in UnaryExpression. + # dict of modifiers as found in UnaryExpression. modifiers = { operators.desc_op: lambda x: x.desc(), operators.asc_op: lambda x: x.asc(), } - # Ensure that the rows selected in the outer query are sorted again for deterministic results. - outer_order_by = [] - for obc in order_by_clauses: - col = getattr(obc, "element", obc) - if not col.key: - msg = f"Can't determine column key for order-by expression: {col!r}" - raise RuntimeError(msg) - sub_col = subq.c[col.key] - if modifier := getattr(obc, "modifier", None): + + # list of (label_name, element, modifier) + labeled_sort_columns = [] + for i, ob in enumerate(order_by_clauses): + # element is the plain column needed in the subquery, without ASC/DESC if UnaryExpression + element = getattr(ob, "element", ob) + # modifier contains the ASC/DESC ordering + modifier = getattr(ob, "modifier", None) + # label_name is needed for ordering the outer query even in case of BinaryExpression + label_name = f"_s{i}" + labeled_sort_columns.append((label_name, element, modifier)) + + select_cols = [db_model_class.id] + [ + element.label(label_name) for (label_name, element, _) in labeled_sort_columns + ] + subq = data_query.with_only_columns(*select_cols).subquery() + + outer_order_bys = [] + for label_name, _, modifier in labeled_sort_columns: + sub_col = subq.c[label_name] + if modifier: sub_col = modifiers[modifier](sub_col) - outer_order_by.append(sub_col) - # Build the resulting query + outer_order_bys.append(sub_col) + + # build the final query return ( sa.select(db_model_class) .join(subq, subq.c.id == db_model_class.id) - .order_by(*outer_order_by) + .order_by(*outer_order_bys) ) diff --git a/app/service/measurement_annotation.py b/app/service/measurement_annotation.py index 313c2067..bb543ca3 100644 --- a/app/service/measurement_annotation.py +++ b/app/service/measurement_annotation.py @@ -90,7 +90,7 @@ def read_many( facets=None, aliases=None, apply_filter_query_operations=apply_filter_query_operations, - apply_data_query_operations=_load_from_db, + apply_data_query_operations=_load_from_db_with_entity, pagination_request=pagination_request, response_schema_class=MeasurementAnnotationRead, name_to_facet_query_params=name_to_facet_query_params, diff --git a/app/service/scientific_artifact_external_url_link.py b/app/service/scientific_artifact_external_url_link.py index c0d1b527..43803880 100644 --- a/app/service/scientific_artifact_external_url_link.py +++ b/app/service/scientific_artifact_external_url_link.py @@ -2,7 +2,7 @@ from typing import TYPE_CHECKING import sqlalchemy as sa -from sqlalchemy.orm import aliased, contains_eager, joinedload, raiseload +from sqlalchemy.orm import aliased, joinedload, raiseload from app.db.auth import ( constrain_to_accessible_entities, @@ -48,26 +48,6 @@ def _load(query: sa.Select) -> sa.Select: ) -def _load_with_eager(query: sa.Select, aliases): - return query.options( - contains_eager( - ScientificArtifactExternalUrlLink.scientific_artifact.of_type( - aliases[ScientificArtifact] - ) - ), - contains_eager( - ScientificArtifactExternalUrlLink.external_url.of_type(aliases[ExternalUrl]) - ), - contains_eager( - ScientificArtifactExternalUrlLink.created_by.of_type(aliases[Agent]["created_by"]) - ), - contains_eager( - ScientificArtifactExternalUrlLink.updated_by.of_type(aliases[Agent]["updated_by"]) - ), - raiseload("*"), - ) - - def read_one( user_context: UserContextDep, db: SessionDep, @@ -177,8 +157,6 @@ def read_many( db_model_class=scientific_artifact_alias, ) - load_with_aliases = lambda q: _load_with_eager(q, aliases) - return router_read_many( db=db, filter_model=filter_model, @@ -188,7 +166,7 @@ def read_many( facets=facets, name_to_facet_query_params=name_to_facet_query_params, apply_filter_query_operations=filter_query, - apply_data_query_operations=load_with_aliases, + apply_data_query_operations=_load, aliases=aliases, pagination_request=pagination_request, response_schema_class=ScientificArtifactExternalUrlLinkRead, diff --git a/app/service/scientific_artifact_publication_link.py b/app/service/scientific_artifact_publication_link.py index 23479dcd..3a113b6a 100644 --- a/app/service/scientific_artifact_publication_link.py +++ b/app/service/scientific_artifact_publication_link.py @@ -2,7 +2,7 @@ from typing import TYPE_CHECKING import sqlalchemy as sa -from sqlalchemy.orm import aliased, contains_eager, joinedload, raiseload +from sqlalchemy.orm import aliased, joinedload, raiseload from app.db.auth import ( constrain_to_accessible_entities, @@ -48,24 +48,6 @@ def _load(query: sa.Select): ) -def _load_with_eager(query: sa.Select, aliases): - return query.options( - contains_eager( - ScientificArtifactPublicationLink.scientific_artifact.of_type( - aliases[ScientificArtifact] - ) - ), - contains_eager(ScientificArtifactPublicationLink.publication.of_type(aliases[Publication])), - contains_eager( - ScientificArtifactPublicationLink.created_by.of_type(aliases[Agent]["created_by"]) - ), - contains_eager( - ScientificArtifactPublicationLink.updated_by.of_type(aliases[Agent]["updated_by"]) - ), - raiseload("*"), - ) - - def read_one( user_context: UserContextDep, db: SessionDep, @@ -175,8 +157,6 @@ def read_many( db_model_class=scientific_artifact_alias, ) - load_with_aliases = lambda q: _load_with_eager(q, aliases) - return router_read_many( db=db, filter_model=filter_model, @@ -186,7 +166,7 @@ def read_many( facets=facets, name_to_facet_query_params=name_to_facet_query_params, apply_filter_query_operations=filter_query, - apply_data_query_operations=load_with_aliases, + apply_data_query_operations=_load, aliases=aliases, pagination_request=pagination_request, response_schema_class=ScientificArtifactPublicationLinkRead, From 9c30c9f55fd32eb20c25961657d3d0ece18c6d42 Mon Sep 17 00:00:00 2001 From: Gianluca Ficarelli Date: Mon, 17 Nov 2025 09:43:52 +0100 Subject: [PATCH 4/4] Restore order_by in relationships --- app/db/model.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/app/db/model.py b/app/db/model.py index 5efbede5..51eec901 100644 --- a/app/db/model.py +++ b/app/db/model.py @@ -483,6 +483,7 @@ def mtypes(cls) -> Mapped[list["MTypeClass"]]: primaryjoin=f"{cls.__name__}.id == MTypeClassification.entity_id", secondary="mtype_classification", uselist=True, + order_by="MTypeClass.pref_label", passive_deletes=True, ) @@ -499,6 +500,7 @@ def etypes(cls) -> Mapped[list["ETypeClass"]]: primaryjoin=f"{cls.__name__}.id == ETypeClassification.entity_id", secondary="etype_classification", uselist=True, + order_by="ETypeClass.pref_label", passive_deletes=True, ) @@ -700,6 +702,7 @@ class EModel( secondary="ion_channel_model__emodel", uselist=True, viewonly=True, + order_by="IonChannelModel.creation_date", ) __mapper_args__ = {"polymorphic_identity": __tablename__} # noqa: RUF012