From 641f5bb3eb06c53596fa78e3a6549f1dd055c448 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Thu, 16 Feb 2023 23:47:55 +0100 Subject: [PATCH 1/2] Refactor: remove `Version.documentation_type` We are planning to suppose _any_ doctool and we want to get rid of the `documentation_type` attribute and make our logic more generic to work on most scenarios without specific logic per doctool. --- readthedocs/api/v2/views/footer_views.py | 1 + readthedocs/projects/models.py | 1 + readthedocs/search/documents.py | 1 + 3 files changed, 3 insertions(+) diff --git a/readthedocs/api/v2/views/footer_views.py b/readthedocs/api/v2/views/footer_views.py index e5f78ffdf0b..a513a05e287 100644 --- a/readthedocs/api/v2/views/footer_views.py +++ b/readthedocs/api/v2/views/footer_views.py @@ -155,6 +155,7 @@ def _get_context(self): page_slug = self.request.GET.get('page', '') path = '' + # TODO: review custom logic based con `documentation_type` if page_slug and page_slug != 'index': if version.documentation_type in {SPHINX_HTMLDIR, MKDOCS}: path = re.sub('/index$', '', page_slug) + '/' diff --git a/readthedocs/projects/models.py b/readthedocs/projects/models.py index b7d80730c07..9f0ebb11299 100644 --- a/readthedocs/projects/models.py +++ b/readthedocs/projects/models.py @@ -1429,6 +1429,7 @@ class Meta: objects = HTMLFileManager() def get_processed_json(self): + # TODO: review custom logic based con `documentation_type` if ( self.version.documentation_type == constants.GENERIC or self.project.has_feature(Feature.INDEX_FROM_HTML_FILES) diff --git a/readthedocs/search/documents.py b/readthedocs/search/documents.py index 06b7b9b9620..a90ad7d309a 100644 --- a/readthedocs/search/documents.py +++ b/readthedocs/search/documents.py @@ -74,6 +74,7 @@ class PageDocument(RTDDocTypeMixin, Document): # Metadata project = fields.KeywordField(attr='project.slug') version = fields.KeywordField(attr='version.slug') + # TODO: review custom logic based con `documentation_type` doctype = fields.KeywordField(attr='version.documentation_type') path = fields.KeywordField(attr='processed_json.path') full_path = fields.KeywordField(attr='path') From 254974dd12c788ec69f83627c70c445a2025159e Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Wed, 1 Mar 2023 16:16:28 +0100 Subject: [PATCH 2/2] Refactor: add more comments where `doctype` is used --- readthedocs/builds/models.py | 2 ++ readthedocs/doc_builder/director.py | 8 ------- readthedocs/embed/views.py | 5 +++++ readthedocs/projects/models.py | 28 ++---------------------- readthedocs/projects/tasks/search.py | 4 ++++ readthedocs/search/api/v2/serializers.py | 1 + readthedocs/search/documents.py | 4 ++-- readthedocs/telemetry/collectors.py | 13 +++++------ 8 files changed, 22 insertions(+), 43 deletions(-) diff --git a/readthedocs/builds/models.py b/readthedocs/builds/models.py index 230c77a11ee..582be0f8c39 100644 --- a/readthedocs/builds/models.py +++ b/readthedocs/builds/models.py @@ -384,6 +384,8 @@ def is_editable(self): @property def is_sphinx_type(self): + # TODO: this method should no be required anymore. + # I'm not removing it yet because there may be places still using it. return self.documentation_type in {SPHINX, SPHINX_HTMLDIR, SPHINX_SINGLEHTML} @property diff --git a/readthedocs/doc_builder/director.py b/readthedocs/doc_builder/director.py index 44f7afb37b5..eed2ba6c8a1 100644 --- a/readthedocs/doc_builder/director.py +++ b/readthedocs/doc_builder/director.py @@ -365,10 +365,6 @@ def run_build_commands(self): if not os.path.exists(html_output_path): raise BuildUserError(BuildUserError.BUILD_COMMANDS_WITHOUT_OUTPUT) - # Update the `Version.documentation_type` to match the doctype defined - # by the config file. When using `build.commands` it will be `GENERIC` - self.data.version.documentation_type = self.data.config.doctype - def install_build_tools(self): """ Install all ``build.tools`` defined by the user in the config file. @@ -597,7 +593,3 @@ def get_build_env_vars(self): ) return env - - def is_type_sphinx(self): - """Is documentation type Sphinx.""" - return "sphinx" in self.data.config.doctype diff --git a/readthedocs/embed/views.py b/readthedocs/embed/views.py index 1437a3a224a..884d976ebff 100644 --- a/readthedocs/embed/views.py +++ b/readthedocs/embed/views.py @@ -149,6 +149,11 @@ def do_embed(*, project, version, doc=None, path=None, section=None, url=None): content = None headers = None + # TODO: EmbedAPI v1 is already deprecated and shouldn't be used. + # So, we may be able to remove the endpoint completely, together with the usage of + # `is_sphinx_type` as well :) + # It's disabled on commercial already. We should check the stats on Cloudflare/NewRelic + # looking for EmbedAPI v1 and decide what to do depending on usage. if version.is_sphinx_type: file_content = _get_doc_content( project=project, diff --git a/readthedocs/projects/models.py b/readthedocs/projects/models.py index 9f0ebb11299..d1656e6ff3c 100644 --- a/readthedocs/projects/models.py +++ b/readthedocs/projects/models.py @@ -48,7 +48,7 @@ validate_repository_url, ) from readthedocs.projects.version_handling import determine_stable_version -from readthedocs.search.parsers import GenericParser, MkDocsParser, SphinxParser +from readthedocs.search.parsers import GenericParser from readthedocs.storage import build_media_storage from readthedocs.vcs_support.backends import backend_cls @@ -1429,25 +1429,7 @@ class Meta: objects = HTMLFileManager() def get_processed_json(self): - # TODO: review custom logic based con `documentation_type` - if ( - self.version.documentation_type == constants.GENERIC - or self.project.has_feature(Feature.INDEX_FROM_HTML_FILES) - ): - parser_class = GenericParser - elif self.version.is_sphinx_type: - parser_class = SphinxParser - elif self.version.is_mkdocs_type: - parser_class = MkDocsParser - else: - log.warning( - "Invalid documentation type", - documentation_type=self.version.documentation_type, - version_slug=self.version.slug, - project_slug=self.project.slug, - ) - return {} - parser = parser_class(self.version) + parser = GenericParser(self.version) return parser.parse(self.path) @cached_property @@ -1844,7 +1826,6 @@ def add_features(sender, **kwargs): DISABLE_SERVER_SIDE_SEARCH = 'disable_server_side_search' ENABLE_MKDOCS_SERVER_SIDE_SEARCH = 'enable_mkdocs_server_side_search' DEFAULT_TO_FUZZY_SEARCH = 'default_to_fuzzy_search' - INDEX_FROM_HTML_FILES = 'index_from_html_files' # Build related features LIST_PACKAGES_INSTALLED_ENV = "list_packages_installed_env" @@ -1986,11 +1967,6 @@ def add_features(sender, **kwargs): DEFAULT_TO_FUZZY_SEARCH, _('Default to fuzzy search for simple search queries'), ), - ( - INDEX_FROM_HTML_FILES, - _('Index content directly from html files instead or relying in other sources'), - ), - ( LIST_PACKAGES_INSTALLED_ENV, _( diff --git a/readthedocs/projects/tasks/search.py b/readthedocs/projects/tasks/search.py index dc5dec23890..7cd8bd980bc 100644 --- a/readthedocs/projects/tasks/search.py +++ b/readthedocs/projects/tasks/search.py @@ -127,6 +127,10 @@ def _create_intersphinx_data(version, commit, build): :param commit: Commit that updated path :param build: Build id """ + + # TODO: I think this method can be completely removed, + # since we said that we don't want to store SphinDomain objects anymore. + # I didn't find the issue, but I think we have one. if not version.is_sphinx_type: return diff --git a/readthedocs/search/api/v2/serializers.py b/readthedocs/search/api/v2/serializers.py index 98d8adc11aa..40f33af0d5c 100644 --- a/readthedocs/search/api/v2/serializers.py +++ b/readthedocs/search/api/v2/serializers.py @@ -146,6 +146,7 @@ def _get_full_path(self, obj): docs_url = project_data.version.docs_url path = obj.full_path + # TODO: review this logic that uses doctype # Generate an appropriate link for the doctypes that use htmldir, # and always end it with / so it goes directly to proxito. # For a generic doctype we just strip the index.html part if it exists. diff --git a/readthedocs/search/documents.py b/readthedocs/search/documents.py index a90ad7d309a..3b624320cf9 100644 --- a/readthedocs/search/documents.py +++ b/readthedocs/search/documents.py @@ -74,8 +74,6 @@ class PageDocument(RTDDocTypeMixin, Document): # Metadata project = fields.KeywordField(attr='project.slug') version = fields.KeywordField(attr='version.slug') - # TODO: review custom logic based con `documentation_type` - doctype = fields.KeywordField(attr='version.documentation_type') path = fields.KeywordField(attr='processed_json.path') full_path = fields.KeywordField(attr='path') rank = fields.IntegerField() @@ -128,6 +126,8 @@ def prepare_rank(self, html_file): def prepare_domains(self, html_file): """Prepares and returns the values for domains field.""" + # TODO: remove this method completely. It's not used. + # I grepped all the code and I didn't find where it's used. # XXX: Don't access the sphinx domains table while we migrate the ID type # https://github.com/readthedocs/readthedocs.org/pull/9482. diff --git a/readthedocs/telemetry/collectors.py b/readthedocs/telemetry/collectors.py index 05cba855bf1..77620325fa5 100644 --- a/readthedocs/telemetry/collectors.py +++ b/readthedocs/telemetry/collectors.py @@ -95,13 +95,9 @@ def collect(self): return data def _get_doctool_name(self): - if self.version.is_sphinx_type: - return "sphinx" - - if self.version.is_mkdocs_type: - return "mkdocs" - - return "generic" + # TODO: this data should be communicated from the builder itself homehow. + # This is a place where the build contract could be useful. + return "unknown" def _get_doctool(self): data = { @@ -110,6 +106,9 @@ def _get_doctool(self): "html_theme": "", } + # TODO: adapt this logic to work with a generic builder. This requires + # a build contract that explains how to create the JSON file we are + # expecting at this point if self._get_doctool_name() != "sphinx": return data