From 641f5bb3eb06c53596fa78e3a6549f1dd055c448 Mon Sep 17 00:00:00 2001
From: Manuel Kaufmann <humitos@gmail.com>
Date: Thu, 16 Feb 2023 23:47:55 +0100
Subject: [PATCH 1/2] Refactor: remove `Version.documentation_type`

We are planning to suppose _any_ doctool and we want to get rid of the
`documentation_type` attribute and make our logic more generic to work on most
scenarios without specific logic per doctool.
---
 readthedocs/api/v2/views/footer_views.py | 1 +
 readthedocs/projects/models.py           | 1 +
 readthedocs/search/documents.py          | 1 +
 3 files changed, 3 insertions(+)

diff --git a/readthedocs/api/v2/views/footer_views.py b/readthedocs/api/v2/views/footer_views.py
index e5f78ffdf0b..a513a05e287 100644
--- a/readthedocs/api/v2/views/footer_views.py
+++ b/readthedocs/api/v2/views/footer_views.py
@@ -155,6 +155,7 @@ def _get_context(self):
 
         page_slug = self.request.GET.get('page', '')
         path = ''
+        # TODO: review custom logic based con `documentation_type`
         if page_slug and page_slug != 'index':
             if version.documentation_type in {SPHINX_HTMLDIR, MKDOCS}:
                 path = re.sub('/index$', '', page_slug) + '/'
diff --git a/readthedocs/projects/models.py b/readthedocs/projects/models.py
index b7d80730c07..9f0ebb11299 100644
--- a/readthedocs/projects/models.py
+++ b/readthedocs/projects/models.py
@@ -1429,6 +1429,7 @@ class Meta:
     objects = HTMLFileManager()
 
     def get_processed_json(self):
+        # TODO: review custom logic based con `documentation_type`
         if (
             self.version.documentation_type == constants.GENERIC
             or self.project.has_feature(Feature.INDEX_FROM_HTML_FILES)
diff --git a/readthedocs/search/documents.py b/readthedocs/search/documents.py
index 06b7b9b9620..a90ad7d309a 100644
--- a/readthedocs/search/documents.py
+++ b/readthedocs/search/documents.py
@@ -74,6 +74,7 @@ class PageDocument(RTDDocTypeMixin, Document):
     # Metadata
     project = fields.KeywordField(attr='project.slug')
     version = fields.KeywordField(attr='version.slug')
+    # TODO: review custom logic based con `documentation_type`
     doctype = fields.KeywordField(attr='version.documentation_type')
     path = fields.KeywordField(attr='processed_json.path')
     full_path = fields.KeywordField(attr='path')

From 254974dd12c788ec69f83627c70c445a2025159e Mon Sep 17 00:00:00 2001
From: Manuel Kaufmann <humitos@gmail.com>
Date: Wed, 1 Mar 2023 16:16:28 +0100
Subject: [PATCH 2/2] Refactor: add more comments where `doctype` is used

---
 readthedocs/builds/models.py             |  2 ++
 readthedocs/doc_builder/director.py      |  8 -------
 readthedocs/embed/views.py               |  5 +++++
 readthedocs/projects/models.py           | 28 ++----------------------
 readthedocs/projects/tasks/search.py     |  4 ++++
 readthedocs/search/api/v2/serializers.py |  1 +
 readthedocs/search/documents.py          |  4 ++--
 readthedocs/telemetry/collectors.py      | 13 +++++------
 8 files changed, 22 insertions(+), 43 deletions(-)

diff --git a/readthedocs/builds/models.py b/readthedocs/builds/models.py
index 230c77a11ee..582be0f8c39 100644
--- a/readthedocs/builds/models.py
+++ b/readthedocs/builds/models.py
@@ -384,6 +384,8 @@ def is_editable(self):
 
     @property
     def is_sphinx_type(self):
+        # TODO: this method should no be required anymore.
+        # I'm not removing it yet because there may be places still using it.
         return self.documentation_type in {SPHINX, SPHINX_HTMLDIR, SPHINX_SINGLEHTML}
 
     @property
diff --git a/readthedocs/doc_builder/director.py b/readthedocs/doc_builder/director.py
index 44f7afb37b5..eed2ba6c8a1 100644
--- a/readthedocs/doc_builder/director.py
+++ b/readthedocs/doc_builder/director.py
@@ -365,10 +365,6 @@ def run_build_commands(self):
         if not os.path.exists(html_output_path):
             raise BuildUserError(BuildUserError.BUILD_COMMANDS_WITHOUT_OUTPUT)
 
-        # Update the `Version.documentation_type` to match the doctype defined
-        # by the config file. When using `build.commands` it will be `GENERIC`
-        self.data.version.documentation_type = self.data.config.doctype
-
     def install_build_tools(self):
         """
         Install all ``build.tools`` defined by the user in the config file.
@@ -597,7 +593,3 @@ def get_build_env_vars(self):
         )
 
         return env
-
-    def is_type_sphinx(self):
-        """Is documentation type Sphinx."""
-        return "sphinx" in self.data.config.doctype
diff --git a/readthedocs/embed/views.py b/readthedocs/embed/views.py
index 1437a3a224a..884d976ebff 100644
--- a/readthedocs/embed/views.py
+++ b/readthedocs/embed/views.py
@@ -149,6 +149,11 @@ def do_embed(*, project, version, doc=None, path=None, section=None, url=None):
 
     content = None
     headers = None
+    # TODO: EmbedAPI v1 is already deprecated and shouldn't be used.
+    # So, we may be able to remove the endpoint completely, together with the usage of
+    # `is_sphinx_type` as well :)
+    # It's disabled on commercial already. We should check the stats on Cloudflare/NewRelic
+    # looking for EmbedAPI v1 and decide what to do depending on usage.
     if version.is_sphinx_type:
         file_content = _get_doc_content(
             project=project,
diff --git a/readthedocs/projects/models.py b/readthedocs/projects/models.py
index 9f0ebb11299..d1656e6ff3c 100644
--- a/readthedocs/projects/models.py
+++ b/readthedocs/projects/models.py
@@ -48,7 +48,7 @@
     validate_repository_url,
 )
 from readthedocs.projects.version_handling import determine_stable_version
-from readthedocs.search.parsers import GenericParser, MkDocsParser, SphinxParser
+from readthedocs.search.parsers import GenericParser
 from readthedocs.storage import build_media_storage
 from readthedocs.vcs_support.backends import backend_cls
 
@@ -1429,25 +1429,7 @@ class Meta:
     objects = HTMLFileManager()
 
     def get_processed_json(self):
-        # TODO: review custom logic based con `documentation_type`
-        if (
-            self.version.documentation_type == constants.GENERIC
-            or self.project.has_feature(Feature.INDEX_FROM_HTML_FILES)
-        ):
-            parser_class = GenericParser
-        elif self.version.is_sphinx_type:
-            parser_class = SphinxParser
-        elif self.version.is_mkdocs_type:
-            parser_class = MkDocsParser
-        else:
-            log.warning(
-                "Invalid documentation type",
-                documentation_type=self.version.documentation_type,
-                version_slug=self.version.slug,
-                project_slug=self.project.slug,
-            )
-            return {}
-        parser = parser_class(self.version)
+        parser = GenericParser(self.version)
         return parser.parse(self.path)
 
     @cached_property
@@ -1844,7 +1826,6 @@ def add_features(sender, **kwargs):
     DISABLE_SERVER_SIDE_SEARCH = 'disable_server_side_search'
     ENABLE_MKDOCS_SERVER_SIDE_SEARCH = 'enable_mkdocs_server_side_search'
     DEFAULT_TO_FUZZY_SEARCH = 'default_to_fuzzy_search'
-    INDEX_FROM_HTML_FILES = 'index_from_html_files'
 
     # Build related features
     LIST_PACKAGES_INSTALLED_ENV = "list_packages_installed_env"
@@ -1986,11 +1967,6 @@ def add_features(sender, **kwargs):
             DEFAULT_TO_FUZZY_SEARCH,
             _('Default to fuzzy search for simple search queries'),
         ),
-        (
-            INDEX_FROM_HTML_FILES,
-            _('Index content directly from html files instead or relying in other sources'),
-        ),
-
         (
             LIST_PACKAGES_INSTALLED_ENV,
             _(
diff --git a/readthedocs/projects/tasks/search.py b/readthedocs/projects/tasks/search.py
index dc5dec23890..7cd8bd980bc 100644
--- a/readthedocs/projects/tasks/search.py
+++ b/readthedocs/projects/tasks/search.py
@@ -127,6 +127,10 @@ def _create_intersphinx_data(version, commit, build):
     :param commit: Commit that updated path
     :param build: Build id
     """
+
+    # TODO: I think this method can be completely removed,
+    # since we said that we don't want to store SphinDomain objects anymore.
+    # I didn't find the issue, but I think we have one.
     if not version.is_sphinx_type:
         return
 
diff --git a/readthedocs/search/api/v2/serializers.py b/readthedocs/search/api/v2/serializers.py
index 98d8adc11aa..40f33af0d5c 100644
--- a/readthedocs/search/api/v2/serializers.py
+++ b/readthedocs/search/api/v2/serializers.py
@@ -146,6 +146,7 @@ def _get_full_path(self, obj):
             docs_url = project_data.version.docs_url
             path = obj.full_path
 
+            # TODO: review this logic that uses doctype
             # Generate an appropriate link for the doctypes that use htmldir,
             # and always end it with / so it goes directly to proxito.
             # For a generic doctype we just strip the index.html part if it exists.
diff --git a/readthedocs/search/documents.py b/readthedocs/search/documents.py
index a90ad7d309a..3b624320cf9 100644
--- a/readthedocs/search/documents.py
+++ b/readthedocs/search/documents.py
@@ -74,8 +74,6 @@ class PageDocument(RTDDocTypeMixin, Document):
     # Metadata
     project = fields.KeywordField(attr='project.slug')
     version = fields.KeywordField(attr='version.slug')
-    # TODO: review custom logic based con `documentation_type`
-    doctype = fields.KeywordField(attr='version.documentation_type')
     path = fields.KeywordField(attr='processed_json.path')
     full_path = fields.KeywordField(attr='path')
     rank = fields.IntegerField()
@@ -128,6 +126,8 @@ def prepare_rank(self, html_file):
 
     def prepare_domains(self, html_file):
         """Prepares and returns the values for domains field."""
+        # TODO: remove this method completely. It's not used.
+        # I grepped all the code and I didn't find where it's used.
 
         # XXX: Don't access the sphinx domains table while we migrate the ID type
         # https://github.com/readthedocs/readthedocs.org/pull/9482.
diff --git a/readthedocs/telemetry/collectors.py b/readthedocs/telemetry/collectors.py
index 05cba855bf1..77620325fa5 100644
--- a/readthedocs/telemetry/collectors.py
+++ b/readthedocs/telemetry/collectors.py
@@ -95,13 +95,9 @@ def collect(self):
         return data
 
     def _get_doctool_name(self):
-        if self.version.is_sphinx_type:
-            return "sphinx"
-
-        if self.version.is_mkdocs_type:
-            return "mkdocs"
-
-        return "generic"
+        # TODO: this data should be communicated from the builder itself homehow.
+        # This is a place where the build contract could be useful.
+        return "unknown"
 
     def _get_doctool(self):
         data = {
@@ -110,6 +106,9 @@ def _get_doctool(self):
             "html_theme": "",
         }
 
+        # TODO: adapt this logic to work with a generic builder. This requires
+        # a build contract that explains how to create the JSON file we are
+        # expecting at this point
         if self._get_doctool_name() != "sphinx":
             return data