Skip to content
Merged
Changes from 2 commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
c84577a
WIP: Draft on parsing <dl> as normal section
benjaoming Mar 8, 2023
13c7946
Drafting what to remove
benjaoming Mar 8, 2023
780dec0
domain_data is no longer generated
benjaoming Mar 8, 2023
0968e01
Update generic logic, remove old Sphinx <dl> parsing
benjaoming Mar 13, 2023
5797c6a
Improve parsing with "General sibling combinator", add test case data
benjaoming Mar 13, 2023
b87d3ee
Add httpdomain example
benjaoming Mar 20, 2023
cc77f5e
test case for Sphinx autodoc HTML
benjaoming Mar 20, 2023
71a4018
Merge branch 'main' of github.com:readthedocs/readthedocs.org into ge…
benjaoming Mar 23, 2023
612c687
Remove entire block that was indexing Sphinx domains
benjaoming Mar 23, 2023
562d18b
Clean up remaining Sphinx domain search index
benjaoming Mar 23, 2023
508658c
Merge branch 'main' of github.com:readthedocs/readthedocs.org into ge…
benjaoming Mar 27, 2023
f6cd1d4
Strip out Sphinx line numbers in the generic `_clean_body` method
benjaoming Mar 27, 2023
147d1ce
Remove indexed <dl> contents before indexing other sections
benjaoming Mar 27, 2023
3eb302c
Update our developer docs for section indexes a bit
benjaoming Mar 27, 2023
bec3bdb
Apply suggestions from @stsewd code review
benjaoming Mar 28, 2023
e690970
Apply suggestions from @stsewd code review
benjaoming Mar 28, 2023
f17d500
Removes nodes immediately, it seems that selectolax does DFS but no g…
benjaoming Mar 30, 2023
b19f587
Update docs
benjaoming Mar 30, 2023
4f6be90
Use 1-line generator
benjaoming Mar 30, 2023
d953380
Update basic example showing that it uses BFS for indexing
benjaoming Mar 30, 2023
c38f8ab
No future code path
benjaoming Mar 30, 2023
cfac4ef
Remember all <dls> that were already seen before indexing them
benjaoming Mar 30, 2023
f4f97a5
Generate output that resembles Depth-First-Search (even though we tra…
benjaoming Apr 6, 2023
380aabf
Remove <dl>s from DOM before parsing content outside of <h1..7>s
benjaoming Apr 6, 2023
d9a3021
Parse only dl>dt for each dl using a hack, remove decompose() call th…
benjaoming Apr 10, 2023
0a11690
Update test data (manually sample-read a lot and it looks good, no du…
benjaoming Apr 10, 2023
e7c4946
Also remove already indexed <dt>s from DOM before continuing to index
benjaoming Apr 10, 2023
5241741
Split <dl> parsing into separate method
benjaoming Apr 10, 2023
abbd033
Use css selectors when possible, this one works
benjaoming Apr 10, 2023
58364d0
Remove print() statements
benjaoming Apr 10, 2023
2d8d585
Cleanup: Remove inaccurate comment
benjaoming Apr 10, 2023
952142a
Cleanup: Select adjacent dd instead of iterating
benjaoming Apr 10, 2023
3de9e39
Fix strange syntax
benjaoming Apr 10, 2023
c1a0287
Do not accumulate lists: Yield indexed nodes and section content
benjaoming Apr 10, 2023
0231e6c
Merge branch 'main' of github.com:readthedocs/readthedocs.org into ge…
benjaoming Apr 10, 2023
012e8dc
Appease "darker" lint
benjaoming Apr 10, 2023
4170338
Reduce complexity: replace css selector with a Python loop
benjaoming Apr 10, 2023
ffca7ad
Use "simple" analyzer on section contents
benjaoming Apr 11, 2023
8bc1450
Merge branch 'generic-html-parser-dls-remove-sphinx-domain' of github…
benjaoming Apr 11, 2023
17dfb8a
Revert "Use "simple" analyzer on section contents"
benjaoming Apr 11, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 42 additions & 9 deletions readthedocs/search/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,9 +153,40 @@ def _parse_sections(self, title, body):
'title': title,
'content': content,
}
except Exception as e:
log.info("Unable to index section.", section=str(e))

dls = body.css("dl")
for dl in dls:
dts = dl.css("dt")

for dt in dts:
try:
title, id = self._parse_dt(tag)
next_element = dt.next
# We only index a dt with an accompanying dd
if next_element.tag != "dd":
continue
content, _ = self._parse_section_content(next_element, depth=2)
yield {
"id": id,
"title": title,
"content": content,
}
except Exception as e:
log.info('Unable to index section.', section=str(e))

def _parse_dt(self, tag):
    """
    Parse a definition term (``<dt>``) into a title and an anchor id.

    :param tag: a ``<dt>`` node.
    :returns: tuple ``(title, section_id)``. The id is read from the
        ``<dt>`` itself, falling back to its parent node; it is always a
        string (empty when no id is present anywhere).
    """
    # ``attributes.get`` can yield None for a valueless attribute
    # (e.g. a bare ``id`` with no value), so coalesce to "" to keep the
    # "always a string" contract for callers.
    section_id = tag.attributes.get("id", "") or ""
    if not section_id:
        # Sphinx sometimes puts the anchor on the enclosing element
        # instead of the <dt>. Guard against a detached node whose
        # parent is None.
        parent = tag.parent
        if parent is not None:
            section_id = parent.attributes.get("id", "") or ""

    return self._parse_content(tag.text()), section_id

def _get_sections(self, title, body):
"""Get the first `self.max_inner_documents` sections."""
iterator = self._parse_sections(title=title, body=body)
Expand Down Expand Up @@ -407,7 +438,6 @@ def _process_fjson(self, fjson_path):
sections = []
path = ''
title = ''
domain_data = {}

if 'current_page_name' in data:
path = data['current_page_name']
Expand Down Expand Up @@ -435,21 +465,23 @@ def _process_fjson(self, fjson_path):
try:
# Create a new html object, since the previous one could have been modified.
body = HTMLParser(data["body"])
domain_data = self._generate_domains_data(body)
# domain_data = self._generate_domains_data(body)
except Exception:
log.info("Unable to index domains.", path=fjson_path)
else:
log.info('Unable to index content.', path=fjson_path)

return {
'path': path,
'title': title,
'sections': sections,
'domain_data': domain_data,
"path": path,
"title": title,
"sections": sections,
"domain_data": {}, # domain_data,
}

def _get_sphinx_domains(self, body):
"""
REMOVING THIS

Get all nodes that are a sphinx domain.

A Sphinx domain is a <dl> tag which contains <dt> tags with an 'id' attribute,
Expand All @@ -476,10 +508,9 @@ def _clean_body(self, body):
# while we migrate the ID type of the sphinx domains table
# https://github.com/readthedocs/readthedocs.org/pull/9482.
nodes_to_be_removed = []
from readthedocs.projects.models import Feature

if not self.project.has_feature(Feature.DISABLE_SPHINX_DOMAINS):
nodes_to_be_removed = self._get_sphinx_domains(body)
# if not self.project.has_feature(Feature.DISABLE_SPHINX_DOMAINS):
# nodes_to_be_removed = self._get_sphinx_domains(body)

# TODO: see if we really need to remove these
# remove `Table of Contents` elements
Expand All @@ -493,6 +524,8 @@ def _clean_body(self, body):

def _generate_domains_data(self, body):
"""
REMOVING THIS

Generate sphinx domain objects' docstrings.

Returns a dict with the generated data.
Expand Down