diff --git a/dojo/decorators.py b/dojo/decorators.py
index 40174524fb7..791e76cb8ab 100644
--- a/dojo/decorators.py
+++ b/dojo/decorators.py
@@ -64,13 +64,17 @@ def we_want_async(*args, func=None, **kwargs):
         return False
 
     user = kwargs.get("async_user", get_current_user())
-    logger.debug("user: %s", user)
+    logger.debug("async user: %s", user)
+
+    if not user:
+        logger.debug("dojo_async_task %s: no current user, running task in the background", func)
+        return True
 
     if Dojo_User.wants_block_execution(user):
         logger.debug("dojo_async_task %s: running task in the foreground as block_execution is set to True for %s", func, user)
         return False
 
-    logger.debug("dojo_async_task %s: no current user, running task in the background", func)
+    logger.debug("dojo_async_task %s: running task in the background as user has not set block_execution to True for %s", func, user)
     return True
diff --git a/dojo/importers/base_importer.py b/dojo/importers/base_importer.py
index f3425a63010..458ed219d73 100644
--- a/dojo/importers/base_importer.py
+++ b/dojo/importers/base_importer.py
@@ -684,14 +684,11 @@ def process_endpoints(
             logger.debug("endpoints_to_add: %s", endpoints_to_add)
             self.endpoint_manager.chunk_endpoints_and_disperse(finding, endpoints_to_add)
 
-    def process_vulnerability_ids(
+    def process_cve(
         self,
         finding: Finding,
     ) -> Finding:
-        """
-        Parse the `unsaved_vulnerability_ids` field from findings after they are parsed
-        to create `Vulnerability_Id` objects with the finding associated correctly
-        """
+        """Ensure cve is set from the unsaved_vulnerability_ids field, or vice versa."""
         # Synchronize the cve field with the unsaved_vulnerability_ids
         # We do this to be as flexible as possible to handle the fields until
         # the cve field is not needed anymore and can be removed.
@@ -705,6 +702,16 @@ def process_vulnerability_ids(
             # If there is no list, make one with the value of the cve field
             finding.unsaved_vulnerability_ids = [finding.cve]
 
+        return finding
+
+    def process_vulnerability_ids(
+        self,
+        finding: Finding,
+    ) -> Finding:
+        """
+        Parse the `unsaved_vulnerability_ids` field from findings after they are parsed
+        to create `Vulnerability_Id` objects with the finding associated correctly
+        """
         if finding.unsaved_vulnerability_ids:
             # Remove old vulnerability ids - keeping this call only because of flake8
             Vulnerability_Id.objects.filter(finding=finding).delete()
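The `process_cve`/`process_vulnerability_ids` split in base_importer.py leaves `process_cve` responsible only for keeping the legacy `cve` field and the parser-supplied `unsaved_vulnerability_ids` list in sync before any `Vulnerability_Id` rows are written. A minimal sketch of that synchronization rule, assuming a simplified stand-in object (`FindingStub` and `sync_cve` are illustrative names, not the real `Finding` model or method, and the exact precedence rules live in `process_cve` itself):

    # Illustrative sketch of the cve <-> unsaved_vulnerability_ids synchronization;
    # FindingStub is a stand-in, not dojo.models.Finding.
    from dataclasses import dataclass


    @dataclass
    class FindingStub:
        cve: str | None = None
        unsaved_vulnerability_ids: list[str] | None = None


    def sync_cve(finding: FindingStub) -> FindingStub:
        if finding.unsaved_vulnerability_ids:
            if not finding.cve:
                # ensure the legacy cve field reflects the first parsed vulnerability id
                finding.cve = finding.unsaved_vulnerability_ids[0]
        elif finding.cve:
            # if there is no list, make one with the value of the cve field
            finding.unsaved_vulnerability_ids = [finding.cve]
        return finding


    print(sync_cve(FindingStub(cve="CVE-2024-0001")).unsaved_vulnerability_ids)  # ['CVE-2024-0001']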
diff --git a/dojo/importers/default_importer.py b/dojo/importers/default_importer.py
index 14456954fe8..8aceb72e3fa 100644
--- a/dojo/importers/default_importer.py
+++ b/dojo/importers/default_importer.py
@@ -183,7 +183,7 @@ def process_findings(
             unsaved_finding.reporter = self.user
             unsaved_finding.last_reviewed_by = self.user
             unsaved_finding.last_reviewed = self.now
-            logger.debug("process_parsed_findings: active from report: %s, verified from report: %s", unsaved_finding.active, unsaved_finding.verified)
+            logger.debug("process_parsed_findings: unique_id_from_tool: %s, hash_code: %s, active from report: %s, verified from report: %s", unsaved_finding.unique_id_from_tool, unsaved_finding.hash_code, unsaved_finding.active, unsaved_finding.verified)
             # indicates an override. Otherwise, do not change the value of unsaved_finding.active
             if self.active is not None:
                 unsaved_finding.active = self.active
@@ -198,8 +198,10 @@ def process_findings(
             # Force parsers to use unsaved_tags (stored in below after saving)
             unsaved_finding.tags = None
 
-            # postprocessing will be done on next save.
+            finding = self.process_cve(unsaved_finding)
+            # postprocessing will be done after processing related fields like endpoints, vulnerability ids, etc.
             unsaved_finding.save_no_options()
+            finding = unsaved_finding
 
             # Determine how the finding should be grouped
             self.process_finding_groups(
@@ -218,11 +220,18 @@ def process_findings(
                 finding = self.process_vulnerability_ids(finding)
             # Categorize this finding as a new one
             new_findings.append(finding)
+            # all data is already saved on the finding, we only need to generate and store the hash_code
+            # this is an optimization to avoid a full UPDATE statement of the finding, which is quite a big object with lots of fields
+            # after that we trigger the post processing directly
+            # the alternative is to not trigger the post processing or generate the hash_code on the finding, but just call finding.save()
+            # this would do a full UPDATE statement for the finding
+
+            finding.set_hash_code(True)
+            finding.save_no_options(update_fields=["hash_code"])
+
             # to avoid pushing a finding group multiple times, we push those outside of the loop
-            if self.findings_groups_enabled and self.group_by:
-                finding.save()
-            else:
-                finding.save(push_to_jira=self.push_to_jira)
+            push_to_jira = self.push_to_jira and (not self.findings_groups_enabled or not self.group_by)
+            finding_helper.post_process_finding_save(finding, dedupe_option=True, rules_option=True, product_grading_option=True, issue_updater_option=True, push_to_jira=push_to_jira)
 
         for (group_name, findings) in group_names_to_findings_dict.items():
             finding_helper.add_findings_to_auto_group(
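In default_importer.py the old `if self.findings_groups_enabled and self.group_by` branch around `finding.save()` is collapsed into a single `push_to_jira` boolean handed to `finding_helper.post_process_finding_save`, while the finding itself is persisted with `save_no_options(update_fields=["hash_code"])` so only the freshly generated hash column is written. A small, self-contained check that the consolidated boolean matches the old branching for every flag combination:

    # Equivalence check for the consolidated push_to_jira expression; the variable
    # names mirror the diff, with booleans standing in for the real flags.
    from itertools import product

    for push_setting, groups_enabled, group_by in product([True, False], repeat=3):
        # old behaviour: grouped findings were saved without a Jira push (groups are pushed outside the loop)
        old_push = False if (groups_enabled and group_by) else push_setting
        # new behaviour: a single boolean passed to finding_helper.post_process_finding_save
        new_push = push_setting and (not groups_enabled or not group_by)
        assert old_push == new_push, (push_setting, groups_enabled, group_by)

    print("consolidated push_to_jira matches the old if/else for all combinations")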
diff --git a/dojo/importers/default_reimporter.py b/dojo/importers/default_reimporter.py
index 694bd8bc4e8..826ad62922d 100644
--- a/dojo/importers/default_reimporter.py
+++ b/dojo/importers/default_reimporter.py
@@ -236,12 +236,17 @@ def process_findings(
                     finding,
                     unsaved_finding,
                 )
-            # finding = new finding or existing finding still in the upload report
+            # all data is already saved on the finding, we only need to generate and store the hash_code
+            # this is an optimization to avoid a full UPDATE statement of the finding, which is quite a big object with lots of fields
+            # after that we trigger the post processing directly
+            # the alternative is to not trigger the post processing or generate the hash_code on the finding, but just call finding.save()
+            # this would do a full UPDATE statement for the finding
+            finding.set_hash_code(True)
+            finding.save_no_options(update_fields=["hash_code"])
+
             # to avoid pushing a finding group multiple times, we push those outside of the loop
-            if self.findings_groups_enabled and self.group_by:
-                finding.save()
-            else:
-                finding.save(push_to_jira=self.push_to_jira)
+            push_to_jira = self.push_to_jira and (not self.findings_groups_enabled or not self.group_by)
+            finding_helper.post_process_finding_save(finding, dedupe_option=True, rules_option=True, product_grading_option=True, issue_updater_option=True, push_to_jira=push_to_jira)
 
         self.to_mitigate = (set(self.original_items) - set(self.reactivated_items) - set(self.unchanged_items))
         # due to #3958 we can have duplicates inside the same report
@@ -336,11 +341,14 @@ def match_new_finding_to_existing_finding(
                 hash_code=unsaved_finding.hash_code,
             ).exclude(hash_code=None).order_by("id")
         if self.deduplication_algorithm == "unique_id_from_tool":
+            deduplicationLogger.debug(f"unique_id_from_tool: {unsaved_finding.unique_id_from_tool}")
             return Finding.objects.filter(
                 test=self.test,
                 unique_id_from_tool=unsaved_finding.unique_id_from_tool,
             ).exclude(unique_id_from_tool=None).order_by("id")
         if self.deduplication_algorithm == "unique_id_from_tool_or_hash_code":
+            deduplicationLogger.debug(f"unique_id_from_tool: {unsaved_finding.unique_id_from_tool}")
+            deduplicationLogger.debug(f"hash_code: {unsaved_finding.hash_code}")
             query = Finding.objects.filter(
                 Q(test=self.test),
                 (Q(hash_code__isnull=False) & Q(hash_code=unsaved_finding.hash_code))
@@ -501,10 +509,9 @@ def process_matched_mitigated_finding(
         if existing_finding.get_sla_configuration().restart_sla_on_reactivation:
             # restart the sla start date to the current date, finding.save() will set new sla_expiration_date
             existing_finding.sla_start_date = self.now
-
-        existing_finding.save(dedupe_option=False)
-        # don't dedupe before endpoints are added
-        existing_finding.save(dedupe_option=False)
+        existing_finding = self.process_cve(existing_finding)
+        # don't dedupe before endpoints are added, postprocessing will be done on next save (in calling method)
+        existing_finding.save_no_options()
         note = Notes(entry=f"Re-activated by {self.scan_type} re-upload.", author=self.user)
         note.save()
         endpoint_statuses = existing_finding.status_finding.exclude(
@@ -552,6 +559,9 @@ def process_matched_active_finding(
                     existing_finding.active = False
                 if self.verified is not None:
                     existing_finding.verified = self.verified
+                existing_finding = self.process_cve(existing_finding)
+                existing_finding.save_no_options()
+
             elif unsaved_finding.risk_accepted or unsaved_finding.false_p or unsaved_finding.out_of_scope:
                 logger.debug("Reimported mitigated item matches a finding that is currently open, closing.")
                 logger.debug(
@@ -564,6 +574,8 @@ def process_matched_active_finding(
                     existing_finding.active = False
                 if self.verified is not None:
                     existing_finding.verified = self.verified
+                existing_finding = self.process_cve(existing_finding)
+                existing_finding.save_no_options()
             else:
                 # if finding is the same but list of affected was changed, finding is marked as unchanged. This is a known issue
                 self.unchanged_items.append(existing_finding)
@@ -598,6 +610,7 @@ def process_finding_that_was_not_matched(
         # scan_date was provided, override value from parser
         if self.scan_date_override:
             unsaved_finding.date = self.scan_date.date()
+        unsaved_finding = self.process_cve(unsaved_finding)
         # Save it. Don't dedupe before endpoints are added.
         unsaved_finding.save_no_options()
         finding = unsaved_finding
@@ -641,7 +654,7 @@ def finding_post_processing(
         # Process vulnerability IDs
         if finding_from_report.unsaved_vulnerability_ids:
             finding.unsaved_vulnerability_ids = finding_from_report.unsaved_vulnerability_ids
-
+        # legacy cve field has already been processed/set earlier
         return self.process_vulnerability_ids(finding)
 
     def process_groups_for_all_findings(
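Both importers now call `set_hash_code(True)` explicitly and the reimporter matches incoming findings against existing ones on that value (or on `unique_id_from_tool`). Conceptually the hash code is a stable digest over selected finding fields; the sketch below is only an illustration with assumed fields and normalization, since the real field selection is configurable per scan type and lives on the `Finding` model:

    # Simplified illustration of a dedupe hash over selected finding fields.
    # The chosen fields and normalization are assumptions for the example only.
    import hashlib


    def example_hash_code(title: str, severity: str, file_path: str = "") -> str:
        material = "|".join(part.strip().lower() for part in (title, severity, file_path))
        return hashlib.sha256(material.encode("utf-8")).hexdigest()


    a = example_hash_code("SQL Injection", "High", "app/views.py")
    b = example_hash_code("sql injection", "HIGH", "app/views.py")
    print(a == b)  # True: the same logical finding maps to the same hash value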
diff --git a/unittests/test_import_reimport.py b/unittests/test_import_reimport.py
index 9c322c96cc2..ad64f09c693 100644
--- a/unittests/test_import_reimport.py
+++ b/unittests/test_import_reimport.py
@@ -658,7 +658,8 @@ def test_import_veracode_reimport_veracode_same_hash_code_different_unique_id(se
 
     def test_import_veracode_reimport_veracode_same_unique_id_different_hash_code(self):
         logger.debug("reimporting report with one finding having same unique_id_from_tool but different hash_code, verified=False")
-        import_veracode_many_findings = self.import_scan_with_params(self.veracode_many_findings, scan_type=self.scan_type_veracode)
+        with assertTestImportModelsCreated(self, imports=1, created=4, affected_findings=4, closed=0, reactivated=0, untouched=0):
+            import_veracode_many_findings = self.import_scan_with_params(self.veracode_many_findings, scan_type=self.scan_type_veracode)
 
         test_id = import_veracode_many_findings["test"]
 
@@ -1748,6 +1749,9 @@ def __init__(self, *args, **kwargs):
 
     def setUp(self):
         testuser = User.objects.get(username="admin")
+        testuser.usercontactinfo.block_execution = True
+        testuser.usercontactinfo.save()
+
         token = Token.objects.get(user=testuser)
         self.client = APIClient()
         self.client.credentials(HTTP_AUTHORIZATION="Token " + token.key)
@@ -2023,6 +2027,9 @@ def __init__(self, *args, **kwargs):
     def setUp(self):
         # still using the API to verify results
         testuser = User.objects.get(username="admin")
+        testuser.usercontactinfo.block_execution = True
+        testuser.usercontactinfo.save()
+
         token = Token.objects.get(user=testuser)
         self.client = APIClient()
         self.client.credentials(HTTP_AUTHORIZATION="Token " + token.key)
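The `setUp` changes in test_import_reimport.py set `block_execution = True` on the admin user's contact info; combined with the reworked `we_want_async` in dojo/decorators.py, this makes tasks decorated with `dojo_async_task` run in the foreground, so the API-driven imports in these tests execute synchronously. A condensed restatement of that decision order (`UserStub` is a stand-in; the real check goes through `Dojo_User.wants_block_execution`):

    # Condensed restatement of the decision order in the updated we_want_async;
    # UserStub stands in for the real user / contact-info lookup.
    from dataclasses import dataclass


    @dataclass
    class UserStub:
        block_execution: bool = False


    def runs_in_background(user: UserStub | None) -> bool:
        if user is None:
            # no current user: run the task in the background
            return True
        if user.block_execution:
            # block_execution set: run the task in the foreground
            return False
        # default: run in the background
        return True


    print(runs_in_background(None), runs_in_background(UserStub(block_execution=True)))  # True False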
diff --git a/unittests/test_importers_performance.py b/unittests/test_importers_performance.py
index 4f316d7350a..9a4b666fd97 100644
--- a/unittests/test_importers_performance.py
+++ b/unittests/test_importers_performance.py
@@ -1,6 +1,5 @@
 import logging
 from contextlib import contextmanager
-from unittest.mock import patch
 
 from crum import impersonate
 from django.contrib.contenttypes.models import ContentType
@@ -20,12 +19,15 @@
     Product_Type,
     Test,
     User,
+    UserContactInfo,
 )
 
 from .dojo_test_case import DojoTestCase, get_unit_tests_scans_path
 
+logging.basicConfig(level=logging.DEBUG)
 logger = logging.getLogger(__name__)
 
+
 STACK_HAWK_FILENAME = get_unit_tests_scans_path("stackhawk") / "stackhawk_many_vul_without_duplicated_findings.json"
 STACK_HAWK_SUBSET_FILENAME = get_unit_tests_scans_path("stackhawk") / "stackhawk_many_vul_without_duplicated_findings_subset.json"
 STACK_HAWK_SCAN_TYPE = "StackHawk HawkScan"
@@ -39,6 +41,9 @@ class TestDojoImporterPerformance(DojoTestCase):
     def setUp(self):
         super().setUp()
+        testuser = User.objects.create(username="admin")
+        UserContactInfo.objects.create(user=testuser, block_execution=False)
+
         self.system_settings(enable_webhooks_notifications=False)
         self.system_settings(enable_product_grade=False)
         self.system_settings(enable_github=False)
@@ -67,6 +72,14 @@ def assertNumAsyncTask(self, num):
             )
             raise self.failureException(msg)
 
+        tasks = dojo_async_task_counter.get_tasks()
+        tasks_str = "\n".join(str(task) for task in tasks)
+        msg = (
+            f"Correct number of {num} celery tasks were created.\n"
+            f"Tasks created:\n{tasks_str}"
+        )
+        logger.debug(msg)
+
     def import_reimport_performance(self, expected_num_queries1, expected_num_async_tasks1, expected_num_queries2, expected_num_async_tasks2, expected_num_queries3, expected_num_async_tasks3):
         """
         Log output can be quite large as when the assertNumQueries fails, all queries are printed.
@@ -158,18 +171,22 @@ def import_reimport_performance(self, expected_num_queries1, expected_num_async_
         reimporter = DefaultReImporter(**reimport_options)
         test, _, _len_new_findings, _len_closed_findings, _, _, _ = reimporter.process_scan(scan)
 
-    def test_import_reimport_reimport_performance(self):
+    # patch the we_want_async decorator to always return True so we don't depend on block_execution flag shenanigans
+    # @patch("dojo.decorators.we_want_async", return_value=True)
+    # def test_import_reimport_reimport_performance_async(self, mock):
+    def test_import_reimport_reimport_performance_async(self):
         self.import_reimport_performance(
             expected_num_queries1=712,
             expected_num_async_tasks1=10,
-            expected_num_queries2=656,
+            expected_num_queries2=655,
             expected_num_async_tasks2=22,
             expected_num_queries3=332,
             expected_num_async_tasks3=20,
         )
 
-    @patch("dojo.decorators.we_want_async", return_value=False)
-    def test_import_reimport_reimport_performance_no_async(self, mock):
+    # @patch("dojo.decorators.we_want_async", return_value=False)
+    # def test_import_reimport_reimport_performance_no_async(self, mock):
+    def test_import_reimport_reimport_performance_no_async(self):
         """
         This test checks the performance of the importers when they are run in sync mode.
         The reason for this is that we also want to be aware of when a PR affects the number of queries
@@ -177,17 +194,21 @@ def test_import_reimport_reimport_performance_no_async(self, mock):
         The impersonate context manager above does not work as expected for disabling async,
         so we patch the we_want_async decorator to always return False.
         """
+        testuser = User.objects.get(username="admin")
+        testuser.usercontactinfo.block_execution = True
+        testuser.usercontactinfo.save()
         self.import_reimport_performance(
             expected_num_queries1=712,
             expected_num_async_tasks1=10,
-            expected_num_queries2=656,
+            expected_num_queries2=655,
             expected_num_async_tasks2=22,
             expected_num_queries3=332,
             expected_num_async_tasks3=20,
         )
 
-    @patch("dojo.decorators.we_want_async", return_value=False)
-    def test_import_reimport_reimport_performance_no_async_with_product_grading(self, mock):
+    # @patch("dojo.decorators.we_want_async", return_value=False)
+    # def test_import_reimport_reimport_performance_no_async_with_product_grading(self, mock):
+    def test_import_reimport_reimport_performance_no_async_with_product_grading(self):
         """
         This test checks the performance of the importers when they are run in sync mode.
         The reason for this is that we also want to be aware of when a PR affects the number of queries
""" self.system_settings(enable_product_grade=True) - # Refresh the cache with the new settings - from dojo.middleware import DojoSytemSettingsMiddleware - DojoSytemSettingsMiddleware.load() + testuser = User.objects.get(username="admin") + testuser.usercontactinfo.block_execution = True + testuser.usercontactinfo.save() self.import_reimport_performance( expected_num_queries1=732, expected_num_async_tasks1=15, - expected_num_queries2=686, + expected_num_queries2=685, expected_num_async_tasks2=28, expected_num_queries3=357, expected_num_async_tasks3=25,