Commits (30)
a8ce14f
test cases: fix caching of system settings
valentijnscholten Jul 26, 2025
e094836
fix tests
valentijnscholten Jul 27, 2025
847ae7a
fix caching for github
valentijnscholten Jul 27, 2025
acbb196
fix caching for github
valentijnscholten Jul 27, 2025
9396ac9
simplify cache loading
valentijnscholten Jul 27, 2025
ab21221
post process only when needed
valentijnscholten Jul 27, 2025
bb5d547
set tags on (re)import
valentijnscholten Jul 27, 2025
da4a0f8
rebase set tags
valentijnscholten Jul 27, 2025
7b88c86
reduce save with options
valentijnscholten Jul 27, 2025
ef5d9b0
update counts, reduce saves with options
valentijnscholten Jul 27, 2025
aa225a5
importers: do not save again, but postprocess directly
valentijnscholten Jul 27, 2025
a9677a0
update counts
valentijnscholten Jul 27, 2025
567cb39
optimize hash_code setting
valentijnscholten Jul 27, 2025
e62c172
fix counts
valentijnscholten Jul 27, 2025
6ad90f9
set hash code for new findings in reimport
valentijnscholten Jul 27, 2025
09f0559
make smaller second save work
valentijnscholten Aug 1, 2025
bdc6bcc
make smaller second save work - add no_options
valentijnscholten Aug 1, 2025
87c30a0
update query counts
valentijnscholten Aug 1, 2025
02f670f
improve we_want_async decorator
valentijnscholten Aug 1, 2025
27de1d6
test performance: force async
valentijnscholten Aug 1, 2025
5d5c4f0
fix async stuff in perf test
valentijnscholten Aug 1, 2025
5f49110
fix async stuff in perf test
valentijnscholten Aug 1, 2025
f5e0edc
fix async stuff in perf test
valentijnscholten Aug 1, 2025
97f9d66
update counts
valentijnscholten Aug 1, 2025
c374a18
remove logging
valentijnscholten Aug 1, 2025
9debfb8
fix counts
valentijnscholten Aug 1, 2025
eab351c
fix cve for reimport
valentijnscholten Aug 1, 2025
b30c852
ruff
valentijnscholten Aug 1, 2025
12e77ce
fix no async
valentijnscholten Aug 3, 2025
087bbe8
Apply suggestions from code review
Maffooch Aug 7, 2025
8 changes: 6 additions & 2 deletions dojo/decorators.py
@@ -64,13 +64,17 @@ def we_want_async(*args, func=None, **kwargs):
return False

user = kwargs.get("async_user", get_current_user())
logger.debug("user: %s", user)
logger.debug("async user: %s", user)

if not user:
logger.debug("dojo_async_task %s: no current user, running task in the background", func)
return True

if Dojo_User.wants_block_execution(user):
logger.debug("dojo_async_task %s: running task in the foreground as block_execution is set to True for %s", func, user)
return False

logger.debug("dojo_async_task %s: no current user, running task in the background", func)
logger.debug("dojo_async_task %s: running task in the background as user has not set block_execution to True for %s", func, user)
return True


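
Note: because the old and new logger lines are interleaved above, the resulting decision flow of we_want_async after this change is summarized in the sketch below (logging trimmed; Dojo_User.wants_block_execution and the async_user kwarg are taken from the diff, the earlier checks are assumed unchanged).

def we_want_async(*args, func=None, **kwargs):
    # ... earlier settings/test-mode checks omitted ...
    user = kwargs.get("async_user", get_current_user())
    if not user:
        # no user context: run the task in the background
        return True
    if Dojo_User.wants_block_execution(user):
        # the user asked for blocking (synchronous) execution
        return False
    # default: run asynchronously in the background
    return True
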
17 changes: 12 additions & 5 deletions dojo/importers/base_importer.py
@@ -684,14 +684,11 @@ def process_endpoints(
logger.debug("endpoints_to_add: %s", endpoints_to_add)
self.endpoint_manager.chunk_endpoints_and_disperse(finding, endpoints_to_add)

def process_vulnerability_ids(
def process_cve(
self,
finding: Finding,
) -> Finding:
"""
Parse the `unsaved_vulnerability_ids` field from findings after they are parsed
to create `Vulnerability_Id` objects with the finding associated correctly
"""
"""Ensure cve is set from the unsaved_vulnerability_ids field, or vice versa."""
# Synchronize the cve field with the unsaved_vulnerability_ids
# We do this to be as flexible as possible to handle the fields until
# the cve field is not needed anymore and can be removed.
@@ -705,6 +702,16 @@ def process_vulnerability_ids(
# If there is no list, make one with the value of the cve field
finding.unsaved_vulnerability_ids = [finding.cve]

return finding

def process_vulnerability_ids(
self,
finding: Finding,
) -> Finding:
"""
Parse the `unsaved_vulnerability_ids` field from findings after they are parsed
to create `Vulnerability_Id` objects with the finding associated correctly
"""
if finding.unsaved_vulnerability_ids:
# Remove old vulnerability ids - keeping this call only because of flake8
Vulnerability_Id.objects.filter(finding=finding).delete()
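
Note: the first branch of the new process_cve helper is collapsed above; based on the visible lines it behaves roughly like the sketch below (the exact handling when both fields are already set is an assumption).

def process_cve(self, finding: Finding) -> Finding:
    """Ensure cve is set from the unsaved_vulnerability_ids field, or vice versa."""
    if finding.unsaved_vulnerability_ids:
        # keep the legacy cve field in sync with the first vulnerability id (assumed branch)
        finding.cve = finding.unsaved_vulnerability_ids[0]
    elif finding.cve:
        # If there is no list, make one with the value of the cve field
        finding.unsaved_vulnerability_ids = [finding.cve]
    return finding
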
21 changes: 15 additions & 6 deletions dojo/importers/default_importer.py
@@ -183,7 +183,7 @@ def process_findings(
unsaved_finding.reporter = self.user
unsaved_finding.last_reviewed_by = self.user
unsaved_finding.last_reviewed = self.now
logger.debug("process_parsed_findings: active from report: %s, verified from report: %s", unsaved_finding.active, unsaved_finding.verified)
logger.debug("process_parsed_findings: unique_id_from_tool: %s, hash_code: %s, active from report: %s, verified from report: %s", unsaved_finding.unique_id_from_tool, unsaved_finding.hash_code, unsaved_finding.active, unsaved_finding.verified)
# indicates an override. Otherwise, do not change the value of unsaved_finding.active
if self.active is not None:
unsaved_finding.active = self.active
@@ -198,8 +198,10 @@ def process_findings(

# Force parsers to use unsaved_tags (stored below after saving)
unsaved_finding.tags = None
# postprocessing will be done on next save.
finding = self.process_cve(unsaved_finding)
# postprocessing will be done after processing related fields like endpoints, vulnerability ids, etc.
unsaved_finding.save_no_options()

finding = unsaved_finding
# Determine how the finding should be grouped
self.process_finding_groups(
@@ -218,11 +220,18 @@ def process_findings(
finding = self.process_vulnerability_ids(finding)
# Categorize this finding as a new one
new_findings.append(finding)
# all data is already saved on the finding, we only need to generate and store the hash_code
# this is an optimization to avoid a full UPDATE statement on the finding, which is quite a big object with lots of fields
# after that we trigger the post processing directly
# the alternative is to not trigger the post processing or generate the hash_code here, but just call finding.save()
# this would do a full UPDATE statement for the finding

finding.set_hash_code(True)
finding.save_no_options(update_fields=["hash_code"])

# to avoid pushing a finding group multiple times, we push those outside of the loop
if self.findings_groups_enabled and self.group_by:
finding.save()
else:
finding.save(push_to_jira=self.push_to_jira)
push_to_jira = self.push_to_jira and (not self.findings_groups_enabled or not self.group_by)
finding_helper.post_process_finding_save(finding, dedupe_option=True, rules_option=True, product_grading_option=True, issue_updater_option=True, push_to_jira=push_to_jira)

for (group_name, findings) in group_names_to_findings_dict.items():
finding_helper.add_findings_to_auto_group(
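
Note: condensed, the new import flow replaces the second full save with a targeted hash_code update plus an explicit post-processing call. A sketch of that pattern, using only the names visible in this hunk (signatures assumed as shown in the diff):

# first save: persist the finding without dedupe/JIRA/grading side effects
finding.save_no_options()
# ... endpoints, vulnerability ids and groups are attached here ...
# second save: write only the hash_code column instead of a full UPDATE
finding.set_hash_code(True)
finding.save_no_options(update_fields=["hash_code"])
# post-processing is triggered explicitly; grouped findings are pushed to JIRA outside the loop
push_to_jira = self.push_to_jira and (not self.findings_groups_enabled or not self.group_by)
finding_helper.post_process_finding_save(finding, dedupe_option=True, rules_option=True, product_grading_option=True, issue_updater_option=True, push_to_jira=push_to_jira)
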
33 changes: 23 additions & 10 deletions dojo/importers/default_reimporter.py
@@ -236,12 +236,17 @@ def process_findings(
finding,
unsaved_finding,
)
# finding = new finding or existing finding still present in the uploaded report
# all data is already saved on the finding, we only need to generate and store the hash_code
# this is an optimization to avoid a full UPDATE statement on the finding, which is quite a big object with lots of fields
# after that we trigger the post processing directly
# the alternative is to not trigger the post processing or generate the hash_code here, but just call finding.save()
# this would do a full UPDATE statement for the finding
finding.set_hash_code(True)
finding.save_no_options(update_fields=["hash_code"])

# to avoid pushing a finding group multiple times, we push those outside of the loop
if self.findings_groups_enabled and self.group_by:
finding.save()
else:
finding.save(push_to_jira=self.push_to_jira)
push_to_jira = self.push_to_jira and (not self.findings_groups_enabled or not self.group_by)
finding_helper.post_process_finding_save(finding, dedupe_option=True, rules_option=True, product_grading_option=True, issue_updater_option=True, push_to_jira=push_to_jira)

self.to_mitigate = (set(self.original_items) - set(self.reactivated_items) - set(self.unchanged_items))
# due to #3958 we can have duplicates inside the same report
@@ -336,11 +341,14 @@ def match_new_finding_to_existing_finding(
hash_code=unsaved_finding.hash_code,
).exclude(hash_code=None).order_by("id")
if self.deduplication_algorithm == "unique_id_from_tool":
deduplicationLogger.debug(f"unique_id_from_tool: {unsaved_finding.unique_id_from_tool}")
return Finding.objects.filter(
test=self.test,
unique_id_from_tool=unsaved_finding.unique_id_from_tool,
).exclude(unique_id_from_tool=None).order_by("id")
if self.deduplication_algorithm == "unique_id_from_tool_or_hash_code":
deduplicationLogger.debug(f"unique_id_from_tool: {unsaved_finding.unique_id_from_tool}")
deduplicationLogger.debug(f"hash_code: {unsaved_finding.hash_code}")
query = Finding.objects.filter(
Q(test=self.test),
(Q(hash_code__isnull=False) & Q(hash_code=unsaved_finding.hash_code))
@@ -501,10 +509,9 @@ def process_matched_mitigated_finding(
if existing_finding.get_sla_configuration().restart_sla_on_reactivation:
# restart the sla start date to the current date, finding.save() will set new sla_expiration_date
existing_finding.sla_start_date = self.now

existing_finding.save(dedupe_option=False)
# don't dedupe before endpoints are added
existing_finding.save(dedupe_option=False)
existing_finding = self.process_cve(existing_finding)
# don't dedupe before endpoints are added, postprocessing will be done on next save (in calling method)
existing_finding.save_no_options()
note = Notes(entry=f"Re-activated by {self.scan_type} re-upload.", author=self.user)
note.save()
endpoint_statuses = existing_finding.status_finding.exclude(
@@ -552,6 +559,9 @@ def process_matched_active_finding(
existing_finding.active = False
if self.verified is not None:
existing_finding.verified = self.verified
existing_finding = self.process_cve(existing_finding)
existing_finding.save_no_options()

elif unsaved_finding.risk_accepted or unsaved_finding.false_p or unsaved_finding.out_of_scope:
logger.debug("Reimported mitigated item matches a finding that is currently open, closing.")
logger.debug(
@@ -564,6 +574,8 @@
existing_finding.active = False
if self.verified is not None:
existing_finding.verified = self.verified
existing_finding = self.process_cve(existing_finding)
existing_finding.save_no_options()
else:
# if the finding is the same but the list of affected endpoints was changed, the finding is marked as unchanged. This is a known issue
self.unchanged_items.append(existing_finding)
@@ -598,6 +610,7 @@ def process_finding_that_was_not_matched(
# scan_date was provided, override value from parser
if self.scan_date_override:
unsaved_finding.date = self.scan_date.date()
unsaved_finding = self.process_cve(unsaved_finding)
# Save it. Don't dedupe before endpoints are added.
unsaved_finding.save_no_options()
finding = unsaved_finding
@@ -641,7 +654,7 @@ def finding_post_processing(
# Process vulnerability IDs
if finding_from_report.unsaved_vulnerability_ids:
finding.unsaved_vulnerability_ids = finding_from_report.unsaved_vulnerability_ids

# legacy cve field has already been processed/set earlier
return self.process_vulnerability_ids(finding)

def process_groups_for_all_findings(
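
Note: for reference, a sketch of how the three deduplication algorithms in match_new_finding_to_existing_finding select candidate findings; the hash_code and unique_id_from_tool branches are visible above, while the exact OR clause of the combined algorithm is partially collapsed and therefore an assumption.

if self.deduplication_algorithm == "hash_code":
    return Finding.objects.filter(
        test=self.test,
        hash_code=unsaved_finding.hash_code,
    ).exclude(hash_code=None).order_by("id")
if self.deduplication_algorithm == "unique_id_from_tool":
    return Finding.objects.filter(
        test=self.test,
        unique_id_from_tool=unsaved_finding.unique_id_from_tool,
    ).exclude(unique_id_from_tool=None).order_by("id")
if self.deduplication_algorithm == "unique_id_from_tool_or_hash_code":
    # match on either field, ignoring rows where that field is NULL (assumed)
    return Finding.objects.filter(
        Q(test=self.test),
        (Q(hash_code__isnull=False) & Q(hash_code=unsaved_finding.hash_code))
        | (Q(unique_id_from_tool__isnull=False) & Q(unique_id_from_tool=unsaved_finding.unique_id_from_tool)),
    ).order_by("id")
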
2 changes: 1 addition & 1 deletion dojo/templatetags/multiply.py
@@ -4,5 +4,5 @@


@register.filter
def multiply(value, arg):
def multiply(value, arg): # noqa: FURB118

Check failure on line 7 in dojo/templatetags/multiply.py (GitHub Actions / ruff-linting): Ruff (RUF100) dojo/templatetags/multiply.py:7:28: Unused `noqa` directive (unused: `FURB118`)
return value * arg
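
Note: the check failure above means the # noqa: FURB118 directive is unused under the current Ruff configuration, so the simplest fix is to drop it again. If FURB118 ever does fire on this filter, one way to satisfy it (a sketch, not part of this PR) is to register the stdlib operator directly:

import operator

from django import template

register = template.Library()

# register operator.mul instead of re-implementing it in a one-line function
register.filter("multiply", operator.mul)
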
9 changes: 8 additions & 1 deletion unittests/test_import_reimport.py
@@ -658,7 +658,8 @@ def test_import_veracode_reimport_veracode_same_hash_code_different_unique_id(se
def test_import_veracode_reimport_veracode_same_unique_id_different_hash_code(self):
logger.debug("reimporting report with one finding having same unique_id_from_tool but different hash_code, verified=False")

import_veracode_many_findings = self.import_scan_with_params(self.veracode_many_findings, scan_type=self.scan_type_veracode)
with assertTestImportModelsCreated(self, imports=1, created=4, affected_findings=4, closed=0, reactivated=0, untouched=0):
import_veracode_many_findings = self.import_scan_with_params(self.veracode_many_findings, scan_type=self.scan_type_veracode)

test_id = import_veracode_many_findings["test"]

@@ -1748,6 +1749,9 @@ def __init__(self, *args, **kwargs):

def setUp(self):
testuser = User.objects.get(username="admin")
testuser.usercontactinfo.block_execution = True
testuser.usercontactinfo.save()

token = Token.objects.get(user=testuser)
self.client = APIClient()
self.client.credentials(HTTP_AUTHORIZATION="Token " + token.key)
@@ -2023,6 +2027,9 @@ def __init__(self, *args, **kwargs):
def setUp(self):
# still using the API to verify results
testuser = User.objects.get(username="admin")
testuser.usercontactinfo.block_execution = True
testuser.usercontactinfo.save()

token = Token.objects.get(user=testuser)
self.client = APIClient()
self.client.credentials(HTTP_AUTHORIZATION="Token " + token.key)
45 changes: 33 additions & 12 deletions unittests/test_importers_performance.py
@@ -1,6 +1,5 @@
import logging
from contextlib import contextmanager
from unittest.mock import patch

from crum import impersonate
from django.contrib.contenttypes.models import ContentType
@@ -20,12 +19,15 @@
Product_Type,
Test,
User,
UserContactInfo,
)

from .dojo_test_case import DojoTestCase, get_unit_tests_scans_path

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)


STACK_HAWK_FILENAME = get_unit_tests_scans_path("stackhawk") / "stackhawk_many_vul_without_duplicated_findings.json"
STACK_HAWK_SUBSET_FILENAME = get_unit_tests_scans_path("stackhawk") / "stackhawk_many_vul_without_duplicated_findings_subset.json"
STACK_HAWK_SCAN_TYPE = "StackHawk HawkScan"
@@ -39,6 +41,9 @@ class TestDojoImporterPerformance(DojoTestCase):
def setUp(self):
super().setUp()

testuser = User.objects.create(username="admin")
UserContactInfo.objects.create(user=testuser, block_execution=False)

self.system_settings(enable_webhooks_notifications=False)
self.system_settings(enable_product_grade=False)
self.system_settings(enable_github=False)
@@ -67,6 +72,14 @@ def assertNumAsyncTask(self, num):
)
raise self.failureException(msg)

tasks = dojo_async_task_counter.get_tasks()
tasks_str = "\n".join(str(task) for task in tasks)
msg = (
f"Correct number of {num} celery tasks were created.\n"
f"Tasks created:\n{tasks_str}"
)
logger.debug(msg)

def import_reimport_performance(self, expected_num_queries1, expected_num_async_tasks1, expected_num_queries2, expected_num_async_tasks2, expected_num_queries3, expected_num_async_tasks3):
"""
Log output can be quite large because, when assertNumQueries fails, all queries are printed.
@@ -158,36 +171,44 @@ def import_reimport_performance(self, expected_num_queries1, expected_num_async_
reimporter = DefaultReImporter(**reimport_options)
test, _, _len_new_findings, _len_closed_findings, _, _, _ = reimporter.process_scan(scan)

def test_import_reimport_reimport_performance(self):
# patch the we_want_async decorator to always return True so we don't depend on block_execution flag shenanigans
# @patch("dojo.decorators.we_want_async", return_value=True)
# def test_import_reimport_reimport_performance_async(self, mock):
def test_import_reimport_reimport_performance_async(self):
self.import_reimport_performance(
expected_num_queries1=712,
expected_num_async_tasks1=10,
expected_num_queries2=656,
expected_num_queries2=655,
expected_num_async_tasks2=22,
expected_num_queries3=332,
expected_num_async_tasks3=20,
)

@patch("dojo.decorators.we_want_async", return_value=False)
def test_import_reimport_reimport_performance_no_async(self, mock):
# @patch("dojo.decorators.we_want_async", return_value=False)
# def test_import_reimport_reimport_performance_no_async(self, mock):
def test_import_reimport_reimport_performance_no_async(self):
"""
This test checks the performance of the importers when they are run in sync mode.
The reason for this is that we also want to be aware of when a PR affects the number of queries
or async tasks created by a background task.
The impersonate context manager above does not work as expected for disabling async,
so we set block_execution to True on the test user to force synchronous execution.
"""
testuser = User.objects.get(username="admin")
testuser.usercontactinfo.block_execution = True
testuser.usercontactinfo.save()
self.import_reimport_performance(
expected_num_queries1=712,
expected_num_async_tasks1=10,
expected_num_queries2=656,
expected_num_queries2=655,
expected_num_async_tasks2=22,
expected_num_queries3=332,
expected_num_async_tasks3=20,
)

@patch("dojo.decorators.we_want_async", return_value=False)
def test_import_reimport_reimport_performance_no_async_with_product_grading(self, mock):
# @patch("dojo.decorators.we_want_async", return_value=False)
# def test_import_reimport_reimport_performance_no_async_with_product_grading(self, mock):
def test_import_reimport_reimport_performance_no_async_with_product_grading(self):
"""
This test checks the performance of the importers when they are run in sync mode.
The reason for this is that we also want to be aware of when a PR affects the number of queries
@@ -196,14 +217,14 @@ def test_import_reimport_reimport_performance_no_async_with_product_grading(self
so we set block_execution to True on the test user to force synchronous execution.
"""
self.system_settings(enable_product_grade=True)
# Refresh the cache with the new settings
from dojo.middleware import DojoSytemSettingsMiddleware
DojoSytemSettingsMiddleware.load()
testuser = User.objects.get(username="admin")
testuser.usercontactinfo.block_execution = True
testuser.usercontactinfo.save()

self.import_reimport_performance(
expected_num_queries1=732,
expected_num_async_tasks1=15,
expected_num_queries2=686,
expected_num_queries2=685,
expected_num_async_tasks2=28,
expected_num_queries3=357,
expected_num_async_tasks3=25,
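
Note: the body of import_reimport_performance is largely collapsed above; based on the visible helper names, each import/reimport step is wrapped roughly as in the sketch below (the exact placement of the assertions is an assumption; assertNumQueries is the standard Django test assertion and assertNumAsyncTask is the helper defined earlier in this file).

# one step of the performance test, repeated for the import and both reimports
with self.assertNumQueries(expected_num_queries3):
    reimporter = DefaultReImporter(**reimport_options)
    test, _, _len_new_findings, _len_closed_findings, _, _, _ = reimporter.process_scan(scan)
# verify how many celery tasks the step created
self.assertNumAsyncTask(expected_num_async_tasks3)
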