Skip to content

Commit 0ce27a5

Browse files
authored
Bug 1964128 - Enable monitor-only alerting for performance tests with email notifications. (#8670)
This patch adds the ability to produce performance alerts on tests that are only for monitoring purposes, and will not be sheriffed. This includes a new interface to view them on the Perfherder page (Monitored Alerts). At the same time, this patch adds the ability to send emails for all alerts (including sheriffed ones) to anyone that has an email specified in the new alertNotifyEmails field.
1 parent 822b52a commit 0ce27a5

File tree

13 files changed

+342
-6
lines changed

13 files changed

+342
-6
lines changed

schemas/performance-artifact.json

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,21 @@
205205
"type": "integer",
206206
"minimum": 1,
207207
"maximum": 255
208+
},
209+
"monitor": {
210+
"description": "Enable non-sheriffed alerting (ignores shouldAlert settings)",
211+
"title": "Monitor",
212+
"type": "boolean"
213+
},
214+
"alertNotifyEmails": {
215+
"type": "array",
216+
"title": "Notify these emails for any alerts produced",
217+
"items": {
218+
"type": "string",
219+
"maxLength": 100
220+
},
221+
"uniqueItems": true,
222+
"maxItems": 8
208223
}
209224
},
210225
"required": ["name", "subtests"],

tests/etl/conftest.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,10 @@ def perf_job(perf_push, failure_classifications, generic_reference_data):
2121
return create_generic_job(
2222
"myfunguid", perf_push.repository, perf_push.id, generic_reference_data
2323
)
24+
25+
26+
@pytest.fixture
27+
def perf_job_nonsheriffed(perf_push, failure_classifications, generic_reference_data):
28+
return create_generic_job(
29+
"myfunguid", perf_push.repository, perf_push.id, generic_reference_data, tier=3
30+
)

tests/etl/test_perf_data_load.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import json
44
import operator
55
import time
6+
from unittest import mock
67

78
import pytest
89
from django.core.management import call_command
@@ -234,6 +235,52 @@ def test_default_ingest_workflow(
234235
_verify_datum(suite["name"], subtest["name"], subtest["value"], perf_push.time)
235236

236237

238+
@mock.patch("treeherder.etl.perf.generate_alerts")
239+
def test_monitored_perf_data(
240+
mocked_generate_alerts,
241+
test_repository,
242+
perf_push,
243+
later_perf_push,
244+
perf_job_nonsheriffed,
245+
generic_reference_data,
246+
sample_perf_artifact,
247+
):
248+
for suite in sample_perf_artifact["blob"]["suites"]:
249+
suite["monitor"] = True
250+
perf_datum, submit_datum = _prepare_test_data(sample_perf_artifact)
251+
252+
store_performance_artifact(perf_job_nonsheriffed, submit_datum)
253+
254+
assert DATA_PER_ARTIFACT == PerformanceSignature.objects.all().count()
255+
assert 1 == PerformanceFramework.objects.all().count()
256+
257+
# Ensure that alert generation gets triggered on a tier-3 test with
258+
# monitor set to True
259+
mocked_generate_alerts.apply_async.assert_called()
260+
261+
262+
@mock.patch("treeherder.etl.perf.generate_alerts")
263+
def test_unmonitored_perf_data(
264+
mocked_generate_alerts,
265+
test_repository,
266+
perf_push,
267+
later_perf_push,
268+
perf_job_nonsheriffed,
269+
generic_reference_data,
270+
sample_perf_artifact,
271+
):
272+
perf_datum, submit_datum = _prepare_test_data(sample_perf_artifact)
273+
274+
store_performance_artifact(perf_job_nonsheriffed, submit_datum)
275+
276+
assert DATA_PER_ARTIFACT == PerformanceSignature.objects.all().count()
277+
assert 1 == PerformanceFramework.objects.all().count()
278+
279+
# Ensure that alert generation does not get triggered when monitor is False
280+
# on a tier-3 job
281+
mocked_generate_alerts.apply_async.assert_not_called()
282+
283+
237284
def test_hash_remains_unchanged_for_default_ingestion_workflow(
238285
test_repository, perf_job, sample_perf_artifact
239286
):

tests/perfalert/test_alerts.py

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import datetime
22
import time
3+
from unittest import mock
34

45
import pytest
56

@@ -341,3 +342,101 @@ def test_alert_change_type_absolute(
341342

342343
assert PerformanceAlert.objects.count() == expected_num_alerts
343344
assert PerformanceAlertSummary.objects.count() == expected_num_alerts
345+
346+
347+
def test_alert_monitor_no_sheriff(
348+
test_repository,
349+
test_issue_tracker,
350+
failure_classifications,
351+
generic_reference_data,
352+
test_perf_signature,
353+
):
354+
# modify the test signature to have it as a monitored signature, but not sheriffed
355+
test_perf_signature.monitor = True
356+
test_perf_signature.should_alert = True
357+
test_perf_signature.save()
358+
359+
base_time = time.time() # generate it based off current time
360+
interval = 60
361+
_generate_performance_data(
362+
test_repository,
363+
test_perf_signature,
364+
base_time,
365+
1,
366+
0.5,
367+
int(interval / 2),
368+
)
369+
_generate_performance_data(
370+
test_repository,
371+
test_perf_signature,
372+
base_time,
373+
int(interval / 2) + 1,
374+
1.0,
375+
int(interval / 2),
376+
)
377+
378+
generate_new_alerts_in_series(test_perf_signature)
379+
380+
assert PerformanceAlert.objects.count() == 1
381+
assert PerformanceAlertSummary.objects.count() == 1
382+
383+
# When monitor is true, then alert should not be sheriffed
384+
# regardless of should_alert settings
385+
assert [not alert.sheriffed for alert in PerformanceAlert.objects.all()]
386+
387+
388+
@mock.patch("treeherder.perf.alerts.taskcluster")
389+
def test_alert_emails(
390+
mocked_taskcluster,
391+
test_repository,
392+
test_issue_tracker,
393+
failure_classifications,
394+
generic_reference_data,
395+
test_perf_signature,
396+
):
397+
mocked_email_client = mock.MagicMock()
398+
mocked_taskcluster.notify_client_factory.return_value = mocked_email_client
399+
400+
emails = "fake@email.com fake2@email.com"
401+
test_perf_signature.alert_notify_emails = emails
402+
test_perf_signature.save()
403+
404+
base_time = time.time() # generate it based off current time
405+
interval = 60
406+
_generate_performance_data(
407+
test_repository,
408+
test_perf_signature,
409+
base_time,
410+
1,
411+
0.5,
412+
int(interval / 2),
413+
)
414+
_generate_performance_data(
415+
test_repository,
416+
test_perf_signature,
417+
base_time,
418+
int(interval / 2) + 1,
419+
1.0,
420+
int(interval / 2),
421+
)
422+
423+
generate_new_alerts_in_series(test_perf_signature)
424+
425+
assert PerformanceAlert.objects.count() == 1
426+
assert PerformanceAlertSummary.objects.count() == 1
427+
428+
# When monitor is False, then the alerts should be sheriffed
429+
assert [alert.sheriffed for alert in PerformanceAlert.objects.all()]
430+
431+
# Make sure the email service was called correctly for 2 emails
432+
assert mocked_taskcluster.notify_client_factory.call_count == 1
433+
assert mocked_email_client.email.call_count == 2
434+
435+
# Ensure that each email specified has an email sent to it
436+
for email in emails.split():
437+
assert any(
438+
[
439+
email in call_arg[0][0]["address"]
440+
for call_arg in mocked_email_client.email.call_args_list
441+
]
442+
)

treeherder/etl/perf.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ def _suite_should_alert_based_on(
9797
and new_datum_ingested
9898
and job.repository.performance_alerts_enabled
9999
and job.tier_is_sheriffable
100-
)
100+
) or (signature.monitor and job.repository.name not in ("try",))
101101

102102

103103
def _test_should_alert_based_on(
@@ -114,7 +114,7 @@ def _test_should_alert_based_on(
114114
and new_datum_ingested
115115
and job.repository.performance_alerts_enabled
116116
and job.tier_is_sheriffable
117-
)
117+
) or (signature.monitor and job.repository.name not in ("try",))
118118

119119

120120
def _test_should_gather_replicates_based_on(
@@ -205,6 +205,8 @@ def _load_perf_datum(job: Job, perf_datum: dict):
205205
# these properties below can be either True, False, or null
206206
# (None). Null indicates no preference has been set.
207207
"should_alert": suite.get("shouldAlert"),
208+
"monitor": suite.get("monitor"),
209+
"alert_notify_emails": _order_and_concat(suite.get("alertNotifyEmails", [])),
208210
"alert_change_type": PerformanceSignature._get_alert_change_type(
209211
suite.get("alertChangeType")
210212
),
@@ -272,6 +274,8 @@ def _load_perf_datum(job: Job, perf_datum: dict):
272274
# null (None). Null indicates no preference has been
273275
# set.
274276
"should_alert": subtest.get("shouldAlert"),
277+
"monitor": suite.get("monitor"),
278+
"alert_notify_emails": _order_and_concat(suite.get("alertNotifyEmails", [])),
275279
"alert_change_type": PerformanceSignature._get_alert_change_type(
276280
subtest.get("alertChangeType")
277281
),

treeherder/perf/alerts.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,17 +9,29 @@
99
from django.conf import settings
1010
from django.db import transaction
1111

12+
from treeherder.perf.email import AlertNotificationWriter
1213
from treeherder.perf.models import (
1314
PerformanceAlert,
1415
PerformanceAlertSummary,
1516
PerformanceDatum,
1617
PerformanceSignature,
1718
)
1819
from treeherder.perfalert.perfalert import RevisionDatum, detect_changes
20+
from treeherder.services import taskcluster
1921

2022
logger = logging.getLogger(__name__)
2123

2224

25+
def send_alert_emails(emails, alert, alert_summary):
26+
notify_client = taskcluster.notify_client_factory()
27+
28+
for email in emails:
29+
logger.info(f"Sending alert email to {email}")
30+
notification_writer = AlertNotificationWriter()
31+
email = notification_writer.prepare_new_email(email, alert, alert_summary)
32+
notify_client.email(email)
33+
34+
2335
def geomean(iterable):
2436
# Returns a geomean of a list of values.
2537
a = np.array(iterable)
@@ -138,6 +150,7 @@ def generate_new_alerts_in_series(signature):
138150
framework=signature.framework,
139151
push_id=cur.push_id,
140152
prev_push_id=prev.push_id,
153+
sheriffed=not signature.monitor,
141154
defaults={
142155
"manually_created": False,
143156
"created": datetime.utcfromtimestamp(cur.push_timestamp),
@@ -150,9 +163,10 @@ def generate_new_alerts_in_series(signature):
150163
if t_value == float("inf"):
151164
t_value = 1000
152165

153-
PerformanceAlert.objects.update_or_create(
166+
alert, _ = PerformanceAlert.objects.update_or_create(
154167
summary=summary,
155168
series_signature=signature,
169+
sheriffed=not signature.monitor,
156170
defaults={
157171
"noise_profile": noise_profile,
158172
"is_regression": alert_properties.is_regression,
@@ -163,3 +177,6 @@ def generate_new_alerts_in_series(signature):
163177
"t_value": t_value,
164178
},
165179
)
180+
181+
if signature.alert_notify_emails:
182+
send_alert_emails(signature.alert_notify_emails.split(), alert, summary)

treeherder/perf/email.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,3 +292,47 @@ def _write_content(self, must_mention: list[PerformanceSignature]):
292292
content.include_signatures(must_mention)
293293

294294
self._email.content = str(content)
295+
296+
297+
class AlertNotificationContent:
298+
DESCRIPTION = "Perfherder has detected a performance change on the test **{test}** and produced an alert. See {alert_summary} for more information."
299+
ALERT_SUMMARY_LINK = "https://treeherder.mozilla.org/perfherder/alerts?id={alert_summary_id}"
300+
301+
def __init__(self):
302+
self._raw_content = None
303+
304+
def include_alert_summary(self, test: str, alert_summary_id: str):
305+
self._raw_content = self.DESCRIPTION.format(
306+
test=test,
307+
alert_summary=(
308+
f"[this alert summary]"
309+
f"({self.ALERT_SUMMARY_LINK.format(alert_summary_id=alert_summary_id)})"
310+
),
311+
)
312+
313+
def __str__(self):
314+
if self._raw_content is None:
315+
raise ValueError("No content set")
316+
return self._raw_content
317+
318+
319+
class AlertNotificationWriter(EmailWriter):
320+
def prepare_new_email(
321+
self, email: str, alert: PerformanceAlert, alert_summary: PerformanceAlertSummary
322+
) -> dict:
323+
self._write_address(email)
324+
self._write_subject(alert.series_signature.test)
325+
self._write_content(alert.series_signature.test, str(alert_summary.id))
326+
return self.email
327+
328+
def _write_address(self, email: str):
329+
self._email.address = email
330+
331+
def _write_subject(self, test: str):
332+
self._email.subject = f"Alert: Test {test} detected a performance change"
333+
334+
def _write_content(self, test: str, alert_summary_id: str):
335+
content = AlertNotificationContent()
336+
content.include_alert_summary(test, alert_summary_id)
337+
338+
self._email.content = str(content)

0 commit comments

Comments
 (0)