Skip to content

Implement Failed Test Replay #9214

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 22 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
2c219fd
add `di_enabled` to settings response
daniel-mohedano Jul 4, 2025
a911ae0
add FTR related metrics
daniel-mohedano Jul 4, 2025
d20159e
add FTR to execution settings
daniel-mohedano Jul 4, 2025
c4c84d6
add basic exception replay integration in agent mode
daniel-mohedano Jul 14, 2025
ae31670
feat: headless and agentless changes
daniel-mohedano Jul 22, 2025
4f2d6a9
Merge branch 'master' into daniel.mohedano/failed-test-replay
daniel-mohedano Jul 22, 2025
c7eeac8
fix: tests
daniel-mohedano Jul 23, 2025
14cf11c
fix: testng capabilities
daniel-mohedano Aug 4, 2025
461fb1f
feat: refactor agentless intakes
daniel-mohedano Aug 6, 2025
14b1054
Merge branch 'master' into daniel.mohedano/failed-test-replay
daniel-mohedano Aug 6, 2025
a87eff0
chore: update smoke test fixtures
daniel-mohedano Aug 6, 2025
19b4edf
test: add unit test for new Intake enum
daniel-mohedano Aug 6, 2025
9e81cc7
test: remove ftr from instrumentation tests (not used)
daniel-mohedano Aug 6, 2025
4e3deb4
test: introduce FTR smoke tests for headfull and headless modes
daniel-mohedano Aug 11, 2025
f931654
style: spotless and codenarc
daniel-mohedano Aug 11, 2025
eacde60
feat: add test event finished FTR telemetry
daniel-mohedano Aug 11, 2025
0228f71
feat: add `product` field to snapshots
daniel-mohedano Aug 11, 2025
88fd665
feat: implement SuiteEnd listener for sink flushing
daniel-mohedano Aug 11, 2025
ec9c54d
feat: introduce new config variables for debugger
daniel-mohedano Aug 12, 2025
d4d2432
chore: remove todo
daniel-mohedano Aug 19, 2025
ffda9b3
Merge branch 'master' into daniel.mohedano/failed-test-replay
daniel-mohedano Aug 19, 2025
49eaebf
feat: align FTR settings with JS' implementation
daniel-mohedano Aug 21, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/CODEOWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@
/dd-java-agent/instrumentation/maven-surefire/ @DataDog/ci-app-libraries-java
/dd-java-agent/instrumentation/weaver/ @DataDog/ci-app-libraries-java
/dd-smoke-tests/gradle/ @DataDog/ci-app-libraries-java
/dd-smoke-tests/junit-console/ @DataDog/ci-app-libraries-java
/dd-smoke-tests/maven/ @DataDog/ci-app-libraries-java
/internal-api/src/main/java/datadog/trace/api/git/ @DataDog/ci-app-libraries-java
**/civisibility/ @DataDog/ci-app-libraries-java
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
import datadog.communication.http.HttpRetryPolicy;
import datadog.communication.http.OkHttpUtils;
import datadog.trace.api.Config;
import datadog.trace.api.intake.Intake;
import datadog.trace.util.throwable.FatalAgentMisconfigurationError;
import java.util.function.Function;
import javax.annotation.Nullable;
import okhttp3.HttpUrl;
import okhttp3.OkHttpClient;
Expand All @@ -28,8 +28,8 @@ public BackendApiFactory(Config config, SharedCommunicationObjects sharedCommuni
public @Nullable BackendApi createBackendApi(Intake intake) {
HttpRetryPolicy.Factory retryPolicyFactory = new HttpRetryPolicy.Factory(5, 100, 2.0, true);

if (intake.agentlessModeEnabled.apply(config)) {
HttpUrl agentlessUrl = getAgentlessUrl(intake);
if (intake.isAgentlessEnabled(config)) {
HttpUrl agentlessUrl = HttpUrl.get(intake.getAgentlessUrl(config));
String apiKey = config.getApiKey();
if (apiKey == null || apiKey.isEmpty()) {
throw new FatalAgentMisconfigurationError(
Expand Down Expand Up @@ -58,41 +58,4 @@ public BackendApiFactory(Config config, SharedCommunicationObjects sharedCommuni
+ "and agent does not support EVP proxy");
return null;
}

/**
 * Builds the base URL used to reach the given intake in agentless mode.
 *
 * <p>If the intake's custom URL (read from {@code config}) is non-null and non-empty, the result
 * is {@code <customUrl>/api/<version>/}. Otherwise the URL is derived from the configured site as
 * {@code https://<urlPrefix>.<site>/api/<version>/}.
 *
 * @param intake the intake whose URL prefix, API version and custom-URL accessor are used
 * @return the parsed intake URL
 */
private HttpUrl getAgentlessUrl(Intake intake) {
String customUrl = intake.customUrl.apply(config);
if (customUrl != null && !customUrl.isEmpty()) {
// NOTE(review): customUrl is concatenated as-is, so a trailing slash in the configured value
// would yield a double slash before "api" - confirm whether callers normalize it.
return HttpUrl.get(String.format("%s/api/%s/", customUrl, intake.version));
} else {
String site = config.getSite();
return HttpUrl.get(
String.format("https://%s.%s/api/%s/", intake.urlPrefix, site, intake.version));
}
}

/**
 * Backend intakes that can be addressed directly in agentless mode.
 *
 * <p>Each constant carries the host prefix and API version used to build its URL, plus two
 * {@link Config} accessors: one telling whether agentless mode is enabled for that intake and one
 * returning its custom URL override.
 */
public enum Intake {
API("api", "v2", Config::isCiVisibilityAgentlessEnabled, Config::getCiVisibilityAgentlessUrl),
LLMOBS_API("api", "v2", Config::isLlmObsAgentlessEnabled, Config::getLlMObsAgentlessUrl),
LOGS(
"http-intake.logs",
"v2",
Config::isAgentlessLogSubmissionEnabled,
Config::getAgentlessLogSubmissionUrl);

// Host prefix placed before the configured site when no custom URL is set.
public final String urlPrefix;
// API version segment appended after "/api/".
public final String version;
// Reads from Config whether agentless mode is enabled for this intake.
public final Function<Config, Boolean> agentlessModeEnabled;
// Reads from Config the custom URL for this intake; the URL builder treats null/empty as unset.
public final Function<Config, String> customUrl;

Intake(
String urlPrefix,
String version,
Function<Config, Boolean> agentlessModeEnabled,
Function<Config, String> customUrl) {
this.urlPrefix = urlPrefix;
this.version = version;
this.agentlessModeEnabled = agentlessModeEnabled;
this.customUrl = customUrl;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -632,6 +632,7 @@ public void execute() {
}

maybeStartAppSec(scoClass, sco);
// start civisibility before debugger to enable Failed Test Replay correctly in headless mode
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we manage to plug into remote config as described in the other comment, there should be no ordering dependency 🤞

maybeStartCiVisibility(instrumentation, scoClass, sco);
maybeStartLLMObs(instrumentation, scoClass, sco);
// start debugger before remote config to subscribe to it before starting to poll
Expand Down Expand Up @@ -1308,10 +1309,6 @@ && isExplicitlyDisabled(TraceInstrumentationConfig.CODE_ORIGIN_FOR_SPANS_ENABLED
&& isExplicitlyDisabled(DebuggerConfig.DISTRIBUTED_DEBUGGER_ENABLED)) {
return;
}
if (!remoteConfigEnabled) {
log.warn("Cannot enable Dynamic Instrumentation because Remote Configuration is not enabled");
return;
}
startDebuggerAgent(inst, scoClass, sco);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import datadog.trace.api.civisibility.telemetry.CiVisibilityMetricCollector;
import datadog.trace.api.civisibility.telemetry.tag.Command;
import datadog.trace.api.git.GitInfoProvider;
import datadog.trace.api.intake.Intake;
import datadog.trace.civisibility.ci.CIProviderInfoFactory;
import datadog.trace.civisibility.ci.env.CiEnvironment;
import datadog.trace.civisibility.ci.env.CiEnvironmentImpl;
Expand Down Expand Up @@ -83,8 +84,7 @@ public class CiVisibilityServices {
this.processHierarchy = new ProcessHierarchy();
this.config = config;
this.metricCollector = metricCollector;
this.backendApi =
new BackendApiFactory(config, sco).createBackendApi(BackendApiFactory.Intake.API);
this.backendApi = new BackendApiFactory(config, sco).createBackendApi(Intake.API);
this.jvmInfoFactory = new CachingJvmInfoFactory(config, new JvmInfoFactoryImpl());
this.gitClientFactory = buildGitClientFactory(config, metricCollector);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,11 @@ public static void start(Instrumentation inst, SharedCommunicationObjects sco) {
inst.addTransformer(new CoverageClassTransformer(instrumentationFilter));
}

if (executionSettings.isFailedTestReplayEnabled()) {
// only marks the feature as active in child or headless processes
config.setCiVisibilityFailedTestReplayActive(true);
}

CiVisibilityCoverageServices.Child coverageServices =
new CiVisibilityCoverageServices.Child(services, repoServices, executionSettings);
TestEventsHandlerFactory testEventsHandlerFactory =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ public class CiVisibilitySettings {
false,
false,
false,
false,
EarlyFlakeDetectionSettings.DEFAULT,
TestManagementSettings.DEFAULT,
null);
Expand All @@ -28,6 +29,7 @@ public class CiVisibilitySettings {
private final boolean flakyTestRetriesEnabled;
private final boolean impactedTestsDetectionEnabled;
private final boolean knownTestsEnabled;
private final boolean failedTestReplayEnabled;
private final EarlyFlakeDetectionSettings earlyFlakeDetectionSettings;
private final TestManagementSettings testManagementSettings;
@Nullable private final String defaultBranch;
Expand All @@ -40,6 +42,7 @@ public class CiVisibilitySettings {
boolean flakyTestRetriesEnabled,
boolean impactedTestsDetectionEnabled,
boolean knownTestsEnabled,
boolean failedTestReplayEnabled,
EarlyFlakeDetectionSettings earlyFlakeDetectionSettings,
TestManagementSettings testManagementSettings,
@Nullable String defaultBranch) {
Expand All @@ -50,6 +53,7 @@ public class CiVisibilitySettings {
this.flakyTestRetriesEnabled = flakyTestRetriesEnabled;
this.impactedTestsDetectionEnabled = impactedTestsDetectionEnabled;
this.knownTestsEnabled = knownTestsEnabled;
this.failedTestReplayEnabled = failedTestReplayEnabled;
this.earlyFlakeDetectionSettings = earlyFlakeDetectionSettings;
this.testManagementSettings = testManagementSettings;
this.defaultBranch = defaultBranch;
Expand Down Expand Up @@ -83,6 +87,10 @@ public boolean isKnownTestsEnabled() {
return knownTestsEnabled;
}

/**
 * Returns the Failed Test Replay flag held by these settings.
 *
 * <p>NOTE(review): the JSON adapter appears to populate this from the {@code di_enabled} key of
 * the settings response - confirm against the constructor argument order.
 *
 * @return {@code true} if the flag is set
 */
public boolean isFailedTestReplayEnabled() {
return failedTestReplayEnabled;
}

public EarlyFlakeDetectionSettings getEarlyFlakeDetectionSettings() {
return earlyFlakeDetectionSettings;
}
Expand Down Expand Up @@ -112,6 +120,7 @@ public boolean equals(Object o) {
&& flakyTestRetriesEnabled == that.flakyTestRetriesEnabled
&& impactedTestsDetectionEnabled == that.impactedTestsDetectionEnabled
&& knownTestsEnabled == that.knownTestsEnabled
&& failedTestReplayEnabled == that.failedTestReplayEnabled
&& Objects.equals(earlyFlakeDetectionSettings, that.earlyFlakeDetectionSettings)
&& Objects.equals(testManagementSettings, that.testManagementSettings)
&& Objects.equals(defaultBranch, that.defaultBranch);
Expand All @@ -127,6 +136,7 @@ public int hashCode() {
flakyTestRetriesEnabled,
impactedTestsDetectionEnabled,
knownTestsEnabled,
failedTestReplayEnabled,
earlyFlakeDetectionSettings,
testManagementSettings,
defaultBranch);
Expand Down Expand Up @@ -154,6 +164,7 @@ public CiVisibilitySettings fromJson(Map<String, Object> json) {
getBoolean(json, "flaky_test_retries_enabled", false),
getBoolean(json, "impacted_tests_enabled", false),
getBoolean(json, "known_tests_enabled", false),
getBoolean(json, "di_enabled", false),
EarlyFlakeDetectionSettings.JsonAdapter.INSTANCE.fromJson(
(Map<String, Object>) json.get("early_flake_detection")),
TestManagementSettings.JsonAdapter.INSTANCE.fromJson(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import datadog.trace.api.civisibility.telemetry.CiVisibilityMetricCollector;
import datadog.trace.api.civisibility.telemetry.tag.CoverageEnabled;
import datadog.trace.api.civisibility.telemetry.tag.EarlyFlakeDetectionEnabled;
import datadog.trace.api.civisibility.telemetry.tag.FailedTestReplayEnabled;
import datadog.trace.api.civisibility.telemetry.tag.FlakyTestRetriesEnabled;
import datadog.trace.api.civisibility.telemetry.tag.ImpactedTestsDetectionEnabled;
import datadog.trace.api.civisibility.telemetry.tag.ItrEnabled;
Expand Down Expand Up @@ -156,6 +157,7 @@ public CiVisibilitySettings getSettings(TracerEnvironment tracerEnvironment) thr
settings.isKnownTestsEnabled() ? KnownTestsEnabled.TRUE : null,
settings.isImpactedTestsDetectionEnabled() ? ImpactedTestsDetectionEnabled.TRUE : null,
settings.getTestManagementSettings().isEnabled() ? TestManagementEnabled.TRUE : null,
settings.isFailedTestReplayEnabled() ? FailedTestReplayEnabled.SettingsMetric.TRUE : null,
settings.isGitUploadRequired() ? RequireGit.TRUE : null);

return settings;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ public class ExecutionSettings {
false,
false,
false,
false,
EarlyFlakeDetectionSettings.DEFAULT,
TestManagementSettings.DEFAULT,
null,
Expand All @@ -43,6 +44,7 @@ public class ExecutionSettings {
private final boolean testSkippingEnabled;
private final boolean flakyTestRetriesEnabled;
private final boolean impactedTestsDetectionEnabled;
private final boolean failedTestReplayEnabled;
@Nonnull private final EarlyFlakeDetectionSettings earlyFlakeDetectionSettings;
@Nonnull private final TestManagementSettings testManagementSettings;
@Nullable private final String itrCorrelationId;
Expand All @@ -58,6 +60,7 @@ public ExecutionSettings(
boolean testSkippingEnabled,
boolean flakyTestRetriesEnabled,
boolean impactedTestsDetectionEnabled,
boolean failedTestReplayEnabled,
@Nonnull EarlyFlakeDetectionSettings earlyFlakeDetectionSettings,
@Nonnull TestManagementSettings testManagementSettings,
@Nullable String itrCorrelationId,
Expand All @@ -74,6 +77,7 @@ public ExecutionSettings(
this.testSkippingEnabled = testSkippingEnabled;
this.flakyTestRetriesEnabled = flakyTestRetriesEnabled;
this.impactedTestsDetectionEnabled = impactedTestsDetectionEnabled;
this.failedTestReplayEnabled = failedTestReplayEnabled;
this.earlyFlakeDetectionSettings = earlyFlakeDetectionSettings;
this.testManagementSettings = testManagementSettings;
this.itrCorrelationId = itrCorrelationId;
Expand Down Expand Up @@ -110,6 +114,7 @@ private ExecutionSettings(
boolean testSkippingEnabled,
boolean flakyTestRetriesEnabled,
boolean impactedTestsDetectionEnabled,
boolean failedTestReplayEnabled,
@Nonnull EarlyFlakeDetectionSettings earlyFlakeDetectionSettings,
@Nonnull TestManagementSettings testManagementSettings,
@Nullable String itrCorrelationId,
Expand All @@ -123,6 +128,7 @@ private ExecutionSettings(
this.testSkippingEnabled = testSkippingEnabled;
this.flakyTestRetriesEnabled = flakyTestRetriesEnabled;
this.impactedTestsDetectionEnabled = impactedTestsDetectionEnabled;
this.failedTestReplayEnabled = failedTestReplayEnabled;
this.earlyFlakeDetectionSettings = earlyFlakeDetectionSettings;
this.testManagementSettings = testManagementSettings;
this.itrCorrelationId = itrCorrelationId;
Expand Down Expand Up @@ -157,6 +163,10 @@ public boolean isImpactedTestsDetectionEnabled() {
return impactedTestsDetectionEnabled;
}

/**
 * Returns the Failed Test Replay flag stored in these execution settings.
 *
 * @return {@code true} if the flag is set
 */
public boolean isFailedTestReplayEnabled() {
return failedTestReplayEnabled;
}

@Nonnull
public EarlyFlakeDetectionSettings getEarlyFlakeDetectionSettings() {
return earlyFlakeDetectionSettings;
Expand Down Expand Up @@ -243,6 +253,7 @@ public boolean equals(Object o) {
&& testSkippingEnabled == that.testSkippingEnabled
&& flakyTestRetriesEnabled == that.flakyTestRetriesEnabled
&& impactedTestsDetectionEnabled == that.impactedTestsDetectionEnabled
&& failedTestReplayEnabled == that.failedTestReplayEnabled
&& Objects.equals(earlyFlakeDetectionSettings, that.earlyFlakeDetectionSettings)
&& Objects.equals(testManagementSettings, that.testManagementSettings)
&& Objects.equals(itrCorrelationId, that.itrCorrelationId)
Expand All @@ -261,6 +272,7 @@ public int hashCode() {
testSkippingEnabled,
flakyTestRetriesEnabled,
impactedTestsDetectionEnabled,
failedTestReplayEnabled,
earlyFlakeDetectionSettings,
testManagementSettings,
itrCorrelationId,
Expand All @@ -278,6 +290,7 @@ public static class Serializer {
private static final int TEST_SKIPPING_ENABLED_FLAG = 4;
private static final int FLAKY_TEST_RETRIES_ENABLED_FLAG = 8;
private static final int IMPACTED_TESTS_DETECTION_ENABLED_FLAG = 16;
private static final int FAILED_TEST_REPLAY_ENABLED_FLAG = 32;

public static ByteBuffer serialize(ExecutionSettings settings) {
datadog.trace.civisibility.ipc.serialization.Serializer s =
Expand All @@ -291,7 +304,8 @@ public static ByteBuffer serialize(ExecutionSettings settings) {
| (settings.flakyTestRetriesEnabled ? FLAKY_TEST_RETRIES_ENABLED_FLAG : 0)
| (settings.impactedTestsDetectionEnabled
? IMPACTED_TESTS_DETECTION_ENABLED_FLAG
: 0));
: 0)
| (settings.failedTestReplayEnabled ? FAILED_TEST_REPLAY_ENABLED_FLAG : 0));
s.write(flags);

EarlyFlakeDetectionSettings.Serializer.serialize(s, settings.earlyFlakeDetectionSettings);
Expand Down Expand Up @@ -330,6 +344,7 @@ public static ExecutionSettings deserialize(ByteBuffer buffer) {
boolean testSkippingEnabled = (flags & TEST_SKIPPING_ENABLED_FLAG) != 0;
boolean flakyTestRetriesEnabled = (flags & FLAKY_TEST_RETRIES_ENABLED_FLAG) != 0;
boolean impactedTestsDetectionEnabled = (flags & IMPACTED_TESTS_DETECTION_ENABLED_FLAG) != 0;
boolean failedTestReplayEnabled = (flags & FAILED_TEST_REPLAY_ENABLED_FLAG) != 0;

EarlyFlakeDetectionSettings earlyFlakeDetectionSettings =
EarlyFlakeDetectionSettings.Serializer.deserialize(buffer);
Expand Down Expand Up @@ -372,6 +387,7 @@ public static ExecutionSettings deserialize(ByteBuffer buffer) {
testSkippingEnabled,
flakyTestRetriesEnabled,
impactedTestsDetectionEnabled,
failedTestReplayEnabled,
earlyFlakeDetectionSettings,
testManagementSettings,
itrCorrelationId,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,11 @@ private Map<String, ExecutionSettings> doCreate(
settings,
CiVisibilitySettings::isKnownTestsEnabled,
Config::isCiVisibilityKnownTestsRequestEnabled);
boolean failedTestReplayEnabled =
isFeatureEnabled(
settings,
CiVisibilitySettings::isFailedTestReplayEnabled,
Config::isCiVisibilityFailedTestReplayEnabled);

TestManagementSettings testManagementSettings = getTestManagementSettings(settings);

Expand All @@ -189,7 +194,8 @@ private Map<String, ExecutionSettings> doCreate(
+ "Impacted tests detection - {},\n"
+ "Known tests marking - {},\n"
+ "Auto test retries - {},\n"
+ "Test Management - {}",
+ "Test Management - {},\n"
+ "Failed Test Replay - {}",
repositoryRoot,
tracerEnvironment.getConfigurations().getRuntimeName(),
tracerEnvironment.getConfigurations().getRuntimeVersion(),
Expand All @@ -201,7 +207,8 @@ private Map<String, ExecutionSettings> doCreate(
impactedTestsEnabled,
knownTestsRequest,
flakyTestRetriesEnabled,
testManagementSettings.isEnabled());
testManagementSettings.isEnabled(),
failedTestReplayEnabled);

Future<SkippableTests> skippableTestsFuture =
executor.submit(() -> getSkippableTests(tracerEnvironment, itrEnabled));
Expand Down Expand Up @@ -253,6 +260,7 @@ private Map<String, ExecutionSettings> doCreate(
testSkippingEnabled,
flakyTestRetriesEnabled,
impactedTestsEnabled,
failedTestReplayEnabled,
earlyFlakeDetectionEnabled
? settings.getEarlyFlakeDetectionSettings()
: EarlyFlakeDetectionSettings.DEFAULT,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import datadog.trace.api.civisibility.telemetry.TagValue;
import datadog.trace.api.civisibility.telemetry.tag.BrowserDriver;
import datadog.trace.api.civisibility.telemetry.tag.EventType;
import datadog.trace.api.civisibility.telemetry.tag.FailedTestReplayEnabled;
import datadog.trace.api.civisibility.telemetry.tag.HasFailedAllRetries;
import datadog.trace.api.civisibility.telemetry.tag.IsAttemptToFix;
import datadog.trace.api.civisibility.telemetry.tag.IsDisabled;
Expand Down Expand Up @@ -305,6 +306,9 @@ public void end(@Nullable Long endTime) {
span.getTag(Tags.TEST_IS_RETRY) != null ? IsRetry.TRUE : null,
span.getTag(Tags.TEST_HAS_FAILED_ALL_RETRIES) != null ? HasFailedAllRetries.TRUE : null,
retryReason instanceof TagValue ? (TagValue) retryReason : null,
span.getTag(Tags.ERROR_DEBUG_INFO_CAPTURED) != null
? FailedTestReplayEnabled.TestMetric.TRUE
: null,
span.getTag(Tags.TEST_IS_RUM_ACTIVE) != null ? IsRum.TRUE : null,
CIConstants.SELENIUM_BROWSER_DRIVER.equals(span.getTag(Tags.TEST_BROWSER_DRIVER))
? BrowserDriver.SELENIUM
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import datadog.trace.api.Config;
import datadog.trace.api.civisibility.DDTestSuite;
import datadog.trace.api.civisibility.InstrumentationTestBridge;
import datadog.trace.api.civisibility.config.LibraryCapability;
import datadog.trace.api.civisibility.coverage.CoverageStore;
import datadog.trace.api.civisibility.execution.TestStatus;
Expand Down Expand Up @@ -220,6 +221,8 @@ public void end(@Nullable Long endTime) {
AgentTracer.closeActive();
}

InstrumentationTestBridge.fireBeforeSuiteEnd();

onSpanFinish.accept(span);

if (endTime != null) {
Expand Down
Loading