diff --git a/.gitlab/collect_reports.sh b/.gitlab/collect_reports.sh index 83c3d99d38a..1b30beb6a6e 100755 --- a/.gitlab/collect_reports.sh +++ b/.gitlab/collect_reports.sh @@ -62,6 +62,7 @@ function process_reports () { cp -r workspace/$project_to_save/build/reports/* $report_path/ 2>/dev/null || true cp workspace/$project_to_save/build/hs_err_pid*.log $report_path/ 2>/dev/null || true cp workspace/$project_to_save/build/javacore*.txt $report_path/ 2>/dev/null || true + cp workspace/$project_to_save/build/*.* $report_path/ 2>/dev/null || true fi } diff --git a/dd-java-agent/instrumentation/java-concurrent/java-concurrent-21/build.gradle b/dd-java-agent/instrumentation/java-concurrent/java-concurrent-21/build.gradle index 361477f06ff..a6a8869a4d9 100644 --- a/dd-java-agent/instrumentation/java-concurrent/java-concurrent-21/build.gradle +++ b/dd-java-agent/instrumentation/java-concurrent/java-concurrent-21/build.gradle @@ -1,3 +1,6 @@ +import java.util.concurrent.Executors +import java.util.concurrent.TimeUnit + ext { minJavaVersionForTests = JavaVersion.VERSION_21 // Structured concurrency is a preview feature in Java 21. Methods (e.g. ShutdownOnFailure) used in this instrumentation test are no longer available in Java 25, so we set the max version to 24. @@ -56,3 +59,75 @@ compileJava.configure { sourceCompatibility = JavaVersion.VERSION_1_8 targetCompatibility = JavaVersion.VERSION_1_8 } + +tasks.withType(Test).configureEach { + doFirst { + String fullPath = layout.buildDirectory.asFile.get().absolutePath.replace("dd-trace-java/dd-java-agent", + "dd-trace-java/workspace/dd-java-agent") + println "DEBUG full path: ${fullPath}" + + def outDir = new File(fullPath) + outDir.mkdirs() + def outFile = new File(outDir, "${System.currentTimeMillis()}-thread-dump.log") + + // single-thread scheduler (daemon) + def scheduler = Executors.newSingleThreadScheduledExecutor({ r -> + Thread t = new Thread(r, "dump-scheduler") + t.setDaemon(true) + return t + }) + + // schedule the dump job 10s later + def future = scheduler.schedule({ + try { + println "DEBUG threads dump: ${outFile.absolutePath}" + + new ProcessBuilder("jcmd", "0", "Thread.print", "-l") + .redirectErrorStream(true) + .redirectOutput(outFile) + .start().waitFor() + + println "DEBUG heap dumps" + def listOutput = "jcmd -l".execute().text.readLines() + listOutput.each { line -> + println "DEBUG PID: ${line}" + + if (!line.contains("gradle")) { + return + } + + def pid = line.substring(0, line.indexOf(' ')) + + def dumpFileName = "${fullPath}/${System.currentTimeMillis()}-${pid}.hprof" + + println "DEBUG dump file name: ${dumpFileName}" + + def cmd = "jcmd ${pid} GC.heap_dump ${dumpFileName}" + + println "DEBUG jcmd: ${cmd}" + + def res = cmd.execute().waitFor() + + println "DEBUG dump res: ${res}" + } + } catch (Throwable t) { + logger.warn("Dumping failed: ${t.message}", t) + } finally { + scheduler.shutdown() + } + }, 62, TimeUnit.SECONDS) + + // store handles for cancellation in doLast + ext.dumpFuture = future + ext.dumpScheduler = scheduler + } + + doLast { + // cancel if task completed in <10s + try { + ext.dumpFuture?.cancel(false) + } finally { + ext.dumpScheduler?.shutdownNow() + } + } +} diff --git a/dd-java-agent/instrumentation/java-concurrent/java-concurrent-21/src/previewTest/groovy/StructuredConcurrencyTest.groovy b/dd-java-agent/instrumentation/java-concurrent/java-concurrent-21/src/previewTest/groovy/StructuredConcurrencyTest.groovy index 24c157d1e80..dfa689aa261 100644 --- a/dd-java-agent/instrumentation/java-concurrent/java-concurrent-21/src/previewTest/groovy/StructuredConcurrencyTest.groovy +++ b/dd-java-agent/instrumentation/java-concurrent/java-concurrent-21/src/previewTest/groovy/StructuredConcurrencyTest.groovy @@ -1,6 +1,11 @@ +import com.sun.management.HotSpotDiagnosticMXBean import datadog.trace.agent.test.AgentTestRunner import datadog.trace.api.Trace +import javax.management.MBeanServer +import java.lang.management.ManagementFactory +import java.lang.management.ThreadInfo +import java.lang.management.ThreadMXBean import java.util.concurrent.Callable import java.util.concurrent.StructuredTaskScope @@ -9,10 +14,18 @@ import static datadog.trace.agent.test.utils.TraceUtils.runnableUnderTrace import static java.time.Instant.now class StructuredConcurrencyTest extends AgentTestRunner { - @Override - boolean useStrictTraceWrites() { - // TODO: Monitor in CI to validate fix effectiveness against freezes. - return false + ThreadDumpLogger threadDumpLogger + + def setup() { + // Use the current feature name as the test name + String testName = "${specificationContext?.currentSpec?.name ?: "unknown-spec"} : ${specificationContext?.currentFeature?.name ?: "unknown-test"}" + + threadDumpLogger = new ThreadDumpLogger(testName) + threadDumpLogger.start() + } + + def cleanup() { + threadDumpLogger.stop() } /** @@ -170,4 +183,72 @@ class StructuredConcurrencyTest extends AgentTestRunner { } } } + + // 🔒 Private helper class for thread dump logging + private static class ThreadDumpLogger { + private final String testName + private Thread task + + ThreadDumpLogger(String testName) { + this.testName = testName + } + + void start() { + task = new Thread() { + @Override + void run() { + sleep(20000) + + File outputDir = new File("build") + String fullPath = outputDir.absolutePath.replace("dd-trace-java/dd-java-agent", + "dd-trace-java/workspace/dd-java-agent") + + outputDir = new File(fullPath) + if (!outputDir.exists()) { + println("Folder not found: " + fullPath) + outputDir.mkdirs() + } else println("Folder found: " + fullPath) + + // Use the current feature name as the test name + println("Test name: " + testName) + + heapDump(outputDir, "test_1") + + def reportFile = new File(outputDir, "${System.currentTimeMillis()}-thread-dump.log") + + try (def writer = new FileWriter(reportFile)) { + writer.write("=== Test: ${testName} ===\n") + writer.write("=== Thread Dump Triggered at ${new Date()} ===\n") + writer.write(threadDump(false, false)) + writer.write("==============================================\n") + } + + heapDump(outputDir, "test_2") + } + } + task.start() + } + + static void heapDump(File outputDir, String kind) { + def heapDumpFile = new File(outputDir, "${System.currentTimeMillis()}-heap-dump-${kind}.hprof").absolutePath + MBeanServer server = ManagementFactory.getPlatformMBeanServer() + HotSpotDiagnosticMXBean mxBean = ManagementFactory.newPlatformMXBeanProxy( + server, "com.sun.management:type=HotSpotDiagnostic", HotSpotDiagnosticMXBean.class) + mxBean.dumpHeap(heapDumpFile, true) + } + + private static String threadDump(boolean lockedMonitors, boolean lockedSynchronizers) { + StringBuffer threadDump = new StringBuffer(System.lineSeparator()) + ThreadMXBean threadMXBean = ManagementFactory.getThreadMXBean() + for(ThreadInfo threadInfo : threadMXBean.dumpAllThreads(lockedMonitors, lockedSynchronizers)) { + threadDump.append(threadInfo.toString()) + } + + return threadDump.toString() + } + + void stop() { + task?.interrupt() + } + } } diff --git a/dd-java-agent/instrumentation/lettuce-4/build.gradle b/dd-java-agent/instrumentation/lettuce-4/build.gradle index 3eb173a4343..309b2a2c687 100644 --- a/dd-java-agent/instrumentation/lettuce-4/build.gradle +++ b/dd-java-agent/instrumentation/lettuce-4/build.gradle @@ -1,3 +1,5 @@ +import java.util.concurrent.Executors +import java.util.concurrent.TimeUnit muzzle { pass { @@ -20,3 +22,75 @@ dependencies { latestDepTestImplementation group: 'biz.paluch.redis', name: 'lettuce', version: '4.+' } + +tasks.withType(Test).configureEach { + doFirst { + String fullPath = layout.buildDirectory.asFile.get().absolutePath.replace("dd-trace-java/dd-java-agent", + "dd-trace-java/workspace/dd-java-agent") + println "DEBUG full path: ${fullPath}" + + def outDir = new File(fullPath) + outDir.mkdirs() + def outFile = new File(outDir, "${System.currentTimeMillis()}-thread-dump.log") + + // single-thread scheduler (daemon) + def scheduler = Executors.newSingleThreadScheduledExecutor({ r -> + Thread t = new Thread(r, "dump-scheduler") + t.setDaemon(true) + return t + }) + + // schedule the dump job 10s later + def future = scheduler.schedule({ + try { + println "DEBUG threads dump: ${outFile.absolutePath}" + + new ProcessBuilder("jcmd", "0", "Thread.print", "-l") + .redirectErrorStream(true) + .redirectOutput(outFile) + .start().waitFor() + + println "DEBUG heap dumps" + def listOutput = "jcmd -l".execute().text.readLines() + listOutput.each { line -> + println "DEBUG PID: ${line}" + + if (!line.contains("gradle")) { + return + } + + def pid = line.substring(0, line.indexOf(' ')) + + def dumpFileName = "${fullPath}/${System.currentTimeMillis()}-${pid}.hprof" + + println "DEBUG dump file name: ${dumpFileName}" + + def cmd = "jcmd ${pid} GC.heap_dump ${dumpFileName}" + + println "DEBUG jcmd: ${cmd}" + + def res = cmd.execute().waitFor() + + println "DEBUG dump res: ${res}" + } + } catch (Throwable t) { + logger.warn("Dumping failed: ${t.message}", t) + } finally { + scheduler.shutdown() + } + }, 62, TimeUnit.SECONDS) + + // store handles for cancellation in doLast + ext.dumpFuture = future + ext.dumpScheduler = scheduler + } + + doLast { + // cancel if task completed in <10s + try { + ext.dumpFuture?.cancel(false) + } finally { + ext.dumpScheduler?.shutdownNow() + } + } +} diff --git a/dd-java-agent/instrumentation/lettuce-4/src/test/groovy/Lettuce4ClientTestBase.groovy b/dd-java-agent/instrumentation/lettuce-4/src/test/groovy/Lettuce4ClientTestBase.groovy index 3ef7f319ca3..b79c525a204 100644 --- a/dd-java-agent/instrumentation/lettuce-4/src/test/groovy/Lettuce4ClientTestBase.groovy +++ b/dd-java-agent/instrumentation/lettuce-4/src/test/groovy/Lettuce4ClientTestBase.groovy @@ -3,11 +3,17 @@ import com.lambdaworks.redis.RedisClient import com.lambdaworks.redis.api.StatefulConnection import com.lambdaworks.redis.api.async.RedisAsyncCommands import com.lambdaworks.redis.api.sync.RedisCommands +import com.sun.management.HotSpotDiagnosticMXBean import datadog.trace.agent.test.naming.VersionedNamingTestBase import datadog.trace.agent.test.utils.PortUtils import redis.embedded.RedisServer import spock.lang.Shared +import javax.management.MBeanServer +import java.lang.management.ManagementFactory +import java.lang.management.ThreadInfo +import java.lang.management.ThreadMXBean + import static datadog.trace.agent.test.utils.TraceUtils.runUnderTrace abstract class Lettuce4ClientTestBase extends VersionedNamingTestBase { @@ -32,6 +38,8 @@ abstract class Lettuce4ClientTestBase extends VersionedNamingTestBase { @Shared RedisServer redisServer + ThreadDumpLogger threadDumpLogger + @Shared Map testHashMap = [ firstname: "John", @@ -61,6 +69,11 @@ abstract class Lettuce4ClientTestBase extends VersionedNamingTestBase { } def setup() { + // Use the current feature name as the test name + String testName = "${specificationContext?.currentSpec?.name ?: "unknown-spec"} : ${specificationContext?.currentFeature?.name ?: "unknown-test"}" + threadDumpLogger = new ThreadDumpLogger(testName) + threadDumpLogger.start() + redisServer.start() redisClient = RedisClient.create(embeddedDbUri) @@ -79,14 +92,78 @@ abstract class Lettuce4ClientTestBase extends VersionedNamingTestBase { } def cleanup() { + threadDumpLogger.stop() + connection.close() redisClient.shutdown() redisServer.stop() } - @Override - boolean useStrictTraceWrites() { - // TODO: Monitor in CI to validate fix effectiveness against freezes. - return false + // 🔒 Private helper class for thread dump logging + private static class ThreadDumpLogger { + private final String testName + private Thread task + + ThreadDumpLogger(String testName) { + this.testName = testName + } + + void start() { + task = new Thread() { + @Override + void run() { + sleep(12000) + + File outputDir = new File("build") + String fullPath = outputDir.absolutePath.replace("dd-trace-java/dd-java-agent", + "dd-trace-java/workspace/dd-java-agent") + + outputDir = new File(fullPath) + if (!outputDir.exists()) { + println("Folder not found: " + fullPath) + outputDir.mkdirs() + } else println("Folder found: " + fullPath) + + // Use the current feature name as the test name + println("Test name: " + testName) + + heapDump(outputDir, "test_1") + + def reportFile = new File(outputDir, "${System.currentTimeMillis()}-thread-dump.log") + + try (def writer = new FileWriter(reportFile)) { + writer.write("=== Test: ${testName} ===\n") + writer.write("=== Thread Dump Triggered at ${new Date()} ===\n") + writer.write(threadDump(false, false)) + writer.write("==============================================\n") + } + + heapDump(outputDir, "test_2") + } + } + task.start() + } + + static void heapDump(File outputDir, String kind) { + def heapDumpFile = new File(outputDir, "${System.currentTimeMillis()}-heap-dump-${kind}.hprof").absolutePath + MBeanServer server = ManagementFactory.getPlatformMBeanServer() + HotSpotDiagnosticMXBean mxBean = ManagementFactory.newPlatformMXBeanProxy( + server, "com.sun.management:type=HotSpotDiagnostic", HotSpotDiagnosticMXBean.class) + mxBean.dumpHeap(heapDumpFile, true) + } + + private static String threadDump(boolean lockedMonitors, boolean lockedSynchronizers) { + StringBuffer threadDump = new StringBuffer(System.lineSeparator()) + ThreadMXBean threadMXBean = ManagementFactory.getThreadMXBean() + for(ThreadInfo threadInfo : threadMXBean.dumpAllThreads(lockedMonitors, lockedSynchronizers)) { + threadDump.append(threadInfo.toString()) + } + + return threadDump.toString() + } + + void stop() { + task?.interrupt() + } } }