TimefoldAI's main vs. triceo's index (Java 25 vs. 25) #384
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Both baseline and SUT (Software Under Test) are built from source first [1], | |
| # with their binaries uploaded as artifacts. | |
| # This is done on GitHub infrastructure, to achieve maximum parallelization. | |
| # | |
| # The benchmark job downloads the binaries and runs them. | |
| # The baseline is established first, then the SUT is measured. | |
| # They both run in the same job, | |
| # to guarantee they run on the same machine with the same performance characteristics. | |
| # This is done on a self-hosted runner which we completely control. | |
| # | |
| # Each benchmark gives a 99.9 % confidence interval. | |
| # The confidence intervals are compared to determine if the branch under test is a regression or an improvement. | |
| # The error threshold is expected to be below +/- 2.0 %. | |
| # | |
| # [1] Unless the baseline is a release tag, in which case its binaries are downloaded from a repository. | |
| # | |
| name: ScoreDirector Perf Regression Test | |
| # Manually triggered only; every input below is mandatory and has a default. | |
| on: | |
| workflow_dispatch: | |
| inputs: | |
| # JDK that the benchmark job sets up before running the baseline binaries. | |
| jdk_baseline: | |
| description: 'JDK version to run the baseline on' | |
| default: '25' | |
| required: true | |
| # A ref matching vX.Y.Z is treated as a release tag; anything else is built from source. | |
| baseline: | |
| description: 'Baseline branch or tag (branches need to use 999-SNAPSHOT)' | |
| default: 'v1.27.0' | |
| required: true | |
| # JDK that the benchmark job sets up before running the binaries of the branch under test. | |
| jdk_branch: | |
| description: 'JDK version to run the branch on' | |
| default: '25' | |
| required: true | |
| branch: | |
| description: 'Branch to benchmark (needs to use 999-SNAPSHOT)' | |
| default: 'main' | |
| required: true | |
| # Owner of the timefold-solver fork that holds the branch under test. | |
| branch_owner: | |
| description: 'User owning the branch' | |
| default: 'TimefoldAI' | |
| required: true | |
| run-name: "TimefoldAI's ${{ github.event.inputs.baseline }} vs. ${{ github.event.inputs.branch_owner }}'s ${{ github.event.inputs.branch }} (Java ${{ github.event.inputs.jdk_baseline }} vs. ${{ github.event.inputs.jdk_branch }})" | |
| jobs: | |
| # Decides whether the baseline is a release tag (binaries are resolved by version) | |
| # or a branch (snapshot has to be built from source first). | |
| # Outputs: | |
| #   baseline_solver_version: solver version passed to the baseline benchmark build. | |
| #   needs_snapshot_built: 'true' when the baseline solver must be built from source. | |
| decisions: | |
| runs-on: ubuntu-latest | |
| outputs: | |
| baseline_solver_version: ${{ steps.step1.outputs.version }} | |
| needs_snapshot_built: ${{ steps.step1.outputs.needs_snapshot_built }} | |
| steps: | |
| - name: Determine the baseline | |
| id: step1 | |
| shell: bash | |
| run: | | |
| # Release tags look like vX.Y.Z; the capture group drops the leading "v", | |
| # because BASH_REMATCH[0] would be the whole match, "v" included. | |
| if [[ "${{ github.event.inputs.baseline }}" =~ ^v([0-9]+\.[0-9]+\.[0-9]+)$ ]]; then | |
| VERSION="${BASH_REMATCH[1]}" | |
| NEEDS_SNAPSHOT_BUILT=false | |
| echo "Baseline is a release tag." | |
| else | |
| # We're testing against a branch. | |
| VERSION="999-SNAPSHOT" | |
| NEEDS_SNAPSHOT_BUILT=true | |
| echo "Baseline is a random branch." | |
| fi | |
| echo "version=$VERSION" >> "$GITHUB_OUTPUT" | |
| echo "needs_snapshot_built=$NEEDS_SNAPSHOT_BUILT" >> "$GITHUB_OUTPUT" | |
| # Builds the baseline benchmark JAR and uploads it as the "<example>-baseline" artifact. | |
| # The job runs once per matrix example; the example is only used in the artifact name. | |
| build_baseline: | |
| needs: decisions | |
| runs-on: ubuntu-latest # Leverage massive parallelization of Github-hosted runners. | |
| strategy: | |
| fail-fast: true # If one compilation fails, abort everything. | |
| matrix: | |
| # When updating this list, use find-and-replace in the entire file to keep all such lists identical. | |
| example: [cloud_balancing, conference_scheduling, curriculum_course, examination, machine_reassignment, meeting_scheduling, nurse_rostering, patient_admission_scheduling, task_assigning, traveling_tournament, tsp, vehicle_routing] | |
| env: | |
| MVN_USERNAME: '${{ secrets.JFROG_ENTERPRISE_READ_ONLY_ACCESS_USERNAME }}' | |
| MVN_PASSWORD: '${{ secrets.JFROG_ENTERPRISE_READ_ONLY_ACCESS_TOKEN }}' | |
| steps: | |
| - name: Checkout timefold-solver-benchmarks | |
| uses: actions/checkout@v5 | |
| with: | |
| repository: TimefoldAI/timefold-solver-benchmarks | |
| path: ./timefold-solver-benchmarks | |
| ref: main # Assume the ref is compatible with both baseline and SUT | |
| - name: Setup JDK and Maven | |
| uses: actions/setup-java@v5 | |
| with: | |
| java-version: 25 # Always build with the least recent supported JDK. | |
| distribution: 'temurin' | |
| cache: 'maven' | |
| server-id: 'timefold-solver-enterprise' | |
| server-username: 'MVN_USERNAME' | |
| server-password: 'MVN_PASSWORD' | |
| # Only build the snapshots if determined by the decisions job. | |
| - name: Checkout timefold-solver | |
| if: needs.decisions.outputs.needs_snapshot_built == 'true' | |
| uses: actions/checkout@v5 | |
| with: | |
| repository: TimefoldAI/timefold-solver | |
| ref: ${{ github.event.inputs.baseline }} | |
| path: ./timefold-solver | |
| - name: Quickly build timefold-solver | |
| if: needs.decisions.outputs.needs_snapshot_built == 'true' | |
| working-directory: ./timefold-solver | |
| shell: bash | |
| run: ./mvnw -B -Dquickly clean install | |
| - name: Checkout timefold-solver-enterprise | |
| if: needs.decisions.outputs.needs_snapshot_built == 'true' | |
| uses: actions/checkout@v5 | |
| with: | |
| repository: TimefoldAI/timefold-solver-enterprise | |
| ref: ${{ github.event.inputs.baseline }} | |
| token: ${{ secrets.BENCHMARK_PUBLISH_TOKEN }} | |
| path: ./timefold-solver-enterprise | |
| - name: Quickly build timefold-solver-enterprise | |
| if: needs.decisions.outputs.needs_snapshot_built == 'true' | |
| working-directory: ./timefold-solver-enterprise | |
| shell: bash | |
| run: ./mvnw -B -Dquickly clean install | |
| # NOTE(review): `git branch --list` only sees local branches; the checkout above | |
| # presumably fetched just `main`, so this condition may never match - confirm. | |
| - name: Switch to correct Benchmarks branch if it exists | |
| if: needs.decisions.outputs.needs_snapshot_built == 'true' | |
| working-directory: ./timefold-solver-benchmarks | |
| shell: bash | |
| run: | | |
| if git branch --list "${{ github.event.inputs.baseline }}" | grep -q .; then | |
| git checkout ${{ github.event.inputs.baseline }} | |
| fi | |
| git status | |
| - name: Compile the benchmark | |
| working-directory: ./timefold-solver-benchmarks | |
| shell: bash | |
| run: | | |
| ./mvnw clean install -B -Dquickly -Dversion.ai.timefold.solver=${{ needs.decisions.outputs.baseline_solver_version }} | |
| mv target/benchmarks.jar ../benchmarks-baseline.jar | |
| # The artifact name must match its content; the benchmark job downloads "<example>-baseline". | |
| - name: Upload the binaries | |
| uses: actions/upload-artifact@v5 | |
| with: | |
| name: ${{ matrix.example }}-baseline | |
| path: | | |
| ./benchmarks-baseline.jar | |
| if-no-files-found: error | |
| # Builds the SUT (Software Under Test) benchmark JAR from the requested branch | |
| # and uploads it as the "<example>-sut" artifact. | |
| # The job runs once per matrix example; the example is only used in the artifact name. | |
| build_sut: | |
| runs-on: ubuntu-latest # Leverage massive parallelization of Github-hosted runners. | |
| strategy: | |
| fail-fast: true # If one compilation fails, abort everything. | |
| matrix: | |
| # When updating this list, use find-and-replace in the entire file to keep all such lists identical. | |
| example: [cloud_balancing, conference_scheduling, curriculum_course, examination, machine_reassignment, meeting_scheduling, nurse_rostering, patient_admission_scheduling, task_assigning, traveling_tournament, tsp, vehicle_routing] | |
| env: | |
| MVN_USERNAME: '${{ secrets.JFROG_ENTERPRISE_READ_ONLY_ACCESS_USERNAME }}' | |
| MVN_PASSWORD: '${{ secrets.JFROG_ENTERPRISE_READ_ONLY_ACCESS_TOKEN }}' | |
| steps: | |
| - name: Checkout timefold-solver-benchmarks | |
| uses: actions/checkout@v5 | |
| with: | |
| repository: TimefoldAI/timefold-solver-benchmarks | |
| path: ./timefold-solver-benchmarks | |
| ref: main # Assume the ref is compatible with both baseline and SUT | |
| - name: Setup JDK and Maven | |
| uses: actions/setup-java@v5 | |
| with: | |
| java-version: 25 # Always build with the least recent supported JDK. | |
| distribution: 'temurin' | |
| cache: 'maven' | |
| server-id: 'timefold-solver-enterprise' | |
| server-username: 'MVN_USERNAME' | |
| server-password: 'MVN_PASSWORD' | |
| - name: Checkout timefold-solver | |
| uses: actions/checkout@v5 | |
| with: | |
| repository: ${{ github.event.inputs.branch_owner }}/timefold-solver | |
| ref: ${{ github.event.inputs.branch }} | |
| path: ./timefold-solver | |
| - name: Quickly build timefold-solver | |
| working-directory: ./timefold-solver | |
| shell: bash | |
| run: ./mvnw -B -Dquickly clean install | |
| # Clone timefold-solver-enterprise | |
| - name: Checkout timefold-solver-enterprise | |
| uses: actions/checkout@v5 | |
| with: | |
| repository: TimefoldAI/timefold-solver-enterprise | |
| ref: main | |
| token: ${{ secrets.BENCHMARK_PUBLISH_TOKEN }} | |
| path: ./timefold-solver-enterprise | |
| # NOTE(review): `git branch --list` only sees local branches; the checkout above | |
| # presumably fetched just `main`, so this condition may never match - confirm. | |
| # The same applies to the Benchmarks branch switch further down. | |
| - name: Switch to correct Enterprise branch if it exists | |
| working-directory: ./timefold-solver-enterprise | |
| shell: bash | |
| run: | | |
| if git branch --list "${{ github.event.inputs.branch }}" | grep -q .; then | |
| git checkout ${{ github.event.inputs.branch }} | |
| fi | |
| git status | |
| - name: Quickly build timefold-solver-enterprise | |
| working-directory: ./timefold-solver-enterprise | |
| shell: bash | |
| run: ./mvnw -B -Dquickly clean install | |
| # Sometimes changes may be incompatible with the tag. | |
| # If the branch doesn't exist, we assume that the changes are compatible and move on. | |
| - name: Switch to correct Benchmarks branch if it exists | |
| working-directory: ./timefold-solver-benchmarks | |
| shell: bash | |
| run: | | |
| if git branch --list "${{ github.event.inputs.branch }}" | grep -q .; then | |
| git checkout ${{ github.event.inputs.branch }} | |
| fi | |
| git status | |
| - name: Compile the benchmarks | |
| working-directory: ./timefold-solver-benchmarks | |
| shell: bash | |
| run: | | |
| ./mvnw clean install -B -Dquickly | |
| mv target/benchmarks.jar ../benchmarks-sut.jar | |
| # The artifact name must match its content; the benchmark job downloads "<example>-sut". | |
| - name: Upload the binaries | |
| uses: actions/upload-artifact@v5 | |
| with: | |
| name: ${{ matrix.example }}-sut | |
| path: | | |
| ./benchmarks-sut.jar | |
| if-no-files-found: error | |
| # Downloads both benchmark JARs and runs them back to back on the same self-hosted machine: | |
| # baseline first, then the SUT. Both confidence intervals are then compared and written | |
| # to the step summary; the job fails on a statistically significant regression. | |
| benchmark: | |
| needs: [ build_baseline, build_sut ] | |
| runs-on: self-hosted # We need a stable machine to actually run the benchmarks. | |
| strategy: | |
| fail-fast: false # Jobs fail if the benchmark error is over predefined thresholds; other benchmarks continue. | |
| matrix: | |
| # When updating this list, use find-and-replace in the entire file to keep all such lists identical. | |
| example: [cloud_balancing, conference_scheduling, curriculum_course, examination, machine_reassignment, meeting_scheduling, nurse_rostering, patient_admission_scheduling, task_assigning, traveling_tournament, tsp, vehicle_routing] | |
| env: | |
| MVN_USERNAME: '${{ secrets.JFROG_ENTERPRISE_READ_ONLY_ACCESS_USERNAME }}' | |
| MVN_PASSWORD: '${{ secrets.JFROG_ENTERPRISE_READ_ONLY_ACCESS_TOKEN }}' | |
| steps: | |
| - name: Clean results of previous runs | |
| shell: bash | |
| run: | | |
| rm -rf timefold-solver-benchmarks | |
| # GitHub expressions can't do this, so we need to hack this in shell. | |
| # Slashes in ref names are replaced by dashes; the results are used as directory and artifact names. | |
| echo "SANITIZED_BASELINE=$(echo "${{ github.event.inputs.baseline }}" | sed 's/\//\-/g')" >> $GITHUB_ENV | |
| echo "SANITIZED_BRANCH=$(echo "${{ github.event.inputs.branch }}" | sed 's/\//\-/g')" >> $GITHUB_ENV | |
| - name: Checkout timefold-solver-benchmarks | |
| uses: actions/checkout@v5 | |
| with: | |
| repository: TimefoldAI/timefold-solver-benchmarks | |
| path: ./timefold-solver-benchmarks | |
| # JDK for the baseline run. | |
| - name: Setup JDK and Maven | |
| uses: actions/setup-java@v5 | |
| with: | |
| java-version: ${{ github.event.inputs.jdk_baseline }} | |
| distribution: 'temurin' | |
| check-latest: true | |
| # Both artifacts are unpacked into the same directory. | |
| - name: Download the benchmark binaries | |
| uses: actions/download-artifact@v6 | |
| with: | |
| name: ${{ matrix.example }}-baseline | |
| path: ./timefold-solver-benchmarks | |
| - name: Download the benchmark binaries | |
| uses: actions/download-artifact@v6 | |
| with: | |
| name: ${{ matrix.example }}-sut | |
| path: ./timefold-solver-benchmarks | |
| # Fine-tuned for stability on GHA. | |
| - name: Configure the benchmark | |
| working-directory: ./timefold-solver-benchmarks | |
| shell: bash | |
| run: | | |
| echo "forks=2" > scoredirector-benchmark.properties | |
| echo "warmup_iterations=1" >> scoredirector-benchmark.properties | |
| echo "measurement_iterations=1" >> scoredirector-benchmark.properties | |
| echo "relative_score_error_threshold=0.02" >> scoredirector-benchmark.properties | |
| echo "score_director_type=cs" >> scoredirector-benchmark.properties | |
| echo "example=${{ matrix.example }}" >> scoredirector-benchmark.properties | |
| cat scoredirector-benchmark.properties | |
| chmod +x run-scoredirector.sh | |
| # Baseline run; publishes the rounded confidence interval bounds and the score as step outputs. | |
| - name: Run the benchmark | |
| working-directory: ./timefold-solver-benchmarks | |
| id: benchmark_baseline | |
| env: | |
| RUN_ID: ${{ env.SANITIZED_BASELINE }} | |
| shell: bash | |
| run: | | |
| mkdir target | |
| cp benchmarks-baseline.jar target/benchmarks.jar | |
| ./run-scoredirector.sh | |
| echo "RANGE_START=$(jq '.[0].primaryMetric.scoreConfidence[0]|round' results/scoredirector/${{ env.RUN_ID }}/results.json)" >> "$GITHUB_OUTPUT" | |
| echo "RANGE_END=$(jq '.[0].primaryMetric.scoreConfidence[1]|round' results/scoredirector/${{ env.RUN_ID }}/results.json)" >> "$GITHUB_OUTPUT" | |
| echo "RANGE_MID=$(jq '.[0].primaryMetric.score|round' results/scoredirector/${{ env.RUN_ID }}/results.json)" >> "$GITHUB_OUTPUT" | |
| # JDK for the SUT run. | |
| - name: Setup JDK and Maven | |
| uses: actions/setup-java@v5 | |
| with: | |
| java-version: ${{ github.event.inputs.jdk_branch }} | |
| distribution: 'temurin' | |
| check-latest: true | |
| # SUT run; same outputs as the baseline run. | |
| - name: Run the benchmark | |
| id: benchmark_sut | |
| working-directory: ./timefold-solver-benchmarks | |
| env: | |
| RUN_ID: ${{ env.SANITIZED_BRANCH }} | |
| shell: bash | |
| run: | | |
| rm target/benchmarks.jar | |
| cp benchmarks-sut.jar target/benchmarks.jar | |
| ./run-scoredirector.sh | |
| echo "RANGE_START=$(jq '.[0].primaryMetric.scoreConfidence[0]|round' results/scoredirector/${{ env.RUN_ID }}/results.json)" >> "$GITHUB_OUTPUT" | |
| echo "RANGE_END=$(jq '.[0].primaryMetric.scoreConfidence[1]|round' results/scoredirector/${{ env.RUN_ID }}/results.json)" >> "$GITHUB_OUTPUT" | |
| echo "RANGE_MID=$(jq '.[0].primaryMetric.score|round' results/scoredirector/${{ env.RUN_ID }}/results.json)" >> "$GITHUB_OUTPUT" | |
| # Moves each result directory next to the properties file and gives the JFR recordings unique names. | |
| - name: Clean up the data | |
| working-directory: ./timefold-solver-benchmarks | |
| shell: bash | |
| run: | | |
| mv results/scoredirector/${{ env.SANITIZED_BASELINE }} . | |
| mv ${{ env.SANITIZED_BASELINE }}/combined.jfr ${{ env.SANITIZED_BASELINE }}/${{ matrix.example }}-${{ env.SANITIZED_BASELINE }}-combined.jfr | |
| mv results/scoredirector/${{ env.SANITIZED_BRANCH }} . | |
| mv ${{ env.SANITIZED_BRANCH }}/combined.jfr ${{ env.SANITIZED_BRANCH }}/${{ matrix.example }}-${{ env.SANITIZED_BRANCH }}-combined.jfr | |
| - name: Archive benchmark data | |
| uses: actions/upload-artifact@v5 | |
| with: | |
| name: assets-${{ matrix.example }}-${{ env.SANITIZED_BASELINE }}_vs_${{ env.SANITIZED_BRANCH }} | |
| path: | | |
| ./timefold-solver-benchmarks/scoredirector-benchmark.properties | |
| ./timefold-solver-benchmarks/${{ env.SANITIZED_BASELINE }}/*combined.jfr | |
| ./timefold-solver-benchmarks/${{ env.SANITIZED_BASELINE }}/*.html | |
| ./timefold-solver-benchmarks/${{ env.SANITIZED_BASELINE }}/*.json | |
| ./timefold-solver-benchmarks/${{ env.SANITIZED_BRANCH }}/*combined.jfr | |
| ./timefold-solver-benchmarks/${{ env.SANITIZED_BRANCH }}/*.html | |
| ./timefold-solver-benchmarks/${{ env.SANITIZED_BRANCH }}/*.json | |
| - name: Report results | |
| working-directory: ./timefold-solver-benchmarks | |
| env: | |
| BASELINE_RANGE_START: ${{ steps.benchmark_baseline.outputs.RANGE_START }} | |
| BASELINE_RANGE_MID: ${{ steps.benchmark_baseline.outputs.RANGE_MID }} | |
| BASELINE_RANGE_END: ${{ steps.benchmark_baseline.outputs.RANGE_END }} | |
| SUT_RANGE_START: ${{ steps.benchmark_sut.outputs.RANGE_START }} | |
| SUT_RANGE_MID: ${{ steps.benchmark_sut.outputs.RANGE_MID }} | |
| SUT_RANGE_END: ${{ steps.benchmark_sut.outputs.RANGE_END }} | |
| shell: bash | |
| run: | | |
| # Multiply before dividing: with scale=2, bc truncates the quotient to two decimals, | |
| # so dividing first would reduce every percentage to a whole number. | |
| BASELINE_DEV=$(echo "scale=2; ($BASELINE_RANGE_MID * 100) / $BASELINE_RANGE_START - 100" | bc) | |
| SUT_DEV=$(echo "scale=2; ($SUT_RANGE_MID * 100) / $SUT_RANGE_START - 100" | bc) | |
| DIFF_MID=$(echo "scale=2; ($BASELINE_RANGE_MID * 100) / $SUT_RANGE_MID" | bc) | |
| FAIL=false | |
| if (( $(echo "$DIFF_MID >= 97.00" | bc -l) && $(echo "$DIFF_MID <= 103.00"|bc -l) )); then | |
| # Ignore differences of up to 3 %; we can't expect that level of precision anyway. | |
| echo "### ✅ Within tolerance" >> $GITHUB_STEP_SUMMARY | |
| elif [ "$SUT_RANGE_START" -gt "$BASELINE_RANGE_END" ]; then | |
| # The intervals do not overlap and the SUT interval is entirely above the baseline one. | |
| echo "### 🚀 Statistically significant improvement" >> $GITHUB_STEP_SUMMARY | |
| elif [ "$BASELINE_RANGE_START" -gt "$SUT_RANGE_END" ]; then | |
| # The intervals do not overlap and the SUT interval is entirely below the baseline one. | |
| echo "### ‼️ Statistically significant regression ‼️" >> $GITHUB_STEP_SUMMARY | |
| FAIL=true | |
| else | |
| echo "### 🌟 All good" >> $GITHUB_STEP_SUMMARY | |
| fi | |
| if [[ "${{ github.event.inputs.baseline }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then | |
| BASELINE_URL="https://github.com/TimefoldAI/timefold-solver/releases/tag/${{ github.event.inputs.baseline }}" | |
| else | |
| # The baseline link has to point at the baseline branch, not at the branch under test. | |
| BASELINE_URL="https://github.com/TimefoldAI/timefold-solver/tree/${{ github.event.inputs.baseline }}" | |
| fi | |
| SUT_URL="https://github.com/${{ github.event.inputs.branch_owner }}/timefold-solver/tree/${{ github.event.inputs.branch }}" | |
| echo "| | **Ref** | **Mean** |" >> $GITHUB_STEP_SUMMARY | |
| echo "|:------:|:-----------:|:-----------------:|" >> $GITHUB_STEP_SUMMARY | |
| echo "| _Old_ | [TimefoldAI's ${{ github.event.inputs.baseline }}]($BASELINE_URL) | $BASELINE_RANGE_MID ± $BASELINE_DEV % |" >> $GITHUB_STEP_SUMMARY | |
| echo "| _New_ | [${{ github.event.inputs.branch_owner }}'s ${{ github.event.inputs.branch }}]($SUT_URL) | $SUT_RANGE_MID ± $SUT_DEV % |" >> $GITHUB_STEP_SUMMARY | |
| echo "| _Diff_ | | $DIFF_MID % |" >> $GITHUB_STEP_SUMMARY | |
| echo "" >> $GITHUB_STEP_SUMMARY | |
| echo "Mean is in operations per second. Higher is better." >> $GITHUB_STEP_SUMMARY | |
| echo "Mean ± X % describes a 99.9 % confidence interval." >> $GITHUB_STEP_SUMMARY | |
| echo "Diff under 100 % represents an improvement, over 100 % a regression." >> $GITHUB_STEP_SUMMARY | |
| if [ "$FAIL" = true ]; then | |
| exit 1 | |
| fi | |