From ade1ab45c06648da3bf073feeffb669b1a17a52f Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Fri, 14 Feb 2025 01:20:15 +0000 Subject: [PATCH 01/15] Start building Clang runtimes on-demand This is the first step to having Clang's runtime libraries fully available for the Carbon toolchain. This PR focuses on the lowest level runtimes, the CRT files and the builtins library. The goal is to intercept Clang runs where it needs these target-dependent pieces to be available, and build them on demand using our Clang-running infrastructure. This avoids most of the subprocess overhead, but there is still some due to missing features in Clang. This requires exporting the sources for these runtimes from the Bazel build, and installing them in our target-independent resource directory. We then build a simplified "build" of these sources within the `ClangRunner` itself to produce the specific artifacts and layout expected by Clang. It also required fixing our use of Clang on macOS to have a default system root in order to successfully compile or link. It also required cleaning up how the `ClangRunner` used target information more generally -- instead of taking the target as a constructor parameter, it manages its target internally and relies on the Clang target-specifying command line flags. Note that I looked at whether we could split this into another layer separate from the `ClangRunner`, but that proved frustratingly difficult to manage. While we support building these on-demand as part of a detected link, that doesn't seem feasible as we don't have the necessary separation between compilation runs of Clang and link runs of Clang. However, I have tried to factor the internals to provide as clear a separation as I could across these. Currently, the only part of the commandline that is detected and forwarded to the runtimes build is the target. 
Eventually, the plan is to expand this so that we can build a maximally tailored set of runtimes for a given compilation. The other big TODO here is to actually implement caching storage of these runtimes so they aren't built on every execution. Right now, this uses a somewhat hack-y build of a temporary directory, but this isn't expected to be suitable long-term. Building these runtimes on *every* link makes those commands take approximately 15 seconds with an ASan build like our default development build, and just over 2 seconds in an optimized build. Hopefully this is OK in the brief interim until I can put caching in place. --- .codespell_ignore | 2 + MODULE.bazel | 1 + ...t_for_custom_rules_to_build_builtins.patch | 26 ++ scripts/fix_cc_deps.py | 3 +- toolchain/base/BUILD | 5 + toolchain/base/runtime_sources.bzl | 435 ++++++++++++++++++ toolchain/driver/BUILD | 5 + toolchain/driver/clang_runner.cpp | 344 +++++++++++++- toolchain/driver/clang_runner.h | 41 +- toolchain/driver/clang_runner_test.cpp | 222 ++++++--- toolchain/driver/clang_subcommand.cpp | 3 +- toolchain/driver/link_subcommand.cpp | 9 +- toolchain/driver/lld_runner_test.cpp | 2 +- toolchain/install/BUILD | 9 + toolchain/install/busybox_info.h | 13 +- toolchain/install/busybox_info_test.cpp | 5 +- toolchain/install/install_paths.cpp | 19 + toolchain/install/install_paths.h | 6 + 18 files changed, 1066 insertions(+), 84 deletions(-) create mode 100644 bazel/llvm_project/0004_Add_support_for_custom_rules_to_build_builtins.patch create mode 100644 toolchain/base/runtime_sources.bzl diff --git a/.codespell_ignore b/.codespell_ignore index 04c4a9d37a093..06387ec012c8f 100644 --- a/.codespell_ignore +++ b/.codespell_ignore @@ -2,6 +2,7 @@ # Exceptions. See /LICENSE for license information. 
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +ArchType atleast circularly compiletime @@ -14,6 +15,7 @@ forin groupt indext inout +isELF parameteras pullrequest rightt diff --git a/MODULE.bazel b/MODULE.bazel index 35a11f23ce791..220b021ae409d 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -126,6 +126,7 @@ http_archive( "@carbon//bazel/llvm_project:0001_Patch_for_mallinfo2_when_using_Bazel_build_system.patch", "@carbon//bazel/llvm_project:0002_Added_Bazel_build_for_compiler_rt_fuzzer.patch", "@carbon//bazel/llvm_project:0003_Comment_out_unloaded_proto_library_dependencies.patch", + "@carbon//bazel/llvm_project:0004_Add_support_for_custom_rules_to_build_builtins.patch", ], sha256 = "8466760c8d69c5d3a1d2561813f47fa9a6962076adfb2b3f7aa0a69417b36c52", strip_prefix = "llvm-project-{0}".format(llvm_project_version), diff --git a/bazel/llvm_project/0004_Add_support_for_custom_rules_to_build_builtins.patch b/bazel/llvm_project/0004_Add_support_for_custom_rules_to_build_builtins.patch new file mode 100644 index 0000000000000..21c686c980b49 --- /dev/null +++ b/bazel/llvm_project/0004_Add_support_for_custom_rules_to_build_builtins.patch @@ -0,0 +1,26 @@ +From 0a46fc798170e76a0110bf118ae53db7a8c10b27 Mon Sep 17 00:00:00 2001 +From: Chandler Carruth +Date: Fri, 14 Feb 2025 01:08:54 +0000 +Subject: [PATCH 4/4] Add support for custom rules to build builtins + +This exposes the compiler-rt files for any custom rules that we need to +write to build and install them for the Carbon toolchain. 
+--- + utils/bazel/llvm-project-overlay/compiler-rt/BUILD.bazel | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/utils/bazel/llvm-project-overlay/compiler-rt/BUILD.bazel b/utils/bazel/llvm-project-overlay/compiler-rt/BUILD.bazel +index 00f2a3b9d7c0..a53a34cdc952 100644 +--- a/utils/bazel/llvm-project-overlay/compiler-rt/BUILD.bazel ++++ b/utils/bazel/llvm-project-overlay/compiler-rt/BUILD.bazel +@@ -127,3 +127,7 @@ cc_library( + ], + includes = ["lib/fuzzer"], + ) ++ ++# Allow building custom rules for runtimes. ++exports_files(glob(["**"])) ++ +-- +2.48.1.601.g30ceb7b040-goog + diff --git a/scripts/fix_cc_deps.py b/scripts/fix_cc_deps.py index d8a75fd50bd4b..001f757892a5c 100755 --- a/scripts/fix_cc_deps.py +++ b/scripts/fix_cc_deps.py @@ -70,7 +70,8 @@ class RuleChoice(NamedTuple): IGNORE_SOURCE_FILE_REGEX = re.compile( r"^(third_party/clangd.*|common/version.*\.cpp" r"|.*_autogen_manifest\.cpp" - r"|toolchain/base/llvm_tools.def)$" + r"|toolchain/base/llvm_tools.def" + r"|toolchain/base/runtime_sources.h)$" ) diff --git a/toolchain/base/BUILD b/toolchain/base/BUILD index 5255190b16b98..d7215f46f855a 100644 --- a/toolchain/base/BUILD +++ b/toolchain/base/BUILD @@ -4,6 +4,7 @@ load("//bazel/cc_rules:defs.bzl", "cc_library", "cc_test") load("llvm_tools.bzl", "LLVM_MAIN_TOOLS", "generate_llvm_tools_def") +load("runtime_sources.bzl", "generate_runtime_sources_cc_library") package(default_visibility = ["//visibility:public"]) @@ -173,6 +174,10 @@ cc_library( ] + [info.lib for info in LLVM_MAIN_TOOLS.values()], ) +generate_runtime_sources_cc_library( + name = "runtime_sources", +) + cc_library( name = "shared_value_stores", hdrs = ["shared_value_stores.h"], diff --git a/toolchain/base/runtime_sources.bzl b/toolchain/base/runtime_sources.bzl new file mode 100644 index 0000000000000..a9f41f8bfcc94 --- /dev/null +++ b/toolchain/base/runtime_sources.bzl @@ -0,0 +1,435 @@ +# Part of the Carbon Language project, under the Apache License v2.0 with LLVM +# 
Exceptions. See /LICENSE for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +"""Provides source lists and rules for working with runtime library sources.""" + +load("@rules_cc//cc:cc_library.bzl", "cc_library") + +BUILTINS_CRTBEGIN = "crtbegin.c" +BUILTINS_CRTEND = "crtend.c" + +BUILTINS_BASE_SRCS = [ + # Internal headers. + "assembly.h", + "cpu_model/cpu_model.h", + "fp_extend.h", + "fp_lib.h", + "fp_mode.h", + "fp_trunc.h", + "int_endianness.h", + "int_lib.h", + "int_math.h", + "int_to_fp.h", + "int_types.h", + "int_util.h", + "unwind-ehabi-helpers.h", + + # Internal textually included files. + "fp_add_impl.inc", + "fp_compare_impl.inc", + "fp_div_impl.inc", + "fp_extend_impl.inc", + "fp_fixint_impl.inc", + "fp_fixuint_impl.inc", + "fp_mul_impl.inc", + "fp_trunc_impl.inc", + "int_div_impl.inc", + "int_mulo_impl.inc", + "int_mulv_impl.inc", + "int_to_fp_impl.inc", + + # Source files. + "absvdi2.c", + "absvsi2.c", + "absvti2.c", + "adddf3.c", + "addsf3.c", + "addvdi3.c", + "addvsi3.c", + "addvti3.c", + "apple_versioning.c", + "ashldi3.c", + "ashlti3.c", + "ashrdi3.c", + "ashrti3.c", + "bswapdi2.c", + "bswapsi2.c", + "clzdi2.c", + "clzsi2.c", + "clzti2.c", + "cmpdi2.c", + "cmpti2.c", + "comparedf2.c", + "comparesf2.c", + "ctzdi2.c", + "ctzsi2.c", + "ctzti2.c", + "divdc3.c", + "divdf3.c", + "divdi3.c", + "divmoddi4.c", + "divmodsi4.c", + "divmodti4.c", + "divsc3.c", + "divsf3.c", + "divsi3.c", + "divti3.c", + "extendsfdf2.c", + "extendhfsf2.c", + "ffsdi2.c", + "ffssi2.c", + "ffsti2.c", + "fixdfdi.c", + "fixdfsi.c", + "fixdfti.c", + "fixsfdi.c", + "fixsfsi.c", + "fixsfti.c", + "fixunsdfdi.c", + "fixunsdfsi.c", + "fixunsdfti.c", + "fixunssfdi.c", + "fixunssfsi.c", + "fixunssfti.c", + "floatdidf.c", + "floatdisf.c", + "floatsidf.c", + "floatsisf.c", + "floattidf.c", + "floattisf.c", + "floatundidf.c", + "floatundisf.c", + "floatunsidf.c", + "floatunsisf.c", + "floatuntidf.c", + "floatuntisf.c", + "fp_mode.c", + "int_util.c", + 
"lshrdi3.c", + "lshrti3.c", + "moddi3.c", + "modsi3.c", + "modti3.c", + "muldc3.c", + "muldf3.c", + "muldi3.c", + "mulodi4.c", + "mulosi4.c", + "muloti4.c", + "mulsc3.c", + "mulsf3.c", + "multi3.c", + "mulvdi3.c", + "mulvsi3.c", + "mulvti3.c", + "negdf2.c", + "negdi2.c", + "negsf2.c", + "negti2.c", + "negvdi2.c", + "negvsi2.c", + "negvti2.c", + "os_version_check.c", + "paritydi2.c", + "paritysi2.c", + "parityti2.c", + "popcountdi2.c", + "popcountsi2.c", + "popcountti2.c", + "powidf2.c", + "powisf2.c", + "subdf3.c", + "subsf3.c", + "subvdi3.c", + "subvsi3.c", + "subvti3.c", + "trampoline_setup.c", + "truncdfhf2.c", + "truncdfsf2.c", + "truncsfhf2.c", + "ucmpdi2.c", + "ucmpti2.c", + "udivdi3.c", + "udivmoddi4.c", + "udivmodsi4.c", + "udivmodti4.c", + "udivsi3.c", + "udivti3.c", + "umoddi3.c", + "umodsi3.c", + "umodti3.c", +] + +BUILTINS_BF16_SRCS = [ + "extendbfsf2.c", + "truncdfbf2.c", + "truncxfbf2.c", + "truncsfbf2.c", + "trunctfbf2.c", +] + +BUILTINS_TF_SRCS = [ + "addtf3.c", + "comparetf2.c", + "divtc3.c", + "divtf3.c", + "extenddftf2.c", + "extendhftf2.c", + "extendsftf2.c", + "fixtfdi.c", + "fixtfsi.c", + "fixtfti.c", + "fixunstfdi.c", + "fixunstfsi.c", + "fixunstfti.c", + "floatditf.c", + "floatsitf.c", + "floattitf.c", + "floatunditf.c", + "floatunsitf.c", + "floatuntitf.c", + "multc3.c", + "multf3.c", + "powitf2.c", + "subtf3.c", + "trunctfdf2.c", + "trunctfhf2.c", + "trunctfsf2.c", +] + +BUILTINS_HOSTED_SRCS = [ + #"emutls.c", + #"enable_execute_stack.c", +] + +BUILTINS_ATOMIC_SRCS = [ + "atomic.c", +] + +BUILTINS_EH_PERSONALITY_SRCS = [ + "gcc_personality_v0.c", +] + +BUILTINS_GENERIC_SRCS = BUILTINS_BASE_SRCS + BUILTINS_HOSTED_SRCS + BUILTINS_ATOMIC_SRCS + BUILTINS_EH_PERSONALITY_SRCS + +BUILTINS_MACOS_SRCS = [ + "atomic_flag_clear.c", + "atomic_flag_clear_explicit.c", + "atomic_flag_test_and_set.c", + "atomic_flag_test_and_set_explicit.c", + "atomic_signal_fence.c", + "atomic_thread_fence.c", +] + +BUILTINS_X86_ARCH_SRCS = [ + "cpu_model/x86.c", + 
"i386/fp_mode.c", +] + +BUILTINS_X86_FP80_SRCS = [ + "divxc3.c", + "extendhfxf2.c", + "extendxftf2.c", + "fixxfdi.c", + "fixxfti.c", + "fixunsxfdi.c", + "fixunsxfsi.c", + "fixunsxfti.c", + "floatdixf.c", + "floattixf.c", + "floatundixf.c", + "floatuntixf.c", + "mulxc3.c", + "powixf2.c", + "trunctfxf2.c", + "truncxfhf2.c", +] + +BUILTINS_AARCH64_SRCS = [ + "cpu_model/aarch64.h", + "cpu_model/aarch64.c", + "cpu_model/AArch64CPUFeatures.inc", + "cpu_model/aarch64/fmv/android.inc", + "cpu_model/aarch64/fmv/apple.inc", + "cpu_model/aarch64/fmv/baremetal.inc", + "cpu_model/aarch64/fmv/freebsd.inc", + "cpu_model/aarch64/fmv/fuchsia.inc", + "cpu_model/aarch64/fmv/getauxval.inc", + "cpu_model/aarch64/fmv/mrs.inc", + "cpu_model/aarch64/fmv/unimplemented.inc", + "cpu_model/aarch64/fmv/windows.inc", + "cpu_model/aarch64/hwcap.inc", + "cpu_model/aarch64/lse_atomics/android.inc", + "cpu_model/aarch64/lse_atomics/freebsd.inc", + "cpu_model/aarch64/lse_atomics/fuchsia.inc", + "cpu_model/aarch64/lse_atomics/getauxval.inc", + "cpu_model/aarch64/lse_atomics/windows.inc", + "aarch64/fp_mode.c", + "aarch64/sme-abi.S", + "aarch64/sme-libc-mem-routines.S", + "aarch64/sme-abi-assert.c", + "aarch64/sme-libc-routines.c", +] +BUILTINS_X86_64_SRCS = [ + "x86_64/floatdidf.c", + "x86_64/floatdisf.c", + "x86_64/floatdixf.c", + "x86_64/floatundidf.S", + "x86_64/floatundisf.S", + "x86_64/floatundixf.S", +] +BUILTINS_I386_SRCS = [ + "i386/ashldi3.S", + "i386/ashrdi3.S", + "i386/divdi3.S", + "i386/floatdidf.S", + "i386/floatdisf.S", + "i386/floatundidf.S", + "i386/floatundisf.S", + "i386/lshrdi3.S", + "i386/moddi3.S", + "i386/muldi3.S", + "i386/udivdi3.S", + "i386/umoddi3.S", + "i386/floatdixf.S", + "i386/floatundixf.S", +] + +RUNTIME_SRCS = [ + BUILTINS_CRTBEGIN, + BUILTINS_CRTEND, +] + [ + # Flatten the individual lists of source files into one main list. We use a + # nested loop list comprehension as it formats a bit more cleanly than `+`. 
+ src + for srcs in [ + BUILTINS_GENERIC_SRCS, + BUILTINS_BF16_SRCS, + BUILTINS_TF_SRCS, + BUILTINS_X86_ARCH_SRCS, + BUILTINS_X86_FP80_SRCS, + BUILTINS_AARCH64_SRCS, + BUILTINS_X86_64_SRCS, + BUILTINS_I386_SRCS, + ] + for src in srcs +] + +# "aarch64-unknown-linux-gnu": BUILTINS_GENERIC_SRCS + BUILTINS_BF16_SRCS + BUILTINS_TF_SRCS +# "x86_64-unknown-linux-gnu": BUILTINS_GENERIC_SRCS + BUILTINS_BF16_SRCS + BUILTINS_TF_SRCS + BUILTINS_X86_ARCH_SRCS + BUILTINS_X86_FP80_SRCS + [ +# "i386-unknown-linux-gnu": BUILTINS_GENERIC_SRCS + BUILTINS_BF16_SRCS + BUILTINS_X86_ARCH_SRCS + BUILTINS_X86_FP80_SRCS + [ + +#[carbon_cc_runtime_static_library( +# name = target + "/libclang_rt.builtins.a", +# srcs = ["@llvm-project//compiler-rt:lib/builtins/" + src for src in BUILTINS_TARGET_SRCS[target]], +# copts = [ +# "--target=" + target, +# "-std=c11", +# "-fPIC", +# "-ffreestanding", +# "-fno-builtin", +# "-fvisibility=hidden", +# "-fomit-frame-pointer", +# ], +#) for target in TARGETS] +# The main LLVM command line tools, including their "primary" name, binary name, +# and the library dependency required to use them. + +_TEMPLATE = """ +// Part of the Carbon Language project, under the Apache License v2.0 with LLVM +// Exceptions. See /LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Generated header file of strings describing the runtime library sources +// needed by various Carbon runtime libraries. +// +// See toolchain/driver/runtime_sources.bzl for more details. 
+ +#ifndef CARBON_TOOLCHAIN_BASE_RUNTIME_SOURCES_H_ +#define CARBON_TOOLCHAIN_BASE_RUNTIME_SOURCES_H_ + +#include "llvm/ADT/StringRef.h" + +namespace Carbon::RuntimeSources {{ + +constexpr inline llvm::StringLiteral CrtBegin = {crtbegin}; +constexpr inline llvm::StringLiteral CrtEnd = {crtend}; + +constexpr inline llvm::StringLiteral BuiltinsGenericSrcs[] = {{ +{generic_srcs} +}}; +constexpr inline llvm::StringLiteral BuiltinsMacosSrcs[] = {{ +{macos_srcs} +}}; +constexpr inline llvm::StringLiteral BuiltinsBf16Srcs[] = {{ +{bf16_srcs} +}}; +constexpr inline llvm::StringLiteral BuiltinsTfSrcs[] = {{ +{tf_srcs} +}}; +constexpr inline llvm::StringLiteral BuiltinsX86ArchSrcs[] = {{ +{x86_arch_srcs} +}}; +constexpr inline llvm::StringLiteral BuiltinsX86Fp80Srcs[] = {{ +{x86_fp80_srcs} +}}; +constexpr inline llvm::StringLiteral BuiltinsAarch64Srcs[] = {{ +{aarch64_srcs} +}}; +constexpr inline llvm::StringLiteral BuiltinsX86_64Srcs[] = {{ +{x86_64_srcs} +}}; +constexpr inline llvm::StringLiteral BuiltinsI386Srcs[] = {{ +{i386_srcs} +}}; + +}} + +#endif // CARBON_TOOLCHAIN_BASE_RUNTIME_SOURCES_H_ +""" + +def _make_srcs_strings(dir, srcs): + return "\n".join([' "' + dir + "/" + src + '",' for src in srcs]) + +def _generate_runtime_sources_h_rule(ctx): + h_file = ctx.actions.declare_file(ctx.label.name) + ctx.actions.write(h_file, _TEMPLATE.format( + crtbegin = '"builtins/' + BUILTINS_CRTBEGIN + '"', + crtend = '"builtins/' + BUILTINS_CRTEND + '"', + generic_srcs = _make_srcs_strings("builtins", BUILTINS_GENERIC_SRCS), + macos_srcs = _make_srcs_strings("builtins", BUILTINS_MACOS_SRCS), + bf16_srcs = _make_srcs_strings("builtins", BUILTINS_BF16_SRCS), + tf_srcs = _make_srcs_strings("builtins", BUILTINS_TF_SRCS), + x86_arch_srcs = _make_srcs_strings("builtins", BUILTINS_X86_ARCH_SRCS), + x86_fp80_srcs = _make_srcs_strings("builtins", BUILTINS_X86_FP80_SRCS), + aarch64_srcs = _make_srcs_strings("builtins", BUILTINS_AARCH64_SRCS), + x86_64_srcs = _make_srcs_strings("builtins", 
BUILTINS_X86_64_SRCS), + i386_srcs = _make_srcs_strings("builtins", BUILTINS_I386_SRCS), + )) + return [DefaultInfo(files = depset([h_file]))] + +generate_runtime_sources_h = rule( + implementation = _generate_runtime_sources_h_rule, + attrs = {}, +) + +def generate_runtime_sources_cc_library(name, **kwargs): + """Generates the `runtime_sources.h` header and a `cc_library` for it. + + This first generates the `runtime_sources.h` file, and then synthesizes a + `cc_library` rule exporting that file in its `hdrs`. + + The `cc_library` rule name is the provided `name` and should be depended on + by code that includes the generated file. The `kwargs` are expanded into the + `cc_library` in case other attributes need to be configured there. + + The header is produced by the `generate_runtime_sources_h` rule under the + fixed target name `runtime_sources.h`, independent of the provided `name`. + """ + generate_runtime_sources_h(name = "runtime_sources.h") + cc_library( + name = name, + hdrs = ["runtime_sources.h"], + **kwargs + ) diff --git a/toolchain/driver/BUILD b/toolchain/driver/BUILD index 9913a85fe02ac..0c7afe41ed537 100644 --- a/toolchain/driver/BUILD +++ b/toolchain/driver/BUILD @@ -21,15 +21,18 @@ cc_library( srcs = ["clang_runner.cpp"], hdrs = ["clang_runner.h"], deps = [ + ":llvm_runner", ":tool_runner_base", "//common:ostream", "//common:vlog", + "//toolchain/base:runtime_sources", "//toolchain/install:install_paths", "@llvm-project//clang:basic", "@llvm-project//clang:clang-driver", "@llvm-project//clang:driver", "@llvm-project//clang:frontend", "@llvm-project//llvm:Core", + "@llvm-project//llvm:Object", "@llvm-project//llvm:Support", "@llvm-project//llvm:TargetParser", ], @@ -39,8 +42,10 @@ cc_test( name = "clang_runner_test", size = "small", srcs = ["clang_runner_test.cpp"], + data = ["//toolchain/install:install_data"], deps = [ ":clang_runner", + ":llvm_runner", "//common:all_llvm_targets", "//common:check", "//common:ostream", diff --git 
a/toolchain/driver/clang_runner.cpp b/toolchain/driver/clang_runner.cpp index 8ffbae9e6c089..779d66973fc62 100644 --- a/toolchain/driver/clang_runner.cpp +++ b/toolchain/driver/clang_runner.cpp @@ -4,11 +4,15 @@ #include "toolchain/driver/clang_runner.h" +#include + #include +#include #include #include #include #include +#include #include #include "clang/Basic/Diagnostic.h" @@ -23,11 +27,15 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/Object/ArchiveWriter.h" +#include "llvm/Support/Error.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/FormatAdapters.h" #include "llvm/Support/LLVMDriver.h" #include "llvm/Support/Path.h" #include "llvm/Support/Program.h" #include "llvm/TargetParser/Host.h" +#include "toolchain/base/runtime_sources.h" // Defined in: // https://github.com/llvm/llvm-project/blob/main/clang/tools/driver/driver.cpp @@ -42,17 +50,219 @@ auto clang_main(int Argc, char** Argv, const llvm::ToolContext& ToolContext) namespace Carbon { ClangRunner::ClangRunner(const InstallPaths* install_paths, - llvm::StringRef target, llvm::IntrusiveRefCntPtr fs, llvm::raw_ostream* vlog_stream) : ToolRunnerBase(install_paths, vlog_stream), - target_(target), fs_(std::move(fs)), diagnostic_ids_(new clang::DiagnosticIDs()) {} +ClangRunner::~ClangRunner() { + if (!tmp_dir_.empty()) { + std::error_code ec; + if (auto number = std::filesystem::remove_all(tmp_dir_, ec); number == 0) { + CARBON_VLOG("Expected a temporary directory and found none: {0}", + tmp_dir_); + } else if (ec) { + CARBON_VLOG("Error when cleaning temporary directory `{0}`: {1}", + tmp_dir_, ec.message()); + } + } +} + +static auto ComputeTarget(llvm::ArrayRef args) -> std::string { + std::string target = llvm::sys::getDefaultTargetTriple(); + for (auto [i, arg] : llvm::enumerate(args)) { + if (llvm::StringRef arg_copy = arg; arg_copy.consume_front("--target=")) { + target = arg_copy.str(); + } else if ((arg == "--target" 
|| arg == "-target") && + (i + 1) < args.size()) { + target = args[i + 1].str(); + } else if (arg == "--driver-mode=cl" || + ((arg == "--driver-mode" || arg == "-driver-mode") && + (i + 1) < args.size() && args[i + 1] == "cl")) { + // The `cl.exe` compatible driver mode should force to a + // `...-pc-windows-msvc` target. + llvm::Triple triple(target); + triple.setVendor(llvm::Triple::PC); + triple.setOS(llvm::Triple::Win32); + triple.setEnvironment(llvm::Triple::MSVC); + target = triple.str(); + } else if (arg == "-m32") { + llvm::Triple triple(target); + if (!triple.isArch32Bit()) { + target = triple.get32BitArchVariant().str(); + } + } else if (arg == "-m64") { + llvm::Triple triple(target); + if (!triple.isArch64Bit()) { + target = triple.get64BitArchVariant().str(); + } + } + } + return target; +} + +// Tries to detect a non-linking list of Clang arguments to avoid setting up +// the more complete resource directory needed for linking. False negatives are +// fine here, and we use that to keep things simple. +static auto IsNonLinkCommand(llvm::ArrayRef args) -> bool { + return llvm::any_of(args, [](llvm::StringRef arg) { + // Only check the most common cases as we have to do this for each argument. + if (arg.empty() || arg[0] != '-') { + return false; + } + if (arg.size() == 2) { + return arg[1] == 'c' || arg[1] == 'E' || arg[1] == 'S'; + } + if (arg == "-fsyntax-only" || arg == "--version" || arg == "--help") { + return true; + } + // Everything else is rare and likely not worth the cost of searching for + // since it's fine to have false negatives. + return false; + }); +} + auto ClangRunner::Run(llvm::ArrayRef args) -> bool { + // Check the args to see if we have a known target-independent command. If so, + // directly dispatch it to avoid the cost of building the target resource + // directory. // TODO: Maybe handle response file expansion similar to the Clang CLI? 
+ if (args.empty() || args[0].starts_with("-cc1") || IsNonLinkCommand(args)) { + return RunTargetIndependentCommand(args); + } + + // Otherwise, we need to build a target resource directory. + CARBON_VLOG("Building target resource dir...\n"); + // Disable any leaking of memory while building the target resource dir, and + // restore the previous setting at the end. + auto restore_leak_flag = llvm::make_scope_exit( + [&, orig_flag = enable_leaking_] { enable_leaking_ = orig_flag; }); + enable_leaking_ = false; + + std::string target = ComputeTarget(args); + std::string resource_dir_path = BuildTargetResourceDir(target); + return RunInternal(args, target, resource_dir_path); +} + +auto ClangRunner::RunTargetIndependentCommand( + llvm::ArrayRef args) -> bool { + std::string target = ComputeTarget(args); + return RunInternal(args, target, std::nullopt); +} + +auto ClangRunner::BuildTargetResourceDir(llvm::StringRef target) + -> std::string { + MakeTmpDir(); + std::filesystem::path resource_dir_path = tmp_dir_ / "clang"; + std::error_code ec; + std::filesystem::create_directory(resource_dir_path, ec); + + // Symlink the installation's `include` and `share` directories. + std::filesystem::path install_resource_path = + installation_->clang_resource_path(); + std::filesystem::create_symlink(install_resource_path / "include", + resource_dir_path / "include", ec); + std::filesystem::create_symlink(install_resource_path / "share", + resource_dir_path / "share", ec); + + std::filesystem::path lib_path = resource_dir_path / "lib"; + std::filesystem::create_directory(lib_path, ec); + lib_path /= target.str(); + std::filesystem::create_directory(lib_path, ec); + + llvm::Triple target_triple(target); + CARBON_CHECK(!target_triple.isOSWindows(), + "TODO: Windows runtimes are untested and not yet supported."); + + // For Linux targets, the system libc (typically glibc) doesn't necessarily + // provide the CRT begin/end files, and so we need to build them. 
+ if (target_triple.isOSLinux()) { + std::filesystem::path begin_o_path = lib_path / "clang_rt.crtbegin.o"; + BuildCrtFile(target, RuntimeSources::CrtBegin, begin_o_path.native()); + + std::filesystem::path end_o_path = lib_path / "clang_rt.crtend.o"; + BuildCrtFile(target, RuntimeSources::CrtEnd, end_o_path.native()); + } + + llvm::SmallVector src_files; + auto append_src_files = [&src_files](auto input_srcs) { + for (llvm::StringRef input_src : input_srcs) { + if (input_src.ends_with(".c") || input_src.ends_with(".S")) { + src_files.push_back(input_src); + } + } + }; + append_src_files(llvm::ArrayRef(RuntimeSources::BuiltinsGenericSrcs)); + append_src_files(llvm::ArrayRef(RuntimeSources::BuiltinsBf16Srcs)); + if (target_triple.isArch64Bit()) { + append_src_files(llvm::ArrayRef(RuntimeSources::BuiltinsTfSrcs)); + } + if (target_triple.isAArch64()) { + append_src_files(llvm::ArrayRef(RuntimeSources::BuiltinsAarch64Srcs)); + } else if (target_triple.isX86()) { + append_src_files(llvm::ArrayRef(RuntimeSources::BuiltinsX86ArchSrcs)); + if (target_triple.isArch64Bit()) { + append_src_files(llvm::ArrayRef(RuntimeSources::BuiltinsX86_64Srcs)); + } else { + CARBON_CHECK( + target_triple.isArch32Bit(), + "The Carbon toolchain doesn't currently support 16-bit x86."); + append_src_files(llvm::ArrayRef(RuntimeSources::BuiltinsI386Srcs)); + } + } else { + CARBON_FATAL("Target architecture is not supported: {0}", target); + } + + // We need a separate directory for object files used in the builtins archive. 
+ std::filesystem::path objs_path = tmp_dir_ / "_objs"; + std::filesystem::create_directory(objs_path, ec); + + llvm::SmallVector objs; + objs.reserve(src_files.size()); + for (llvm::StringRef src_file : src_files) { + llvm::StringRef obj_file = src_file; + std::filesystem::path obj_path = objs_path; + for (;;) { + auto slash_offset = obj_file.find('/'); + if (slash_offset == llvm::StringRef::npos) { + break; + } + obj_path.append(obj_file.begin(), obj_file.begin() + slash_offset); + std::filesystem::create_directory(obj_path, ec); + obj_file = obj_file.drop_front(slash_offset + 1); + } + obj_path.append(obj_file.begin(), obj_file.end()); + obj_path += ".o"; + BuildBuiltinsFile(target, src_file, obj_path.native()); + + llvm::Expected obj = + llvm::NewArchiveMember::getFile(obj_path.native(), + /*Deterministic=*/true); + CARBON_CHECK(obj, "TODO: Diagnose this: {0}", + llvm::fmt_consume(obj.takeError())); + objs.push_back(std::move(*obj)); + } + + // Now build an archive out of the `.o` files for the builtins. + std::filesystem::path builtins_a_path = lib_path / "libclang_rt.builtins.a"; + llvm::raw_fd_ostream builtins_a_os(builtins_a_path.native(), ec); + CARBON_CHECK(!ec, "Unable to open archive for writing: {0}", ec.message()); + llvm::Error archive_err = llvm::writeArchiveToStream( + builtins_a_os, objs, llvm::SymtabWritingMode::NormalSymtab, + target_triple.isOSDarwin() ? llvm::object::Archive::K_DARWIN + : llvm::object::Archive::K_GNU, + /*Deterministic=*/true, /*Thin=*/false); + CARBON_CHECK(!archive_err, "Error writing archive: {0}", archive_err); + + return resource_dir_path; +} + +// Handles building the Clang driver and passing the arguments down to it. +auto ClangRunner::RunInternal( + llvm::ArrayRef args, llvm::StringRef target, + std::optional target_resource_dir_path) -> bool { std::string clang_path = installation_->clang_path(); // Rebuild the args as C-string args. 
@@ -60,6 +270,7 @@ auto ClangRunner::Run(llvm::ArrayRef args) -> bool { llvm::SmallVector cstr_args = BuildCStrArgs("Clang", clang_path, "-v", args, cstr_arg_storage); + // Handle special dispatch for CC1 commands as they don't use the driver. if (!args.empty() && args[0].starts_with("-cc1")) { CARBON_VLOG("Calling clang_main for cc1..."); // cstr_args[0] will be the `clang_path` so we don't need the prepend arg. @@ -89,8 +300,31 @@ auto ClangRunner::Run(llvm::ArrayRef args) -> bool { /*ShouldOwnClient=*/false); clang::ProcessWarningOptions(diagnostics, *diagnostic_options, *fs_); - clang::driver::Driver driver(clang_path, target_, diagnostics, - "clang LLVM compiler", fs_); + // Note that we configure the driver's *default* target here, not the expected + // target as that will be parsed out of the command line below. + clang::driver::Driver driver(clang_path, llvm::sys::getDefaultTargetTriple(), + diagnostics, "clang LLVM compiler", fs_); + + llvm::Triple target_triple(target); + + // We need to set an SDK system root on macOS by default. Setting it here + // allows a custom sysroot to still be specified on the command line. + // + // TODO: A different system root should be used for iOS, watchOS, tvOS. + // Currently, we're only targeting macOS support though. + if (target_triple.isMacOSX()) { + // This is the default CLT system root, shown by `xcrun --show-sdk-path`. + // We hard code it here to avoid the overhead of subprocessing to `xcrun` on + // each Clang invocation, but this may need to be updated to search or + // reflect macOS versions if this changes in the future. + driver.SysRoot = "/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk"; + } + + // If we have a target-specific resource directory, set it as the default + // here. + if (target_resource_dir_path) { + driver.ResourceDir = target_resource_dir_path->str(); + } // Configure the install directory to find other tools and data files. 
// @@ -134,6 +368,14 @@ auto ClangRunner::Run(llvm::ArrayRef args) -> bool { return false; } + // Make sure our target detection matches Clang's. Sadly, we can't just reuse + // Clang's as it is available too late. + // TODO: Use nice diagnostics here rather than a check failure. + CARBON_CHECK(llvm::Triple(target) == llvm::Triple(driver.getTargetTriple()), + "Mismatch between the expected target '{0}' and the one " + "computed by Clang '{1}'", + target, driver.getTargetTriple()); + CARBON_VLOG("Running Clang driver...\n"); llvm::SmallVector> @@ -157,4 +399,98 @@ auto ClangRunner::Run(llvm::ArrayRef args) -> bool { return result == 0 && failing_commands.empty(); } +// TODO: Replace this with a filesystem cache, and proper management of +// concurrent cache access. There are a bunch of hacks in this function that +// aren't realistically reliable for larger use, and only designed as an interim +// state to separate the implementation of the caching behavior. +auto ClangRunner::MakeTmpDir() -> void { + if (!tmp_dir_.empty()) { + return; + } + + // Make a temporary directory exclusive to this builder. + std::filesystem::path tmp_dir; + if (char* tmpdir_env = getenv("TEST_TMPDIR"); tmpdir_env != nullptr) { + tmp_dir = std::string(tmpdir_env); + } else { + tmp_dir = std::filesystem::temp_directory_path(); + } + + // We use the object's address to avoid collisions between two instances, + // whether from two processes or two instances in this process. This is a bit + // of a hack, but should be fine until we implement caching at which + // point we'll need to do something better here. + auto salt = reinterpret_cast(this); + std::string name = llvm::formatv("carbon-runtimes.{0:x}", salt).str(); + std::error_code ec; + int attempt = 0; + // Limit how many tries we make. 
+ constexpr int MaxAttempts = 128; + while (!std::filesystem::create_directory(tmp_dir / name, ec)) { + CARBON_CHECK(!ec || ec == std::errc::file_exists, + "Unable to create temporary directory: {0}", ec.message()); + ++attempt; + CARBON_CHECK(attempt < MaxAttempts, + "Tried too many times to create a temporary directory, last " + "attempted path: {0}", + tmp_dir / name); + + // Try appending an attempt string. + name = llvm::formatv("carbon-runtimes.{0:x}.{1}", salt, attempt).str(); + } + + // When we successfully create a temporary directory, commit it to the object. + tmp_dir_ = tmp_dir / name; +} + +auto ClangRunner::BuildCrtFile(llvm::StringRef target, llvm::StringRef src_file, + llvm::StringRef out_path) -> void { + llvm::SmallString<256> src_path(installation_->llvm_runtime_srcs()); + llvm::sys::path::append(src_path, llvm::sys::path::Style::posix, src_file); + CARBON_VLOG("Building `{0}' from `{1}`...\n", out_path, src_path); + + std::string target_arg = llvm::formatv("--target={0}", target).str(); + CARBON_CHECK(RunTargetIndependentCommand({ + "-no-canonical-prefixes", + target_arg, + "-DCRT_HAS_INITFINI_ARRAY", + "-DEH_USE_FRAME_REGISTRY", + "-O3", + "-fPIC", + "-ffreestanding", + "-std=c11", + "-w", + "-c", + "-o", + out_path, + src_path, + })); +} + +auto ClangRunner::BuildBuiltinsFile(llvm::StringRef target, + llvm::StringRef src_file, + llvm::StringRef out_path) -> void { + llvm::SmallString<256> src_path(installation_->llvm_runtime_srcs()); + llvm::sys::path::append(src_path, llvm::sys::path::Style::posix, src_file); + CARBON_VLOG("Building `{0}' from `{1}`...\n", out_path, src_path); + + std::string target_arg = llvm::formatv("--target={0}", target).str(); + CARBON_CHECK(RunTargetIndependentCommand({ + "-no-canonical-prefixes", + target_arg, + "-O3", + "-fPIC", + "-ffreestanding", + "-fno-builtin", + "-fomit-frame-pointer", + "-fvisibility=hidden", + "-std=c11", + "-w", + "-c", + "-o", + out_path, + src_path, + })); +} + } // namespace Carbon diff --git 
a/toolchain/driver/clang_runner.h b/toolchain/driver/clang_runner.h index 854c5bb85c224..10aa68d6d2982 100644 --- a/toolchain/driver/clang_runner.h +++ b/toolchain/driver/clang_runner.h @@ -5,6 +5,8 @@ #ifndef CARBON_TOOLCHAIN_DRIVER_CLANG_RUNNER_H_ #define CARBON_TOOLCHAIN_DRIVER_CLANG_RUNNER_H_ +#include + #include "clang/Basic/DiagnosticIDs.h" #include "common/ostream.h" #include "llvm/ADT/ArrayRef.h" @@ -43,13 +45,36 @@ class ClangRunner : ToolRunnerBase { // // If `verbose` is passed as true, will enable verbose logging to the // `err_stream` both from the runner and Clang itself. - ClangRunner(const InstallPaths* install_paths, llvm::StringRef target, + ClangRunner(const InstallPaths* install_paths, llvm::IntrusiveRefCntPtr fs, llvm::raw_ostream* vlog_stream = nullptr); + ~ClangRunner(); // Run Clang with the provided arguments. + // + // This works to support all of the Clang commandline, including commands that + // use target-dependent resources like linking. When it detects such commands, + // it will build the necessary resources before running the command. auto Run(llvm::ArrayRef args) -> bool; + // Run Clang with the provided arguments and without any target-dependent + // resources. + // + // This method can be used to avoid building target-dependent resources when + // unnecessary, but not all Clang command lines will work correct. + // Specifically, compile-only commands will typically work, while linking will + // not. + auto RunTargetIndependentCommand(llvm::ArrayRef args) + -> bool; + + // Builds the target-specific resource directory for Clang. + // + // There is a resource directory installed along side the Clang binary that + // contains all the target independent files such as headers. However, for + // target-specific files like runtimes, we build those on demand here and + // return the path. + auto BuildTargetResourceDir(llvm::StringRef target) -> std::string; + // Enable leaking memory. 
// // Clang can avoid deallocating some of its memory to improve compile time. @@ -62,7 +87,19 @@ class ClangRunner : ToolRunnerBase { auto EnableLeakingMemory() -> void { enable_leaking_ = true; } private: - llvm::StringRef target_; + auto RunInternal(llvm::ArrayRef args, llvm::StringRef target, + std::optional target_resource_dir_path) + -> bool; + + auto MakeTmpDir() -> void; + + auto BuildCrtFile(llvm::StringRef target, llvm::StringRef src_file, + llvm::StringRef out_path) -> void; + + auto BuildBuiltinsFile(llvm::StringRef target, llvm::StringRef src_file, + llvm::StringRef out_path) -> void; + + std::filesystem::path tmp_dir_; llvm::IntrusiveRefCntPtr fs_; llvm::IntrusiveRefCntPtr diagnostic_ids_; diff --git a/toolchain/driver/clang_runner_test.cpp b/toolchain/driver/clang_runner_test.cpp index b2ab6e72e6ec1..ce1359896e37c 100644 --- a/toolchain/driver/clang_runner_test.cpp +++ b/toolchain/driver/clang_runner_test.cpp @@ -17,26 +17,58 @@ #include "common/raw_string_ostream.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/Object/Binary.h" +#include "llvm/Object/ObjectFile.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/Program.h" #include "llvm/TargetParser/Host.h" #include "testing/base/capture_std_streams.h" #include "testing/base/file_helpers.h" #include "testing/base/global_exe_path.h" +#include "toolchain/driver/llvm_runner.h" namespace Carbon { namespace { +using ::testing::Eq; using ::testing::HasSubstr; +using ::testing::IsSupersetOf; using ::testing::StrEq; -TEST(ClangRunnerTest, Version) { - RawStringOstream test_os; - const auto install_paths = +// NOLINTNEXTLINE(modernize-use-trailing-return-type): Macro based function. 
+MATCHER_P(TextSymbolNamed, name_matcher, "") { + llvm::Expected name = arg.getName(); + if (auto error = name.takeError()) { + *result_listener << "with an error instead of a name: " << error; + return false; + } + if (!testing::ExplainMatchResult(name_matcher, *name, result_listener)) { + return false; + } + // We have to dig out the section to determine if this was a text symbol. + auto expected_section_it = arg.getSection(); + if (auto error = expected_section_it.takeError()) { + *result_listener << "without a section: " << error; + return false; + } + llvm::object::SectionRef section = **expected_section_it; + if (!section.isText()) { + *result_listener << "in the non-text section: " << *section.getName(); + return false; + } + return true; +} + +class ClangRunnerTest : public ::testing::Test { + public: + InstallPaths install_paths_ = InstallPaths::MakeForBazelRunfiles(Testing::GetExePath()); - std::string target = llvm::sys::getDefaultTargetTriple(); - auto vfs = llvm::vfs::getRealFileSystem(); - ClangRunner runner(&install_paths, target, vfs, &test_os); + llvm::IntrusiveRefCntPtr vfs_ = + llvm::vfs::getRealFileSystem(); +}; + +TEST_F(ClangRunnerTest, Version) { + RawStringOstream test_os; + ClangRunner runner(&install_paths_, vfs_, &test_os); std::string out; std::string err; @@ -51,65 +83,22 @@ TEST(ClangRunnerTest, Version) { // Flush and get the captured stdout to test that this command worked. // We don't care about any particular version, just that it is printed. EXPECT_THAT(out, HasSubstr("clang version")); - // The target should match what we provided. - EXPECT_THAT(out, HasSubstr((llvm::Twine("Target: ") + target).str())); + // The target should match the LLVM default. + EXPECT_THAT(out, HasSubstr((llvm::Twine("Target: ") + + llvm::sys::getDefaultTargetTriple()) + .str())); // Clang's install should be our private LLVM install bin directory. 
EXPECT_THAT(out, HasSubstr(std::string("InstalledDir: ") + - install_paths.llvm_install_bin())); -} - -// It's hard to write a portable and reliable unittest for all the layers of the -// Clang driver because they work hard to interact with the underlying -// filesystem and operating system. For now, we just check that a link command -// is echoed back with plausible contents. -// -// TODO: We should eventually strive to have a more complete setup that lets us -// test more complete Clang functionality here. -TEST(ClangRunnerTest, LinkCommandEcho) { - // Just create some empty files to use in a synthetic link command below. - std::filesystem::path foo_file = *Testing::WriteTestFile("foo.o", ""); - std::filesystem::path bar_file = *Testing::WriteTestFile("bar.o", ""); - - const auto install_paths = - InstallPaths::MakeForBazelRunfiles(Testing::GetExePath()); - RawStringOstream verbose_out; - std::string target = llvm::sys::getDefaultTargetTriple(); - auto vfs = llvm::vfs::getRealFileSystem(); - ClangRunner runner(&install_paths, target, vfs, &verbose_out); - std::string out; - std::string err; - EXPECT_TRUE(Testing::CallWithCapturedOutput( - out, err, - [&] { - return runner.Run( - {"-###", "-o", "binary", foo_file.string(), bar_file.string()}); - })) - << "Verbose output from runner:\n" - << verbose_out.TakeStr() << "\n"; - verbose_out.clear(); - - // Because we use `-###' above, we should just see the command that the Clang - // driver would have run in a subprocess. This will be very architecture - // dependent and have lots of variety, but we expect to see both file strings - // in it the command at least. - EXPECT_THAT(err, HasSubstr(foo_file.string())) << err; - EXPECT_THAT(err, HasSubstr(bar_file.string())) << err; - - // And no non-stderr output should be produced. 
- EXPECT_THAT(out, StrEq("")); + install_paths_.llvm_install_bin())); } -TEST(ClangRunnerTest, DashC) { +TEST_F(ClangRunnerTest, DashC) { std::filesystem::path test_file = *Testing::WriteTestFile("test.cpp", "int test() { return 0; }"); std::filesystem::path test_output = *Testing::WriteTestFile("test.o", ""); - const auto install_paths = - InstallPaths::MakeForBazelRunfiles(Testing::GetExePath()); RawStringOstream verbose_out; - std::string target = llvm::sys::getDefaultTargetTriple(); - auto vfs = llvm::vfs::getRealFileSystem(); - ClangRunner runner(&install_paths, target, vfs, &verbose_out); + ClangRunner runner(&install_paths_, vfs_, &verbose_out); std::string out; std::string err; EXPECT_TRUE(Testing::CallWithCapturedOutput( @@ -127,7 +116,7 @@ TEST(ClangRunnerTest, DashC) { EXPECT_THAT(err, StrEq("")); } -TEST(ClangRunnerTest, BuitinHeaders) { +TEST_F(ClangRunnerTest, BuitinHeaders) { std::filesystem::path test_file = *Testing::WriteTestFile("test.c", R"cpp( #include @@ -137,12 +126,8 @@ TEST(ClangRunnerTest, BuitinHeaders) { )cpp"); std::filesystem::path test_output = *Testing::WriteTestFile("test.o", ""); - const auto install_paths = - InstallPaths::MakeForBazelRunfiles(Testing::GetExePath()); RawStringOstream verbose_out; - std::string target = llvm::sys::getDefaultTargetTriple(); - auto vfs = llvm::vfs::getRealFileSystem(); - ClangRunner runner(&install_paths, target, vfs, &verbose_out); + ClangRunner runner(&install_paths_, vfs_, &verbose_out); std::string out; std::string err; EXPECT_TRUE(Testing::CallWithCapturedOutput( @@ -160,10 +145,7 @@ TEST(ClangRunnerTest, BuitinHeaders) { EXPECT_THAT(err, StrEq("")); } -TEST(ClangRunnerTest, CompileMultipleFiles) { - const auto install_paths = - InstallPaths::MakeForBazelRunfiles(Testing::GetExePath()); - +TEST_F(ClangRunnerTest, CompileMultipleFiles) { // Memory leaks and other errors from running Clang can at times only manifest // with repeated compilations. Use a lambda to just do a series of compiles. 
auto compile = [&](llvm::StringRef filename, llvm::StringRef source) { @@ -172,9 +154,7 @@ TEST(ClangRunnerTest, CompileMultipleFiles) { std::filesystem::path output = *Testing::WriteTestFile(output_file, ""); RawStringOstream verbose_out; - std::string target = llvm::sys::getDefaultTargetTriple(); - auto vfs = llvm::vfs::getRealFileSystem(); - ClangRunner runner(&install_paths, target, vfs, &verbose_out); + ClangRunner runner(&install_paths_, vfs_, &verbose_out); std::string out; std::string err; EXPECT_TRUE(Testing::CallWithCapturedOutput( @@ -195,5 +175,109 @@ TEST(ClangRunnerTest, CompileMultipleFiles) { compile("test3.cpp", "int test3() { return 0; }"); } +TEST_F(ClangRunnerTest, BuildResourceDir) { + ClangRunner runner(&install_paths_, vfs_, &llvm::errs()); + + // Note that we can't test arbitrary targets here as we need to be able to + // compile the builtin functions for the target. We use the default target as + // the most likely to pass. + std::string target = llvm::sys::getDefaultTargetTriple(); + llvm::Triple target_triple(target); + std::filesystem::path resource_path = runner.BuildTargetResourceDir(target); + + // For Linux we can directly check the CRT begin/end object files. + if (target_triple.isOSLinux()) { + std::filesystem::path crt_begin_path = + resource_path / "lib" / target / "clang_rt.crtbegin.o"; + ASSERT_TRUE(std::filesystem::is_regular_file(crt_begin_path)); + auto begin_result = + llvm::object::ObjectFile::createObjectFile(crt_begin_path.native()); + llvm::object::ObjectFile& crtbegin = *begin_result->getBinary(); + EXPECT_TRUE(crtbegin.isELF()); + EXPECT_TRUE(crtbegin.isObject()); + EXPECT_THAT(crtbegin.getArch(), Eq(target_triple.getArch())); + + llvm::SmallVector symbols(crtbegin.symbols()); + // The first symbol should come from the source file. 
+ EXPECT_THAT(*symbols.front().getName(), Eq("crtbegin.c")); + + // Check for representative symbols of `crtbegin.o` -- we always use + // `.init_array` in our runtimes build so we have predictable functions. + EXPECT_THAT(symbols, IsSupersetOf({TextSymbolNamed("__do_init"), + TextSymbolNamed("__do_fini")})); + + std::filesystem::path crt_end_path = + resource_path / "lib" / target / "clang_rt.crtend.o"; + ASSERT_TRUE(std::filesystem::is_regular_file(crt_end_path)); + auto end_result = + llvm::object::ObjectFile::createObjectFile(crt_end_path.native()); + llvm::object::ObjectFile& crtend = *end_result->getBinary(); + EXPECT_TRUE(crtend.isELF()); + EXPECT_TRUE(crtend.isObject()); + EXPECT_THAT(crtend.getArch(), Eq(target_triple.getArch())); + + // Just check the source file symbol, not much of interest in the end. + llvm::object::SymbolRef crtend_front_symbol = *crtend.symbol_begin(); + EXPECT_THAT(*crtend_front_symbol.getName(), Eq("crtend.c")); + } + + // Across all targets, check that the builtins archive exists, and contains a + // relevant symbol by running the `llvm-nm` tool over it. + std::filesystem::path builtins_path = + resource_path / "lib" / target / "libclang_rt.builtins.a"; + LLVMRunner llvm_runner(&install_paths_, &llvm::errs()); + std::string out; + std::string err; + EXPECT_TRUE(Testing::CallWithCapturedOutput(out, err, [&] { + return llvm_runner.Run(LLVMTool::Nm, {builtins_path.native()}); + })); + // Check that we found a definition of `__mulodi4`, a builtin function + // provided by Compiler-RT, but not `libgcc` historically. Note that on macOS + // there is a leading `_` due to mangling. + EXPECT_THAT(out, HasSubstr(target_triple.isMacOSX() ? "T ___mulodi4\n" + : "T __mulodi4\n")); +} + +// It's hard to write a portable and reliable unittest for all the layers of the +// Clang driver because they work hard to interact with the underlying +// filesystem and operating system. 
For now, we just check that a link command +// is echoed back with plausible contents. +// +// TODO: We should eventually strive to have a more complete setup that lets us +// test more complete Clang functionality here. +TEST_F(ClangRunnerTest, LinkCommandEcho) { + // Just create some empty files to use in a synthetic link command below. + std::filesystem::path foo_file = *Testing::WriteTestFile("foo.o", ""); + std::filesystem::path bar_file = *Testing::WriteTestFile("bar.o", ""); + + RawStringOstream verbose_out; + ClangRunner runner(&install_paths_, vfs_, &verbose_out); + std::string out; + std::string err; + EXPECT_TRUE(Testing::CallWithCapturedOutput( + out, err, + [&] { + // Note that we use the target independent run command here because + // we're just getting the echo-ed output back. For this to actually + // link, we'd need to have the target-dependent resources, but those are + // expensive to build so we only want to test them once (above). + return runner.RunTargetIndependentCommand( + {"-###", "-o", "binary", foo_file.string(), bar_file.string()}); + })) + << "Verbose output from runner:\n" + << verbose_out.TakeStr() << "\n"; + verbose_out.clear(); + + // Because we use `-###' above, we should just see the command that the Clang + // driver would have run in a subprocess. This will be very architecture + // dependent and have lots of variety, but we expect to see both file strings + // in it the command at least. + EXPECT_THAT(err, HasSubstr(foo_file.string())) << err; + EXPECT_THAT(err, HasSubstr(bar_file.string())) << err; + + // And no non-stderr output should be produced. 
+ EXPECT_THAT(out, StrEq("")); +} + } // namespace } // namespace Carbon diff --git a/toolchain/driver/clang_subcommand.cpp b/toolchain/driver/clang_subcommand.cpp index 6911cc75f33fe..40e458db23f14 100644 --- a/toolchain/driver/clang_subcommand.cpp +++ b/toolchain/driver/clang_subcommand.cpp @@ -46,8 +46,7 @@ ClangSubcommand::ClangSubcommand() : DriverSubcommand(SubcommandInfo) {} // add more. // https://github.com/llvm/llvm-project/blob/main/clang/tools/driver/driver.cpp auto ClangSubcommand::Run(DriverEnv& driver_env) -> DriverResult { - std::string target = llvm::sys::getDefaultTargetTriple(); - ClangRunner runner(driver_env.installation, target, driver_env.fs, + ClangRunner runner(driver_env.installation, driver_env.fs, driver_env.vlog_stream); // Don't run Clang when fuzzing, it is known to not be reliable under fuzzing diff --git a/toolchain/driver/link_subcommand.cpp b/toolchain/driver/link_subcommand.cpp index 60e53f1369b2f..ef15801f40405 100644 --- a/toolchain/driver/link_subcommand.cpp +++ b/toolchain/driver/link_subcommand.cpp @@ -94,6 +94,11 @@ auto LinkSubcommand::Run(DriverEnv& driver_env) -> DriverResult { // We link using a C++ mode of the driver. clang_args.push_back("--driver-mode=g++"); + // Pass the target down to Clang to pick up the correct defaults. + std::string target_arg = + llvm::formatv("--target={0}", options_.codegen_options.target).str(); + clang_args.push_back(target_arg); + // Use LLD, which we provide in our install directory, for linking. 
clang_args.push_back("-fuse-ld=lld"); @@ -113,8 +118,8 @@ auto LinkSubcommand::Run(DriverEnv& driver_env) -> DriverResult { clang_args.append(options_.object_filenames.begin(), options_.object_filenames.end()); - ClangRunner runner(driver_env.installation, options_.codegen_options.target, - driver_env.fs, driver_env.vlog_stream); + ClangRunner runner(driver_env.installation, driver_env.fs, + driver_env.vlog_stream); return {.success = runner.Run(clang_args)}; } diff --git a/toolchain/driver/lld_runner_test.cpp b/toolchain/driver/lld_runner_test.cpp index 539bd22ce4920..7b55ee58305f6 100644 --- a/toolchain/driver/lld_runner_test.cpp +++ b/toolchain/driver/lld_runner_test.cpp @@ -85,7 +85,7 @@ static auto CompileTwoSources(const InstallPaths& install_paths, // First compile the two source files to `.o` files with Clang. RawStringOstream verbose_out; auto vfs = llvm::vfs::getRealFileSystem(); - ClangRunner clang(&install_paths, target, vfs, &verbose_out); + ClangRunner clang(&install_paths, vfs, &verbose_out); std::string target_arg = llvm::formatv("--target={0}", target).str(); std::string out; std::string err; diff --git a/toolchain/install/BUILD b/toolchain/install/BUILD index a8a10fe297049..aad54f57101a2 100644 --- a/toolchain/install/BUILD +++ b/toolchain/install/BUILD @@ -10,6 +10,7 @@ load("@rules_python//python:defs.bzl", "py_test") load("//bazel/cc_rules:defs.bzl", "cc_binary", "cc_library", "cc_test") load("//bazel/manifest:defs.bzl", "manifest") load("//toolchain/base:llvm_tools.bzl", "LLVM_MAIN_TOOLS", "LLVM_TOOL_ALIASES") +load("//toolchain/base:runtime_sources.bzl", "RUNTIME_SRCS") load("install_filegroups.bzl", "install_filegroup", "install_symlink", "install_target", "make_install_filegroups") load("pkg_helpers.bzl", "pkg_naming_variables", "pkg_tar_and_test") @@ -33,6 +34,7 @@ cc_library( "//common:error", "//toolchain/base:llvm_tools", "@bazel_tools//tools/cpp/runfiles", + "@llvm-project//clang:basic", "@llvm-project//llvm:Support", ], ) @@ -181,6 
+183,13 @@ install_dirs = { "lib/carbon/llvm/lib/clang/" + LLVM_VERSION_MAJOR: [ install_filegroup("include", ":clang_headers", "staging/include/"), ], + "lib/carbon/llvm/lib/clang/" + LLVM_VERSION_MAJOR + "/src/builtins": [ + install_target( + src, + "@llvm-project//compiler-rt:lib/builtins/" + src, + ) + for src in RUNTIME_SRCS + ], } make_install_filegroups( diff --git a/toolchain/install/busybox_info.h b/toolchain/install/busybox_info.h index 926350e9e2efc..f23c77b6bafb9 100644 --- a/toolchain/install/busybox_info.h +++ b/toolchain/install/busybox_info.h @@ -35,8 +35,19 @@ struct BusyboxInfo { // If unable to locate a plausible busybox binary, returns an error instead. inline auto GetBusyboxInfo(llvm::StringRef argv0) -> ErrorOr { // Check for an override of `argv[0]` from the environment and apply it. + std::string argv0_override_storage; if (const char* argv0_override = getenv(Argv0OverrideEnv)) { - argv0 = argv0_override; + // Capture the override into local storage here so we can clear it from the + // environment. + argv0_override_storage = argv0_override; + argv0 = argv0_override_storage; + + // Unset the override environment variable so that subsequent subprocessing + // back into the busybox doesn't pick it up and uses the subprocesses's + // argv[0]. This is essential to, for example, allow the `clang` driver to + // subprocess back into a `clang` invocation with an internal CC1 + // commandline. + unsetenv(Argv0OverrideEnv); } BusyboxInfo info = {.bin_path = argv0.str(), .mode = std::nullopt}; diff --git a/toolchain/install/busybox_info_test.cpp b/toolchain/install/busybox_info_test.cpp index dd8010c738152..75710c018103a 100644 --- a/toolchain/install/busybox_info_test.cpp +++ b/toolchain/install/busybox_info_test.cpp @@ -290,8 +290,6 @@ TEST_F(BusyboxInfoTest, RejectSymlinkInUnrelatedInstall) { TEST_F(BusyboxInfoTest, EnvBinaryPathOverride) { // The test should not have this environment variable set. 
ASSERT_THAT(getenv(Argv0OverrideEnv), Eq(nullptr)); - // Clean up this environment variable when this test finishes. - auto _ = llvm::make_scope_exit([] { unsetenv(Argv0OverrideEnv); }); // Set the environment to our actual busybox. auto busybox = MakeFile(dir_ / "carbon-busybox"); @@ -301,6 +299,9 @@ TEST_F(BusyboxInfoTest, EnvBinaryPathOverride) { ASSERT_TRUE(info.ok()) << info.error(); EXPECT_THAT(info->bin_path, Eq(busybox)); EXPECT_THAT(info->mode, Eq(std::nullopt)); + + // Make sure that we cleaned up the environment afterward. + EXPECT_THAT(getenv(Argv0OverrideEnv), Eq(nullptr)); } } // namespace diff --git a/toolchain/install/install_paths.cpp b/toolchain/install/install_paths.cpp index 0bd18db6e2663..1ee478dfe0871 100644 --- a/toolchain/install/install_paths.cpp +++ b/toolchain/install/install_paths.cpp @@ -7,6 +7,7 @@ #include #include +#include "clang/Basic/Version.h" #include "common/check.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" @@ -204,4 +205,22 @@ auto InstallPaths::llvm_tool_path(LLVMTool tool) const -> std::string { return path.str().str(); } +auto InstallPaths::clang_resource_path() const -> std::string { + llvm::SmallString<256> path(prefix_); + // TODO: Adjust this to work equally well on Windows. + llvm::sys::path::append( + path, llvm::sys::path::Style::posix, + "lib/carbon/llvm/lib/clang/" CLANG_VERSION_MAJOR_STRING); + return path.str().str(); +} + +auto InstallPaths::llvm_runtime_srcs() const -> std::string { + llvm::SmallString<256> path(prefix_); + // TODO: Adjust this to work equally well on Windows. 
+ llvm::sys::path::append( + path, llvm::sys::path::Style::posix, + "lib/carbon/llvm/lib/clang/" CLANG_VERSION_MAJOR_STRING "/src"); + return path.str().str(); +} + } // namespace Carbon diff --git a/toolchain/install/install_paths.h b/toolchain/install/install_paths.h index fa3bc9e24f3cd..a1220d06bca35 100644 --- a/toolchain/install/install_paths.h +++ b/toolchain/install/install_paths.h @@ -96,6 +96,12 @@ class InstallPaths { // The path to any of the LLVM tools. auto llvm_tool_path(LLVMTool tool) const -> std::string; + // The path to the Clang resources. + auto clang_resource_path() const -> std::string; + + // The path to the root of LLVM runtime sources. + auto llvm_runtime_srcs() const -> std::string; + private: friend class InstallPathsTestPeer; From 3b4b4e9a9b1e2d727cbd58dcb9cc600692dd75e8 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Mon, 21 Apr 2025 20:11:17 -0700 Subject: [PATCH 02/15] Apply suggestions from code review Co-authored-by: Jon Ross-Perkins --- toolchain/base/runtime_sources.bzl | 6 +++--- toolchain/driver/clang_runner.h | 2 +- toolchain/install/busybox_info.h | 5 ++--- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/toolchain/base/runtime_sources.bzl b/toolchain/base/runtime_sources.bzl index a9f41f8bfcc94..443285eb5c92a 100644 --- a/toolchain/base/runtime_sources.bzl +++ b/toolchain/base/runtime_sources.bzl @@ -390,13 +390,13 @@ constexpr inline llvm::StringLiteral BuiltinsI386Srcs[] = {{ """ def _make_srcs_strings(dir, srcs): - return "\n".join([' "' + dir + "/" + src + '",' for src in srcs]) + return "\n".join([' "{0}/{1}",'.format(dir, src) for src in srcs]) def _generate_runtime_sources_h_rule(ctx): h_file = ctx.actions.declare_file(ctx.label.name) ctx.actions.write(h_file, _TEMPLATE.format( - crtbegin = '"builtins/' + BUILTINS_CRTBEGIN + '"', - crtend = '"builtins/' + BUILTINS_CRTEND + '"', + crtbegin = '"builtins/{0}"'.format(BUILTINS_CRTBEGIN), + crtend = '"builtins/{0}"'.format(BUILTINS_CRTEND), generic_srcs = 
_make_srcs_strings("builtins", BUILTINS_GENERIC_SRCS), macos_srcs = _make_srcs_strings("builtins", BUILTINS_MACOS_SRCS), bf16_srcs = _make_srcs_strings("builtins", BUILTINS_BF16_SRCS), diff --git a/toolchain/driver/clang_runner.h b/toolchain/driver/clang_runner.h index 10aa68d6d2982..c38161958d6c1 100644 --- a/toolchain/driver/clang_runner.h +++ b/toolchain/driver/clang_runner.h @@ -61,7 +61,7 @@ class ClangRunner : ToolRunnerBase { // resources. // // This method can be used to avoid building target-dependent resources when - // unnecessary, but not all Clang command lines will work correct. + // unnecessary, but not all Clang command lines will work correctly. // Specifically, compile-only commands will typically work, while linking will // not. auto RunTargetIndependentCommand(llvm::ArrayRef args) diff --git a/toolchain/install/busybox_info.h b/toolchain/install/busybox_info.h index f23c77b6bafb9..41e25adce4929 100644 --- a/toolchain/install/busybox_info.h +++ b/toolchain/install/busybox_info.h @@ -43,9 +43,8 @@ inline auto GetBusyboxInfo(llvm::StringRef argv0) -> ErrorOr { argv0 = argv0_override_storage; // Unset the override environment variable so that subsequent subprocessing - // back into the busybox doesn't pick it up and uses the subprocesses's - // argv[0]. This is essential to, for example, allow the `clang` driver to - // subprocess back into a `clang` invocation with an internal CC1 + // back into the busybox doesn't pick it up. This is essential to, for example, allow the `clang` driver to + // subprocess back into a `clang` busyboxed binary with an internal CC1 // commandline. 
unsetenv(Argv0OverrideEnv); } From 01fab5ce51448c3ba5745e9de3b222bab4ed45d8 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Mon, 21 Apr 2025 22:43:19 -0700 Subject: [PATCH 03/15] Apply suggestions from code review Co-authored-by: Jon Ross-Perkins --- toolchain/driver/clang_runner.cpp | 30 +++++++++--------------------- 1 file changed, 9 insertions(+), 21 deletions(-) diff --git a/toolchain/driver/clang_runner.cpp b/toolchain/driver/clang_runner.cpp index 779d66973fc62..93d56f7eb2f4a 100644 --- a/toolchain/driver/clang_runner.cpp +++ b/toolchain/driver/clang_runner.cpp @@ -59,7 +59,7 @@ ClangRunner::ClangRunner(const InstallPaths* install_paths, ClangRunner::~ClangRunner() { if (!tmp_dir_.empty()) { std::error_code ec; - if (auto number = std::filesystem::remove_all(tmp_dir_, ec); number == 0) { + if (std::filesystem::remove_all(tmp_dir_, ec) == 0) { CARBON_VLOG("Expected a temporary directory and found none: {0}", tmp_dir_); } else if (ec) { @@ -118,7 +118,7 @@ static auto IsNonLinkCommand(llvm::ArrayRef args) -> bool { return true; } // Everything else is rare and likely not worth the cost of searching for - // since its fine to have false negatives. + // since it's fine to have false negatives. 
return false; }); } @@ -167,10 +167,8 @@ auto ClangRunner::BuildTargetResourceDir(llvm::StringRef target) std::filesystem::create_symlink(install_resource_path / "share", resource_dir_path / "share", ec); - std::filesystem::path lib_path = resource_dir_path / "lib"; - std::filesystem::create_directory(lib_path, ec); - lib_path /= target.str(); - std::filesystem::create_directory(lib_path, ec); + std::filesystem::path lib_path = resource_dir_path / "lib" / target.str(); + std::filesystem::create_directories(lib_path, ec); llvm::Triple target_triple(target); CARBON_CHECK(!target_triple.isOSWindows(), @@ -222,18 +220,8 @@ auto ClangRunner::BuildTargetResourceDir(llvm::StringRef target) llvm::SmallVector objs; objs.reserve(src_files.size()); for (llvm::StringRef src_file : src_files) { - llvm::StringRef obj_file = src_file; - std::filesystem::path obj_path = objs_path; - for (;;) { - auto slash_offset = obj_file.find('/'); - if (slash_offset == llvm::StringRef::npos) { - break; - } - obj_path.append(obj_file.begin(), obj_file.begin() + slash_offset); - std::filesystem::create_directory(obj_path, ec); - obj_file = obj_file.drop_front(slash_offset + 1); - } - obj_path.append(obj_file.begin(), obj_file.end()); + std::filesystem::path obj_path = src_file; + std::filesystem::create_directories(obj_path.parent_path(), ec); obj_path += ".o"; BuildBuiltinsFile(target, src_file, obj_path.native()); @@ -410,8 +398,8 @@ auto ClangRunner::MakeTmpDir() -> void { // Make a temporary directory exclusive to this builder. 
std::filesystem::path tmp_dir; - if (char* tmpdir_env = getenv("TEST_TMPDIR"); tmpdir_env != nullptr) { - tmp_dir = std::string(tmpdir_env); + if (const char* tmpdir_env = getenv("TEST_TMPDIR"); tmpdir_env != nullptr) { + tmp_dir = tmpdir_env; } else { tmp_dir = std::filesystem::temp_directory_path(); } @@ -439,7 +427,7 @@ auto ClangRunner::MakeTmpDir() -> void { name = llvm::formatv("carbon-runtimes.{0:x}.{1}", salt, attempt).str(); } - // WHen we successfully create a temporary directory, commit it to the object. + // When we successfully create a temporary directory, commit it to the object. tmp_dir_ = tmp_dir / name; } From 6aebf14cc3720692fe66225420aba0a5c0f0a5d9 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Tue, 22 Apr 2025 07:27:13 +0000 Subject: [PATCH 04/15] review feedback --- toolchain/base/runtime_sources.bzl | 83 ++++++------ toolchain/driver/clang_runner.cpp | 204 +++++++++++++++++------------ toolchain/driver/clang_runner.h | 19 +++ 3 files changed, 184 insertions(+), 122 deletions(-) diff --git a/toolchain/base/runtime_sources.bzl b/toolchain/base/runtime_sources.bzl index 443285eb5c92a..dc468d1cd8ff3 100644 --- a/toolchain/base/runtime_sources.bzl +++ b/toolchain/base/runtime_sources.bzl @@ -2,12 +2,26 @@ # Exceptions. See /LICENSE for license information. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -"""Provides variables and rules to automate working with LLVM's CLI tools.""" +"""Provides variables and rules to work with Clang's runtime library sources. + +These are organized into groups based on the runtime functionality: +- CRT: The C language runtimes not provided by the C standard library, currently + just infrastructure for global initialization and teardown. +- Builtins: The compiler builtins library mirroring `libgcc` that provides + function definitions for operations not reliably available in hardware but + needed by Clang.
+ +Future runtimes we plan to add support for but not yet included: +- Libunwind +- Libc++ and libc++abi +- Sanitizers +- Profiling runtimes +""" load("@rules_cc//cc:cc_library.bzl", "cc_library") -BUILTINS_CRTBEGIN = "crtbegin.c" -BUILTINS_CRTEND = "crtend.c" +CRTBEGIN_SRC = "crtbegin.c" +CRTEND_SRC = "crtend.c" BUILTINS_BASE_SRCS = [ # Internal headers. @@ -199,8 +213,22 @@ BUILTINS_TF_SRCS = [ ] BUILTINS_HOSTED_SRCS = [ - #"emutls.c", - #"enable_execute_stack.c", + # Note that LLVM's CompilerRT provides a few hosted sources that we don't + # currently build: + # - `emutls.c`: Unclear we need to support targets with software emulated + # TLS rather than hardware support. + # - `enable_execute_stack.c`: Used to implement support for a builtin that + # marks part of the stack as *executable* to support the GCC extension of + # nested functions. This extension was never implemented in Clang, and is + # generally considered a security issue to include. We expect to be able + # to avoid even linking the support code for this into binaries at this + # point. + # - `eprintf.c`: This provided a legacy `__eprintf` builtin used by old + # versions of `assert.h` in its macros, but does not appear to be needed + # when building with modern versions of this header. + # + # We keep the hosted source list in case other sources are added to the + # hosted set that we _do_ want to support. ] BUILTINS_ATOMIC_SRCS = [ @@ -297,8 +325,8 @@ BUILTINS_I386_SRCS = [ ] RUNTIME_SRCS = [ - BUILTINS_CRTBEGIN, - BUILTINS_CRTEND, + CRTBEGIN_SRC, + CRTEND_SRC, ] + [ # Flatten the individual lists of source files into one main list. We use a # nested loop list comprehension as it formats a bit more cleanly than `+`.
@@ -316,33 +344,13 @@ RUNTIME_SRCS = [ for src in srcs ] -# "aarch64-unknown-linux-gnu": BUILTINS_GENERIC_SRCS + BUILTINS_BF16_SRCS + BUILTINS_TF_SRCS -# "x86_64-unknown-linux-gnu": BUILTINS_GENERIC_SRCS + BUILTINS_BF16_SRCS + BUILTINS_TF_SRCS + BUILTINS_X86_ARCH_SRCS + BUILTINS_X86_FP80_SRCS + [ -# "i386-unknown-linux-gnu": BUILTINS_GENERIC_SRCS + BUILTINS_BF16_SRCS + BUILTINS_X86_ARCH_SRCS + BUILTINS_X86_FP80_SRCS + [ - -#[carbon_cc_runtime_static_library( -# name = target + "/libclang_rt.builtins.a", -# srcs = ["@llvm-project//compiler-rt:lib/builtins/" + src for src in BUILTINS_TARGET_SRCS[target]], -# copts = [ -# "--target=" + target, -# "-std=c11", -# "-fPIC", -# "-ffreestanding", -# "-fno-builtin", -# "-fvisibility=hidden", -# "-fomit-frame-pointer", -# ], -#) for target in TARGETS] -# The main LLVM command line tools, including their "primary" name, binary name, -# and the library dependency required to use them. - _TEMPLATE = """ // Part of the Carbon Language project, under the Apache License v2.0 with LLVM // Exceptions. See /LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// Generated header file of strings describing the runtime library sources -// needed by various Carbon runtime libraries. +// Generated header file of strings describing the Clang runtime library source +// files. // // See toolchain/driver/runtime_sources.bzl for more details. 
@@ -395,8 +403,8 @@ def _make_srcs_strings(dir, srcs): def _generate_runtime_sources_h_rule(ctx): h_file = ctx.actions.declare_file(ctx.label.name) ctx.actions.write(h_file, _TEMPLATE.format( - crtbegin = '"builtins/{0}"'.format(BUILTINS_CRTBEGIN), - crtend = '"builtins/{0}"'.format(BUILTINS_CRTEND), + crtbegin = '"builtins/{0}"'.format(CRTBEGIN_SRC), + crtend = '"builtins/{0}"'.format(CRTEND_SRC), generic_srcs = _make_srcs_strings("builtins", BUILTINS_GENERIC_SRCS), macos_srcs = _make_srcs_strings("builtins", BUILTINS_MACOS_SRCS), bf16_srcs = _make_srcs_strings("builtins", BUILTINS_BF16_SRCS), @@ -415,17 +423,14 @@ generate_runtime_sources_h = rule( ) def generate_runtime_sources_cc_library(name, **kwargs): - """Generates the LLVM tools `.def` file. + """Generates a `runtime_sources.h` header and a `cc_library` rule for it. - This first generates the `.def` file into the `out` filename, and then - synthesizes a `cc_library` rule exporting that file in its `textual_hdrs`. + This first generates the header file with variables describing the runtime + sources from Clang, and then a `cc_library` that exports that header. The `cc_library` rule name is the provided `name` and should be depended on - by code that includes the generated file. The `kwargs` are expanded into the - `cc_library` in case other attributes need to be configured there. - - The two-step process is necessary to avoid trying to compile or otherwise - process the generated file as something other than a textual header. + by code that includes the generated header. The `kwargs` are expanded into + the `cc_library` in case other attributes need to be configured there. 
""" generate_runtime_sources_h(name = "runtime_sources.h") cc_library( diff --git a/toolchain/driver/clang_runner.cpp b/toolchain/driver/clang_runner.cpp index 93d56f7eb2f4a..a105748ab9e9a 100644 --- a/toolchain/driver/clang_runner.cpp +++ b/toolchain/driver/clang_runner.cpp @@ -69,19 +69,43 @@ ClangRunner::~ClangRunner() { } } -static auto ComputeTarget(llvm::ArrayRef args) -> std::string { +// Searches an argument list to a Clang execution to determine the expected +// target string, suitable for use with `llvm::Triple`, should be used. +// +// If no explicit target flags are present, this defaults to the default +// LLVM target. +// +// Works to handle the most common flags that modify the expected target such as +// well as direct target flags. +// +// Note: this has known fidelity issues if the args include separate-value flags +// (`--flag value` style as opposed to `--flag=value`) where the value might +// match the spelling of one of the target flags. For example, args that include +// an output file spelled `-m32` (so `-o` followed by `-m32`) will be +// misinterpreted by considering the value to itself be a flag. Addressing this +// would add substantial complexity, including likely parsing the entire args +// twice with the Clang driver. Instead, our current plan is to document this +// limitation and encourage the use of flags with joined values +// (`--flag=value`). 
+static auto ComputeClangTarget(llvm::ArrayRef args) + -> std::string { std::string target = llvm::sys::getDefaultTargetTriple(); + bool explicit_target = false; for (auto [i, arg] : llvm::enumerate(args)) { if (llvm::StringRef arg_copy = arg; arg_copy.consume_front("--target=")) { target = arg_copy.str(); + explicit_target = true; } else if ((arg == "--target" || arg == "-target") && (i + 1) < args.size()) { target = args[i + 1].str(); - } else if (arg == "--driver-mode=cl" || - ((arg == "--driver-mode" || arg == "-driver-mode") && - (i + 1) < args.size() && args[i + 1] == "cl")) { - // The `cl.exe` compatible driver mode should force to a - // `...-pc-windows-msvc` target. + explicit_target = true; + } else if (!explicit_target && + (arg == "--driver-mode=cl" || + ((arg == "--driver-mode" || arg == "-driver-mode") && + (i + 1) < args.size() && args[i + 1] == "cl"))) { + // The `cl.exe` compatible driver mode should switch the default target to + // a `...-pc-windows-msvc` target. However, a subsequent explicit target + // should override this. llvm::Triple triple(target); triple.setVendor(llvm::Triple::PC); triple.setOS(llvm::Triple::Win32); @@ -108,17 +132,11 @@ static auto ComputeTarget(llvm::ArrayRef args) -> std::string { static auto IsNonLinkCommand(llvm::ArrayRef args) -> bool { return llvm::any_of(args, [](llvm::StringRef arg) { // Only check the most common cases as we have to do this for each argument. - if (arg.empty() || arg[0] != '-') { - return false; - } - if (arg.size() == 2) { - return arg[1] == 'c' || arg[1] == 'E' || arg[1] == 'S'; - } - if (arg == "-fsyntax-only" || arg == "--version" || "--help" || "/?") { - return true; - } // Everything else is rare and likely not worth the cost of searching for // since it's fine to have false negatives. 
+ return arg == "-c" || arg == "-E" || arg == "-S" || + arg == "-fsyntax-only" || arg == "--version" || arg == "--help" || + arg == "/?"; return false; }); } @@ -134,26 +152,26 @@ auto ClangRunner::Run(llvm::ArrayRef args) -> bool { // Otherwise, we need to build a target resource directory. CARBON_VLOG("Building target resource dir...\n"); - - // Disable any leaking of memory while building the target resource dir, and - // restore the previous setting at the end. - auto restore_leak_flag = llvm::make_scope_exit( - [&, orig_flag = enable_leaking_] { enable_leaking_ = orig_flag; }); - enable_leaking_ = false; - - std::string target = ComputeTarget(args); + std::string target = ComputeClangTarget(args); std::string resource_dir_path = BuildTargetResourceDir(target); + return RunInternal(args, target, resource_dir_path); } auto ClangRunner::RunTargetIndependentCommand( llvm::ArrayRef args) -> bool { - std::string target = ComputeTarget(args); + std::string target = ComputeClangTarget(args); return RunInternal(args, target, std::nullopt); } auto ClangRunner::BuildTargetResourceDir(llvm::StringRef target) -> std::string { + // Disable any leaking of memory while building the target resource dir, and + // restore the previous setting at the end. 
+ auto restore_leak_flag = llvm::make_scope_exit( + [&, orig_flag = enable_leaking_] { enable_leaking_ = orig_flag; }); + enable_leaking_ = false; + MakeTmpDir(); std::filesystem::path resource_dir_path = tmp_dir_ / "clang"; std::error_code ec; @@ -184,65 +202,11 @@ auto ClangRunner::BuildTargetResourceDir(llvm::StringRef target) BuildCrtFile(target, RuntimeSources::CrtEnd, end_o_path.native()); } - llvm::SmallVector src_files; - auto append_src_files = [&src_files](auto input_srcs) { - for (llvm::StringRef input_src : input_srcs) { - if (input_src.ends_with(".c") || input_src.ends_with(".S")) { - src_files.push_back(input_src); - } - } - }; - append_src_files(llvm::ArrayRef(RuntimeSources::BuiltinsGenericSrcs)); - append_src_files(llvm::ArrayRef(RuntimeSources::BuiltinsBf16Srcs)); - if (target_triple.isArch64Bit()) { - append_src_files(llvm::ArrayRef(RuntimeSources::BuiltinsTfSrcs)); - } - if (target_triple.isAArch64()) { - append_src_files(llvm::ArrayRef(RuntimeSources::BuiltinsAarch64Srcs)); - } else if (target_triple.isX86()) { - append_src_files(llvm::ArrayRef(RuntimeSources::BuiltinsX86ArchSrcs)); - if (target_triple.isArch64Bit()) { - append_src_files(llvm::ArrayRef(RuntimeSources::BuiltinsX86_64Srcs)); - } else { - CARBON_CHECK( - target_triple.isArch32Bit(), - "The Carbon toolchain doesn't currently support 16-bit x86."); - append_src_files(llvm::ArrayRef(RuntimeSources::BuiltinsI386Srcs)); - } - } else { - CARBON_FATAL("Target architecture is not supported: {0}", target); - } - // We need a separate directory for object files used in the builtins archive. 
std::filesystem::path objs_path = tmp_dir_ / "_objs"; std::filesystem::create_directory(objs_path, ec); - - llvm::SmallVector objs; - objs.reserve(src_files.size()); - for (llvm::StringRef src_file : src_files) { - std::filesystem::path obj_path = src_file; - std::filesystem::create_directories(obj_path.parent_path(), ec); - obj_path += ".o"; - BuildBuiltinsFile(target, src_file, obj_path.native()); - - llvm::Expected obj = - llvm::NewArchiveMember::getFile(obj_path.native(), - /*Deterministic=*/true); - CARBON_CHECK(obj, "TODO: Diagnose this: {0}", - llvm::fmt_consume(obj.takeError())); - objs.push_back(std::move(*obj)); - } - - // Now build an archive out of the `.o` files for the builtins. std::filesystem::path builtins_a_path = lib_path / "libclang_rt.builtins.a"; - llvm::raw_fd_ostream builtins_a_os(builtins_a_path.native(), ec); - CARBON_CHECK(!ec, "Unable to open archive for writing: {0}", ec.message()); - llvm::Error archive_err = llvm::writeArchiveToStream( - builtins_a_os, objs, llvm::SymtabWritingMode::NormalSymtab, - target_triple.isOSDarwin() ? llvm::object::Archive::K_DARWIN - : llvm::object::Archive::K_GNU, - /*Deterministic=*/true, /*Thin=*/false); - CARBON_CHECK(!archive_err, "Error writing archive: {0}", archive_err); + BuildBuiltinsLib(target, target_triple, objs_path, builtins_a_path.native()); return resource_dir_path; } @@ -387,10 +351,6 @@ auto ClangRunner::RunInternal( return result == 0 && failing_commands.empty(); } -// TODO: Replace this with a filesystem cache, and proper management of -// concurrent cache access. There are a bunch of hacks in this function that -// aren't realistically reliable for larger use, and only designed as in interim -// state to separate the implementation of the caching behavior. 
auto ClangRunner::MakeTmpDir() -> void { if (!tmp_dir_.empty()) { return; @@ -455,6 +415,44 @@ auto ClangRunner::BuildCrtFile(llvm::StringRef target, llvm::StringRef src_file, })); } +auto ClangRunner::CollectBuiltinsSrcFiles(const llvm::Triple& target_triple) + -> llvm::SmallVector { + llvm::SmallVector src_files; + auto append_src_files = [&src_files](auto input_srcs) { + for (llvm::StringRef input_src : input_srcs) { + if (input_src.ends_with(".c") || input_src.ends_with(".S")) { + src_files.push_back(input_src); + } + } + }; + append_src_files(llvm::ArrayRef(RuntimeSources::BuiltinsGenericSrcs)); + append_src_files(llvm::ArrayRef(RuntimeSources::BuiltinsBf16Srcs)); + if (target_triple.isArch64Bit()) { + append_src_files(llvm::ArrayRef(RuntimeSources::BuiltinsTfSrcs)); + } + if (target_triple.isAArch64()) { + append_src_files(llvm::ArrayRef(RuntimeSources::BuiltinsAarch64Srcs)); + } else if (target_triple.isX86()) { + append_src_files(llvm::ArrayRef(RuntimeSources::BuiltinsX86ArchSrcs)); + if (target_triple.isArch64Bit()) { + append_src_files(llvm::ArrayRef(RuntimeSources::BuiltinsX86_64Srcs)); + } else { + // TODO: This should be turned into a nice user-facing diagnostic about an + // unsupported target. + CARBON_CHECK( + target_triple.isArch32Bit(), + "The Carbon toolchain doesn't currently support 16-bit x86."); + append_src_files(llvm::ArrayRef(RuntimeSources::BuiltinsI386Srcs)); + } + } else { + // TODO: This should be turned into a nice user-facing diagnostic about an + // unsupported target. 
+ CARBON_FATAL("Target architecture is not supported: {0}", + target_triple.str()); + } + return src_files; +} + auto ClangRunner::BuildBuiltinsFile(llvm::StringRef target, llvm::StringRef src_file, llvm::StringRef out_path) -> void { @@ -481,4 +479,44 @@ auto ClangRunner::BuildBuiltinsFile(llvm::StringRef target, })); } +auto ClangRunner::BuildBuiltinsLib(llvm::StringRef target, + const llvm::Triple& target_triple, + const std::filesystem::path& objs_path, + llvm::StringRef out_path) -> void { + llvm::SmallVector src_files = + CollectBuiltinsSrcFiles(target_triple); + + llvm::SmallVector objs; + objs.reserve(src_files.size()); + for (llvm::StringRef src_file : src_files) { + std::filesystem::path obj_path = objs_path / src_file.str(); + std::error_code ec; + std::filesystem::create_directories(obj_path.parent_path(), ec); + obj_path += ".o"; + BuildBuiltinsFile(target, src_file, obj_path.native()); + + llvm::Expected obj = + llvm::NewArchiveMember::getFile(obj_path.native(), + /*Deterministic=*/true); + CARBON_CHECK(obj, "TODO: Diagnose this: {0}", + llvm::fmt_consume(obj.takeError())); + objs.push_back(std::move(*obj)); + } + + // Now build an archive out of the `.o` files for the builtins. + std::error_code ec; + llvm::raw_fd_ostream builtins_a_os(out_path, ec); + // TODO: Here and elsewhere we check for filesystem errors, and we should + // revisit this with some more structured approach for reporting errors due to + // a broken environment (filesystem misbehavior) as opposed to user-error. A + // check failure and stack trace is likely not how we want to handle that. + CARBON_CHECK(!ec, "Unable to open archive for writing: {0}", ec.message()); + llvm::Error archive_err = llvm::writeArchiveToStream( + builtins_a_os, objs, llvm::SymtabWritingMode::NormalSymtab, + target_triple.isOSDarwin() ? 
llvm::object::Archive::K_DARWIN + : llvm::object::Archive::K_GNU, + /*Deterministic=*/true, /*Thin=*/false); + CARBON_CHECK(!archive_err, "Error writing archive: {0}", archive_err); +} + } // namespace Carbon diff --git a/toolchain/driver/clang_runner.h b/toolchain/driver/clang_runner.h index c38161958d6c1..cea86fbdfcf34 100644 --- a/toolchain/driver/clang_runner.h +++ b/toolchain/driver/clang_runner.h @@ -12,6 +12,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/VirtualFileSystem.h" +#include "llvm/TargetParser/Triple.h" #include "toolchain/driver/tool_runner_base.h" #include "toolchain/install/install_paths.h" @@ -91,14 +92,32 @@ class ClangRunner : ToolRunnerBase { std::optional target_resource_dir_path) -> bool; + // TODO: Replace this with a filesystem cache, and proper management of + // concurrent cache access. There are a bunch of hacks in this function that + // aren't realistically reliable for larger use, and only designed as in + // interim state to separate the implementation of the caching behavior. auto MakeTmpDir() -> void; + // Helper to compile a single file of the CRT runtimes. auto BuildCrtFile(llvm::StringRef target, llvm::StringRef src_file, llvm::StringRef out_path) -> void; + // Returns the target-specific source files for the builtins runtime library. + auto CollectBuiltinsSrcFiles(const llvm::Triple& target_triple) + -> llvm::SmallVector; + + // Helper to compile a single file of the compiler builtins runtimes. auto BuildBuiltinsFile(llvm::StringRef target, llvm::StringRef src_file, llvm::StringRef out_path) -> void; + // Builds the builtins runtime library into the provided archive file path, + // using the provided objects path for intermediate object files. + auto BuildBuiltinsLib(llvm::StringRef target, + const llvm::Triple& target_triple, + const std::filesystem::path& objs_path, + llvm::StringRef out_path) -> void; + + // A temp directory for this instance, will be cleaned up on destruction. 
std::filesystem::path tmp_dir_; llvm::IntrusiveRefCntPtr fs_; From 694757421e3d910ff9c9a7708df5e2c797ea3fd9 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Tue, 22 Apr 2025 07:31:00 +0000 Subject: [PATCH 05/15] format --- toolchain/install/busybox_info.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/toolchain/install/busybox_info.h b/toolchain/install/busybox_info.h index 41e25adce4929..7ad54d90bc08d 100644 --- a/toolchain/install/busybox_info.h +++ b/toolchain/install/busybox_info.h @@ -43,9 +43,9 @@ inline auto GetBusyboxInfo(llvm::StringRef argv0) -> ErrorOr { argv0 = argv0_override_storage; // Unset the override environment variable so that subsequent subprocessing - // back into the busybox doesn't pick it up. This is essential to, for example, allow the `clang` driver to - // subprocess back into a `clang` busyboxed binary with an internal CC1 - // commandline. + // back into the busybox doesn't pick it up. This is essential to, for + // example, allow the `clang` driver to subprocess back into a `clang` + // busyboxed binary with an internal CC1 commandline. 
unsetenv(Argv0OverrideEnv); } From 40486d5cec23e91af98693c1ab350bb892ac2a79 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Tue, 22 Apr 2025 07:58:45 +0000 Subject: [PATCH 06/15] fix --- toolchain/driver/clang_runner.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/toolchain/driver/clang_runner.cpp b/toolchain/driver/clang_runner.cpp index a105748ab9e9a..5b5aa207c4f43 100644 --- a/toolchain/driver/clang_runner.cpp +++ b/toolchain/driver/clang_runner.cpp @@ -137,7 +137,7 @@ static auto IsNonLinkCommand(llvm::ArrayRef args) -> bool { return arg == "-c" || arg == "-E" || arg == "-S" || arg == "-fsyntax-only" || arg == "--version" || arg == "--help" || arg == "/?"; - return false; + return true; }); } From 49441ab270a8cac072bbbfc76c63868dab88aa72 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Tue, 22 Apr 2025 13:16:27 -0700 Subject: [PATCH 07/15] Apply suggestions from code review Co-authored-by: Jon Ross-Perkins --- toolchain/driver/clang_runner.cpp | 5 ++--- toolchain/driver/clang_runner.h | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/toolchain/driver/clang_runner.cpp b/toolchain/driver/clang_runner.cpp index 5b5aa207c4f43..6a2c16b68d35a 100644 --- a/toolchain/driver/clang_runner.cpp +++ b/toolchain/driver/clang_runner.cpp @@ -70,12 +70,12 @@ ClangRunner::~ClangRunner() { } // Searches an argument list to a Clang execution to determine the expected -// target string, suitable for use with `llvm::Triple`, should be used. +// target string, suitable for use with `llvm::Triple`. // // If no explicit target flags are present, this defaults to the default // LLVM target. // -// Works to handle the most common flags that modify the expected target such as +// Works to handle the most common flags that modify the expected target as // well as direct target flags. 
// // Note: this has known fidelity issues if the args include separate-value flags @@ -137,7 +137,6 @@ static auto IsNonLinkCommand(llvm::ArrayRef args) -> bool { return arg == "-c" || arg == "-E" || arg == "-S" || arg == "-fsyntax-only" || arg == "--version" || arg == "--help" || arg == "/?"; - return true; }); } diff --git a/toolchain/driver/clang_runner.h b/toolchain/driver/clang_runner.h index cea86fbdfcf34..1488777a99710 100644 --- a/toolchain/driver/clang_runner.h +++ b/toolchain/driver/clang_runner.h @@ -117,7 +117,7 @@ class ClangRunner : ToolRunnerBase { const std::filesystem::path& objs_path, llvm::StringRef out_path) -> void; - // A temp directory for this instance, will be cleaned up on destruction. + // If non-empty, a temp directory for this instance which will be cleaned up on destruction. Set by `MakeTmpDir`. std::filesystem::path tmp_dir_; llvm::IntrusiveRefCntPtr fs_; From 19efda81b251a3dc8529e69888e2606b292fbf5a Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Thu, 24 Apr 2025 06:52:28 +0000 Subject: [PATCH 08/15] Restructure to address review feedback. 
--- MODULE.bazel | 2 +- ...t_for_custom_rules_to_build_builtins.patch | 26 -- ...ups_for_compiler_rt_builtins_runtime.patch | 193 +++++++++ toolchain/base/BUILD | 4 +- toolchain/base/runtime_sources.bzl | 401 +++--------------- toolchain/driver/clang_runner.cpp | 35 +- toolchain/driver/clang_runner_test.cpp | 10 +- toolchain/install/BUILD | 20 +- 8 files changed, 301 insertions(+), 390 deletions(-) delete mode 100644 bazel/llvm_project/0004_Add_support_for_custom_rules_to_build_builtins.patch create mode 100644 bazel/llvm_project/0004_Introduce_filegroups_for_compiler_rt_builtins_runtime.patch diff --git a/MODULE.bazel b/MODULE.bazel index 220b021ae409d..6f02f0d25079c 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -126,7 +126,7 @@ http_archive( "@carbon//bazel/llvm_project:0001_Patch_for_mallinfo2_when_using_Bazel_build_system.patch", "@carbon//bazel/llvm_project:0002_Added_Bazel_build_for_compiler_rt_fuzzer.patch", "@carbon//bazel/llvm_project:0003_Comment_out_unloaded_proto_library_dependencies.patch", - "@carbon//bazel/llvm_project:0004_Add_support_for_custom_rules_to_build_builtins.patch", + "@carbon//bazel/llvm_project:0004_Introduce_filegroups_for_compiler_rt_builtins_runtime.patch", ], sha256 = "8466760c8d69c5d3a1d2561813f47fa9a6962076adfb2b3f7aa0a69417b36c52", strip_prefix = "llvm-project-{0}".format(llvm_project_version), diff --git a/bazel/llvm_project/0004_Add_support_for_custom_rules_to_build_builtins.patch b/bazel/llvm_project/0004_Add_support_for_custom_rules_to_build_builtins.patch deleted file mode 100644 index 21c686c980b49..0000000000000 --- a/bazel/llvm_project/0004_Add_support_for_custom_rules_to_build_builtins.patch +++ /dev/null @@ -1,26 +0,0 @@ -From 0a46fc798170e76a0110bf118ae53db7a8c10b27 Mon Sep 17 00:00:00 2001 -From: Chandler Carruth -Date: Fri, 14 Feb 2025 01:08:54 +0000 -Subject: [PATCH 4/4] Add support for custom rules to build builtins - -This exposes the compiler-rt files for any custom rules that we need to -write to build and 
install them for the Carbon toolchain. ---- - utils/bazel/llvm-project-overlay/compiler-rt/BUILD.bazel | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/utils/bazel/llvm-project-overlay/compiler-rt/BUILD.bazel b/utils/bazel/llvm-project-overlay/compiler-rt/BUILD.bazel -index 00f2a3b9d7c0..a53a34cdc952 100644 ---- a/utils/bazel/llvm-project-overlay/compiler-rt/BUILD.bazel -+++ b/utils/bazel/llvm-project-overlay/compiler-rt/BUILD.bazel -@@ -127,3 +127,7 @@ cc_library( - ], - includes = ["lib/fuzzer"], - ) -+ -+# Allow building custom rules for runtimes. -+exports_files(glob(["**"])) -+ --- -2.48.1.601.g30ceb7b040-goog - diff --git a/bazel/llvm_project/0004_Introduce_filegroups_for_compiler_rt_builtins_runtime.patch b/bazel/llvm_project/0004_Introduce_filegroups_for_compiler_rt_builtins_runtime.patch new file mode 100644 index 0000000000000..61d5a1b5be4a9 --- /dev/null +++ b/bazel/llvm_project/0004_Introduce_filegroups_for_compiler_rt_builtins_runtime.patch @@ -0,0 +1,193 @@ +From 53747ea5328ec878fa019961a59ca74eb539d1c1 Mon Sep 17 00:00:00 2001 +From: Chandler Carruth +Date: Thu, 24 Apr 2025 05:03:43 +0000 +Subject: [PATCH] Introduce filegroups for compiler-rt builtins runtimes + +These filegroups allow downstream projects to package and build +customized runtime libraries. + +The filegroups work hard to use globs and a careful structuring to +create the structured breakdown of sources needed to target different +architectures and platforms without having to maintain a complete +parallel list of sources from CMake. 
+--- + .../compiler-rt/BUILD.bazel | 163 +++++++++++++++++- + 1 file changed, 160 insertions(+), 3 deletions(-) + +diff --git a/utils/bazel/llvm-project-overlay/compiler-rt/BUILD.bazel b/utils/bazel/llvm-project-overlay/compiler-rt/BUILD.bazel +index 6a6f58995df3..3b930372a54d 100644 +--- a/utils/bazel/llvm-project-overlay/compiler-rt/BUILD.bazel ++++ b/utils/bazel/llvm-project-overlay/compiler-rt/BUILD.bazel +@@ -122,9 +122,166 @@ cc_library( + "lib/fuzzer/Fuzzer*.def", + ]), + copts = [ +- # Not using no-sanitize=address per https://github.com/google/sanitizers/wiki/AddressSanitizerContainerOverflow#false-positives +- "-fno-sanitize=memory,thread,undefined", +- "-fsanitize-coverage=0", ++ # Not using no-sanitize=address per https://github.com/google/sanitizers/wiki/AddressSanitizerContainerOverflow#false-positives ++ "-fno-sanitize=memory,thread,undefined", ++ "-fsanitize-coverage=0", + ], + includes = ["lib/fuzzer"], + ) ++ ++BUILTINS_CRTBEGIN_SRCS = ["lib/builtins/crtbegin.c"] ++ ++filegroup( ++ name = "builtins_crtbegin_src", ++ srcs = BUILTINS_CRTBEGIN_SRCS, ++) ++ ++BUILTINS_CRTEND_SRCS = ["lib/builtins/crtend.c"] ++ ++filegroup( ++ name = "builtins_crtend_src", ++ srcs = BUILTINS_CRTEND_SRCS, ++) ++ ++# Note that while LLVM's CompilerRT provides a few hosted sources, we don't ++# currently build them: ++# ++# - `emutls.c`: Unclear we need to support targets with software emulated ++# TLS rather than hardware support. ++# - `enable_execute_stack.c`: Used to implement support for a builtin that ++# marks part of the stack as *executable* to support the GCC extension of ++# nested functions. This extension was never implemented in Clang, and is ++# generally considered a security issue to include. We expect to be able ++# to avoid even linking the support code for this into binaries at this ++# point. 
++# - `eprintf.c`: This provided a legacy `__eprintf` builtin used by old ++# versions of `assert.h` in its macros, but does not appear to be needed ++# when building with modern versions of this header. ++BUILTINS_HOSTED_SRCS = [ ++ "lib/builtins/emutls.c", ++ "lib/builtins/enable_execute_stack.c", ++ "lib/builtins/eprintf.c", ++] ++ ++filegroup( ++ name = "builtins_hosted_srcs", ++ srcs = BUILTINS_HOSTED_SRCS, ++) ++ ++BUILTINS_BF16_SRCS_PATTERNS = [ ++ # `bf` marks 16-bit Brain floating-point number builtins. ++ "lib/builtins/*bf*.c", ++] ++ ++filegroup( ++ name = "builtins_bf16_srcs", ++ srcs = glob(BUILTINS_BF16_SRCS_PATTERNS), ++) ++ ++BUILTINS_X86_FP80_SRCS_PATTERNS = [ ++ # `xc` marks 80-bit complex number builtins. ++ "lib/builtins/*xc*.c", ++ ++ # `xf` marks 80-bit floating-point builtins. ++ "lib/builtins/*xf*.c", ++] ++ ++filegroup( ++ name = "builtins_x86_fp80_srcs", ++ srcs = glob( ++ BUILTINS_X86_FP80_SRCS_PATTERNS, ++ exclude = BUILTINS_BF16_SRCS_PATTERNS, ++ ), ++) ++ ++BUILTINS_TF_SRCS_PATTERNS = [ ++ # `tc` marks 128-bit complex number builtins. ++ "lib/builtins/*tc*.c", ++ ++ # `tf` marks 128-bit floating-point builtins. 
++ "lib/builtins/*tf*.c", ++] ++ ++BUILTINS_TF_EXCLUDES = BUILTINS_HOSTED_SRCS + BUILTINS_BF16_SRCS_PATTERNS + BUILTINS_X86_FP80_SRCS_PATTERNS ++ ++filegroup( ++ name = "builtins_tf_srcs", ++ srcs = glob( ++ BUILTINS_TF_SRCS_PATTERNS, ++ exclude = BUILTINS_TF_EXCLUDES, ++ ), ++) ++ ++BUILTINS_MACOS_ATOMIC_SRCS_PATTERNS = [ ++ "lib/builtins/atomic_*.c", ++] ++ ++filegroup( ++ name = "builtins_macos_atomic_srcs", ++ srcs = glob(BUILTINS_MACOS_ATOMIC_SRCS_PATTERNS), ++) ++ ++filegroup( ++ name = "builtins_aarch64_srcs", ++ srcs = [ ++ "lib/builtins/cpu_model/aarch64.c", ++ "lib/builtins/cpu_model/aarch64.h", ++ ] + glob( ++ [ ++ "lib/builtins/cpu_model/AArch64*.inc", ++ "lib/builtins/cpu_model/aarch64/**/*.inc", ++ "lib/builtins/aarch64/*.S", ++ "lib/builtins/aarch64/*.c", ++ ], ++ exclude = [ ++ # This file isn't intended to directly compile, but to be used to ++ # generate a collection of outline atomic helpers. ++ # TODO: Add support for generating the sources for these helpers if ++ # there are users that need this functionality from the builtins ++ # library. ++ "lib/builtins/aarch64/lse.S", ++ ], ++ ), ++) ++ ++filegroup( ++ name = "builtins_x86_arch_srcs", ++ srcs = [ ++ "lib/builtins/cpu_model/x86.c", ++ "lib/builtins/i386/fp_mode.c", ++ ], ++) ++ ++filegroup( ++ name = "builtins_x86_64_srcs", ++ srcs = glob([ ++ "lib/builtins/x86_64/*.c", ++ "lib/builtins/x86_64/*.S", ++ ]), ++) ++ ++filegroup( ++ name = "builtins_i386_srcs", ++ srcs = glob( ++ [ ++ "lib/builtins/i386/*.c", ++ "lib/builtins/i386/*.S", ++ ], ++ exclude = [ ++ # This file is used for both i386 and x86_64. 
++ "lib/builtins/i386/fp_mode.c", ++ ], ++ ), ++) ++ ++filegroup( ++ name = "builtins_generic_srcs", ++ srcs = ["lib/builtins/cpu_model/cpu_model.h"] + glob( ++ [ ++ "lib/builtins/*.c", ++ "lib/builtins/*.h", ++ "lib/builtins/*.inc", ++ ], ++ exclude = BUILTINS_CRTBEGIN_SRCS + BUILTINS_CRTEND_SRCS + BUILTINS_TF_EXCLUDES + BUILTINS_TF_SRCS_PATTERNS + BUILTINS_MACOS_ATOMIC_SRCS_PATTERNS, ++ ), ++) +-- +2.49.0.805.g082f7c87e0-goog + diff --git a/toolchain/base/BUILD b/toolchain/base/BUILD index d7215f46f855a..ed72a1eb740cb 100644 --- a/toolchain/base/BUILD +++ b/toolchain/base/BUILD @@ -174,9 +174,7 @@ cc_library( ] + [info.lib for info in LLVM_MAIN_TOOLS.values()], ) -generate_runtime_sources_cc_library( - name = "runtime_sources", -) +generate_runtime_sources_cc_library(name = "runtime_sources") cc_library( name = "shared_value_stores", diff --git a/toolchain/base/runtime_sources.bzl b/toolchain/base/runtime_sources.bzl index dc468d1cd8ff3..16a8dadc35f36 100644 --- a/toolchain/base/runtime_sources.bzl +++ b/toolchain/base/runtime_sources.bzl @@ -20,329 +20,22 @@ Future runtimes we plan to add support for but not yet included: load("@rules_cc//cc:cc_library.bzl", "cc_library") -CRTBEGIN_SRC = "crtbegin.c" -CRTEND_SRC = "crtend.c" - -BUILTINS_BASE_SRCS = [ - # Internal headers. - "assembly.h", - "cpu_model/cpu_model.h", - "fp_extend.h", - "fp_lib.h", - "fp_mode.h", - "fp_trunc.h", - "int_endianness.h", - "int_lib.h", - "int_math.h", - "int_to_fp.h", - "int_types.h", - "int_util.h", - "unwind-ehabi-helpers.h", - - # Internal textually included files. - "fp_add_impl.inc", - "fp_compare_impl.inc", - "fp_div_impl.inc", - "fp_extend_impl.inc", - "fp_fixint_impl.inc", - "fp_fixuint_impl.inc", - "fp_mul_impl.inc", - "fp_trunc_impl.inc", - "int_div_impl.inc", - "int_mulo_impl.inc", - "int_mulv_impl.inc", - "int_to_fp_impl.inc", - - # Source files. 
- "absvdi2.c", - "absvsi2.c", - "absvti2.c", - "adddf3.c", - "addsf3.c", - "addvdi3.c", - "addvsi3.c", - "addvti3.c", - "apple_versioning.c", - "ashldi3.c", - "ashlti3.c", - "ashrdi3.c", - "ashrti3.c", - "bswapdi2.c", - "bswapsi2.c", - "clzdi2.c", - "clzsi2.c", - "clzti2.c", - "cmpdi2.c", - "cmpti2.c", - "comparedf2.c", - "comparesf2.c", - "ctzdi2.c", - "ctzsi2.c", - "ctzti2.c", - "divdc3.c", - "divdf3.c", - "divdi3.c", - "divmoddi4.c", - "divmodsi4.c", - "divmodti4.c", - "divsc3.c", - "divsf3.c", - "divsi3.c", - "divti3.c", - "extendsfdf2.c", - "extendhfsf2.c", - "ffsdi2.c", - "ffssi2.c", - "ffsti2.c", - "fixdfdi.c", - "fixdfsi.c", - "fixdfti.c", - "fixsfdi.c", - "fixsfsi.c", - "fixsfti.c", - "fixunsdfdi.c", - "fixunsdfsi.c", - "fixunsdfti.c", - "fixunssfdi.c", - "fixunssfsi.c", - "fixunssfti.c", - "floatdidf.c", - "floatdisf.c", - "floatsidf.c", - "floatsisf.c", - "floattidf.c", - "floattisf.c", - "floatundidf.c", - "floatundisf.c", - "floatunsidf.c", - "floatunsisf.c", - "floatuntidf.c", - "floatuntisf.c", - "fp_mode.c", - "int_util.c", - "lshrdi3.c", - "lshrti3.c", - "moddi3.c", - "modsi3.c", - "modti3.c", - "muldc3.c", - "muldf3.c", - "muldi3.c", - "mulodi4.c", - "mulosi4.c", - "muloti4.c", - "mulsc3.c", - "mulsf3.c", - "multi3.c", - "mulvdi3.c", - "mulvsi3.c", - "mulvti3.c", - "negdf2.c", - "negdi2.c", - "negsf2.c", - "negti2.c", - "negvdi2.c", - "negvsi2.c", - "negvti2.c", - "os_version_check.c", - "paritydi2.c", - "paritysi2.c", - "parityti2.c", - "popcountdi2.c", - "popcountsi2.c", - "popcountti2.c", - "powidf2.c", - "powisf2.c", - "subdf3.c", - "subsf3.c", - "subvdi3.c", - "subvsi3.c", - "subvti3.c", - "trampoline_setup.c", - "truncdfhf2.c", - "truncdfsf2.c", - "truncsfhf2.c", - "ucmpdi2.c", - "ucmpti2.c", - "udivdi3.c", - "udivmoddi4.c", - "udivmodsi4.c", - "udivmodti4.c", - "udivsi3.c", - "udivti3.c", - "umoddi3.c", - "umodsi3.c", - "umodti3.c", -] - -BUILTINS_BF16_SRCS = [ - "extendbfsf2.c", - "truncdfbf2.c", - "truncxfbf2.c", - "truncsfbf2.c", - 
"trunctfbf2.c", -] - -BUILTINS_TF_SRCS = [ - "addtf3.c", - "comparetf2.c", - "divtc3.c", - "divtf3.c", - "extenddftf2.c", - "extendhftf2.c", - "extendsftf2.c", - "fixtfdi.c", - "fixtfsi.c", - "fixtfti.c", - "fixunstfdi.c", - "fixunstfsi.c", - "fixunstfti.c", - "floatditf.c", - "floatsitf.c", - "floattitf.c", - "floatunditf.c", - "floatunsitf.c", - "floatuntitf.c", - "multc3.c", - "multf3.c", - "powitf2.c", - "subtf3.c", - "trunctfdf2.c", - "trunctfhf2.c", - "trunctfsf2.c", -] - -BUILTINS_HOSTED_SRCS = [ - # Note that LLVM's CompilerRT provides a few hosted sources that we don't - # currently build: - # - `emutls.c`: Unclear we need to support targets with software emulated - # TLS rather than hardware support. - # - `enable_execute_stack.c`: Used to implemnet support for a builtin that - # marks part of the stack as *executable* to support the GCC extension of - # nested functions. This extension was never implemneted in Clang, and is - # generally considered a security issue to include. We expect to be able - # to avoid even linking the support code for this into binaries at this - # point. - # - `eprintf.c`: This provided a legacy `__eprintf` builtin used by old - # versions of `assert.h` in its macros, but does not appear to be needed - # when building with modern versions of this header. - # - # We keep the hosted source list in case other sources are added to the - # hosted set that we _do_ want to support. 
-] - -BUILTINS_ATOMIC_SRCS = [ - "atomic.c", -] - -BUILTINS_EH_PERSONALITY_SRCS = [ - "gcc_personality_v0.c", -] - -BUILTINS_GENERIC_SRCS = BUILTINS_BASE_SRCS + BUILTINS_HOSTED_SRCS + BUILTINS_ATOMIC_SRCS + BUILTINS_EH_PERSONALITY_SRCS - -BUILTINS_MACOS_SRCS = [ - "atomic_flag_clear.c", - "atomic_flag_clear_explicit.c", - "atomic_flag_test_and_set.c", - "atomic_flag_test_and_set_explicit.c", - "atomic_signal_fence.c", - "atomic_thread_fence.c", -] - -BUILTINS_X86_ARCH_SRCS = [ - "cpu_model/x86.c", - "i386/fp_mode.c", -] - -BUILTINS_X86_FP80_SRCS = [ - "divxc3.c", - "extendhfxf2.c", - "extendxftf2.c", - "fixxfdi.c", - "fixxfti.c", - "fixunsxfdi.c", - "fixunsxfsi.c", - "fixunsxfti.c", - "floatdixf.c", - "floattixf.c", - "floatundixf.c", - "floatuntixf.c", - "mulxc3.c", - "powixf2.c", - "trunctfxf2.c", - "truncxfhf2.c", -] - -BUILTINS_AARCH64_SRCS = [ - "cpu_model/aarch64.h", - "cpu_model/aarch64.c", - "cpu_model/AArch64CPUFeatures.inc", - "cpu_model/aarch64/fmv/android.inc", - "cpu_model/aarch64/fmv/apple.inc", - "cpu_model/aarch64/fmv/baremetal.inc", - "cpu_model/aarch64/fmv/freebsd.inc", - "cpu_model/aarch64/fmv/fuchsia.inc", - "cpu_model/aarch64/fmv/getauxval.inc", - "cpu_model/aarch64/fmv/mrs.inc", - "cpu_model/aarch64/fmv/unimplemented.inc", - "cpu_model/aarch64/fmv/windows.inc", - "cpu_model/aarch64/hwcap.inc", - "cpu_model/aarch64/lse_atomics/android.inc", - "cpu_model/aarch64/lse_atomics/freebsd.inc", - "cpu_model/aarch64/lse_atomics/fuchsia.inc", - "cpu_model/aarch64/lse_atomics/getauxval.inc", - "cpu_model/aarch64/lse_atomics/windows.inc", - "aarch64/fp_mode.c", - "aarch64/sme-abi.S", - "aarch64/sme-libc-mem-routines.S", - "aarch64/sme-abi-assert.c", - "aarch64/sme-libc-routines.c", -] -BUILTINS_X86_64_SRCS = [ - "x86_64/floatdidf.c", - "x86_64/floatdisf.c", - "x86_64/floatdixf.c", - "x86_64/floatundidf.S", - "x86_64/floatundisf.S", - "x86_64/floatundixf.S", -] -BUILTINS_I386_SRCS = [ - "i386/ashldi3.S", - "i386/ashrdi3.S", - "i386/divdi3.S", - 
"i386/floatdidf.S", - "i386/floatdisf.S", - "i386/floatundidf.S", - "i386/floatundisf.S", - "i386/lshrdi3.S", - "i386/moddi3.S", - "i386/muldi3.S", - "i386/udivdi3.S", - "i386/umoddi3.S", - "i386/floatdixf.S", - "i386/floatundixf.S", -] - -RUNTIME_SRCS = [ - CRTBEGIN_SRC, - CRTEND_SRC, -] + [ - # Flatten the individual lists of source files into one main list. We use a - # nested loop list comprehension as it formats a bit more cleanly than `+`. - src - for srcs in [ - BUILTINS_GENERIC_SRCS, - BUILTINS_BF16_SRCS, - BUILTINS_TF_SRCS, - BUILTINS_X86_ARCH_SRCS, - BUILTINS_X86_FP80_SRCS, - BUILTINS_AARCH64_SRCS, - BUILTINS_X86_64_SRCS, - BUILTINS_I386_SRCS, - ] - for src in srcs -] +CRT_FILES = { + "crtbegin_src": "@llvm-project//compiler-rt:builtins_crtbegin_src", + "crtend_src": "@llvm-project//compiler-rt:builtins_crtend_src", +} + +BUILTINS_FILEGROUPS = { + "aarch64_srcs": "@llvm-project//compiler-rt:builtins_aarch64_srcs", + "bf16_srcs": "@llvm-project//compiler-rt:builtins_bf16_srcs", + "generic_srcs": "@llvm-project//compiler-rt:builtins_generic_srcs", + "i386_srcs": "@llvm-project//compiler-rt:builtins_i386_srcs", + "macos_srcs": "@llvm-project//compiler-rt:builtins_macos_atomic_srcs", + "tf_srcs": "@llvm-project//compiler-rt:builtins_tf_srcs", + "x86_64_srcs": "@llvm-project//compiler-rt:builtins_x86_64_srcs", + "x86_arch_srcs": "@llvm-project//compiler-rt:builtins_x86_arch_srcs", + "x86_fp80_srcs": "@llvm-project//compiler-rt:builtins_x86_fp80_srcs", +} _TEMPLATE = """ // Part of the Carbon Language project, under the Apache License v2.0 with LLVM @@ -361,8 +54,8 @@ _TEMPLATE = """ namespace Carbon::RuntimeSources {{ -constexpr inline llvm::StringLiteral CrtBegin = {crtbegin}; -constexpr inline llvm::StringLiteral CrtEnd = {crtend}; +constexpr inline llvm::StringLiteral CrtBegin = {crtbegin_src}; +constexpr inline llvm::StringLiteral CrtEnd = {crtend_src}; constexpr inline llvm::StringLiteral BuiltinsGenericSrcs[] = {{ {generic_srcs} @@ -392,34 +85,58 @@ 
constexpr inline llvm::StringLiteral BuiltinsI386Srcs[] = {{ {i386_srcs} }}; -}} +}} // namespace Carbon::RuntimeSources #endif // CARBON_TOOLCHAIN_BASE_RUNTIME_SOURCES_H_ """ -def _make_srcs_strings(dir, srcs): - return "\n".join([' "{0}/{1}",'.format(dir, src) for src in srcs]) +def _builtins_path(file): + """Returns the runtime install path for a file in CompilerRT's builtins library.""" + + # The CompilerRT package has the builtins runtime sources in the + # "lib/builtins/" subdirectory, and we install into a "builtins/" + # subdirectory, so just remove the "lib/" prefix from the package-relative + # label name. + return file.owner.name.removeprefix("lib/") + +def _get_path(file_attr, to_path_fn): + files = file_attr[DefaultInfo].files.to_list() + if len(files) > 1: + fail(msg = "Expected a single file and got {0} files.".format(len(files))) + + return '"{0}"'.format(to_path_fn(files[0])) + +def _get_paths(files_attr, to_path_fn): + files = [] + for src in files_attr: + files.extend(src[DefaultInfo].files.to_list()) + files.extend(src[DefaultInfo].default_runfiles.files.to_list()) + + return "\n".join([ + ' "{0}",'.format(to_path_fn(f)) + for f in files + ]) def _generate_runtime_sources_h_rule(ctx): h_file = ctx.actions.declare_file(ctx.label.name) - ctx.actions.write(h_file, _TEMPLATE.format( - crtbegin = '"builtins/{0}"'.format(CRTBEGIN_SRC), - crtend = '"builtins/{0}"'.format(CRTEND_SRC), - generic_srcs = _make_srcs_strings("builtins", BUILTINS_GENERIC_SRCS), - macos_srcs = _make_srcs_strings("builtins", BUILTINS_MACOS_SRCS), - bf16_srcs = _make_srcs_strings("builtins", BUILTINS_BF16_SRCS), - tf_srcs = _make_srcs_strings("builtins", BUILTINS_TF_SRCS), - x86_arch_srcs = _make_srcs_strings("builtins", BUILTINS_X86_ARCH_SRCS), - x86_fp80_srcs = _make_srcs_strings("builtins", BUILTINS_X86_FP80_SRCS), - aarch64_srcs = _make_srcs_strings("builtins", BUILTINS_AARCH64_SRCS), - x86_64_srcs = _make_srcs_strings("builtins", BUILTINS_X86_64_SRCS), - i386_srcs = 
_make_srcs_strings("builtins", BUILTINS_I386_SRCS), - )) + ctx.actions.write(h_file, _TEMPLATE.format(**({ + k: _get_path(getattr(ctx.attr, "_" + k), _builtins_path) + for k in CRT_FILES.keys() + } | { + k: _get_paths(getattr(ctx.attr, "_" + k), _builtins_path) + for k in BUILTINS_FILEGROUPS.keys() + }))) return [DefaultInfo(files = depset([h_file]))] generate_runtime_sources_h = rule( implementation = _generate_runtime_sources_h_rule, - attrs = {}, + attrs = { + "_" + k: attr.label(default = v, allow_single_file = True) + for k, v in CRT_FILES.items() + } | { + "_" + k: attr.label_list(default = [v], allow_files = True) + for k, v in BUILTINS_FILEGROUPS.items() + }, ) def generate_runtime_sources_cc_library(name, **kwargs): diff --git a/toolchain/driver/clang_runner.cpp b/toolchain/driver/clang_runner.cpp index 6a2c16b68d35a..b9054b44ecfe7 100644 --- a/toolchain/driver/clang_runner.cpp +++ b/toolchain/driver/clang_runner.cpp @@ -417,31 +417,46 @@ auto ClangRunner::BuildCrtFile(llvm::StringRef target, llvm::StringRef src_file, auto ClangRunner::CollectBuiltinsSrcFiles(const llvm::Triple& target_triple) -> llvm::SmallVector { llvm::SmallVector src_files; - auto append_src_files = [&src_files](auto input_srcs) { - for (llvm::StringRef input_src : input_srcs) { - if (input_src.ends_with(".c") || input_src.ends_with(".S")) { - src_files.push_back(input_src); - } - } - }; + auto append_src_files = + [&](auto input_srcs, + llvm::function_ref filter_out = {}) { + for (llvm::StringRef input_src : input_srcs) { + if (!input_src.ends_with(".c") && !input_src.ends_with(".S")) { + // Not a compiled file. + continue; + } + if (filter_out && filter_out(input_src)) { + // Filtered out. 
+ continue; + } + + src_files.push_back(input_src); + } + }; append_src_files(llvm::ArrayRef(RuntimeSources::BuiltinsGenericSrcs)); append_src_files(llvm::ArrayRef(RuntimeSources::BuiltinsBf16Srcs)); if (target_triple.isArch64Bit()) { append_src_files(llvm::ArrayRef(RuntimeSources::BuiltinsTfSrcs)); } + auto filter_out_chkstk = [&](llvm::StringRef src) { + return !target_triple.isOSWindows() && src.ends_with("chkstk.S"); + }; if (target_triple.isAArch64()) { - append_src_files(llvm::ArrayRef(RuntimeSources::BuiltinsAarch64Srcs)); + append_src_files(llvm::ArrayRef(RuntimeSources::BuiltinsAarch64Srcs), + filter_out_chkstk); } else if (target_triple.isX86()) { append_src_files(llvm::ArrayRef(RuntimeSources::BuiltinsX86ArchSrcs)); if (target_triple.isArch64Bit()) { - append_src_files(llvm::ArrayRef(RuntimeSources::BuiltinsX86_64Srcs)); + append_src_files(llvm::ArrayRef(RuntimeSources::BuiltinsX86_64Srcs), + filter_out_chkstk); } else { // TODO: This should be turned into a nice user-facing diagnostic about an // unsupported target. CARBON_CHECK( target_triple.isArch32Bit(), "The Carbon toolchain doesn't currently support 16-bit x86."); - append_src_files(llvm::ArrayRef(RuntimeSources::BuiltinsI386Srcs)); + append_src_files(llvm::ArrayRef(RuntimeSources::BuiltinsI386Srcs), + filter_out_chkstk); } } else { // TODO: This should be turned into a nice user-facing diagnostic about an diff --git a/toolchain/driver/clang_runner_test.cpp b/toolchain/driver/clang_runner_test.cpp index ce1359896e37c..d87aba3c60013 100644 --- a/toolchain/driver/clang_runner_test.cpp +++ b/toolchain/driver/clang_runner_test.cpp @@ -222,7 +222,9 @@ TEST_F(ClangRunnerTest, BuildResourceDir) { } // Across all targets, check that the builtins archive exists, and contains a - // relevant symbol by running the `llvm-nm` tool over it. + // relevant symbol by running the `llvm-nm` tool over it.
Using `nm` rather + // than directly inspecting the objects is a bit awkward, but lets us easily + // ignore the wrapping in an archive file. std::filesystem::path builtins_path = resource_path / "lib" / target / "libclang_rt.builtins.a"; LLVMRunner llvm_runner(&install_paths_, &llvm::errs()); @@ -231,11 +233,17 @@ TEST_F(ClangRunnerTest, BuildResourceDir) { EXPECT_TRUE(Testing::CallWithCapturedOutput(out, err, [&] { return llvm_runner.Run(LLVMTool::Nm, {builtins_path.native()}); })); + // Check that we found a definition of `__mulodi4`, a builtin function // provided by Compiler-RT, but not `libgcc` historically. Note that on macOS // there is a leading `_` due to mangling. EXPECT_THAT(out, HasSubstr(target_triple.isMacOSX() ? "T ___mulodi4\n" : "T __mulodi4\n")); + + // Check that we don't include the `chkstk` builtins outside of Windows. + if (!target_triple.isOSWindows()) { + EXPECT_THAT(out, Not(HasSubstr("chkstk"))); + } } // It's hard to write a portable and reliable unittest for all the layers of the diff --git a/toolchain/install/BUILD b/toolchain/install/BUILD index aad54f57101a2..caa379a3ec0ee 100644 --- a/toolchain/install/BUILD +++ b/toolchain/install/BUILD @@ -10,7 +10,7 @@ load("@rules_python//python:defs.bzl", "py_test") load("//bazel/cc_rules:defs.bzl", "cc_binary", "cc_library", "cc_test") load("//bazel/manifest:defs.bzl", "manifest") load("//toolchain/base:llvm_tools.bzl", "LLVM_MAIN_TOOLS", "LLVM_TOOL_ALIASES") -load("//toolchain/base:runtime_sources.bzl", "RUNTIME_SRCS") +load("//toolchain/base:runtime_sources.bzl", "BUILTINS_FILEGROUPS", "CRT_FILES") load("install_filegroups.bzl", "install_filegroup", "install_symlink", "install_target", "make_install_filegroups") load("pkg_helpers.bzl", "pkg_naming_variables", "pkg_tar_and_test") @@ -148,6 +148,16 @@ filegroup( srcs = ["@llvm-project//clang:builtin_headers_gen"], ) +# Collect the runtime sources that are collectively installed into the +# `builtins` directory. 
+filegroup( + name = "clang_builtins_runtimes", + srcs = [ + runtime + for runtime in CRT_FILES.values() + BUILTINS_FILEGROUPS.values() + ], +) + # Given a root `prefix_root`, the hierarchy looks like: # # - prefix_root/bin: Binaries intended for direct use. @@ -183,12 +193,8 @@ install_dirs = { "lib/carbon/llvm/lib/clang/" + LLVM_VERSION_MAJOR: [ install_filegroup("include", ":clang_headers", "staging/include/"), ], - "lib/carbon/llvm/lib/clang/" + LLVM_VERSION_MAJOR + "/src/builtins": [ - install_target( - src, - "@llvm-project//compiler-rt:lib/builtins/" + src, - ) - for src in RUNTIME_SRCS + "lib/carbon/llvm/lib/clang/" + LLVM_VERSION_MAJOR + "/src": [ + install_filegroup("builtins", ":clang_builtins_runtimes", "lib/builtins/"), ], } From c45792b9a58d4d453db42fe41538932898c28325 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Thu, 24 Apr 2025 06:59:58 +0000 Subject: [PATCH 09/15] format --- toolchain/driver/clang_runner.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/toolchain/driver/clang_runner.h b/toolchain/driver/clang_runner.h index 1488777a99710..6649b02fc998d 100644 --- a/toolchain/driver/clang_runner.h +++ b/toolchain/driver/clang_runner.h @@ -117,7 +117,8 @@ class ClangRunner : ToolRunnerBase { const std::filesystem::path& objs_path, llvm::StringRef out_path) -> void; - // If non-empty, a temp directory for this instance which will be cleaned up on destruction. Set by `MakeTmpDir`. + // If non-empty, a temp directory for this instance which will be cleaned up + // on destruction. Set by `MakeTmpDir`. 
std::filesystem::path tmp_dir_; llvm::IntrusiveRefCntPtr fs_; From 8809b20469d5c05e5cf8195928b1085c2c3a8c3d Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Fri, 25 Apr 2025 03:13:44 +0000 Subject: [PATCH 10/15] fix test for new world --- toolchain/install/llvm_symlinks_test.py | 45 ++++++++++++++++++------- 1 file changed, 33 insertions(+), 12 deletions(-) diff --git a/toolchain/install/llvm_symlinks_test.py b/toolchain/install/llvm_symlinks_test.py index 4621da6339aa8..9ec3c09ca89a2 100644 --- a/toolchain/install/llvm_symlinks_test.py +++ b/toolchain/install/llvm_symlinks_test.py @@ -11,6 +11,7 @@ from pathlib import Path import subprocess import os +import platform import sys import unittest @@ -26,8 +27,6 @@ def setUp(self) -> None: def get_link_cmd(self, clang: Path) -> list[str | Path]: return [ clang, - # We pick an arbitrary linux target to get stable results. - "--target=aarch64-unknown-linux-gnu", # Verbose printing to help with debugging. "-v", # Print out the link command rather than running it. @@ -39,27 +38,49 @@ def get_link_cmd(self, clang: Path) -> list[str | Path]: self.test_o_file, ] + # Note that we can't test `clang` vs. `clang++` portably. The only commands + # with useful differences are _link_ commands, and those need to build + # runtime libraries on demand, which requires the host to be able to compile + # and link for the target. Instead, we test linking with the default target + # (the host), as that is the one that should reliably work if we're + # developing Carbon, and encode all the different platform results in the + # test expectations. def test_clang(self) -> None: bin = self.install_root / "lib/carbon/llvm/bin/clang" run = subprocess.run( self.get_link_cmd(bin), check=True, capture_output=True, text=True ) - # Check that we do have a plausible link command. - self.assertRegex(run.stderr, r'"-m" "aarch64linux"') - - # Ensure it doesn't contain the C++ standard library. 
- self.assertNotRegex(run.stderr, r'"-lstdc++"') + def unsupported() -> None: + self.fail( + f"Unsupported platform '{platform.uname()}':\n{run.stderr}" + ) + + if platform.system() == "Linux": + # Check that we do have a plausible link command. + if platform.machine() == "x86_64": + self.assertRegex(run.stderr, r'"-m" "elf_x86_64"') + elif platform.machine() == "AArch64": + self.assertRegex(run.stderr, r'"-m" "aarch64linux"') + else: + unsupported() + + # Ensure it doesn't contain the C++ standard library. + self.assertNotRegex(run.stderr, r'"-lstdc\+\+"') + elif platform.system() == "Darwin": + unsupported() + else: + unsupported() + + # Note that we can't test `clang` vs. `clang++` portably. See the comment on + # `test_clang` for details. def test_clangplusplus(self) -> None: bin = self.install_root / "lib/carbon/llvm/bin/clang++" run = subprocess.run( self.get_link_cmd(bin), check=True, capture_output=True, text=True ) - # Check that we do have a plausible link command. - self.assertRegex(run.stderr, r'"-m" "aarch64linux"') - - # Ensure it doesn't contain the C++ standard library. - self.assertNotRegex(run.stderr, r'"-lstdc++"') + # Ensure it does contain the C++ standard library. 
+ self.assertRegex(run.stderr, r'"-lstdc\+\+"') def test_clang_cl(self) -> None: bin = self.install_root / "lib/carbon/llvm/bin/clang-cl" From 7e9407f3e540c28b960d93ac2f944961c7e430bc Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Thu, 24 Apr 2025 22:44:32 -0700 Subject: [PATCH 11/15] simplify --- toolchain/install/llvm_symlinks_test.py | 33 +++++++++++-------------- 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/toolchain/install/llvm_symlinks_test.py b/toolchain/install/llvm_symlinks_test.py index 9ec3c09ca89a2..73b3b98b33b5a 100644 --- a/toolchain/install/llvm_symlinks_test.py +++ b/toolchain/install/llvm_symlinks_test.py @@ -38,6 +38,9 @@ def get_link_cmd(self, clang: Path) -> list[str | Path]: self.test_o_file, ] + def unsupported(self, stderr: str) -> None: + self.fail(f"Unsupported platform '{platform.uname()}':\n{stderr}") + # Note that we can't test `clang` vs. `clang++` portably. The only commands # with useful differences are _link_ commands, and those need to build # runtime libraries on demand, which requires the host to be able to compile @@ -47,30 +50,18 @@ def get_link_cmd(self, clang: Path) -> list[str | Path]: # test expectations. def test_clang(self) -> None: bin = self.install_root / "lib/carbon/llvm/bin/clang" + # Most errors are caught by ensuring the command succeeds. run = subprocess.run( self.get_link_cmd(bin), check=True, capture_output=True, text=True ) - def unsupported() -> None: - self.fail( - f"Unsupported platform '{platform.uname()}':\n{run.stderr}" - ) - + # Also ensure that it correctly didn't imply a C++ link. if platform.system() == "Linux": - # Check that we do have a plausible link command. - if platform.machine() == "x86_64": - self.assertRegex(run.stderr, r'"-m" "elf_x86_64"') - elif platform.machine() == "AArch64": - self.assertRegex(run.stderr, r'"-m" "aarch64linux"') - else: - unsupported() - - # Ensure it doesn't contain the C++ standard library. 
self.assertNotRegex(run.stderr, r'"-lstdc\+\+"') elif platform.system() == "Darwin": - unsupported() + self.assertNotRegex(run.stderr, r'"-lc\+\+"') else: - unsupported() + self.unsupported(run.stderr) # Note that we can't test `clang` vs. `clang++` portably. See the comment on # `test_clang` for details. @@ -79,8 +70,14 @@ def test_clangplusplus(self) -> None: run = subprocess.run( self.get_link_cmd(bin), check=True, capture_output=True, text=True ) - # Ensure it does contain the C++ standard library. - self.assertRegex(run.stderr, r'"-lstdc\+\+"') + + # Ensure that this binary _does_ imply a C++ link. + if platform.system() == "Linux": + self.assertRegex(run.stderr, r'"-lstdc\+\+"') + elif platform.system() == "Darwin": + self.assertRegex(run.stderr, r'"-lc\+\+"') + else: + self.unsupported(run.stderr) def test_clang_cl(self) -> None: bin = self.install_root / "lib/carbon/llvm/bin/clang-cl" From 789b12c0bf4126d4b632c9a92fcb08b92e331a84 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Fri, 25 Apr 2025 22:36:03 +0000 Subject: [PATCH 12/15] fix patch --- ...ups_for_compiler_rt_builtins_runtime.patch | 35 ++++++++++--------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/bazel/llvm_project/0004_Introduce_filegroups_for_compiler_rt_builtins_runtime.patch b/bazel/llvm_project/0004_Introduce_filegroups_for_compiler_rt_builtins_runtime.patch index 61d5a1b5be4a9..3a43b555f4ffd 100644 --- a/bazel/llvm_project/0004_Introduce_filegroups_for_compiler_rt_builtins_runtime.patch +++ b/bazel/llvm_project/0004_Introduce_filegroups_for_compiler_rt_builtins_runtime.patch @@ -1,4 +1,4 @@ -From 53747ea5328ec878fa019961a59ca74eb539d1c1 Mon Sep 17 00:00:00 2001 +From 19d5d9913778ca95da272f41c5916907154a5e73 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Thu, 24 Apr 2025 05:03:43 +0000 Subject: [PATCH] Introduce filegroups for compiler-rt builtins runtimes @@ -11,23 +11,14 @@ create the structured breakdown of sources needed to target different architectures 
and platforms without having to maintain a complete parallel list of sources from CMake. --- - .../compiler-rt/BUILD.bazel | 163 +++++++++++++++++- - 1 file changed, 160 insertions(+), 3 deletions(-) + .../compiler-rt/BUILD.bazel | 167 ++++++++++++++++++ + 1 file changed, 167 insertions(+) diff --git a/utils/bazel/llvm-project-overlay/compiler-rt/BUILD.bazel b/utils/bazel/llvm-project-overlay/compiler-rt/BUILD.bazel -index 6a6f58995df3..3b930372a54d 100644 +index 6a5a89fdee40..7d158f0c13f2 100644 --- a/utils/bazel/llvm-project-overlay/compiler-rt/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/compiler-rt/BUILD.bazel -@@ -122,9 +122,166 @@ cc_library( - "lib/fuzzer/Fuzzer*.def", - ]), - copts = [ -- # Not using no-sanitize=address per https://github.com/google/sanitizers/wiki/AddressSanitizerContainerOverflow#false-positives -- "-fno-sanitize=memory,thread,undefined", -- "-fsanitize-coverage=0", -+ # Not using no-sanitize=address per https://github.com/google/sanitizers/wiki/AddressSanitizerContainerOverflow#false-positives -+ "-fno-sanitize=memory,thread,undefined", -+ "-fsanitize-coverage=0", +@@ -128,3 +128,170 @@ cc_library( ], includes = ["lib/fuzzer"], ) @@ -105,7 +96,11 @@ index 6a6f58995df3..3b930372a54d 100644 + "lib/builtins/*tf*.c", +] + -+BUILTINS_TF_EXCLUDES = BUILTINS_HOSTED_SRCS + BUILTINS_BF16_SRCS_PATTERNS + BUILTINS_X86_FP80_SRCS_PATTERNS ++BUILTINS_TF_EXCLUDES = ( ++ BUILTINS_HOSTED_SRCS + ++ BUILTINS_BF16_SRCS_PATTERNS + ++ BUILTINS_X86_FP80_SRCS_PATTERNS ++) + +filegroup( + name = "builtins_tf_srcs", @@ -185,9 +180,15 @@ index 6a6f58995df3..3b930372a54d 100644 + "lib/builtins/*.h", + "lib/builtins/*.inc", + ], -+ exclude = BUILTINS_CRTBEGIN_SRCS + BUILTINS_CRTEND_SRCS + BUILTINS_TF_EXCLUDES + BUILTINS_TF_SRCS_PATTERNS + BUILTINS_MACOS_ATOMIC_SRCS_PATTERNS, ++ exclude = ( ++ BUILTINS_CRTBEGIN_SRCS + ++ BUILTINS_CRTEND_SRCS + ++ BUILTINS_TF_EXCLUDES + ++ BUILTINS_TF_SRCS_PATTERNS + ++ BUILTINS_MACOS_ATOMIC_SRCS_PATTERNS ++ ), + ), +) -- 
-2.49.0.805.g082f7c87e0-goog +2.49.0.850.g28803427d3-goog From d158bf6f80dd821e6778607e58cf7c8ddeb07a71 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Fri, 25 Apr 2025 23:39:40 +0000 Subject: [PATCH 13/15] fixes --- toolchain/driver/clang_runner.cpp | 2 +- toolchain/install/BUILD | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/toolchain/driver/clang_runner.cpp b/toolchain/driver/clang_runner.cpp index b9054b44ecfe7..611dbaf5f0443 100644 --- a/toolchain/driver/clang_runner.cpp +++ b/toolchain/driver/clang_runner.cpp @@ -136,7 +136,7 @@ static auto IsNonLinkCommand(llvm::ArrayRef args) -> bool { // since it's fine to have false negatives. return arg == "-c" || arg == "-E" || arg == "-S" || arg == "-fsyntax-only" || arg == "--version" || arg == "--help" || - arg == "/?"; + arg == "/?" || arg == "--driver-mode=cpp"; }); } diff --git a/toolchain/install/BUILD b/toolchain/install/BUILD index caa379a3ec0ee..ca3c928f950f4 100644 --- a/toolchain/install/BUILD +++ b/toolchain/install/BUILD @@ -152,10 +152,7 @@ filegroup( # `builtins` directory. 
filegroup( name = "clang_builtins_runtimes", - srcs = [ - runtime - for runtime in CRT_FILES.values() + BUILTINS_FILEGROUPS.values() - ], + srcs = CRT_FILES.values() + BUILTINS_FILEGROUPS.values(), ) # Given a root `prefix_root`, the hierarchy looks like: From e964a87b574ffb2652c8bb560c81746336947236 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Sat, 26 Apr 2025 00:20:40 +0000 Subject: [PATCH 14/15] fix harder --- toolchain/install/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/toolchain/install/BUILD b/toolchain/install/BUILD index ca3c928f950f4..d8e6b96687f90 100644 --- a/toolchain/install/BUILD +++ b/toolchain/install/BUILD @@ -205,7 +205,7 @@ make_install_filegroups( py_test( name = "llvm_symlinks_test", - size = "small", + size = "medium", srcs = ["llvm_symlinks_test.py"], data = [":install_data"], ) From 130e1ad6da0d1b7b4573388f599945ce3dfa0ff7 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Tue, 6 May 2025 18:04:19 -0700 Subject: [PATCH 15/15] Update toolchain/driver/clang_runner.cpp Co-authored-by: Jon Ross-Perkins --- toolchain/driver/clang_runner.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/toolchain/driver/clang_runner.cpp b/toolchain/driver/clang_runner.cpp index 611dbaf5f0443..20586e4e195af 100644 --- a/toolchain/driver/clang_runner.cpp +++ b/toolchain/driver/clang_runner.cpp @@ -365,7 +365,7 @@ auto ClangRunner::MakeTmpDir() -> void { // We use the object's address to avoid collisions between two instances, // whether from two processes or two instances in this process. This is a bit - // of a hack perfect, but should be fine until we implement caching at which + // of a hack, but should be fine until we implement caching at which // point we'll need to do something better here. auto salt = reinterpret_cast(this); std::string name = llvm::formatv("carbon-runtimes.{0:x}", salt).str();