diff --git a/.codespell_ignore b/.codespell_ignore index 04c4a9d37a093..06387ec012c8f 100644 --- a/.codespell_ignore +++ b/.codespell_ignore @@ -2,6 +2,7 @@ # Exceptions. See /LICENSE for license information. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +ArchType atleast circularly compiletime @@ -14,6 +15,7 @@ forin groupt indext inout +isELF parameteras pullrequest rightt diff --git a/MODULE.bazel b/MODULE.bazel index 35a11f23ce791..6f02f0d25079c 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -126,6 +126,7 @@ http_archive( "@carbon//bazel/llvm_project:0001_Patch_for_mallinfo2_when_using_Bazel_build_system.patch", "@carbon//bazel/llvm_project:0002_Added_Bazel_build_for_compiler_rt_fuzzer.patch", "@carbon//bazel/llvm_project:0003_Comment_out_unloaded_proto_library_dependencies.patch", + "@carbon//bazel/llvm_project:0004_Introduce_filegroups_for_compiler_rt_builtins_runtime.patch", ], sha256 = "8466760c8d69c5d3a1d2561813f47fa9a6962076adfb2b3f7aa0a69417b36c52", strip_prefix = "llvm-project-{0}".format(llvm_project_version), diff --git a/bazel/llvm_project/0004_Introduce_filegroups_for_compiler_rt_builtins_runtime.patch b/bazel/llvm_project/0004_Introduce_filegroups_for_compiler_rt_builtins_runtime.patch new file mode 100644 index 0000000000000..3a43b555f4ffd --- /dev/null +++ b/bazel/llvm_project/0004_Introduce_filegroups_for_compiler_rt_builtins_runtime.patch @@ -0,0 +1,194 @@ +From 19d5d9913778ca95da272f41c5916907154a5e73 Mon Sep 17 00:00:00 2001 +From: Chandler Carruth +Date: Thu, 24 Apr 2025 05:03:43 +0000 +Subject: [PATCH] Introduce filegroups for compiler-rt builtins runtimes + +These filegroups allow downstream projects to package and build +customized runtime libraries. + +The filegroups work hard to use globs and a careful structuring to +create the structured breakdown of sources needed to target different +architectures and platforms without having to maintain a complete +parallel list of sources from CMake. 
+--- + .../compiler-rt/BUILD.bazel | 167 ++++++++++++++++++ + 1 file changed, 167 insertions(+) + +diff --git a/utils/bazel/llvm-project-overlay/compiler-rt/BUILD.bazel b/utils/bazel/llvm-project-overlay/compiler-rt/BUILD.bazel +index 6a5a89fdee40..7d158f0c13f2 100644 +--- a/utils/bazel/llvm-project-overlay/compiler-rt/BUILD.bazel ++++ b/utils/bazel/llvm-project-overlay/compiler-rt/BUILD.bazel +@@ -128,3 +128,170 @@ cc_library( + ], + includes = ["lib/fuzzer"], + ) ++ ++BUILTINS_CRTBEGIN_SRCS = ["lib/builtins/crtbegin.c"] ++ ++filegroup( ++ name = "builtins_crtbegin_src", ++ srcs = BUILTINS_CRTBEGIN_SRCS, ++) ++ ++BUILTINS_CRTEND_SRCS = ["lib/builtins/crtend.c"] ++ ++filegroup( ++ name = "builtins_crtend_src", ++ srcs = BUILTINS_CRTEND_SRCS, ++) ++ ++# Note that while LLVM's CompilerRT provides a few hosted sources, we don't ++# currently build them: ++# ++# - `emutls.c`: Unclear we need to support targets with software emulated ++# TLS rather than hardware support. ++# - `enable_execute_stack.c`: Used to implement support for a builtin that ++# marks part of the stack as *executable* to support the GCC extension of ++# nested functions. This extension was never implemented in Clang, and is ++# generally considered a security issue to include. We expect to be able ++# to avoid even linking the support code for this into binaries at this ++# point. ++# - `eprintf.c`: This provided a legacy `__eprintf` builtin used by old ++# versions of `assert.h` in its macros, but does not appear to be needed ++# when building with modern versions of this header. ++BUILTINS_HOSTED_SRCS = [ ++ "lib/builtins/emutls.c", ++ "lib/builtins/enable_execute_stack.c", ++ "lib/builtins/eprintf.c", ++] ++ ++filegroup( ++ name = "builtins_hosted_srcs", ++ srcs = BUILTINS_HOSTED_SRCS, ++) ++ ++BUILTINS_BF16_SRCS_PATTERNS = [ ++ # `bf` marks 16-bit Brain floating-point number builtins. 
++ "lib/builtins/*bf*.c", ++] ++ ++filegroup( ++ name = "builtins_bf16_srcs", ++ srcs = glob(BUILTINS_BF16_SRCS_PATTERNS), ++) ++ ++BUILTINS_X86_FP80_SRCS_PATTERNS = [ ++ # `xc` marks 80-bit complex number builtins. ++ "lib/builtins/*xc*.c", ++ ++ # `xf` marks 80-bit floating-point builtins. ++ "lib/builtins/*xf*.c", ++] ++ ++filegroup( ++ name = "builtins_x86_fp80_srcs", ++ srcs = glob( ++ BUILTINS_X86_FP80_SRCS_PATTERNS, ++ exclude = BUILTINS_BF16_SRCS_PATTERNS, ++ ), ++) ++ ++BUILTINS_TF_SRCS_PATTERNS = [ ++ # `tc` marks 128-bit complex number builtins. ++ "lib/builtins/*tc*.c", ++ ++ # `tf` marks 128-bit floating-point builtins. ++ "lib/builtins/*tf*.c", ++] ++ ++BUILTINS_TF_EXCLUDES = ( ++ BUILTINS_HOSTED_SRCS + ++ BUILTINS_BF16_SRCS_PATTERNS + ++ BUILTINS_X86_FP80_SRCS_PATTERNS ++) ++ ++filegroup( ++ name = "builtins_tf_srcs", ++ srcs = glob( ++ BUILTINS_TF_SRCS_PATTERNS, ++ exclude = BUILTINS_TF_EXCLUDES, ++ ), ++) ++ ++BUILTINS_MACOS_ATOMIC_SRCS_PATTERNS = [ ++ "lib/builtins/atomic_*.c", ++] ++ ++filegroup( ++ name = "builtins_macos_atomic_srcs", ++ srcs = glob(BUILTINS_MACOS_ATOMIC_SRCS_PATTERNS), ++) ++ ++filegroup( ++ name = "builtins_aarch64_srcs", ++ srcs = [ ++ "lib/builtins/cpu_model/aarch64.c", ++ "lib/builtins/cpu_model/aarch64.h", ++ ] + glob( ++ [ ++ "lib/builtins/cpu_model/AArch64*.inc", ++ "lib/builtins/cpu_model/aarch64/**/*.inc", ++ "lib/builtins/aarch64/*.S", ++ "lib/builtins/aarch64/*.c", ++ ], ++ exclude = [ ++ # This file isn't intended to directly compile, but to be used to ++ # generate a collection of outline atomic helpers. ++ # TODO: Add support for generating the sources for these helpers if ++ # there are users that need this functionality from the builtins ++ # library. 
++ "lib/builtins/aarch64/lse.S", ++ ], ++ ), ++) ++ ++filegroup( ++ name = "builtins_x86_arch_srcs", ++ srcs = [ ++ "lib/builtins/cpu_model/x86.c", ++ "lib/builtins/i386/fp_mode.c", ++ ], ++) ++ ++filegroup( ++ name = "builtins_x86_64_srcs", ++ srcs = glob([ ++ "lib/builtins/x86_64/*.c", ++ "lib/builtins/x86_64/*.S", ++ ]), ++) ++ ++filegroup( ++ name = "builtins_i386_srcs", ++ srcs = glob( ++ [ ++ "lib/builtins/i386/*.c", ++ "lib/builtins/i386/*.S", ++ ], ++ exclude = [ ++ # This file is used for both i386 and x86_64. ++ "lib/builtins/i386/fp_mode.c", ++ ], ++ ), ++) ++ ++filegroup( ++ name = "builtins_generic_srcs", ++ srcs = ["lib/builtins/cpu_model/cpu_model.h"] + glob( ++ [ ++ "lib/builtins/*.c", ++ "lib/builtins/*.h", ++ "lib/builtins/*.inc", ++ ], ++ exclude = ( ++ BUILTINS_CRTBEGIN_SRCS + ++ BUILTINS_CRTEND_SRCS + ++ BUILTINS_TF_EXCLUDES + ++ BUILTINS_TF_SRCS_PATTERNS + ++ BUILTINS_MACOS_ATOMIC_SRCS_PATTERNS ++ ), ++ ), ++) +-- +2.49.0.850.g28803427d3-goog + diff --git a/scripts/fix_cc_deps.py b/scripts/fix_cc_deps.py index d8a75fd50bd4b..001f757892a5c 100755 --- a/scripts/fix_cc_deps.py +++ b/scripts/fix_cc_deps.py @@ -70,7 +70,8 @@ class RuleChoice(NamedTuple): IGNORE_SOURCE_FILE_REGEX = re.compile( r"^(third_party/clangd.*|common/version.*\.cpp" r"|.*_autogen_manifest\.cpp" - r"|toolchain/base/llvm_tools.def)$" + r"|toolchain/base/llvm_tools.def" + r"|toolchain/base/runtime_sources.h)$" ) diff --git a/toolchain/base/BUILD b/toolchain/base/BUILD index 5255190b16b98..ed72a1eb740cb 100644 --- a/toolchain/base/BUILD +++ b/toolchain/base/BUILD @@ -4,6 +4,7 @@ load("//bazel/cc_rules:defs.bzl", "cc_library", "cc_test") load("llvm_tools.bzl", "LLVM_MAIN_TOOLS", "generate_llvm_tools_def") +load("runtime_sources.bzl", "generate_runtime_sources_cc_library") package(default_visibility = ["//visibility:public"]) @@ -173,6 +174,8 @@ cc_library( ] + [info.lib for info in LLVM_MAIN_TOOLS.values()], ) +generate_runtime_sources_cc_library(name = "runtime_sources") + 
cc_library( name = "shared_value_stores", hdrs = ["shared_value_stores.h"], diff --git a/toolchain/base/runtime_sources.bzl b/toolchain/base/runtime_sources.bzl new file mode 100644 index 0000000000000..16a8dadc35f36 --- /dev/null +++ b/toolchain/base/runtime_sources.bzl @@ -0,0 +1,157 @@ +# Part of the Carbon Language project, under the Apache License v2.0 with LLVM +# Exceptions. See /LICENSE for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +"""Provides variables and rules to work with Clang's runtime library sources. + +These are organized into groups based on the runtime functionality: +- CRT: The C language runtimes not provided by the C standard library, currently + just infrastructure for global initialization and teardown. +- Builtins: The compiler builtins library mirroring `libgcc` that provides + function definitions for operations not reliably available in hardware but + needed by Clang. + +Future runtimes we plan to add support for but not yet included: +- Libunwind +- Libc++ and libc++abi +- Sanitizers +- Profiling runtimes +""" + +load("@rules_cc//cc:cc_library.bzl", "cc_library") + +CRT_FILES = { + "crtbegin_src": "@llvm-project//compiler-rt:builtins_crtbegin_src", + "crtend_src": "@llvm-project//compiler-rt:builtins_crtend_src", +} + +BUILTINS_FILEGROUPS = { + "aarch64_srcs": "@llvm-project//compiler-rt:builtins_aarch64_srcs", + "bf16_srcs": "@llvm-project//compiler-rt:builtins_bf16_srcs", + "generic_srcs": "@llvm-project//compiler-rt:builtins_generic_srcs", + "i386_srcs": "@llvm-project//compiler-rt:builtins_i386_srcs", + "macos_srcs": "@llvm-project//compiler-rt:builtins_macos_atomic_srcs", + "tf_srcs": "@llvm-project//compiler-rt:builtins_tf_srcs", + "x86_64_srcs": "@llvm-project//compiler-rt:builtins_x86_64_srcs", + "x86_arch_srcs": "@llvm-project//compiler-rt:builtins_x86_arch_srcs", + "x86_fp80_srcs": "@llvm-project//compiler-rt:builtins_x86_fp80_srcs", +} + +_TEMPLATE = """ +// Part of the Carbon Language 
project, under the Apache License v2.0 with LLVM +// Exceptions. See /LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Generated header file of strings describing the Clang runtime library source +// files. +// +// See toolchain/base/runtime_sources.bzl for more details. + +#ifndef CARBON_TOOLCHAIN_BASE_RUNTIME_SOURCES_H_ +#define CARBON_TOOLCHAIN_BASE_RUNTIME_SOURCES_H_ + +#include "llvm/ADT/StringRef.h" + +namespace Carbon::RuntimeSources {{ + +constexpr inline llvm::StringLiteral CrtBegin = {crtbegin_src}; +constexpr inline llvm::StringLiteral CrtEnd = {crtend_src}; + +constexpr inline llvm::StringLiteral BuiltinsGenericSrcs[] = {{ +{generic_srcs} +}}; +constexpr inline llvm::StringLiteral BuiltinsMacosSrcs[] = {{ +{macos_srcs} +}}; +constexpr inline llvm::StringLiteral BuiltinsBf16Srcs[] = {{ +{bf16_srcs} +}}; +constexpr inline llvm::StringLiteral BuiltinsTfSrcs[] = {{ +{tf_srcs} +}}; +constexpr inline llvm::StringLiteral BuiltinsX86ArchSrcs[] = {{ +{x86_arch_srcs} +}}; +constexpr inline llvm::StringLiteral BuiltinsX86Fp80Srcs[] = {{ +{x86_fp80_srcs} +}}; +constexpr inline llvm::StringLiteral BuiltinsAarch64Srcs[] = {{ +{aarch64_srcs} +}}; +constexpr inline llvm::StringLiteral BuiltinsX86_64Srcs[] = {{ +{x86_64_srcs} +}}; +constexpr inline llvm::StringLiteral BuiltinsI386Srcs[] = {{ +{i386_srcs} +}}; + +}} // namespace Carbon::RuntimeSources + +#endif // CARBON_TOOLCHAIN_BASE_RUNTIME_SOURCES_H_ +""" + +def _builtins_path(file): + """Returns the runtime install path for a file in CompilerRT's builtins library.""" + + # The CompilerRT package has the builtins runtime sources in the + # "lib/builtins/" subdirectory, and we install into a "builtins/" + # subdirectory, so just remove the "lib/" prefix from the package-relative + # label name. 
+ return file.owner.name.removeprefix("lib/") + +def _get_path(file_attr, to_path_fn): + files = file_attr[DefaultInfo].files.to_list() + if len(files) > 1: + fail(msg = "Expected a single file and got {0} files.".format(len(files))) + + return '"{0}"'.format(to_path_fn(files[0])) + +def _get_paths(files_attr, to_path_fn): + files = [] + for src in files_attr: + files.extend(src[DefaultInfo].files.to_list()) + files.extend(src[DefaultInfo].default_runfiles.files.to_list()) + + return "\n".join([ + ' "{0}",'.format(to_path_fn(f)) + for f in files + ]) + +def _generate_runtime_sources_h_rule(ctx): + h_file = ctx.actions.declare_file(ctx.label.name) + ctx.actions.write(h_file, _TEMPLATE.format(**({ + k: _get_path(getattr(ctx.attr, "_" + k), _builtins_path) + for k in CRT_FILES.keys() + } | { + k: _get_paths(getattr(ctx.attr, "_" + k), _builtins_path) + for k in BUILTINS_FILEGROUPS.keys() + }))) + return [DefaultInfo(files = depset([h_file]))] + +generate_runtime_sources_h = rule( + implementation = _generate_runtime_sources_h_rule, + attrs = { + "_" + k: attr.label(default = v, allow_single_file = True) + for k, v in CRT_FILES.items() + } | { + "_" + k: attr.label_list(default = [v], allow_files = True) + for k, v in BUILTINS_FILEGROUPS.items() + }, +) + +def generate_runtime_sources_cc_library(name, **kwargs): + """Generates a `runtime_sources.h` header and a `cc_library` rule for it. + + This first generates the header file with variables describing the runtime + sources from Clang, and then a `cc_library` that exports that header. + + The `cc_library` rule name is the provided `name` and should be depended on + by code that includes the generated header. The `kwargs` are expanded into + the `cc_library` in case other attributes need to be configured there. 
+ """ + generate_runtime_sources_h(name = "runtime_sources.h") + cc_library( + name = name, + hdrs = ["runtime_sources.h"], + **kwargs + ) diff --git a/toolchain/driver/BUILD b/toolchain/driver/BUILD index 9913a85fe02ac..0c7afe41ed537 100644 --- a/toolchain/driver/BUILD +++ b/toolchain/driver/BUILD @@ -21,15 +21,18 @@ cc_library( srcs = ["clang_runner.cpp"], hdrs = ["clang_runner.h"], deps = [ + ":llvm_runner", ":tool_runner_base", "//common:ostream", "//common:vlog", + "//toolchain/base:runtime_sources", "//toolchain/install:install_paths", "@llvm-project//clang:basic", "@llvm-project//clang:clang-driver", "@llvm-project//clang:driver", "@llvm-project//clang:frontend", "@llvm-project//llvm:Core", + "@llvm-project//llvm:Object", "@llvm-project//llvm:Support", "@llvm-project//llvm:TargetParser", ], @@ -39,8 +42,10 @@ cc_test( name = "clang_runner_test", size = "small", srcs = ["clang_runner_test.cpp"], + data = ["//toolchain/install:install_data"], deps = [ ":clang_runner", + ":llvm_runner", "//common:all_llvm_targets", "//common:check", "//common:ostream", diff --git a/toolchain/driver/clang_runner.cpp b/toolchain/driver/clang_runner.cpp index 8ffbae9e6c089..20586e4e195af 100644 --- a/toolchain/driver/clang_runner.cpp +++ b/toolchain/driver/clang_runner.cpp @@ -4,11 +4,15 @@ #include "toolchain/driver/clang_runner.h" +#include + #include +#include #include #include #include #include +#include #include #include "clang/Basic/Diagnostic.h" @@ -23,11 +27,15 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/Object/ArchiveWriter.h" +#include "llvm/Support/Error.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/FormatAdapters.h" #include "llvm/Support/LLVMDriver.h" #include "llvm/Support/Path.h" #include "llvm/Support/Program.h" #include "llvm/TargetParser/Host.h" +#include "toolchain/base/runtime_sources.h" // Defined in: // 
https://github.com/llvm/llvm-project/blob/main/clang/tools/driver/driver.cpp @@ -42,17 +50,170 @@ auto clang_main(int Argc, char** Argv, const llvm::ToolContext& ToolContext) namespace Carbon { ClangRunner::ClangRunner(const InstallPaths* install_paths, - llvm::StringRef target, llvm::IntrusiveRefCntPtr fs, llvm::raw_ostream* vlog_stream) : ToolRunnerBase(install_paths, vlog_stream), - target_(target), fs_(std::move(fs)), diagnostic_ids_(new clang::DiagnosticIDs()) {} +ClangRunner::~ClangRunner() { + if (!tmp_dir_.empty()) { + std::error_code ec; + if (std::filesystem::remove_all(tmp_dir_, ec) == 0) { + CARBON_VLOG("Expected a temporary directory and found none: {0}", + tmp_dir_); + } else if (ec) { + CARBON_VLOG("Error when cleaning temporary directory `{0}`: {1}", + tmp_dir_, ec.message()); + } + } +} + +// Searches an argument list to a Clang execution to determine the expected +// target string, suitable for use with `llvm::Triple`. +// +// If no explicit target flags are present, this defaults to the default +// LLVM target. +// +// Works to handle the most common flags that modify the expected target as +// well as direct target flags. +// +// Note: this has known fidelity issues if the args include separate-value flags +// (`--flag value` style as opposed to `--flag=value`) where the value might +// match the spelling of one of the target flags. For example, args that include +// an output file spelled `-m32` (so `-o` followed by `-m32`) will be +// misinterpreted by considering the value to itself be a flag. Addressing this +// would add substantial complexity, including likely parsing the entire args +// twice with the Clang driver. Instead, our current plan is to document this +// limitation and encourage the use of flags with joined values +// (`--flag=value`). 
+static auto ComputeClangTarget(llvm::ArrayRef args) + -> std::string { + std::string target = llvm::sys::getDefaultTargetTriple(); + bool explicit_target = false; + for (auto [i, arg] : llvm::enumerate(args)) { + if (llvm::StringRef arg_copy = arg; arg_copy.consume_front("--target=")) { + target = arg_copy.str(); + explicit_target = true; + } else if ((arg == "--target" || arg == "-target") && + (i + 1) < args.size()) { + target = args[i + 1].str(); + explicit_target = true; + } else if (!explicit_target && + (arg == "--driver-mode=cl" || + ((arg == "--driver-mode" || arg == "-driver-mode") && + (i + 1) < args.size() && args[i + 1] == "cl"))) { + // The `cl.exe` compatible driver mode should switch the default target to + // a `...-pc-windows-msvc` target. However, a subsequent explicit target + // should override this. + llvm::Triple triple(target); + triple.setVendor(llvm::Triple::PC); + triple.setOS(llvm::Triple::Win32); + triple.setEnvironment(llvm::Triple::MSVC); + target = triple.str(); + } else if (arg == "-m32") { + llvm::Triple triple(target); + if (!triple.isArch32Bit()) { + target = triple.get32BitArchVariant().str(); + } + } else if (arg == "-m64") { + llvm::Triple triple(target); + if (!triple.isArch64Bit()) { + target = triple.get64BitArchVariant().str(); + } + } + } + return target; +} + +// Tries to detect a non-linking list of Clang arguments to avoid setting up +// the more complete resource directory needed for linking. False negatives are +// fine here, and we use that to keep things simple. +static auto IsNonLinkCommand(llvm::ArrayRef args) -> bool { + return llvm::any_of(args, [](llvm::StringRef arg) { + // Only check the most common cases as we have to do this for each argument. + // Everything else is rare and likely not worth the cost of searching for + // since it's fine to have false negatives. + return arg == "-c" || arg == "-E" || arg == "-S" || + arg == "-fsyntax-only" || arg == "--version" || arg == "--help" || + arg == "/?" 
|| arg == "--driver-mode=cpp"; + }); +} + auto ClangRunner::Run(llvm::ArrayRef args) -> bool { + // Check the args to see if we have a known target-independent command. If so, + // directly dispatch it to avoid the cost of building the target resource + // directory. // TODO: Maybe handle response file expansion similar to the Clang CLI? + if (args.empty() || args[0].starts_with("-cc1") || IsNonLinkCommand(args)) { + return RunTargetIndependentCommand(args); + } + // Otherwise, we need to build a target resource directory. + CARBON_VLOG("Building target resource dir...\n"); + std::string target = ComputeClangTarget(args); + std::string resource_dir_path = BuildTargetResourceDir(target); + + return RunInternal(args, target, resource_dir_path); +} + +auto ClangRunner::RunTargetIndependentCommand( + llvm::ArrayRef args) -> bool { + std::string target = ComputeClangTarget(args); + return RunInternal(args, target, std::nullopt); +} + +auto ClangRunner::BuildTargetResourceDir(llvm::StringRef target) + -> std::string { + // Disable any leaking of memory while building the target resource dir, and + // restore the previous setting at the end. + auto restore_leak_flag = llvm::make_scope_exit( + [&, orig_flag = enable_leaking_] { enable_leaking_ = orig_flag; }); + enable_leaking_ = false; + + MakeTmpDir(); + std::filesystem::path resource_dir_path = tmp_dir_ / "clang"; + std::error_code ec; + std::filesystem::create_directory(resource_dir_path, ec); + + // Symlink the installation's `include` and `share` directories. 
+ std::filesystem::path install_resource_path = + installation_->clang_resource_path(); + std::filesystem::create_symlink(install_resource_path / "include", + resource_dir_path / "include", ec); + std::filesystem::create_symlink(install_resource_path / "share", + resource_dir_path / "share", ec); + + std::filesystem::path lib_path = resource_dir_path / "lib" / target.str(); + std::filesystem::create_directories(lib_path, ec); + + llvm::Triple target_triple(target); + CARBON_CHECK(!target_triple.isOSWindows(), + "TODO: Windows runtimes are untested and not yet supported."); + + // For Linux targets, the system libc (typically glibc) doesn't necessarily + // provide the CRT begin/end files, and so we need to build them. + if (target_triple.isOSLinux()) { + std::filesystem::path begin_o_path = lib_path / "clang_rt.crtbegin.o"; + BuildCrtFile(target, RuntimeSources::CrtBegin, begin_o_path.native()); + + std::filesystem::path end_o_path = lib_path / "clang_rt.crtend.o"; + BuildCrtFile(target, RuntimeSources::CrtEnd, end_o_path.native()); + } + + // We need a separate directory for object files used in the builtins archive. + std::filesystem::path objs_path = tmp_dir_ / "_objs"; + std::filesystem::create_directory(objs_path, ec); + std::filesystem::path builtins_a_path = lib_path / "libclang_rt.builtins.a"; + BuildBuiltinsLib(target, target_triple, objs_path, builtins_a_path.native()); + + return resource_dir_path; +} + +// Handles building the Clang driver and passing the arguments down to it. +auto ClangRunner::RunInternal( + llvm::ArrayRef args, llvm::StringRef target, + std::optional target_resource_dir_path) -> bool { std::string clang_path = installation_->clang_path(); // Rebuild the args as C-string args. @@ -60,6 +221,7 @@ auto ClangRunner::Run(llvm::ArrayRef args) -> bool { llvm::SmallVector cstr_args = BuildCStrArgs("Clang", clang_path, "-v", args, cstr_arg_storage); + // Handle special dispatch for CC1 commands as they don't use the driver. 
if (!args.empty() && args[0].starts_with("-cc1")) { CARBON_VLOG("Calling clang_main for cc1..."); // cstr_args[0] will be the `clang_path` so we don't need the prepend arg. @@ -89,8 +251,31 @@ auto ClangRunner::Run(llvm::ArrayRef args) -> bool { /*ShouldOwnClient=*/false); clang::ProcessWarningOptions(diagnostics, *diagnostic_options, *fs_); - clang::driver::Driver driver(clang_path, target_, diagnostics, - "clang LLVM compiler", fs_); + // Note that we configure the driver's *default* target here, not the expected + // target as that will be parsed out of the command line below. + clang::driver::Driver driver(clang_path, llvm::sys::getDefaultTargetTriple(), + diagnostics, "clang LLVM compiler", fs_); + + llvm::Triple target_triple(target); + + // We need to set an SDK system root on macOS by default. Setting it here + // allows a custom sysroot to still be specified on the command line. + // + // TODO: A different system root should be used for iOS, watchOS, tvOS. + // Currently, we're only targeting macOS support though. + if (target_triple.isMacOSX()) { + // This is the default CLT system root, shown by `xcrun --show-sdk-path`. + // We hard code it here to avoid the overhead of subprocessing to `xcrun` on + // each Clang invocation, but this may need to be updated to search or + // reflect macOS versions if this changes in the future. + driver.SysRoot = "/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk"; + } + + // If we have a target-specific resource directory, set it as the default + // here. + if (target_resource_dir_path) { + driver.ResourceDir = target_resource_dir_path->str(); + } // Configure the install directory to find other tools and data files. // @@ -134,6 +319,14 @@ auto ClangRunner::Run(llvm::ArrayRef args) -> bool { return false; } + // Make sure our target detection matches Clang's. Sadly, we can't just reuse + // Clang's as it is available too late. + // TODO: Use nice diagnostics here rather than a check failure. 
+ CARBON_CHECK(llvm::Triple(target) == llvm::Triple(driver.getTargetTriple()), + "Mismatch between the expected target '{0}' and the one " + "computed by Clang '{1}'", + target, driver.getTargetTriple()); + CARBON_VLOG("Running Clang driver...\n"); llvm::SmallVector> @@ -157,4 +350,187 @@ auto ClangRunner::Run(llvm::ArrayRef args) -> bool { return result == 0 && failing_commands.empty(); } +auto ClangRunner::MakeTmpDir() -> void { + if (!tmp_dir_.empty()) { + return; + } + + // Make a temporary directory exclusive to this builder. + std::filesystem::path tmp_dir; + if (const char* tmpdir_env = getenv("TEST_TMPDIR"); tmpdir_env != nullptr) { + tmp_dir = tmpdir_env; + } else { + tmp_dir = std::filesystem::temp_directory_path(); + } + + // We use the object's address to avoid collisions between two instances, + // whether from two processes or two instances in this process. This is a bit + // of a hack, but should be fine until we implement caching at which + // point we'll need to do something better here. + auto salt = reinterpret_cast(this); + std::string name = llvm::formatv("carbon-runtimes.{0:x}", salt).str(); + std::error_code ec; + int attempt = 0; + // Limit how many tries we make. + constexpr int MaxAttempts = 128; + while (!std::filesystem::create_directory(tmp_dir / name, ec)) { + CARBON_CHECK(!ec || ec == std::errc::file_exists, + "Unable to create temporary directory: {0}", ec.message()); + ++attempt; + CARBON_CHECK(attempt < MaxAttempts, + "Tried too many times to create a temporary directory, last " + "attempted path: {0}", + tmp_dir / name); + + // Try appending an attempt string. + name = llvm::formatv("carbon-runtimes.{0:x}.{1}", salt, attempt).str(); + } + + // When we successfully create a temporary directory, commit it to the object. 
+ tmp_dir_ = tmp_dir / name; +} + +auto ClangRunner::BuildCrtFile(llvm::StringRef target, llvm::StringRef src_file, + llvm::StringRef out_path) -> void { + llvm::SmallString<256> src_path(installation_->llvm_runtime_srcs()); + llvm::sys::path::append(src_path, llvm::sys::path::Style::posix, src_file); + CARBON_VLOG("Building `{0}' from `{1}`...\n", out_path, src_path); + + std::string target_arg = llvm::formatv("--target={0}", target).str(); + CARBON_CHECK(RunTargetIndependentCommand({ + "-no-canonical-prefixes", + target_arg, + "-DCRT_HAS_INITFINI_ARRAY", + "-DEH_USE_FRAME_REGISTRY", + "-O3", + "-fPIC", + "-ffreestanding", + "-std=c11", + "-w", + "-c", + "-o", + out_path, + src_path, + })); +} + +auto ClangRunner::CollectBuiltinsSrcFiles(const llvm::Triple& target_triple) + -> llvm::SmallVector { + llvm::SmallVector src_files; + auto append_src_files = + [&](auto input_srcs, + llvm::function_ref filter_out = {}) { + for (llvm::StringRef input_src : input_srcs) { + if (!input_src.ends_with(".c") && !input_src.ends_with(".S")) { + // Not a compiled file. + continue; + } + if (filter_out && filter_out(input_src)) { + // Filtered out. 
+ continue; + } + + src_files.push_back(input_src); + } + }; + append_src_files(llvm::ArrayRef(RuntimeSources::BuiltinsGenericSrcs)); + append_src_files(llvm::ArrayRef(RuntimeSources::BuiltinsBf16Srcs)); + if (target_triple.isArch64Bit()) { + append_src_files(llvm::ArrayRef(RuntimeSources::BuiltinsTfSrcs)); + } + auto filter_out_chkstk = [&](llvm::StringRef src) { + return !target_triple.isOSWindows() && src.ends_with("chkstk.S"); + }; + if (target_triple.isAArch64()) { + append_src_files(llvm::ArrayRef(RuntimeSources::BuiltinsAarch64Srcs), + filter_out_chkstk); + } else if (target_triple.isX86()) { + append_src_files(llvm::ArrayRef(RuntimeSources::BuiltinsX86ArchSrcs)); + if (target_triple.isArch64Bit()) { + append_src_files(llvm::ArrayRef(RuntimeSources::BuiltinsX86_64Srcs), + filter_out_chkstk); + } else { + // TODO: This should be turned into a nice user-facing diagnostic about an + // unsupported target. + CARBON_CHECK( + target_triple.isArch32Bit(), + "The Carbon toolchain doesn't currently support 16-bit x86."); + append_src_files(llvm::ArrayRef(RuntimeSources::BuiltinsI386Srcs), + filter_out_chkstk); + } + } else { + // TODO: This should be turned into a nice user-facing diagnostic about an + // unsupported target. 
+ CARBON_FATAL("Target architecture is not supported: {0}", + target_triple.str()); + } + return src_files; +} + +auto ClangRunner::BuildBuiltinsFile(llvm::StringRef target, + llvm::StringRef src_file, + llvm::StringRef out_path) -> void { + llvm::SmallString<256> src_path(installation_->llvm_runtime_srcs()); + llvm::sys::path::append(src_path, llvm::sys::path::Style::posix, src_file); + CARBON_VLOG("Building `{0}' from `{1}`...\n", out_path, src_path); + + std::string target_arg = llvm::formatv("--target={0}", target).str(); + CARBON_CHECK(RunTargetIndependentCommand({ + "-no-canonical-prefixes", + target_arg, + "-O3", + "-fPIC", + "-ffreestanding", + "-fno-builtin", + "-fomit-frame-pointer", + "-fvisibility=hidden", + "-std=c11", + "-w", + "-c", + "-o", + out_path, + src_path, + })); +} + +auto ClangRunner::BuildBuiltinsLib(llvm::StringRef target, + const llvm::Triple& target_triple, + const std::filesystem::path& objs_path, + llvm::StringRef out_path) -> void { + llvm::SmallVector src_files = + CollectBuiltinsSrcFiles(target_triple); + + llvm::SmallVector objs; + objs.reserve(src_files.size()); + for (llvm::StringRef src_file : src_files) { + std::filesystem::path obj_path = objs_path / src_file.str(); + std::error_code ec; + std::filesystem::create_directories(obj_path.parent_path(), ec); + obj_path += ".o"; + BuildBuiltinsFile(target, src_file, obj_path.native()); + + llvm::Expected obj = + llvm::NewArchiveMember::getFile(obj_path.native(), + /*Deterministic=*/true); + CARBON_CHECK(obj, "TODO: Diagnose this: {0}", + llvm::fmt_consume(obj.takeError())); + objs.push_back(std::move(*obj)); + } + + // Now build an archive out of the `.o` files for the builtins. + std::error_code ec; + llvm::raw_fd_ostream builtins_a_os(out_path, ec); + // TODO: Here and elsewhere we check for filesystem errors, and we should + // revisit this with some more structured approach for reporting errors due to + // a broken environment (filesystem misbehavior) as opposed to user-error. 
A + // check failure and stack trace is likely not how we want to handle that. + CARBON_CHECK(!ec, "Unable to open archive for writing: {0}", ec.message()); + llvm::Error archive_err = llvm::writeArchiveToStream( + builtins_a_os, objs, llvm::SymtabWritingMode::NormalSymtab, + target_triple.isOSDarwin() ? llvm::object::Archive::K_DARWIN + : llvm::object::Archive::K_GNU, + /*Deterministic=*/true, /*Thin=*/false); + CARBON_CHECK(!archive_err, "Error writing archive: {0}", archive_err); +} + } // namespace Carbon diff --git a/toolchain/driver/clang_runner.h b/toolchain/driver/clang_runner.h index 854c5bb85c224..6649b02fc998d 100644 --- a/toolchain/driver/clang_runner.h +++ b/toolchain/driver/clang_runner.h @@ -5,11 +5,14 @@ #ifndef CARBON_TOOLCHAIN_DRIVER_CLANG_RUNNER_H_ #define CARBON_TOOLCHAIN_DRIVER_CLANG_RUNNER_H_ +#include + #include "clang/Basic/DiagnosticIDs.h" #include "common/ostream.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/VirtualFileSystem.h" +#include "llvm/TargetParser/Triple.h" #include "toolchain/driver/tool_runner_base.h" #include "toolchain/install/install_paths.h" @@ -43,13 +46,36 @@ class ClangRunner : ToolRunnerBase { // // If `verbose` is passed as true, will enable verbose logging to the // `err_stream` both from the runner and Clang itself. - ClangRunner(const InstallPaths* install_paths, llvm::StringRef target, + ClangRunner(const InstallPaths* install_paths, llvm::IntrusiveRefCntPtr fs, llvm::raw_ostream* vlog_stream = nullptr); + ~ClangRunner(); // Run Clang with the provided arguments. + // + // This works to support all of the Clang commandline, including commands that + // use target-dependent resources like linking. When it detects such commands, + // it will build the necessary resources before running the command. auto Run(llvm::ArrayRef args) -> bool; + // Run Clang with the provided arguments and without any target-dependent + // resources. 
+ // + // This method can be used to avoid building target-dependent resources when + // unnecessary, but not all Clang command lines will work correctly. + // Specifically, compile-only commands will typically work, while linking will + // not. + auto RunTargetIndependentCommand(llvm::ArrayRef args) + -> bool; + + // Builds the target-specific resource directory for Clang. + // + // There is a resource directory installed alongside the Clang binary that + // contains all the target independent files such as headers. However, for + // target-specific files like runtimes, we build those on demand here and + // return the path. + auto BuildTargetResourceDir(llvm::StringRef target) -> std::string; + // Enable leaking memory. // // Clang can avoid deallocating some of its memory to improve compile time. @@ -62,7 +88,38 @@ class ClangRunner : ToolRunnerBase { auto EnableLeakingMemory() -> void { enable_leaking_ = true; } private: - llvm::StringRef target_; + auto RunInternal(llvm::ArrayRef args, llvm::StringRef target, + std::optional target_resource_dir_path) + -> bool; + + // TODO: Replace this with a filesystem cache, and proper management of + // concurrent cache access. There are a bunch of hacks in this function that + // aren't realistically reliable for larger use, and only designed as an + // interim state to separate the implementation of the caching behavior. + auto MakeTmpDir() -> void; + + // Helper to compile a single file of the CRT runtimes. + auto BuildCrtFile(llvm::StringRef target, llvm::StringRef src_file, + llvm::StringRef out_path) -> void; + + // Returns the target-specific source files for the builtins runtime library. + auto CollectBuiltinsSrcFiles(const llvm::Triple& target_triple) + -> llvm::SmallVector; + + // Helper to compile a single file of the compiler builtins runtimes.
+ auto BuildBuiltinsFile(llvm::StringRef target, llvm::StringRef src_file, + llvm::StringRef out_path) -> void; + + // Builds the builtins runtime library into the provided archive file path, + // using the provided objects path for intermediate object files. + auto BuildBuiltinsLib(llvm::StringRef target, + const llvm::Triple& target_triple, + const std::filesystem::path& objs_path, + llvm::StringRef out_path) -> void; + + // If non-empty, a temp directory for this instance which will be cleaned up + // on destruction. Set by `MakeTmpDir`. + std::filesystem::path tmp_dir_; llvm::IntrusiveRefCntPtr fs_; llvm::IntrusiveRefCntPtr diagnostic_ids_; diff --git a/toolchain/driver/clang_runner_test.cpp b/toolchain/driver/clang_runner_test.cpp index b2ab6e72e6ec1..d87aba3c60013 100644 --- a/toolchain/driver/clang_runner_test.cpp +++ b/toolchain/driver/clang_runner_test.cpp @@ -17,26 +17,58 @@ #include "common/raw_string_ostream.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/Object/Binary.h" +#include "llvm/Object/ObjectFile.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/Program.h" #include "llvm/TargetParser/Host.h" #include "testing/base/capture_std_streams.h" #include "testing/base/file_helpers.h" #include "testing/base/global_exe_path.h" +#include "toolchain/driver/llvm_runner.h" namespace Carbon { namespace { +using ::testing::Eq; using ::testing::HasSubstr; +using ::testing::IsSupersetOf; using ::testing::StrEq; -TEST(ClangRunnerTest, Version) { - RawStringOstream test_os; - const auto install_paths = +// NOLINTNEXTLINE(modernize-use-trailing-return-type): Macro based function. 
+MATCHER_P(TextSymbolNamed, name_matcher, "") { + llvm::Expected name = arg.getName(); + if (auto error = name.takeError()) { + *result_listener << "with an error instead of a name: " << error; + return false; + } + if (!testing::ExplainMatchResult(name_matcher, *name, result_listener)) { + return false; + } + // We have to dig out the section to determine if this was a text symbol. + auto expected_section_it = arg.getSection(); + if (auto error = expected_section_it.takeError()) { + *result_listener << "without a section: " << error; + return false; + } + llvm::object::SectionRef section = **expected_section_it; + if (!section.isText()) { + *result_listener << "in the non-text section: " << *section.getName(); + return false; + } + return true; +} + +class ClangRunnerTest : public ::testing::Test { + public: + InstallPaths install_paths_ = InstallPaths::MakeForBazelRunfiles(Testing::GetExePath()); - std::string target = llvm::sys::getDefaultTargetTriple(); - auto vfs = llvm::vfs::getRealFileSystem(); - ClangRunner runner(&install_paths, target, vfs, &test_os); + llvm::IntrusiveRefCntPtr vfs_ = + llvm::vfs::getRealFileSystem(); +}; + +TEST_F(ClangRunnerTest, Version) { + RawStringOstream test_os; + ClangRunner runner(&install_paths_, vfs_, &test_os); std::string out; std::string err; @@ -51,65 +83,22 @@ TEST(ClangRunnerTest, Version) { // Flush and get the captured stdout to test that this command worked. // We don't care about any particular version, just that it is printed. EXPECT_THAT(out, HasSubstr("clang version")); - // The target should match what we provided. - EXPECT_THAT(out, HasSubstr((llvm::Twine("Target: ") + target).str())); + // The target should match the LLVM default. + EXPECT_THAT(out, HasSubstr((llvm::Twine("Target: ") + + llvm::sys::getDefaultTargetTriple()) + .str())); // Clang's install should be our private LLVM install bin directory. 
EXPECT_THAT(out, HasSubstr(std::string("InstalledDir: ") + - install_paths.llvm_install_bin())); + install_paths_.llvm_install_bin())); } -// It's hard to write a portable and reliable unittest for all the layers of the -// Clang driver because they work hard to interact with the underlying -// filesystem and operating system. For now, we just check that a link command -// is echoed back with plausible contents. -// -// TODO: We should eventually strive to have a more complete setup that lets us -// test more complete Clang functionality here. -TEST(ClangRunnerTest, LinkCommandEcho) { - // Just create some empty files to use in a synthetic link command below. - std::filesystem::path foo_file = *Testing::WriteTestFile("foo.o", ""); - std::filesystem::path bar_file = *Testing::WriteTestFile("bar.o", ""); - - const auto install_paths = - InstallPaths::MakeForBazelRunfiles(Testing::GetExePath()); - RawStringOstream verbose_out; - std::string target = llvm::sys::getDefaultTargetTriple(); - auto vfs = llvm::vfs::getRealFileSystem(); - ClangRunner runner(&install_paths, target, vfs, &verbose_out); - std::string out; - std::string err; - EXPECT_TRUE(Testing::CallWithCapturedOutput( - out, err, - [&] { - return runner.Run( - {"-###", "-o", "binary", foo_file.string(), bar_file.string()}); - })) - << "Verbose output from runner:\n" - << verbose_out.TakeStr() << "\n"; - verbose_out.clear(); - - // Because we use `-###' above, we should just see the command that the Clang - // driver would have run in a subprocess. This will be very architecture - // dependent and have lots of variety, but we expect to see both file strings - // in it the command at least. - EXPECT_THAT(err, HasSubstr(foo_file.string())) << err; - EXPECT_THAT(err, HasSubstr(bar_file.string())) << err; - - // And no non-stderr output should be produced. 
- EXPECT_THAT(out, StrEq("")); -} - -TEST(ClangRunnerTest, DashC) { +TEST_F(ClangRunnerTest, DashC) { std::filesystem::path test_file = *Testing::WriteTestFile("test.cpp", "int test() { return 0; }"); std::filesystem::path test_output = *Testing::WriteTestFile("test.o", ""); - const auto install_paths = - InstallPaths::MakeForBazelRunfiles(Testing::GetExePath()); RawStringOstream verbose_out; - std::string target = llvm::sys::getDefaultTargetTriple(); - auto vfs = llvm::vfs::getRealFileSystem(); - ClangRunner runner(&install_paths, target, vfs, &verbose_out); + ClangRunner runner(&install_paths_, vfs_, &verbose_out); std::string out; std::string err; EXPECT_TRUE(Testing::CallWithCapturedOutput( @@ -127,7 +116,7 @@ TEST(ClangRunnerTest, DashC) { EXPECT_THAT(err, StrEq("")); } -TEST(ClangRunnerTest, BuitinHeaders) { +TEST_F(ClangRunnerTest, BuitinHeaders) { std::filesystem::path test_file = *Testing::WriteTestFile("test.c", R"cpp( #include @@ -137,12 +126,8 @@ TEST(ClangRunnerTest, BuitinHeaders) { )cpp"); std::filesystem::path test_output = *Testing::WriteTestFile("test.o", ""); - const auto install_paths = - InstallPaths::MakeForBazelRunfiles(Testing::GetExePath()); RawStringOstream verbose_out; - std::string target = llvm::sys::getDefaultTargetTriple(); - auto vfs = llvm::vfs::getRealFileSystem(); - ClangRunner runner(&install_paths, target, vfs, &verbose_out); + ClangRunner runner(&install_paths_, vfs_, &verbose_out); std::string out; std::string err; EXPECT_TRUE(Testing::CallWithCapturedOutput( @@ -160,10 +145,7 @@ TEST(ClangRunnerTest, BuitinHeaders) { EXPECT_THAT(err, StrEq("")); } -TEST(ClangRunnerTest, CompileMultipleFiles) { - const auto install_paths = - InstallPaths::MakeForBazelRunfiles(Testing::GetExePath()); - +TEST_F(ClangRunnerTest, CompileMultipleFiles) { // Memory leaks and other errors from running Clang can at times only manifest // with repeated compilations. Use a lambda to just do a series of compiles. 
auto compile = [&](llvm::StringRef filename, llvm::StringRef source) { @@ -172,9 +154,7 @@ TEST(ClangRunnerTest, CompileMultipleFiles) { std::filesystem::path output = *Testing::WriteTestFile(output_file, ""); RawStringOstream verbose_out; - std::string target = llvm::sys::getDefaultTargetTriple(); - auto vfs = llvm::vfs::getRealFileSystem(); - ClangRunner runner(&install_paths, target, vfs, &verbose_out); + ClangRunner runner(&install_paths_, vfs_, &verbose_out); std::string out; std::string err; EXPECT_TRUE(Testing::CallWithCapturedOutput( @@ -195,5 +175,117 @@ TEST(ClangRunnerTest, CompileMultipleFiles) { compile("test3.cpp", "int test3() { return 0; }"); } +TEST_F(ClangRunnerTest, BuildResourceDir) { + ClangRunner runner(&install_paths_, vfs_, &llvm::errs()); + + // Note that we can't test arbitrary targets here as we need to be able to + // compile the builtin functions for the target. We use the default target as + // the most likely to pass. + std::string target = llvm::sys::getDefaultTargetTriple(); + llvm::Triple target_triple(target); + std::filesystem::path resource_path = runner.BuildTargetResourceDir(target); + + // For Linux we can directly check the CRT begin/end object files. + if (target_triple.isOSLinux()) { + std::filesystem::path crt_begin_path = + resource_path / "lib" / target / "clang_rt.crtbegin.o"; + ASSERT_TRUE(std::filesystem::is_regular_file(crt_begin_path)); + auto begin_result = + llvm::object::ObjectFile::createObjectFile(crt_begin_path.native()); + llvm::object::ObjectFile& crtbegin = *begin_result->getBinary(); + EXPECT_TRUE(crtbegin.isELF()); + EXPECT_TRUE(crtbegin.isObject()); + EXPECT_THAT(crtbegin.getArch(), Eq(target_triple.getArch())); + + llvm::SmallVector symbols(crtbegin.symbols()); + // The first symbol should come from the source file. 
+ EXPECT_THAT(*symbols.front().getName(), Eq("crtbegin.c")); + + // Check for representative symbols of `crtbegin.o` -- we always use + // `.init_array` in our runtimes build so we have predictable functions. + EXPECT_THAT(symbols, IsSupersetOf({TextSymbolNamed("__do_init"), + TextSymbolNamed("__do_fini")})); + + std::filesystem::path crt_end_path = + resource_path / "lib" / target / "clang_rt.crtend.o"; + ASSERT_TRUE(std::filesystem::is_regular_file(crt_end_path)); + auto end_result = + llvm::object::ObjectFile::createObjectFile(crt_end_path.native()); + llvm::object::ObjectFile& crtend = *end_result->getBinary(); + EXPECT_TRUE(crtend.isELF()); + EXPECT_TRUE(crtend.isObject()); + EXPECT_THAT(crtend.getArch(), Eq(target_triple.getArch())); + + // Just check the source file symbol, not much of interest in the end. + llvm::object::SymbolRef crtend_front_symbol = *crtend.symbol_begin(); + EXPECT_THAT(*crtend_front_symbol.getName(), Eq("crtend.c")); + } + + // Across all targets, check that the builtins archive exists, and contains a + // relevant symbol by running the `llvm-nm` tool over it. Using `nm` rather + // than directly inspecting the objects is a bit awkward, but lets us easily + // ignore the wrapping in an archive file. + std::filesystem::path builtins_path = + resource_path / "lib" / target / "libclang_rt.builtins.a"; + LLVMRunner llvm_runner(&install_paths_, &llvm::errs()); + std::string out; + std::string err; + EXPECT_TRUE(Testing::CallWithCapturedOutput(out, err, [&] { + return llvm_runner.Run(LLVMTool::Nm, {builtins_path.native()}); + })); + + // Check that we found a definition of `__mulodi4`, a builtin function + // provided by Compiler-RT, but not `libgcc` historically. Note that on macOS + // there is a leading `_` due to mangling. + EXPECT_THAT(out, HasSubstr(target_triple.isMacOSX() ? "T ___mulodi4\n" + : "T __mulodi4\n")); + + // Check that we don't include the `chkstk` builtins outside of Windows. 
+ if (!target_triple.isOSWindows()) { + EXPECT_THAT(out, Not(HasSubstr("chkstk"))); + } +} + +// It's hard to write a portable and reliable unittest for all the layers of the +// Clang driver because they work hard to interact with the underlying +// filesystem and operating system. For now, we just check that a link command +// is echoed back with plausible contents. +// +// TODO: We should eventually strive to have a more complete setup that lets us +// test more complete Clang functionality here. +TEST_F(ClangRunnerTest, LinkCommandEcho) { + // Just create some empty files to use in a synthetic link command below. + std::filesystem::path foo_file = *Testing::WriteTestFile("foo.o", ""); + std::filesystem::path bar_file = *Testing::WriteTestFile("bar.o", ""); + + RawStringOstream verbose_out; + ClangRunner runner(&install_paths_, vfs_, &verbose_out); + std::string out; + std::string err; + EXPECT_TRUE(Testing::CallWithCapturedOutput( + out, err, + [&] { + // Note that we use the target independent run command here because + // we're just getting the echo-ed output back. For this to actually + // link, we'd need to have the target-dependent resources, but those are + // expensive to build so we only want to test them once (above). + return runner.RunTargetIndependentCommand( + {"-###", "-o", "binary", foo_file.string(), bar_file.string()}); + })) + << "Verbose output from runner:\n" + << verbose_out.TakeStr() << "\n"; + verbose_out.clear(); + + // Because we use `-###` above, we should just see the command that the Clang + // driver would have run in a subprocess. This will be very architecture + // dependent and have lots of variety, but we expect to see both file strings + // in the command at least. + EXPECT_THAT(err, HasSubstr(foo_file.string())) << err; + EXPECT_THAT(err, HasSubstr(bar_file.string())) << err; + + // And no non-stderr output should be produced.
+ EXPECT_THAT(out, StrEq("")); +} + } // namespace } // namespace Carbon diff --git a/toolchain/driver/clang_subcommand.cpp b/toolchain/driver/clang_subcommand.cpp index 6911cc75f33fe..40e458db23f14 100644 --- a/toolchain/driver/clang_subcommand.cpp +++ b/toolchain/driver/clang_subcommand.cpp @@ -46,8 +46,7 @@ ClangSubcommand::ClangSubcommand() : DriverSubcommand(SubcommandInfo) {} // add more. // https://github.com/llvm/llvm-project/blob/main/clang/tools/driver/driver.cpp auto ClangSubcommand::Run(DriverEnv& driver_env) -> DriverResult { - std::string target = llvm::sys::getDefaultTargetTriple(); - ClangRunner runner(driver_env.installation, target, driver_env.fs, + ClangRunner runner(driver_env.installation, driver_env.fs, driver_env.vlog_stream); // Don't run Clang when fuzzing, it is known to not be reliable under fuzzing diff --git a/toolchain/driver/link_subcommand.cpp b/toolchain/driver/link_subcommand.cpp index 60e53f1369b2f..ef15801f40405 100644 --- a/toolchain/driver/link_subcommand.cpp +++ b/toolchain/driver/link_subcommand.cpp @@ -94,6 +94,11 @@ auto LinkSubcommand::Run(DriverEnv& driver_env) -> DriverResult { // We link using a C++ mode of the driver. clang_args.push_back("--driver-mode=g++"); + // Pass the target down to Clang to pick up the correct defaults. + std::string target_arg = + llvm::formatv("--target={0}", options_.codegen_options.target).str(); + clang_args.push_back(target_arg); + // Use LLD, which we provide in our install directory, for linking. 
clang_args.push_back("-fuse-ld=lld"); @@ -113,8 +118,8 @@ auto LinkSubcommand::Run(DriverEnv& driver_env) -> DriverResult { clang_args.append(options_.object_filenames.begin(), options_.object_filenames.end()); - ClangRunner runner(driver_env.installation, options_.codegen_options.target, - driver_env.fs, driver_env.vlog_stream); + ClangRunner runner(driver_env.installation, driver_env.fs, + driver_env.vlog_stream); return {.success = runner.Run(clang_args)}; } diff --git a/toolchain/driver/lld_runner_test.cpp b/toolchain/driver/lld_runner_test.cpp index 539bd22ce4920..7b55ee58305f6 100644 --- a/toolchain/driver/lld_runner_test.cpp +++ b/toolchain/driver/lld_runner_test.cpp @@ -85,7 +85,7 @@ static auto CompileTwoSources(const InstallPaths& install_paths, // First compile the two source files to `.o` files with Clang. RawStringOstream verbose_out; auto vfs = llvm::vfs::getRealFileSystem(); - ClangRunner clang(&install_paths, target, vfs, &verbose_out); + ClangRunner clang(&install_paths, vfs, &verbose_out); std::string target_arg = llvm::formatv("--target={0}", target).str(); std::string out; std::string err; diff --git a/toolchain/install/BUILD b/toolchain/install/BUILD index a8a10fe297049..d8e6b96687f90 100644 --- a/toolchain/install/BUILD +++ b/toolchain/install/BUILD @@ -10,6 +10,7 @@ load("@rules_python//python:defs.bzl", "py_test") load("//bazel/cc_rules:defs.bzl", "cc_binary", "cc_library", "cc_test") load("//bazel/manifest:defs.bzl", "manifest") load("//toolchain/base:llvm_tools.bzl", "LLVM_MAIN_TOOLS", "LLVM_TOOL_ALIASES") +load("//toolchain/base:runtime_sources.bzl", "BUILTINS_FILEGROUPS", "CRT_FILES") load("install_filegroups.bzl", "install_filegroup", "install_symlink", "install_target", "make_install_filegroups") load("pkg_helpers.bzl", "pkg_naming_variables", "pkg_tar_and_test") @@ -33,6 +34,7 @@ cc_library( "//common:error", "//toolchain/base:llvm_tools", "@bazel_tools//tools/cpp/runfiles", + "@llvm-project//clang:basic", 
"@llvm-project//llvm:Support", ], ) @@ -146,6 +148,13 @@ filegroup( srcs = ["@llvm-project//clang:builtin_headers_gen"], ) +# Collect the runtime sources that are collectively installed into the +# `builtins` directory. +filegroup( + name = "clang_builtins_runtimes", + srcs = CRT_FILES.values() + BUILTINS_FILEGROUPS.values(), +) + # Given a root `prefix_root`, the hierarchy looks like: # # - prefix_root/bin: Binaries intended for direct use. @@ -181,6 +190,9 @@ install_dirs = { "lib/carbon/llvm/lib/clang/" + LLVM_VERSION_MAJOR: [ install_filegroup("include", ":clang_headers", "staging/include/"), ], + "lib/carbon/llvm/lib/clang/" + LLVM_VERSION_MAJOR + "/src": [ + install_filegroup("builtins", ":clang_builtins_runtimes", "lib/builtins/"), + ], } make_install_filegroups( @@ -193,7 +205,7 @@ make_install_filegroups( py_test( name = "llvm_symlinks_test", - size = "small", + size = "medium", srcs = ["llvm_symlinks_test.py"], data = [":install_data"], ) diff --git a/toolchain/install/busybox_info.h b/toolchain/install/busybox_info.h index 926350e9e2efc..7ad54d90bc08d 100644 --- a/toolchain/install/busybox_info.h +++ b/toolchain/install/busybox_info.h @@ -35,8 +35,18 @@ struct BusyboxInfo { // If unable to locate a plausible busybox binary, returns an error instead. inline auto GetBusyboxInfo(llvm::StringRef argv0) -> ErrorOr { // Check for an override of `argv[0]` from the environment and apply it. + std::string argv0_override_storage; if (const char* argv0_override = getenv(Argv0OverrideEnv)) { - argv0 = argv0_override; + // Capture the override into local storage here so we can clear it from the + // environment. + argv0_override_storage = argv0_override; + argv0 = argv0_override_storage; + + // Unset the override environment variable so that subsequent subprocessing + // back into the busybox doesn't pick it up. This is essential to, for + // example, allow the `clang` driver to subprocess back into a `clang` + // busyboxed binary with an internal CC1 commandline. 
+ unsetenv(Argv0OverrideEnv); } BusyboxInfo info = {.bin_path = argv0.str(), .mode = std::nullopt}; diff --git a/toolchain/install/busybox_info_test.cpp b/toolchain/install/busybox_info_test.cpp index dd8010c738152..75710c018103a 100644 --- a/toolchain/install/busybox_info_test.cpp +++ b/toolchain/install/busybox_info_test.cpp @@ -290,8 +290,6 @@ TEST_F(BusyboxInfoTest, RejectSymlinkInUnrelatedInstall) { TEST_F(BusyboxInfoTest, EnvBinaryPathOverride) { // The test should not have this environment variable set. ASSERT_THAT(getenv(Argv0OverrideEnv), Eq(nullptr)); - // Clean up this environment variable when this test finishes. - auto _ = llvm::make_scope_exit([] { unsetenv(Argv0OverrideEnv); }); // Set the environment to our actual busybox. auto busybox = MakeFile(dir_ / "carbon-busybox"); @@ -301,6 +299,9 @@ TEST_F(BusyboxInfoTest, EnvBinaryPathOverride) { ASSERT_TRUE(info.ok()) << info.error(); EXPECT_THAT(info->bin_path, Eq(busybox)); EXPECT_THAT(info->mode, Eq(std::nullopt)); + + // Make sure that we cleaned up the environment afterward. + EXPECT_THAT(getenv(Argv0OverrideEnv), Eq(nullptr)); } } // namespace diff --git a/toolchain/install/install_paths.cpp b/toolchain/install/install_paths.cpp index 0bd18db6e2663..1ee478dfe0871 100644 --- a/toolchain/install/install_paths.cpp +++ b/toolchain/install/install_paths.cpp @@ -7,6 +7,7 @@ #include #include +#include "clang/Basic/Version.h" #include "common/check.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" @@ -204,4 +205,22 @@ auto InstallPaths::llvm_tool_path(LLVMTool tool) const -> std::string { return path.str().str(); } +auto InstallPaths::clang_resource_path() const -> std::string { + llvm::SmallString<256> path(prefix_); + // TODO: Adjust this to work equally well on Windows. 
+ llvm::sys::path::append( + path, llvm::sys::path::Style::posix, + "lib/carbon/llvm/lib/clang/" CLANG_VERSION_MAJOR_STRING); + return path.str().str(); +} + +auto InstallPaths::llvm_runtime_srcs() const -> std::string { + llvm::SmallString<256> path(prefix_); + // TODO: Adjust this to work equally well on Windows. + llvm::sys::path::append( + path, llvm::sys::path::Style::posix, + "lib/carbon/llvm/lib/clang/" CLANG_VERSION_MAJOR_STRING "/src"); + return path.str().str(); +} + } // namespace Carbon diff --git a/toolchain/install/install_paths.h b/toolchain/install/install_paths.h index fa3bc9e24f3cd..a1220d06bca35 100644 --- a/toolchain/install/install_paths.h +++ b/toolchain/install/install_paths.h @@ -96,6 +96,12 @@ class InstallPaths { // The path to any of the LLVM tools. auto llvm_tool_path(LLVMTool tool) const -> std::string; + // The path to the Clang resources. + auto clang_resource_path() const -> std::string; + + // The path to the root of LLVM runtime sources. + auto llvm_runtime_srcs() const -> std::string; + private: friend class InstallPathsTestPeer; diff --git a/toolchain/install/llvm_symlinks_test.py b/toolchain/install/llvm_symlinks_test.py index 4621da6339aa8..73b3b98b33b5a 100644 --- a/toolchain/install/llvm_symlinks_test.py +++ b/toolchain/install/llvm_symlinks_test.py @@ -11,6 +11,7 @@ from pathlib import Path import subprocess import os +import platform import sys import unittest @@ -26,8 +27,6 @@ def setUp(self) -> None: def get_link_cmd(self, clang: Path) -> list[str | Path]: return [ clang, - # We pick an arbitrary linux target to get stable results. - "--target=aarch64-unknown-linux-gnu", # Verbose printing to help with debugging. "-v", # Print out the link command rather than running it. @@ -39,27 +38,46 @@ def get_link_cmd(self, clang: Path) -> list[str | Path]: self.test_o_file, ] + def unsupported(self, stderr: str) -> None: + self.fail(f"Unsupported platform '{platform.uname()}':\n{stderr}") + + # Note that we can't test `clang` vs. 
`clang++` portably. The only commands + # with useful differences are _link_ commands, and those need to build + # runtime libraries on demand, which requires the host to be able to compile + # and link for the target. Instead, we test linking with the default target + # (the host), as that is the one that should reliably work if we're + # developing Carbon, and encode all the different platform results in the + # test expectations. def test_clang(self) -> None: bin = self.install_root / "lib/carbon/llvm/bin/clang" + # Most errors are caught by ensuring the command succeeds. run = subprocess.run( self.get_link_cmd(bin), check=True, capture_output=True, text=True ) - # Check that we do have a plausible link command. - self.assertRegex(run.stderr, r'"-m" "aarch64linux"') - # Ensure it doesn't contain the C++ standard library. - self.assertNotRegex(run.stderr, r'"-lstdc++"') + # Also ensure that it correctly didn't imply a C++ link. + if platform.system() == "Linux": + self.assertNotRegex(run.stderr, r'"-lstdc\+\+"') + elif platform.system() == "Darwin": + self.assertNotRegex(run.stderr, r'"-lc\+\+"') + else: + self.unsupported(run.stderr) + # Note that we can't test `clang` vs. `clang++` portably. See the comment on + # `test_clang` for details. def test_clangplusplus(self) -> None: bin = self.install_root / "lib/carbon/llvm/bin/clang++" run = subprocess.run( self.get_link_cmd(bin), check=True, capture_output=True, text=True ) - # Check that we do have a plausible link command. - self.assertRegex(run.stderr, r'"-m" "aarch64linux"') - # Ensure it doesn't contain the C++ standard library. - self.assertNotRegex(run.stderr, r'"-lstdc++"') + # Ensure that this binary _does_ imply a C++ link. 
+ if platform.system() == "Linux": + self.assertRegex(run.stderr, r'"-lstdc\+\+"') + elif platform.system() == "Darwin": + self.assertRegex(run.stderr, r'"-lc\+\+"') + else: + self.unsupported(run.stderr) def test_clang_cl(self) -> None: bin = self.install_root / "lib/carbon/llvm/bin/clang-cl"