@@ -674,9 +674,8 @@ getTripleBasedSYCLPostLinkOpts(const ArgList &Args,
674
674
// because it only increases amount of code for device compiler to handle,
675
675
// without any actual benefits.
676
676
// TODO: Try to extend this feature for non-Intel GPUs.
677
- if ((!Args.hasFlag (OPT_no_sycl_remove_unused_external_funcs,
678
- OPT_sycl_remove_unused_external_funcs, false ) &&
679
- !Triple.isNativeCPU ()) &&
677
+ if (!Args.hasFlag (OPT_no_sycl_remove_unused_external_funcs,
678
+ OPT_sycl_remove_unused_external_funcs, false ) &&
680
679
!Args.hasArg (OPT_sycl_allow_device_image_dependencies) &&
681
680
!Triple.isNVPTX () && !Triple.isAMDGPU ())
682
681
PostLinkArgs.push_back (" -emit-only-kernels-as-entry-points" );
@@ -1567,8 +1566,7 @@ Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args,
1567
1566
return ClangPath.takeError ();
1568
1567
1569
1568
llvm::Triple Triple (Args.getLastArgValue (OPT_triple_EQ));
1570
- if (Triple.isNativeCPU ())
1571
- Triple = llvm::Triple (Args.getLastArgValue (OPT_host_triple_EQ));
1569
+ llvm::Triple HostTriple (Args.getLastArgValue (OPT_host_triple_EQ));
1572
1570
1573
1571
StringRef Arch = Args.getLastArgValue (OPT_arch_EQ);
1574
1572
// Create a new file to write the linked device image to. Assume that the
@@ -1585,7 +1583,9 @@ Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args,
1585
1583
" --no-default-config" ,
1586
1584
" -o" ,
1587
1585
*TempFileOrErr,
1588
- Args.MakeArgString (" --target=" + Triple.getTriple ()),
1586
+ Args.MakeArgString (
1587
+ " --target=" +
1588
+ (Triple.isNativeCPU () ? HostTriple : Triple).getTriple ()),
1589
1589
};
1590
1590
1591
1591
if (!Arch.empty ())
@@ -1602,16 +1602,24 @@ Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args,
1602
1602
{" -Xlinker" ,
1603
1603
Args.MakeArgString (" --plugin-opt=" + StringRef (Arg->getValue ()))});
1604
1604
1605
- if (!Triple.isNVPTX () && !Triple.isSPIRV ())
1605
+ if (!Triple.isNVPTX () && !Triple.isSPIRV () && !Triple. isNativeCPU () )
1606
1606
CmdArgs.push_back (" -Wl,--no-undefined" );
1607
1607
1608
1608
if (IsSYCLKind && Triple.isNVPTX ())
1609
1609
CmdArgs.push_back (" -S" );
1610
+
1611
+ if (IsSYCLKind && Triple.isNativeCPU ()) {
1612
+ CmdArgs.push_back (" -Wno-override-module" );
1613
+ CmdArgs.push_back (" -mllvm" );
1614
+ CmdArgs.push_back (" -sycl-native-cpu-backend" );
1615
+ CmdArgs.push_back (" -c" );
1616
+ }
1617
+
1610
1618
for (StringRef InputFile : InputFiles)
1611
1619
CmdArgs.push_back (InputFile);
1612
1620
1613
1621
// If this is CPU offloading we copy the input libraries.
1614
- if (!Triple.isGPU ()) {
1622
+ if (!Triple.isGPU () && !Triple. isNativeCPU () ) {
1615
1623
CmdArgs.push_back (" -Wl,-Bsymbolic" );
1616
1624
CmdArgs.push_back (" -shared" );
1617
1625
ArgStringList LinkerArgs;
@@ -1664,6 +1672,38 @@ Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args,
1664
1672
Args.MakeArgString (Arg.split (' =' ).second )});
1665
1673
}
1666
1674
1675
+ // Link the NativeCPU utils bitcode library if one can be found.
1676
+ if (Triple.isNativeCPU ()) {
1677
+ if (auto *A = Args.getLastArg (OPT_sycl_device_library_location_EQ)) {
1678
+ std::string NativeCPUUtilsLib = " " ;
1679
+
1680
+ SmallVector<std::string, 8 > LibraryPaths;
1681
+ for (const auto &Path : A->getValues ()) {
1682
+ SmallString<128 > LPath (Path);
1683
+ if (llvm::sys::fs::exists (LPath)) {
1684
+ LibraryPaths.emplace_back (LPath);
1685
+ }
1686
+ }
1687
+
1688
+ for (auto &LPath : LibraryPaths) {
1689
+ // Look for the nativecpu_utils bitcode library in this directory; if
1690
+ // found, it is passed to clang below via -mlink-bitcode-file.
1691
+ const char LibNativeCPUUtilsName[] = " libsycl-nativecpu_utils.bc" ;
1692
+ SmallString<128 > LibNativeCPUUtilsPath (LPath);
1693
+ llvm::sys::path::append (LibNativeCPUUtilsPath, LibNativeCPUUtilsName);
1694
+ if (llvm::sys::fs::exists (LibNativeCPUUtilsPath)) {
1695
+ NativeCPUUtilsLib = LibNativeCPUUtilsPath.str ();
1696
+ break ;
1697
+ }
1698
+ }
1699
+
1700
+ if (NativeCPUUtilsLib != " " ) {
1701
+ CmdArgs.append ({" -Xclang" , " -mlink-bitcode-file" , " -Xclang" ,
1702
+ Args.MakeArgString (NativeCPUUtilsLib)});
1703
+ }
1704
+ }
1705
+ }
1706
+
1667
1707
// The OpenMPOpt pass can introduce new calls and is expensive, we do
1668
1708
// not want this when running CodeGen through clang.
1669
1709
if (Args.hasArg (OPT_clang_backend) || Args.hasArg (OPT_builtin_bitcode_EQ))
@@ -2137,6 +2177,13 @@ Expected<SmallVector<StringRef>> linkAndWrapDeviceFiles(
2137
2177
SplitModules[I].ModuleFilePath = *BundledFileOrErr;
2138
2178
} else {
2139
2179
SplitModules[I].ModuleFilePath = *ClangOutputOrErr;
2180
+ if (Triple.isNativeCPU ()) {
2181
+ // Add to WrappedOutput directly rather than combining this with the
2182
+ // below because WrappedOutput holds references and
2183
+ // SplitModules[I].ModuleFilePath will go out of scope too soon.
2184
+ std::scoped_lock Guard (ImageMtx);
2185
+ WrappedOutput.push_back (*ClangOutputOrErr);
2186
+ }
2140
2187
}
2141
2188
}
2142
2189
0 commit comments