Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 44 additions & 13 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10452,7 +10452,7 @@ void OffloadPackager::ConstructJob(Compilation &C, const JobAction &JA,
static_cast<const toolchains::SYCLToolChain &>(*TC);
SYCLTC.AddImpliedTargetArgs(TC->getTriple(), Args, BuildArgs, JA, *HostTC,
Arch);
SYCLTC.TranslateBackendTargetArgs(TC->getTriple(), Args, BuildArgs);
SYCLTC.TranslateBackendTargetArgs(TC->getTriple(), Args, BuildArgs, Arch);
createArgString("compile-opts=");
BuildArgs.clear();
SYCLTC.TranslateLinkerTargetArgs(TC->getTriple(), Args, BuildArgs);
Expand Down Expand Up @@ -11608,27 +11608,58 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA,
if (!TC->getTriple().isSPIROrSPIRV())
continue;
ArgStringList BuildArgs;
SmallString<128> BackendOptString;
SmallVector<SmallString<128>> BackendOptVec;
SmallString<128> LinkOptString;
SYCLTC.TranslateBackendTargetArgs(TC->getTriple(), Args, BuildArgs);
for (const auto &A : BuildArgs)
appendOption(BackendOptString, A);

BuildArgs.clear();
// Build backend options for each target passed via
// -Xsycl-target-backend or
// -Xsycl-target-backend=spir64_gen, spir64_x86_64, intel_gpu_*
// in the form: "-device <arch> <backend_opt>"
for (const Arg *A : Args.filtered(options::OPT_Xsycl_backend_EQ,
options::OPT_Xsycl_backend)) {
StringRef Device = "";
SmallString<128> BackendArgs;
// Handle the OPT_Xsycl_backend_EQ case
if (A->getNumValues() > 1) {
Device = SYCL::gen::resolveGenDevice(A->getValue());
if (Device.empty()) {
// If the target is spir64_gen, the device name needs to be
// extracted from the backend arguments. If the target is
// spir64_x86_64, the Device value returned by extractDeviceFromArg
// will be an empty string.
Device = SYCL::gen::extractDeviceFromArg(A->getValue(1));
} else {
// If target is intel_gpu_*, "-device <arch>"
// is appended to BackendArgs.
appendOption(BackendArgs, "-device " + Device.str());
}
}
SYCLTC.TranslateBackendTargetArgs(TC->getTriple(), Args, BuildArgs,
Device);
for (const auto &BA : BuildArgs) {
appendOption(BackendArgs, BA);
}
BackendOptVec.push_back(BackendArgs);
BuildArgs.clear();
}

SYCLTC.TranslateLinkerTargetArgs(TC->getTriple(), Args, BuildArgs);
for (const auto &A : BuildArgs) {
if (TC->getTriple().getSubArch() == llvm::Triple::NoSubArch)
appendOption(LinkOptString, A);
else
else {
// For AOT, combine the Backend and Linker strings into one.
appendOption(BackendOptString, A);
for (SmallString<128> &BackendArgs : BackendOptVec)
appendOption(BackendArgs, A);
}
}

if (!BackendOptString.empty()) {
CmdArgs.push_back(Args.MakeArgString(
"--device-compiler=" +
Action::GetOffloadKindName(Action::OFK_SYCL) + ":" +
TC->getTripleString() + "=" + BackendOptString));
for (SmallString<128> &BackendArgs : BackendOptVec) {
if (!BackendArgs.empty())
CmdArgs.push_back(Args.MakeArgString(
"--device-compiler=" +
Action::GetOffloadKindName(Action::OFK_SYCL) + ":" +
TC->getTripleString() + "=" + BackendArgs));
}
if (!LinkOptString.empty()) {
CmdArgs.push_back(Args.MakeArgString(
Expand Down
17 changes: 14 additions & 3 deletions clang/lib/Driver/ToolChains/SYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1083,6 +1083,18 @@ void SYCL::gen::BackendCompiler::ConstructJob(Compilation &C,
C.addCommand(std::move(Cmd));
}

// Extracts the device specified after "-device" from the backend
// argument string provided via -Xsycl-target-backend.
StringRef SYCL::gen::extractDeviceFromArg(llvm::StringRef Arg) {
llvm::SmallVector<StringRef, 8> Arglist;
Arg.split(Arglist, ' ');
for (size_t i = 0; i + 1 < Arglist.size(); ++i) {
if (Arglist[i] == "-device")
Copy link
Copy Markdown
Contributor

@YuriPlyakhin YuriPlyakhin Mar 26, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I searched through SYCL.cpp and it seems we parse -device in several places. Could you please double check. Several things to consider:

  1. Can we avoid parsing -device at all? What are all use cases, where we have to parse -device?
  2. If we have to parse -device, can we limit parsing to simple cases (e.g. 1 device option with one value) and for the rest issue an error, that such case is not supported, and document clearly, which -device syntax is and is not supported by SYCL driver?
  3. If we have to parse multiple cases (or all cases), should we check, what ocloc accepts and parse this option the same way as ocloc?

For #2 and #3 above, should we make 1 single function that parses -device option and we use it everywhere, so that behavior is consistent and not different for different use cases?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank for pointing this out! Here are some of my finding for the three question, based on all the places that are parsing -device in SYCL.cpp.

Question 1: (Use case for parsing -device)
Case 1:
getDeviceArg() + hasPVCDevice() , used in AddImpliedTargetArgs. It is to decide whether to inject the default pvc:auto GRF register allocation mode based on where PVC device is specified

if (IsJIT || Device == "pvc" || hasPVCDevice(TargArgs, DevArg)) {

Case 2:
getDeviceArg() , used in addSYCLDeviceSanitizerLibs. It is used to pick the right sanitizer device library (JIT, CPU, DG2, or PVC variant) because on the device specified.

std::string DeviceArg = getDeviceArg(CmdArgs);

Case 3:
selectBfloatLibs parse -device for bfloat16 library selection

size_t DevicesPos = Params.find("-device ");
// "-device xxx" is used to specify AOT target device, so user must apply
// -Xs "-device xxx" or -Xsycl-target-backend=spir64_gen "-device xxx"
if (DevicesPos != std::string::npos) {
UseNative = true;
std::istringstream Devices(Params.substr(DevicesPos + 8));

I don't think we would be able to avoid parsing -device in these case, because these operation are dependent on the device. When the user pass in -fsycl-targets=spir64_gen -Xsycl-target-backend=spir64_gen "-device arch" we have to parse the backend options to find the device name (this is the same case as why we are parsing -device in our PRs #21493 and #21495)

Question 2:
I am a little bit not sure about this question. Do you mean we should only allow for one device passing at a time, so only -device arch but not -device arch1, arch2? Or we should only allow one way to represent device, so only -device pvc, but not -device 12.60.7 nor -device 0x0BD0 ?

Question 3:
Yes, I have checked the bahavior of ocloc for device name being passed in through different form.

Case 1: -device pvc

clang++ -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend=spir64_gen "-device pvc -options -extraopt_pvc"  ...

clang-linker-wrapper --device-compiler=sycl:spir64_gen-unknown-unknown=-device pvc -options -extraopt_pvc  ...

ocloc -device pvc  -options -extraopt_pvc  ...

Case 2: -device 12.60.7 where 12.60.7 is the version number

clang++ -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend=spir64_gen "-device 12.60.7 -options -extraopt_pvc" ...

clang-linker-wrapper "--device-compiler=sycl:spir64_gen-unknown-unknown=-device 12.60.7 -options -extraopt_pvc"  ...
 
ocloc -device 12.60.7  -options -extraopt_pvc ...

Case 3: -device 0x0BD0, 0x0BD0 is the hex for PVC

clang++ -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend=spir64_gen "-device 0x0BD0 -options -extraopt_pvc" ...

clang-linker-wrapper "--device-compiler=sycl:spir64_gen-unknown-unknown=-device 0x0BD0 -options -extraopt_pvc" ...

ocloc -device 0x0BD0 -options -extraopt_pvc ...

As we can see, the device name being passed into ocloc is the device being passed to -Xsycl-target-backend=spir64_gen "-device arch". If a invalid device name (either version number, hex or something else), error will be raise at ocloc.

For the really last question, yes, there is a function getDeviceArg can be used to parse and find the device name. We can refactor the code to use getDeviceArg().

I have updated the PRs for our new approaches (#21493 and #21495) to use getDeviceArg() to parse the - device, and I have checked they handle the device name passing to ocloc the same way as mentioned in Question 3.

return Arglist[i + 1];
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably not a common occurance, but if a user says -device pvc -device bdw the ocloc parsing will actually take -device bdw. Grabbing the first instance will cause a mismatch of target expectations.

Copy link
Copy Markdown
Contributor Author

@YixingZhang007 YixingZhang007 Feb 26, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought if a user wants to specify more than one architecture for spir64_gen, they would need to use a format like -device arch1,arch2,arch3,... rather than repeating -device arch1 -device arch2 .... This is based on some tests I’ve seen; I haven’t come across any examples using the second approach. The code works for the first approach. Please let me know if you’re aware if the second one is being allowed. Thank you!

Copy link
Copy Markdown
Contributor

@YuriPlyakhin YuriPlyakhin Mar 26, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we cannot rely on tests only to decide what is the correct behavior. We don't know, what our customers are using actually.
And if compiler doesn't issue an error, when -device pvc -device bdw is used, then it is allowed.
I agree with Mike that inconsistency in behavior between SYCL driver and ocloc driver may cause confusion.
I provided detailed comment above on how I think this problem should be handled.

}
return "";
}

StringRef SYCL::gen::resolveGenDevice(StringRef DeviceName) {
StringRef Device;
Device =
Expand Down Expand Up @@ -1556,6 +1568,8 @@ void SYCLToolChain::TranslateTargetOpt(const llvm::Triple &Triple,
getDriver().getSYCLDeviceTriple(A->getValue(), A);
// Passing device args: -X<Opt>=<triple> -opt=val.
StringRef GenDevice = SYCL::gen::resolveGenDevice(A->getValue());
if (GenDevice.empty())
GenDevice = SYCL::gen::extractDeviceFromArg(A->getValue(1));
bool IsGenTriple = Triple.isSPIR() &&
Triple.getSubArch() == llvm::Triple::SPIRSubArch_gen;
if (IsGenTriple) {
Expand All @@ -1565,9 +1579,6 @@ void SYCLToolChain::TranslateTargetOpt(const llvm::Triple &Triple,
// Triples do not match, but only skip when we know we are not
// comparing against intel_gpu_*
continue;
if (OptTargetTriple == Triple && !Device.empty())
// Triples match, but we are expecting a specific device to be set.
continue;
} else if (OptTargetTriple != Triple)
continue;
} else if (!OptNoTriple)
Expand Down
1 change: 1 addition & 0 deletions clang/lib/Driver/ToolChains/SYCL.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ class LLVM_LIBRARY_VISIBILITY BackendCompiler : public Tool {
const char *LinkingOutput) const override;
};

StringRef extractDeviceFromArg(StringRef Arg);
StringRef resolveGenDevice(StringRef DeviceName);
SmallString<64> getGenDeviceMacro(StringRef DeviceName);
StringRef getGenGRFFlag(StringRef GRFMode);
Expand Down
4 changes: 2 additions & 2 deletions clang/test/Driver/clang-linker-wrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,9 @@

// Check that when "--device-compiler=triple=-device pvc" is specified in clang-linker-wrapper
// (happen when AOT device is specified via -Xsycl-target-backend '-device pvc' in clang),
// the target is not passed to sycl-post-link for filtering.
// the target is passed to sycl-post-link for filtering.
// RUN: clang-linker-wrapper -sycl-embed-ir -sycl-device-libraries=%t1.devicelib.o -sycl-post-link-options="SYCL_POST_LINK_OPTIONS" -llvm-spirv-options="LLVM_SPIRV_OPTIONS" "--host-triple=x86_64-unknown-linux-gnu" "--device-compiler=spir64_gen-unknown-unknown=-device pvc" "--linker-path=/usr/bin/ld" "--" HOST_LINKER_FLAGS "-dynamic-linker" HOST_DYN_LIB "-o" "a.out" HOST_LIB_PATH HOST_STAT_LIB %t1.o --dry-run 2>&1 | FileCheck -check-prefix=CHK-NO-CMDS-AOT-GEN %s
// CHK-NO-CMDS-AOT-GEN: sycl-post-link{{.*}} SYCL_POST_LINK_OPTIONS -o {{[^,]*}}.table {{.*}}.bc
// CHK-NO-CMDS-AOT-GEN: sycl-post-link{{.*}} SYCL_POST_LINK_OPTIONS -o intel_gpu_pvc,{{.*}}.table {{.*}}.bc

/// Check for list of commands for standalone clang-linker-wrapper run for sycl (AOT for Intel CPU)
// -------
Expand Down
9 changes: 9 additions & 0 deletions clang/test/Driver/sycl-offload-new-driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,15 @@
// WRAPPER_OPTIONS_BACKEND_AOT-SAME: "--device-compiler=sycl:spir64_gen-unknown-unknown=-backend-gen-opt"
// WRAPPER_OPTIONS_BACKEND_AOT-SAME: "--device-compiler=sycl:spir64_x86_64-unknown-unknown=-backend-cpu-opt"

// Check that AOT backend compiler options passed via
// -Xsycl-target-backend=<target>,<option> are passed to clang-linker-wrapper in the form: "-device <arch> <backend_opt>"
// RUN: %clangxx --target=x86_64-unknown-linux-gnu -fsycl --offload-new-driver \
// RUN: -fsycl-targets=intel_gpu_dg1,spir64_gen \
// RUN: -Xsycl-target-backend=intel_gpu_dg1 "-options -extraopt_dg1" \
// RUN: -Xsycl-target-backend=spir64_gen "-device pvc -options -extraopt_pvc" %s -### 2>&1 \
// RUN: | FileCheck -check-prefixes=MULTI_TARGETS_OPTIONS_BACKEND_AOT %s
// MULTI_TARGETS_OPTIONS_BACKEND_AOT: clang-linker-wrapper{{.*}} "--device-compiler=sycl:spir64_gen-unknown-unknown=-device dg1 -options -extraopt_dg1" "--device-compiler=sycl:spir64_gen-unknown-unknown=-device pvc -options -extraopt_pvc"

/// Verify arch settings for nvptx and amdgcn targets
// RUN: %clangxx -fsycl -### -fsycl-targets=amdgcn-amd-amdhsa -fno-sycl-libspirv \
// RUN: -nocudalib --offload-new-driver \
Expand Down
43 changes: 26 additions & 17 deletions clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -720,12 +720,8 @@ getTripleBasedSYCLPostLinkOpts(const ArgList &Args,
/// 'Args' encompasses all arguments required for linking and wrapping device
/// code and will be parsed to generate options required to be passed into the
/// sycl-post-link tool.
/// 'IsDevicePassedWithSyclTargetBackend' indicates whether the device
/// architecture is already specified through -Xsycl-target-backend=spir64_gen
/// "-device <arch>" format.
static Expected<std::vector<module_split::SplitModule>>
runSYCLPostLinkTool(ArrayRef<StringRef> InputFiles, const ArgList &Args,
bool IsDevicePassedWithSyclTargetBackend) {
runSYCLPostLinkTool(ArrayRef<StringRef> InputFiles, const ArgList &Args) {
Expected<std::string> SYCLPostLinkPath =
findProgram("sycl-post-link", {getMainExecutable("sycl-post-link")});
if (!SYCLPostLinkPath)
Expand All @@ -743,10 +739,21 @@ runSYCLPostLinkTool(ArrayRef<StringRef> InputFiles, const ArgList &Args,
const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ));
StringRef Arch = Args.getLastArgValue(OPT_arch_EQ);

// Prefix the output path with the target architectures,
// e.g. intel_gpu_dg2,intel_gpu_pvc for arch = "dg2,pvc".
if (Triple.getSubArch() == llvm::Triple::SPIRSubArch_gen && !Arch.empty() &&
!IsDevicePassedWithSyclTargetBackend && Arch != "*")
OutputPathWithArch = "intel_gpu_" + Arch.str() + "," + OutputPathWithArch;
else if (Triple.getSubArch() == llvm::Triple::SPIRSubArch_x86_64)
Arch != "*") {
SmallVector<StringRef, 8> ArchList;
Arch.split(ArchList, ',');
std::string ArchString;
for (StringRef SingleArch : ArchList) {
// Handle cases where arch name contains a version, such as "bmg-g21"
std::string arch = SingleArch.str();
std::replace(arch.begin(), arch.end(), '-', '_');
ArchString += "intel_gpu_" + arch + ",";
}
OutputPathWithArch = ArchString + OutputPathWithArch;
} else if (Triple.getSubArch() == llvm::Triple::SPIRSubArch_x86_64)
OutputPathWithArch = "spir64_x86_64," + OutputPathWithArch;

SmallVector<StringRef, 8> CmdArgs;
Expand Down Expand Up @@ -2169,16 +2176,22 @@ extractSYCLCompileLinkOptions(ArrayRef<OffloadFile> OffloadFiles) {
}

// Append SYCL device compiler and linker options specified at link time,
// filtering by target triple and offload kind.
// filtering by target triple, offload kind, and device architecture.
// TODO: Consider how to refactor this function to merge it with getLinkerArgs()
// and determine if it's possible to use OPT_compiler_arg_EQ and
// OPT_linker_arg_EQ to handle device compiler/linker options
static void appendSYCLDeviceOptionsAtLinkTime(const DerivedArgList &LinkerArgs,
std::string &CompileOptions,
std::string &LinkOptions) {
const StringRef TripleStr = LinkerArgs.getLastArgValue(OPT_triple_EQ);
auto processDeviceArgs = [&](unsigned OptID, std::string &Options) {
auto processDeviceArgs = [&](unsigned OptID, std::string &Options,
StringRef TargetArch = StringRef()) {
for (StringRef Arg : LinkerArgs.getAllArgValues(OptID)) {
if (!TargetArch.empty()) {
std::string DeviceArchPattern = "-device " + TargetArch.str();
if (Arg.find(DeviceArchPattern) == StringRef::npos)
continue;
}
size_t ColonPos = Arg.find(':');
if (ColonPos != StringRef::npos) {
StringRef Kind = Arg.substr(0, ColonPos);
Expand All @@ -2200,7 +2213,8 @@ static void appendSYCLDeviceOptionsAtLinkTime(const DerivedArgList &LinkerArgs,
}
};

processDeviceArgs(OPT_device_compiler_args_EQ, CompileOptions);
processDeviceArgs(OPT_device_compiler_args_EQ, CompileOptions,
LinkerArgs.getLastArgValue(OPT_arch_EQ));
processDeviceArgs(OPT_device_linker_args_EQ, LinkOptions);
}

Expand Down Expand Up @@ -2279,14 +2293,9 @@ linkAndWrapDeviceFiles(ArrayRef<SmallVector<OffloadFile>> LinkerInputFiles,
SmallVector<StringRef> InputFilesSYCL;
InputFilesSYCL.emplace_back(*TmpOutputOrErr);

SmallVector<StringRef, 16> Args;
StringRef(CompileLinkOptionsOrErr->first).split(Args, ' ');
bool IsDevicePassedWithSyclTargetBackend =
std::find(Args.begin(), Args.end(), "-device") != Args.end();
auto SplitModulesOrErr =
UseSYCLPostLinkTool
? sycl::runSYCLPostLinkTool(InputFilesSYCL, LinkerArgs,
IsDevicePassedWithSyclTargetBackend)
? sycl::runSYCLPostLinkTool(InputFilesSYCL, LinkerArgs)
: sycl::runSYCLSplitLibrary(InputFilesSYCL, LinkerArgs,
*SYCLModuleSplitMode);
if (!SplitModulesOrErr)
Expand Down
Loading
Loading