From cfdab13966502a89510731481d82489b77798599 Mon Sep 17 00:00:00 2001 From: "Zhao, Maosu" Date: Thu, 2 Apr 2026 05:31:54 +0200 Subject: [PATCH 1/2] [SYCL][InstCombine] Skip GEP canonicalization for JointMatrix types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GEP canonicalization in visitGetElementPtrInst rewrites single-index GEPs to use an [N x i8] stride based on DL.getTypeAllocSize(). For SPIR-V cooperative matrix types (spirv.CooperativeMatrixKHR), this allocation size is not meaningful — the type is opaque to the data layout — so the canonicalized stride is incorrect and produces invalid IR. Add an IsMatrixType predicate that recognizes GEP element types that are, or that wrap (via an array or a single-element struct), spirv.CooperativeMatrixKHR, and skip the i8-stride canonicalization when this predicate matches. --- .../InstCombine/InstructionCombining.cpp | 15 +++++- .../gep-spirv-cooperative-matrix-spir.ll | 52 +++++++++++++++++++ 2 files changed, 66 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/InstCombine/gep-spirv-cooperative-matrix-spir.ll diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 7c62050eef3e4..cd05ca83b5bdf 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -3497,7 +3497,20 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) { Ty = AT->getElementType(); return Ty->isIntegerTy(8); }; - if (Indices.size() == 1 && !IsCanonicalType(GEPEltType)) { + // Skip canonicalization for JointMatrix type. DL.getTypeAllocSize() will not + // return the true allocation size, so the canonicalized [N x i8] stride would + // be incorrect. 
+ auto IsMatrixType = [](Type *Ty) -> bool { + if (auto *AT = dyn_cast<ArrayType>(Ty)) + Ty = AT->getElementType(); + if (auto *ST = dyn_cast<StructType>(Ty)) + if (ST->getNumElements() == 1) + Ty = ST->getElementType(0); + return isa<TargetExtType>(Ty) && + cast<TargetExtType>(Ty)->getName() == "spirv.CooperativeMatrixKHR"; + }; + if (Indices.size() == 1 && !IsCanonicalType(GEPEltType) && + !IsMatrixType(GEPEltType)) { TypeSize Scale = DL.getTypeAllocSize(GEPEltType); assert(!Scale.isScalable() && "Should have been handled earlier"); Type *NewElemTy = Builder.getInt8Ty(); diff --git a/llvm/test/Transforms/InstCombine/gep-spirv-cooperative-matrix-spir.ll b/llvm/test/Transforms/InstCombine/gep-spirv-cooperative-matrix-spir.ll new file mode 100644 index 0000000000000..88bef6cc84cb6 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/gep-spirv-cooperative-matrix-spir.ll @@ -0,0 +1,52 @@ +; RUN: opt < %s -passes=instcombine -S | FileCheck %s + +; GEP canonicalization must skip element types that wrap spirv.CooperativeMatrixKHR +; because DL.getTypeAllocSize() is not meaningful for this target extension type. +; Verify that visiting chained GEPs on [N x [M x JointMatrix]] arrays does NOT +; rewrite the resulting single-index GEP to use an [sizeof x i8] stride. + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1" +target triple = "spirv64-unknown-unknown" + +%matrix_acc_t = type { target("spirv.CooperativeMatrixKHR", float, 3, 16, 16, 2) } +%matrix_a_t = type { target("spirv.CooperativeMatrixKHR", i16, 3, 16, 32, 0) } +%matrix_b_t = type { target("spirv.CooperativeMatrixKHR", i16, 3, 32, 16, 1) } + +; InstCombine folds [2 x [2 x %matrix_acc_t]], ptr, 0, %i -> [2 x %matrix_acc_t], ptr, %i +; then [2 x %matrix_acc_t], ptr, 0, %j -> %matrix_acc_t, ptr, %j. +; The final single-index GEP on %matrix_acc_t must NOT be canonicalized to [N x i8]. 
+define ptr addrspace(4) @test_acc_matrix_2d_array(ptr addrspace(4) %p, i64 %i, i64 %j) { +; CHECK-LABEL: define ptr addrspace(4) @test_acc_matrix_2d_array( +; CHECK-SAME: ptr addrspace(4) [[P:%.*]], i64 [[I:%.*]], i64 [[J:%.*]]) { +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [2 x [[MATRIX_ACC_T:%.*]]], ptr addrspace(4) [[P]], i64 [[I]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [[MATRIX_ACC_T]], ptr addrspace(4) [[ARRAYIDX]], i64 [[J]] +; CHECK-NEXT: ret ptr addrspace(4) [[ARRAYIDX2]] +; + %arrayidx = getelementptr inbounds nuw [2 x [2 x %matrix_acc_t]], ptr addrspace(4) %p, i64 0, i64 %i + %arrayidx2 = getelementptr inbounds nuw [2 x %matrix_acc_t], ptr addrspace(4) %arrayidx, i64 0, i64 %j + ret ptr addrspace(4) %arrayidx2 +} + +; Same pattern for the A matrix: [2 x [1 x %matrix_a_t]] +define ptr addrspace(4) @test_a_matrix_2d_array(ptr addrspace(4) %p, i64 %i) { +; CHECK-LABEL: define ptr addrspace(4) @test_a_matrix_2d_array( +; CHECK-SAME: ptr addrspace(4) [[P:%.*]], i64 [[I:%.*]]) { +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1 x [[MATRIX_A_T:%.*]]], ptr addrspace(4) [[P]], i64 [[I]] +; CHECK-NEXT: ret ptr addrspace(4) [[ARRAYIDX]] +; + %arrayidx = getelementptr inbounds nuw [2 x [1 x %matrix_a_t]], ptr addrspace(4) %p, i64 0, i64 %i + %arrayidx2 = getelementptr inbounds nuw [1 x %matrix_a_t], ptr addrspace(4) %arrayidx, i64 0, i64 0 + ret ptr addrspace(4) %arrayidx2 +} + +; Same pattern for the B matrix: [2 x [1 x %matrix_b_t]] +define ptr addrspace(4) @test_b_matrix_2d_array(ptr addrspace(4) %p, i64 %i) { +; CHECK-LABEL: define ptr addrspace(4) @test_b_matrix_2d_array( +; CHECK-SAME: ptr addrspace(4) [[P:%.*]], i64 [[I:%.*]]) { +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1 x [[MATRIX_B_T:%.*]]], ptr addrspace(4) [[P]], i64 [[I]] +; CHECK-NEXT: ret ptr addrspace(4) [[ARRAYIDX]] +; + %arrayidx = getelementptr inbounds nuw [2 x [1 x %matrix_b_t]], ptr addrspace(4) %p, i64 0, i64 %i + 
%arrayidx2 = getelementptr inbounds nuw [1 x %matrix_b_t], ptr addrspace(4) %arrayidx, i64 0, i64 0 + ret ptr addrspace(4) %arrayidx2 +} From 6fb20d2f5bcc1610959b4747a2ecd1ef58ef7293 Mon Sep 17 00:00:00 2001 From: "Zhao, Maosu" Date: Thu, 2 Apr 2026 09:57:42 +0200 Subject: [PATCH 2/2] Revert "[JointMatrix] XFAIL tests after InstCombine GEP changes 6ecbc0c96e566" This reverts commit 293c99f898533e4977bbbf889f3004f865ef832d. --- sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_fill_k_cache.cpp | 3 +-- .../Matrix/SG32/joint_matrix_bf16_fill_k_cache_SLM.cpp | 2 -- .../Matrix/SG32/joint_matrix_bf16_fill_k_cache_init.cpp | 2 -- sycl/test-e2e/Matrix/SG32/joint_matrix_bfloat16_array.cpp | 2 -- sycl/test-e2e/Matrix/SG32/joint_matrix_prefetch.cpp | 2 -- sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache.cpp | 2 -- sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_OOB.cpp | 2 -- sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_SLM.cpp | 3 --- .../test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_arg_dim.cpp | 2 -- sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_init.cpp | 3 --- .../Matrix/joint_matrix_bf16_fill_k_cache_prefetch.cpp | 2 -- .../Matrix/joint_matrix_bf16_fill_k_cache_runtime_dim.cpp | 3 --- sycl/test-e2e/Matrix/joint_matrix_bfloat16_array.cpp | 2 -- sycl/test-e2e/Matrix/joint_matrix_prefetch.cpp | 2 -- 14 files changed, 1 insertion(+), 31 deletions(-) diff --git a/sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_fill_k_cache.cpp b/sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_fill_k_cache.cpp index 8ba1412088b65..47a3c955cb17b 100644 --- a/sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_fill_k_cache.cpp +++ b/sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_fill_k_cache.cpp @@ -7,8 +7,7 @@ //===----------------------------------------------------------------------===// // SG size = 32 is not currently supported for SYCL Joint Matrix by IGC on DG2 // UNSUPPORTED: gpu-intel-dg2 -// XFAIL: run-mode -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/21612 + // REQUIRES: 
target-spir // REQUIRES: aspect-ext_intel_matrix diff --git a/sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_fill_k_cache_SLM.cpp b/sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_fill_k_cache_SLM.cpp index 35a5d77951318..3f717a5203902 100644 --- a/sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_fill_k_cache_SLM.cpp +++ b/sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_fill_k_cache_SLM.cpp @@ -7,8 +7,6 @@ //===----------------------------------------------------------------------===// // SG size = 32 is not currently supported for SYCL Joint Matrix by IGC on DG2 // UNSUPPORTED: gpu-intel-dg2 -// XFAIL: run-mode -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/21612 // REQUIRES: target-spir diff --git a/sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_fill_k_cache_init.cpp b/sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_fill_k_cache_init.cpp index b8a6df7e1ba62..8116dd4a23a80 100644 --- a/sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_fill_k_cache_init.cpp +++ b/sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_fill_k_cache_init.cpp @@ -7,8 +7,6 @@ //===----------------------------------------------------------------------===// // SG size = 32 is not currently supported for SYCL Joint Matrix by IGC on DG2 // UNSUPPORTED: gpu-intel-dg2 -// XFAIL: run-mode -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/21612 // REQUIRES: target-spir diff --git a/sycl/test-e2e/Matrix/SG32/joint_matrix_bfloat16_array.cpp b/sycl/test-e2e/Matrix/SG32/joint_matrix_bfloat16_array.cpp index 5374c45605df0..e652c9ad91f6c 100644 --- a/sycl/test-e2e/Matrix/SG32/joint_matrix_bfloat16_array.cpp +++ b/sycl/test-e2e/Matrix/SG32/joint_matrix_bfloat16_array.cpp @@ -7,8 +7,6 @@ //===----------------------------------------------------------------------===// // SG size = 32 is not currently supported for SYCL Joint Matrix by IGC on DG2 // UNSUPPORTED: gpu-intel-dg2 -// XFAIL: run-mode -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/21612 // REQUIRES: target-spir diff --git 
a/sycl/test-e2e/Matrix/SG32/joint_matrix_prefetch.cpp b/sycl/test-e2e/Matrix/SG32/joint_matrix_prefetch.cpp index 8fd06f255d967..b13b8966db1bb 100644 --- a/sycl/test-e2e/Matrix/SG32/joint_matrix_prefetch.cpp +++ b/sycl/test-e2e/Matrix/SG32/joint_matrix_prefetch.cpp @@ -6,8 +6,6 @@ // //===----------------------------------------------------------------------===// // REQUIRES: target-spir -// XFAIL: run-mode -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/21612 // REQUIRES-INTEL-DRIVER: lin: 27501, win: 101.4943 // REQUIRES: aspect-ext_intel_matrix diff --git a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache.cpp b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache.cpp index 40c512e5278ac..99b53ead3d7c7 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache.cpp +++ b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache.cpp @@ -7,8 +7,6 @@ //===----------------------------------------------------------------------===// // REQUIRES: target-spir // REQUIRES: aspect-ext_intel_matrix -// XFAIL: run-mode -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/21612 // RUN: %{build} -o %t_vnni.out -DVNNI %fp-model-precise // RUN: %{run} %t_vnni.out diff --git a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_OOB.cpp b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_OOB.cpp index f05a8da1d61ca..e7871a8fa86b5 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_OOB.cpp +++ b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_OOB.cpp @@ -6,8 +6,6 @@ // //===----------------------------------------------------------------------===// // REQUIRES: target-spir -// XFAIL: run-mode -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/21612 // REQUIRES: aspect-ext_intel_matrix, gpu // UNSUPPORTED: gpu-intel-dg2 diff --git a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_SLM.cpp b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_SLM.cpp index 325b8c26f57b9..ef7a52fe1791e 100644 --- 
a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_SLM.cpp +++ b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_SLM.cpp @@ -9,9 +9,6 @@ // REQUIRES: aspect-ext_intel_matrix, gpu -// XFAIL: run-mode -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/21612 - // XFAIL: arch-intel_gpu_pvc // XFAIL-TRACKER: https://github.com/intel/llvm/issues/21094 diff --git a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_arg_dim.cpp b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_arg_dim.cpp index e479e42840bbf..5f01d834ae582 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_arg_dim.cpp +++ b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_arg_dim.cpp @@ -7,8 +7,6 @@ //===-----------------------------------------------------------------===// // REQUIRES: target-spir // REQUIRES: aspect-ext_intel_matrix -// XFAIL: run-mode -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/21612 // RUN: %{build} -o %t_arg_dim_vnni.out %fp-model-precise -DARG_DIM -DVNNI // RUN: %{run} %t_arg_dim_vnni.out diff --git a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_init.cpp b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_init.cpp index d3e9064288aef..0b368ffb3f159 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_init.cpp +++ b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_init.cpp @@ -7,9 +7,6 @@ //===----------------------------------------------------------------------===// // REQUIRES: target-spir // REQUIRES: aspect-ext_intel_matrix, gpu -// XFAIL: run-mode -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/21612 - // RUN: %{build} -o %t.out -DINIT_LIST -DVNNI %fp-model-precise // RUN: %{run} %t.out diff --git a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_prefetch.cpp b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_prefetch.cpp index 80d739388cf7b..b28e94a713a35 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_prefetch.cpp +++ 
b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_prefetch.cpp @@ -6,8 +6,6 @@ // //===----------------------------------------------------------------------===// // REQUIRES: target-spir -// XFAIL: run-mode -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/21612 // REQUIRES: aspect-ext_intel_matrix // UNSUPPORTED: gpu-intel-dg2 diff --git a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_runtime_dim.cpp b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_runtime_dim.cpp index d46b8c28192a5..16407d1c83002 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_runtime_dim.cpp +++ b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_runtime_dim.cpp @@ -8,9 +8,6 @@ // REQUIRES: target-spir // REQUIRES: aspect-ext_intel_matrix -// XFAIL: run-mode -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/21612 - // RUN: %{build} -o %t_runtime_dim_vnni.out %fp-model-precise -DRUNTIME_DIM -DVNNI // RUN: %{run} %t_runtime_dim_vnni.out 256 diff --git a/sycl/test-e2e/Matrix/joint_matrix_bfloat16_array.cpp b/sycl/test-e2e/Matrix/joint_matrix_bfloat16_array.cpp index 635f8723b81f7..c667ce2c74b28 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_bfloat16_array.cpp +++ b/sycl/test-e2e/Matrix/joint_matrix_bfloat16_array.cpp @@ -6,8 +6,6 @@ // //===----------------------------------------------------------------------===// // REQUIRES: target-spir -// XFAIL: run-mode -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/21612 // REQUIRES: aspect-ext_intel_matrix diff --git a/sycl/test-e2e/Matrix/joint_matrix_prefetch.cpp b/sycl/test-e2e/Matrix/joint_matrix_prefetch.cpp index 1fdf14daeb490..f1fd6cd8894c4 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_prefetch.cpp +++ b/sycl/test-e2e/Matrix/joint_matrix_prefetch.cpp @@ -10,8 +10,6 @@ // REQUIRES: aspect-ext_intel_matrix // RUN: %{build} -o %t.out // RUN: %{run} %t.out -// XFAIL: run-mode && (arch-intel_gpu_pvc || arch-intel_gpu_bmg_g21) -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/21612 #include 
"common.hpp"