Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3497,7 +3497,20 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
Ty = AT->getElementType();
return Ty->isIntegerTy(8);
};
if (Indices.size() == 1 && !IsCanonicalType(GEPEltType)) {
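// Skip canonicalization for the JointMatrix type, which is matched below as
// the "spirv.CooperativeMatrixKHR" target extension type, optionally wrapped
// in an array and/or a single-element struct. DL.getTypeAllocSize() will not
// return the true allocation size for it, so the canonicalized [N x i8]
// stride would be incorrect.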
auto IsMatrixType = [](Type *Ty) -> bool {
Copy link
Copy Markdown
Contributor

@YuriPlyakhin YuriPlyakhin Apr 3, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we skip canonicalization for any TargetExtType? Would getTypeAllocSize return the true allocation size for other target extension types?
I think we might need to submit this fix to upstream LLVM to avoid unnecessary customizations in intel/llvm.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it's ok for me to upstream the fix to community, I open this PR mainly for testing purpose.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it's ok for me to upstream the fix to community, I open this PR mainly for testing purpose.

In this case you should open a draft PR.

if (auto *AT = dyn_cast<ArrayType>(Ty))
Ty = AT->getElementType();
if (auto *ST = dyn_cast<StructType>(Ty))
if (ST->getNumElements() == 1)
Ty = ST->getElementType(0);
return isa<TargetExtType>(Ty) &&
cast<TargetExtType>(Ty)->getName() == "spirv.CooperativeMatrixKHR";
};
if (Indices.size() == 1 && !IsCanonicalType(GEPEltType) &&
!IsMatrixType(GEPEltType)) {
TypeSize Scale = DL.getTypeAllocSize(GEPEltType);
assert(!Scale.isScalable() && "Should have been handled earlier");
Type *NewElemTy = Builder.getInt8Ty();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
; RUN: opt < %s -passes=instcombine -S | FileCheck %s

; GEP canonicalization must skip element types that wrap spirv.CooperativeMatrixKHR
; because DL.getTypeAllocSize() is not meaningful for this target extension type.
; Verify that visiting chained GEPs on [N x [M x JointMatrix]] arrays does NOT
; rewrite the resulting single-index GEP to use an [sizeof x i8] stride.

target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1"
target triple = "spirv64-unknown-unknown"

%matrix_acc_t = type { target("spirv.CooperativeMatrixKHR", float, 3, 16, 16, 2) }
%matrix_a_t = type { target("spirv.CooperativeMatrixKHR", i16, 3, 16, 32, 0) }
%matrix_b_t = type { target("spirv.CooperativeMatrixKHR", i16, 3, 32, 16, 1) }

; Accumulator matrix case: a two-level array of %matrix_acc_t addressed through
; two chained GEPs, each with a leading constant 0 index followed by a
; runtime index (%i, then %j).
; InstCombine folds [2 x [2 x %matrix_acc_t]], ptr, 0, %i -> [2 x %matrix_acc_t], ptr, %i
; then [2 x %matrix_acc_t], ptr, 0, %j -> %matrix_acc_t, ptr, %j.
; The final single-index GEP on %matrix_acc_t must NOT be canonicalized to [N x i8].
; The expected output therefore keeps both GEPs with matrix-based source element
; types: first an array of the wrapper struct, then the wrapper struct itself.
define ptr addrspace(4) @test_acc_matrix_2d_array(ptr addrspace(4) %p, i64 %i, i64 %j) {
; CHECK-LABEL: define ptr addrspace(4) @test_acc_matrix_2d_array(
; CHECK-SAME: ptr addrspace(4) [[P:%.*]], i64 [[I:%.*]], i64 [[J:%.*]]) {
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [2 x [[MATRIX_ACC_T:%.*]]], ptr addrspace(4) [[P]], i64 [[I]]
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [[MATRIX_ACC_T]], ptr addrspace(4) [[ARRAYIDX]], i64 [[J]]
; CHECK-NEXT: ret ptr addrspace(4) [[ARRAYIDX2]]
;
%arrayidx = getelementptr inbounds nuw [2 x [2 x %matrix_acc_t]], ptr addrspace(4) %p, i64 0, i64 %i
%arrayidx2 = getelementptr inbounds nuw [2 x %matrix_acc_t], ptr addrspace(4) %arrayidx, i64 0, i64 %j
ret ptr addrspace(4) %arrayidx2
}

; Same pattern for the A matrix: [2 x [1 x %matrix_a_t]]
; Here the second GEP uses only constant zero indices, so the expected output
; contains a single GEP. Its source element type is [1 x %matrix_a_t], i.e. an
; array wrapping the single-element struct that holds the matrix type, and it
; must keep that type rather than being rewritten to an i8-based stride.
define ptr addrspace(4) @test_a_matrix_2d_array(ptr addrspace(4) %p, i64 %i) {
; CHECK-LABEL: define ptr addrspace(4) @test_a_matrix_2d_array(
; CHECK-SAME: ptr addrspace(4) [[P:%.*]], i64 [[I:%.*]]) {
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1 x [[MATRIX_A_T:%.*]]], ptr addrspace(4) [[P]], i64 [[I]]
; CHECK-NEXT: ret ptr addrspace(4) [[ARRAYIDX]]
;
%arrayidx = getelementptr inbounds nuw [2 x [1 x %matrix_a_t]], ptr addrspace(4) %p, i64 0, i64 %i
%arrayidx2 = getelementptr inbounds nuw [1 x %matrix_a_t], ptr addrspace(4) %arrayidx, i64 0, i64 0
ret ptr addrspace(4) %arrayidx2
}

; Same pattern for the B matrix: [2 x [1 x %matrix_b_t]]
; As in the A-matrix test, the all-zero second GEP leaves a single GEP in the
; expected output. Its source element type [1 x %matrix_b_t] wraps the matrix
; struct in an array and must not be replaced by an i8-based stride.
define ptr addrspace(4) @test_b_matrix_2d_array(ptr addrspace(4) %p, i64 %i) {
; CHECK-LABEL: define ptr addrspace(4) @test_b_matrix_2d_array(
; CHECK-SAME: ptr addrspace(4) [[P:%.*]], i64 [[I:%.*]]) {
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1 x [[MATRIX_B_T:%.*]]], ptr addrspace(4) [[P]], i64 [[I]]
; CHECK-NEXT: ret ptr addrspace(4) [[ARRAYIDX]]
;
%arrayidx = getelementptr inbounds nuw [2 x [1 x %matrix_b_t]], ptr addrspace(4) %p, i64 0, i64 %i
%arrayidx2 = getelementptr inbounds nuw [1 x %matrix_b_t], ptr addrspace(4) %arrayidx, i64 0, i64 0
ret ptr addrspace(4) %arrayidx2
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@
//===----------------------------------------------------------------------===//
// SG size = 32 is not currently supported for SYCL Joint Matrix by IGC on DG2
// UNSUPPORTED: gpu-intel-dg2
// XFAIL: run-mode
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/21612

// REQUIRES: target-spir

// REQUIRES: aspect-ext_intel_matrix
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
//===----------------------------------------------------------------------===//
// SG size = 32 is not currently supported for SYCL Joint Matrix by IGC on DG2
// UNSUPPORTED: gpu-intel-dg2
// XFAIL: run-mode
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/21612

// REQUIRES: target-spir

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
//===----------------------------------------------------------------------===//
// SG size = 32 is not currently supported for SYCL Joint Matrix by IGC on DG2
// UNSUPPORTED: gpu-intel-dg2
// XFAIL: run-mode
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/21612

// REQUIRES: target-spir

Expand Down
2 changes: 0 additions & 2 deletions sycl/test-e2e/Matrix/SG32/joint_matrix_bfloat16_array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
//===----------------------------------------------------------------------===//
// SG size = 32 is not currently supported for SYCL Joint Matrix by IGC on DG2
// UNSUPPORTED: gpu-intel-dg2
// XFAIL: run-mode
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/21612

// REQUIRES: target-spir

Expand Down
2 changes: 0 additions & 2 deletions sycl/test-e2e/Matrix/SG32/joint_matrix_prefetch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@
//
//===----------------------------------------------------------------------===//
// REQUIRES: target-spir
// XFAIL: run-mode
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/21612

// REQUIRES-INTEL-DRIVER: lin: 27501, win: 101.4943
// REQUIRES: aspect-ext_intel_matrix
Expand Down
2 changes: 0 additions & 2 deletions sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
//===----------------------------------------------------------------------===//
// REQUIRES: target-spir
// REQUIRES: aspect-ext_intel_matrix
// XFAIL: run-mode
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/21612

// RUN: %{build} -o %t_vnni.out -DVNNI %fp-model-precise
// RUN: %{run} %t_vnni.out
Expand Down
2 changes: 0 additions & 2 deletions sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_OOB.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@
//
//===----------------------------------------------------------------------===//
// REQUIRES: target-spir
// XFAIL: run-mode
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/21612

// REQUIRES: aspect-ext_intel_matrix, gpu
// UNSUPPORTED: gpu-intel-dg2
Expand Down
3 changes: 0 additions & 3 deletions sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_SLM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,6 @@

// REQUIRES: aspect-ext_intel_matrix, gpu

// XFAIL: run-mode
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/21612

// XFAIL: arch-intel_gpu_pvc
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/21094

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
//===-----------------------------------------------------------------===//
// REQUIRES: target-spir
// REQUIRES: aspect-ext_intel_matrix
// XFAIL: run-mode
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/21612

// RUN: %{build} -o %t_arg_dim_vnni.out %fp-model-precise -DARG_DIM -DVNNI
// RUN: %{run} %t_arg_dim_vnni.out
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,6 @@
//===----------------------------------------------------------------------===//
// REQUIRES: target-spir
// REQUIRES: aspect-ext_intel_matrix, gpu
// XFAIL: run-mode
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/21612


// RUN: %{build} -o %t.out -DINIT_LIST -DVNNI %fp-model-precise
// RUN: %{run} %t.out
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@
//
//===----------------------------------------------------------------------===//
// REQUIRES: target-spir
// XFAIL: run-mode
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/21612

// REQUIRES: aspect-ext_intel_matrix
// UNSUPPORTED: gpu-intel-dg2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,6 @@
// REQUIRES: target-spir

// REQUIRES: aspect-ext_intel_matrix
// XFAIL: run-mode
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/21612


// RUN: %{build} -o %t_runtime_dim_vnni.out %fp-model-precise -DRUNTIME_DIM -DVNNI
// RUN: %{run} %t_runtime_dim_vnni.out 256
Expand Down
2 changes: 0 additions & 2 deletions sycl/test-e2e/Matrix/joint_matrix_bfloat16_array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@
//
//===----------------------------------------------------------------------===//
// REQUIRES: target-spir
// XFAIL: run-mode
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/21612

// REQUIRES: aspect-ext_intel_matrix

Expand Down
2 changes: 0 additions & 2 deletions sycl/test-e2e/Matrix/joint_matrix_prefetch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@
// REQUIRES: aspect-ext_intel_matrix
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out
// XFAIL: run-mode && (arch-intel_gpu_pvc || arch-intel_gpu_bmg_g21)
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/21612

#include "common.hpp"

Expand Down
Loading