From c20ed2fb3ad41aaa56b8ef617aa9bf52d0a88149 Mon Sep 17 00:00:00 2001 From: Tiekai Bi Date: Mon, 26 Jan 2026 19:56:11 +0800 Subject: [PATCH] imageUtilKernels.cu: optimize initAttentionMaskKernel by simply changing loop index Signed-off-by: Tiekai Bi --- cpp/kernels/preprocessKernels/imageUtilKernels.cu | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/kernels/preprocessKernels/imageUtilKernels.cu b/cpp/kernels/preprocessKernels/imageUtilKernels.cu index 0b87e3e..3379a1b 100644 --- a/cpp/kernels/preprocessKernels/imageUtilKernels.cu +++ b/cpp/kernels/preprocessKernels/imageUtilKernels.cu @@ -432,11 +432,11 @@ __global__ void initAttentionMaskKernel( auto const start = cuSeqlens[bIdx]; auto const end = cuSeqlens[bIdx + 1]; auto const tIdx = threadIdx.x; - auto const tidy = threadIdx.y; + auto const tIdy = threadIdx.y; - for (auto i = start + tIdx; i < end; i += 16) + for (auto i = start + tIdy; i < end; i += 16) { - for (auto j = start + tidy; j < end; j += 16) + for (auto j = start + tIdx; j < end; j += 16) { auto const posIdx = i * curHW + j; attentionMask[posIdx] = __float2half(0.0f);