Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions src/hwlm/noodle_engine_sve.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,8 @@ hwlm_error_t singleCheckMatched(const struct noodTable *n, const u8 *buf,
size_t len, const struct cb_info *cbi,
const u8 *d, svbool_t matched) {
if (unlikely(svptest_any(svptrue_b8(), matched))) {
hwlmcb_rv_t rv = checkMatched(n, buf, len, cbi, d, matched,
n->msk_len != 1);
hwlm_error_t rv = checkMatched(n, buf, len, cbi, d, matched,
n->msk_len != 1);
RETURN_IF_TERMINATED(rv);
}
return HWLM_SUCCESS;
Expand Down Expand Up @@ -88,7 +88,7 @@ hwlm_error_t scanSingleLoop(const struct noodTable *n, const u8 *buf,
for (size_t i = 0; i < loops; i++, d += svcntb()) {
DEBUG_PRINTF("d %p \n", d);
svbool_t matched = singleMatched(chars, d, svptrue_b8());
hwlmcb_rv_t rv = singleCheckMatched(n, buf, len, cbi, d, matched);
hwlm_error_t rv = singleCheckMatched(n, buf, len, cbi, d, matched);
RETURN_IF_TERMINATED(rv);
}
DEBUG_PRINTF("d %p e %p \n", d, e);
Expand Down Expand Up @@ -120,7 +120,7 @@ hwlm_error_t scanSingle(const struct noodTable *n, const u8 *buf, size_t len,
const u8 *d1 = ROUNDUP_PTR(d, svcntb_pat(SV_POW2));
if (d != d1) {
DEBUG_PRINTF("until aligned %p \n", d1);
hwlmcb_rv_t rv = scanSingleOnce(n, buf, len, cbi, chars, d, d1);
hwlm_error_t rv = scanSingleOnce(n, buf, len, cbi, chars, d, d1);
RETURN_IF_TERMINATED(rv);
}
return scanSingleLoop(n, buf, len, cbi, chars, d1, e);
Expand All @@ -140,8 +140,8 @@ hwlm_error_t doubleCheckMatched(const struct noodTable *n, const u8 *buf,
// d - 1 won't underflow as the first position in buf has been dealt
// with meaning that d > buf
assert(d > buf);
hwlmcb_rv_t rv = checkMatched(n, buf, len, cbi, d - 1, matched,
n->msk_len != 2);
hwlm_error_t rv = checkMatched(n, buf, len, cbi, d - 1, matched,
n->msk_len != 2);
RETURN_IF_TERMINATED(rv);
}
return HWLM_SUCCESS;
Expand Down Expand Up @@ -177,7 +177,7 @@ hwlm_error_t scanDoubleOnce(const struct noodTable *n, const u8 *buf,

// we reuse u8 predicates for u16 lanes. This means that we will check against one
// extra \0 character at the end of the vector.
if(unlikely(n->key1 == '\0')) {
if (unlikely(n->key1 == '\0')) {
if (size % 2) {
// if odd, vec has an odd number of lanes and has the spurious \0
svbool_t lane_to_disable = svrev_b8(svpfirst(svrev_b8(pg), svpfalse()));
Expand Down Expand Up @@ -244,15 +244,15 @@ hwlm_error_t scanDouble(const struct noodTable *n, const u8 *buf, size_t len,

svuint8_t chars = svreinterpret_u8(getCharMaskDouble(n->key0, n->key1, noCase));

if (scan_len <= svcntb()) {
if ((size_t)(e - d) <= svcntb()) {
return scanDoubleOnce(n, buf, len, cbi, chars, d, e);
}
// peel off first part to align to the vector size
const u8 *d1 = ROUNDUP_PTR(d, svcntb_pat(SV_POW2));
if (d != d1) {
DEBUG_PRINTF("until aligned %p \n", d1);
hwlmcb_rv_t rv = scanDoubleOnce(n, buf, len, cbi, chars,
d, d1);
hwlm_error_t rv = scanDoubleOnce(n, buf, len, cbi, chars,
d, d1);
RETURN_IF_TERMINATED(rv);
}
return scanDoubleLoop(n, buf, len, cbi, chars, d1, e);
Expand Down
2 changes: 1 addition & 1 deletion src/nfa/shufti_sve.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ svbool_t doubleMatched(svuint8_t mask1_lo, svuint8_t mask1_hi,
svuint8_t t = svorr_x(svptrue_b8(), merged_t1, t2);
*inout_t1 = new_t1;

return svnot_z(svptrue_b8(), svcmpeq(svptrue_b8(), t, (uint8_t)0xff));
return svnot_z(pg, svcmpeq(svptrue_b8(), t, (uint8_t)0xff));
}

static really_inline
Expand Down
2 changes: 1 addition & 1 deletion src/nfa/truffle_simd.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ const u8 *rtruffleExecSVE(m256 shuf_mask_32, const u8 *buf, const u8 *buf_end){
if (work_buffer != buf) {
svuint8_t chars;
if (buf_end - buf < vect_size_int8) {
const svbool_t remaining_lanes = svwhilele_b8(0ll, buf_end - buf);
const svbool_t remaining_lanes = svwhilelt_b8(0ll, buf_end - buf);
chars = svld1(remaining_lanes, buf);
} else {
chars = svld1(svptrue_b8(), buf);
Expand Down
186 changes: 186 additions & 0 deletions unit/internal/shufti.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1147,3 +1147,189 @@ TEST(ReverseShufti, ExecMatch6) {
ASSERT_EQ(reinterpret_cast<const u8 *>(t1) + i, rv);
}
}

// Regression test: in a buffer shorter than one SIMD/SVE vector, the first
// byte of a two-byte pair sitting at the very last position must not be
// reported as a match.
// The SVE doubleMatched() helper used to leave inactive vector lanes unmasked
// by the predicate. With a pair whose second byte is '\0', the zero-filled
// inactive lane following the final byte looked like a valid second byte and
// produced a false positive.
TEST(DoubleShufti, ExecNoMatchLastByteShortBufNullPair) {
    m128 lo1, hi1, lo2, hi2;

    // The second byte of the pair is the null byte. This ensures
    // mask2_lo[0] and mask2_hi[0] both have the bucket bit cleared, so that
    // inactive SVE lanes (loaded as 0) produce t != 0xff.
    flat_set<pair<u8, u8>> pairs;
    pairs.insert(make_pair('a', '\0'));

    const bool built = shuftiBuildDoubleMasks(CharReach(), pairs,
                                              reinterpret_cast<u8 *>(&lo1),
                                              reinterpret_cast<u8 *>(&hi1),
                                              reinterpret_cast<u8 *>(&lo2),
                                              reinterpret_cast<u8 *>(&hi2));
    ASSERT_TRUE(built);

    // The backing store is much larger than any scanned length and is filled
    // with 'b', so platforms that use unaligned vector loads (SIMD) can read
    // past buf_end safely.
    const int kBufSize = 128;
    char data[kBufSize];
    memset(data, 'b', kBufSize);

    u8 *const start = reinterpret_cast<u8 *>(data);

    // Lengths 2..15: 'a' is the last byte inside the scanned range and no
    // '\0' follows it within that range, so the scan must return buf_end.
    for (int len = 2; len < 16; ++len) {
        // Re-fill to erase the 'a' planted by the previous iteration.
        memset(data, 'b', kBufSize);
        data[len - 1] = 'a';

        const u8 *const expected = start + len;
        const u8 *found = shuftiDoubleExec(lo1, hi1, lo2, hi2,
                                           start, start + len);

        ASSERT_EQ(expected, found)
            << "False match for len=" << len;
    }
}

// Null-second-byte pair ('a', '\0') with 'a' as the final byte of the scanned
// range, swept over every length from 2 up to maxlen - 1 (255). Beyond the
// short-buffer case this also covers longer buffers, where the tail portion
// processed with a partial predicate may expose the same false positive.
TEST(DoubleShufti, ExecNoMatchLastByteNullPairVaryLen) {
    m128 lo1, hi1, lo2, hi2;

    flat_set<pair<u8, u8>> pairs;
    pairs.insert(make_pair('a', '\0'));

    const bool built = shuftiBuildDoubleMasks(CharReach(), pairs,
                                              reinterpret_cast<u8 *>(&lo1),
                                              reinterpret_cast<u8 *>(&hi1),
                                              reinterpret_cast<u8 *>(&lo2),
                                              reinterpret_cast<u8 *>(&hi2));
    ASSERT_TRUE(built);

    const int kBufSize = 256;
    char data[kBufSize];

    u8 *const start = reinterpret_cast<u8 *>(data);

    for (int len = 2; len != kBufSize; ++len) {
        // Fresh 'b' background each time, then plant 'a' as the last
        // in-range byte; the byte just past the range stays 'b', not '\0'.
        memset(data, 'b', kBufSize);
        data[len - 1] = 'a';

        const u8 *const expected = start + len;
        const u8 *found = shuftiDoubleExec(lo1, hi1, lo2, hi2,
                                           start, start + len);

        // Returning buf_end means "nothing found".
        ASSERT_EQ(expected, found)
            << "False match for len=" << len;
    }
}

// Positive counterpart to the null-pair regression tests: when the pair
// ('a', '\0') really is present inside a short buffer, it must be found and
// the returned pointer must address the 'a'.
TEST(DoubleShufti, ExecMatchNullPairShortBuf) {
    m128 lo1, hi1, lo2, hi2;

    flat_set<pair<u8, u8>> pairs;
    pairs.insert(make_pair('a', '\0'));

    const bool built = shuftiBuildDoubleMasks(CharReach(), pairs,
                                              reinterpret_cast<u8 *>(&lo1),
                                              reinterpret_cast<u8 *>(&hi1),
                                              reinterpret_cast<u8 *>(&lo2),
                                              reinterpret_cast<u8 *>(&hi2));
    ASSERT_TRUE(built);

    const int kBufSize = 128;
    char data[kBufSize];

    u8 *const start = reinterpret_cast<u8 *>(data);

    for (int len = 3; len < 16; ++len) {
        memset(data, 'b', kBufSize);
        // The pair occupies the final two bytes of the scanned range.
        const int pair_pos = len - 2;
        data[pair_pos] = 'a';
        data[pair_pos + 1] = '\0';

        const u8 *const expected = start + pair_pos;
        const u8 *found = shuftiDoubleExec(lo1, hi1, lo2, hi2,
                                           start, start + len);

        ASSERT_EQ(expected, found)
            << "Match not found for len=" << len;
    }
}

// Short buffers with an ordinary (non-null) two-byte pattern ('x', 'y'): the
// first char alone at the last position, with no 'y' after it, is not a
// match. This is expected to hold on every platform.
TEST(DoubleShufti, ExecNoMatchLastByteShortBuf) {
    m128 lo1, hi1, lo2, hi2;

    flat_set<pair<u8, u8>> pairs;
    pairs.insert(make_pair('x', 'y'));

    const bool built = shuftiBuildDoubleMasks(CharReach(), pairs,
                                              reinterpret_cast<u8 *>(&lo1),
                                              reinterpret_cast<u8 *>(&hi1),
                                              reinterpret_cast<u8 *>(&lo2),
                                              reinterpret_cast<u8 *>(&hi2));
    ASSERT_TRUE(built);

    const int kBufSize = 128;
    char data[kBufSize];

    u8 *const start = reinterpret_cast<u8 *>(data);

    for (int len = 2; len < 16; ++len) {
        memset(data, 'b', kBufSize);
        // Only the first half of the pair is present, as the final byte.
        data[len - 1] = 'x';

        const u8 *const expected = start + len;
        const u8 *found = shuftiDoubleExec(lo1, hi1, lo2, hi2,
                                           start, start + len);

        ASSERT_EQ(expected, found)
            << "False match for len=" << len;
    }
}

// Short buffers scanned with masks built from both a one-byte literal ('a')
// and a two-byte literal ('x', 'y'). A one-byte hit on the final byte must
// be reported, and a buffer containing none of the target bytes must yield
// no match.
TEST(DoubleShufti, ExecMatchMixedShortBuf) {
    m128 lo1, hi1, lo2, hi2;

    CharReach single;
    single.set('a');

    flat_set<pair<u8, u8>> pairs;
    pairs.insert(make_pair('x', 'y'));

    const bool built = shuftiBuildDoubleMasks(single, pairs,
                                              reinterpret_cast<u8 *>(&lo1),
                                              reinterpret_cast<u8 *>(&hi1),
                                              reinterpret_cast<u8 *>(&lo2),
                                              reinterpret_cast<u8 *>(&hi2));
    ASSERT_TRUE(built);

    const int kBufSize = 128;
    char data[kBufSize];

    u8 *const start = reinterpret_cast<u8 *>(data);

    // Phase 1: the one-byte literal 'a' as the last in-range byte must be
    // reported at exactly that position.
    for (int len = 2; len < 16; ++len) {
        memset(data, 'b', kBufSize);
        const int last = len - 1;
        data[last] = 'a';

        const u8 *const expected = start + last;
        const u8 *found = shuftiDoubleExec(lo1, hi1, lo2, hi2,
                                           start, start + len);

        ASSERT_EQ(expected, found)
            << "One-byte match not found for len=" << len;
    }

    // Phase 2: an all-'b' buffer contains no target bytes, so the scan must
    // run to buf_end.
    for (int len = 2; len < 16; ++len) {
        memset(data, 'b', kBufSize);

        const u8 *const expected = start + len;
        const u8 *found = shuftiDoubleExec(lo1, hi1, lo2, hi2,
                                           start, start + len);

        ASSERT_EQ(expected, found)
            << "False match for len=" << len;
    }
}
Loading