From 1cbcec35e9f74911c321972f3f699559cb8c4183 Mon Sep 17 00:00:00 2001 From: Byeonguk Jeong Date: Fri, 6 Mar 2026 10:58:41 +0900 Subject: [PATCH 1/3] shufti-double: use predicate mask to prevent false positives doubleMatched() in shufti_sve.hpp used svptrue_b8() for the final comparison instead of the caller-provided predicate pg. When called from dshuftiOnce() with a partial predicate (buffer shorter than SVE vector length), inactive lanes loaded as zero could satisfy the match condition, producing false positive matches. Changed the return statement to use pg for svnot_z, ensuring inactive lanes are excluded from match results. Added 5 unit tests covering short/variable-length buffers with null-byte pair patterns and mixed single/double-byte patterns to catch regressions. Fixes: 60b211250562626d6536e992cc1d0d52cd128f44 ("Use SVE for double shufti") Signed-off-by: Byeonguk Jeong --- src/nfa/shufti_sve.hpp | 2 +- unit/internal/shufti.cpp | 186 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 187 insertions(+), 1 deletion(-) diff --git a/src/nfa/shufti_sve.hpp b/src/nfa/shufti_sve.hpp index 3e1bc86c9..2348c0913 100644 --- a/src/nfa/shufti_sve.hpp +++ b/src/nfa/shufti_sve.hpp @@ -173,7 +173,7 @@ svbool_t doubleMatched(svuint8_t mask1_lo, svuint8_t mask1_hi, svuint8_t t = svorr_x(svptrue_b8(), merged_t1, t2); *inout_t1 = new_t1; - return svnot_z(svptrue_b8(), svcmpeq(svptrue_b8(), t, (uint8_t)0xff)); + return svnot_z(pg, svcmpeq(svptrue_b8(), t, (uint8_t)0xff)); } static really_inline diff --git a/unit/internal/shufti.cpp b/unit/internal/shufti.cpp index 0b6f32197..c28c810ad 100644 --- a/unit/internal/shufti.cpp +++ b/unit/internal/shufti.cpp @@ -1147,3 +1147,189 @@ TEST(ReverseShufti, ExecMatch6) { ASSERT_EQ(reinterpret_cast(t1) + i, rv); } } + +// Test that having the first char of a two-byte pair at the last position of a +// short buffer (shorter than a SIMD/SVE vector) does not produce a false match. 
+// This is a regression test for an SVE bug where inactive vector lanes in +// doubleMatched() were not properly masked by the predicate, causing false +// positives when the last byte matched a first-char pattern and the inactive +// lane (zero-filled) satisfied a null-byte second-char pattern. +TEST(DoubleShufti, ExecNoMatchLastByteShortBufNullPair) { + m128 lo1, hi1, lo2, hi2; + + flat_set> lits; + + // Use a pattern where the second char is the null byte. This ensures + // mask2_lo[0] and mask2_hi[0] both have the bucket bit cleared, which + // causes inactive SVE lanes (loaded as 0) to produce t != 0xff. + lits.insert(make_pair('a', '\0')); + + bool ret = shuftiBuildDoubleMasks(CharReach(), lits, + reinterpret_cast(&lo1), + reinterpret_cast(&hi1), + reinterpret_cast(&lo2), + reinterpret_cast(&hi2)); + ASSERT_TRUE(ret); + + // Use a large backing buffer filled with 'b' to ensure safe memory reads + // past buf_end on platforms that use unaligned vector loads (SIMD). + const int maxlen = 128; + char t1[maxlen]; + memset(t1, 'b', maxlen); + + // For short buffers (length 2 to 15), place 'a' at the last position. + // Since there is no '\0' following it within the buffer, no match should + // be reported. + for (int len = 2; len <= 15; len++) { + memset(t1, 'b', maxlen); + t1[len - 1] = 'a'; + + const u8 *rv = shuftiDoubleExec(lo1, hi1, lo2, hi2, + reinterpret_cast(t1), + reinterpret_cast(t1) + len); + + ASSERT_EQ(reinterpret_cast(t1 + len), rv) + << "False match for len=" << len; + } +} + +// Same as above, but sweep every buffer length from 2 to 255, since the tail +// portion processed with a partial predicate may expose the same issue. 
+TEST(DoubleShufti, ExecNoMatchLastByteNullPairVaryLen) { + m128 lo1, hi1, lo2, hi2; + + flat_set> lits; + lits.insert(make_pair('a', '\0')); + + bool ret = shuftiBuildDoubleMasks(CharReach(), lits, + reinterpret_cast(&lo1), + reinterpret_cast(&hi1), + reinterpret_cast(&lo2), + reinterpret_cast(&hi2)); + ASSERT_TRUE(ret); + + const int maxlen = 256; + char t1[maxlen]; + + for (int len = 2; len < maxlen; len++) { + memset(t1, 'b', maxlen); + t1[len - 1] = 'a'; + + const u8 *rv = shuftiDoubleExec(lo1, hi1, lo2, hi2, + reinterpret_cast(t1), + reinterpret_cast(t1) + len); + + ASSERT_EQ(reinterpret_cast(t1 + len), rv) + << "False match for len=" << len; + } +} + +// Verify that a real match of ('a', '\0') within a short buffer IS found. +TEST(DoubleShufti, ExecMatchNullPairShortBuf) { + m128 lo1, hi1, lo2, hi2; + + flat_set> lits; + lits.insert(make_pair('a', '\0')); + + bool ret = shuftiBuildDoubleMasks(CharReach(), lits, + reinterpret_cast(&lo1), + reinterpret_cast(&hi1), + reinterpret_cast(&lo2), + reinterpret_cast(&hi2)); + ASSERT_TRUE(ret); + + const int maxlen = 128; + char t1[maxlen]; + + for (int len = 3; len <= 15; len++) { + memset(t1, 'b', maxlen); + // Place the pair ('a', '\0') inside the buffer + t1[len - 2] = 'a'; + t1[len - 1] = '\0'; + + const u8 *rv = shuftiDoubleExec(lo1, hi1, lo2, hi2, + reinterpret_cast(t1), + reinterpret_cast(t1) + len); + + ASSERT_EQ(reinterpret_cast(t1 + len - 2), rv) + << "Match not found for len=" << len; + } +} + +// Test short buffers with a normal two-byte pattern where first char is at the +// last position. This should not match on any platform. 
+TEST(DoubleShufti, ExecNoMatchLastByteShortBuf) { + m128 lo1, hi1, lo2, hi2; + + flat_set> lits; + lits.insert(make_pair('x', 'y')); + + bool ret = shuftiBuildDoubleMasks(CharReach(), lits, + reinterpret_cast(&lo1), + reinterpret_cast(&hi1), + reinterpret_cast(&lo2), + reinterpret_cast(&hi2)); + ASSERT_TRUE(ret); + + const int maxlen = 128; + char t1[maxlen]; + + for (int len = 2; len <= 15; len++) { + memset(t1, 'b', maxlen); + t1[len - 1] = 'x'; // first char at last position, no 'y' follows + + const u8 *rv = shuftiDoubleExec(lo1, hi1, lo2, hi2, + reinterpret_cast(t1), + reinterpret_cast(t1) + len); + + ASSERT_EQ(reinterpret_cast(t1 + len), rv) + << "False match for len=" << len; + } +} + +// Test short buffers with mixed one-byte and two-byte patterns. A one-byte +// match at the last position should be correctly reported. +TEST(DoubleShufti, ExecMatchMixedShortBuf) { + m128 lo1, hi1, lo2, hi2; + + CharReach onebyte; + flat_set> twobyte; + + onebyte.set('a'); + twobyte.insert(make_pair('x', 'y')); + + bool ret = shuftiBuildDoubleMasks(onebyte, twobyte, + reinterpret_cast(&lo1), + reinterpret_cast(&hi1), + reinterpret_cast(&lo2), + reinterpret_cast(&hi2)); + ASSERT_TRUE(ret); + + const int maxlen = 128; + char t1[maxlen]; + + // One-byte 'a' at the last position should be reported + for (int len = 2; len <= 15; len++) { + memset(t1, 'b', maxlen); + t1[len - 1] = 'a'; + + const u8 *rv = shuftiDoubleExec(lo1, hi1, lo2, hi2, + reinterpret_cast(t1), + reinterpret_cast(t1) + len); + + ASSERT_EQ(reinterpret_cast(t1 + len - 1), rv) + << "One-byte match not found for len=" << len; + } + + // No match when target chars are absent + for (int len = 2; len <= 15; len++) { + memset(t1, 'b', maxlen); + + const u8 *rv = shuftiDoubleExec(lo1, hi1, lo2, hi2, + reinterpret_cast(t1), + reinterpret_cast(t1) + len); + + ASSERT_EQ(reinterpret_cast(t1 + len), rv) + << "False match for len=" << len; + } +} From 98c9660842f84f75b5dd98c01b75f1e87c30c550 Mon Sep 17 00:00:00 2001 From: 
Byeonguk Jeong Date: Fri, 6 Mar 2026 11:34:32 +0900 Subject: [PATCH 2/3] truffle: fix off-by-one in rtruffleExecSVE tail and add unit tests Fix a bug in rtruffleExecSVE where the tail processing for short buffers used svwhilele_b8 instead of svwhilelt_b8. svwhilele_b8(0, N) activates lanes 0..N (N+1 lanes), reading one byte past the buffer end. The forward path (truffleExecSVE) already correctly uses svwhilelt_b8, which activates lanes 0..N-1 (N lanes). Add 26 new unit tests for the truffle accelerator covering: - Compile roundtrip: character ranges, empty class, same-nibble chars - Forward exec: single byte buffers, high byte (>=0x80) matching, same-nibble non-match, NUL char, dot (all chars), buffer-end match, varying lengths (1-130), alignment sweep, multi-char classes, all 256 single-char classes, 0x7F/0x80 boundary - Reverse exec: single byte, high byte, NUL, buffer-start match, varying lengths, large buffer (4K), alignment sweep, all 256 single-char classes, multiple matches, boundary chars Fixes: c67076ce22452bdfe423063b273ded8bd7444aae ("Add truffle SVE implementation") Signed-off-by: Byeonguk Jeong --- src/nfa/truffle_simd.hpp | 2 +- unit/internal/truffle.cpp | 475 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 476 insertions(+), 1 deletion(-) diff --git a/src/nfa/truffle_simd.hpp b/src/nfa/truffle_simd.hpp index 2e8ecb9ab..8a2a06280 100644 --- a/src/nfa/truffle_simd.hpp +++ b/src/nfa/truffle_simd.hpp @@ -247,7 +247,7 @@ const u8 *rtruffleExecSVE(m256 shuf_mask_32, const u8 *buf, const u8 *buf_end){ if (work_buffer != buf) { svuint8_t chars; if (buf_end - buf < vect_size_int8) { - const svbool_t remaining_lanes = svwhilele_b8(0ll, buf_end - buf); + const svbool_t remaining_lanes = svwhilelt_b8(0ll, buf_end - buf); chars = svld1(remaining_lanes, buf); } else { chars = svld1(svptrue_b8(), buf); diff --git a/unit/internal/truffle.cpp b/unit/internal/truffle.cpp index 3a2d67d69..527e4128d 100644 --- a/unit/internal/truffle.cpp +++ 
b/unit/internal/truffle.cpp @@ -34,6 +34,9 @@ #include "util/charreach.h" #include "util/simd_utils.h" +#include +#include + using namespace ue2; TEST(Truffle, CompileDot) { @@ -617,3 +620,475 @@ TEST(ReverseTruffle, ExecMatch5) { ASSERT_EQ(reinterpret_cast(t1) + i, rv); } } + +/* + * Additional unit tests for truffle accelerator. + * These cover edge cases and areas not handled by the original test suite. + */ + +// --- Compile/Roundtrip Tests --- + +TEST(Truffle, CompileRanges) { + // Test building masks for various character ranges and verify roundtrip + m128 mask1, mask2; + CharReach chars; + + // Printable ASCII range + chars.setRange(0x20, 0x7e); + truffleBuildMasks(chars, reinterpret_cast(&mask1), reinterpret_cast(&mask2)); + CharReach out = truffle2cr(reinterpret_cast(&mask1), reinterpret_cast(&mask2)); + ASSERT_EQ(out, chars); + + // High byte range + chars.clear(); + chars.setRange(0x80, 0xff); + truffleBuildMasks(chars, reinterpret_cast(&mask1), reinterpret_cast(&mask2)); + out = truffle2cr(reinterpret_cast(&mask1), reinterpret_cast(&mask2)); + ASSERT_EQ(out, chars); + + // Mixed low and high range + chars.clear(); + chars.setRange(0x30, 0x39); // digits + chars.setRange(0xC0, 0xDF); // some high bytes + truffleBuildMasks(chars, reinterpret_cast(&mask1), reinterpret_cast(&mask2)); + out = truffle2cr(reinterpret_cast(&mask1), reinterpret_cast(&mask2)); + ASSERT_EQ(out, chars); +} + +TEST(Truffle, CompileEmpty) { + // Empty character class - no characters set + m128 mask1, mask2; + CharReach chars; // empty + truffleBuildMasks(chars, reinterpret_cast(&mask1), reinterpret_cast(&mask2)); + CharReach out = truffle2cr(reinterpret_cast(&mask1), reinterpret_cast(&mask2)); + ASSERT_EQ(out, chars); + ASSERT_TRUE(out.none()); +} + +TEST(Truffle, CompileSameNibbleDiffHigh) { + // Characters with the same low nibble but different high nibble + // e.g., 'V' = 0x56 and 'e' = 0x65 have different nibbles, but + // 0x13 and 0x23 share low nibble 0x3 but differ in bits 4-6 
+ m128 mask1, mask2; + CharReach chars; + chars.set(0x13); + chars.set(0x23); + chars.set(0x43); + chars.set(0x93); + chars.set(0xA3); + truffleBuildMasks(chars, reinterpret_cast(&mask1), reinterpret_cast(&mask2)); + CharReach out = truffle2cr(reinterpret_cast(&mask1), reinterpret_cast(&mask2)); + ASSERT_EQ(out, chars); +} + +// --- Forward Exec: Edge cases --- + +TEST(Truffle, ExecSingleByte) { + // Buffer of length 1 - matching char + m128 lo, hi; + CharReach chars; + chars.set('Z'); + truffleBuildMasks(chars, reinterpret_cast(&lo), reinterpret_cast(&hi)); + + u8 buf[1] = { 'Z' }; + const u8 *rv = truffleExec(lo, hi, buf, buf + 1); + ASSERT_EQ(buf, rv); +} + +TEST(Truffle, ExecSingleByteNoMatch) { + // Buffer of length 1 - non-matching char + m128 lo, hi; + CharReach chars; + chars.set('Z'); + truffleBuildMasks(chars, reinterpret_cast(&lo), reinterpret_cast(&hi)); + + u8 buf[1] = { 'A' }; + const u8 *rv = truffleExec(lo, hi, buf, buf + 1); + ASSERT_EQ(buf + 1, rv); +} + +TEST(Truffle, ExecHighByteMatch) { + // Test high byte characters (>= 0x80) match correctly + m128 lo, hi; + CharReach chars; + chars.set(0xAB); + truffleBuildMasks(chars, reinterpret_cast(&lo), reinterpret_cast(&hi)); + + u8 buf[128]; + memset(buf, 0x20, sizeof(buf)); + + for (size_t pos = 0; pos < 64; pos++) { + buf[pos] = 0xAB; + const u8 *rv = truffleExec(lo, hi, buf, buf + 128); + ASSERT_EQ(buf + pos, rv); + buf[pos] = 0x20; // restore + } +} + +TEST(Truffle, ExecHighByteNoMatchSameNibble) { + // 0xAB and 0x2B share the same low nibble (0xB). + // Searching for 0xAB should NOT match 0x2B. 
+ m128 lo, hi; + CharReach chars; + chars.set(0xAB); + truffleBuildMasks(chars, reinterpret_cast(&lo), reinterpret_cast(&hi)); + + u8 buf[64]; + memset(buf, 0x2B, sizeof(buf)); // same low nibble, different high nibble + const u8 *rv = truffleExec(lo, hi, buf, buf + 64); + ASSERT_EQ(buf + 64, rv); +} + +TEST(Truffle, ExecNulCharMatch) { + // Searching for NUL character + m128 lo, hi; + CharReach chars; + chars.set(0x00); + truffleBuildMasks(chars, reinterpret_cast(&lo), reinterpret_cast(&hi)); + + u8 buf[80]; + memset(buf, 0xFF, sizeof(buf)); + buf[45] = 0x00; + const u8 *rv = truffleExec(lo, hi, buf, buf + 80); + ASSERT_EQ(buf + 45, rv); +} + +TEST(Truffle, ExecDotMatchAll) { + // Dot (all chars) should match the first byte + m128 lo, hi; + CharReach chars; + chars.setall(); + truffleBuildMasks(chars, reinterpret_cast(&lo), reinterpret_cast(&hi)); + + u8 buf[64]; + memset(buf, 0x42, sizeof(buf)); + const u8 *rv = truffleExec(lo, hi, buf, buf + 64); + ASSERT_EQ(buf, rv); // first byte always matches +} + +TEST(Truffle, ExecMatchAtBufferEnd) { + // Match only at the very last byte of the buffer + m128 lo, hi; + CharReach chars; + chars.set('X'); + truffleBuildMasks(chars, reinterpret_cast(&lo), reinterpret_cast(&hi)); + + u8 buf[200]; + memset(buf, '.', sizeof(buf)); + buf[199] = 'X'; + const u8 *rv = truffleExec(lo, hi, buf, buf + 200); + ASSERT_EQ(buf + 199, rv); +} + +TEST(Truffle, ExecVaryingLengths) { + // Test with buffers of many different lengths (1 to 130) + m128 lo, hi; + CharReach chars; + chars.set('q'); + truffleBuildMasks(chars, reinterpret_cast(&lo), reinterpret_cast(&hi)); + + u8 buf[130]; + for (size_t len = 1; len <= 130; len++) { + memset(buf, '.', len); + // No match + const u8 *rv = truffleExec(lo, hi, buf, buf + len); + ASSERT_EQ(buf + len, rv) << "len=" << len; + + // Match at last position + buf[len - 1] = 'q'; + rv = truffleExec(lo, hi, buf, buf + len); + ASSERT_EQ(buf + len - 1, rv) << "len=" << len; + } +} + +TEST(Truffle, 
ExecAlignmentSweep) { + // Test match at every offset within a larger buffer to exercise + // alignment code paths + m128 lo, hi; + CharReach chars; + chars.set('!'); + truffleBuildMasks(chars, reinterpret_cast(&lo), reinterpret_cast(&hi)); + + u8 buf[256]; + memset(buf, '.', sizeof(buf)); + + for (size_t offset = 0; offset < 64; offset++) { + buf[offset] = '!'; + const u8 *rv = truffleExec(lo, hi, buf, buf + 256); + ASSERT_EQ(buf + offset, rv) << "offset=" << offset; + buf[offset] = '.'; + } +} + +TEST(Truffle, ExecMultipleCharsInClass) { + // Test character class with many different characters + m128 lo, hi; + CharReach chars; + // [a-zA-Z0-9] + chars.setRange('a', 'z'); + chars.setRange('A', 'Z'); + chars.setRange('0', '9'); + truffleBuildMasks(chars, reinterpret_cast(&lo), reinterpret_cast(&hi)); + + // Buffer of non-matching chars + u8 buf[64]; + memset(buf, '!', sizeof(buf)); // not in character class + + for (u8 c = '0'; c <= '9'; c++) { + buf[32] = c; + const u8 *rv = truffleExec(lo, hi, buf, buf + 64); + ASSERT_EQ(buf + 32, rv) << "char=" << (char)c; + buf[32] = '!'; + } + for (u8 c = 'a'; c <= 'z'; c++) { + buf[32] = c; + const u8 *rv = truffleExec(lo, hi, buf, buf + 64); + ASSERT_EQ(buf + 32, rv) << "char=" << (char)c; + buf[32] = '!'; + } + for (u8 c = 'A'; c <= 'Z'; c++) { + buf[32] = c; + const u8 *rv = truffleExec(lo, hi, buf, buf + 64); + ASSERT_EQ(buf + 32, rv) << "char=" << (char)c; + buf[32] = '!'; + } +} + +TEST(Truffle, ExecAllSingleCharClasses) { + // For every possible character class of size 1, verify match works + for (unsigned c = 0; c < 256; c++) { + m128 lo, hi; + CharReach chars; + chars.set((u8)c); + truffleBuildMasks(chars, reinterpret_cast(&lo), reinterpret_cast(&hi)); + + // Build a buffer that doesn't contain c (use c^1 if possible) + u8 filler = (u8)(c ^ 0x01); + if (filler == (u8)c) filler = (u8)(c ^ 0x02); + u8 buf[64]; + memset(buf, filler, sizeof(buf)); + + // No match + const u8 *rv = truffleExec(lo, hi, buf, buf + 64); + 
ASSERT_EQ(buf + 64, rv) << "c=" << c; + + // Place character at position 17 + buf[17] = (u8)c; + rv = truffleExec(lo, hi, buf, buf + 64); + ASSERT_EQ(buf + 17, rv) << "c=" << c; + } +} + +// --- Reverse Exec: Edge cases --- + +TEST(ReverseTruffle, ExecSingleByte) { + m128 lo, hi; + CharReach chars; + chars.set('Z'); + truffleBuildMasks(chars, reinterpret_cast(&lo), reinterpret_cast(&hi)); + + u8 buf[1] = { 'Z' }; + const u8 *rv = rtruffleExec(lo, hi, buf, buf + 1); + ASSERT_EQ(buf, rv); +} + +TEST(ReverseTruffle, ExecSingleByteNoMatch) { + m128 lo, hi; + CharReach chars; + chars.set('Z'); + truffleBuildMasks(chars, reinterpret_cast(&lo), reinterpret_cast(&hi)); + + u8 raw[2] = { 0, 'A' }; + const u8 *buf = raw + 1; + const u8 *rv = rtruffleExec(lo, hi, buf, buf + 1); + ASSERT_EQ(raw, rv); +} + +TEST(ReverseTruffle, ExecHighByteReverse) { + // Reverse search for high byte characters + m128 lo, hi; + CharReach chars; + chars.set(0xFE); + truffleBuildMasks(chars, reinterpret_cast(&lo), reinterpret_cast(&hi)); + + u8 buf[128]; + memset(buf, 0x20, sizeof(buf)); + + // Place a single match at various positions and verify reverse finds it + for (size_t pos = 64; pos < 128; pos++) { + memset(buf, 0x20, sizeof(buf)); + buf[pos] = 0xFE; + const u8 *rv = rtruffleExec(lo, hi, buf, buf + 128); + ASSERT_EQ(buf + pos, rv) << "pos=" << pos; + } +} + +TEST(ReverseTruffle, ExecNulReverse) { + // Reverse search for NUL + m128 lo, hi; + CharReach chars; + chars.set(0x00); + truffleBuildMasks(chars, reinterpret_cast(&lo), reinterpret_cast(&hi)); + + u8 buf[80]; + memset(buf, 0xFF, sizeof(buf)); + buf[10] = 0x00; + buf[50] = 0x00; + const u8 *rv = rtruffleExec(lo, hi, buf, buf + 80); + ASSERT_EQ(buf + 50, rv); // should find the last NUL +} + +TEST(ReverseTruffle, ExecMatchAtBufferStart) { + // Match only at the very first byte + m128 lo, hi; + CharReach chars; + chars.set('X'); + truffleBuildMasks(chars, reinterpret_cast(&lo), reinterpret_cast(&hi)); + + u8 buf[200]; + memset(buf, 
'.', sizeof(buf)); + buf[0] = 'X'; + const u8 *rv = rtruffleExec(lo, hi, buf, buf + 200); + ASSERT_EQ(buf, rv); +} + +TEST(ReverseTruffle, ExecVaryingLengths) { + // Test reverse with buffers of many different lengths + m128 lo, hi; + CharReach chars; + chars.set('q'); + truffleBuildMasks(chars, reinterpret_cast(&lo), reinterpret_cast(&hi)); + + u8 raw[131]; + u8 *buf = raw + 1; + for (size_t len = 1; len <= 130; len++) { + memset(buf, '.', len); + // No match + const u8 *rv = rtruffleExec(lo, hi, buf, buf + len); + ASSERT_EQ(raw, rv) << "len=" << len; + + // Match at first position + buf[0] = 'q'; + rv = rtruffleExec(lo, hi, buf, buf + len); + ASSERT_EQ(buf, rv) << "len=" << len; + } +} + +TEST(ReverseTruffle, ExecLargeBuffer) { + // Large buffer reverse test + m128 lo, hi; + CharReach chars; + chars.set('!'); + truffleBuildMasks(chars, reinterpret_cast(&lo), reinterpret_cast(&hi)); + + u8 buf[4096]; + memset(buf, '.', sizeof(buf)); + buf[2048] = '!'; + + const u8 *rv = rtruffleExec(lo, hi, buf, buf + 4096); + ASSERT_EQ(buf + 2048, rv); +} + +TEST(ReverseTruffle, ExecAlignmentSweep) { + // Reverse alignment sweep + m128 lo, hi; + CharReach chars; + chars.set('#'); + truffleBuildMasks(chars, reinterpret_cast(&lo), reinterpret_cast(&hi)); + + u8 buf[256]; + memset(buf, '.', sizeof(buf)); + + for (size_t offset = 192; offset < 256; offset++) { + buf[offset] = '#'; + const u8 *rv = rtruffleExec(lo, hi, buf, buf + 256); + ASSERT_EQ(buf + offset, rv) << "offset=" << offset; + buf[offset] = '.'; + } +} + +TEST(ReverseTruffle, ExecAllSingleCharClasses) { + // For every possible single-char class, verify reverse match works + for (unsigned c = 0; c < 256; c++) { + m128 lo, hi; + CharReach chars; + chars.set((u8)c); + truffleBuildMasks(chars, reinterpret_cast(&lo), reinterpret_cast(&hi)); + + u8 filler = (u8)(c ^ 0x01); + if (filler == (u8)c) filler = (u8)(c ^ 0x02); + u8 raw[65]; + u8 *buf = raw + 1; + memset(buf, filler, 64); + + // No match + const u8 *rv = 
rtruffleExec(lo, hi, buf, buf + 64); + ASSERT_EQ(raw, rv) << "c=" << c; + + // Place character at position 40 + buf[40] = (u8)c; + rv = rtruffleExec(lo, hi, buf, buf + 64); + ASSERT_EQ(buf + 40, rv) << "c=" << c; + } +} + +TEST(ReverseTruffle, ExecMultipleMatches) { + // Verify reverse finds the LAST match + m128 lo, hi; + CharReach chars; + chars.set('a'); + chars.set('b'); + truffleBuildMasks(chars, reinterpret_cast(&lo), reinterpret_cast(&hi)); + + u8 buf[128]; + memset(buf, '.', sizeof(buf)); + buf[10] = 'a'; + buf[50] = 'b'; + buf[90] = 'a'; + + const u8 *rv = rtruffleExec(lo, hi, buf, buf + 128); + ASSERT_EQ(buf + 90, rv); + + // Now check with restricted end + rv = rtruffleExec(lo, hi, buf, buf + 60); + ASSERT_EQ(buf + 50, rv); +} + +// --- Forward: Boundary between low and high characters --- + +TEST(Truffle, ExecBoundaryChars) { + // Test characters at the 0x7f/0x80 boundary + m128 lo, hi; + CharReach chars; + chars.set(0x7f); + chars.set(0x80); + truffleBuildMasks(chars, reinterpret_cast(&lo), reinterpret_cast(&hi)); + + u8 buf[64]; + memset(buf, 0x41, sizeof(buf)); + + buf[20] = 0x80; + const u8 *rv = truffleExec(lo, hi, buf, buf + 64); + ASSERT_EQ(buf + 20, rv); + + buf[20] = 0x41; + buf[15] = 0x7f; + rv = truffleExec(lo, hi, buf, buf + 64); + ASSERT_EQ(buf + 15, rv); +} + +TEST(ReverseTruffle, ExecBoundaryChars) { + m128 lo, hi; + CharReach chars; + chars.set(0x7f); + chars.set(0x80); + truffleBuildMasks(chars, reinterpret_cast(&lo), reinterpret_cast(&hi)); + + u8 buf[64]; + memset(buf, 0x41, sizeof(buf)); + + buf[40] = 0x7f; + buf[50] = 0x80; + const u8 *rv = rtruffleExec(lo, hi, buf, buf + 64); + ASSERT_EQ(buf + 50, rv); +} From b0158196dff9e5869ed5ef98b1113107e7443952 Mon Sep 17 00:00:00 2001 From: Byeonguk Jeong Date: Fri, 6 Mar 2026 14:13:17 +0900 Subject: [PATCH 3/3] hwlm: correct return types and scan length in SVE noodle engine - Change return type from hwlmcb_rv_t to hwlm_error_t to match the actual return type of checkMatched() and 
singleCheckMatched() - Fix scanDouble short-path condition: use (e - d) instead of scan_len which could be stale after adjusting d for history - Fix formatting: add space after 'if' keyword Fixes: 0ba1cbb32b5b ("Add SVE2 support for noodle") Fixes: b2332218a474 ("Remove possibly undefined behaviour from Noodle.") Signed-off-by: Byeonguk Jeong --- src/hwlm/noodle_engine_sve.hpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/hwlm/noodle_engine_sve.hpp b/src/hwlm/noodle_engine_sve.hpp index fcf2f5b46..8a63dcbf9 100644 --- a/src/hwlm/noodle_engine_sve.hpp +++ b/src/hwlm/noodle_engine_sve.hpp @@ -50,8 +50,8 @@ hwlm_error_t singleCheckMatched(const struct noodTable *n, const u8 *buf, size_t len, const struct cb_info *cbi, const u8 *d, svbool_t matched) { if (unlikely(svptest_any(svptrue_b8(), matched))) { - hwlmcb_rv_t rv = checkMatched(n, buf, len, cbi, d, matched, - n->msk_len != 1); + hwlm_error_t rv = checkMatched(n, buf, len, cbi, d, matched, + n->msk_len != 1); RETURN_IF_TERMINATED(rv); } return HWLM_SUCCESS; @@ -88,7 +88,7 @@ hwlm_error_t scanSingleLoop(const struct noodTable *n, const u8 *buf, for (size_t i = 0; i < loops; i++, d += svcntb()) { DEBUG_PRINTF("d %p \n", d); svbool_t matched = singleMatched(chars, d, svptrue_b8()); - hwlmcb_rv_t rv = singleCheckMatched(n, buf, len, cbi, d, matched); + hwlm_error_t rv = singleCheckMatched(n, buf, len, cbi, d, matched); RETURN_IF_TERMINATED(rv); } DEBUG_PRINTF("d %p e %p \n", d, e); @@ -120,7 +120,7 @@ hwlm_error_t scanSingle(const struct noodTable *n, const u8 *buf, size_t len, const u8 *d1 = ROUNDUP_PTR(d, svcntb_pat(SV_POW2)); if (d != d1) { DEBUG_PRINTF("until aligned %p \n", d1); - hwlmcb_rv_t rv = scanSingleOnce(n, buf, len, cbi, chars, d, d1); + hwlm_error_t rv = scanSingleOnce(n, buf, len, cbi, chars, d, d1); RETURN_IF_TERMINATED(rv); } return scanSingleLoop(n, buf, len, cbi, chars, d1, e); @@ -140,8 +140,8 @@ hwlm_error_t doubleCheckMatched(const struct noodTable 
*n, const u8 *buf, // d - 1 won't underflow as the first position in buf has been dealt // with meaning that d > buf assert(d > buf); - hwlmcb_rv_t rv = checkMatched(n, buf, len, cbi, d - 1, matched, - n->msk_len != 2); + hwlm_error_t rv = checkMatched(n, buf, len, cbi, d - 1, matched, + n->msk_len != 2); RETURN_IF_TERMINATED(rv); } return HWLM_SUCCESS; @@ -177,7 +177,7 @@ hwlm_error_t scanDoubleOnce(const struct noodTable *n, const u8 *buf, // we reuse u8 predicates for u16 lanes. This means that we will check against one // extra \0 character at the end of the vector. - if(unlikely(n->key1 == '\0')) { + if (unlikely(n->key1 == '\0')) { if (size % 2) { // if odd, vec has an odd number of lanes and has the spurious \0 svbool_t lane_to_disable = svrev_b8(svpfirst(svrev_b8(pg), svpfalse())); @@ -244,15 +244,15 @@ hwlm_error_t scanDouble(const struct noodTable *n, const u8 *buf, size_t len, svuint8_t chars = svreinterpret_u8(getCharMaskDouble(n->key0, n->key1, noCase)); - if (scan_len <= svcntb()) { + if ((size_t)(e - d) <= svcntb()) { return scanDoubleOnce(n, buf, len, cbi, chars, d, e); } // peel off first part to align to the vector size const u8 *d1 = ROUNDUP_PTR(d, svcntb_pat(SV_POW2)); if (d != d1) { DEBUG_PRINTF("until aligned %p \n", d1); - hwlmcb_rv_t rv = scanDoubleOnce(n, buf, len, cbi, chars, - d, d1); + hwlm_error_t rv = scanDoubleOnce(n, buf, len, cbi, chars, + d, d1); RETURN_IF_TERMINATED(rv); } return scanDoubleLoop(n, buf, len, cbi, chars, d1, e);