diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 9b340d9..7984247 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -143,3 +143,41 @@ if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/nearpoint.cpp") else() message(WARNING "nearpoint.cpp does not exist") endif() + + +# Build fast_gcc_unwind binary if available +if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/fast_gcc_unwind.cpp") + message(STATUS "fast_gcc_unwind.cpp exists!") + set(elf fast_gcc_unwind.cpp.elf) + message(STATUS "Generating Demo for \"${elf}\"") + add_executable(${elf} generated_tests/except.cpp fast_gcc_unwind.cpp) + target_include_directories(${elf} PRIVATE .) + target_compile_options(${elf} PRIVATE + ${BENCHMARK_COMPILE_OPTIONS} + -fexceptions + -Wno-error=attributes) + + target_link_libraries(${elf} PRIVATE + startup_code + libhal::$ENV{LIBHAL_PLATFORM_LIBRARY} + ) + target_link_options(${elf} PRIVATE -fno-threadsafe-statics + -L${CMAKE_SOURCE_DIR}/ + -Wl,-Map=${CMAKE_BINARY_DIR}/${elf}.map + -Wl,--wrap=__gnu_unwind_execute + -Wl,--wrap=_Unwind_VRS_Pop + -Wl,--wrap=__gnu_unwind_frame + ) + if(prebuilt-picolibc_FOUND) + target_link_libraries(${elf} PRIVATE picolibc) + endif() + + if(${CMAKE_CROSSCOMPILING}) + # Convert elf into .bin, .hex and other formats needed for programming + # devices. 
+ libhal_post_build(${elf}) + libhal_disassemble(${elf}) + endif() +else() + message(WARNING "fast_gcc_unwind.cpp does not exist") +endif() diff --git a/benchmark/conanfile.py b/benchmark/conanfile.py index ee8566b..cfeea49 100644 --- a/benchmark/conanfile.py +++ b/benchmark/conanfile.py @@ -65,4 +65,4 @@ def requirements(self): bootstrap = self.python_requires["libhal-bootstrap"] bootstrap.module.add_demo_requirements(self) if self.options.platform != "mac": - self.requires("libhal-exceptions/1.4.0") + self.requires("libhal-exceptions/1.4.1") diff --git a/benchmark/fast_gcc_unwind.cpp b/benchmark/fast_gcc_unwind.cpp new file mode 100644 index 0000000..c40836f --- /dev/null +++ b/benchmark/fast_gcc_unwind.cpp @@ -0,0 +1,428 @@ +#include + +#include +#include +#include +#include +#include + +/* Misc constants. */ +#define R_IP 12 +#define R_SP 13 +#define R_LR 14 +#define R_PC 15 + +struct core_regs +{ + std::uint32_t r[16]; +}; + +/* We use normal integer types here to avoid the compiler generating + coprocessor instructions. */ +struct vfp_regs +{ + std::uint64_t d[16]; + std::uint32_t pad; +}; + +struct vfpv3_regs +{ + /* Always populated via VSTM, so no need for the "pad" field from + vfp_regs (which is used to store the format word for FSTMX). */ + std::uint64_t d[16]; +}; + +struct wmmxd_regs +{ + std::uint64_t wd[16]; +}; + +struct wmmxc_regs +{ + std::uint32_t wc[4]; +}; + +/* The ABI specifies that the unwind routines may only use core registers, + except when actually manipulating coprocessor state. This allows + us to write one implementation that works on all platforms by + demand-saving coprocessor registers. + + During unwinding we hold the coprocessor state in the actual hardware + registers and allocate demand-save areas for use during phase1 + unwinding. */ + +struct phase1_vrs +{ + /* The first fields must be the same as a phase2_vrs. 
*/ + std::uint32_t demand_save_flags; + struct core_regs core; + std::uint32_t prev_sp; /* Only valid during forced unwinding. */ + struct vfp_regs vfp; + struct vfpv3_regs vfp_regs_16_to_31; + struct wmmxd_regs wmmxd; + struct wmmxc_regs wmmxc; +}; + +/* This must match the structure created by the assembly wrappers. */ +struct phase2_vrs +{ + std::uint32_t demand_save_flags; + struct core_regs core; +}; + +typedef struct __EIT_entry +{ + std::uint32_t fnoffset; + std::uint32_t content; +} __EIT_entry; + +std::uint32_t selfrel_offset31(std::uint32_t const* p) +{ + std::uint32_t offset; + + offset = *p; + /* Sign extend to 32 bits. */ + if (offset & (1 << 30)) + offset |= 1u << 31; + else + offset &= ~(1u << 31); + + return offset + (std::uint32_t)p; +} + +struct eit_entry_less_than +{ + [[gnu::always_inline]] static std::uint32_t to_absolute( + __EIT_entry const& entry) + { + auto entry_addr = reinterpret_cast(&entry.content); + // Sign extend :D + entry_addr <<= 1; + entry_addr >>= 1; + return entry_addr; + } + + bool operator()(__EIT_entry const& left, __EIT_entry const& right) + { + return left.fnoffset < right.fnoffset; + } + bool operator()(__EIT_entry const& left, std::uint32_t right) + { + std::uint32_t absolute_left = to_absolute(left); + return absolute_left < right; + } + bool operator()(std::uint32_t left, __EIT_entry const& right) + { + std::uint32_t absolute_right = to_absolute(right); + return left < absolute_right; + } +}; + +/* Return the next byte of unwinding information, or CODE_FINISH if there is + no data remaining. */ +[[gnu::always_inline]] _uw8 next_unwind_byte(__gnu_unwind_state* uws) +{ + _uw8 b; + + if (uws->bytes_left == 0) { + /* Load another word */ + if (uws->words_left == 0) + return 0xB0; /* Nothing left. */ + uws->words_left--; + uws->data = *(uws->next++); + uws->bytes_left = 3; + } else + uws->bytes_left--; + + /* Extract the most significant byte. 
*/ + b = (uws->data >> 24) & 0xff; + uws->data <<= 8; + return b; +} + +[[gnu::always_inline]] _Unwind_VRS_Result _My_Unwind_VRS_Get( + _Unwind_Context* context, + _Unwind_VRS_RegClass, + _uw regno, + _Unwind_VRS_DataRepresentation, + void* valuep) +{ + auto* vrs = reinterpret_cast(context); + *(_uw*)valuep = vrs->core.r[regno]; + return _UVRSR_OK; +} + +/* ABI defined function to load a virtual register from memory. */ + +[[gnu::always_inline]] _Unwind_VRS_Result _My_Unwind_VRS_Set( + _Unwind_Context* context, + _Unwind_VRS_RegClass, + _uw regno, + _Unwind_VRS_DataRepresentation, + void* valuep) +{ + auto* vrs = reinterpret_cast(context); + vrs->core.r[regno] = *(_uw*)valuep; + return _UVRSR_OK; +} + +extern "C" +{ + + [[gnu::always_inline]] _Unwind_VRS_Result __wrap__Unwind_VRS_Pop( + _Unwind_Context* context, + _Unwind_VRS_RegClass regclass, + _uw discriminator, + _Unwind_VRS_DataRepresentation) + { + auto* vrs = reinterpret_cast(context); + + switch (regclass) { + case _UVRSC_CORE: { + std::uint32_t mask = discriminator & 0xffff; + // If bit R_SP of the mask is clear the pop does not restore the stack + // pointer itself, so it must be written back after the loop; if the bit + // is set the popped value is kept. The test is on bit 13, not value 13. + bool set_stack_pointer_afterwards = (mask & (1u << R_SP)) == 0x0; + + std::uint32_t* ptr = // NOLINTNEXTLINE + reinterpret_cast(vrs->core.r[R_SP]); + /* Pop the requested registers. */ + while (mask) { + auto reg_to_restore = std::countr_zero(mask); + mask &= ~(1 << reg_to_restore); + vrs->core.r[reg_to_restore] = *(ptr++); + } + if (set_stack_pointer_afterwards) { + vrs->core.r[R_SP] = reinterpret_cast(ptr); + } + } + return _UVRSR_OK; + case _UVRSC_VFP: + return _UVRSR_OK; + case _UVRSC_WMMXD: + return _UVRSR_OK; + case _UVRSC_WMMXC: + return _UVRSR_OK; + default: + return _UVRSR_FAILED; + } + } + + /* Execute the unwinding instructions described by UWS. 
*/ + _Unwind_Reason_Code __wrap___gnu_unwind_execute(_Unwind_Context* context, + __gnu_unwind_state* uws) + { + _uw op; + int set_pc; + _uw reg; + set_pc = 0; + for (;;) { + op = next_unwind_byte(uws); + if (op == 0xb0) { + /* If we haven't already set pc then copy it from lr. */ + if (!set_pc) { + _My_Unwind_VRS_Get(context, _UVRSC_CORE, R_LR, _UVRSD_UINT32, &reg); + _My_Unwind_VRS_Set(context, _UVRSC_CORE, R_PC, _UVRSD_UINT32, &reg); + set_pc = 1; + } + /* Drop out of the loop. */ + break; + } + if ((op & 0x80) == 0) { + /* vsp = vsp +- (imm6 << 2 + 4). */ + _uw offset; + + offset = ((op & 0x3f) << 2) + 4; + _My_Unwind_VRS_Get(context, _UVRSC_CORE, R_SP, _UVRSD_UINT32, &reg); + if (op & 0x40) + reg -= offset; + else + reg += offset; + _My_Unwind_VRS_Set(context, _UVRSC_CORE, R_SP, _UVRSD_UINT32, &reg); + continue; + } + + if ((op & 0xf0) == 0x80) { + op = (op << 8) | next_unwind_byte(uws); + if (op == 0x8000) { + /* Refuse to unwind. */ + return _URC_FAILURE; + } + /* Pop r4-r15 under mask. */ + op = (op << 4) & 0xfff0; + if (_Unwind_VRS_Pop(context, _UVRSC_CORE, op, _UVRSD_UINT32) != + _UVRSR_OK) + return _URC_FAILURE; + if (op & (1 << R_PC)) + set_pc = 1; + continue; + } + if ((op & 0xf0) == 0x90) { + op &= 0xf; + if (op == 13 || op == 15) + /* Reserved. */ + return _URC_FAILURE; + /* vsp = r[nnnn]. */ + _My_Unwind_VRS_Get(context, _UVRSC_CORE, op, _UVRSD_UINT32, &reg); + _My_Unwind_VRS_Set(context, _UVRSC_CORE, R_SP, _UVRSD_UINT32, &reg); + continue; + } + if ((op & 0xf0) == 0xa0) { + /* Pop r4-r[4+nnn], [lr]. */ + _uw mask; + + mask = (0xff0 >> (7 - (op & 7))) & 0xff0; + if (op & 8) + mask |= (1 << R_LR); + if (_Unwind_VRS_Pop(context, _UVRSC_CORE, mask, _UVRSD_UINT32) != + _UVRSR_OK) + return _URC_FAILURE; + continue; + } + if ((op & 0xf0) == 0xb0) { + /* op == 0xb0 already handled. */ + if (op == 0xb1) { + op = next_unwind_byte(uws); + if (op == 0 || ((op & 0xf0) != 0)) + /* Spare. */ + return _URC_FAILURE; + /* Pop r0-r3 under mask. 
*/ + if (_Unwind_VRS_Pop(context, _UVRSC_CORE, op, _UVRSD_UINT32) != + _UVRSR_OK) + return _URC_FAILURE; + continue; + } + if (op == 0xb2) { + /* vsp = vsp + 0x204 + (uleb128 << 2). */ + int shift; + + _My_Unwind_VRS_Get(context, _UVRSC_CORE, R_SP, _UVRSD_UINT32, &reg); + op = next_unwind_byte(uws); + shift = 2; + while (op & 0x80) { + reg += ((op & 0x7f) << shift); + shift += 7; + op = next_unwind_byte(uws); + } + reg += ((op & 0x7f) << shift) + 0x204; + _My_Unwind_VRS_Set(context, _UVRSC_CORE, R_SP, _UVRSD_UINT32, &reg); + continue; + } + if (op == 0xb3) { + /* Pop VFP registers with fldmx. */ + op = next_unwind_byte(uws); + op = ((op & 0xf0) << 12) | ((op & 0xf) + 1); + if (_Unwind_VRS_Pop(context, _UVRSC_VFP, op, _UVRSD_VFPX) != + _UVRSR_OK) + return _URC_FAILURE; + continue; + } + if ((op & 0xfc) == 0xb4) /* Obsolete FPA. */ + return _URC_FAILURE; + + /* op & 0xf8 == 0xb8. */ + /* Pop VFP D[8]-D[8+nnn] with fldmx. */ + op = 0x80000 | ((op & 7) + 1); + if (_Unwind_VRS_Pop(context, _UVRSC_VFP, op, _UVRSD_VFPX) != _UVRSR_OK) + return _URC_FAILURE; + continue; + } + if ((op & 0xf0) == 0xc0) { + if (op == 0xc6) { + /* Pop iWMMXt D registers. */ + op = next_unwind_byte(uws); + op = ((op & 0xf0) << 12) | ((op & 0xf) + 1); + if (_Unwind_VRS_Pop(context, _UVRSC_WMMXD, op, _UVRSD_UINT64) != + _UVRSR_OK) + return _URC_FAILURE; + continue; + } + if (op == 0xc7) { + op = next_unwind_byte(uws); + if (op == 0 || (op & 0xf0) != 0) + /* Spare. */ + return _URC_FAILURE; + /* Pop iWMMXt wCGR{3,2,1,0} under mask. */ + if (_Unwind_VRS_Pop(context, _UVRSC_WMMXC, op, _UVRSD_UINT32) != + _UVRSR_OK) + return _URC_FAILURE; + continue; + } + if ((op & 0xf8) == 0xc0) { + /* Pop iWMMXt wR[10]-wR[10+nnn]. */ + op = 0xa0000 | ((op & 0xf) + 1); + if (_Unwind_VRS_Pop(context, _UVRSC_WMMXD, op, _UVRSD_UINT64) != + _UVRSR_OK) + return _URC_FAILURE; + continue; + } + if (op == 0xc8) { + /* Pop VFPv3 registers D[16+ssss]-D[16+ssss+cccc] with vldm. 
*/ + op = next_unwind_byte(uws); + op = (((op & 0xf0) + 16) << 12) | ((op & 0xf) + 1); + if (_Unwind_VRS_Pop(context, _UVRSC_VFP, op, _UVRSD_DOUBLE) != + _UVRSR_OK) + return _URC_FAILURE; + continue; + } + if (op == 0xc9) { + /* Pop VFP registers with fldmd. */ + op = next_unwind_byte(uws); + op = ((op & 0xf0) << 12) | ((op & 0xf) + 1); + if (_Unwind_VRS_Pop(context, _UVRSC_VFP, op, _UVRSD_DOUBLE) != + _UVRSR_OK) + return _URC_FAILURE; + continue; + } + /* Spare. */ + return _URC_FAILURE; + } + if ((op & 0xf8) == 0xd0) { + /* Pop VFP D[8]-D[8+nnn] with fldmd. */ + op = 0x80000 | ((op & 7) + 1); + if (_Unwind_VRS_Pop(context, _UVRSC_VFP, op, _UVRSD_DOUBLE) != + _UVRSR_OK) + return _URC_FAILURE; + continue; + } + /* Spare. */ + return _URC_FAILURE; + } + return _URC_OK; + } + + [[gnu::used]] + _Unwind_Reason_Code __wrap___gnu_unwind_frame(_Unwind_Control_Block* ucbp, + _Unwind_Context* context) + { + _uw* ptr; + __gnu_unwind_state uws; + + ptr = (_uw*)ucbp->pr_cache.ehtp; + /* Skip over the personality routine address. */ + ptr++; + /* Setup the unwinder state. 
*/ + uws.data = (*ptr) << 8; + uws.next = ptr + 1; + uws.bytes_left = 3; + uws.words_left = ((*ptr) >> 24) & 0xff; + + return __wrap___gnu_unwind_execute(context, &uws); + } + + __EIT_entry const* search_EIT_table(__EIT_entry const* table, + int nrec, // NOLINT + std::uint32_t return_address) + { + if (nrec == 0) { + return nullptr; + } + std::span<__EIT_entry const> table_span(table, nrec); + auto const& entry = std::upper_bound(table_span.begin(), + table_span.end(), + return_address, + eit_entry_less_than{}); + return entry.base(); + } +} diff --git a/benchmark/generated_tests/except.cpp.csv b/benchmark/generated_tests/except.cpp.csv index 9118b70..54f3647 100644 --- a/benchmark/generated_tests/except.cpp.csv +++ b/benchmark/generated_tests/except.cpp.csv @@ -1,61 +1,61 @@ depth,error_size,destructor_percentage,pulse_us -50,4,0,2345.38 -35,4,0,1720.96 -15,4,0,809.0 -5,4,0,350.08 -1,4,0,161.83 -50,4,25,2621.08 -35,4,25,1859.79 -15,4,25,873.33 -5,4,25,394.17 -1,4,25,182.12 -50,4,50,2876.92 -35,4,50,2053.5 -15,4,50,957.67 -5,4,50,405.04 -1,4,50,182.12 -50,4,100,3377.96 -35,4,100,2404.0 -15,4,100,1090.29 -5,4,100,445.75 -1,4,100,182.12 -50,16,0,2347.42 -35,16,0,1714.71 -15,16,0,815.96 -5,16,0,355.5 -1,16,0,165.58 -50,16,25,2629.58 -35,16,25,1863.58 -15,16,25,875.46 -5,16,25,392.54 -1,16,25,185.96 -50,16,50,2875.42 -35,16,50,2055.83 -15,16,50,949.17 -5,16,50,408.79 -1,16,50,185.92 -50,16,100,3380.08 -35,16,100,2395.83 -15,16,100,1088.75 -5,16,100,447.92 -1,16,100,185.96 -50,65,0,2360.38 -35,65,0,1729.08 -15,65,0,824.92 -5,65,0,364.38 -1,65,0,174.58 -50,65,25,2638.46 -35,65,25,1872.62 -15,65,25,877.62 -5,65,25,405.33 -1,65,25,194.88 -50,65,50,2888.33 -35,65,50,2060.33 -15,65,50,965.08 -5,65,50,417.83 -1,65,50,194.88 -50,65,100,3382.33 -35,65,100,2411.62 -15,65,100,1101.67 -5,65,100,460.79 -1,65,100,194.83 +50,4,0,2339.79 +35,4,0,1716.62 +15,4,0,807.08 +5,4,0,349.21 +1,4,0,161.46 +50,4,25,2614.88 +35,4,25,1855.21 +15,4,25,871.12 +5,4,25,393.25 +1,4,25,181.67 
+50,4,50,2870.12 +35,4,50,2048.88 +15,4,50,955.42 +5,4,50,404.12 +1,4,50,181.67 +50,4,100,3369.71 +35,4,100,2398.21 +15,4,100,1087.79 +5,4,100,444.67 +1,4,100,181.71 +50,16,0,2341.92 +35,16,0,1710.54 +15,16,0,814.04 +5,16,0,354.62 +1,16,0,165.17 +50,16,25,2623.08 +35,16,25,1859.08 +15,16,25,873.38 +5,16,25,391.58 +1,16,25,185.46 +50,16,50,2868.42 +35,16,50,2050.88 +15,16,50,946.88 +5,16,50,407.79 +1,16,50,185.5 +50,16,100,3371.96 +35,16,100,2389.88 +15,16,100,1086.21 +5,16,100,446.83 +1,16,100,185.5 +50,65,0,2354.79 +35,65,0,1724.92 +15,65,0,822.92 +5,65,0,363.54 +1,65,0,174.17 +50,65,25,2632.21 +35,65,25,1868.08 +15,65,25,875.58 +5,65,25,404.46 +1,65,25,194.38 +50,65,50,2881.33 +35,65,50,2055.25 +15,65,50,962.71 +5,65,50,416.79 +1,65,50,194.38 +50,65,100,3374.0 +35,65,100,2405.58 +15,65,100,1098.96 +5,65,100,459.62 +1,65,100,194.42 diff --git a/benchmark/generated_tests/gcc_except_fast.csv b/benchmark/generated_tests/gcc_except_fast.csv new file mode 100644 index 0000000..5ab0a4c --- /dev/null +++ b/benchmark/generated_tests/gcc_except_fast.csv @@ -0,0 +1,61 @@ +depth,error_size,destructor_percentage,pulse_us +50,4,0,1657.12 +35,4,0,1201.67 +15,4,0,575.17 +5,4,0,258.71 +1,4,0,146.62 +50,4,25,1871.33 +35,4,25,1328.54 +15,4,25,620.17 +5,4,25,312.08 +1,4,25,166.58 +50,4,50,2188.79 +35,4,50,1573.83 +15,4,50,741.46 +5,4,50,334.88 +1,4,50,166.62 +50,4,100,2687.46 +35,4,100,1908.21 +15,4,100,881.46 +5,4,100,374.21 +1,4,100,166.58 +50,16,0,1659.17 +35,16,0,1205.38 +15,16,0,573.67 +5,16,0,257.21 +1,16,0,145.0 +50,16,25,1869.83 +35,16,25,1326.92 +15,16,25,611.62 +5,16,25,314.21 +1,16,25,170.42 +50,16,50,2190.71 +35,16,50,1567.75 +15,16,50,746.62 +5,16,50,333.25 +1,16,50,170.42 +50,16,100,2679.08 +35,16,100,1913.54 +15,16,100,879.71 +5,16,100,376.33 +1,16,100,170.38 +50,65,0,1666.38 +35,65,0,1219.5 +15,65,0,591.42 +5,65,0,275.0 +1,65,0,162.92 +50,65,25,1887.62 +35,65,25,1344.58 +15,65,25,632.67 +5,65,25,332.12 +1,65,25,184.38 +50,65,50,2208.42 +35,65,50,1586.79 
+15,65,50,760.67 +5,65,50,351.12 +1,65,50,184.38 +50,65,100,2699.92 +35,65,100,1927.54 +15,65,100,897.71 +5,65,100,394.29 +1,65,100,184.38 diff --git a/benchmark/generated_tests/nearpoint.cpp.csv b/benchmark/generated_tests/nearpoint.cpp.csv index fe5e9dd..1e13f3b 100644 --- a/benchmark/generated_tests/nearpoint.cpp.csv +++ b/benchmark/generated_tests/nearpoint.cpp.csv @@ -1,61 +1,61 @@ depth,error_size,destructor_percentage,pulse_us -50,4,0,232.67 -35,4,0,163.0 -15,4,0,82.17 -5,4,0,44.08 -1,4,0,26.5 -50,4,25,337.33 -35,4,25,243.38 -15,4,25,125.17 -5,4,25,61.17 -1,4,25,31.17 -50,4,50,499.42 -35,4,50,372.25 -15,4,50,177.46 -5,4,50,77.17 -1,4,50,31.17 -50,4,100,838.42 -35,4,100,597.12 -15,4,100,268.12 -5,4,100,102.71 -1,4,100,31.17 -50,16,0,232.62 -35,16,0,165.88 -15,16,0,82.17 -5,16,0,44.08 -1,16,0,28.88 -50,16,25,337.38 -35,16,25,243.25 -15,16,25,126.79 -5,16,25,64.29 -1,16,25,36.38 -50,16,50,499.33 -35,16,50,372.12 -15,16,50,177.42 -5,16,50,77.12 -1,16,50,36.38 -50,16,100,838.29 -35,16,100,596.96 -15,16,100,268.08 -5,16,100,102.67 -1,16,100,36.42 -50,65,0,243.0 -35,65,0,176.29 -15,65,0,92.58 -5,65,0,54.5 -1,65,0,39.25 -50,65,25,347.83 -35,65,25,253.71 -15,65,25,137.25 -5,65,25,74.75 -1,65,25,46.92 -50,65,50,509.37 -35,65,50,382.21 -15,65,50,187.46 -5,65,50,87.21 -1,65,50,46.92 -50,65,100,848.08 -35,65,100,606.62 -15,65,100,276.92 -5,65,100,112.67 -1,65,100,46.88 +50,4,0,199.67 +35,4,0,142.33 +15,4,0,73.42 +5,4,0,38.83 +1,4,0,23.71 +50,4,25,300.0 +35,4,25,215.0 +15,4,25,109.08 +5,4,25,54.25 +1,4,25,29.04 +50,4,50,431.0 +35,4,50,317.88 +15,4,50,151.79 +5,4,50,66.83 +1,4,50,29.04 +50,4,100,709.08 +35,4,100,504.21 +15,4,100,224.79 +5,4,100,86.88 +1,4,100,29.04 +50,16,0,199.25 +35,16,0,143.83 +15,16,0,72.96 +5,16,0,38.42 +1,16,0,25.04 +50,16,25,299.71 +35,16,25,214.79 +15,16,25,110.12 +5,16,25,56.17 +1,16,25,32.21 +50,16,50,430.71 +35,16,50,317.62 +15,16,50,151.54 +5,16,50,66.79 +1,16,50,32.21 +50,16,100,709.0 +35,16,100,504.12 +15,16,100,224.88 +5,16,100,87.04 
+1,16,100,32.21 +50,65,0,209.88 +35,65,0,154.67 +15,65,0,83.75 +5,65,0,49.21 +1,65,0,35.83 +50,65,25,310.42 +35,65,25,225.5 +15,65,25,120.83 +5,65,25,67.08 +1,65,25,43.17 +50,65,50,441.62 +35,65,50,328.58 +15,65,50,162.5 +5,65,50,77.5 +1,65,50,43.12 +50,65,100,719.71 +35,65,100,514.88 +15,65,100,234.08 +5,65,100,96.92 +1,65,100,43.12 diff --git a/benchmark/nearpoint.cpp b/benchmark/nearpoint.cpp index ace580f..4c573f9 100644 --- a/benchmark/nearpoint.cpp +++ b/benchmark/nearpoint.cpp @@ -7,51 +7,29 @@ namespace ke::__except_abi::inline v1 { namespace { std::array const _near_point_descriptor_data = { - 0x0000000a, - 0x08000048, + 0x0000000b, + 0x08000044, }; -std::array const _normal_table_data = { - (0 << 10) | 0, // entry=0, avg_size=0 - (0 << 10) | 0, // entry=0, avg_size=0 - (0 << 10) | 0, // entry=0, avg_size=0 - (0 << 10) | 0, // entry=0, avg_size=0 - (0 << 10) | 0, // entry=0, avg_size=0 - (0 << 10) | 0, // entry=0, avg_size=0 - (0 << 10) | 0, // entry=0, avg_size=0 - (0 << 10) | 0, // entry=0, avg_size=0 - (0 << 10) | 0, // entry=0, avg_size=0 - (0 << 10) | 0, // entry=0, avg_size=0 - (0 << 10) | 0, // entry=0, avg_size=0 - (0 << 10) | 0, // entry=0, avg_size=0 - (0 << 10) | 0, // entry=0, avg_size=0 - (1 << 10) | 0, // entry=1, avg_size=0 - (1 << 10) | 0, // entry=1, avg_size=0 - (1 << 10) | 0, // entry=1, avg_size=0 - (1 << 10) | 0, // entry=1, avg_size=0 - (2 << 10) | 0, // entry=2, avg_size=0 - (2 << 10) | 0, // entry=2, avg_size=0 - (3 << 10) | 0, // entry=3, avg_size=0 - (3 << 10) | 0, // entry=3, avg_size=0 - (4 << 10) | 0, // entry=4, avg_size=0 - (5 << 10) | 0, // entry=5, avg_size=0 - (6 << 10) | 996, // entry=6, avg_size=996 - (7 << 10) | 754, // entry=7, avg_size=754 - (9 << 10) | 448, // entry=9, avg_size=448 - (10 << 10) | 332, // entry=10, avg_size=332 - (13 << 10) | 222, // entry=13, avg_size=222 - (18 << 10) | 155, // entry=18, avg_size=155 - (24 << 10) | 148, // entry=24, avg_size=148 - (31 << 10) | 109, // entry=31, avg_size=109 - (40 << 
10) | 82, // entry=40, avg_size=82 - (52 << 10) | 80, // entry=52, avg_size=80 - (65 << 10) | 76, // entry=65, avg_size=76 - (79 << 10) | 76, // entry=79, avg_size=76 - (92 << 10) | 72, // entry=92, avg_size=72 - (106 << 10) | 68, // entry=106, avg_size=68 - (121 << 10) | 68, // entry=121, avg_size=68 - (136 << 10) | 68, // entry=136, avg_size=68 - (151 << 10) | 55, // entry=151, avg_size=55 +std::array const _normal_table_data = { + (0 << 11) | 1, // Block(start=0, count=1) + (0 << 11) | 1, // Block(start=0, count=1) + (0 << 11) | 1, // Block(start=0, count=1) + (0 << 11) | 1, // Block(start=0, count=1) + (0 << 11) | 1, // Block(start=0, count=1) + (0 << 11) | 1, // Block(start=0, count=1) + (1 << 11) | 1, // Block(start=1, count=1) + (1 << 11) | 1, // Block(start=1, count=1) + (2 << 11) | 2, // Block(start=2, count=2) + (3 << 11) | 3, // Block(start=3, count=3) + (5 << 11) | 3, // Block(start=5, count=3) + (7 << 11) | 6, // Block(start=7, count=6) + (12 << 11) | 14, // Block(start=12, count=14) + (25 << 11) | 20, // Block(start=25, count=20) + (44 << 11) | 27, // Block(start=44, count=27) + (70 << 11) | 28, // Block(start=70, count=28) + (97 << 11) | 32, // Block(start=97, count=32) + (128 << 11) | 26, // Block(start=128, count=26) }; } // namespace diff --git a/benchmark/order.ld b/benchmark/order.ld index b147ea7..593948b 100644 --- a/benchmark/order.ld +++ b/benchmark/order.ld @@ -4,7 +4,6 @@ SECTIONS { /* Unwind info 0x00000001 */ *(.text.unlikely._Z3endv) *(.text.unlikely._exit) - *(.text.unlikely.__wrap___cxa_call_unexpected) *(.text.unlikely._Znwj) *(.text.unlikely._ZnwjSt11align_val_t) *(.text.unlikely._ZSt9terminatev) @@ -112,21 +111,6 @@ SECTIONS { *_arm_fixunsdfsi.o(.text*) *_arm_addsubsf3.o(.text*) *_arm_fixunssfsi.o(.text*) - *(.text.selfrel_offset31) - *(.text.search_EIT_table) - *(.text.__gnu_unwind_get_pr_addr) - *(.text._Unwind_decode_typeinfo_ptr.constprop.0) - *(.text._Unwind_DebugHook) - *(.text.__gnu_Unwind_ForcedUnwind) - 
*(.text._Unwind_VRS_Get) - *(.text._Unwind_GetGR) - *(.text._Unwind_VRS_Set) - *(.text._Unwind_SetGR) - *(.text.__aeabi_unwind_cpp_pr0) - *(.text.__aeabi_unwind_cpp_pr1) - *(.text.__aeabi_unwind_cpp_pr2) - *libunwind.o(.text*) - *(.text.next_unwind_byte) *_arm_muldivsf3.o(.text*) *_arm_cmpsf2.o(.text*) *_arm_unordsf2.o(.text*) @@ -216,31 +200,6 @@ SECTIONS { *(.text._Z20depth_47_percent_000v.isra.0) *(.text._Z20depth_49_percent_000v.isra.0) *(.text._Z20depth_50_percent_000v.isra.0) - /* Unwind info 0x8002a9b0 */ - *(.text._Z20depth_10_percent_050v.isra.0) - *(.text._Z20depth_12_percent_050v.isra.0) - *(.text._Z20depth_36_percent_050v.isra.0) - *(.text._Z20depth_40_percent_050v.isra.0) - *(.text._Z20depth_42_percent_050v.isra.0) - *(.text._Z20depth_10_percent_025v.isra.0) - *(.text._Z20depth_12_percent_025v.isra.0) - *(.text._Z20depth_15_percent_025v.isra.0) - *(.text._Z20depth_36_percent_025v.isra.0) - *(.text._Z20depth_39_percent_025v.isra.0) - *(.text._Z20depth_40_percent_025v.isra.0) - *(.text._Z20depth_41_percent_025v.isra.0) - *(.text._Z20depth_09_percent_000v.isra.0) - *(.text._Z20depth_10_percent_000v.isra.0) - *(.text._Z20depth_12_percent_000v.isra.0) - *(.text._Z20depth_15_percent_000v.isra.0) - *(.text._Z20depth_25_percent_000v.isra.0) - *(.text._Z20depth_36_percent_000v.isra.0) - *(.text._Z20depth_39_percent_000v.isra.0) - *(.text._Z20depth_40_percent_000v.isra.0) - *(.text._Z20depth_41_percent_000v.isra.0) - *(.text._Z20depth_42_percent_000v.isra.0) - *(.text._ZN3hal7stm32f113configure_pinENS0_10pin_selectENS0_12pin_config_tE) - *(.text.get_eit_entry) /* Unwind info 0x8001a8b0 */ *(.text._Z20depth_02_percent_025v.isra.0) *(.text._Z20depth_18_percent_050v.isra.0) @@ -267,21 +226,40 @@ SECTIONS { *(.text._Z20depth_44_percent_000v.isra.0) *(.text._Z20depth_45_percent_000v.isra.0) *(.text._Z20depth_48_percent_000v.isra.0) + /* Unwind info 0x8002a9b0 */ + *(.text._Z20depth_10_percent_050v.isra.0) + *(.text._Z20depth_12_percent_050v.isra.0) + 
*(.text._Z20depth_36_percent_050v.isra.0) + *(.text._Z20depth_40_percent_050v.isra.0) + *(.text._Z20depth_42_percent_050v.isra.0) + *(.text._Z20depth_10_percent_025v.isra.0) + *(.text._Z20depth_12_percent_025v.isra.0) + *(.text._Z20depth_15_percent_025v.isra.0) + *(.text._Z20depth_36_percent_025v.isra.0) + *(.text._Z20depth_39_percent_025v.isra.0) + *(.text._Z20depth_40_percent_025v.isra.0) + *(.text._Z20depth_41_percent_025v.isra.0) + *(.text._Z20depth_09_percent_000v.isra.0) + *(.text._Z20depth_10_percent_000v.isra.0) + *(.text._Z20depth_12_percent_000v.isra.0) + *(.text._Z20depth_15_percent_000v.isra.0) + *(.text._Z20depth_25_percent_000v.isra.0) + *(.text._Z20depth_36_percent_000v.isra.0) + *(.text._Z20depth_39_percent_000v.isra.0) + *(.text._Z20depth_40_percent_000v.isra.0) + *(.text._Z20depth_41_percent_000v.isra.0) + *(.text._Z20depth_42_percent_000v.isra.0) /* Unwind info 0x8004afb0 */ *(.text.startup.main) /* Unwind info 0x8012afb0 */ *(.text._ZNK10__cxxabiv121__vmi_class_type_info12__do_dyncastEiNS_17__class_type_info10__sub_kindEPKS1_PKvS4_S6_RNS1_16__dyncast_resultE) - /* Unwind info 0x8008afb0 */ - *(.text._ZNK10__cxxabiv121__vmi_class_type_info11__do_upcastEPKNS_17__class_type_infoEPKvRNS1_15__upcast_resultE) - *(.text.__gnu_unwind_pr_common) - /* Unwind info 0x8004adb0 */ - *(.text.__gnu_unwind_execute) - /* Unwind info 0x80033fac */ - *(.text._Unwind_VRS_Pop) + /* Unwind info 0x8001aab0 */ + *(.text._ZN3hal7stm32f113configure_pinENS0_10pin_selectENS0_12pin_config_tE) + *(.text._ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE7reserveEj) /* Unwind info 0x8002afb0 */ *(.text.__wrap___cxa_throw) *(.text._ZNK10__cxxabiv121__vmi_class_type_info20__do_find_public_srcEiPKvPKNS_17__class_type_infoES2_) - /* Unwind info 0x7ffff2c8 */ + /* Unwind info 0x7ffff2c4 */ *(.text._Z20depth_41_percent_050v.isra.0) *(.text._Z20depth_17_percent_025v.isra.0) *(.text._Z20depth_25_percent_025v.isra.0) @@ -291,322 +269,313 @@ SECTIONS { 
*(.text._ZN3hal5delayERNS_12steady_clockENSt6chrono8durationIxSt5ratioILx1ELx1000000000EEEE.constprop.0) *(.text._ZNK10__cxxabiv120__si_class_type_info11__do_upcastEPKNS_17__class_type_infoEPKvRNS1_15__upcast_resultE) *(.text._ZN9__gnu_cxx15__snprintf_liteEPcjPKcSt9__va_list) + /* Unwind info 0x8008afb0 */ + *(.text._ZNK10__cxxabiv121__vmi_class_type_info11__do_upcastEPKNS_17__class_type_infoEPKvRNS1_15__upcast_resultE) + /* Unwind info 0x7ffff2b8 */ + *(.text._Z20depth_35_percent_050v.isra.0) + *(.text._Z20depth_38_percent_025v.isra.0) + *(.text._Z20depth_46_percent_025v.isra.0) /* Unwind info 0x80a8b0b0 */ *(.text.unlikely._ZN3hal7stm32f127throw_if_pin_is_unavailableENS0_10pin_selectE.part.0) *(.text.unlikely._ZN3hal7stm32f112_GLOBAL__N_124get_enable_register_infoENS0_10peripheralE.part.0) *(.text._ZNSt12length_errorC2EPKc) *(.text._ZNSt12out_of_rangeC2EPKc) *(.text._ZNSs4_Rep9_S_createEjjRKSaIcE) - *(.text.restore_non_core_regs) - /* Unwind info 0x80aab0b0 */ - *(.text._ZN3hal7stm32f112gpio_manager6output16driver_configureERKNS_10output_pin8settingsE) - *(.text.unwind_phase2) - *(.text.__gnu_Unwind_Resume) - /* Unwind info 0x7ffff2bc */ - *(.text._Z20depth_35_percent_050v.isra.0) - *(.text._Z20depth_38_percent_025v.isra.0) - *(.text._Z20depth_46_percent_025v.isra.0) - /* Unwind info 0x80b276af */ - *(.text.unwind_phase2_forced) - /* Unwind info 0x7ffff028 */ + /* Unwind info 0x7ffff024 */ *(.text._Z20depth_01_percent_025v.isra.0) /* Unwind info 0x8004a9b0 */ *(.text._Z20depth_01_percent_000v.isra.0) - /* Unwind info 0x7ffff2c0 */ + /* Unwind info 0x7ffff2bc */ *(.text._Z20depth_37_percent_050v.isra.0) *(.text._Z20depth_42_percent_025v.isra.0) - /* Unwind info 0x7ffff2c4 */ + /* Unwind info 0x7ffff2c0 */ *(.text._Z20depth_39_percent_050v.isra.0) *(.text._Z20depth_21_percent_025v.isra.0) - /* Unwind info 0x7ffff2cc */ + /* Unwind info 0x7ffff2c8 */ *(.text._Z20depth_43_percent_050v.isra.0) *(.text._Z20depth_29_percent_025v.isra.0) - /* Unwind info 0x7ffff770 */ + 
/* Unwind info 0x7ffff76c */ *(.text._ZNK12_GLOBAL__N_122generic_error_category7messageB5cxx11Ei) - /* Unwind info 0x7ffff2ac */ + /* Unwind info 0x7ffff2a8 */ *(.text._Z20depth_27_percent_050v.isra.0) *(.text._Z38run_test_depth_05_error_04_cleanup_025v) - /* Unwind info 0x7ffff7ac */ + /* Unwind info 0x7ffff7a8 */ *(.text._ZN3hal7stm32f14uartD2Ev) - /* Unwind info 0x7ffff7b0 */ + /* Unwind info 0x7ffff7ac */ *(.text._ZN3hal7stm32f14uartD0Ev) /* Unwind info 0x80b108a9 */ *(.text.__wrap___cxa_rethrow) *(.text.__wrap___cxa_allocate_exception) - /* Unwind info 0x7ffff1e8 */ + /* Unwind info 0x7ffff1e4 */ *(.text._Z20depth_38_percent_100v.isra.0) *(.text._Z38run_test_depth_35_error_04_cleanup_000v) - /* Unwind info 0x7ffff23c */ + /* Unwind info 0x7ffff238 */ *(.text._Z20depth_45_percent_100v.isra.0) *(.text._Z38run_test_depth_01_error_04_cleanup_000v) - /* Unwind info 0x7ffff2b0 */ + /* Unwind info 0x7ffff2ac */ *(.text._Z20depth_29_percent_050v.isra.0) *(.text._Z20depth_50_percent_025v.isra.0) - /* Unwind info 0x7ffff2d4 */ + /* Unwind info 0x7ffff2d0 */ *(.text._Z20depth_47_percent_050v.isra.0) *(.text._Z20depth_13_percent_025v.isra.0) - /* Unwind info 0x7ffff290 */ + /* Unwind info 0x7ffff28c */ *(.text._Z20depth_13_percent_050v.isra.0) *(.text._Z38run_test_depth_15_error_04_cleanup_025v) - /* Unwind info 0x80b20fac */ - *(.text.__gnu_Unwind_Backtrace) - /* Unwind info 0x7ffff77c */ + /* Unwind info 0x7ffff778 */ *(.text._Z38run_test_depth_15_error_65_cleanup_100v) *(.text._ZN10__cxxabiv111__terminateEPFvvE) *(.text._ZNKSt3_V214error_category10_M_messageB5cxx11Ei) - /* Unwind info 0x7ffff794 */ + /* Unwind info 0x80aab0b0 */ + *(.text._ZN3hal7stm32f112gpio_manager6output16driver_configureERKNS_10output_pin8settingsE) + /* Unwind info 0x7ffff790 */ *(.text._ZNSt11logic_errorC2EPKc) + /* Unwind info 0x7ffff034 */ + *(.text._ZNSt5arrayISt8optionalIN6stdext16inplace_functionIFvbELj8ELj8EEEELj16EED2Ev) + *(.text._Z20depth_02_percent_100v.isra.0) /* Unwind info 
0x8002abb0 */ *(.text._ZNSt12__cow_stringC2EPKcj) - /* Unwind info 0x803a3fab */ - *(.text.__gnu_Unwind_RaiseException) - /* Unwind info 0x7ffff744 */ - *(.text._Z38run_test_depth_50_error_65_cleanup_100v) - *(.text.__gnu_Unwind_Resume_or_Rethrow) - /* Unwind info 0x7fffff3c */ + /* Unwind info 0x7fffff48 */ *(.text.unlikely._ZSt24__throw_out_of_range_fmtPKcz) - /* Unwind info 0x8001aab0 */ - *(.text._ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE7reserveEj) - /* Unwind info 0x7ffff08c */ + /* Unwind info 0x7ffff088 */ *(.text._Z20depth_09_percent_100v.isra.0) - /* Unwind info 0x7ffff14c */ + /* Unwind info 0x7ffff148 */ *(.text._Z20depth_25_percent_100v.isra.0) - /* Unwind info 0x7ffff164 */ + /* Unwind info 0x7ffff160 */ *(.text._Z20depth_27_percent_100v.isra.0) - /* Unwind info 0x7ffff1f4 */ + /* Unwind info 0x7ffff1f0 */ *(.text._Z20depth_39_percent_100v.isra.0) - /* Unwind info 0x7ffff224 */ + /* Unwind info 0x7ffff220 */ *(.text._Z20depth_43_percent_100v.isra.0) - /* Unwind info 0x7ffff288 */ + /* Unwind info 0x7ffff284 */ *(.text._Z20depth_09_percent_050v.isra.0) - /* Unwind info 0x7ffff2a8 */ + /* Unwind info 0x7ffff2a4 */ *(.text._Z20depth_25_percent_050v.isra.0) - /* Unwind info 0x7ffff2e0 */ + /* Unwind info 0x7ffff2dc */ *(.text._Z20depth_09_percent_025v.isra.0) - /* Unwind info 0x7ffff05c */ + /* Unwind info 0x7ffff058 */ *(.text._Z20depth_05_percent_100v.isra.0) - /* Unwind info 0x7ffff0f8 */ + /* Unwind info 0x7ffff0f4 */ *(.text._Z20depth_18_percent_100v.isra.0) - /* Unwind info 0x7ffff104 */ + /* Unwind info 0x7ffff100 */ *(.text._Z20depth_19_percent_100v.isra.0) - /* Unwind info 0x7ffff134 */ + /* Unwind info 0x7ffff130 */ *(.text._Z20depth_23_percent_100v.isra.0) - /* Unwind info 0x7ffff140 */ + /* Unwind info 0x7ffff13c */ *(.text._Z20depth_24_percent_100v.isra.0) - /* Unwind info 0x7ffff194 */ + /* Unwind info 0x7ffff190 */ *(.text._Z20depth_31_percent_100v.isra.0) - /* Unwind info 0x7ffff1a0 */ + /* Unwind info 0x7ffff19c */ 
*(.text._Z20depth_32_percent_100v.isra.0) - /* Unwind info 0x7ffff1c4 */ + /* Unwind info 0x7ffff1c0 */ *(.text._Z20depth_35_percent_100v.isra.0) - /* Unwind info 0x7ffff1dc */ + /* Unwind info 0x7ffff1d8 */ *(.text._Z20depth_37_percent_100v.isra.0) - /* Unwind info 0x7ffff230 */ + /* Unwind info 0x7ffff22c */ *(.text._Z20depth_44_percent_100v.isra.0) - /* Unwind info 0x7ffff248 */ + /* Unwind info 0x7ffff244 */ *(.text._Z20depth_46_percent_100v.isra.0) - /* Unwind info 0x7ffff280 */ + /* Unwind info 0x7ffff27c */ *(.text._Z20depth_05_percent_050v.isra.0) - /* Unwind info 0x7ffff29c */ + /* Unwind info 0x7ffff298 */ *(.text._Z20depth_19_percent_050v.isra.0) - /* Unwind info 0x7ffff2a4 */ + /* Unwind info 0x7ffff2a0 */ *(.text._Z20depth_23_percent_050v.isra.0) - /* Unwind info 0x7ffff2b4 */ + /* Unwind info 0x7ffff2b0 */ *(.text._Z20depth_31_percent_050v.isra.0) - /* Unwind info 0x7ffff2d0 */ + /* Unwind info 0x7ffff2cc */ *(.text._Z20depth_45_percent_050v.isra.0) - /* Unwind info 0x7ffff2dc */ + /* Unwind info 0x7ffff2d8 */ *(.text._Z20depth_05_percent_025v.isra.0) - /* Unwind info 0x7ffff784 */ + /* Unwind info 0x7ffff780 */ *(.text._ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_createERjj) - /* Unwind info 0x7ffff044 */ + /* Unwind info 0x7ffff040 */ *(.text._Z20depth_03_percent_100v.isra.0) - /* Unwind info 0x7ffff050 */ + /* Unwind info 0x7ffff04c */ *(.text._Z20depth_04_percent_100v.isra.0) - /* Unwind info 0x7ffff068 */ + /* Unwind info 0x7ffff064 */ *(.text._Z20depth_06_percent_100v.isra.0) - /* Unwind info 0x7ffff074 */ + /* Unwind info 0x7ffff070 */ *(.text._Z20depth_07_percent_100v.isra.0) - /* Unwind info 0x7ffff080 */ + /* Unwind info 0x7ffff07c */ *(.text._Z20depth_08_percent_100v.isra.0) - /* Unwind info 0x7ffff098 */ + /* Unwind info 0x7ffff094 */ *(.text._Z20depth_10_percent_100v.isra.0) - /* Unwind info 0x7ffff0a4 */ + /* Unwind info 0x7ffff0a0 */ *(.text._Z20depth_11_percent_100v.isra.0) - /* Unwind info 0x7ffff0b0 */ + /* Unwind info 
0x7ffff0ac */ *(.text._Z20depth_12_percent_100v.isra.0) - /* Unwind info 0x7ffff0c8 */ + /* Unwind info 0x7ffff0c4 */ *(.text._Z20depth_14_percent_100v.isra.0) - /* Unwind info 0x7ffff0d4 */ + /* Unwind info 0x7ffff0d0 */ *(.text._Z20depth_15_percent_100v.isra.0) - /* Unwind info 0x7ffff0e0 */ + /* Unwind info 0x7ffff0dc */ *(.text._Z20depth_16_percent_100v.isra.0) - /* Unwind info 0x7ffff110 */ + /* Unwind info 0x7ffff10c */ *(.text._Z20depth_20_percent_100v.isra.0) - /* Unwind info 0x7ffff128 */ + /* Unwind info 0x7ffff124 */ *(.text._Z20depth_22_percent_100v.isra.0) - /* Unwind info 0x7ffff158 */ + /* Unwind info 0x7ffff154 */ *(.text._Z20depth_26_percent_100v.isra.0) - /* Unwind info 0x7ffff170 */ + /* Unwind info 0x7ffff16c */ *(.text._Z20depth_28_percent_100v.isra.0) - /* Unwind info 0x7ffff1b8 */ + /* Unwind info 0x7ffff1b4 */ *(.text._Z20depth_34_percent_100v.isra.0) - /* Unwind info 0x7ffff1d0 */ + /* Unwind info 0x7ffff1cc */ *(.text._Z20depth_36_percent_100v.isra.0) - /* Unwind info 0x7ffff200 */ + /* Unwind info 0x7ffff1fc */ *(.text._Z20depth_40_percent_100v.isra.0) - /* Unwind info 0x7ffff20c */ + /* Unwind info 0x7ffff208 */ *(.text._Z20depth_41_percent_100v.isra.0) - /* Unwind info 0x7ffff218 */ + /* Unwind info 0x7ffff214 */ *(.text._Z20depth_42_percent_100v.isra.0) - /* Unwind info 0x7ffff254 */ + /* Unwind info 0x7ffff250 */ *(.text._Z20depth_47_percent_100v.isra.0) - /* Unwind info 0x7ffff26c */ + /* Unwind info 0x7ffff268 */ *(.text._Z20depth_49_percent_100v.isra.0) - /* Unwind info 0x7ffff278 */ + /* Unwind info 0x7ffff274 */ *(.text._Z20depth_50_percent_100v.isra.0) - /* Unwind info 0x7ffff27c */ + /* Unwind info 0x7ffff278 */ *(.text._Z20depth_03_percent_050v.isra.0) - /* Unwind info 0x7ffff284 */ + /* Unwind info 0x7ffff280 */ *(.text._Z20depth_07_percent_050v.isra.0) - /* Unwind info 0x7ffff28c */ + /* Unwind info 0x7ffff288 */ *(.text._Z20depth_11_percent_050v.isra.0) - /* Unwind info 0x7ffff294 */ + /* Unwind info 0x7ffff290 */ 
*(.text._Z20depth_15_percent_050v.isra.0) - /* Unwind info 0x7ffff2d8 */ + /* Unwind info 0x7ffff2d4 */ *(.text._Z20depth_49_percent_050v.isra.0) - /* Unwind info 0x7ffff798 */ + /* Unwind info 0x7ffff794 */ *(.text._Z38run_test_depth_05_error_65_cleanup_100v) *(.text._ZNSt3pmr12_GLOBAL__N_110null_res_t11do_allocateEjj) /* Unwind info 0x80971cab */ *(.text.unlikely._ZN9__gnu_cxx26__throw_insufficient_spaceEPKcS1_) - /* Unwind info 0x7ffff038 */ - *(.text._Z20depth_02_percent_100v.isra.0) - /* Unwind info 0x7ffff0bc */ + /* Unwind info 0x7ffff0b8 */ *(.text._Z20depth_13_percent_100v.isra.0) - /* Unwind info 0x7ffff0ec */ + /* Unwind info 0x7ffff0e8 */ *(.text._Z20depth_17_percent_100v.isra.0) - /* Unwind info 0x7ffff11c */ + /* Unwind info 0x7ffff118 */ *(.text._Z20depth_21_percent_100v.isra.0) - /* Unwind info 0x7ffff17c */ + /* Unwind info 0x7ffff178 */ *(.text._Z20depth_29_percent_100v.isra.0) - /* Unwind info 0x7ffff260 */ + /* Unwind info 0x7ffff25c */ *(.text._Z20depth_48_percent_100v.isra.0) - /* Unwind info 0x7ffff298 */ + /* Unwind info 0x7ffff294 */ *(.text._Z20depth_17_percent_050v.isra.0) - /* Unwind info 0x7ffff2a0 */ + /* Unwind info 0x7ffff29c */ *(.text._Z20depth_21_percent_050v.isra.0) - /* Unwind info 0x7ffff024 */ + /* Unwind info 0x7ffff020 */ *(.text._ZN3hal7stm32f18power_onENS0_10peripheralE) - /* Unwind info 0x7ffff188 */ + /* Unwind info 0x7ffff184 */ *(.text._Z20depth_30_percent_100v.isra.0) - /* Unwind info 0x7ffff1ac */ + /* Unwind info 0x7ffff1a8 */ *(.text._Z20depth_33_percent_100v.isra.0) - /* Unwind info 0x7ffff2b8 */ + /* Unwind info 0x7ffff2b4 */ *(.text._Z20depth_33_percent_050v.isra.0) - /* Unwind info 0x7ffff1cc */ + /* Unwind info 0x7ffff1c8 */ *(.text._Z38run_test_depth_50_error_04_cleanup_000v) - /* Unwind info 0x7ffff204 */ + /* Unwind info 0x7ffff200 */ *(.text._Z38run_test_depth_15_error_04_cleanup_000v) - /* Unwind info 0x7ffff220 */ + /* Unwind info 0x7ffff21c */ *(.text._Z38run_test_depth_05_error_04_cleanup_000v) - /* 
Unwind info 0x7ffff258 */ + /* Unwind info 0x7ffff254 */ *(.text._Z38run_test_depth_50_error_04_cleanup_025v) - /* Unwind info 0x7ffff274 */ + /* Unwind info 0x7ffff270 */ *(.text._Z38run_test_depth_35_error_04_cleanup_025v) - /* Unwind info 0x7ffff2e4 */ + /* Unwind info 0x7ffff2e0 */ *(.text._Z38run_test_depth_50_error_04_cleanup_050v) - /* Unwind info 0x7ffff300 */ + /* Unwind info 0x7ffff2fc */ *(.text._Z38run_test_depth_35_error_04_cleanup_050v) - /* Unwind info 0x7ffff31c */ + /* Unwind info 0x7ffff318 */ *(.text._Z38run_test_depth_15_error_04_cleanup_050v) - /* Unwind info 0x7ffff338 */ + /* Unwind info 0x7ffff334 */ *(.text._Z38run_test_depth_05_error_04_cleanup_050v) - /* Unwind info 0x7ffff354 */ + /* Unwind info 0x7ffff350 */ *(.text._Z38run_test_depth_50_error_04_cleanup_100v) - /* Unwind info 0x7ffff370 */ + /* Unwind info 0x7ffff36c */ *(.text._Z38run_test_depth_35_error_04_cleanup_100v) - /* Unwind info 0x7ffff38c */ + /* Unwind info 0x7ffff388 */ *(.text._Z38run_test_depth_15_error_04_cleanup_100v) - /* Unwind info 0x7ffff3a8 */ + /* Unwind info 0x7ffff3a4 */ *(.text._Z38run_test_depth_05_error_04_cleanup_100v) - /* Unwind info 0x7ffff3c4 */ + /* Unwind info 0x7ffff3c0 */ *(.text._Z38run_test_depth_50_error_16_cleanup_000v) - /* Unwind info 0x7ffff3e0 */ + /* Unwind info 0x7ffff3dc */ *(.text._Z38run_test_depth_35_error_16_cleanup_000v) - /* Unwind info 0x7ffff3fc */ + /* Unwind info 0x7ffff3f8 */ *(.text._Z38run_test_depth_15_error_16_cleanup_000v) - /* Unwind info 0x7ffff418 */ + /* Unwind info 0x7ffff414 */ *(.text._Z38run_test_depth_05_error_16_cleanup_000v) - /* Unwind info 0x7ffff434 */ + /* Unwind info 0x7ffff430 */ *(.text._Z38run_test_depth_01_error_16_cleanup_000v) - /* Unwind info 0x7ffff450 */ + /* Unwind info 0x7ffff44c */ *(.text._Z38run_test_depth_50_error_16_cleanup_025v) - /* Unwind info 0x7ffff46c */ + /* Unwind info 0x7ffff468 */ *(.text._Z38run_test_depth_35_error_16_cleanup_025v) - /* Unwind info 0x7ffff488 */ + /* Unwind info 
0x7ffff484 */ *(.text._Z38run_test_depth_15_error_16_cleanup_025v) - /* Unwind info 0x7ffff4a4 */ + /* Unwind info 0x7ffff4a0 */ *(.text._Z38run_test_depth_05_error_16_cleanup_025v) - /* Unwind info 0x7ffff4c0 */ + /* Unwind info 0x7ffff4bc */ *(.text._Z38run_test_depth_01_error_16_cleanup_025v) - /* Unwind info 0x7ffff4dc */ + /* Unwind info 0x7ffff4d8 */ *(.text._Z38run_test_depth_50_error_16_cleanup_050v) - /* Unwind info 0x7ffff4f8 */ + /* Unwind info 0x7ffff4f4 */ *(.text._Z38run_test_depth_35_error_16_cleanup_050v) - /* Unwind info 0x7ffff514 */ + /* Unwind info 0x7ffff510 */ *(.text._Z38run_test_depth_15_error_16_cleanup_050v) - /* Unwind info 0x7ffff530 */ + /* Unwind info 0x7ffff52c */ *(.text._Z38run_test_depth_05_error_16_cleanup_050v) - /* Unwind info 0x7ffff54c */ + /* Unwind info 0x7ffff548 */ *(.text._Z38run_test_depth_50_error_16_cleanup_100v) - /* Unwind info 0x7ffff568 */ + /* Unwind info 0x7ffff564 */ *(.text._Z38run_test_depth_35_error_16_cleanup_100v) - /* Unwind info 0x7ffff584 */ + /* Unwind info 0x7ffff580 */ *(.text._Z38run_test_depth_15_error_16_cleanup_100v) - /* Unwind info 0x7ffff5a0 */ + /* Unwind info 0x7ffff59c */ *(.text._Z38run_test_depth_05_error_16_cleanup_100v) - /* Unwind info 0x7ffff5bc */ + /* Unwind info 0x7ffff5b8 */ *(.text._Z38run_test_depth_50_error_65_cleanup_000v) - /* Unwind info 0x7ffff5d8 */ + /* Unwind info 0x7ffff5d4 */ *(.text._Z38run_test_depth_35_error_65_cleanup_000v) - /* Unwind info 0x7ffff5f4 */ + /* Unwind info 0x7ffff5f0 */ *(.text._Z38run_test_depth_15_error_65_cleanup_000v) - /* Unwind info 0x7ffff610 */ + /* Unwind info 0x7ffff60c */ *(.text._Z38run_test_depth_05_error_65_cleanup_000v) - /* Unwind info 0x7ffff62c */ + /* Unwind info 0x7ffff628 */ *(.text._Z38run_test_depth_01_error_65_cleanup_000v) - /* Unwind info 0x7ffff648 */ + /* Unwind info 0x7ffff644 */ *(.text._Z38run_test_depth_50_error_65_cleanup_025v) - /* Unwind info 0x7ffff664 */ + /* Unwind info 0x7ffff660 */ 
*(.text._Z38run_test_depth_35_error_65_cleanup_025v) - /* Unwind info 0x7ffff680 */ + /* Unwind info 0x7ffff67c */ *(.text._Z38run_test_depth_15_error_65_cleanup_025v) - /* Unwind info 0x7ffff69c */ + /* Unwind info 0x7ffff698 */ *(.text._Z38run_test_depth_05_error_65_cleanup_025v) - /* Unwind info 0x7ffff6b8 */ + /* Unwind info 0x7ffff6b4 */ *(.text._Z38run_test_depth_01_error_65_cleanup_025v) - /* Unwind info 0x7ffff6d4 */ + /* Unwind info 0x7ffff6d0 */ *(.text._Z38run_test_depth_50_error_65_cleanup_050v) - /* Unwind info 0x7ffff6f0 */ + /* Unwind info 0x7ffff6ec */ *(.text._Z38run_test_depth_35_error_65_cleanup_050v) - /* Unwind info 0x7ffff70c */ + /* Unwind info 0x7ffff708 */ *(.text._Z38run_test_depth_15_error_65_cleanup_050v) - /* Unwind info 0x7ffff728 */ + /* Unwind info 0x7ffff724 */ *(.text._Z38run_test_depth_05_error_65_cleanup_050v) - /* Unwind info 0x7ffff760 */ + /* Unwind info 0x7ffff740 */ + *(.text._Z38run_test_depth_50_error_65_cleanup_100v) + /* Unwind info 0x7ffff75c */ *(.text._Z38run_test_depth_35_error_65_cleanup_100v) - /* Unwind info 0x7ffff7b4 */ + /* Unwind info 0x7ffff7b0 */ *(.text._ZN3hal7stm32f112gpio_manager6output12driver_levelEb) /* Unwind info 0x8003aab0 */ *(.text._ZNK10__cxxabiv117__class_type_info11__do_upcastEPKS0_PPv) - /* Unwind info 0x7fffff24 */ - *(.text.unlikely._ZSt19__throw_logic_errorPKc) /* Unwind info 0x7fffff30 */ + *(.text.unlikely._ZSt19__throw_logic_errorPKc) + /* Unwind info 0x7fffff3c */ *(.text.unlikely._ZSt20__throw_length_errorPKc) - /* Unwind info 0x7ffff7b8 */ + /* Unwind info 0x7ffff7b4 */ *(.text._ZN3hal7stm32f112gpio_manager6output12driver_levelEv) - /* Unwind info 0x7fffff20 */ + /* Unwind info 0x7fffff2c */ *(.text.unlikely._ZSt17__throw_bad_allocv) } } diff --git a/benchmark/platforms/stm32f103c8.cpp b/benchmark/platforms/stm32f103c8.cpp index bdf0c83..927c563 100644 --- a/benchmark/platforms/stm32f103c8.cpp +++ b/benchmark/platforms/stm32f103c8.cpp @@ -116,7 +116,7 @@ void 
log_start(std::string_view p_message) std::array cycles{}; auto cycle_index = 0Uz; -constexpr auto use_cycle_counter = true; +constexpr auto use_cycle_counter = false; void start() { diff --git a/benchmark/table_graph.csv b/benchmark/table_graph.csv new file mode 100644 index 0000000..49a35f3 --- /dev/null +++ b/benchmark/table_graph.csv @@ -0,0 +1,155 @@ +entry, address +0, 0 +1, 13396 +2, 17276 +3, 18756 +4, 20108 +5, 21300 +6, 22332 +7, 22908 +8, 23356 +9, 23724 +10, 24064 +11, 24380 +12, 24620 +13, 24820 +14, 25020 +15, 25204 +16, 25360 +17, 25516 +18, 25672 +19, 25828 +20, 25980 +21, 26132 +22, 26284 +23, 26436 +24, 26584 +25, 26732 +26, 26880 +27, 27028 +28, 27168 +29, 27304 +30, 27436 +31, 27564 +32, 27680 +33, 27788 +34, 27880 +35, 27964 +36, 28048 +37, 28132 +38, 28216 +39, 28300 +40, 28384 +41, 28468 +42, 28552 +43, 28632 +44, 28712 +45, 28792 +46, 28872 +47, 28952 +48, 29032 +49, 29112 +50, 29192 +51, 29272 +52, 29352 +53, 29432 +54, 29512 +55, 29592 +56, 29672 +57, 29752 +58, 29832 +59, 29912 +60, 29992 +61, 30068 +62, 30144 +63, 30220 +64, 30296 +65, 30372 +66, 30448 +67, 30524 +68, 30600 +69, 30676 +70, 30752 +71, 30828 +72, 30904 +73, 30980 +74, 31056 +75, 31132 +76, 31208 +77, 31284 +78, 31360 +79, 31436 +80, 31512 +81, 31588 +82, 31664 +83, 31740 +84, 31816 +85, 31892 +86, 31968 +87, 32044 +88, 32120 +89, 32196 +90, 32268 +91, 32340 +92, 32412 +93, 32484 +94, 32556 +95, 32628 +96, 32700 +97, 32772 +98, 32840 +99, 32908 +100, 32976 +101, 33044 +102, 33112 +103, 33180 +104, 33248 +105, 33316 +106, 33384 +107, 33452 +108, 33520 +109, 33588 +110, 33656 +111, 33724 +112, 33792 +113, 33860 +114, 33928 +115, 33996 +116, 34064 +117, 34132 +118, 34200 +119, 34268 +120, 34336 +121, 34404 +122, 34472 +123, 34540 +124, 34608 +125, 34676 +126, 34744 +127, 34812 +128, 34880 +129, 34948 +130, 35016 +131, 35084 +132, 35152 +133, 35220 +134, 35288 +135, 35356 +136, 35424 +137, 35492 +138, 35560 +139, 35628 +140, 35696 +141, 35764 +142, 35832 +143, 35900 +144, 35968 
+145, 36036 +146, 36104 +147, 36172 +148, 36240 +149, 36300 +150, 36352 +151, 36400 +152, 36448 +153, 36492 diff --git a/benchmark/test_order.csv b/benchmark/test_order.csv index 49db255..9fb984e 100644 --- a/benchmark/test_order.csv +++ b/benchmark/test_order.csv @@ -1,5 +1,7 @@ executable,pulse_order build/stm32f103c8/builtin/Release/except.cpp.elf.bin,generated_tests/except.cpp.csv +build/stm32f103c8/builtin/Release/fast_gcc_unwind.cpp.elf,generated_tests/gcc_except_fast.csv +build/stm32f103c8/estell/Release/except.cpp.elf.bin,generated_tests/estell_except.cpp.csv build/stm32f103c8/estell/Release/except.cpp.elf.bin,generated_tests/estell_except.cpp.csv build/stm32f103c8/estell/Release/nearpoint.cpp.elf.bin,generated_tests/nearpoint.cpp.csv build/stm32f103c8/builtin/Release/result_error04.cpp.elf.bin,generated_tests/result_error04.cpp.csv diff --git a/datasheets/near_point/linear_near_point_entry.json b/datasheets/near_point/linear_near_point_entry.json index 3471713..521a078 100644 --- a/datasheets/near_point/linear_near_point_entry.json +++ b/datasheets/near_point/linear_near_point_entry.json @@ -1,13 +1,13 @@ { "reg": [ { - "bits": 24, - "name": "entry number offset", + "bits": 22, + "name": "starting entry number", "type": 5 }, { - "bits": 8, - "name": "average function size", + "bits": 10, + "name": "entry count", "type": 6 } ], diff --git a/linker_scripts/arm-none-eabi-gcc-14.2_discard.ld b/linker_scripts/arm-none-eabi-gcc-14.2_discard.ld index d4a6a2f..bcb8a03 100644 --- a/linker_scripts/arm-none-eabi-gcc-14.2_discard.ld +++ b/linker_scripts/arm-none-eabi-gcc-14.2_discard.ld @@ -1,12 +1,21 @@ SECTIONS { /DISCARD/ : { *unwind-c.o(.text*) + *(.text.next_unwind_byte) + *(.text.__gnu_unwind_execute) + *(.text.__gnu_unwind_pr_common) + *(.text.__aeabi_unwind_cpp_pr0) + *(.text.__aeabi_unwind_cpp_pr1) + *(.text.__aeabi_unwind_cpp_pr2) } .text : { __gcc_personality_v0 = .; - LONG(0) __gxx_personality_v0 = .; - LONG(0) + __aeabi_unwind_cpp_pr0 = .; + 
__aeabi_unwind_cpp_pr1 = .; + __aeabi_unwind_cpp_pr2 = .; + __gnu_unwind_pr_common = .; + LONG(0); } } INSERT BEFORE .text; \ No newline at end of file diff --git a/scripts/nearpoint.py b/scripts/nearpoint.py index f613f39..029019e 100644 --- a/scripts/nearpoint.py +++ b/scripts/nearpoint.py @@ -2,12 +2,11 @@ import subprocess import re import argparse -from typing import List +from typing import List, Tuple import logging import struct from pathlib import Path from collections import defaultdict -import statistics class FunctionInfo: @@ -123,7 +122,7 @@ def parse_object_map_line(line): return address, size, obj_file -def read_map_get_function(map_file_text: str) -> List[FunctionInfo]: +def get_functions_from_map_file(map_file_text: str) -> List[FunctionInfo]: actualized_functions: List[FunctionInfo] = [] lines = map_file_text.split('\n') text_only_lines: List[str] = [] @@ -175,6 +174,19 @@ def read_map_get_function(map_file_text: str) -> List[FunctionInfo]: return actualized_functions +def get_index_start(map_file_text: str) -> List[FunctionInfo]: + lines = map_file_text.split('\n') + for line in lines: + if "__exidx_start" in line: + sections = line.strip().split(maxsplit=3) + # 0x0800c8a0 PROVIDE (__exidx_start = .) 
+ # Trim all lines before this line + return int(sections[0], 16) + + logging.error("Failed to find __exidx_start symbol in map file!") + exit(1) + + def get_substring_after(main_string, delimiter): index = main_string.rfind(delimiter) if index == -1: @@ -184,15 +196,32 @@ def get_substring_after(main_string, delimiter): class Block: - def __init__(self, start_entry: int, average_size: int): + def __init__(self, start_entry: int, entry_count: int): self.start = start_entry - self.average_size = average_size + self.entry_count = entry_count def __repr__(self): - return f"Block(start='{self.start}', avg_size={self.average_size})" + return f"Block(start={self.start}, count={self.entry_count})" - def as_c_bit_mask(self, block_power: int): - return f"({self.start} << {block_power}) | {self.average_size}" + def as_c_bit_mask_entry_count(self, block_power: int): + return f"({self.start} << {block_power}) | {self.entry_count}" + + def as_c_bit_mask_entry_average(self, block_power: int): + if self.entry_count == 1: + average_size = 0 + else: + average_size = (1 << block_power) // self.entry_count + + return f"({self.start} << {block_power}) | {average_size}" + + +def generate_csv_for_graphing(exception_index: List[FunctionGroup], file: Path): + csv = "entry, address\n" + location_counter = 0 + for entry_number, group in enumerate(exception_index): + csv += f"{entry_number}, {location_counter}\n" + location_counter += group.size() + file.write_text(csv) def break_into_blocks(exception_index: List[FunctionGroup], @@ -220,7 +249,7 @@ def break_into_blocks(exception_index: List[FunctionGroup], PROGRAM_LENGTH = LAST_FUNCTION - FIRST_FUNCTION NUMBER_OF_BLOCKS = (PROGRAM_LENGTH >> block_power) + 1 - block_list = [Block(0, 0)] * NUMBER_OF_BLOCKS + block_list = [Block(0, 1)] * NUMBER_OF_BLOCKS logging.info(f"Created {len(block_list)} blocks") # Lookup Table Region @@ -237,7 +266,7 @@ def break_into_blocks(exception_index: List[FunctionGroup], for i in range(starting_block, 
ending_block): if i < len(block_list): - block_list[i] = Block(index_cursor, 0) + block_list[i] = Block(index_cursor, 1) index_cursor += 1 @@ -255,28 +284,23 @@ def break_into_blocks(exception_index: List[FunctionGroup], BLOCK_END_ADDRESS = group_addresses[index_cursor] - FIRST_FUNCTION BLOCK_NUMBER_END = (BLOCK_END_ADDRESS >> block_power) BLOCK_INDEX_DELTA = (BLOCK_NUMBER_END - BLOCK_NUMBER_START) + BLOCK_ENTRY_COUNT = (index_cursor - start_index) + 1 if BLOCK_INDEX_DELTA == 1: if BLOCK_NUMBER_START < len(block_list): - EXCEPTION_SLICE = exception_index[start_index:index_cursor] - AVERAGE_SIZE = round(statistics.fmean( - entry.size() for entry in EXCEPTION_SLICE)) block_list[BLOCK_NUMBER_START] = Block( - start_index, AVERAGE_SIZE) + start_index, BLOCK_ENTRY_COUNT) start_index = index_cursor elif BLOCK_INDEX_DELTA > 1: logging.warning("Block delta > 1, adjusting...") # Setting final block - EXCEPTION_SLICE = exception_index[start_index:index_cursor] - AVERAGE_SIZE = round(statistics.fmean( - entry.size() for entry in exception_index[start_index:index_cursor])) - block_list[-1] = Block(start_index, AVERAGE_SIZE) + block_list[-1] = Block(start_index, (index_cursor - start_index) + 1) return block_list -def generate_cpp_table_file(filename: str, +def generate_cpp_table_file(filename: Path, block_power: int, blocks: List[Block], program_start: int): @@ -298,7 +322,7 @@ def generate_cpp_table_file(filename: str, code += f"std::array const _normal_table_data = {{\n" for block in blocks: - code += f" {block.as_c_bit_mask(block_power)}, // entry={block.start}, avg_size={block.average_size}\n" + code += f" {block.as_c_bit_mask_entry_count(block_power)}, // {block}\n" code += "};\n\n" code += "} // namespace\n\n" @@ -306,17 +330,84 @@ def generate_cpp_table_file(filename: str, code += "std::span normal_table = _normal_table_data;\n" code += "} // namespace ke::__except_abi::inline v1\n" - with open(filename, "w") as f: - f.write(code) - + filename.write_text(code) 
logging.info(f"C++ nearpoint tables written to: {filename}") +def parse_prel31(value): + """Convert PREL31 value to signed 32-bit integer""" + # Check if MSB (bit 30) is set for sign extension + if value & (1 << 30): + value |= 1 << 31 + return struct.unpack('>i', struct.pack('>I', value))[0] + + +def parse_exception_index(hex_data: str, + starting_location: int) -> List[Tuple[int, int]]: + """Parse exception index from hex dump""" + + # Remove hex dump formatting and convert to bytes + hex_lines = hex_data.strip().split('\n') + hex_bytes = [] + + # Line format below (address, 4x 32-bit word contents, ascii): + # 800835c 00000000 e07cff7f 10ffff7f ec7cff7f .....|.......|.. + for line in hex_lines: + # split the line between the double space that devices the numbers and + # the ASCII, and take the first part with the numbers. + line_segments = line.strip().split(' ')[0] + # Split up the line into the segments above shown by the format + line_segments = line_segments.split(' ') + # Remove the address word at the start + line_segments = line_segments[1:] + + for segment in line_segments: + SEGMENT_AS_HEX = bytes.fromhex(segment) + hex_bytes.append(struct.unpack('I', initial_address_text)[0] + logging.debug(f'initial address = {initial_address:08x}') + + WORD_OFFSET = (starting_location - initial_address) // 4 + hex_bytes = hex_bytes[WORD_OFFSET:] + + results: List[Tuple[int, int]] = [] + + # Process in pairs (skip first two, then take pairs) + for i in range(0, len(hex_bytes), 2): + if i + 1 >= len(hex_bytes): + # this should never happen though... 
+ break + + addr_raw = hex_bytes[i] + offset_raw = hex_bytes[i + 1] + + # Apply PREL31 conversion + addr_prel31 = parse_prel31(addr_raw) + offset_prel31 = offset_raw + + entry_offset = i * 4 + absolute_address = (starting_location + entry_offset) + addr_prel31 + logging.debug( + f"addr_prel31 = {addr_prel31} -> {absolute_address:08x}\n") + + # We do not use the absolute address of the offset_prel31 because we + # actually do not want them to match currently. We do not have a good + # way to support matching LSDA data and the LD merge algorithm won't + # notice that they can be merged. + results.append((absolute_address, offset_prel31)) + + return results + + def main(): parser = argparse.ArgumentParser( description='Generate nearpoint exception tables and linker script from ELF file' ) - parser.add_argument('elf_file', help='Path to the ELF binary') + parser.add_argument('elf_file', type=Path, help='Path to the ELF binary') parser.add_argument('-o', '--output', help='Output name (default: elf_file.nearpoint.cpp)', default=None) @@ -328,14 +419,14 @@ def main(): help='Error threshold for small table generation (default: 8, min: 4) (NOT CURRENTLY SUPPORTED!)') parser.add_argument('--auto-optimize', action='store_true', help='Automatically find optimal block sizes (NOT CURRENTLY SUPPORTED!)') - parser.add_argument('--tool-prefix', type=str, default="", + parser.add_argument('--tool-prefix', type=Path, default="", help='Toolchain prefix for objdump/nm (e.g., "arm-none-eabi-" or full path)') parser.add_argument('-m', '--map', type=Path, required=True, help='Path to map file for executable') - parser.add_argument('-r', '--order_file', type=str, + parser.add_argument('-r', '--order_file', type=Path, default="order.ld", help='Path to where to store the ordering file.') - parser.add_argument('-n', '--nearpoint_file', type=str, + parser.add_argument('-n', '--nearpoint_file', type=Path, default="nearpoint.cpp", help='Path to where to store the nearpoint table.') 
parser.add_argument('-v', '--verbose', action='store_true', @@ -363,7 +454,7 @@ def main(): # ========================================================================== map_file = Path(args.map).read_text() - FINALIZED_FUNCTIONS = read_map_get_function( + FINALIZED_FUNCTIONS = get_functions_from_map_file( map_file) logging.debug(f"FINALIZED_FUNCTIONS = \n{FINALIZED_FUNCTIONS}") @@ -372,6 +463,7 @@ def main(): # ========================================================================== # I, for the life of me, cannot understand why they put the index in the # ordered area and the table in the ordered area. Might be a typo. + # TODO(kammce): when you get back, you need to fix this. EXCEPTION_INDEX_SECTION_NAME = ".except_unordered:\n" section_content = disassembly.split("Contents of section ") exception_index = "" @@ -382,72 +474,11 @@ def main(): exception_index_text = exception_index.removeprefix( EXCEPTION_INDEX_SECTION_NAME) - logging.debug(f"\n{exception_index_text}") - - def parse_prel31(value): - """Convert PREL31 value to signed 32-bit integer""" - # Check if MSB (bit 30) is set for sign extension - if value & (1 << 30): - value |= 1 << 31 - return struct.unpack('>i', struct.pack('>I', value))[0] - - def parse_exception_index(hex_data: str): - """Parse exception index from hex dump""" - # Remove hex dump formatting and convert to bytes - hex_lines = hex_data.strip().split('\n') - hex_bytes = [] - first_line = hex_lines[0].split()[0].zfill(8) - logging.debug(f'first_line = "{first_line}"') - initial_address_text = bytes.fromhex(first_line) - initial_address = struct.unpack('>I', initial_address_text)[0] - logging.debug(f'initial address = {initial_address:08x}') - # format: 800835c 00000000 e07cff7f 10ffff7f ec7cff7f .....|.......|.. - - # For some reason there is a set of 0s at the start which puts - # everything off by 1. 
We need to handle this line outside of the loop - for line in hex_lines: - # Extract hex values after the address - address_and_values = (line.split(" ")[0]).strip() - logging.debug(f'address_and_values = {address_and_values}') - parts = address_and_values.split(" ")[1:] - logging.debug(f'parts = {parts}') - hex_bytes.extend(parts) - - # Skip the first 0s in the index if it exists - if hex_bytes[0] == "00000000": - hex_bytes = hex_bytes[1:] - results: List[FunctionInfo] = [] - - # Process in pairs (skip first two, then take pairs) - for i in range(0, len(hex_bytes), 2): - if i + 1 >= len(hex_bytes): - break - - addr_hex = hex_bytes[i] - offset_hex = hex_bytes[i + 1] - logging.debug(f" addr_hex = {addr_hex}") - logging.debug(f"offset_hex = {offset_hex}") - - addr_bytes = bytes.fromhex(addr_hex) - offset_bytes = bytes.fromhex(offset_hex) - - addr_raw = struct.unpack(' {absolute_address:08x}\n") - - results.append((absolute_address, offset_prel31)) - - return results - - EXCEPTION_ENTRIES = parse_exception_index(exception_index_text) + logging.debug(f"\nexception_index_text = {exception_index_text}") + + index_starting_address = get_index_start(map_file) + EXCEPTION_ENTRIES = parse_exception_index( + exception_index_text, index_starting_address) hex_values = ' '.join( f'(function: {entry[0]:08x}, data: {entry[1]:08x}),\n' for entry in EXCEPTION_ENTRIES) @@ -466,7 +497,6 @@ def parse_exception_index(hex_data: str): f"entry_function: {entry_function:08x}, unwind: {unwind_info}, next_entry_function: {next_entry_function:08x}") if FINALIZED_FUNCTIONS[function_index].address != entry_function: - # TODO(kammce): figure out better exception logging.error( f"entry_function({i}): {entry_function:08x} != {FINALIZED_FUNCTIONS[function_index].address:08x}:{FINALIZED_FUNCTIONS[function_index].name}") @@ -499,6 +529,13 @@ def parse_exception_index(hex_data: str): for i in range(function_index, len(FINALIZED_FUNCTIONS)): FINALIZED_FUNCTIONS[i].unwind_info = last_unwind_info + # 
========================================================================== + # Step 3.1: Remove GNU functions + # ========================================================================== + SKIP_PATTERNS = ["libunwind", "next_unwind_byte", "__gnu_unwind_execute"] + FINALIZED_FUNCTIONS = list(filter(lambda f: not any( + pattern in f.name for pattern in SKIP_PATTERNS), FINALIZED_FUNCTIONS)) + # ========================================================================== # Step 4: Collect functions into groups with common unwind info # ========================================================================== @@ -559,6 +596,8 @@ def parse_exception_index(hex_data: str): # ========================================================================== # Step 7: Generate nearpoint table # ========================================================================== + generate_csv_for_graphing( + sorted_unwind_groups_list, Path("table_graph.csv")) blocks = break_into_blocks( sorted_unwind_groups_list, block_power=args.block_power) logging.debug(f"blocks={blocks}") diff --git a/src/arm_cortex/estell/exception.cpp b/src/arm_cortex/estell/exception.cpp index 5b44fa4..38f4461 100644 --- a/src/arm_cortex/estell/exception.cpp +++ b/src/arm_cortex/estell/exception.cpp @@ -13,6 +13,7 @@ // limitations under the License. 
#include +#include #include #include #include @@ -137,6 +138,8 @@ struct nearpoint_descriptor { std::uint32_t normal_block_size = 0; std::uint32_t text_starting_address = 0; + std::uint32_t small_block_size = 0; + std::uint32_t small_starting_address = 0; }; [[gnu::weak]] std::span near_point_descriptor{}; [[gnu::weak]] std::span normal_table{}; @@ -147,50 +150,64 @@ struct nearpoint_descriptor std::uintptr_t near_point_guess_index(std::uintptr_t p_program_counter) { + auto const block_power = ke::__except_abi::near_point_descriptor[0]; auto const progarm_offset = ke::__except_abi::near_point_descriptor[1]; auto const pc = p_program_counter - progarm_offset; - auto const block_power = ke::__except_abi::near_point_descriptor[0]; + auto const inter_block_mask = (1U << block_power) - 1U; auto const inter_block_location = pc & inter_block_mask; auto const block_index = pc >> block_power; auto const linear_info = ke::__except_abi::normal_table[block_index]; + auto const average_size = linear_info & inter_block_mask; auto const entry_start = linear_info >> block_power; - auto const average_function_size = linear_info & inter_block_mask; - if (average_function_size == 0) { + if (average_size == 0) { return entry_start; } - auto const guess_offset = inter_block_location / average_function_size; + auto const guess_offset = inter_block_location / average_size; auto const location = entry_start + guess_offset; + return location; } index_entry_t const& get_index_entry_near_point(std::uint32_t p_program_counter) { auto const index_table = get_arm_exception_index(); - auto const initial_guess = near_point_guess_index(p_program_counter); - auto current = index_table[initial_guess].function(); - auto const go_left = p_program_counter < current; - - if (go_left) { - for (std::size_t iter = initial_guess; iter > 0; iter--) { - current = index_table[iter].function(); - auto next = index_table[iter + 1].function(); - if (current <= p_program_counter && p_program_counter < next) { - 
return index_table[iter]; - } - } - return index_table[0]; + + auto const block_power = ke::__except_abi::near_point_descriptor[0]; + auto const progarm_offset = ke::__except_abi::near_point_descriptor[1]; + auto const pc = p_program_counter - progarm_offset; + + auto const inter_block_mask = (1U << block_power) - 1U; + auto const inter_block_location = pc & inter_block_mask; + auto const block_index = pc >> block_power; + auto const linear_info = ke::__except_abi::normal_table[block_index]; + + auto const entry_start = linear_info >> block_power; + auto const entry_count = linear_info & inter_block_mask; + if (entry_count == 1) { + return index_table[entry_start]; + } + auto const scaled = inter_block_location * entry_count; + auto const guess_offset = scaled >> block_power; + auto const initial_guess = static_cast(entry_start + guess_offset); + + auto it = index_table.begin() + initial_guess; + + if (p_program_counter < it->function()) { + // Find the rightmost entry with function() <= p_program_counter + do { + --it; + } while (it->function() > p_program_counter); } else { - for (std::size_t iter = initial_guess; iter < index_table.size(); iter++) { - current = index_table[iter].function(); - auto next = index_table[iter + 1].function(); - if (current <= p_program_counter && p_program_counter < next) { - return index_table[iter]; - } - } - return index_table.end()[-1]; + // Find the leftmost entry with function() > p_program_counter, then back up + do { + ++it; + } while (it->function() <= p_program_counter); + --it; } + + return *it; } index_entry_t const& get_index_entry(std::uint32_t p_program_counter) @@ -762,18 +779,18 @@ inline void enter_function(exception_control_block& p_exception_object) action_decoder a_decoder( info.type_table_end, info.call_site_end, site_info.action); - for (auto const* type_info = a_decoder.get_next_catch_type(); - type_info != nullptr; - type_info = a_decoder.get_next_catch_type()) { + for (auto const* catch_type = 
a_decoder.get_next_catch_type(); + catch_type != nullptr; + catch_type = a_decoder.get_next_catch_type()) { // This is our dynamic cast :P auto position = std::ranges::find_if( - p_exception_object.type_info, [&type_info](auto const& element) -> bool { - return element.type_info == type_info; + p_exception_object.type_info, [&catch_type](auto const& element) { + return element.type_info == catch_type; }); if (position == p_exception_object.type_info.end() && - type_info != action_decoder::install_context_type()) { + catch_type != action_decoder::install_context_type()) { continue; } diff --git a/src/arm_cortex/estell/internal.hpp b/src/arm_cortex/estell/internal.hpp index 419536d..294c653 100644 --- a/src/arm_cortex/estell/internal.hpp +++ b/src/arm_cortex/estell/internal.hpp @@ -316,7 +316,7 @@ struct flattened_hierarchy std::array bases{}; std::uint32_t size = 0; - explicit flattened_hierarchy(std::type_info const* p_info) + explicit constexpr flattened_hierarchy(std::type_info const* p_info) { bases[0].type_info = p_info; bases[0].offset = 0; @@ -325,27 +325,27 @@ struct flattened_hierarchy flattened_hierarchy() = default; - [[nodiscard]] auto begin() + [[nodiscard]] constexpr auto begin() { return bases.begin(); } - [[nodiscard]] auto end() + [[nodiscard]] constexpr auto end() { return bases.begin() + size; } - [[nodiscard]] auto cbegin() const + [[nodiscard]] constexpr auto cbegin() const { return bases.cbegin(); } - [[nodiscard]] auto cend() const + [[nodiscard]] constexpr auto cend() const { return bases.cbegin() + size; } - void push_back(base_class_type_info const& p_info) + constexpr void push_back(base_class_type_info const& p_info) { if (size > max_count) { std::terminate(); @@ -353,7 +353,19 @@ struct flattened_hierarchy bases[size++] = p_info; } - void reset() + using iter = decltype(bases.end()); + + constexpr iter find(std::type_info const* p_type) + { + for (auto const base : bases) { + if (base->type_info == p_type) { + return &base; + } + } + 
return bases.end(); + } + + constexpr void reset() { size = 0; }