diff --git a/.gitignore b/.gitignore index 833049a0..e50d6bee 100644 --- a/.gitignore +++ b/.gitignore @@ -51,3 +51,5 @@ build.xcore # Python cache information lib_mic_array.egg-info +examples/app_mic_array_basic/output.wav +examples/app_mic_array_basic/mic_array_output.bin diff --git a/Jenkinsfile b/Jenkinsfile index 13b738c2..b26580aa 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1,6 +1,6 @@ // This file relates to internal XMOS infrastructure and should be ignored by external users -@Library('xmos_jenkins_shared_library@v0.45.0') _ +@Library('xmos_jenkins_shared_library@v0.48.0') _ getApproval() pipeline { @@ -8,18 +8,23 @@ pipeline { parameters { string( - name: 'TOOLS_VERSION', + name: 'TOOLS_XS3_VERSION', defaultValue: '15.3.1', description: 'The XTC tools version' ) + string( + name: 'TOOLS_VX4_VERSION', + defaultValue: '-j --repo arch_vx_slipgate -b master -a XTC 116', + description: 'The XTC Slipgate tools version' + ) string( name: 'XMOSDOC_VERSION', defaultValue: 'v8.0.1', - description: 'The xmosdoc version') - + description: 'The xmosdoc version' + ) string( name: 'INFR_APPS_VERSION', - defaultValue: 'v3.3.0', + defaultValue: 'develop', //TODO pin after release description: 'The infr_apps version' ) choice( @@ -60,7 +65,7 @@ pipeline { stage('Examples build') { steps { dir("${REPO_NAME}/examples") { - xcoreBuild() + xcoreBuild(toolsVersion: params.TOOLS_XS3_VERSION) } } } @@ -106,7 +111,7 @@ pipeline { dir("tests") { createVenv(reqFile: "requirements.txt") withVenv { - xcoreBuild() + xcoreBuild(toolsVersion: params.TOOLS_XS3_VERSION) stash includes: '**/*.xe', name: 'test_bin', useDefaultExcludes: false } } @@ -124,7 +129,7 @@ pipeline { sh "git clone git@github.com:xmos/xmos_cmake_toolchain.git --branch v1.0.0" dir(REPO_NAME) { checkoutScmShallow() - withTools(params.TOOLS_VERSION) { + withTools(params.TOOLS_XS3_VERSION) { sh "cmake -B build.xcore -DDEV_LIB_MIC_ARRAY=1 -DCMAKE_TOOLCHAIN_FILE=../xmos_cmake_toolchain/xs3a.cmake" sh "cd build.xcore && 
make all -j 16" } @@ -136,13 +141,11 @@ pipeline { } } } // stage('Custom CMake build') - + stage('Tests') { parallel { - stage('XS3 tests') { - agent { - label 'xcore.ai' - } + stage('XS3 Tests') { + agent {label 'xcore.ai'} stages { stage("Checkout and Build") { steps { @@ -159,7 +162,7 @@ pipeline { stage('Run tests') { steps { dir("${REPO_NAME}/tests") { - withTools(params.TOOLS_VERSION) { + withTools(params.TOOLS_XS3_VERSION) { withVenv { // This ensures a project for XS2 can be built and runs OK @@ -183,7 +186,7 @@ pipeline { if(params.TEST_LEVEL == 'smoke') { echo "Running tests with fixed seed 12345" - sh "pytest -v --junitxml=pytest_basic_mic.xml --seed 12345 --level ${params.TEST_LEVEL} " + sh "pytest -v --junitxml=pytest_basic_mic.xml --seed 12345 --level ${params.TEST_LEVEL} -k 'not 16frame-8n'" } else { @@ -209,11 +212,51 @@ pipeline { } // stage('Run tests') } // stages post { - cleanup { - xcoreCleanSandbox() - } - } - } // stage('HW tests') + cleanup {xcoreCleanSandbox()} + } // post + } // XS3 Tests + + stage('VX4 Tests') { + agent {label "vx4"} + stages { + stage("Checkout and Build") { + steps { + dir(REPO_NAME){ + checkoutScmShallow() + dir("tests") { + createVenv(reqFile: "requirements.txt") + withVenv { + dir("unit") { + xcoreBuild(toolsVersion: params.TOOLS_VX4_VERSION) + } + dir ("signal/BasicMicArray") { + withTools(params.TOOLS_VX4_VERSION){ + xcoreBuild(toolsVersion: params.TOOLS_VX4_VERSION, jobs:8) + } + } + } // withVenv + } // dir("tests") + } // dir(REPO_NAME) + } // steps + } // stage("Checkout and Build") + stage('Run tests') { + steps { + dir("${REPO_NAME}/tests") { + withVenv { + dir("unit") { + withTools(params.TOOLS_VX4_VERSION) {sh "xrun --xscope bin/tests-unit.xe"} + } + dir("signal/BasicMicArray") { + withTools(params.TOOLS_VX4_VERSION) {sh 'python -m pytest --level nightly --seed 12345 -k "(0_isr or lowpower) and not 16frame-8n" -v'} // Skipping 16frame-8n. 
See https://github.com/xmos/lib_mic_array/issues/288 + } + } // withVenv + }}} // stage('Run tests') + } // stages + post { + cleanup {xcoreCleanSandbox()} + } //post + } // VX4 Tests + } // parallel } // stage('Tests') diff --git a/doc/exclude_patterns.inc b/doc/exclude_patterns.inc index 3a948a03..f5a5bea5 100644 --- a/doc/exclude_patterns.inc +++ b/doc/exclude_patterns.inc @@ -7,3 +7,4 @@ LICENSE.rst build.xcore tests/**/.pytest_cache/*.md tests/.pytest_cache/*.md +**/app_mic_array_basic*/*.md diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 2e687aa0..99700f7d 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -2,6 +2,7 @@ cmake_minimum_required(VERSION 3.21) include($ENV{XMOS_CMAKE_PATH}/xcommon.cmake) project(mic_array_examples) add_subdirectory(app_mic_array) +add_subdirectory(app_mic_array_basic) add_subdirectory(app_shutdown) add_subdirectory(app_par_decimator) add_subdirectory(app_custom_filter) diff --git a/examples/app_custom_filter/src/main.xc b/examples/app_custom_filter/src/main.xc index 62f9fa7d..85584763 100644 --- a/examples/app_custom_filter/src/main.xc +++ b/examples/app_custom_filter/src/main.xc @@ -72,6 +72,8 @@ void init_mic_conf(mic_array_conf_t &mic_array_conf, mic_array_filter_conf_t (&f mic_array_conf.pdmrx_conf.pdm_out_block = (uint32_t*)pdmrx_out_block; mic_array_conf.pdmrx_conf.pdm_in_double_buf = (uint32_t*)pdmrx_out_block_double_buf; mic_array_conf.pdmrx_conf.channel_map = channel_map; + mic_array_conf.pdmrx_conf.num_channels_in = APP_MIC_COUNT; + mic_array_conf.pdmrx_conf.num_channels_out = APP_MIC_COUNT; } int main() { diff --git a/examples/app_mic_array_basic/CMakeLists.txt b/examples/app_mic_array_basic/CMakeLists.txt new file mode 100644 index 00000000..0b35ca79 --- /dev/null +++ b/examples/app_mic_array_basic/CMakeLists.txt @@ -0,0 +1,42 @@ +cmake_minimum_required(VERSION 3.21) +include($ENV{XMOS_CMAKE_PATH}/xcommon.cmake) +project(app_mic_array) + +set(XMOS_SANDBOX_DIR 
${CMAKE_CURRENT_LIST_DIR}/../../..) + +# conditional depending on target +set(APP_C_SRCS src/app.c) + +if(CMAKE_C_COMPILER_VERSION VERSION_EQUAL "3.6.0") + set(APP_HW_TARGET xs3/XK-EVK-XU316-AIV.xn) + set(APP_INCLUDES src xs3) + list(APPEND APP_C_SRCS + xs3/device_pll_ctrl.c + ) + list(APPEND APP_XC_SRCS + xs3/mapfile.xc + ) +else() + set(APP_HW_TARGET XK-EVK-XU416) + set(APP_INCLUDES src vx4) + list(APPEND APP_C_SRCS + vx4/device_pll_ctrl.c + vx4/mapfile.c + ) +endif() + +set(APP_DEPENDENT_MODULES "lib_mic_array") + +set(APP_COMPILER_FLAGS + -Os + -g + -report + -Wall + -fxscope + # Mic array config + -DMIC_ARRAY_CONFIG_SAMPLES_PER_FRAME=320 + -DMIC_ARRAY_CONFIG_MIC_COUNT=1 + -DMIC_ARRAY_CONFIG_USE_PDM_ISR=0 +) + +XMOS_REGISTER_APP() diff --git a/examples/app_mic_array_basic/README.md b/examples/app_mic_array_basic/README.md new file mode 100644 index 00000000..3c305b91 --- /dev/null +++ b/examples/app_mic_array_basic/README.md @@ -0,0 +1,30 @@ +# Basic Mic Array Example + +## Hardware Required + +- **XMS0016** + +## Compile + +```sh +cmake -G "Unix Makefiles" -B build +xmake -C build +``` + +## Run + +```sh +xrun --xscope bin/app_mic_array.xe +``` + +## Convert Binary Data to WAV + +```sh +python convert.py +``` + +**Output:** + +``` +Converted mic_array_output.bin to output.wav with 1 channels, 12000 Hz sample rate, and 32 bits per sample. +``` diff --git a/examples/app_mic_array_basic/convert.py b/examples/app_mic_array_basic/convert.py new file mode 100644 index 00000000..d415dd23 --- /dev/null +++ b/examples/app_mic_array_basic/convert.py @@ -0,0 +1,27 @@ +# Copyright 2026 XMOS LIMITED. +# This Software is subject to the terms of the XMOS Public Licence: Version 1. 
+ +import numpy as np +import wave +import soundfile as sf + + +def convert_to_wav( + input_file, output_file, num_channels=1, sample_rate=16000, bits_per_sample=32 +): + with open(input_file, "rb") as inp_f: + data = inp_f.read() + data = np.frombuffer(data, dtype=np.int32) + + sf.write(output_file, data, sample_rate, subtype='PCM_32') + print(f"Converted {input_file} to {output_file} with {num_channels} channels, {sample_rate} Hz sample rate, and {bits_per_sample} bits per sample.") + + +if __name__ == "__main__": + convert_to_wav( + input_file="mic_array_output.bin", + output_file="output.wav", + num_channels=1, + sample_rate=12000, + bits_per_sample=32 + ) diff --git a/examples/app_mic_array_basic/src/app.c b/examples/app_mic_array_basic/src/app.c new file mode 100644 index 00000000..492be761 --- /dev/null +++ b/examples/app_mic_array_basic/src/app.c @@ -0,0 +1,151 @@ +// Copyright 2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "app_config.h" +#include "mic_array.h" +#include "device_pll_ctrl.h" +#include "small_768k_to_12k_filter.h" + +#define APP_FILENAME ("mic_array_output.bin") + +DECLARE_JOB(user_mic, (chanend_t)); +DECLARE_JOB(user_audio, (chanend_t)); + +static pdm_rx_resources_t pdm_res = PDM_RX_RESOURCES_SDR( + MIC_ARRAY_CONFIG_PORT_MCLK, + MIC_ARRAY_CONFIG_PORT_PDM_CLK, + MIC_ARRAY_CONFIG_PORT_PDM_DATA, + MIC_ARRAY_CONFIG_MCLK_FREQ, + MIC_ARRAY_CONFIG_PDM_FREQ, + MIC_ARRAY_CONFIG_CLOCK_BLOCK_A); + +void init_mic_conf(mic_array_conf_t *mic_array_conf, mic_array_filter_conf_t filter_conf[2], unsigned *channel_map) +{ + static int32_t stg1_filter_state[APP_MIC_COUNT][8]; + static int32_t stg2_filter_state[APP_MIC_COUNT][SMALL_768K_TO_12K_FILTER_STG2_TAP_COUNT]; + memset(mic_array_conf, 0, sizeof(mic_array_conf_t)); + + //decimator + mic_array_conf->decimator_conf.filter_conf = &filter_conf[0]; + 
mic_array_conf->decimator_conf.num_filter_stages = 2; + // filter stage 1 + filter_conf[0].coef = (int32_t*)small_768k_to_12k_filter_stg1_coef; + filter_conf[0].num_taps = SMALL_768K_TO_12K_FILTER_STG1_TAP_COUNT; + filter_conf[0].decimation_factor = SMALL_768K_TO_12K_FILTER_STG1_DECIMATION_FACTOR; + filter_conf[0].state = (int32_t*)stg1_filter_state; + filter_conf[0].shr = SMALL_768K_TO_12K_FILTER_STG1_SHR; + filter_conf[0].state_words_per_channel = filter_conf[0].num_taps/32; // works on 1-bit samples + // filter stage 2 + filter_conf[1].coef = (int32_t*)small_768k_to_12k_filter_stg2_coef; + filter_conf[1].num_taps = SMALL_768K_TO_12K_FILTER_STG2_TAP_COUNT; + filter_conf[1].decimation_factor = SMALL_768K_TO_12K_FILTER_STG2_DECIMATION_FACTOR; + filter_conf[1].state = (int32_t*)stg2_filter_state; + filter_conf[1].shr = SMALL_768K_TO_12K_FILTER_STG2_SHR; + filter_conf[1].state_words_per_channel = SMALL_768K_TO_12K_FILTER_STG2_TAP_COUNT; + + // pdm rx + static uint32_t pdmrx_out_block[APP_MIC_COUNT][SMALL_768K_TO_12K_FILTER_STG2_DECIMATION_FACTOR]; + static uint32_t pdmrx_out_block_double_buf[2][APP_MIC_COUNT * SMALL_768K_TO_12K_FILTER_STG2_DECIMATION_FACTOR] __attribute__((aligned(8))); + mic_array_conf->pdmrx_conf.pdm_out_words_per_channel = SMALL_768K_TO_12K_FILTER_STG2_DECIMATION_FACTOR; + mic_array_conf->pdmrx_conf.pdm_out_block = (uint32_t*)pdmrx_out_block; + mic_array_conf->pdmrx_conf.pdm_in_double_buf = (uint32_t*)pdmrx_out_block_double_buf; + mic_array_conf->pdmrx_conf.channel_map = channel_map; + mic_array_conf->pdmrx_conf.num_channels_in = APP_MIC_COUNT; + mic_array_conf->pdmrx_conf.num_channels_out = APP_MIC_COUNT; +} + +void user_mic(chanend_t c_mic_audio) +{ + printf("mic init\n"); + device_pll_init(); + unsigned channel_map[1] = {0}; + mic_array_conf_t mic_array_conf; + mic_array_filter_conf_t filter_conf[2]; + init_mic_conf(&mic_array_conf, filter_conf, channel_map); + mic_array_init_custom_filter(&pdm_res, &mic_array_conf); + 
mic_array_start(c_mic_audio); +} + +void user_audio(chanend_t c_mic_audio) +{ + static int32_t WORD_ALIGNED tmp_buff[APP_BUFF_SIZE] = {0}; + int32_t *buff_ptr = &tmp_buff[0]; + unsigned frame_counter = APP_N_FRAMES; + + hwtimer_t tmr = hwtimer_alloc(); + unsigned t0 = 0, t1 = 0; + unsigned t2 = 0, t3 = 0; + uint64_t num = 0; + uint64_t den = 0; + + printf("mic start\n"); + t2 = hwtimer_get_time(tmr); + while (frame_counter--) + { + t0 = hwtimer_get_time(tmr); + ma_frame_rx(buff_ptr, (chanend_t)c_mic_audio, MIC_ARRAY_CONFIG_MIC_COUNT, APP_N_SAMPLES); + buff_ptr += APP_N_SAMPLES; + t1 = hwtimer_get_time(tmr); + num += (t1 - t0); + den += 1; + } + t3 = hwtimer_get_time(tmr); + printf("mic end\n"); + + // Profile the average time taken per frame + const float ma_expected = (float)(APP_N_SAMPLES) / (float)(APP_OUT_FREQ_HZ); + const float tilef = 600.0; + const float ref = tilef / (5.0 + 1.0); + + float avg = (float)num / (float)den; + float total = (float)(t3 - t2); + float avg_us = avg / ref; + float total_us = total / ref; + float ma_exp_us = ma_expected * 1e6; + float perc_err = ((avg_us - ma_exp_us) / ma_exp_us) * 100.0; + + printf("Tile freq: %.2f MHz\n", tilef); + printf("Reference freq: %.2f MHz\n", ref); + printf("ma_frame_rx avg: %.2f ticks\n", avg); + printf("ma_frame_rx avg: %.2f us\n", avg_us); + printf("ma_frame_rx expected: %.2f us\n", ma_exp_us); + printf("ma_frame_rx error: %.2f %%\n", perc_err); + printf("total ticks: %.2f\n", total); + printf("total us: %.2f us\n", total_us); + + // write samples to a binary file + printf("Writing output to %s\n", APP_FILENAME); + FILE *f = fopen(APP_FILENAME, "wb"); + assert(f != NULL); + fwrite(tmp_buff, sizeof(int32_t), APP_BUFF_SIZE, f); + fclose(f); + ma_shutdown(c_mic_audio); + printf("Done\n"); +} + +void main_tile_1(){ + channel_t c_mic_audio = chan_alloc(); + xscope_mode_lossless(); + + // Parallel Jobs + PAR_JOBS( + PJOB(user_mic, (c_mic_audio.end_a)), + PJOB(user_audio, (c_mic_audio.end_b)) + ); + 
chan_free(c_mic_audio); +} + +void main_tile_0(){ + // intentionally left empty + return; +} diff --git a/examples/app_mic_array_basic/src/app_config.h b/examples/app_mic_array_basic/src/app_config.h new file mode 100644 index 00000000..c864943f --- /dev/null +++ b/examples/app_mic_array_basic/src/app_config.h @@ -0,0 +1,32 @@ +// Copyright 2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#pragma once + +#if defined(__VX4B__) +#include +#define PORT_MCLK_IN VX_PORT_1D +#define PORT_PDM_CLK VX_PORT_1G +#define PORT_PDM_DATA VX_PORT_1F +#elif defined(__XS3A__) +#include +#define PORT_MCLK_IN XS1_PORT_1D +#define PORT_PDM_CLK PORT_MIC_CLK +#define PORT_PDM_DATA PORT_MIC_DATA +#endif + +// -------------------- Frequency and Port definitions -------------------- +#define MIC_ARRAY_CONFIG_MCLK_FREQ (24576000) /* 24.576 MHz */ +#define MIC_ARRAY_CONFIG_PDM_FREQ (768000) /* 768 kHz */ +#define MIC_ARRAY_CONFIG_PORT_MCLK PORT_MCLK_IN /* X0D11, J14 - Pin 15, '11' */ +#define MIC_ARRAY_CONFIG_PORT_PDM_CLK PORT_PDM_CLK /* X0D00, J14 - Pin 2, '00' */ +#define MIC_ARRAY_CONFIG_PORT_PDM_DATA PORT_PDM_DATA /* X0D14..X0D21 | J14 - Pin 3,5,12,14 and Pin 6,7,10,11 */ +#define MIC_ARRAY_CONFIG_CLOCK_BLOCK_A XS1_CLKBLK_2 + +// ------------------------- App Definitions ----------------------------------- +#define APP_N_SAMPLES (MIC_ARRAY_CONFIG_SAMPLES_PER_FRAME) +#define APP_OUT_FREQ_HZ (12000) // 12 kHz +#define APP_SAMPLE_SECONDS (5) +#define APP_N_FRAMES (APP_OUT_FREQ_HZ * APP_SAMPLE_SECONDS / APP_N_SAMPLES) +#define APP_BUFF_SIZE (APP_N_FRAMES * APP_N_SAMPLES) +#define APP_MIC_COUNT (MIC_ARRAY_CONFIG_MIC_COUNT) diff --git a/examples/app_mic_array_basic/src/config.xscope b/examples/app_mic_array_basic/src/config.xscope new file mode 100644 index 00000000..d3a3da63 --- /dev/null +++ b/examples/app_mic_array_basic/src/config.xscope @@ -0,0 +1,18 @@ + + + + + + + + + + + + + + + + + + diff --git 
a/examples/app_mic_array_basic/src/device_pll_ctrl.h b/examples/app_mic_array_basic/src/device_pll_ctrl.h new file mode 100644 index 00000000..ecc0c83e --- /dev/null +++ b/examples/app_mic_array_basic/src/device_pll_ctrl.h @@ -0,0 +1,6 @@ +// Copyright 2022-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#pragma once + +void device_pll_init(void); diff --git a/examples/app_mic_array_basic/src/small_768k_to_12k_filter.h b/examples/app_mic_array_basic/src/small_768k_to_12k_filter.h new file mode 100644 index 00000000..867c2290 --- /dev/null +++ b/examples/app_mic_array_basic/src/small_768k_to_12k_filter.h @@ -0,0 +1,59 @@ +// Copyright 2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#ifndef SMALL_768K_TO_12K_FILTER_H +#define SMALL_768K_TO_12K_FILTER_H + +/* Autogenerated by running 'python combined.py small_768k_to_12k_filter_int.pkl -fp small_768k_to_12k_filter'. Do not edit */ + +#include + + +#define SMALL_768K_TO_12K_FILTER_STG1_DECIMATION_FACTOR 32 +#define SMALL_768K_TO_12K_FILTER_STG1_TAP_COUNT 256 +#define SMALL_768K_TO_12K_FILTER_STG1_SHR 0 /*shr not relevant for stage 1*/ + + +uint32_t small_768k_to_12k_filter_stg1_coef[128] = { + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF2DBBA, 0x1E443FC2, 0x2788F9F1, 0x1E443FC2, 0x2785DDB4, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF86BEB, 0x1C91CEC9, 0x8DC6F6F6, 0x3B193738, 0x938D7D61, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFDBC29, 0x211BF8E9, 0x323BF6FD, 0xC4C971FD, 0x884943DB, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE89A2, 0x721D515E, 0x02D0A650, 0xB407A8AB, 0x84E45917, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF26BF, 0x614B35F7, 0xE678C631, 0xE67EFACD, 0x286FD64F, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFCA48, 0x0C0BC045, 0x42E8F9F1, 0x742A203D, 0x0301253F, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF358, 0x5EE51139, 0x80C16668, 0x3019C88A, 0x77A1ACFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 
0xFFFFFC6D, 0x3F5E4E54, 0xAB2F696F, 0x4D52A727, 0xAFCB63FF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF8E, 0x553F9533, 0x994F30CF, 0x299CCA9F, 0xCAA71FFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0, 0x66554CF0, 0x78DA4025, 0xB1E0F32A, 0xA660FFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x879996A5, 0x5293801C, 0x94AA5699, 0x9E1FFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF81E18C6, 0x631C0003, 0x8C663187, 0x81FFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFE01F07, 0x83E00000, 0x7C1E0F80, 0x7FFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE007, 0xFC000000, 0x03FE007F, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8, 0x00000000, 0x0001FFFF, 0xFFFFFFFF, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, +}; + + +#define SMALL_768K_TO_12K_FILTER_STG2_DECIMATION_FACTOR 2 +#define SMALL_768K_TO_12K_FILTER_STG2_TAP_COUNT 48 +#define SMALL_768K_TO_12K_FILTER_STG2_SHR 1 + + +int32_t small_768k_to_12k_filter_stg2_coef[48] = { +-0x6b2e, 0x9bb0, 0x867bf, 0x6abc3, +-0x1d6951, -0x37fde1, 0x1b8845, 0xad6445, +0x6737ac, -0x11a7f35, -0x1d79ea4, 0x7ee25c, +0x3e05795, 0x27d0754, -0x49e8388, -0x834e523, +0xb8e3a0, 0xe48a501, 0xb3d7d09, -0xe33d15c, +-0x212034e8, -0x6b83320, 0x408190d3, 0x7fffffff, +0x7fffffff, 0x408190d3, -0x6b83320, -0x212034e8, +-0xe33d15c, 0xb3d7d09, 0xe48a501, 0xb8e3a0, +-0x834e523, -0x49e8388, 0x27d0754, 0x3e05795, +0x7ee25c, -0x1d79ea4, -0x11a7f35, 0x6737ac, +0xad6445, 0x1b8845, -0x37fde1, -0x1d6951, +0x6abc3, 0x867bf, 0x9bb0, -0x6b2e, +}; + +#define NUM_DECIMATION_STAGES (2) + +#endif diff --git a/examples/app_mic_array_basic/vx4/device_pll_ctrl.c b/examples/app_mic_array_basic/vx4/device_pll_ctrl.c new file mode 100644 index 00000000..e9262b51 --- /dev/null +++ b/examples/app_mic_array_basic/vx4/device_pll_ctrl.c @@ -0,0 +1,84 @@ +// Copyright 2026 XMOS LIMITED. 
+// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "device_pll_ctrl.h" + +static +void delay_1ms(){ + hwtimer_t tmr = hwtimer_alloc(); + assert(tmr != 0); + hwtimer_delay(tmr, 100000); // 1ms with 100 MHz timer tick + hwtimer_free(tmr); +} + +/* + * PLL1 Control Register Fields: + * + * PLL1_R_DIVIDER - Input divisor value. + * PLL1_F_MULTIPLIER - Feedback multiplier value. + * PLL1_OD_DIVIDER - Output divider value. + * PLL1_DISABLE - Disable the PLL when this is 1. + * PLL1_BYPASS - When set to 1 the PLL will be bypassed. + * PLL1_NLOCK - If set to 1 the chip will not wait for the PLL to relock. + */ + +void device_pll_init(void) +{ + printf("Initializing PLL\n"); + xsystem_tile_id_t tileid = get_local_tile_id(); + + // [0] PLL CTL DISABLE + uint32_t DEVICE_PLL_DISABLE = 0x00000000; + DEVICE_PLL_DISABLE = VX_PLL1_DISABLE_SET(DEVICE_PLL_DISABLE, 0); // NOTE(review): DISABLE field is written as 0 here; per the field list above, 1 disables the PLL - confirm intent + + // [1] Mux + uint32_t DEVICE_PLL_MUX_VAL = 0x00000000; + DEVICE_PLL_MUX_VAL = VX_APP_CLK1_MUX_BIT_SET(DEVICE_PLL_MUX_VAL, 1); + DEVICE_PLL_MUX_VAL = VX_APP_CLK_IN_PHASE_BIT_SET(DEVICE_PLL_MUX_VAL, 1); + + // [2] PLL CTL + uint32_t DEVICE_PLL_CTL_VAL = 0x00000000; + DEVICE_PLL_CTL_VAL = VX_PLL1_R_DIVIDER_SET(DEVICE_PLL_CTL_VAL, 0); // input divider: 24 MHz ref / R=1 -> 24 MHz + DEVICE_PLL_CTL_VAL = VX_PLL1_F_MULTIPLIER_SET(DEVICE_PLL_CTL_VAL, 101); // feedback mult: 24 MHz * (F + 1 + 2/5 = 102.4) -> 2457.60 MHz + DEVICE_PLL_CTL_VAL = VX_PLL1_OD_DIVIDER_SET(DEVICE_PLL_CTL_VAL, 4); // output divider: 2457.60 MHz / (OD + 1) / 2 -> 245.76 MHz + DEVICE_PLL_CTL_VAL = VX_PLL1_DISABLE_SET(DEVICE_PLL_CTL_VAL, 0); // DISABLE = 0: PLL enabled + DEVICE_PLL_CTL_VAL = VX_PLL1_BYPASS_SET(DEVICE_PLL_CTL_VAL, 0); // no bypass + DEVICE_PLL_CTL_VAL = VX_PLL1_NLOCK_SET(DEVICE_PLL_CTL_VAL, 1); // 
NLOCK = 1: do not wait for the PLL to relock + + // [3] FRAC (2/5) + uint32_t DEVICE_PLL_FRAC_NOM = 0x00000000; + DEVICE_PLL_FRAC_NOM = 
VX_SS_FRAC_N_ENABLE_SET(DEVICE_PLL_FRAC_NOM, 1); // enable fractional mode + DEVICE_PLL_FRAC_NOM = VX_SS_FRAC_N_PERIOD_CYC_CNT_SET(DEVICE_PLL_FRAC_NOM, 4); // +1 -> 5 + DEVICE_PLL_FRAC_NOM = VX_SS_FRAC_N_F_HIGH_CYC_CNT_SET(DEVICE_PLL_FRAC_NOM, 1); // +1 -> 2 + + // [4] APP DIVIDER + uint32_t DEVICE_PLL_DIV_0 = 0x00000000; + DEVICE_PLL_DIV_0 = VX_APP_CLK_DIV_ENABLE_SET(DEVICE_PLL_DIV_0, 1); // enable app clock divider + DEVICE_PLL_DIV_0 = VX_APP_CLK_DIV_VALUE_SET(DEVICE_PLL_DIV_0, 4); // set divider to 4 -> 245.76 MHz / (4 + 1) / 2 -> 24.576 MHz + + // print reg values + printf("PLL Configuration:\n"); + printf("PLL DISABLE: 0x%08lX\n", DEVICE_PLL_DISABLE); + printf("PLL MUX VAL: 0x%08lX\n", DEVICE_PLL_MUX_VAL); + printf("PLL CTL VAL: 0x%08lX\n", DEVICE_PLL_CTL_VAL); + printf("PLL DIV VAL: 0x%08lX\n", DEVICE_PLL_DIV_0); + printf("PLL FRAC_NOM: 0x%08lX\n", DEVICE_PLL_FRAC_NOM); + + // CONFIGURE + sswitch_reg_try_write(tileid, VX_SSB_CSR_PLL1_CTRL_NUM, DEVICE_PLL_DISABLE); // disable PLL before configuration + sswitch_reg_try_write(tileid, VX_SSB_CSR_CLK_SWITCH_CTRL_NUM, DEVICE_PLL_MUX_VAL); // switch app clock to PLL1 output + sswitch_reg_try_write(tileid, VX_SSB_CSR_PLL1_CTRL_NUM, DEVICE_PLL_CTL_VAL); // configure PLL control register + sswitch_reg_try_write(tileid, VX_SSB_CSR_PLL1_FRACN_CTRL_NUM, DEVICE_PLL_FRAC_NOM); // configure PLL fractional control register + sswitch_reg_try_write(tileid, VX_SSB_CSR_APP_CLK1_DIV_NUM, DEVICE_PLL_DIV_0); // configure app clock divider + delay_1ms(); +} diff --git a/examples/app_mic_array_basic/vx4/mapfile.c b/examples/app_mic_array_basic/vx4/mapfile.c new file mode 100644 index 00000000..7066fc17 --- /dev/null +++ b/examples/app_mic_array_basic/vx4/mapfile.c @@ -0,0 +1,11 @@ +// Copyright 2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. 
+#include + +extern void main_tile_0(); +extern void main_tile_1(); + +NETWORK_MAIN( + TILE_MAIN(main_tile_1, 1, ()), + TILE_MAIN(main_tile_0, 0, ()) +) diff --git a/examples/app_mic_array_basic/xs3/XK-EVK-XU316-AIV.xn b/examples/app_mic_array_basic/xs3/XK-EVK-XU316-AIV.xn new file mode 100644 index 00000000..b4eb8fff --- /dev/null +++ b/examples/app_mic_array_basic/xs3/XK-EVK-XU316-AIV.xn @@ -0,0 +1,66 @@ + + + Board + xcore.ai Vision Development Kit + + + tileref tile[2] + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/examples/app_mic_array_basic/xs3/device_pll_ctrl.c b/examples/app_mic_array_basic/xs3/device_pll_ctrl.c new file mode 100644 index 00000000..a50d04f8 --- /dev/null +++ b/examples/app_mic_array_basic/xs3/device_pll_ctrl.c @@ -0,0 +1,35 @@ +// Copyright 2022-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#include +#include +#include +#include +#include "device_pll_ctrl.h" + + +#define DEVICE_PLL_CTL_VAL 0x0A019803 // Valid for all fractional values +#define DEVICE_PLL_FRAC_NOM 0x800095F9 // 24.576000 MHz + +void device_pll_init(void) +{ + unsigned tileid = get_local_tile_id(); + + const unsigned DEVICE_PLL_DISABLE = 0x0201FF04; + const unsigned DEVICE_PLL_DIV_0 = 0x80000004; + + write_sswitch_reg(tileid, XS1_SSWITCH_SS_APP_PLL_CTL_NUM, + DEVICE_PLL_DISABLE); + + hwtimer_t tmr = hwtimer_alloc(); + { + xassert(tmr != 0); + hwtimer_delay(tmr, 100000); // 1ms with 100 MHz timer tick + } + hwtimer_free(tmr); + + write_sswitch_reg(tileid, XS1_SSWITCH_SS_APP_PLL_CTL_NUM, DEVICE_PLL_CTL_VAL); + write_sswitch_reg(tileid, XS1_SSWITCH_SS_APP_PLL_CTL_NUM, DEVICE_PLL_CTL_VAL); + write_sswitch_reg(tileid, XS1_SSWITCH_SS_APP_PLL_FRAC_N_DIVIDER_NUM, DEVICE_PLL_FRAC_NOM); + write_sswitch_reg(tileid, XS1_SSWITCH_SS_APP_CLK_DIVIDER_NUM, DEVICE_PLL_DIV_0); +} diff --git a/examples/app_mic_array_basic/xs3/mapfile.xc 
b/examples/app_mic_array_basic/xs3/mapfile.xc new file mode 100644 index 00000000..2c695375 --- /dev/null +++ b/examples/app_mic_array_basic/xs3/mapfile.xc @@ -0,0 +1,25 @@ +// Copyright 2023-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#include +#include +#include +#include + +#include +#include + +extern "C" { + void main_tile_0(); + void main_tile_1(); +} + +int main(void) +{ + // Initialize parallel tasks + par{ + on tile[0]: main_tile_0(); + on tile[1]: main_tile_1(); + } + return 0; +} diff --git a/examples/app_par_decimator/src/app.cpp b/examples/app_par_decimator/src/app.cpp index 2b3f5874..1b01f7e4 100644 --- a/examples/app_par_decimator/src/app.cpp +++ b/examples/app_par_decimator/src/app.cpp @@ -43,8 +43,12 @@ pdm_rx_resources_t pdm_res = PDM_RX_RESOURCES_DDR( #define APP_N_MICS_IN APP_N_MICS #endif #define STAGE2_DEC_FACTOR_48KHZ 2 -#define CLRSR(c) asm volatile("clrsr %0" : : "n"(c)); -#define CLEAR_KEDI() CLRSR(XS1_SR_KEDI_MASK) + +#if defined(__XS3A__) +#define CLEAR_KEDI() asm volatile("clrsr %0" : : "n"(XS1_SR_KEDI_MASK)); +#else +#define CLEAR_KEDI() ((void)0) // not defined in !xs3a +#endif using TMicArray = mic_array::MicArray 0; k--) { + buff[k] = buff[k-1]; + } + #endif } diff --git a/lib_mic_array/api/mic_array/cpp/Decimator.hpp b/lib_mic_array/api/mic_array/cpp/Decimator.hpp index 06003b59..7a26eac4 100644 --- a/lib_mic_array/api/mic_array/cpp/Decimator.hpp +++ b/lib_mic_array/api/mic_array/cpp/Decimator.hpp @@ -63,8 +63,14 @@ class TwoStageDecimator * Per-mic channel filter state (PDM history) size in 32-bit words for stage-1 filter. */ unsigned pdm_history_sz; + + unsigned pdm_out_words_per_mic; } stage1; + public: + chanend_t c_decimator; + constexpr TwoStageDecimator() noexcept { } + /** * Stage 2 decimation configuration and state. 
*/ @@ -79,10 +85,6 @@ class TwoStageDecimator unsigned decimation_factor; } stage2; - public: - - constexpr TwoStageDecimator() noexcept { } - /** * @brief Initialize the two-stage decimator from a configuration struct * @ref mic_array_decimator_conf_t @p decimator_conf @@ -95,7 +97,7 @@ class TwoStageDecimator * * @param decimator_conf Decimator pipeline configuration. */ - void Init(mic_array_decimator_conf_t &decimator_conf); + void Init(mic_array_decimator_conf_t &decimator_conf, unsigned pdm_out_words_per_mic); /** * @brief Process one block of PDM data. @@ -126,6 +128,22 @@ class TwoStageDecimator void ProcessBlock( int32_t sample_out[MIC_COUNT], uint32_t *pdm_block); + + /** + * @brief Process a single-mic PDM block using only the 1st stage decimation filter + * + * Consumes `pdm_out_words_per_mic` PDM words from `pdm_block` and runs the + * stage-1 FIR once per word. One output sample per word is written to + * consecutive entries of `sample_out`. This path is used in low-power + * configurations where only the stage-1 filter is active. + * + * @param sample_out Output buffer with room for `pdm_out_words_per_mic` samples. + * @param pdm_block PDM data to be processed (`pdm_out_words_per_mic` words). 
+ */ + void ProcessBlockSingleStage( + int32_t *sample_out, + uint32_t *pdm_block); + }; } @@ -134,20 +152,25 @@ class TwoStageDecimator ////////////////////////////////////////////// template -void mic_array::TwoStageDecimator::Init( - mic_array_decimator_conf_t &decimator_conf) +void mic_array::TwoStageDecimator + ::Init( + mic_array_decimator_conf_t &decimator_conf, + unsigned pdm_out_words_per_mic) { this->stage1.filter_coef = (const uint32_t*)decimator_conf.filter_conf[0].coef; this->stage1.pdm_history_ptr = (uint32_t*)decimator_conf.filter_conf[0].state; this->stage1.pdm_history_sz = decimator_conf.filter_conf[0].state_words_per_channel; + this->stage1.pdm_out_words_per_mic = pdm_out_words_per_mic; memset(this->stage1.pdm_history_ptr, 0x55, sizeof(int32_t) * MIC_COUNT * this->stage1.pdm_history_sz); - for(int k = 0; k < MIC_COUNT; k++){ - filter_fir_s32_init(&this->stage2.filters[k], decimator_conf.filter_conf[1].state + (k * decimator_conf.filter_conf[1].state_words_per_channel), - decimator_conf.filter_conf[1].num_taps, decimator_conf.filter_conf[1].coef, decimator_conf.filter_conf[1].shr); + if(decimator_conf.num_filter_stages == 2) { + for(int k = 0; k < MIC_COUNT; k++){ + filter_fir_s32_init(&this->stage2.filters[k], decimator_conf.filter_conf[1].state + (k * decimator_conf.filter_conf[1].state_words_per_channel), + decimator_conf.filter_conf[1].num_taps, decimator_conf.filter_conf[1].coef, decimator_conf.filter_conf[1].shr); + } + this->stage2.decimation_factor = decimator_conf.filter_conf[1].decimation_factor; } - this->stage2.decimation_factor = decimator_conf.filter_conf[1].decimation_factor; } @@ -175,11 +198,32 @@ void mic_array::TwoStageDecimator } +template +void mic_array::TwoStageDecimator + ::ProcessBlockSingleStage( + int32_t *sample_out, + uint32_t *pdm_block) +{ + uint32_t* hist = this->stage1.pdm_history_ptr; + for(unsigned k = 0; k < this->stage1.pdm_out_words_per_mic; k++) { + hist[0] = pdm_block[k]; + sample_out[k] = fir_1x16_bit(hist, 
this->stage1.filter_coef); + shift_buffer(hist); + } +} + static inline void mic_array::shift_buffer(uint32_t* buff) { #if defined(__XS3A__) uint32_t* src = &buff[-1]; asm volatile("vldd %0[0]; vstd %1[0];" :: "r"(src), "r"(buff) : "memory" ); - #endif // __XS3A__ + #elif defined(__VX4B__) + uint32_t* src = &buff[-1]; + asm volatile("xm.vldd %0; xm.vstd %1;" :: "r"(src), "r"(buff) : "memory" ); + #else // C fallback + for (unsigned k = 7; k > 0; k--) { + buff[k] = buff[k-1]; + } + #endif } diff --git a/lib_mic_array/api/mic_array/cpp/MicArray.hpp b/lib_mic_array/api/mic_array/cpp/MicArray.hpp index f9bf9dbf..8cb7d72d 100644 --- a/lib_mic_array/api/mic_array/cpp/MicArray.hpp +++ b/lib_mic_array/api/mic_array/cpp/MicArray.hpp @@ -5,8 +5,9 @@ #include #include -#include +#include #include +#include #include #include @@ -177,6 +178,19 @@ namespace mic_array { * OutputHandler. */ void ThreadEntry(); + + /** + * @brief Entry point for the low-power single-stage decimation thread. + * + * This function loops, collecting PDM + * blocks from @ref PdmRx and running the single-stage decimator. Each + * block produces `pdm_out_words_per_channel` output samples, sent as one frame + * over the @ref OutputHandler frame channel. On shutdown it calls @ref PdmRx::Shutdown() + * and then completes the output shutdown handshake. 
+ */ + void ThreadEntryLowPower_1Mic1StgDecimator(); + + static constexpr unsigned MAX_PDM_OUT_WORDS_PER_CHANNEL = 10; }; } @@ -208,3 +222,30 @@ void mic_array::MicArray +void mic_array::MicArray::ThreadEntryLowPower_1Mic1StgDecimator() +{ + volatile bool shutdown = false; + chanend_t c_frame_out = OutputHandler.FrameTx.GetChannel(); + unsigned pdm_out_words_per_channel = PdmRx.pdm_out_words_per_channel; + int32_t sample_out[MAX_PDM_OUT_WORDS_PER_CHANNEL]; + + while(!shutdown){ + uint32_t *pdm_samples = PdmRx.GetPdmBlockLowPowerOneMic(); + Decimator.ProcessBlockSingleStage(sample_out, pdm_samples); + shutdown = ma_frame_tx(c_frame_out, + reinterpret_cast(sample_out), + 1, pdm_out_words_per_channel); + } + PdmRx.Shutdown(); + OutputHandler.CompleteShutdown(); // Exchange end token with the app to close channel and indicate completion. + // ma_shutdown() will now return + return; +} \ No newline at end of file diff --git a/lib_mic_array/api/mic_array/cpp/PdmRx.hpp b/lib_mic_array/api/mic_array/cpp/PdmRx.hpp index 938b1968..31154817 100644 --- a/lib_mic_array/api/mic_array/cpp/PdmRx.hpp +++ b/lib_mic_array/api/mic_array/cpp/PdmRx.hpp @@ -164,9 +164,10 @@ extern "C" { : : "r"(p_pdm_mics), "r"(XS1_SETC_IE_MODE_INTERRUPT) : "r11" ); - #endif // __XS3A__ + #else + #warning "PDM rx ISR not supported yet on this architecture." + #endif } - } @@ -325,8 +326,9 @@ namespace mic_array { uint32_t* blocks[2]; volatile bool shutdown = false; volatile bool shutdown_complete = false; - uint32_t pdm_out_words_per_channel; // number of 32-sample subblocks per channel uint32_t num_phases; + uint32_t num_channels_in; + uint32_t num_channels_out; /** * @brief Streaming channel over which PDM blocks are sent. @@ -350,21 +352,6 @@ namespace mic_array { public: - /** - * @brief Read a word of PDM data from the port. - * - * @return A `uint32_t` containing 32 PDM samples. If `MIC_COUNT >= 2` the - * samples from each port will be interleaved together. 
- */ - uint32_t ReadPort(); - - /** - * @brief Send a block of PDM data to a listener. - * - * @param block PDM data to send. - */ - void SendBlock(uint32_t *block); - /** * @brief Initialize the PDM RX service. * @@ -441,6 +428,8 @@ namespace mic_array { */ uint32_t* GetPdmBlock(); + uint32_t* GetPdmBlockLowPowerOneMic(); + /** * @brief Set whether dropped PDM samples should cause an assertion. * @@ -465,8 +454,9 @@ namespace mic_array { * every iteration. */ void ThreadEntry(); - }; + uint32_t pdm_out_words_per_channel; // number of 32-sample subblocks per channel + }; } ////////////////////////////////////////////// @@ -488,7 +478,7 @@ template void mic_array::StandardPdmRxService::ThreadEntry() { while(1){ - this->blocks[0][--phase] = this->ReadPort(); + this->blocks[0][--phase] = port_in(this->p_pdm_mics); if(!phase){ this->phase = this->num_phases; @@ -496,7 +486,7 @@ void mic_array::StandardPdmRxService::ThreadEntry() this->blocks[0] = this->blocks[1]; this->blocks[1] = ready_block; - this->SendBlock(ready_block); + s_chan_out_word(this->c_pdm_blocks.end_a, reinterpret_cast(ready_block)); // Check for shutdown only after sending a block so we know there's atleast one pending block at the time of shutdown if(this->shutdown) { @@ -508,29 +498,15 @@ void mic_array::StandardPdmRxService::ThreadEntry() } -template -uint32_t mic_array::StandardPdmRxService - ::ReadPort() -{ - return port_in(this->p_pdm_mics); -} - - -template -void mic_array::StandardPdmRxService - ::SendBlock(uint32_t *block) -{ - s_chan_out_word(this->c_pdm_blocks.end_a, - reinterpret_cast( &block[0] )); -} - template void mic_array::StandardPdmRxService ::Init(port_t p_pdm_mics, pdm_rx_conf_t &pdm_rx_config) { + this->num_channels_in = pdm_rx_config.num_channels_in; + this->num_channels_out = pdm_rx_config.num_channels_out; this->pdm_out_block_ptr = pdm_rx_config.pdm_out_block; this->pdm_out_words_per_channel = pdm_rx_config.pdm_out_words_per_channel; - this->num_phases = CHANNELS_IN * 
this->pdm_out_words_per_channel; + this->num_phases = this->num_channels_in * this->pdm_out_words_per_channel; this->phase = this->num_phases; @@ -595,14 +571,15 @@ template uint32_t* mic_array::StandardPdmRxService ::GetPdmBlock() { - // Has to be in a critical section to avoid race conditions with ISR. - interrupt_mask_all(); - // Limiting credit to 1 prevents the ISR from attempting to enqueue an additional block - // while two buffers are already occupied (which would happen if the ISR gets triggered between interrupt_unmask_all() - // and s_chan_in_word()), thereby avoiding deadlock. - pdm_rx_isr_context.credit = 1; - interrupt_unmask_all(); - + if(this->isr_used) { + // Has to be in a critical section to avoid race conditions with ISR. + interrupt_mask_all(); + // Limiting credit to 1 prevents the ISR from attempting to enqueue an additional block + // while two buffers are already occupied (which would happen if the ISR gets triggered between interrupt_unmask_all() + // and s_chan_in_word()), thereby avoiding deadlock. + pdm_rx_isr_context.credit = 1; + interrupt_unmask_all(); + } uint32_t* full_block = (uint32_t*) s_chan_in_word(this->c_pdm_blocks.end_b); mic_array::deinterleave_pdm_samples(full_block, this->pdm_out_words_per_channel); @@ -619,6 +596,36 @@ uint32_t* mic_array::StandardPdmRxService return this->pdm_out_block_ptr; } +template +uint32_t* mic_array::StandardPdmRxService + ::GetPdmBlockLowPowerOneMic() +{ + if(this->isr_used) { + // Has to be in a critical section to avoid race conditions with ISR. + interrupt_mask_all(); + // Limiting credit to 1 prevents the ISR from attempting to enqueue an additional block + // while two buffers are already occupied (which would happen if the ISR gets triggered between interrupt_unmask_all() + // and s_chan_in_word()), thereby avoiding deadlock. 
+ pdm_rx_isr_context.credit = 1; + interrupt_unmask_all(); + } + + uint32_t* full_block = (uint32_t*) s_chan_in_word(this->c_pdm_blocks.end_b); + mic_array::deinterleave_pdm_samples<1>(full_block, this->pdm_out_words_per_channel); + + uint32_t (*block)[1] = (uint32_t (*)[1]) full_block; + uint32_t *out_ptr; + for(int ch = 0; ch < 1; ch++) { + out_ptr = this->pdm_out_block_ptr + (ch * this->pdm_out_words_per_channel); + for(int sb = 0; sb < this->pdm_out_words_per_channel; sb++) { + unsigned d = this->channel_map[ch]; + out_ptr[sb] = block[this->pdm_out_words_per_channel - 1 - sb][d]; + } + } + return this->pdm_out_block_ptr; +} + + template void mic_array::StandardPdmRxService ::Shutdown() { @@ -641,7 +648,8 @@ void mic_array::StandardPdmRxService continue; } // Now that we're sure that PdmRx thread has exited, drain any pending blocks - SELECT_RES(CASE_THEN(this->c_pdm_blocks.end_b, rx_pending_block), + chanend_t c_pdm_blocks_end_b = this->c_pdm_blocks.end_b; + SELECT_RES(CASE_THEN(c_pdm_blocks_end_b, rx_pending_block), DEFAULT_THEN(empty)) { rx_pending_block: diff --git a/lib_mic_array/api/mic_array/cpp/ThreeStageDecimator.hpp b/lib_mic_array/api/mic_array/cpp/ThreeStageDecimator.hpp index 57cc436e..5510bf53 100644 --- a/lib_mic_array/api/mic_array/cpp/ThreeStageDecimator.hpp +++ b/lib_mic_array/api/mic_array/cpp/ThreeStageDecimator.hpp @@ -52,6 +52,8 @@ class ThreeStageDecimator * Per-mic channel filter state (PDM history) size in 32-bit words for stage-1 filter. */ unsigned pdm_history_sz; + + unsigned pdm_out_words_per_mic; } stage1; /** @@ -98,7 +100,7 @@ class ThreeStageDecimator * * @param decimator_conf Decimator pipeline configuration. */ - void Init(mic_array_decimator_conf_t &decimator_conf); + void Init(mic_array_decimator_conf_t &decimator_conf, unsigned pdm_out_words_per_mic); /** * @brief Process one block of PDM data. 
@@ -137,12 +139,15 @@ class ThreeStageDecimator ////////////////////////////////////////////// template -void mic_array::ThreeStageDecimator::Init( - mic_array_decimator_conf_t &decimator_conf) +void mic_array::ThreeStageDecimator + ::Init( + mic_array_decimator_conf_t &decimator_conf, + unsigned pdm_out_words_per_mic) { this->stage1.filter_coef = (const uint32_t*)decimator_conf.filter_conf[0].coef; this->stage1.pdm_history_ptr = (uint32_t*)decimator_conf.filter_conf[0].state; this->stage1.pdm_history_sz = decimator_conf.filter_conf[0].state_words_per_channel; + this->stage1.pdm_out_words_per_mic = pdm_out_words_per_mic; memset(this->stage1.pdm_history_ptr, 0x55, sizeof(int32_t) * MIC_COUNT * this->stage1.pdm_history_sz); diff --git a/lib_mic_array/api/mic_array/etc/xcore_compat.h b/lib_mic_array/api/mic_array/etc/xcore_compat.h index 2d70e63d..c4c5e9b2 100644 --- a/lib_mic_array/api/mic_array/etc/xcore_compat.h +++ b/lib_mic_array/api/mic_array/etc/xcore_compat.h @@ -32,11 +32,9 @@ extern "C" { #else //__XC__ -#include #include #include #include #include #endif //__XC__ - diff --git a/lib_mic_array/api/mic_array/impl/setup_impl.h b/lib_mic_array/api/mic_array/impl/setup_impl.h index cf967363..1eb68780 100644 --- a/lib_mic_array/api/mic_array/impl/setup_impl.h +++ b/lib_mic_array/api/mic_array/impl/setup_impl.h @@ -14,4 +14,4 @@ unsigned mic_array_mclk_divider( return master_clock_freq / pdm_clock_freq; } -#endif \ No newline at end of file +#endif diff --git a/lib_mic_array/api/mic_array/mic_array_conf_struct.h b/lib_mic_array/api/mic_array/mic_array_conf_struct.h index 3eff142a..d3ad27e2 100644 --- a/lib_mic_array/api/mic_array/mic_array_conf_struct.h +++ b/lib_mic_array/api/mic_array/mic_array_conf_struct.h @@ -136,6 +136,9 @@ typedef struct { * stage decimation filter's decimation factor (in case of a 2 stage decimator). 
*/ unsigned pdm_out_words_per_channel; // per channel pdm rx output block (input to the decimator) size + + unsigned num_channels_in; + unsigned num_channels_out; }pdm_rx_conf_t; diff --git a/lib_mic_array/api/mic_array/mic_array_task.h b/lib_mic_array/api/mic_array/mic_array_task.h index 8b64e171..19d920a4 100644 --- a/lib_mic_array/api/mic_array/mic_array_task.h +++ b/lib_mic_array/api/mic_array/mic_array_task.h @@ -56,6 +56,8 @@ void mic_array_init(pdm_rx_resources_t *pdm_res, const unsigned *channel_map, un MA_C_API void mic_array_init_custom_filter(pdm_rx_resources_t* pdm_res, mic_array_conf_t* mic_array_conf); +MA_C_API +void mic_array_init_custom_filter_1mic_1stg_decimator(pdm_rx_resources_t* pdm_res, mic_array_conf_t* mic_array_conf); /** * @brief Start the mic array task * diff --git a/lib_mic_array/api/mic_array/setup.h b/lib_mic_array/api/mic_array/setup.h index 479d2f2f..64746883 100644 --- a/lib_mic_array/api/mic_array/setup.h +++ b/lib_mic_array/api/mic_array/setup.h @@ -122,4 +122,4 @@ unsigned mic_array_mclk_divider( #include "mic_array/impl/setup_impl.h" -C_API_END \ No newline at end of file +C_API_END diff --git a/lib_mic_array/lib_build_info.cmake b/lib_mic_array/lib_build_info.cmake index b4c7e5ba..9a5cd3f2 100644 --- a/lib_mic_array/lib_build_info.cmake +++ b/lib_mic_array/lib_build_info.cmake @@ -1,6 +1,6 @@ set(LIB_NAME lib_mic_array) set(LIB_VERSION 6.0.0) -set(LIB_DEPENDENT_MODULES "lib_xcore_math(2.4.0)") +set(LIB_DEPENDENT_MODULES "lib_xcore_math(develop)") #TODO pin version set(LIB_INCLUDES api api/mic_array diff --git a/lib_mic_array/src/deinterleave16.S b/lib_mic_array/src/deinterleave16.S index efebb43e..2860f442 100644 --- a/lib_mic_array/src/deinterleave16.S +++ b/lib_mic_array/src/deinterleave16.S @@ -102,7 +102,7 @@ deinterleave16: std f, b, x[6] std h, d, x[7] - + // part2 ldd a, b, x[0] ldd c, d, x[4] unzip b, d, 0 @@ -143,3 +143,127 @@ deinterleave16: .size deinterleave16, .L_end - deinterleave16 #endif // __XS3A__ + +#if 
defined(__VX4A__) || defined(__VX4B__) + +#define FUNCTION_NAME deinterleave16 +#define NSTACK_WORDS 8 +#define NSTACK_BYTES (NSTACK_WORDS*4) + +#define x a0 +#define a a1 +#define b a2 + +#define c s2 +#define d s3 +#define e s4 +#define f s5 +#define g s6 +#define h s7 + +// Note: ldd and std are reversed in vx4 + +.p2align 4 +.globl FUNCTION_NAME +.type FUNCTION_NAME,@function +FUNCTION_NAME: + // save regs + xm.entsp NSTACK_BYTES + xm.stdsp s3,s2,0 + xm.stdsp s5,s4,8 + xm.stdsp s7,s6,16 + + // Lower half + xm.ldd b, a, (8*3)(x) + xm.ldd d, c, (8*2)(x) + xm.ldd f, e, (8*1)(x) + xm.ldd h, g, (8*0)(x) + + xm.unzip b, a, 2 + xm.unzip d, c, 2 + xm.unzip f, e, 2 + xm.unzip h, g, 2 + + xm.unzip c, a, 1 + xm.unzip d, b, 1 + xm.unzip g, e, 1 + xm.unzip h, f, 1 + + xm.unzip e, a, 0 + xm.unzip f, b, 0 + xm.unzip g, c, 0 + xm.unzip h, d, 0 + + xm.std a, e, (8*0)(x) + xm.std c, g, (8*1)(x) + xm.std b, f, (8*2)(x) + xm.std d, h, (8*3)(x) + + // Upper half + xm.ldd b, a, (8*7)(x) + xm.ldd d, c, (8*6)(x) + xm.ldd f, e, (8*5)(x) + xm.ldd h, g, (8*4)(x) + + xm.unzip b, a, 2 + xm.unzip d, c, 2 + xm.unzip f, e, 2 + xm.unzip h, g, 2 + + xm.unzip c, a, 1 + xm.unzip d, b, 1 + xm.unzip g, e, 1 + xm.unzip h, f, 1 + + xm.unzip e, a, 0 + xm.unzip f, b, 0 + xm.unzip g, c, 0 + xm.unzip h, d, 0 + + xm.std a, e, (8*4)(x) + xm.std c, g, (8*5)(x) + xm.std b, f, (8*6)(x) + xm.std d, h, (8*7)(x) + + // part2 + xm.ldd b, a, (8*0)(x) + xm.ldd d, c, (8*4)(x) + xm.unzip b, d, 0 + xm.unzip a, c, 0 + xm.std b, a, (8*4)(x) + xm.std d, c, (8*0)(x) + + xm.ldd b, a, (8*1)(x) + xm.ldd d, c, (8*5)(x) + xm.unzip b, d, 0 + xm.unzip a, c, 0 + xm.std b, a, (8*5)(x) + xm.std d, c, (8*1)(x) + + xm.ldd b, a, (8*2)(x) + xm.ldd d, c, (8*6)(x) + xm.unzip b, d, 0 + xm.unzip a, c, 0 + xm.std b, a, (8*6)(x) + xm.std d, c, (8*2)(x) + + xm.ldd b, a, (8*3)(x) + xm.ldd d, c, (8*7)(x) + xm.unzip b, d, 0 + xm.unzip a, c, 0 + xm.std b, a, (8*7)(x) + xm.std d, c, (8*3)(x) + + // restore regs + xm.lddsp s3,s2,0 + xm.lddsp 
s5,s4,8 + xm.lddsp s7,s6,16 + xm.retsp NSTACK_BYTES + +.size FUNCTION_NAME, . -FUNCTION_NAME +.resource_const FUNCTION_NAME, "stack_frame_bytes", NSTACK_BYTES +.resource_list_empty FUNCTION_NAME, "callees" +.resource_list_empty FUNCTION_NAME, "tail_callees" +.resource_list_empty FUNCTION_NAME, "parallel_callees" + +#endif diff --git a/lib_mic_array/src/deinterleave2.S b/lib_mic_array/src/deinterleave2.S index 6c08f352..923c5461 100644 --- a/lib_mic_array/src/deinterleave2.S +++ b/lib_mic_array/src/deinterleave2.S @@ -41,3 +41,28 @@ deinterleave2: .size deinterleave2, .L_end - deinterleave2 #endif // __XS3A__ + +#if defined(__VX4A__) || defined(__VX4B__) + +#define FUNCTION_NAME deinterleave2 +#define NSTACK_BYTES 16 // minimum + +// Note: ldd and std are reversed in vx4 + +.p2align 1 +.globl FUNCTION_NAME +.type FUNCTION_NAME,@function +FUNCTION_NAME: + xm.entsp NSTACK_BYTES + xm.ldd a2, a1, 0(a0) + xm.unzip a2, a1, 0 + xm.std a1, a2, 0(a0) + xm.retsp NSTACK_BYTES + +.size FUNCTION_NAME, . 
-FUNCTION_NAME +.resource_const FUNCTION_NAME, "stack_frame_bytes", NSTACK_BYTES +.resource_list_empty FUNCTION_NAME, "callees" +.resource_list_empty FUNCTION_NAME, "tail_callees" +.resource_list_empty FUNCTION_NAME, "parallel_callees" + +#endif // __VX4A__ || __VX4B__ diff --git a/lib_mic_array/src/deinterleave4.S b/lib_mic_array/src/deinterleave4.S index 0d383e9c..5a4a8ef9 100644 --- a/lib_mic_array/src/deinterleave4.S +++ b/lib_mic_array/src/deinterleave4.S @@ -85,3 +85,49 @@ deinterleave4: .size deinterleave4, .L_end - deinterleave4 #endif // __XS3A__ + +#if defined(__VX4A__) || defined(__VX4B__) + +#define FUNCTION_NAME deinterleave4 +#define NSTACK_WORDS 4 +#define NSTACK_BYTES (NSTACK_WORDS*4) + +#define x a0 +#define a a1 +#define b a2 +#define c s2 +#define d s3 + +// Note: ldd and std are reversed in vx4 + +.p2align 1 +.globl FUNCTION_NAME +.type FUNCTION_NAME,@function +FUNCTION_NAME: + xm.entsp NSTACK_BYTES + xm.stdsp s2, s3, 0*8 + + // Save and Load + xm.ldd b, a, 8(a0) + xm.ldd d, c, 0(a0) + + // Deinterleave + xm.unzip b, a, 1 + xm.unzip d, c, 1 + xm.unzip c, a, 0 + xm.unzip d, b, 0 + + // Store and Restore regs + xm.std a, c, 0(a0) + xm.std b, d, 8(a0) + + xm.lddsp s2, s3, 0*8 + xm.retsp NSTACK_BYTES + +.size FUNCTION_NAME, . 
-FUNCTION_NAME +.resource_const FUNCTION_NAME, "stack_frame_bytes", NSTACK_BYTES +.resource_list_empty FUNCTION_NAME, "callees" +.resource_list_empty FUNCTION_NAME, "tail_callees" +.resource_list_empty FUNCTION_NAME, "parallel_callees" + +#endif diff --git a/lib_mic_array/src/deinterleave8.S b/lib_mic_array/src/deinterleave8.S index c3d6a955..cc9a876c 100644 --- a/lib_mic_array/src/deinterleave8.S +++ b/lib_mic_array/src/deinterleave8.S @@ -115,3 +115,73 @@ deinterleave8: .size deinterleave8, .L_end - deinterleave8 #endif // __XS3A__ + + +#if defined(__VX4A__) || defined(__VX4B__) + +#define FUNCTION_NAME deinterleave8 +#define NSTACK_WORDS 8 +#define NSTACK_BYTES (NSTACK_WORDS*4) + +// Note: ldd and std are reversed in vx4 + +#define x a0 +#define a a1 +#define b a2 + +#define c s2 +#define d s3 +#define e s4 +#define f s5 +#define g s6 +#define h s7 + +.p2align 1 +.globl FUNCTION_NAME +.type FUNCTION_NAME,@function +FUNCTION_NAME: + // save regs + xm.entsp NSTACK_BYTES + xm.stdsp c, d, 0*8 + xm.stdsp e, f, 1*8 + xm.stdsp g, h, 2*8 + + // deinterleave + xm.ldd b, a, 24(x) + xm.ldd d, c, 16(x) + xm.ldd f, e, 8(x) + xm.ldd h, g, 0(x) + + xm.unzip b, a, 2 + xm.unzip d, c, 2 + xm.unzip f, e, 2 + xm.unzip h, g, 2 + + xm.unzip c, a, 1 + xm.unzip d, b, 1 + xm.unzip g, e, 1 + xm.unzip h, f, 1 + + xm.unzip e, a, 0 + xm.unzip f, b, 0 + xm.unzip g, c, 0 + xm.unzip h, d, 0 + + xm.std a, e, 0(a0) + xm.std c, g, 8(a0) + xm.std b, f, 16(a0) + xm.std d, h, 24(a0) + + // restore regs + xm.lddsp c, d, 0*8 + xm.lddsp e, f, 1*8 + xm.lddsp g, h, 2*8 + xm.retsp NSTACK_BYTES + +.size FUNCTION_NAME, . 
-FUNCTION_NAME +.resource_const FUNCTION_NAME, "stack_frame_bytes", NSTACK_BYTES +.resource_list_empty FUNCTION_NAME, "callees" +.resource_list_empty FUNCTION_NAME, "tail_callees" +.resource_list_empty FUNCTION_NAME, "parallel_callees" + +#endif // __VX4A__ || __VX4B__ diff --git a/lib_mic_array/src/fir_1x16_bit.S b/lib_mic_array/src/fir_1x16_bit.S index 576d4ef9..833f0ee3 100644 --- a/lib_mic_array/src/fir_1x16_bit.S +++ b/lib_mic_array/src/fir_1x16_bit.S @@ -72,3 +72,74 @@ macc_coeffs: .cc_bottom fir_1x16_bit.func #endif // __XS3A__ + + +#if defined(__VX4A__) || defined(__VX4B__) + +/** + * This function is the optimal FIR on a 1-bit signal with 16-bit coefficients. + * + * NOTE: This version is optimized for the mic array and takes only a single block of coefficients + * + * a0: argument 1, signal (word aligned) + * a1: argument 2, coefficients (arranged as 16 1-bit arrays, word aligned) + * a2: spare + * a3: spare + * t3: spare +*/ + +#define FUNCTION_NAME fir_1x16_bit +#define NSTACK_WORDS 16 +#define NSTACK_BYTES (NSTACK_WORDS*4) + +.p2align 4 +.globl FUNCTION_NAME +.type FUNCTION_NAME,@function +FUNCTION_NAME: + { li a3, 32 ; xm.entsp NSTACK_BYTES} + { slli t3, a3, 3 ; xm.vclrdr} + { xm.nop ; xm.vsetc t3} + { xm.nop ; xm.vldc a0} + { add a1, a1, a3 ; xm.vlmaccrb a1} + { add a1, a1, a3 ; xm.vlmaccrb a1} + { add a1, a1, a3 ; xm.vlmaccrb a1} + { add a1, a1, a3 ; xm.vlmaccrb a1} + { add a1, a1, a3 ; xm.vlmaccrb a1} + { add a1, a1, a3 ; xm.vlmaccrb a1} + { add a1, a1, a3 ; xm.vlmaccrb a1} + { add a1, a1, a3 ; xm.vlmaccrb a1} + { add a1, a1, a3 ; xm.vlmaccrb a1} + { add a1, a1, a3 ; xm.vlmaccrb a1} + { add a1, a1, a3 ; xm.vlmaccrb a1} + { add a1, a1, a3 ; xm.vlmaccrb a1} + { add a1, a1, a3 ; xm.vlmaccrb a1} + { add a1, a1, a3 ; xm.vlmaccrb a1} + { add a1, a1, a3 ; xm.vlmaccrb a1} + { addi t3,sp, 0 ; xm.vlmaccrb a1} + //TODO Below we could save max of 2 cycles? 
+ xm.vstr t3 + { xm.vclrdr; addi a2, sp, 0} + xm.vldc t3 + xm.ldap t3, macc_coeffs + xm.vlmaccr0 t3 + xm.vlmaccr1 t3 + { addi a2, a2, 4 ; xm.vstr a2} + xm.vstd a2 + xm.lddsp a0, a1, 0 + xm.zip a1, a0, 4 + slli a0, a0, 8 + xm.retsp NSTACK_BYTES + +.size FUNCTION_NAME, . -FUNCTION_NAME +.resource_const FUNCTION_NAME, "stack_frame_bytes", NSTACK_BYTES +.resource_list_empty FUNCTION_NAME, "callees" +.resource_list_empty FUNCTION_NAME, "tail_callees" +.resource_list_empty FUNCTION_NAME, "parallel_callees" + +// The order of these coefficients tells us that whatever gets VLMACCR1'ed last is going to be multiplied by +// the largest coefficient. Thus, if the bipolar coefficient matrix B[,] has shape 16x32, then B[0,:] must +// correspond to the LEAST significant bits of each coefficient +macc_coeffs: + .short 0x7fff, 0x4000, 0x2000, 0x1000, 0x0800, 0x0400, 0x0200, 0x0100, 0x0080, 0x0040, 0x0020, 0x0010, 0x0008, 0x0004, 0x0002, 0x0001 + +#endif // __VX4A__ || __VX4B__ diff --git a/lib_mic_array/src/mic_array_setup.c b/lib_mic_array/src/mic_array_setup.c index 3a9ceac6..2bb4a136 100644 --- a/lib_mic_array/src/mic_array_setup.c +++ b/lib_mic_array/src/mic_array_setup.c @@ -48,11 +48,16 @@ void mic_array_resources_configure( static inline void mic_array_inpw8(const port_t p_pdm_mics) { - #if defined(__XS3A__) uint32_t tmp; + #if defined(__XS3A__) asm volatile("inpw %0, res[%1], 8" : "=r"(tmp) : "r" (p_pdm_mics)); - #endif // __XS3A__ + #elif defined(__VX4B__) + asm volatile("xm.inpw %0, %1, 8": "=r"(tmp): "r"(p_pdm_mics)); + #else + #warning "mic_array_inpw8 not supported yet on this architecture." + (void) tmp; + #endif } void mic_array_pdm_clock_start( diff --git a/lib_mic_array/src/mic_array_task.c b/lib_mic_array/src/mic_array_task.c new file mode 100644 index 00000000..85fdda5b --- /dev/null +++ b/lib_mic_array/src/mic_array_task.c @@ -0,0 +1,106 @@ +// Copyright 2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. 
+ +#include +#include +#include +#include + +#include "mic_array.h" +#include "mic_array_task_internal.hpp" + +//////////////////// +// Mic array init // +//////////////////// +void mic_array_init(pdm_rx_resources_t *pdm_res, const unsigned *channel_map, unsigned output_samp_freq) +{ + unsigned stg2_decimation_factor = (pdm_res->pdm_freq/STAGE1_DEC_FACTOR)/output_samp_freq; + assert ((output_samp_freq*STAGE1_DEC_FACTOR*stg2_decimation_factor) == pdm_res->pdm_freq); // assert if it doesn't divide cleanly + // assert if unsupported decimation factor. (for example. when starting with a pdm_freq of 3.072MHz, supported + // output sampling freqs are [48000, 32000, 16000] + assert ((stg2_decimation_factor == 2) || (stg2_decimation_factor == 3) || (stg2_decimation_factor == 6)); + + bool use_3_stg_decimator = false; + init_mic_array_storage(use_3_stg_decimator); + init_mics_default_filter(pdm_res, channel_map, stg2_decimation_factor); +} + +void mic_array_init_custom_filter(pdm_rx_resources_t* pdm_res, mic_array_conf_t* mic_array_conf) +{ + assert(pdm_res); + assert(mic_array_conf); + assert(mic_array_conf->decimator_conf.num_filter_stages == 2 || + mic_array_conf->decimator_conf.num_filter_stages == 3); + + init_mic_array_storage(mic_array_conf->decimator_conf.num_filter_stages == 3); + init_mics_custom_filter(pdm_res, mic_array_conf); + + // Configure and start clocks + const unsigned divide = pdm_res->mclk_freq / pdm_res->pdm_freq; + mic_array_resources_configure(pdm_res, divide); + mic_array_pdm_clock_start(pdm_res); +} + +void mic_array_init_custom_filter_1mic_1stg_decimator(pdm_rx_resources_t* pdm_res, mic_array_conf_t* mic_array_conf) +{ + assert(pdm_res); + assert(mic_array_conf); + assert(mic_array_conf->decimator_conf.num_filter_stages == 1); + + init_mic_array_storage(mic_array_conf->decimator_conf.num_filter_stages == 3); + init_mics_custom_filter_1mic_1stg_decimator(pdm_res, mic_array_conf); + + // Configure and start clocks + const unsigned divide = 
pdm_res->mclk_freq / pdm_res->pdm_freq; + mic_array_resources_configure(pdm_res, divide); + mic_array_pdm_clock_start(pdm_res); +} + +///////////////////// +// Mic array start // +///////////////////// + +// Parallel jobs for when XUA_PDM_MIC_USE_PDM_ISR == 0, run separate decimator and pdm rx tasks +DECLARE_JOB(default_ma_task_start_pdm, (void)); +void default_ma_task_start_pdm(void) +{ + start_pdm_task(); +} + +DECLARE_JOB(default_ma_task_start_decimator, (void)); +void default_ma_task_start_decimator() +{ + start_decimator_task(); +} + +DECLARE_JOB(default_ma_task_start_pdm_3stg, (void)); +void default_ma_task_start_pdm_3stg(void) +{ + start_pdm_task_3stg(); +} + +DECLARE_JOB(default_ma_task_start_decimator_3stg, (void)); +void default_ma_task_start_decimator_3stg(void) +{ + start_decimator_task_3stg(); +} + +void mic_array_start(chanend_t c_frames_out) +{ +#if MIC_ARRAY_CONFIG_USE_PDM_ISR + start_mic_array_pdm_isr(c_frames_out); +#else + set_output_channel(c_frames_out); + bool use_3_stg_decimator = get_decimator_stg_count(); + if (use_3_stg_decimator) { + PAR_JOBS( + PJOB(default_ma_task_start_pdm_3stg, ()), + PJOB(default_ma_task_start_decimator_3stg, ())); + } else { + PAR_JOBS( + PJOB(default_ma_task_start_pdm, ()), + PJOB(default_ma_task_start_decimator, ())); + } +#endif // MIC_ARRAY_CONFIG_USE_PDM_ISR + shutdown_mic_array(); +} diff --git a/lib_mic_array/src/mic_array_task.cpp b/lib_mic_array/src/mic_array_task.cpp index f9691534..c827a9f5 100644 --- a/lib_mic_array/src/mic_array_task.cpp +++ b/lib_mic_array/src/mic_array_task.cpp @@ -4,7 +4,6 @@ #include #include #include -#include #include #include @@ -12,41 +11,90 @@ #include "mic_array/etc/filters_default.h" #include "mic_array_task_internal.hpp" -TMicArray *g_mics = nullptr; // Global mic array instance. 
-TMicArray_3stg_decimator *g_mics_3stg = nullptr; -bool use_3_stg_decimator = false; -// NOTE: g_mics must persist (remain non-null and its backing storage valid) +static TMicArray *s_mics = nullptr; +static TMicArray_3stg_decimator *s_mics_3stg = nullptr; +static bool s_use_3_stg_decimator = false; +static bool s_run_1mic_1stg_decimator = false; +// NOTE: s_mics or s_mics_3stg must persist (remain non-null with its backing storage valid) // until mic_array_start() completes. mic_array_start() performs shutdown and -// then sets g_mics back to nullptr. +// then sets s_mics or s_mics_3stg back to nullptr. + +#if !defined (__XS2A__) +///////////////////////////// +// Static variable getters // +///////////////////////////// +bool get_decimator_stg_count(void) +{ + return s_use_3_stg_decimator; +} -#if !defined(__XS2A__) //////////////////// // Mic array init // //////////////////// -void mic_array_init(pdm_rx_resources_t *pdm_res, const unsigned *channel_map, unsigned output_samp_freq) +void init_mics_default_filter(pdm_rx_resources_t* pdm_res, const unsigned* channel_map, unsigned stg2_dec_factor) { - assert(g_mics == nullptr); // Mic array instance already initialised + static int32_t stg1_filter_state[MIC_ARRAY_CONFIG_MIC_COUNT][8]; + mic_array_decimator_conf_t decimator_conf; + memset(&decimator_conf, 0, sizeof(decimator_conf)); + mic_array_filter_conf_t filter_conf[2] = {{0}}; + + // decimator + decimator_conf.filter_conf = &filter_conf[0]; + decimator_conf.num_filter_stages = 2; + //filter stage 1 + filter_conf[0].coef = (int32_t*)stage_1_filter(stg2_dec_factor); + filter_conf[0].num_taps = 256; + filter_conf[0].decimation_factor = 32; + filter_conf[0].shr = 0; + filter_conf[0].state_words_per_channel = filter_conf[0].num_taps/32; + filter_conf[0].state = (int32_t*)stg1_filter_state; + + // filter stage 2 + filter_conf[1].coef = (int32_t*)stage_2_filter(stg2_dec_factor); + filter_conf[1].num_taps = stage_2_num_taps(stg2_dec_factor); + 
filter_conf[1].decimation_factor = stg2_dec_factor; + filter_conf[1].shr = stage_2_shift(stg2_dec_factor); + filter_conf[1].state_words_per_channel = decimator_conf.filter_conf[1].num_taps; + filter_conf[1].state = stage_2_state_memory(stg2_dec_factor); + + pdm_rx_conf_t pdm_rx_config; + pdm_rx_config.pdm_out_words_per_channel = stg2_dec_factor; + pdm_rx_config.pdm_out_block = get_pdm_rx_out_block(stg2_dec_factor); + pdm_rx_config.pdm_in_double_buf = get_pdm_rx_out_block_double_buf(stg2_dec_factor); + pdm_rx_config.num_channels_in = MIC_ARRAY_CONFIG_MIC_IN_COUNT; + pdm_rx_config.num_channels_out = MIC_ARRAY_CONFIG_MIC_COUNT; + + s_mics->Decimator.Init(decimator_conf, pdm_rx_config.pdm_out_words_per_channel); + + s_mics->PdmRx.Init(pdm_res->p_pdm_mics, pdm_rx_config); + + if(channel_map) { + s_mics->PdmRx.MapChannels(channel_map); + } - use_3_stg_decimator = false; + int divide = pdm_res->mclk_freq / pdm_res->pdm_freq; + mic_array_resources_configure(pdm_res, divide); + mic_array_pdm_clock_start(pdm_res); +} - unsigned stg2_decimation_factor = (pdm_res->pdm_freq/STAGE1_DEC_FACTOR)/output_samp_freq; - assert ((output_samp_freq*STAGE1_DEC_FACTOR*stg2_decimation_factor) == pdm_res->pdm_freq); // assert if it doesn't divide cleanly - // assert if unsupported decimation factor. (for example. 
when starting with a pdm_freq of 3.072MHz, supported - // output sampling freqs are [48000, 32000, 16000] - assert ((stg2_decimation_factor == 2) || (stg2_decimation_factor == 3) || (stg2_decimation_factor == 6)); - static uint8_t __attribute__((aligned(8))) mic_storage[sizeof(TMicArray)]; - g_mics = new (mic_storage) TMicArray(); - init_mics_default_filter(g_mics, pdm_res, channel_map, stg2_decimation_factor); +void init_mic_array_storage(bool use_3_stg_decimator) +{ + assert(s_mics == nullptr && s_mics_3stg == nullptr); // Mic array instance already initialised + s_use_3_stg_decimator = use_3_stg_decimator; + if(s_use_3_stg_decimator) { + static uint8_t __attribute__((aligned(8))) mic_storage[sizeof(TMicArray_3stg_decimator)]; + s_mics_3stg = new (mic_storage) TMicArray_3stg_decimator(); + } else { + static uint8_t __attribute__((aligned(8))) mic_storage[sizeof(TMicArray)]; + s_mics = new (mic_storage) TMicArray(); + } } template -static inline void init_from_conf(TMics*& mics_ptr, - uint8_t* storage, - pdm_rx_resources_t* pdm_res, - mic_array_conf_t* conf) { - mics_ptr = new (storage) TMics(); - mics_ptr->Decimator.Init(conf->decimator_conf); +static inline void init_from_conf(TMics*& mics_ptr, pdm_rx_resources_t* pdm_res, mic_array_conf_t* conf) +{ + mics_ptr->Decimator.Init(conf->decimator_conf, conf->pdmrx_conf.pdm_out_words_per_channel); mics_ptr->PdmRx.Init(pdm_res->p_pdm_mics, conf->pdmrx_conf); if (conf->pdmrx_conf.channel_map) { mics_ptr->PdmRx.MapChannels(conf->pdmrx_conf.channel_map); @@ -54,69 +102,72 @@ static inline void init_from_conf(TMics*& mics_ptr, mics_ptr->PdmRx.AssertOnDroppedBlock(false); } -void mic_array_init_custom_filter(pdm_rx_resources_t* pdm_res, - mic_array_conf_t* mic_array_conf) +void init_mics_custom_filter(pdm_rx_resources_t* pdm_res, mic_array_conf_t* mic_array_conf) { - assert(pdm_res); - assert(mic_array_conf); - assert(g_mics == nullptr && g_mics_3stg == nullptr); - static uint8_t __attribute__((aligned(8))) 
mic_storage[sizeof(UAnyMicArray)]; - - if(mic_array_conf->decimator_conf.num_filter_stages == 2) - { - use_3_stg_decimator = false; - init_from_conf(g_mics, mic_storage, pdm_res, mic_array_conf); - } - else if(mic_array_conf->decimator_conf.num_filter_stages == 3) - { - init_from_conf(g_mics_3stg, mic_storage, pdm_res, mic_array_conf); - use_3_stg_decimator = true; + if((mic_array_conf->decimator_conf.num_filter_stages == 1) || (mic_array_conf->decimator_conf.num_filter_stages == 2)) { + init_from_conf(s_mics, pdm_res, mic_array_conf); + } else if(mic_array_conf->decimator_conf.num_filter_stages == 3) { + init_from_conf(s_mics_3stg, pdm_res, mic_array_conf); + } else { + assert(false && "Unsupported number of filter stages in mic_array_conf"); } - // Configure and start clocks - const unsigned divide = pdm_res->mclk_freq / pdm_res->pdm_freq; - mic_array_resources_configure(pdm_res, divide); - mic_array_pdm_clock_start(pdm_res); } +void init_mics_custom_filter_1mic_1stg_decimator(pdm_rx_resources_t* pdm_res, mic_array_conf_t* mic_array_conf) +{ + assert(mic_array_conf->pdmrx_conf.pdm_out_words_per_channel <= TMicArray::MAX_PDM_OUT_WORDS_PER_CHANNEL); + s_run_1mic_1stg_decimator = true; + init_mics_custom_filter(pdm_res, mic_array_conf); +} ///////////////////// // Mic array start // ///////////////////// - -// Parallel jobs for when XUA_PDM_MIC_USE_PDM_ISR == 0, run separate decimator and pdm rx tasks -DECLARE_JOB(default_ma_task_start_pdm, (TMicArray&)); -void default_ma_task_start_pdm(TMicArray& mics){ - mics.PdmRx.ThreadEntry(); -} - -DECLARE_JOB(default_ma_task_start_decimator, (TMicArray&, chanend_t)); -void default_ma_task_start_decimator(TMicArray& mics, chanend_t c_audio_frames){ - mics.ThreadEntry(); +void set_output_channel(chanend_t c_frames_out) +{ + if (s_use_3_stg_decimator) { + assert(s_mics_3stg != nullptr); + s_mics_3stg->OutputHandler.FrameTx.SetChannel(c_frames_out); + } else { + assert(s_mics != nullptr); + 
s_mics->OutputHandler.FrameTx.SetChannel(c_frames_out); + } +} -DECLARE_JOB(default_ma_task_start_pdm_3stg, (TMicArray_3stg_decimator&)); -void default_ma_task_start_pdm_3stg(TMicArray_3stg_decimator& mics){ - mics.PdmRx.ThreadEntry(); -} +void shutdown_mic_array(void) +{ + if (s_use_3_stg_decimator) { + s_mics_3stg->~TMicArray_3stg_decimator(); + } + else { + s_mics->~TMicArray(); + } -DECLARE_JOB(default_ma_task_start_decimator_3stg, (TMicArray_3stg_decimator&, chanend_t)); -void default_ma_task_start_decimator_3stg(TMicArray_3stg_decimator& mics, chanend_t c_audio_frames){ - mics.ThreadEntry(); + s_mics_3stg = nullptr; + s_mics = nullptr; + s_use_3_stg_decimator = false; + s_run_1mic_1stg_decimator = false; } #if defined(__XS3A__) -#define CLRSR(c) asm volatile("clrsr %0" : : "n"(c)); +#define CLEAR_KEDI() asm volatile("clrsr %0" : : "n"(XS1_SR_KEDI_MASK)); +#elif defined(__VX4B__) +// VX4 processors do not have a dual-issue mode due to VLIW instructions. +// Remove any definition of CLEAR_KEDI so any accidental use of it will be caught at compile time. +#undef CLEAR_KEDI #else -#define CLRSR(c) ((void)0) -#warning "CLRSR not defined for this architecture." +#undef CLEAR_KEDI // Catch at compile time if attempting to use CLEAR_KEDI on unsupported architectures. #endif -#define CLEAR_KEDI() CLRSR(XS1_SR_KEDI_MASK) template void start_mics_with_pdm_isr(TMics* mics_ptr, chanend_t c_frames_out) { assert(mics_ptr != nullptr); - CLEAR_KEDI(); + + #if defined(__XS3A__) + CLEAR_KEDI(); // Disable dual-issue mode on XS3A processors. VX4 processors do not have a dual-issue mode. 
+ #endif + mics_ptr->OutputHandler.FrameTx.SetChannel(c_frames_out); mics_ptr->PdmRx.AssertOnDroppedBlock(false); mics_ptr->PdmRx.InstallISR(); @@ -124,56 +175,59 @@ void start_mics_with_pdm_isr(TMics* mics_ptr, chanend_t c_frames_out) mics_ptr->ThreadEntry(); } -void mic_array_start( - chanend_t c_frames_out) +void start_mic_array_pdm_isr(chanend_t c_frames_out) { #if MIC_ARRAY_CONFIG_USE_PDM_ISR - if (use_3_stg_decimator) { - start_mics_with_pdm_isr(g_mics_3stg, c_frames_out); + if (s_use_3_stg_decimator) { + start_mics_with_pdm_isr(s_mics_3stg, c_frames_out); } else { - start_mics_with_pdm_isr(g_mics, c_frames_out); - } -#else - if (use_3_stg_decimator) { - assert(g_mics_3stg != nullptr); // Attempting to start mic_array before initialising it - g_mics_3stg->OutputHandler.FrameTx.SetChannel(c_frames_out); - PAR_JOBS( - PJOB(default_ma_task_start_pdm_3stg, (*g_mics_3stg)), - PJOB(default_ma_task_start_decimator_3stg, (*g_mics_3stg, c_frames_out))); - } - else - { - g_mics->OutputHandler.FrameTx.SetChannel(c_frames_out); - PAR_JOBS( - PJOB(default_ma_task_start_pdm, (*g_mics)), - PJOB(default_ma_task_start_decimator, (*g_mics, c_frames_out))); + start_mics_with_pdm_isr(s_mics, c_frames_out); } #endif - // shutdown - if (use_3_stg_decimator) { - g_mics_3stg->~TMicArray_3stg_decimator(); - g_mics_3stg = nullptr; +} + +// Helper functions for starting separate tasks +void start_pdm_task(void) +{ + s_mics->PdmRx.ThreadEntry(); +} + +void start_decimator_task() +{ + if(s_run_1mic_1stg_decimator) { + s_mics->ThreadEntryLowPower_1Mic1StgDecimator(); } else { - g_mics->~TMicArray(); - g_mics = nullptr; + s_mics->ThreadEntry(); } } + +void start_pdm_task_3stg(void) +{ + s_mics_3stg->PdmRx.ThreadEntry(); +} + +void start_decimator_task_3stg(void) +{ + s_mics_3stg->ThreadEntry(); +} + // Override pdm data port. Only used in tests where a chanend is used as a 'port' for input pdm data. 
void _mic_array_override_pdm_port(chanend_t c_pdm) { - if (use_3_stg_decimator) { - assert(g_mics_3stg != nullptr); - g_mics_3stg->PdmRx.SetPort((port_t)c_pdm); + if (s_use_3_stg_decimator) { + assert(s_mics_3stg != nullptr); + s_mics_3stg->PdmRx.SetPort((port_t)c_pdm); } else { - assert(g_mics != nullptr); - g_mics->PdmRx.SetPort((port_t)c_pdm); + assert(s_mics != nullptr); + s_mics->PdmRx.SetPort((port_t)c_pdm); } } // C wrapper -extern "C" void _mic_array_override_pdm_port_c(chanend_t c_pdm) +MA_C_API +void _mic_array_override_pdm_port_c(chanend_t c_pdm) { _mic_array_override_pdm_port(c_pdm); } diff --git a/lib_mic_array/src/mic_array_task_internal.hpp b/lib_mic_array/src/mic_array_task_internal.hpp index 33386bd8..e9613d06 100644 --- a/lib_mic_array/src/mic_array_task_internal.hpp +++ b/lib_mic_array/src/mic_array_task_internal.hpp @@ -6,6 +6,7 @@ #include "mic_array.h" #include "mic_array/etc/filters_default.h" +#ifdef __cplusplus using TMicArray = mic_array::MicArray, mic_array::StandardPdmRxService>; -union UAnyMicArray { - TMicArray m_2stg; - TMicArray_3stg_decimator m_3stg; -}; - union UStg2_filter_state { int32_t filter_state_df_6[MIC_ARRAY_CONFIG_MIC_COUNT][STAGE2_TAP_COUNT]; int32_t filter_state_df_3[MIC_ARRAY_CONFIG_MIC_COUNT][MIC_ARRAY_32K_STAGE_2_TAP_COUNT]; @@ -54,11 +50,9 @@ union UPdmRx_out_block_double_buf { uint32_t __attribute__((aligned (8))) out_block_double_buf_df_2[2][MIC_ARRAY_CONFIG_MIC_IN_COUNT * 2]; }; -extern TMicArray* g_mics; - -UStg2_filter_state stg2_filter_state_mem; -UPdmRx_out_block pdm_rx_out_block; -UPdmRx_out_block_double_buf __attribute__((aligned (8))) pdm_rx_out_block_double_buf; // deinterleave() functions expect dword alignment +union UStg2_filter_state stg2_filter_state_mem; +union UPdmRx_out_block pdm_rx_out_block; +union UPdmRx_out_block_double_buf __attribute__((aligned (8))) pdm_rx_out_block_double_buf; // deinterleave() functions expect dword alignment inline const uint32_t* stage_1_filter(unsigned stg2_dec_factor) 
{ // stg2 decimation factor also seems to affect the stage1 filter used @@ -91,45 +85,40 @@ inline uint32_t* get_pdm_rx_out_block_double_buf(unsigned stg2_dec_factor) { : (uint32_t*)pdm_rx_out_block_double_buf.out_block_double_buf_df_2); } -inline void init_mics_default_filter(TMicArray* m, pdm_rx_resources_t* pdm_res, const unsigned* channel_map, unsigned stg2_dec_factor) { - static int32_t stg1_filter_state[MIC_ARRAY_CONFIG_MIC_COUNT][8]; - mic_array_decimator_conf_t decimator_conf; - memset(&decimator_conf, 0, sizeof(decimator_conf)); - mic_array_filter_conf_t filter_conf[2] = {{0}}; - - // decimator - decimator_conf.filter_conf = &filter_conf[0]; - decimator_conf.num_filter_stages = 2; - //filter stage 1 - filter_conf[0].coef = (int32_t*)stage_1_filter(stg2_dec_factor); - filter_conf[0].num_taps = 256; - filter_conf[0].decimation_factor = 32; - filter_conf[0].shr = 0; - filter_conf[0].state_words_per_channel = filter_conf[0].num_taps/32; - filter_conf[0].state = (int32_t*)stg1_filter_state; - - // filter stage 2 - filter_conf[1].coef = (int32_t*)stage_2_filter(stg2_dec_factor); - filter_conf[1].num_taps = stage_2_num_taps(stg2_dec_factor); - filter_conf[1].decimation_factor = stg2_dec_factor; - filter_conf[1].shr = stage_2_shift(stg2_dec_factor); - filter_conf[1].state_words_per_channel = decimator_conf.filter_conf[1].num_taps; - filter_conf[1].state = stage_2_state_memory(stg2_dec_factor); - - m->Decimator.Init(decimator_conf); - - pdm_rx_conf_t pdm_rx_config; - pdm_rx_config.pdm_out_words_per_channel = stg2_dec_factor; - pdm_rx_config.pdm_out_block = get_pdm_rx_out_block(stg2_dec_factor); - pdm_rx_config.pdm_in_double_buf = get_pdm_rx_out_block_double_buf(stg2_dec_factor); - - - m->PdmRx.Init(pdm_res->p_pdm_mics, pdm_rx_config); - - if(channel_map) { - m->PdmRx.MapChannels(channel_map); - } - int divide = pdm_res->mclk_freq / pdm_res->pdm_freq; - mic_array_resources_configure(pdm_res, divide); - mic_array_pdm_clock_start(pdm_res); -} +#endif // __cplusplus + 
+MA_C_API +bool get_decimator_stg_count(void); + +MA_C_API +void init_mic_array_storage(bool use_3_stg_decimator); + +MA_C_API +void init_mics_custom_filter(pdm_rx_resources_t* pdm_res, mic_array_conf_t* mic_array_conf); + +MA_C_API +void init_mics_custom_filter_1mic_1stg_decimator(pdm_rx_resources_t* pdm_res, mic_array_conf_t* mic_array_conf); + +MA_C_API +void init_mics_default_filter(pdm_rx_resources_t* pdm_res, const unsigned* channel_map, unsigned stg2_dec_factor); + +MA_C_API +void set_output_channel(chanend_t c_frames_out); + +MA_C_API +void shutdown_mic_array(void); + +MA_C_API +void start_decimator_task(); + +MA_C_API +void start_decimator_task_3stg(void); + +MA_C_API +void start_mic_array_pdm_isr(chanend_t c_frames_out); + +MA_C_API +void start_pdm_task(void); + +MA_C_API +void start_pdm_task_3stg(void); diff --git a/lib_mic_array/src/pdm_rx_isr.S b/lib_mic_array/src/pdm_rx_isr.S index 96c61243..f085f74d 100644 --- a/lib_mic_array/src/pdm_rx_isr.S +++ b/lib_mic_array/src/pdm_rx_isr.S @@ -105,7 +105,127 @@ pdm_rx_isr: kret .L_func_end: .cc_bottom pdm_rx_isr.function - .global pdm_rx_isr #endif //defined(__XS3A__) + +#if defined(__VX4A__) || defined(__VX4B__) + +//TODO: This function is not verified. 
+ +#define STRUCT_NAME pdm_rx_isr_context +#define FUNCTION_NAME pdm_rx_isr +#define NSTACK_WORDS 8 +#define NSTACK_BYTES (NSTACK_WORDS*4) + +.section .data.STRUCT_NAME, "aw" +.p2align 1 +.globl STRUCT_NAME +STRUCT_NAME: + .word 0 // .L_port + .word 0 // .L_buffA + .word 0 // .L_buffB + .word 0 // .L_phase1 + .word 0 // .L_phase1_reset + .word 0 // .L_c_out + .word 0 // .L_credit + .word -1 // .L_missed_blocks + +.section .text +.p2align 4 +.globl FUNCTION_NAME +.type FUNCTION_NAME,@function +FUNCTION_NAME: + unimp //TODO - this function is not verified and may contain errors, so prevent it from being accidentally used without review + addi sp, sp, -16 + sw s2, 0(sp) + sw s3, 4(sp) + sw s4, 8(sp) + sw s5, 12(sp) + + // Load base address of context + la s0, pdm_rx_isr_context + + // Read PDM port (offset 0) + lw s2, 0(s0) + xm.in s2, s2 + + // Store sample into active buffer + lw s5, 4(s0) // .L_buffA at offset 4 + lw s4, 12(s0) // .L_phase1 at offset 12 + xm.stw s2, s4(s5) + + // Emit buffer if phase reached zero + beqz s4, .L_emit + + // Decrement phase counter + addi s4, s4, -1 + sw s4, 12(s0) // Store back to .L_phase1 + + // Restore context and return + lw s2, 0(sp) + lw s3, 4(sp) + lw s4, 8(sp) + lw s5, 12(sp) + addi sp, sp, 16 + csrr t5, xm.smtval + csrw mtval, t5 + mret + +.L_emit: + // Reset phase counter + lw s2, 16(s0) // .L_phase1_reset at offset 16 + sw s2, 12(s0) // Store to .L_phase1 + + // Swap PDM buffers + lw s2, 8(s0) // .L_buffB at offset 8 + sw s2, 4(s0) // Store to .L_buffA + sw s5, 8(s0) // Store to .L_buffB + + // Check transmit credit + lw s2, 24(s0) // .L_credit at offset 24 + bnez s2, .L_has_credit + +.L_no_credit: + // Undo buffer swap + lw s2, 4(s0) // .L_buffA + sw s2, 8(s0) // .L_buffB + lw s2, 8(s0) // restored .L_buffB + sw s2, 4(s0) // .L_buffA + + // Increment missed block counter + lw s2, 28(s0) // .L_missed_blocks at offset 28 + xm.not s5, s2 + xm.assert s5 + addi s2, s2, 1 + sw s2, 28(s0) + tail .L_finish + +.L_has_credit: + // 
Consume credit and send buffer + addi s2, s2, -1 + sw s2, 24(s0) // Store back to .L_credit + lw s2, 20(s0) // .L_c_out at offset 20 + xm.out s2, s5 + +.L_finish: + // Restore context and exit ISR + lw s2, 0(sp) + lw s3, 4(sp) + lw s4, 8(sp) + lw s5, 12(sp) + addi sp, sp, 16 + csrr t5, xm.smtval + csrw mtval, t5 + mret + +.L_func_end: + +.size FUNCTION_NAME, . -FUNCTION_NAME +.resource_const FUNCTION_NAME, "stack_frame_bytes", NSTACK_BYTES +.resource_list_empty FUNCTION_NAME, "callees" +.resource_list_empty FUNCTION_NAME, "tail_callees" +.resource_list_empty FUNCTION_NAME, "parallel_callees" + + +#endif // __VX4A__ || __VX4B__ diff --git a/python/mic_array/filters.py b/python/mic_array/filters.py index 6e11fd68..20f1f3cc 100644 --- a/python/mic_array/filters.py +++ b/python/mic_array/filters.py @@ -217,9 +217,12 @@ def DecimationFactor(self): def NumStages(self): return 2 - def Filter(self, pdm_signal: np.ndarray) -> np.ndarray: + def Filter(self, pdm_signal: np.ndarray, stg1_only=False) -> np.ndarray: s1_output = self.s1.FilterInt16(pdm_signal) - return self.s2.FilterInt32(s1_output) + if stg1_only: + return s1_output + else: + return self.s2.FilterInt32(s1_output) class ThreeStageFilter(object): diff --git a/tests/requirements.txt b/tests/requirements.txt index 2dd37e03..55949430 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -1,4 +1,5 @@ -# python_version 3.11.9 +# pip_version 25.* +# python_version 3.12 pytest==8.3.2 pytest-xdist==3.6.1 diff --git a/tests/signal/BasicMicArray/CMakeLists.txt b/tests/signal/BasicMicArray/CMakeLists.txt index 86674faf..d8066a5b 100644 --- a/tests/signal/BasicMicArray/CMakeLists.txt +++ b/tests/signal/BasicMicArray/CMakeLists.txt @@ -6,7 +6,25 @@ set(XMOS_SANDBOX_DIR ${CMAKE_CURRENT_LIST_DIR}/../../../..) 
set(APP_DEPENDENT_MODULES "lib_mic_array") -set(APP_HW_TARGET XK-EVK-XU316) +# conditional depending on target +if(CMAKE_C_COMPILER_VERSION VERSION_EQUAL "3.6.0") # XS3 (XTC 15.3.1) + set(APP_HW_TARGET XK-EVK-XU316) + set(COMMON_COMPILER_FLAGS -O2 + -g + -report + -mcmodel=large + -Wno-xcore-fptrgroup + -Wno-unknown-pragmas + -Wno-format) + +else() # VX4 + set(APP_HW_TARGET XK-EVK-XU416) + set(COMMON_COMPILER_FLAGS -Os + -g + -lxc + -Wno-fptrgroup + -Wno-format) +endif() set_property(DIRECTORY "${CMAKE_CURRENT_LIST_DIR}" PROPERTY CMAKE_CONFIGURE_DEPENDS "${CMAKE_CURRENT_LIST_DIR}/test_params.json") @@ -32,6 +50,12 @@ math(EXPR NUM_FRAME_SIZE "${NUM_FRAME_SIZE} - 1") math(EXPR NUM_USE_ISR "${NUM_USE_ISR} - 1") math(EXPR NUM_SAMP_FREQ "${NUM_SAMP_FREQ} - 1") +# Remove ISR if vx4 as it's not supported + if (APP_HW_TARGET STREQUAL "XK-EVK-XU416") + set(USE_ISR_LIST "[0]") + set(NUM_USE_ISR 0) +endif() + # Count how many SAMP_FREQ entries are custom (.pkl) set(CUSTOM_PKL_COUNT 0) foreach(idx RANGE 0 ${NUM_SAMP_FREQ}) @@ -84,13 +108,7 @@ foreach(l RANGE 0 ${NUM_SAMP_FREQ}) set(CONFIG "${N_MICS}ch_${FRAME_SIZE}smp_${USE_ISR}isr_${samp_freq_str}") message(${CONFIG}) - set(APP_COMPILER_FLAGS_${CONFIG} -O2 - -g - -report - -mcmodel=large - -Wno-xcore-fptrgroup - -Wno-unknown-pragmas - -Wno-format + set(APP_COMPILER_FLAGS_${CONFIG} ${COMMON_COMPILER_FLAGS} -DMIC_ARRAY_CONFIG_USE_PDM_ISR=${USE_ISR} -DMIC_ARRAY_CONFIG_SAMPLES_PER_FRAME=${FRAME_SIZE} -DMIC_ARRAY_CONFIG_MIC_COUNT=${N_MICS} @@ -106,6 +124,15 @@ endforeach() set(APP_INCLUDES src ${AUTOGEN_OUT_DIR}) +set(APP_COMPILER_FLAGS_lp_1stg_decimator ${COMMON_COMPILER_FLAGS} + -DMIC_ARRAY_CONFIG_USE_PDM_ISR=0 + -DMIC_ARRAY_CONFIG_SAMPLES_PER_FRAME=2 + -DMIC_ARRAY_CONFIG_MIC_COUNT=1 + -DMIC_ARRAY_CONFIG_USE_DC_ELIMINATION=0 + -DAPP_SAMP_FREQ=24000 + -DUSE_CUSTOM_FILTER=0 + -DAPP_CONFIG_LOW_POWER=1 + ) XMOS_REGISTER_APP() foreach(target ${APP_BUILD_TARGETS}) diff --git a/tests/signal/BasicMicArray/micarray_device.py 
b/tests/signal/BasicMicArray/micarray_device.py index 98770b11..bc646d0e 100644 --- a/tests/signal/BasicMicArray/micarray_device.py +++ b/tests/signal/BasicMicArray/micarray_device.py @@ -41,7 +41,7 @@ def send_command(self, cmd_id): # Then, send the command ID self.send_word(cmd_id) - def process_signal(self, signal: PdmSignal): + def process_signal(self, signal: PdmSignal, sample_count_override=None): # First, send the entire signal to the device. Any output it sends over the # data probe will get queued up by our parent class, so that it doesn't back @@ -53,7 +53,10 @@ def process_signal(self, signal: PdmSignal): sig_bytes = signal.to_bytes_interleaved() self.send_bytes(sig_bytes) - sample_count = signal.len // ( 32 * self.param["s2.dec_factor"] * self.param["s3.dec_factor"]) + if sample_count_override: + sample_count = sample_count_override + else: + sample_count = signal.len // ( 32 * self.param["s2.dec_factor"] * self.param["s3.dec_factor"]) device_output = np.zeros((self.param["channels"], sample_count), dtype=np.int32) diff --git a/tests/signal/BasicMicArray/small_768k_to_12k_filter_int.pkl b/tests/signal/BasicMicArray/small_768k_to_12k_filter_int.pkl new file mode 100644 index 00000000..54a52734 Binary files /dev/null and b/tests/signal/BasicMicArray/small_768k_to_12k_filter_int.pkl differ diff --git a/tests/signal/BasicMicArray/src/app.c b/tests/signal/BasicMicArray/src/app.c index f1bae818..e8234310 100644 --- a/tests/signal/BasicMicArray/src/app.c +++ b/tests/signal/BasicMicArray/src/app.c @@ -16,12 +16,25 @@ #include #include "mic_array.h" +#include "app_config.h" #if USE_CUSTOM_FILTER #include "custom_filter.h" #endif -#define BUFF_SIZE (256) +#if APP_CONFIG_LOW_POWER +#include "small_768k_to_12k_filter.h" +#endif + +#define BUFF_SIZE (256) + +#ifndef META_OUT +#define META_OUT (0) +#endif + +#ifndef DATA_OUT +#define DATA_OUT (1) +#endif typedef chanend_t streaming_chanend_t; @@ -54,9 +67,23 @@ void hwtimer_delay_microseconds(unsigned delay) { 
hwtimer_free(tmr); } -static +static void get_filter_config(unsigned fs, filt_config_t *cfg) { -#if !USE_CUSTOM_FILTER + +#if APP_CONFIG_LOW_POWER + cfg->stg1_tap_count = SMALL_768K_TO_12K_FILTER_STG1_TAP_COUNT; + cfg->stg1_decimation_factor = SMALL_768K_TO_12K_FILTER_STG1_DECIMATION_FACTOR; + cfg->stg2_tap_count = SMALL_768K_TO_12K_FILTER_STG2_TAP_COUNT; + cfg->stg2_decimation_factor = SMALL_768K_TO_12K_FILTER_STG2_DECIMATION_FACTOR; + cfg->stg1_coef_ptr = small_768k_to_12k_filter_stg1_coef; + cfg->stg2_coef_ptr = small_768k_to_12k_filter_stg2_coef; + cfg->stg2_shr = SMALL_768K_TO_12K_FILTER_STG2_SHR; + + cfg->stg3_tap_count = 0; + cfg->stg3_decimation_factor = 1; // for PDM RX block size calculation in the test to work for both 2 and 3 stage filters + cfg->stg3_coef_ptr = NULL; + cfg->stg3_shr = 0; +#elif !USE_CUSTOM_FILTER cfg->stg1_tap_count = 256; cfg->stg1_decimation_factor = 32; @@ -194,7 +221,7 @@ int send_words_to_app(streaming_chanend_t c_to_app, char* buff, int buff_lvl) buff_lvl -= sizeof(int); hwtimer_delay_microseconds(15); } - if(buff_lvl) + if(buff_lvl) { memmove(&buff[0], &next_word[0], buff_lvl); } @@ -263,9 +290,44 @@ static void init_mic_conf(mic_array_conf_t *mic_array_conf, mic_array_filter_con mic_array_conf->pdmrx_conf.pdm_out_block = (uint32_t*)pdmrx_out_block; mic_array_conf->pdmrx_conf.pdm_in_double_buf = (uint32_t*)pdmrx_out_block_double_buf; mic_array_conf->pdmrx_conf.channel_map = channel_map; + mic_array_conf->pdmrx_conf.num_channels_in = MIC_ARRAY_CONFIG_MIC_COUNT; + mic_array_conf->pdmrx_conf.num_channels_out = MIC_ARRAY_CONFIG_MIC_COUNT; } #endif +#if APP_CONFIG_LOW_POWER +static +void init_mic_conf_lp_filter( + mic_array_conf_t *mic_array_conf, + mic_array_filter_conf_t filter_conf[2], + unsigned *channel_map) +{ + static int32_t stg1_filter_state[MIC_ARRAY_CONFIG_MIC_COUNT][8]; + static int32_t stg2_filter_state[MIC_ARRAY_CONFIG_MIC_COUNT][SMALL_768K_TO_12K_FILTER_STG2_TAP_COUNT]; + memset(mic_array_conf, 0, 
sizeof(mic_array_conf_t)); + + //decimator + mic_array_conf->decimator_conf.filter_conf = &filter_conf[0]; + mic_array_conf->decimator_conf.num_filter_stages = 1; + // filter stage 1 + filter_conf[0].coef = (int32_t*)small_768k_to_12k_filter_stg1_coef; + filter_conf[0].num_taps = SMALL_768K_TO_12K_FILTER_STG1_TAP_COUNT; + filter_conf[0].decimation_factor = SMALL_768K_TO_12K_FILTER_STG1_DECIMATION_FACTOR; + filter_conf[0].state = (int32_t*)stg1_filter_state; + filter_conf[0].shr = SMALL_768K_TO_12K_FILTER_STG1_SHR; + filter_conf[0].state_words_per_channel = filter_conf[0].num_taps/32; // works on 1-bit samples + + // pdm rx + static uint32_t pdmrx_out_block[MIC_ARRAY_CONFIG_MIC_COUNT][MIC_ARRAY_CONFIG_SAMPLES_PER_FRAME]; + static uint32_t pdmrx_out_block_double_buf[2][MIC_ARRAY_CONFIG_MIC_COUNT * MIC_ARRAY_CONFIG_SAMPLES_PER_FRAME] __attribute__((aligned(8))); + mic_array_conf->pdmrx_conf.pdm_out_words_per_channel = MIC_ARRAY_CONFIG_SAMPLES_PER_FRAME; + mic_array_conf->pdmrx_conf.pdm_out_block = (uint32_t*)pdmrx_out_block; + mic_array_conf->pdmrx_conf.pdm_in_double_buf = (uint32_t*)pdmrx_out_block_double_buf; + mic_array_conf->pdmrx_conf.channel_map = channel_map; + mic_array_conf->pdmrx_conf.num_channels_in = MIC_ARRAY_CONFIG_MIC_COUNT; + mic_array_conf->pdmrx_conf.num_channels_out = MIC_ARRAY_CONFIG_MIC_COUNT; +} +#endif // ------------------------------- THREADS ------------------------------- @@ -273,7 +335,12 @@ void app_mic( chanend_t c_pdm_in, chanend_t c_frames_out) //non-streaming { -#if !USE_CUSTOM_FILTER +#if APP_CONFIG_LOW_POWER + mic_array_conf_t mic_array_conf; + mic_array_filter_conf_t filter_conf[NUM_DECIMATION_STAGES]; + init_mic_conf_lp_filter(&mic_array_conf, filter_conf, NULL); + mic_array_init_custom_filter_1mic_1stg_decimator(&pdm_res, &mic_array_conf); +#elif !USE_CUSTOM_FILTER mic_array_init(&pdm_res, NULL, APP_SAMP_FREQ); #else mic_array_conf_t mic_array_conf; @@ -317,7 +384,6 @@ void app_output_task(chanend_t c_frames_in, chanend_t c_fifo) 
// receive the output of the mic array and send it to the host via a fifo to decouple the backpressure from xscope int32_t frame[MIC_ARRAY_CONFIG_MIC_COUNT][MIC_ARRAY_CONFIG_SAMPLES_PER_FRAME]; uint8_t fifo_idx = 0; - while(1){ ma_frame_rx(&frame[0][0], c_frames_in, MIC_ARRAY_CONFIG_MIC_COUNT, MIC_ARRAY_CONFIG_SAMPLES_PER_FRAME); memcpy(frame_fifo[fifo_idx], &frame[0][0], sizeof(ma_frame_t)); @@ -382,7 +448,7 @@ int main(){ streaming_channel_t c_to_app = s_chan_alloc(); // xscope init note: only one channel end is needed - // the second one and the xscope service will be + // the second one and the xscope service will be // automatically started and routed by the tools chanend_t xscope_chan = chanend_alloc(); xscope_mode_lossless(); diff --git a/tests/signal/BasicMicArray/src/app_config.h b/tests/signal/BasicMicArray/src/app_config.h new file mode 100644 index 00000000..06d5c0d1 --- /dev/null +++ b/tests/signal/BasicMicArray/src/app_config.h @@ -0,0 +1,23 @@ +// Copyright 2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#pragma once + +#if defined(__VX4B__) + +#include + +#ifndef PORT_MCLK_IN +#define PORT_MCLK_IN VX_PORT_1D +#endif + +#ifndef PORT_PDM_CLK +#define PORT_PDM_CLK VX_PORT_1G +#endif + +#ifndef PORT_PDM_DATA +#define PORT_PDM_DATA VX_PORT_1F +#endif + + +#endif diff --git a/tests/signal/BasicMicArray/config.xscope b/tests/signal/BasicMicArray/src/config.xscope similarity index 100% rename from tests/signal/BasicMicArray/config.xscope rename to tests/signal/BasicMicArray/src/config.xscope diff --git a/tests/signal/BasicMicArray/src/small_768k_to_12k_filter.h b/tests/signal/BasicMicArray/src/small_768k_to_12k_filter.h new file mode 100644 index 00000000..867c2290 --- /dev/null +++ b/tests/signal/BasicMicArray/src/small_768k_to_12k_filter.h @@ -0,0 +1,59 @@ +// Copyright 2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. 
+ +#ifndef SMALL_768K_TO_12K_FILTER_H +#define SMALL_768K_TO_12K_FILTER_H + +/* Autogenerated by running 'python combined.py small_768k_to_12k_filter_int.pkl -fp small_768k_to_12k_filter'. Do not edit */ + +#include + + +#define SMALL_768K_TO_12K_FILTER_STG1_DECIMATION_FACTOR 32 +#define SMALL_768K_TO_12K_FILTER_STG1_TAP_COUNT 256 +#define SMALL_768K_TO_12K_FILTER_STG1_SHR 0 /*shr not relevant for stage 1*/ + + +uint32_t small_768k_to_12k_filter_stg1_coef[128] = { + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF2DBBA, 0x1E443FC2, 0x2788F9F1, 0x1E443FC2, 0x2785DDB4, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF86BEB, 0x1C91CEC9, 0x8DC6F6F6, 0x3B193738, 0x938D7D61, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFDBC29, 0x211BF8E9, 0x323BF6FD, 0xC4C971FD, 0x884943DB, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE89A2, 0x721D515E, 0x02D0A650, 0xB407A8AB, 0x84E45917, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF26BF, 0x614B35F7, 0xE678C631, 0xE67EFACD, 0x286FD64F, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFCA48, 0x0C0BC045, 0x42E8F9F1, 0x742A203D, 0x0301253F, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF358, 0x5EE51139, 0x80C16668, 0x3019C88A, 0x77A1ACFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC6D, 0x3F5E4E54, 0xAB2F696F, 0x4D52A727, 0xAFCB63FF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF8E, 0x553F9533, 0x994F30CF, 0x299CCA9F, 0xCAA71FFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0, 0x66554CF0, 0x78DA4025, 0xB1E0F32A, 0xA660FFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x879996A5, 0x5293801C, 0x94AA5699, 0x9E1FFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF81E18C6, 0x631C0003, 0x8C663187, 0x81FFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFE01F07, 0x83E00000, 0x7C1E0F80, 0x7FFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE007, 0xFC000000, 0x03FE007F, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8, 0x00000000, 0x0001FFFF, 0xFFFFFFFF, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
0x00000000, 0x00000000, 0x00000000, 0x00000000, +}; + + +#define SMALL_768K_TO_12K_FILTER_STG2_DECIMATION_FACTOR 2 +#define SMALL_768K_TO_12K_FILTER_STG2_TAP_COUNT 48 +#define SMALL_768K_TO_12K_FILTER_STG2_SHR 1 + + +int32_t small_768k_to_12k_filter_stg2_coef[48] = { +-0x6b2e, 0x9bb0, 0x867bf, 0x6abc3, +-0x1d6951, -0x37fde1, 0x1b8845, 0xad6445, +0x6737ac, -0x11a7f35, -0x1d79ea4, 0x7ee25c, +0x3e05795, 0x27d0754, -0x49e8388, -0x834e523, +0xb8e3a0, 0xe48a501, 0xb3d7d09, -0xe33d15c, +-0x212034e8, -0x6b83320, 0x408190d3, 0x7fffffff, +0x7fffffff, 0x408190d3, -0x6b83320, -0x212034e8, +-0xe33d15c, 0xb3d7d09, 0xe48a501, 0xb8e3a0, +-0x834e523, -0x49e8388, 0x27d0754, 0x3e05795, +0x7ee25c, -0x1d79ea4, -0x11a7f35, 0x6737ac, +0xad6445, 0x1b8845, -0x37fde1, -0x1d6951, +0x6abc3, 0x867bf, 0x9bb0, -0x6b2e, +}; + +#define NUM_DECIMATION_STAGES (2) + +#endif diff --git a/tests/signal/BasicMicArray/test_mic_array.py b/tests/signal/BasicMicArray/test_mic_array.py index 9e2a4fd9..66b84b1a 100644 --- a/tests/signal/BasicMicArray/test_mic_array.py +++ b/tests/signal/BasicMicArray/test_mic_array.py @@ -126,3 +126,70 @@ def test_BasicMicArray(self, request, chans, frame_size, use_isr, fs): threshold = 12 assert result_diff <= threshold, f"max diff between python and xcore mic array output ({result_diff}) exceeds threshold ({threshold})" + + @pytest.mark.parametrize("decimator_stgs", [1], ids=["1stg"]) + def test_BasicMicArrayLowPower(self, request, decimator_stgs): + cwd = Path(request.fspath).parent + filter = self.filter(Path(__file__).parent / "small_768k_to_12k_filter_int.pkl") + + stg1_output_words_per_frame = int(filter.DecimationFactor / filter.s1.DecimationFactor) + assert stg1_output_words_per_frame == 2 + + samp_per_frame = 32 + frames = request.config.getoption("frames") + + # --- num decimator stages dependent behaviour --- + stg1_only = (decimator_stgs == 1) + samp_total = samp_per_frame * frames * (2 if stg1_only else 1) + device_output_delay_samps = 0 if stg1_only else 1 + 
sample_override = frames * 2 if stg1_only else None + output_frame_size = 2 if stg1_only else 1 + # ------------------------------------------------- + + sig = PdmSignal.random(1, samp_total) + + expected = filter.Filter(sig.signal, stg1_only=stg1_only) + + if self.print_output: + print(f"Expected output: {expected}") + + cfg = f"lp_{decimator_stgs}stg_decimator" + xe_path = f"{cwd}/bin/{cfg}/test_ma_{cfg}.xe" + assert Path(xe_path).exists(), f"Cannot find {xe_path}" + + with MicArrayDevice( + xe_path, + quiet_xgdb=not self.print_xgdb, + extra_xrun_args="--id 0" + ) as dev: + + assert dev.param["channels"] == 1 + assert dev.param["s1.dec_factor"] == filter.s1.DecimationFactor + assert dev.param["s1.tap_count"] == filter.s1.TapCount + assert dev.param["s2.dec_factor"] == filter.s2.DecimationFactor + assert dev.param["s2.tap_count"] == filter.s2.TapCount + assert dev.param["frame_size"] == output_frame_size + assert dev.param["use_isr"] == 0 + + if self.debug_print_filters: + dev.send_command(DevCommand.PRINT_FILTERS.value) + + device_output = dev.process_signal(sig, sample_count_override=sample_override) + + if self.print_output: + print(f"Device output: {device_output}") + + dev.send_command(DevCommand.TERMINATE.value) + + end = -device_output_delay_samps or None + start = device_output_delay_samps + + result_diff = np.max(np.abs(expected[:, :end] - device_output[:, start:])) + + print(f"result_diff = {result_diff}") + + threshold = 12 + assert result_diff <= threshold, ( + f"max diff between python and xcore mic array output ({result_diff}) " + f"exceeds threshold ({threshold})" + ) \ No newline at end of file diff --git a/tests/signal/BasicMicArray/test_params.json b/tests/signal/BasicMicArray/test_params.json index cdca885c..dc52dce4 100644 --- a/tests/signal/BasicMicArray/test_params.json +++ b/tests/signal/BasicMicArray/test_params.json @@ -3,4 +3,4 @@ "FRAME_SIZE": [1, 16], "USE_ISR": [0, 1], "SAMP_FREQ": [16000, 32000, 48000, "good_3_stage_filter_int.pkl"] 
-} +} \ No newline at end of file diff --git a/tests/signal/BasicMicArray/test_thdn.py b/tests/signal/BasicMicArray/test_thdn.py index fae62912..08f7f604 100644 --- a/tests/signal/BasicMicArray/test_thdn.py +++ b/tests/signal/BasicMicArray/test_thdn.py @@ -146,3 +146,135 @@ def test_thdn(self, pytestconfig, request, fs, platform): print(f"result_diff = {result_diff}") assert result_diff <= threshold, f"max diff between python and xcore mic array output ({result_diff}) exceeds threshold ({threshold})" + + + def thdn_test_lowpower_uncollect(config, platform, decimator_stgs, test_freq): + level = config.getoption("level") + if level == "smoke": + if "xcore" in platform: + return True # uncollect xcore run for smoke. Takes 2-3mins per test so run in nightly + return False + + def to_float_array(self, x): + """Convert integer array to float64 normalized to [-1, 1], or leave floats unchanged.""" + if np.issubdtype(x.dtype, np.integer): + return x.astype(np.float64) / np.iinfo(x.dtype).max + return x + + @pytest.mark.uncollect_if(func=thdn_test_lowpower_uncollect) + @pytest.mark.parametrize("platform", ["python_only", "python_xcore"]) + @pytest.mark.parametrize("decimator_stgs", [1], ids=["1stg"]) + @pytest.mark.parametrize("test_freq", [300, 5000], ids=["300hz", "5000hz"]) + def test_thdn_lowpower(self, pytestconfig, request, platform, decimator_stgs, test_freq): + duration_s = 2 # running reduced duration. 
See https://github.com/xmos/lib_mic_array/issues/289 + pdm_freq = 768_000 + + thdn_threshold = { + (12000, 300): -111.0, + (12000, 5000): -105.0, + (24000, 300): -79.0, + (24000, 5000): -76.0, + } + + cwd = Path(request.fspath).parent + filter = self.filter(Path(__file__).parent / "small_768k_to_12k_filter_int.pkl") + + # --- num decimator stages dependent behaviour --- + stg1_only = (decimator_stgs == 1) + dec_factor = filter.s1.DecimationFactor if stg1_only else filter.DecimationFactor + fs = int(pdm_freq / dec_factor) + device_output_delay_samps = 0 if stg1_only else 1 + sample_override = duration_s * fs if stg1_only else None + output_frame_size = 2 if stg1_only else 1 + print(f"decimator_stgs = {decimator_stgs}, fs = {fs}") + # ------------------------------------------------- + + cfg = f"lp_{decimator_stgs}stg_decimator" + xe_path = f"{cwd}/bin/{cfg}/test_ma_{cfg}.xe" + assert Path(xe_path).exists(), f"Cannot find {xe_path}" + + print(f"Test frequency {test_freq}\n") + + # Generate PDM input + # Test one freq at a time since low-power mic array is mono + sig_sine_pdm, sig_sine_pcm = PdmSignal.sine( + [test_freq], + [0.52], + fs, + duration_s, + fs_pdm=pdm_freq + ) + + print("Running python") + expected = filter.Filter(sig_sine_pdm.signal, stg1_only=stg1_only) + + if self.print_output: + print(f"Expected output: {expected}") + + print(f"Expected output shape: {expected.shape}") + + expected_output_float = self.to_float_array(expected) + + input_thdn = THDN(sig_sine_pcm[0], fs, fund_freq=test_freq) + python_output_thdn = THDN(expected_output_float[0], fs, fund_freq=test_freq) + + threshold = thdn_threshold[(fs, test_freq)] + + print( + f"test_freq {test_freq}, " + f"python_output_thdn = {python_output_thdn}, " + f"input_thdn = {input_thdn}" + ) + + assert python_output_thdn < threshold, ( + f"At sampling rate {fs}, test_freq {test_freq}, " + f"Python output THDN {python_output_thdn} exceeds threshold {threshold}" + ) + + if "xcore" in platform: + print("Running 
xcore") + with MicArrayDevice(xe_path, quiet_xgdb=not self.print_xgdb, extra_xrun_args="--id 0") as dev: + assert dev.param["channels"] == 1 + assert dev.param["s1.dec_factor"] == filter.s1.DecimationFactor + assert dev.param["s1.tap_count"] == filter.s1.TapCount + assert dev.param["s2.dec_factor"] == filter.s2.DecimationFactor + assert dev.param["s2.tap_count"] == filter.s2.TapCount + assert dev.param["frame_size"] == output_frame_size + assert dev.param["use_isr"] == 0 + + if self.debug_print_filters: + dev.send_command(DevCommand.PRINT_FILTERS.value) + + device_output = dev.process_signal(sig_sine_pdm, sample_count_override=sample_override) + + print(f"device_output shape: {device_output.shape}") + + device_output_float = self.to_float_array(device_output) + + xcore_output_thdn = THDN(device_output_float[0][int(fs/10):], fs, fund_freq=test_freq) + + print( + f"test_freq {test_freq}, " + f"xcore_output_thdn = {xcore_output_thdn}, " + f"input_thdn = {input_thdn}" + ) + + assert xcore_output_thdn < threshold, ( + f"At sampling rate {fs}, test_freq {test_freq}, " + f"XCORE output THDN {xcore_output_thdn} exceeds threshold {threshold}" + ) + + if self.print_output: + print(f"Device output: {device_output}") + + dev.send_command(DevCommand.TERMINATE.value) + + end = -device_output_delay_samps or None + start = device_output_delay_samps + result_diff = np.max(np.abs(expected[:, :end] - device_output[:, start:])) + threshold = 12 + print(f"result_diff = {result_diff}") + assert result_diff <= threshold, ( + f"max diff between python and xcore mic array output ({result_diff}) " + f"exceeds threshold ({threshold})" + ) \ No newline at end of file diff --git a/tests/signal/TwoStageDecimator/src/run.cpp b/tests/signal/TwoStageDecimator/src/run.cpp index 74223a17..9ee7f37a 100644 --- a/tests/signal/TwoStageDecimator/src/run.cpp +++ b/tests/signal/TwoStageDecimator/src/run.cpp @@ -82,7 +82,7 @@ void process_signal(chanend_t c_from_host) filter_conf[1].state_words_per_channel 
= filter_conf[1].num_taps; filter_conf[1].state = (int32_t*)stg2_filter_state; - dec.Init(decimator_conf); + dec.Init(decimator_conf, S2_DEC_FACT); // Host will tell us how many blocks it intends to send unsigned block_count = s_chan_in_word(c_from_host); diff --git a/tests/signal/pdmrx_isr/CMakeLists.txt b/tests/signal/pdmrx_isr/CMakeLists.txt index 0073a866..3878863d 100644 --- a/tests/signal/pdmrx_isr/CMakeLists.txt +++ b/tests/signal/pdmrx_isr/CMakeLists.txt @@ -4,16 +4,18 @@ project(tests_signal_pdmrx_isr) set(XMOS_SANDBOX_DIR ${CMAKE_CURRENT_LIST_DIR}/../../../..) -include(${CMAKE_CURRENT_LIST_DIR}/../../../examples/deps.cmake) - -set(APP_HW_TARGET XK-EVK-XU316) - -set(APP_COMPILER_FLAGS -O3 +set(APP_DEPENDENT_MODULES "lib_mic_array") +set(APP_COMPILER_FLAGS -Os -g -report - -DXASSERT_ENABLE_ASSERTIONS=1 - -DXASSERT_ENABLE_DEBUG=1 - -DXASSERT_ENABLE_LINE_NUMBERS=1) + -DLIBXCORE_XASSERT_IS_ASSERT + -DMIC_ARRAY_CONFIG_USE_PDM_ISR=1 + ) -XMOS_REGISTER_APP() +if(CMAKE_C_COMPILER_VERSION VERSION_EQUAL "3.6.0") # XS3 (XTC 15.3.1) +set(APP_HW_TARGET XK-EVK-XU316) +else() # VX4 +set(APP_HW_TARGET XK-EVK-XU416) +endif() +XMOS_REGISTER_APP() diff --git a/tests/signal/pdmrx_isr/src/app.cpp b/tests/signal/pdmrx_isr/src/app.cpp index e6a76cf4..9056ce4e 100644 --- a/tests/signal/pdmrx_isr/src/app.cpp +++ b/tests/signal/pdmrx_isr/src/app.cpp @@ -5,14 +5,14 @@ #include #include #include -#include #include #include #include #include +#include #include -#include "xassert.h" + #include "mic_array.h" #include "app.h" @@ -31,12 +31,13 @@ void app_pdm_rx_isr_setup( pdm_rx_config.pdm_out_words_per_channel = MY_STAGE2_DEC_FACTOR; pdm_rx_config.pdm_out_block = (uint32_t*)pdmrx_out_block; pdm_rx_config.pdm_in_double_buf = (uint32_t*)pdmrx_in_block_double_buf; + pdm_rx_config.num_channels_in = 1; + pdm_rx_config.num_channels_out = 1; my_pdm_rx.Init((port_t)c_from_host, pdm_rx_config); my_pdm_rx.AssertOnDroppedBlock(false); my_pdm_rx.InstallISR(); my_pdm_rx.UnmaskISR(); - } void test() 
@@ -90,7 +91,7 @@ void test() s_chan_out_word(c, frame++); pdm_samples = my_pdm_rx.GetPdmBlock(); - printf("Received block %d\n", *pdm_samples); + printf("Received block %lu\n", *pdm_samples); s_chan_out_word(c, frame++); } @@ -109,9 +110,8 @@ void assert_when_timeout() CASE_THEN(t, timer_handler)) { timer_handler: - assert(0 && msg("Error: test timed out due to deadlock")); + xassert(0 && "Error: test timed out due to deadlock"); break; } - hwtimer_free(t); } diff --git a/tests/signal/profile/app_memory/CMakeLists.txt b/tests/signal/profile/app_memory/CMakeLists.txt index bad811cf..c9410a47 100644 --- a/tests/signal/profile/app_memory/CMakeLists.txt +++ b/tests/signal/profile/app_memory/CMakeLists.txt @@ -4,7 +4,7 @@ project(test_memory) set(XMOS_SANDBOX_DIR ${CMAKE_CURRENT_LIST_DIR}/../../../../..) -include(${CMAKE_CURRENT_LIST_DIR}/../../../../examples/deps.cmake) +set(APP_DEPENDENT_MODULES "lib_mic_array") set(APP_HW_TARGET XK-VOICE-L71.xn) diff --git a/tests/signal/profile/app_memory/src/app.cpp b/tests/signal/profile/app_memory/src/app.cpp index 77c7e465..2b150e49 100644 --- a/tests/signal/profile/app_memory/src/app.cpp +++ b/tests/signal/profile/app_memory/src/app.cpp @@ -50,8 +50,12 @@ pdm_rx_resources_t pdm_res = PDM_RX_RESOURCES_DDR( #ifndef APP_N_MICS_IN #define APP_N_MICS_IN APP_N_MICS #endif -#define CLRSR(c) asm volatile("clrsr %0" : : "n"(c)); -#define CLEAR_KEDI() CLRSR(XS1_SR_KEDI_MASK) + +#if defined(__XS3A__) +#define CLEAR_KEDI() asm volatile("clrsr %0" : : "n"(XS1_SR_KEDI_MASK)); +#else +#define CLEAR_KEDI() ((void)0) // not defined in !xs3a +#endif using TMicArray = mic_array::MicArray, @@ -89,7 +93,7 @@ void app_mic_array_init() filter_conf[1].state_words_per_channel = decimator_conf.filter_conf[1].num_taps; filter_conf[1].state = (int32_t*)filter_state_df_2; - mics.Decimator.Init(decimator_conf); + mics.Decimator.Init(decimator_conf, STAGE2_DEC_FACTOR); static uint32_t pdmrx_out_block_df_2[APP_N_MICS][STAGE2_DEC_FACTOR]; static uint32_t 
__attribute__((aligned (8))) pdmrx_out_block_double_buf_df_2[2][APP_N_MICS_IN * STAGE2_DEC_FACTOR]; @@ -135,4 +139,3 @@ void app_mic_array_task(chanend_t c_frames_out) #endif } #endif - diff --git a/tests/signal/profile/app_mips/CMakeLists.txt b/tests/signal/profile/app_mips/CMakeLists.txt index 6b667605..068f602e 100644 --- a/tests/signal/profile/app_mips/CMakeLists.txt +++ b/tests/signal/profile/app_mips/CMakeLists.txt @@ -4,12 +4,16 @@ project(test_mips) set(XMOS_SANDBOX_DIR ${CMAKE_CURRENT_LIST_DIR}/../../../../..) -include(${CMAKE_CURRENT_LIST_DIR}/../../../../examples/deps.cmake) - -set(APP_HW_TARGET XK-VOICE-L71.xn) +set(APP_DEPENDENT_MODULES "lib_mic_array") set(AUTOGEN_OUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/autogen") +if(CMAKE_C_COMPILER_VERSION VERSION_EQUAL "3.6.0") # XS3 (XTC 15.3.1) +set(ISR_LIST 0 1) # ISR and thread-based PDM handling +else() # VX4 +set(ISR_LIST 0) # Only thread-based PDM handling supported on VX4 +endif() + set(NAME_MAP thread;isr) # Exactly one custom filter (.pkl) may be listed alongside numeric sample rates. 
# The .pkl file is expected to be in the ${CMAKE_CURRENT_LIST_DIR}/../../BasicMicArray/ directory @@ -49,14 +53,13 @@ foreach(SAMP_FREQ 16000 32000 48000 "good_3_stage_filter_int.pkl") set(samp_freq_str "${SAMP_FREQ}fs") endif() foreach(N_MICS 1 2) - foreach(USE_ISR 1 0) + foreach(USE_ISR ${ISR_LIST}) list(GET NAME_MAP ${USE_ISR} tmp) set(CONFIG "${N_MICS}mic_${tmp}_${samp_freq_str}") set(APP_COMPILER_FLAGS_${CONFIG} -Os -g -report - -mcmodel=large -DAPP_NAME="MIC_ARRAY_MEASURE_MIPS_${CONFIG}" -DMIC_ARRAY_CONFIG_USE_PDM_ISR=${USE_ISR} -DMIC_ARRAY_CONFIG_MIC_COUNT=${N_MICS} @@ -68,6 +71,16 @@ endforeach() set(APP_INCLUDES src src/mips/ ${AUTOGEN_OUT_DIR}) +# ---- Target specific ---- +if(CMAKE_C_COMPILER_VERSION VERSION_EQUAL "3.6.0") # XS3 (XTC 15.3.1) +set(APP_HW_TARGET src/XK-VOICE-L71.xn) +list(APPEND APP_DEPENDENT_MODULES "lib_board_support(1.5.0)") +else() # VX4 +set(APP_HW_TARGET XK-EVK-XU416) +set(APP_XC_SRCS "") # prevents including xc +endif() + + XMOS_REGISTER_APP() foreach(target ${APP_BUILD_TARGETS}) diff --git a/tests/signal/profile/app_mips/src/app.c b/tests/signal/profile/app_mips/src/app.c index a913ee68..bc8c48b1 100644 --- a/tests/signal/profile/app_mips/src/app.c +++ b/tests/signal/profile/app_mips/src/app.c @@ -12,10 +12,12 @@ #include #include -#include "sw_pll.h" #include "mic_array.h" #include "app_config.h" +// defined in app_pll.c +extern void app_pll_init(void); + #define AUDIO_FRAME_LEN ( \ MIC_ARRAY_CONFIG_MIC_IN_COUNT * MIC_ARRAY_CONFIG_SAMPLES_PER_FRAME) @@ -98,13 +100,15 @@ void init_mic_conf( mic_array_conf->pdmrx_conf.pdm_out_block = (uint32_t *)pdmrx_out_block; mic_array_conf->pdmrx_conf.pdm_in_double_buf = (uint32_t *)pdmrx_out_block_double_buf; mic_array_conf->pdmrx_conf.channel_map = channel_map; + mic_array_conf->pdmrx_conf.num_channels_in = MIC_ARRAY_CONFIG_MIC_COUNT; + mic_array_conf->pdmrx_conf.num_channels_out = MIC_ARRAY_CONFIG_MIC_COUNT; } #endif void mic_array_initialise() { // Set the pll to the required frequency for 
the mic array - sw_pll_fixed_clock(APP_MCLK_FREQUENCY); + app_pll_init(); // Set up the mic array resources #if (MIC_ARRAY_CONFIG_MIC_COUNT == 2) diff --git a/tests/signal/profile/app_mips/src/app_config.h b/tests/signal/profile/app_mips/src/app_config.h index ea44e21e..d40b700b 100644 --- a/tests/signal/profile/app_mips/src/app_config.h +++ b/tests/signal/profile/app_mips/src/app_config.h @@ -5,3 +5,10 @@ #define APP_MCLK_FREQUENCY 24576000 #define APP_PDM_CLOCK_FREQUENCY 3072000 + +#if defined(__VX4B__) +#include +#define PORT_MCLK_IN VX_PORT_1D +#define PORT_PDM_CLK VX_PORT_1G +#define PORT_PDM_DATA VX_PORT_1F +#endif // defined(__VX4B__) diff --git a/tests/signal/profile/app_mips/src/app_pll.c b/tests/signal/profile/app_mips/src/app_pll.c new file mode 100644 index 00000000..c9d266c4 --- /dev/null +++ b/tests/signal/profile/app_mips/src/app_pll.c @@ -0,0 +1,89 @@ +// Copyright 2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + +#include +#include + +#include +#include +#include +#include +#include +#include + +static +void delay_1ms(){ + hwtimer_t tmr = hwtimer_alloc(); + assert(tmr != 0); + hwtimer_delay(tmr, 100000); // 1ms with 100 MHz timer tick + hwtimer_free(tmr); +} + +void app_pll_init(void) +{ + printf("Initializing PLL\n"); + xsystem_tile_id_t tileid = get_local_tile_id(); + + // [0] PLL CTL DISABLE + uint32_t DEVICE_PLL_DISABLE = 0x00000000; + DEVICE_PLL_DISABLE = VX_PLL1_DISABLE_SET(DEVICE_PLL_DISABLE, 0); + + // [1] Mux + uint32_t DEVICE_PLL_MUX_VAL = 0x00000000; + DEVICE_PLL_MUX_VAL = VX_APP_CLK1_MUX_BIT_SET(DEVICE_PLL_MUX_VAL, 1); + DEVICE_PLL_MUX_VAL = VX_APP_CLK_IN_PHASE_BIT_SET(DEVICE_PLL_MUX_VAL, 1); + + // [2] PLL CTL + uint32_t DEVICE_PLL_CTL_VAL = 0x00000000; + DEVICE_PLL_CTL_VAL = VX_PLL1_R_DIVIDER_SET(DEVICE_PLL_CTL_VAL, 0); // input divider: 24 MHz ref / R=1 -> 24 MHz + DEVICE_PLL_CTL_VAL = VX_PLL1_F_MULTIPLIER_SET(DEVICE_PLL_CTL_VAL, 101); // feedback mult: 24 
MHz * (F + 1 + 2/5 = 102.4) -> 2457.60 MHz + DEVICE_PLL_CTL_VAL = VX_PLL1_OD_DIVIDER_SET(DEVICE_PLL_CTL_VAL, 4); // output divider: 2457.60 MHz / (OD + 1) / 2 -> 245.76 MHz + DEVICE_PLL_CTL_VAL = VX_PLL1_DISABLE_SET(DEVICE_PLL_CTL_VAL, 0); // disable PLL before configuration + DEVICE_PLL_CTL_VAL = VX_PLL1_BYPASS_SET(DEVICE_PLL_CTL_VAL, 0); // no bypass + DEVICE_PLL_CTL_VAL = VX_PLL1_NLOCK_SET(DEVICE_PLL_CTL_VAL, 1); // wait for PLL lock + + // [3] FRAC (2/5) + uint32_t DEVICE_PLL_FRAC_NOM = 0x00000000; + DEVICE_PLL_FRAC_NOM = VX_SS_FRAC_N_ENABLE_SET(DEVICE_PLL_FRAC_NOM, 1); // enable fractional mode + DEVICE_PLL_FRAC_NOM = VX_SS_FRAC_N_PERIOD_CYC_CNT_SET(DEVICE_PLL_FRAC_NOM, 4); // +1 -> 5 + DEVICE_PLL_FRAC_NOM = VX_SS_FRAC_N_F_HIGH_CYC_CNT_SET(DEVICE_PLL_FRAC_NOM, 1); // +1 -> 2 + + // [4] APP DIVIDER + uint32_t DEVICE_PLL_DIV_0 = 0x00000000; + DEVICE_PLL_DIV_0 = VX_APP_CLK_DIV_ENABLE_SET(DEVICE_PLL_DIV_0, 1); // enable app clock divider + DEVICE_PLL_DIV_0 = VX_APP_CLK_DIV_VALUE_SET(DEVICE_PLL_DIV_0, 4); // set divider to 4 -> 245.76 MHz / (4 + 1) / 2 -> 24.576 MHz + + // print reg values + printf("PLL Configuration:\n"); + printf("PLL DISABLE: 0x%08lX\n", DEVICE_PLL_DISABLE); + printf("PLL MUX VAL: 0x%08lX\n", DEVICE_PLL_MUX_VAL); + printf("PLL CTL VAL: 0x%08lX\n", DEVICE_PLL_CTL_VAL); + printf("PLL DIV VAL: 0x%08lX\n", DEVICE_PLL_DIV_0); + printf("PLL FRAC_NOM: 0x%08lX\n", DEVICE_PLL_FRAC_NOM); + + // CONFIGURE + sswitch_reg_try_write(tileid, VX_SSB_CSR_PLL1_CTRL_NUM, DEVICE_PLL_DISABLE); // disable PLL before configuration + sswitch_reg_try_write(tileid, VX_SSB_CSR_CLK_SWITCH_CTRL_NUM, DEVICE_PLL_MUX_VAL); // switch app clock to PLL1 output + sswitch_reg_try_write(tileid, VX_SSB_CSR_PLL1_CTRL_NUM, DEVICE_PLL_CTL_VAL); // configure PLL control register + sswitch_reg_try_write(tileid, VX_SSB_CSR_PLL1_FRACN_CTRL_NUM, DEVICE_PLL_FRAC_NOM); // configure PLL fractional control register + sswitch_reg_try_write(tileid, VX_SSB_CSR_APP_CLK1_DIV_NUM, DEVICE_PLL_DIV_0); // 
configure app clock divider + delay_1ms(); +} + +#elif defined(__XS3A__) + +#include "app_config.h" +#include "sw_pll.h" + +#define DEVICE_PLL_CTL_VAL 0x0A019803 // Valid for all fractional values +#define DEVICE_PLL_FRAC_NOM 0x800095F9 // 24.576000 MHz + + +void app_pll_init(void) +{ + sw_pll_fixed_clock(APP_MCLK_FREQUENCY); +} + +#endif // defined(__XS3A__) diff --git a/tests/signal/profile/app_mips/src/main.c b/tests/signal/profile/app_mips/src/main.c new file mode 100644 index 00000000..1f72ac80 --- /dev/null +++ b/tests/signal/profile/app_mips/src/main.c @@ -0,0 +1,24 @@ +// Copyright 2022-2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. + +#if defined(__VX4B__) + +#include + +#include +#include + +#include + +extern int main_tile_0(chanend_t c_audio_frames); +extern int main_tile_1(chanend_t c_audio_frames); + +// Network main +DECLARE_CHAN(c) + +NETWORK_MAIN( + TILE_MAIN(main_tile_1, 1, (CHAN(c))), + TILE_MAIN(main_tile_0, 0, (CHAN(c))) +) + +#endif // defined(__VX4B__) diff --git a/tests/signal/profile/app_mips/src/main.xc b/tests/signal/profile/app_mips/src/main.xc index 5bc6e940..415eae84 100644 --- a/tests/signal/profile/app_mips/src/main.xc +++ b/tests/signal/profile/app_mips/src/main.xc @@ -1,6 +1,8 @@ // Copyright 2022-2026 XMOS LIMITED. // This Software is subject to the terms of the XMOS Public Licence: Version 1. 
#if defined(__VX4A__) || defined(__VX4B__)

// count_mips (VX4 build): endless measurement loop.
// Each pass adds LOOP_INST to a running instruction total and the elapsed
// xm.gettime delta to a running tick total, then stores both totals to the
// globals inst_count and tick_count. Control never returns to the caller.
// NOTE(review): instruction semantics are described as the original comments
// state them; confirm details against the VX4 ISA reference.

#define FUNCTION_NAME count_mips
#define NSTACK_BYTES 16
// Value added to the instruction total on every pass; the loop body below is
// eight instruction lines long.
#define LOOP_INST 8

.section .data
.p2align 3
.global tick_count
.global inst_count
// Two words each, both initialised to zero.
tick_count: .word 0, 0
inst_count: .word 0, 0

.section .text
.p2align 3
.globl FUNCTION_NAME
.type FUNCTION_NAME,@function

FUNCTION_NAME:
  xm.entsp NSTACK_BYTES
  // Never returns, so no need to save any registers
  // Get pointers for the two counters: a0 -> tick_count, a1 -> inst_count
  lui t3, %hi(tick_count)
  addi t3, t3, %lo(tick_count)
  mv a0, t3
  lui t3, %hi(inst_count)
  addi t3, t3, %lo(inst_count)
  mv a1, t3
  // Initialize counters to 0
  { li a3, 0 ; li s2, 0 }
  { li s3, 0 ; li s4, 0 }
  // s5 holds the constant 1 used as the maccu multiplier, because we just
  // want to add
  { li s5, 1 ; nop }
  // initialize the last timestamp, and jump to the loop
  { xm.gettime a2 ; xm.bu .L_loop_top }
.p2align 4
.L_loop_top:
  // this loop should be 8 thread cycles long (no FNOPS needed)
  xm.ldcu s6, LOOP_INST
  // increment instruction counter
  xm.maccu s3, s4, s5, s6
  // Get current time
  xm.gettime s6
  // Subtract previous time (the same bundle also saves the new timestamp in
  // a2 for the next pass)
  { sub s6, s6, a2 ; mv a2, s6 }
  // increment tick counter
  xm.maccu a3, s2, s5, s6
  // Store both counters in memory, and repeat
  xm.stdi s2, a3, 0(a0)
  xm.stdi s4, s3, 0(a1)
  xm.bu .L_loop_top
.L_loop_bot:
.L_func_end:
  // Presumably never reached: the loop above always branches back.
  xm.retsp NSTACK_BYTES

.size FUNCTION_NAME, . -FUNCTION_NAME
.resource_const FUNCTION_NAME, "stack_frame_bytes", NSTACK_BYTES
.resource_list_empty FUNCTION_NAME, "callees"
.resource_list_empty FUNCTION_NAME, "tail_callees"
.resource_list_empty FUNCTION_NAME, "parallel_callees"

#endif // __VX4A__ || __VX4B__
newline at end of file diff --git a/tests/signal/profile/mic_array_memory_table.rst b/tests/signal/profile/mic_array_memory_table.rst index dd73fb18..7c00e8bd 100644 --- a/tests/signal/profile/mic_array_memory_table.rst +++ b/tests/signal/profile/mic_array_memory_table.rst @@ -12,25 +12,25 @@ - Data * - 1mic_custom - 524288 - - 12572 - - 572 - - 8070 - - 3930 + - 12636 + - 580 + - 8102 + - 3954 * - 1mic_default - 524288 - - 16580 - - 636 - - 9058 - - 6886 + - 17092 + - 644 + - 9230 + - 7218 * - 2mic_custom - 524288 - - 13964 - - 580 - - 8278 - - 5106 + - 14036 + - 588 + - 8326 + - 5122 * - 2mic_default - 524288 - - 18084 - - 636 - - 9378 - - 8070 \ No newline at end of file + - 18668 + - 652 + - 9550 + - 8466 \ No newline at end of file diff --git a/tests/signal/profile/mic_array_mips.json b/tests/signal/profile/mic_array_mips.json deleted file mode 100644 index 12f472d2..00000000 --- a/tests/signal/profile/mic_array_mips.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "1mic_isr_16000fs": 14.1459, - "1mic_isr_32000fs": 17.2336, - "1mic_isr_48000fs": 21.3055, - "1mic_thread_16000fs": 12.9298, - "1mic_thread_32000fs": 15.9536, - "1mic_thread_48000fs": 19.9614, - "2mic_isr_16000fs": 29.3098, - "2mic_isr_32000fs": 34.6215, - "2mic_isr_48000fs": 41.9335, - "2mic_thread_16000fs": 27.0056, - "2mic_thread_32000fs": 32.2854, - "2mic_thread_48000fs": 39.5335 -} \ No newline at end of file diff --git a/tests/signal/profile/mic_array_mips_table.rst b/tests/signal/profile/mic_array_mips_table.rst index b5f738ef..7619596b 100644 --- a/tests/signal/profile/mic_array_mips_table.rst +++ b/tests/signal/profile/mic_array_mips_table.rst @@ -11,48 +11,48 @@ * - 1 - ISR - 16000 - - 14.146 + - 13.906 * - 1 - ISR - 32000 - - 17.234 + - 16.945 * - 1 - ISR - 48000 - - 21.305 + - 20.970 * - 1 - THREAD - 16000 - - 12.930 + - 12.498 * - 1 - THREAD - 32000 - - 15.954 + - 15.474 * - 1 - THREAD - 48000 - - 19.961 + - 19.433 * - 2 - ISR - 16000 - - 29.310 + - 28.907 * - 2 - ISR - 32000 - - 34.621 + - 
cwd = Path(__file__).parent

# Per-toolchain settings, keyed by a substring of the reported XTC version.
# Each value is (reference MIPS json file name, PDM rx variants to measure).
# Entries are tried in insertion order.
_TOOLCHAIN_PROFILES = {
    "15.3.1": ("mic_array_mips_xs3.json", ("isr", "thread")),
    # Only thread-based PDM handling supported on VX4
    "99.99.99": ("mic_array_mips_vx4.json", ("thread",)),
}

def get_xcc_version() -> str:
    """Return the text following 'XTC version:' in the output of `xcc --version`.

    Raises:
        RuntimeError: if no line of the output starts with 'XTC version:'.
        subprocess.CalledProcessError: if `xcc --version` exits non-zero.
    """
    output = subprocess.check_output(["xcc", "--version"], text=True)
    for line in output.splitlines():
        if line.startswith("XTC version:"):
            # Split once only, so a value that itself contains ':' is kept whole.
            return line.split(":", 1)[1].strip()
    raise RuntimeError("XTC version not found")

def _toolchain_profile():
    """Return the profile tuple whose key occurs in the current XTC version.

    Raises:
        RuntimeError: if the version matches none of the known toolchains.
    """
    xcc_version = get_xcc_version()
    for marker, profile in _TOOLCHAIN_PROFILES.items():
        if marker in xcc_version:
            return profile
    raise RuntimeError(f"Unsupported XCC version: {xcc_version}")

def get_mips_file() -> Path:
    """Return the path of the reference MIPS json file for the active toolchain."""
    return cwd / _toolchain_profile()[0]

def get_isr_list():
    """Return the PDM rx variants ("isr" and/or "thread") to test on the active toolchain.

    A fresh list is returned on every call, so callers may modify it freely.
    """
    return list(_toolchain_profile()[1])
than the allowed threshold of {threshold}.\n" f"If this is expected, run test with pytest test_measure_mips --update " @@ -109,5 +138,3 @@ def test_measure_mips(pytestconfig): # RST table output rst_out = cwd / "mic_array_mips_table.rst" write_rst_table(results, rst_out) - - diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt index 557f0416..9352cc9a 100644 --- a/tests/unit/CMakeLists.txt +++ b/tests/unit/CMakeLists.txt @@ -4,10 +4,20 @@ include($ENV{XMOS_CMAKE_PATH}/xcommon.cmake) project(tests-unit) set(XMOS_SANDBOX_DIR ${CMAKE_CURRENT_LIST_DIR}/../../..) -set(APP_HW_TARGET XK-EVK-XU316) set(APP_INCLUDES src) -set(APP_DEPENDENT_MODULES "lib_mic_array" "lib_unity(2.5.2)") -set(APP_COMPILER_FLAGS -O2 +set(APP_DEPENDENT_MODULES "lib_mic_array" "lib_unity(main)") #TODO release lib_unity + +# conditional depending on target +if(CMAKE_C_COMPILER_VERSION VERSION_EQUAL "3.6.0") + set(__XS3__ ON) # XS3 (XTC 15.3.1) +else() + set(__XS3__ OFF) # VX4 +endif() + +# Target specific compiler flags +if(__XS3__) # xs3 + set(APP_HW_TARGET XK-EVK-XU316) + set(APP_COMPILER_FLAGS -O2 -g -report -mcmodel=large @@ -17,5 +27,13 @@ set(APP_COMPILER_FLAGS -O2 -Wno-format -fxscope -DUNITY_INCLUDE_CONFIG_H=1) +else() # vx4 + set(APP_HW_TARGET XK-EVK-XU416) + set(APP_COMPILER_FLAGS + -Os + -g + -Wno-fptrgroup + -DUNITY_INCLUDE_CONFIG_H=1) +endif() XMOS_REGISTER_APP() diff --git a/tests/unit/src/main.c b/tests/unit/src/main.c index 88172bff..849766a9 100644 --- a/tests/unit/src/main.c +++ b/tests/unit/src/main.c @@ -8,7 +8,6 @@ int main(int argc, const char* argv[]) { - xscope_config_io(XSCOPE_IO_BASIC); UnityGetCommandLineOptions(argc, argv); UnityBegin(argv[0]); @@ -28,8 +27,8 @@ int main(int argc, const char* argv[]) RUN_TEST_GROUP(deinterleave4); RUN_TEST_GROUP(deinterleave8); RUN_TEST_GROUP(deinterleave16); - RUN_TEST_GROUP(deinterleave_pdm_samples); - + RUN_TEST_GROUP(fir_1x16_bit); + return UNITY_END(); } diff --git a/tests/unit/src/test_fir_1x16_bit.c 
b/tests/unit/src/test_fir_1x16_bit.c new file mode 100644 index 00000000..3916646c --- /dev/null +++ b/tests/unit/src/test_fir_1x16_bit.c @@ -0,0 +1,102 @@ +// Copyright 2026 XMOS LIMITED. +// This Software is subject to the terms of the XMOS Public Licence: Version 1. +#include +#include +#include +#include + +#include + +#include "unity.h" +#include "unity_fixture.h" + +#include "mic_array/etc/fir_1x16_bit.h" +#include "mic_array/etc/filters_default.h" + +TEST_GROUP_RUNNER(fir_1x16_bit) { + RUN_TEST_CASE(fir_1x16_bit, symmetry_test); + RUN_TEST_CASE(fir_1x16_bit, single_val); + RUN_TEST_CASE(fir_1x16_bit, random_test); +} + +TEST_GROUP(fir_1x16_bit); +TEST_SETUP(fir_1x16_bit) {} +TEST_TEAR_DOWN(fir_1x16_bit) {} + +// Test that opposite signals produce opposite results +TEST(fir_1x16_bit, symmetry_test) +{ + uint32_t signal_pos[1024]; + uint32_t signal_neg[1024]; + + // Using real stage 1 coefficients + extern uint32_t stage1_coef[STAGE1_WORDS]; + + memset(signal_pos, 0x00, sizeof(signal_pos)); // All +1 + memset(signal_neg, 0xFF, sizeof(signal_neg)); // All -1 + + int result_pos = fir_1x16_bit(signal_pos, stage1_coef); + int result_neg = fir_1x16_bit(signal_neg, stage1_coef); + + // Opposite signals should give opposite results + TEST_ASSERT_EQUAL_INT(-result_pos, result_neg); +} + +// Test zero signal with known inputs/outputs +TEST(fir_1x16_bit, single_val) +{ + const int expected_result = 268435456; + const unsigned max_cycles = 35; + + unsigned elapsed = 0; + int result = -1; + uint32_t signal[1024]; + memset(signal, 0, sizeof(signal)); + + elapsed = get_reference_time(); + result = fir_1x16_bit(signal, stage1_coef); + elapsed = get_reference_time() - elapsed; + + TEST_ASSERT_EQUAL_INT(expected_result, result); + TEST_ASSERT_LESS_OR_EQUAL(max_cycles, elapsed); +} + +TEST(fir_1x16_bit, random_test) +{ + #define n_vpu 16 + #define sig_len (n_vpu * 20) + #define PRINT_OUT (1) + + const int sig_exp[n_vpu] = { + -58529792,34287616,70240256,17392640,52816384, + 
-51980800,54905856,40349696,-60945408,14667776, + -3800064,33825280,-1670656,879616,-23246848,-11620864, + }; + + uint32_t sig_in[sig_len] = {0}; + int sig_out[n_vpu] = {0}; + + // seed + srand(12345); + for (unsigned i = 0; i < sig_len; i++) + { + sig_in[i] = rand() & 0xFFFFFFFF; // Random 32-bit word + } + + // Using real stage 1 coefficients + for (unsigned i = 0; i < n_vpu; i++) + { + uint32_t *sig_ptr = &sig_in[i * 20]; // 20 words per VPU block + sig_out[i] = fir_1x16_bit(sig_ptr, stage1_coef); + } + + #if PRINT_OUT + printf("\nExpected vs Actual:\n"); + for (unsigned i = 0; i < n_vpu; i++) + { + printf("sig_out[%u] = %d, sig_exp = %d\n", i, sig_out[i], sig_exp[i]); + } + #endif + + TEST_ASSERT_EQUAL_INT_ARRAY(sig_exp, sig_out, n_vpu); +}